diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/config.yaml b/v2xverse_late_multiclass_2025_01_28_08_49_56/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d33b18ba973a87f2fcfe6d6ccd879a8c104c54a4 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/config.yaml @@ -0,0 +1,213 @@ +comm_range: 200 +data_augment: +- ALONG_AXIS_LIST: + - x + NAME: random_world_flip +- NAME: random_world_rotation + WORLD_ROT_ANGLE: + - -0.78539816 + - 0.78539816 +- NAME: random_world_scaling + WORLD_SCALE_RANGE: + - 0.95 + - 1.05 +fusion: + args: + clip_pc: false + proj_first: false + core_method: intermediatemulticlass + dataset: v2xverse +input_source: +- lidar +label_type: lidar +loss: + args: + cls_weight: 5.0 + code_weights: + - 1.0 + - 1.0 + - 1.0 + - 1.0 + - 1.0 + - 1.0 + - 5.0 + - 5.0 + loc_weight: 1.0 + target_assigner_config: + box_coder: ResidualCoder + cav_lidar_range: &id004 + - -36 + - -12 + - -22 + - 36 + - 12 + - 14 + gaussian_overlap: 0.1 + max_objs: 40 + min_radius: 2 + out_size_factor: 2 + voxel_size: &id001 + - 0.125 + - 0.125 + - 36 + core_method: center_point_loss_multiclass +lr_scheduler: + core_method: multistep + gamma: 0.1 + step_size: + - 8 + - 15 +model: + args: + anchor_number: 3 + att: + feat_dim: 64 + base_bev_backbone: + compression: 0 + layer_nums: &id002 + - 3 + - 4 + - 5 + layer_strides: + - 2 + - 2 + - 2 + num_filters: &id003 + - 64 + - 128 + - 256 + num_upsample_filter: + - 128 + - 128 + - 128 + resnet: true + upsample_strides: + - 1 + - 2 + - 4 + voxel_size: *id001 + fusion_args: + agg_operator: + feature_dim: 256 + mode: MAX + downsample_rate: 2 + dropout_rate: 0 + in_channels: 256 + layer_nums: *id002 + multi_scale: false + n_head: 8 + num_filters: *id003 + only_attention: true + voxel_size: *id001 + fusion_method: max + lidar_range: *id004 + max_cav: 5 + multi_class: true + out_size_factor: 2 + pillar_vfe: + num_filters: + - 64 + use_absolute_xyz: true + use_norm: true + with_distance: false + point_pillar_scatter: + grid_size: !!python/object/apply:numpy.core.multiarray._reconstruct + args: + - !!python/name:numpy.ndarray '' + - !!python/tuple + - 0 + - !!binary | + Yg== + state: !!python/tuple + - 1 + - !!python/tuple + - 3 + - !!python/object/apply:numpy.dtype + args: + - i8 + - 0 + - 1 + state: !!python/tuple + - 3 + - < + - null + - null + - null + - -1 + - -1 + - 0 + - false + - !!binary | + QAIAAAAAAADAAAAAAAAAAAEAAAAAAAAA + num_features: 64 + shrink_header: + dim: + - 128 + input_dim: 384 + kernal_size: + - 3 + padding: + - 1 + stride: + - 1 + supervise_fusion: false + supervise_single: true + voxel_size: *id001 + core_method: point_pillar_single_multiclass +name: v2xverse_late_multiclass +noise_setting: !!python/object/apply:collections.OrderedDict +- - - add_noise + - false +optimizer: + args: + eps: 1.0e-10 + weight_decay: 0.0001 + core_method: Adam + lr: 0.002 +postprocess: + anchor_args: + D: 1 + H: 192 + W: 576 + cav_lidar_range: *id004 + feature_stride: 2 + h: 1.56 + l: 3.9 + num: 1 + r: &id005 + - 0 + vd: 36 + vh: 0.125 + vw: 0.125 + w: 1.6 + core_method: VoxelPostprocessor + dir_args: + anchor_yaw: *id005 + dir_offset: 0.7853 + num_bins: 1 + gt_range: *id004 + max_num: 100 + nms_thresh: 0.15 + order: hwl + target_args: + neg_threshold: 0.45 + pos_threshold: 0.6 + score_threshold: 0.2 +preprocess: + args: + max_points_per_voxel: 32 + max_voxel_test: 70000 + max_voxel_train: 32000 + voxel_size: *id001 + cav_lidar_range: *id004 + core_method: SpVoxelPreprocessor +root_dir: external_paths/data_root 
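+# Note: point_pillar_scatter.grid_size above serializes a numpy array that
+# appears to decode to [576, 192, 1], i.e. (cav_lidar_range extent) / voxel_size
+# = [(36 - -36) / 0.125, (12 - -12) / 0.125, (14 - -22) / 36], consistent with
+# postprocess.anchor_args (W: 576, H: 192, D: 1, vw/vh: 0.125, vd: 36).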
+test_dir: external_paths/data_root +train_params: + batch_size: 4 + epoches: 40 + eval_freq: 1 + max_cav: 5 + save_freq: 1 +validate_dir: external_paths/data_root +yaml_parser: load_point_pillar_params diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/events.out.tfevents.1738072197.poliwag.engin.umich.edu b/v2xverse_late_multiclass_2025_01_28_08_49_56/events.out.tfevents.1738072197.poliwag.engin.umich.edu new file mode 100644 index 0000000000000000000000000000000000000000..369f7e00f6e94ac83726248cdc23478f4f096ef1 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/events.out.tfevents.1738072197.poliwag.engin.umich.edu @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac3b8a28e7fba347631b57fb22d403037b9f1fa244f0b566d60222d5c9bf5756 +size 498679515 diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/net_epoch_bestval_at14.pth b/v2xverse_late_multiclass_2025_01_28_08_49_56/net_epoch_bestval_at14.pth new file mode 100644 index 0000000000000000000000000000000000000000..08ff64964d58d62e93590dff809cb59a9c65735e --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/net_epoch_bestval_at14.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba3fef03956eb6da6eb9721db6baf142f81f85ac84cd95324c1e37065d387b50 +size 32820345 diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/__init__.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/__pycache__/__init__.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..072108341c943d97d9ccd526e1aec39dccdb9836 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/__pycache__/__init__.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__init__.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/__init__.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1a3d17bf3b63d117b7c030907001c890fa2da586 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/__init__.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/augment_utils.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/augment_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2cbd6b4b39372cbe9bde9e480fa45d4ea6a7068c Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/augment_utils.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/data_augmentor.cpython-37.pyc 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/data_augmentor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1e549f4c1aadf71f8fe17d8364b04b61ba22d6eb Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/__pycache__/data_augmentor.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/augment_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/augment_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..af12bedcf1111e9ea4db37ac10395cdce566960e --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/augment_utils.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +# Author: OpenPCDet + +import numpy as np + +from opencood.utils import common_utils + + +def random_flip_along_x(gt_boxes, points): + """ + Args: + gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]] + points: (M, 3 + C) + Returns: + """ + enable = np.random.choice([False, True], replace=False, p=[0.5, 0.5]) + if enable: + gt_boxes[:, 1] = -gt_boxes[:, 1] + gt_boxes[:, 6] = -gt_boxes[:, 6] + points[:, 1] = -points[:, 1] + + if gt_boxes.shape[1] > 7: + gt_boxes[:, 8] = -gt_boxes[:, 8] + + return gt_boxes, points + + +def random_flip_along_y(gt_boxes, points): + """ + Args: + gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]] + points: (M, 3 + C) + Returns: + """ + enable = np.random.choice([False, True], replace=False, p=[0.5, 0.5]) + if enable: + gt_boxes[:, 0] = -gt_boxes[:, 0] + gt_boxes[:, 6] = -(gt_boxes[:, 6] + np.pi) + points[:, 0] = -points[:, 0] + + if gt_boxes.shape[1] > 7: + gt_boxes[:, 7] = -gt_boxes[:, 7] + + return gt_boxes, points + + +def global_rotation(gt_boxes, points, rot_range): + """ + Args: + gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]] + points: (M, 3 + C), + rot_range: [min, max] + Returns: + """ + noise_rotation = np.random.uniform(rot_range[0], + rot_range[1]) + points = common_utils.rotate_points_along_z(points[np.newaxis, :, :], + np.array([noise_rotation]))[0] + + gt_boxes[:, 0:3] = \ + common_utils.rotate_points_along_z(gt_boxes[np.newaxis, :, 0:3], + np.array([noise_rotation]))[0] + gt_boxes[:, 6] += noise_rotation + + if gt_boxes.shape[1] > 7: + gt_boxes[:, 7:9] = common_utils.rotate_points_along_z( + np.hstack((gt_boxes[:, 7:9], np.zeros((gt_boxes.shape[0], 1))))[ + np.newaxis, :, :], + np.array([noise_rotation]))[0][:, 0:2] + + return gt_boxes, points + + +def global_scaling(gt_boxes, points, scale_range): + """ + Args: + gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading] + points: (M, 3 + C), + scale_range: [min, max] + Returns: + """ + if scale_range[1] - scale_range[0] < 1e-3: + return gt_boxes, points + noise_scale = np.random.uniform(scale_range[0], scale_range[1]) + points[:, :3] *= noise_scale + gt_boxes[:, :6] *= noise_scale + + return gt_boxes, points diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/data_augmentor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/data_augmentor.py new file mode 100644 index 0000000000000000000000000000000000000000..82e5533c1d4857c39bac0272b597d5ecd14d3956 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/augmentor/data_augmentor.py @@ -0,0 +1,120 @@ +# -*- coding: utf-8 -*- +""" +Class for data augmentation +""" +# Author: Runsheng Xu +# 
License: TDG-Attribution-NonCommercial-NoDistrib + +from functools import partial + +import numpy as np + +from opencood.data_utils.augmentor import augment_utils + + +class DataAugmentor(object): + """ + Data Augmentor. + + Parameters + ---------- + augment_config : list + A list of augmentation configuration. + + Attributes + ---------- + data_augmentor_queue : list + The list of data augmented functions. + """ + + def __init__(self, augment_config, train=True): + self.data_augmentor_queue = [] + self.train = train + + for cur_cfg in augment_config: + cur_augmentor = getattr(self, cur_cfg['NAME'])(config=cur_cfg) + self.data_augmentor_queue.append(cur_augmentor) + + def random_world_flip(self, data_dict=None, config=None): + if data_dict is None: + return partial(self.random_world_flip, config=config) + + gt_boxes, gt_mask, points = data_dict['object_bbx_center'], \ + data_dict['object_bbx_mask'], \ + data_dict['lidar_np'] + gt_boxes_valid = gt_boxes[gt_mask == 1] + + for cur_axis in config['ALONG_AXIS_LIST']: + assert cur_axis in ['x', 'y'] + gt_boxes_valid, points = getattr(augment_utils, + 'random_flip_along_%s' % cur_axis)( + gt_boxes_valid, points, + ) + + gt_boxes[:gt_boxes_valid.shape[0], :] = gt_boxes_valid + + data_dict['object_bbx_center'] = gt_boxes + data_dict['object_bbx_mask'] = gt_mask + data_dict['lidar_np'] = points + + return data_dict + + def random_world_rotation(self, data_dict=None, config=None): + if data_dict is None: + return partial(self.random_world_rotation, config=config) + + rot_range = config['WORLD_ROT_ANGLE'] + if not isinstance(rot_range, list): + rot_range = [-rot_range, rot_range] + + gt_boxes, gt_mask, points = data_dict['object_bbx_center'], \ + data_dict['object_bbx_mask'], \ + data_dict['lidar_np'] + gt_boxes_valid = gt_boxes[gt_mask == 1] + gt_boxes_valid, points = augment_utils.global_rotation( + gt_boxes_valid, points, rot_range=rot_range + ) + gt_boxes[:gt_boxes_valid.shape[0], :] = gt_boxes_valid + + data_dict['object_bbx_center'] = gt_boxes + data_dict['object_bbx_mask'] = gt_mask + data_dict['lidar_np'] = points + + return data_dict + + def random_world_scaling(self, data_dict=None, config=None): + if data_dict is None: + return partial(self.random_world_scaling, config=config) + + gt_boxes, gt_mask, points = data_dict['object_bbx_center'], \ + data_dict['object_bbx_mask'], \ + data_dict['lidar_np'] + gt_boxes_valid = gt_boxes[gt_mask == 1] + + gt_boxes_valid, points = augment_utils.global_scaling( + gt_boxes_valid, points, config['WORLD_SCALE_RANGE'] + ) + gt_boxes[:gt_boxes_valid.shape[0], :] = gt_boxes_valid + + data_dict['object_bbx_center'] = gt_boxes + data_dict['object_bbx_mask'] = gt_mask + data_dict['lidar_np'] = points + + return data_dict + + def forward(self, data_dict): + """ + Args: + data_dict: + points: (N, 3 + C_in) + gt_boxes: optional, (N, 7) [x, y, z, dx, dy, dz, heading] + gt_names: optional, (N), string + ... 
+ + Returns: + """ + if self.train: + for cur_augmentor in self.data_augmentor_queue: + data_dict = cur_augmentor(data_dict=data_dict) + + return data_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__init__.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d0fbdc4333c7b7cad70e442b811ceda71a8a9373 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__init__.py @@ -0,0 +1,35 @@ +from opencood.data_utils.datasets.late_fusion_dataset import getLateFusionDataset +from opencood.data_utils.datasets.late_heter_fusion_dataset import getLateheterFusionDataset +from opencood.data_utils.datasets.late_multiclass_fusion_dataset import getLatemulticlassFusionDataset +from opencood.data_utils.datasets.early_fusion_dataset import getEarlyFusionDataset +from opencood.data_utils.datasets.intermediate_fusion_dataset import getIntermediateFusionDataset +from opencood.data_utils.datasets.intermediate_multiclass_fusion_dataset import getIntermediatemulticlassFusionDataset +from opencood.data_utils.datasets.intermediate_2stage_fusion_dataset import getIntermediate2stageFusionDataset +from opencood.data_utils.datasets.intermediate_heter_fusion_dataset import getIntermediateheterFusionDataset +from opencood.data_utils.datasets.basedataset.opv2v_basedataset import OPV2VBaseDataset +from opencood.data_utils.datasets.basedataset.v2xsim_basedataset import V2XSIMBaseDataset +from opencood.data_utils.datasets.basedataset.dairv2x_basedataset import DAIRV2XBaseDataset +from opencood.data_utils.datasets.basedataset.v2xset_basedataset import V2XSETBaseDataset +from opencood.data_utils.datasets.basedataset.v2xverse_basedataset import V2XVERSEBaseDataset +from opencood.data_utils.datasets.late_multiclass_fusion_dataset import getLatemulticlassFusionDataset +from opencood.data_utils.datasets.early_multiclass_fusion_dataset import getEarlymulticlassFusionDataset + +def build_dataset(dataset_cfg, visualize=False, train=True): + fusion_name = dataset_cfg['fusion']['core_method'] + dataset_name = dataset_cfg['fusion']['dataset'] + + assert fusion_name in ['late', 'lateheter', 'intermediate', 'intermediate2stage', 'intermediateheter', 'intermediatemulticlass', 'early', 'latemulticlass', 'earlymulticlass'] + assert dataset_name in ['opv2v', 'v2xsim', 'dairv2x', 'v2xset', 'v2xverse'] + + fusion_dataset_func = "get" + fusion_name.capitalize() + "FusionDataset" + fusion_dataset_func = eval(fusion_dataset_func) + base_dataset_cls = dataset_name.upper() + "BaseDataset" + base_dataset_cls = eval(base_dataset_cls) + + dataset = fusion_dataset_func(base_dataset_cls)( + params=dataset_cfg, + visualize=visualize, + train=train + ) + + return dataset diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/__init__.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c97ffb53483f77c963cfa88928cf4ee217152248 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/__init__.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/early_fusion_dataset.cpython-37.pyc 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/early_fusion_dataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a636d44ab6cf8d26897791599e383f68fa56ea55 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/early_fusion_dataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/early_multiclass_fusion_dataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/early_multiclass_fusion_dataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0de6a681dad9df9943a012a13a3a95f4b129dc05 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/early_multiclass_fusion_dataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_2stage_fusion_dataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_2stage_fusion_dataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8dfa895b8f48fe755dde61b409a1e3ce1477cfea Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_2stage_fusion_dataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_fusion_dataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_fusion_dataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2150a33bc33feef273fb9a702609df74f740510a Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_fusion_dataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_heter_fusion_dataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_heter_fusion_dataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..286561c63570b9b338cc7512289f1a4a38e7291f Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_heter_fusion_dataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_multiclass_fusion_dataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_multiclass_fusion_dataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..355a4bb6e683fd9a6756662a05659bfdc9550216 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/intermediate_multiclass_fusion_dataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_fusion_dataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_fusion_dataset.cpython-37.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..e25f0750f45864c0eeecdd8cbc6dcbaabfeb23a1 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_fusion_dataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_heter_fusion_dataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_heter_fusion_dataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d2d63c6a5f0d650a81a6e6b28e40d6d6b8dfb258 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_heter_fusion_dataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_multiclass_fusion_dataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_multiclass_fusion_dataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..725417113b70f712c3a3f7d025af36da5f438053 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/__pycache__/late_multiclass_fusion_dataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/dairv2x_basedataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/dairv2x_basedataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3fe14c4aef2efa77fcb296e7c3134b76e50e2225 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/dairv2x_basedataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/opv2v_basedataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/opv2v_basedataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..80aa5cc0c8e851a06562c9a671ff808a835939a3 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/opv2v_basedataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xset_basedataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xset_basedataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8ba1765ac57e011e45d2e64479e9a046921cb8c4 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xset_basedataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xsim_basedataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xsim_basedataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab1a85a5ac0b6f14520c7952aad357b630cd841d Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xsim_basedataset.cpython-37.pyc differ diff 
--git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xverse_basedataset.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xverse_basedataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b69bab71d254e0dfd9ea6de4212cd670abf6fef8 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/__pycache__/v2xverse_basedataset.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/dairv2x_basedataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/dairv2x_basedataset.py new file mode 100644 index 0000000000000000000000000000000000000000..35d6641f358e7813adeb492680571ae7066eeebf --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/dairv2x_basedataset.py @@ -0,0 +1,285 @@ +import os +from collections import OrderedDict +import cv2 +import h5py +import torch +import numpy as np +from functools import partial +from torch.utils.data import Dataset +from PIL import Image +import random +import opencood.utils.pcd_utils as pcd_utils +from opencood.data_utils.augmentor.data_augmentor import DataAugmentor +from opencood.hypes_yaml.yaml_utils import load_yaml +from opencood.utils.pcd_utils import downsample_lidar_minimum +from opencood.utils.camera_utils import load_camera_data, load_intrinsic_DAIR_V2X +from opencood.utils.common_utils import read_json +from opencood.utils.transformation_utils import tfm_to_pose, rot_and_trans_to_trasnformation_matrix +from opencood.utils.transformation_utils import veh_side_rot_and_trans_to_trasnformation_matrix +from opencood.utils.transformation_utils import inf_side_rot_and_trans_to_trasnformation_matrix +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.data_utils.post_processor import build_postprocessor + +class DAIRV2XBaseDataset(Dataset): + def __init__(self, params, visualize, train=True): + self.params = params + self.visualize = visualize + self.train = train + + self.pre_processor = build_preprocessor(params["preprocess"], train) + self.post_processor = build_postprocessor(params["postprocess"], train) + self.post_processor.generate_gt_bbx = self.post_processor.generate_gt_bbx_by_iou + if 'data_augment' in params: # late and early + self.data_augmentor = DataAugmentor(params['data_augment'], train) + else: # intermediate + self.data_augmentor = None + + if 'clip_pc' in params['fusion']['args'] and params['fusion']['args']['clip_pc']: + self.clip_pc = True + else: + self.clip_pc = False + + if 'train_params' not in params or 'max_cav' not in params['train_params']: + self.max_cav = 2 + else: + self.max_cav = params['train_params']['max_cav'] + + self.load_lidar_file = True if 'lidar' in params['input_source'] or self.visualize else False + self.load_camera_file = True if 'camera' in params['input_source'] else False + self.load_depth_file = True if 'depth' in params['input_source'] else False + + assert self.load_depth_file is False + + self.label_type = params['label_type'] # 'lidar' or 'camera' + self.generate_object_center = self.generate_object_center_lidar if self.label_type == "lidar" \ + else self.generate_object_center_camera + + if self.load_camera_file: + self.data_aug_conf = params["fusion"]["args"]["data_aug_conf"] + + if self.train: + split_dir = 
params['root_dir'] + else: + split_dir = params['validate_dir'] + + self.root_dir = params['data_dir'] + + self.split_info = read_json(split_dir) + co_datainfo = read_json(os.path.join(self.root_dir, 'cooperative/data_info.json')) + self.co_data = OrderedDict() + for frame_info in co_datainfo: + veh_frame_id = frame_info['vehicle_image_path'].split("/")[-1].replace(".jpg", "") + self.co_data[veh_frame_id] = frame_info + + if "noise_setting" not in self.params: + self.params['noise_setting'] = OrderedDict() + self.params['noise_setting']['add_noise'] = False + + def reinitialize(self): + pass + + def retrieve_base_data(self, idx): + """ + Given the index, return the corresponding data. + NOTICE! + It is different from Intermediate Fusion and Early Fusion + Label is not cooperative and loaded for both veh side and inf side. + Parameters + ---------- + idx : int + Index given by dataloader. + Returns + ------- + data : dict + The dictionary contains loaded yaml params and lidar data for + each cav. + """ + veh_frame_id = self.split_info[idx] + frame_info = self.co_data[veh_frame_id] + system_error_offset = frame_info["system_error_offset"] + data = OrderedDict() + + data[0] = OrderedDict() + data[0]['ego'] = True + data[1] = OrderedDict() + data[1]['ego'] = False + + data[0]['params'] = OrderedDict() + data[1]['params'] = OrderedDict() + + # pose of agent + lidar_to_novatel = read_json(os.path.join(self.root_dir,'vehicle-side/calib/lidar_to_novatel/'+str(veh_frame_id)+'.json')) + novatel_to_world = read_json(os.path.join(self.root_dir,'vehicle-side/calib/novatel_to_world/'+str(veh_frame_id)+'.json')) + transformation_matrix = veh_side_rot_and_trans_to_trasnformation_matrix(lidar_to_novatel, novatel_to_world) + data[0]['params']['lidar_pose'] = tfm_to_pose(transformation_matrix) + + inf_frame_id = frame_info['infrastructure_image_path'].split("/")[-1].replace(".jpg", "") + virtuallidar_to_world = read_json(os.path.join(self.root_dir,'infrastructure-side/calib/virtuallidar_to_world/'+str(inf_frame_id)+'.json')) + transformation_matrix = inf_side_rot_and_trans_to_trasnformation_matrix(virtuallidar_to_world, system_error_offset) + data[1]['params']['lidar_pose'] = tfm_to_pose(transformation_matrix) + + data[0]['params']['vehicles_front'] = read_json(os.path.join(self.root_dir,frame_info['cooperative_label_path'].replace("label_world", "label_world_backup"))) + data[0]['params']['vehicles_all'] = read_json(os.path.join(self.root_dir,frame_info['cooperative_label_path'])) + + data[1]['params']['vehicles_front'] = [] # we only load cooperative label in vehicle side + data[1]['params']['vehicles_all'] = [] # we only load cooperative label in vehicle side + + if self.load_camera_file: + data[0]['camera_data'] = load_camera_data([os.path.join(self.root_dir, frame_info["vehicle_image_path"])]) + data[0]['params']['camera0'] = OrderedDict() + data[0]['params']['camera0']['extrinsic'] = rot_and_trans_to_trasnformation_matrix( \ + read_json(os.path.join(self.root_dir, 'vehicle-side/calib/lidar_to_camera/'+str(veh_frame_id)+'.json'))) + data[0]['params']['camera0']['intrinsic'] = load_intrinsic_DAIR_V2X( \ + read_json(os.path.join(self.root_dir, 'vehicle-side/calib/camera_intrinsic/'+str(veh_frame_id)+'.json'))) + + data[1]['camera_data']= load_camera_data([os.path.join(self.root_dir,frame_info["infrastructure_image_path"])]) + data[1]['params']['camera0'] = OrderedDict() + data[1]['params']['camera0']['extrinsic'] = rot_and_trans_to_trasnformation_matrix( \ + read_json(os.path.join(self.root_dir, 
'infrastructure-side/calib/virtuallidar_to_camera/'+str(inf_frame_id)+'.json'))) + data[1]['params']['camera0']['intrinsic'] = load_intrinsic_DAIR_V2X( \ + read_json(os.path.join(self.root_dir, 'infrastructure-side/calib/camera_intrinsic/'+str(inf_frame_id)+'.json'))) + + + if self.load_lidar_file or self.visualize: + data[0]['lidar_np'], _ = pcd_utils.read_pcd(os.path.join(self.root_dir,frame_info["vehicle_pointcloud_path"])) + data[1]['lidar_np'], _ = pcd_utils.read_pcd(os.path.join(self.root_dir,frame_info["infrastructure_pointcloud_path"])) + + + # Label for single side + data[0]['params']['vehicles_single_front'] = read_json(os.path.join(self.root_dir, \ + 'vehicle-side/label/lidar_backup/{}.json'.format(veh_frame_id))) + data[0]['params']['vehicles_single_all'] = read_json(os.path.join(self.root_dir, \ + 'vehicle-side/label/lidar/{}.json'.format(veh_frame_id))) + data[1]['params']['vehicles_single_front'] = read_json(os.path.join(self.root_dir, \ + 'infrastructure-side/label/virtuallidar/{}.json'.format(inf_frame_id))) + data[1]['params']['vehicles_single_all'] = read_json(os.path.join(self.root_dir, \ + 'infrastructure-side/label/virtuallidar/{}.json'.format(inf_frame_id))) + + if getattr(self, "heterogeneous", False): + self.generate_object_center_lidar = \ + partial(self.generate_object_center_single_hetero, modality='lidar') + self.generate_object_center_camera = \ + partial(self.generate_object_center_single_hetero, modality='camera') + + # by default + data[0]['modality_name'] = 'm1' + data[1]['modality_name'] = 'm2' + # veh cam inf lidar + data[0]['modality_name'] = 'm2' + data[1]['modality_name'] = 'm1' + + if self.train: # randomly choose LiDAR or Camera to be Ego + p = np.random.rand() + if p > 0.5: + data[0], data[1] = data[1], data[0] + data[0]['ego'] = True + data[1]['ego'] = False + else: + # evaluate, the agent of ego modality should be ego + if self.adaptor.mapping_dict[data[0]['modality_name']] not in self.ego_modality and \ + self.adaptor.mapping_dict[data[1]['modality_name']] in self.ego_modality: + data[0], data[1] = data[1], data[0] + data[0]['ego'] = True + data[1]['ego'] = False + + data[0]['modality_name'] = self.adaptor.reassign_cav_modality(data[0]['modality_name'], 0) + data[1]['modality_name'] = self.adaptor.reassign_cav_modality(data[1]['modality_name'], 1) + + + return data + + + def __len__(self): + return len(self.split_info) + + def __getitem__(self, idx): + pass + + + def generate_object_center_lidar(self, + cav_contents, + reference_lidar_pose): + """ + reference lidar 's coordinate + """ + for cav_content in cav_contents: + cav_content['params']['vehicles'] = cav_content['params']['vehicles_all'] + return self.post_processor.generate_object_center_dairv2x(cav_contents, + reference_lidar_pose) + + def generate_object_center_camera(self, + cav_contents, + reference_lidar_pose): + """ + reference lidar 's coordinate + """ + for cav_content in cav_contents: + cav_content['params']['vehicles'] = cav_content['params']['vehicles_front'] + return self.post_processor.generate_object_center_dairv2x(cav_contents, + reference_lidar_pose) + + ### Add new func for single side + def generate_object_center_single(self, + cav_contents, + reference_lidar_pose, + **kwargs): + """ + veh or inf 's coordinate. + + reference_lidar_pose is of no use. 
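+        Uses 'vehicles_single_front' when self.label_type == 'camera',
+        otherwise 'vehicles_single_all', then delegates to
+        post_processor.generate_object_center_dairv2x_single.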
+ """ + suffix = "_single" + for cav_content in cav_contents: + cav_content['params']['vehicles_single'] = \ + cav_content['params']['vehicles_single_front'] if self.label_type == 'camera' else \ + cav_content['params']['vehicles_single_all'] + return self.post_processor.generate_object_center_dairv2x_single(cav_contents, suffix) + + ### Add for heterogeneous, transforming the single label from self coord. to ego coord. + def generate_object_center_single_hetero(self, + cav_contents, + reference_lidar_pose, + modality): + """ + loading the object from single agent. + + The same as *generate_object_center_single*, but it will transform the object to reference(ego) coordinate, + using reference_lidar_pose. + """ + suffix = "_single" + for cav_content in cav_contents: + cav_content['params']['vehicles_single'] = \ + cav_content['params']['vehicles_single_front'] if modality == 'camera' else \ + cav_content['params']['vehicles_single_all'] + return self.post_processor.generate_object_center_dairv2x_single_hetero(cav_contents, reference_lidar_pose, suffix) + + + def get_ext_int(self, params, camera_id): + lidar_to_camera = params["camera%d" % camera_id]['extrinsic'].astype(np.float32) # R_cw + camera_to_lidar = np.linalg.inv(lidar_to_camera) # R_wc + camera_intrinsic = params["camera%d" % camera_id]['intrinsic'].astype(np.float32 + ) + return camera_to_lidar, camera_intrinsic + + def augment(self, lidar_np, object_bbx_center, object_bbx_mask): + """ + Given the raw point cloud, augment by flipping and rotation. + Parameters + ---------- + lidar_np : np.ndarray + (n, 4) shape + object_bbx_center : np.ndarray + (n, 7) shape to represent bbx's x, y, z, h, w, l, yaw + object_bbx_mask : np.ndarray + Indicate which elements in object_bbx_center are padded. + """ + tmp_dict = {'lidar_np': lidar_np, + 'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask} + tmp_dict = self.data_augmentor.forward(tmp_dict) + + lidar_np = tmp_dict['lidar_np'] + object_bbx_center = tmp_dict['object_bbx_center'] + object_bbx_mask = tmp_dict['object_bbx_mask'] + + return lidar_np, object_bbx_center, object_bbx_mask \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/opv2v_basedataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/opv2v_basedataset.py new file mode 100644 index 0000000000000000000000000000000000000000..8bf0662325c49fdbe7ee4c375873ab9632e1c5ac --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/opv2v_basedataset.py @@ -0,0 +1,479 @@ + +import os +from collections import OrderedDict +import cv2 +import h5py +import torch +import numpy as np +from torch.utils.data import Dataset +from PIL import Image +import json +import random +import opencood.utils.pcd_utils as pcd_utils +from opencood.data_utils.augmentor.data_augmentor import DataAugmentor +from opencood.hypes_yaml.yaml_utils import load_yaml +from opencood.utils.camera_utils import load_camera_data +from opencood.utils.transformation_utils import x1_to_x2 +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.data_utils.post_processor import build_postprocessor + +class OPV2VBaseDataset(Dataset): + def __init__(self, params, visualize, train=True): + self.params = params + self.visualize = visualize + self.train = train + + self.pre_processor = build_preprocessor(params["preprocess"], train) + self.post_processor = 
build_postprocessor(params["postprocess"], train) + if 'data_augment' in params: # late and early + self.data_augmentor = DataAugmentor(params['data_augment'], train) + else: # intermediate + self.data_augmentor = None + + if self.train: + root_dir = params['root_dir'] + else: + root_dir = params['validate_dir'] + self.root_dir = root_dir + + print("Dataset dir:", root_dir) + + if 'train_params' not in params or \ + 'max_cav' not in params['train_params']: + self.max_cav = 5 + else: + self.max_cav = params['train_params']['max_cav'] + + self.load_lidar_file = True if 'lidar' in params['input_source'] or self.visualize else False + self.load_camera_file = True if 'camera' in params['input_source'] else False + self.load_depth_file = True if 'depth' in params['input_source'] else False + + self.label_type = params['label_type'] # 'lidar' or 'camera' + self.generate_object_center = self.generate_object_center_lidar if self.label_type == "lidar" \ + else self.generate_object_center_camera + self.generate_object_center_single = self.generate_object_center # will it follows 'self.generate_object_center' when 'self.generate_object_center' change? + + if self.load_camera_file: + self.data_aug_conf = params["fusion"]["args"]["data_aug_conf"] + + # by default, we load lidar, camera and metadata. But users may + # define additional inputs/tasks + self.add_data_extension = \ + params['add_data_extension'] if 'add_data_extension' \ + in params else [] + + if "noise_setting" not in self.params: + self.params['noise_setting'] = OrderedDict() + self.params['noise_setting']['add_noise'] = False + + # first load all paths of different scenarios + scenario_folders = sorted([os.path.join(root_dir, x) + for x in os.listdir(root_dir) if + os.path.isdir(os.path.join(root_dir, x))]) + + self.scenario_folders = scenario_folders + + self.reinitialize() + + + def reinitialize(self): + # Structure: {scenario_id : {cav_1 : {timestamp1 : {yaml: path, + # lidar: path, cameras:list of path}}}} + self.scenario_database = OrderedDict() + self.len_record = [] + + # loop over all scenarios + for (i, scenario_folder) in enumerate(self.scenario_folders): + self.scenario_database.update({i: OrderedDict()}) + + # at least 1 cav should show up + if self.train: + cav_list = [x for x in os.listdir(scenario_folder) + if os.path.isdir( + os.path.join(scenario_folder, x))] + # cav_list = sorted(cav_list) + random.shuffle(cav_list) + else: + cav_list = sorted([x for x in os.listdir(scenario_folder) + if os.path.isdir( + os.path.join(scenario_folder, x))]) + assert len(cav_list) > 0 + + """ + roadside unit data's id is always negative, so here we want to + make sure they will be in the end of the list as they shouldn't + be ego vehicle. 
+ """ + if int(cav_list[0]) < 0: + cav_list = cav_list[1:] + [cav_list[0]] + + """ + make the first cav to be ego modality + """ + if getattr(self, "heterogeneous", False): + scenario_name = scenario_folder.split("/")[-1] + cav_list = self.adaptor.reorder_cav_list(cav_list, scenario_name) + + + # loop over all CAV data + for (j, cav_id) in enumerate(cav_list): + if j > self.max_cav - 1: + print('too many cavs reinitialize') + break + self.scenario_database[i][cav_id] = OrderedDict() + + # save all yaml files to the dictionary + cav_path = os.path.join(scenario_folder, cav_id) + + yaml_files = \ + sorted([os.path.join(cav_path, x) + for x in os.listdir(cav_path) if + x.endswith('.yaml') and 'additional' not in x]) + + # this timestamp is not ready + yaml_files = [x for x in yaml_files if not ("2021_08_20_21_10_24" in x and "000265" in x)] + + timestamps = self.extract_timestamps(yaml_files) + + for timestamp in timestamps: + self.scenario_database[i][cav_id][timestamp] = \ + OrderedDict() + yaml_file = os.path.join(cav_path, + timestamp + '.yaml') + lidar_file = os.path.join(cav_path, + timestamp + '.pcd') + camera_files = self.find_camera_files(cav_path, + timestamp) + depth_files = self.find_camera_files(cav_path, + timestamp, sensor="depth") + + self.scenario_database[i][cav_id][timestamp]['yaml'] = \ + yaml_file + self.scenario_database[i][cav_id][timestamp]['lidar'] = \ + lidar_file + self.scenario_database[i][cav_id][timestamp]['cameras'] = \ + camera_files + self.scenario_database[i][cav_id][timestamp]['depths'] = \ + depth_files + + if getattr(self, "heterogeneous", False): + scenario_name = scenario_folder.split("/")[-1] + + cav_modality = self.adaptor.reassign_cav_modality(self.modality_assignment[scenario_name][cav_id] , j) + + self.scenario_database[i][cav_id][timestamp]['modality_name'] = cav_modality + + self.scenario_database[i][cav_id][timestamp]['lidar'] = \ + self.adaptor.switch_lidar_channels(cav_modality, lidar_file) + + + # load extra data + for file_extension in self.add_data_extension: + file_name = \ + os.path.join(cav_path, + timestamp + '_' + file_extension) + + self.scenario_database[i][cav_id][timestamp][ + file_extension] = file_name + + # Assume all cavs will have the same timestamps length. Thus + # we only need to calculate for the first vehicle in the + # scene. + if j == 0: + # we regard the agent with the minimum id as the ego + self.scenario_database[i][cav_id]['ego'] = True + if not self.len_record: + self.len_record.append(len(timestamps)) + else: + prev_last = self.len_record[-1] + self.len_record.append(prev_last + len(timestamps)) + else: + self.scenario_database[i][cav_id]['ego'] = False + + + def retrieve_base_data(self, idx): + """ + Given the index, return the corresponding data. + + Parameters + ---------- + idx : int + Index given by dataloader. + + Returns + ------- + data : dict + The dictionary contains loaded yaml params and lidar data for + each cav. 
+ """ + # we loop the accumulated length list to see get the scenario index + scenario_index = 0 + for i, ele in enumerate(self.len_record): + if idx < ele: + scenario_index = i + break + scenario_database = self.scenario_database[scenario_index] + + # check the timestamp index + timestamp_index = idx if scenario_index == 0 else \ + idx - self.len_record[scenario_index - 1] + # retrieve the corresponding timestamp key + timestamp_key = self.return_timestamp_key(scenario_database, + timestamp_index) + data = OrderedDict() + # load files for all CAVs + for cav_id, cav_content in scenario_database.items(): + data[cav_id] = OrderedDict() + data[cav_id]['ego'] = cav_content['ego'] + + # load param file: json is faster than yaml + json_file = cav_content[timestamp_key]['yaml'].replace("yaml", "json") + if os.path.exists(json_file): + with open(json_file, "r") as f: + data[cav_id]['params'] = json.load(f) + else: + data[cav_id]['params'] = \ + load_yaml(cav_content[timestamp_key]['yaml']) + + # load camera file: hdf5 is faster than png + hdf5_file = cav_content[timestamp_key]['cameras'][0].replace("camera0.png", "imgs.hdf5") + + if os.path.exists(hdf5_file): + with h5py.File(hdf5_file, "r") as f: + data[cav_id]['camera_data'] = [] + data[cav_id]['depth_data'] = [] + for i in range(4): + data[cav_id]['camera_data'].append(Image.fromarray(f[f'camera{i}'][()])) + data[cav_id]['depth_data'].append(Image.fromarray(f[f'depth{i}'][()])) + else: + if self.load_camera_file: + data[cav_id]['camera_data'] = \ + load_camera_data(cav_content[timestamp_key]['cameras']) + if self.load_depth_file: + data[cav_id]['depth_data'] = \ + load_camera_data(cav_content[timestamp_key]['depths']) + + # load lidar file + if self.load_lidar_file or self.visualize: + data[cav_id]['lidar_np'] = \ + pcd_utils.pcd_to_np(cav_content[timestamp_key]['lidar']) + + if getattr(self, "heterogeneous", False): + data[cav_id]['modality_name'] = cav_content[timestamp_key]['modality_name'] + + for file_extension in self.add_data_extension: + # if not find in the current directory + # go to additional folder + if not os.path.exists(cav_content[timestamp_key][file_extension]): + cav_content[timestamp_key][file_extension] = cav_content[timestamp_key][file_extension].replace("train","additional/train") + cav_content[timestamp_key][file_extension] = cav_content[timestamp_key][file_extension].replace("validate","additional/validate") + cav_content[timestamp_key][file_extension] = cav_content[timestamp_key][file_extension].replace("test","additional/test") + + if '.yaml' in file_extension: + data[cav_id][file_extension] = \ + load_yaml(cav_content[timestamp_key][file_extension]) + else: + data[cav_id][file_extension] = \ + cv2.imread(cav_content[timestamp_key][file_extension]) + + + return data + + def __len__(self): + return self.len_record[-1] + + def __getitem__(self, idx): + """ + Abstract method, needs to be define by the children class. + """ + pass + + @staticmethod + def extract_timestamps(yaml_files): + """ + Given the list of the yaml files, extract the mocked timestamps. + + Parameters + ---------- + yaml_files : list + The full path of all yaml files of ego vehicle + + Returns + ------- + timestamps : list + The list containing timestamps only. 
+ """ + timestamps = [] + + for file in yaml_files: + res = file.split('/')[-1] + + timestamp = res.replace('.yaml', '') + timestamps.append(timestamp) + + return timestamps + + @staticmethod + def return_timestamp_key(scenario_database, timestamp_index): + """ + Given the timestamp index, return the correct timestamp key, e.g. + 2 --> '000078'. + + Parameters + ---------- + scenario_database : OrderedDict + The dictionary contains all contents in the current scenario. + + timestamp_index : int + The index for timestamp. + + Returns + ------- + timestamp_key : str + The timestamp key saved in the cav dictionary. + """ + # get all timestamp keys + timestamp_keys = list(scenario_database.items())[0][1] + # retrieve the correct index + timestamp_key = list(timestamp_keys.items())[timestamp_index][0] + + return timestamp_key + + @staticmethod + def find_camera_files(cav_path, timestamp, sensor="camera"): + """ + Retrieve the paths to all camera files. + + Parameters + ---------- + cav_path : str + The full file path of current cav. + + timestamp : str + Current timestamp + + sensor : str + "camera" or "depth" + + Returns + ------- + camera_files : list + The list containing all camera png file paths. + """ + camera0_file = os.path.join(cav_path, + timestamp + f'_{sensor}0.png') + camera1_file = os.path.join(cav_path, + timestamp + f'_{sensor}1.png') + camera2_file = os.path.join(cav_path, + timestamp + f'_{sensor}2.png') + camera3_file = os.path.join(cav_path, + timestamp + f'_{sensor}3.png') + return [camera0_file, camera1_file, camera2_file, camera3_file] + + + def augment(self, lidar_np, object_bbx_center, object_bbx_mask): + """ + Given the raw point cloud, augment by flipping and rotation. + + Parameters + ---------- + lidar_np : np.ndarray + (n, 4) shape + + object_bbx_center : np.ndarray + (n, 7) shape to represent bbx's x, y, z, h, w, l, yaw + + object_bbx_mask : np.ndarray + Indicate which elements in object_bbx_center are padded. + """ + tmp_dict = {'lidar_np': lidar_np, + 'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask} + tmp_dict = self.data_augmentor.forward(tmp_dict) + + lidar_np = tmp_dict['lidar_np'] + object_bbx_center = tmp_dict['object_bbx_center'] + object_bbx_mask = tmp_dict['object_bbx_mask'] + + return lidar_np, object_bbx_center, object_bbx_mask + + + def generate_object_center_lidar(self, + cav_contents, + reference_lidar_pose): + """ + Retrieve all objects in a format of (n, 7), where 7 represents + x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. + The object_bbx_center is in ego coordinate. + + Notice: it is a wrap of postprocessor + + Parameters + ---------- + cav_contents : list + List of dictionary, save all cavs' information. + in fact it is used in get_item_single_car, so the list length is 1 + + reference_lidar_pose : list + The final target lidar pose with length 6. + + Returns + ------- + object_np : np.ndarray + Shape is (max_num, 7). + mask : np.ndarray + Shape is (max_num,). + object_ids : list + Length is number of bbx in current sample. + """ + return self.post_processor.generate_object_center(cav_contents, + reference_lidar_pose) + + def generate_object_center_camera(self, + cav_contents, + reference_lidar_pose): + """ + Retrieve all objects in a format of (n, 7), where 7 represents + x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. + The object_bbx_center is in ego coordinate. + + Notice: it is a wrap of postprocessor + + Parameters + ---------- + cav_contents : list + List of dictionary, save all cavs' information. 
+ in fact it is used in get_item_single_car, so the list length is 1 + + reference_lidar_pose : list + The final target lidar pose with length 6. + + visibility_map : np.ndarray + for OPV2V, its 256*256 resolution. 0.39m per pixel. heading up. + + Returns + ------- + object_np : np.ndarray + Shape is (max_num, 7). + mask : np.ndarray + Shape is (max_num,). + object_ids : list + Length is number of bbx in current sample. + """ + return self.post_processor.generate_visible_object_center( + cav_contents, reference_lidar_pose + ) + + def get_ext_int(self, params, camera_id): + camera_coords = np.array(params["camera%d" % camera_id]["cords"]).astype( + np.float32) + camera_to_lidar = x1_to_x2( + camera_coords, params["lidar_pose_clean"] + ).astype(np.float32) # T_LiDAR_camera + camera_to_lidar = camera_to_lidar @ np.array( + [[0, 0, 1, 0], [1, 0, 0, 0], [0, -1, 0, 0], [0, 0, 0, 1]], + dtype=np.float32) # UE4 coord to opencv coord + camera_intrinsic = np.array(params["camera%d" % camera_id]["intrinsic"]).astype( + np.float32 + ) + return camera_to_lidar, camera_intrinsic \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xset_basedataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xset_basedataset.py new file mode 100644 index 0000000000000000000000000000000000000000..52804df0ecde048154e0259457a0fb5df896a2d3 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xset_basedataset.py @@ -0,0 +1,24 @@ +from opencood.data_utils.datasets.basedataset.opv2v_basedataset import OPV2VBaseDataset + +# All the same as OPV2V +class V2XSETBaseDataset(OPV2VBaseDataset): + def __init__(self, params, visulize, train=True): + super().__init__(params, visulize, train) + + if self.load_camera_file is True: # '2021_09_09_13_20_58'. This scenario has only 3 camera files? + scenario_folders_new = [x for x in self.scenario_folders if '2021_09_09_13_20_58' not in x] + self.scenario_folders = scenario_folders_new + self.reinitialize() + + + def generate_object_center_camera(self, + cav_contents, + reference_lidar_pose): + """ + Since V2XSet has not release bev_visiblity map, we can only filter object by range. 
+ + Suppose the detection range of camera is within 50m + """ + return self.post_processor.generate_object_center_v2xset_camera( + cav_contents, reference_lidar_pose + ) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xsim_basedataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xsim_basedataset.py new file mode 100644 index 0000000000000000000000000000000000000000..0ca114ffab6fa684bd25458bd16775bdb08a487f --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xsim_basedataset.py @@ -0,0 +1,238 @@ +# Author: Yangheng Zhao +import os +import pickle +from collections import OrderedDict +from typing import Dict +from abc import abstractmethod +import numpy as np +import torch +from torch.utils.data import Dataset + +from opencood.data_utils.augmentor.data_augmentor import DataAugmentor +from opencood.utils.common_utils import read_json +from opencood.utils.transformation_utils import tfm_to_pose +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.data_utils.post_processor import build_postprocessor + +class V2XSIMBaseDataset(Dataset): + """ + First version. + Load V2X-sim 2.0 using yifan lu's pickle file. + Only support LiDAR data. + """ + + def __init__(self, + params: Dict, + visualize: bool = False, + train: bool = True): + self.params = params + self.visualize = visualize + self.train = train + + self.pre_processor = build_preprocessor(params["preprocess"], train) + self.post_processor = build_postprocessor(params["postprocess"], train) + if 'data_augment' in params: # late and early + self.data_augmentor = DataAugmentor(params['data_augment'], train) + else: # intermediate + self.data_augmentor = None + + if self.train: + root_dir = params['root_dir'] + else: + root_dir = params['validate_dir'] + self.root_dir = root_dir + + print("Dataset dir:", root_dir) + + if 'train_params' not in params or \ + 'max_cav' not in params['train_params']: + self.max_cav = 5 + else: + self.max_cav = params['train_params']['max_cav'] + + self.load_lidar_file = True if 'lidar' in params['input_source'] or self.visualize else False + self.load_camera_file = True if 'camera' in params['input_source'] else False + self.load_depth_file = True if 'depth' in params['input_source'] else False + + self.label_type = params['label_type'] # 'lidar' or 'camera' + assert self.label_type in ['lidar', 'camera'] + + self.generate_object_center = self.generate_object_center_lidar if self.label_type == "lidar" \ + else self.generate_object_center_camera + self.generate_object_center_single = self.generate_object_center + + self.add_data_extension = \ + params['add_data_extension'] if 'add_data_extension' \ + in params else [] + + if "noise_setting" not in self.params: + self.params['noise_setting'] = OrderedDict() + self.params['noise_setting']['add_noise'] = False + + with open(self.root_dir, 'rb') as f: + dataset_info = pickle.load(f) + self.dataset_info_pkl = dataset_info + + # TODO param: one as ego or all as ego? 
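+        # 'one': exactly one agent (the first in the per-sample permutation,
+        # i.e. j == 0 below) is marked as ego; any other ego_mode currently
+        # raises NotImplementedError in reinitialize().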
+ self.ego_mode = 'one' # "all" + + self.reinitialize() + + def reinitialize(self): + self.scene_database = OrderedDict() + if self.ego_mode == 'one': + self.len_record = len(self.dataset_info_pkl) + else: + raise NotImplementedError(self.ego_mode) + + for i, scene_info in enumerate(self.dataset_info_pkl): + self.scene_database.update({i: OrderedDict()}) + cav_num = scene_info['agent_num'] + assert cav_num > 0 + + if self.train: + cav_ids = 1 + np.random.permutation(cav_num) + else: + cav_ids = list(range(1, cav_num + 1)) + + + for j, cav_id in enumerate(cav_ids): + if j > self.max_cav - 1: + print('too many cavs reinitialize') + break + + self.scene_database[i][cav_id] = OrderedDict() + + self.scene_database[i][cav_id]['ego'] = j==0 + + self.scene_database[i][cav_id]['lidar'] = scene_info[f'lidar_path_{cav_id}'] + # need to delete this line is running in /GPFS + self.scene_database[i][cav_id]['lidar'] = \ + self.scene_database[i][cav_id]['lidar'].replace("/GPFS/rhome/yifanlu/workspace/dataset/v2xsim2-complete", "dataset/V2X-Sim-2.0") + + self.scene_database[i][cav_id]['params'] = OrderedDict() + self.scene_database[i][cav_id][ + 'params']['lidar_pose'] = tfm_to_pose( + scene_info[f"lidar_pose_{cav_id}"] + ) # [x, y, z, roll, pitch, yaw] + self.scene_database[i][cav_id]['params'][ + 'vehicles'] = scene_info[f'labels_{cav_id}'][ + 'gt_boxes_global'] + self.scene_database[i][cav_id]['params'][ + 'object_ids'] = scene_info[f'labels_{cav_id}'][ + 'gt_object_ids'].tolist() + + def __len__(self) -> int: + return self.len_record + + @abstractmethod + def __getitem__(self, index): + pass + + def retrieve_base_data(self, idx): + """ + Given the index, return the corresponding data. + + Parameters + ---------- + idx : int + Index given by dataloader. + + Returns + ------- + data : dict + The dictionary contains loaded yaml params and lidar data for + each cav. + """ + + data = OrderedDict() + # { + # 'cav_id0':{ + # 'ego': bool, + # 'params': { + # 'lidar_pose': [x, y, z, roll, pitch, yaw], + # 'vehicles':{ + # 'id': {'angle', 'center', 'extent', 'location'}, + # ... + # } + # },# 包含agent位置信息和object信息 + # 'camera_data':, + # 'depth_data':, + # 'lidar_np':, + # ... + # } + # 'cav_id1': , + # ... + # } + scene = self.scene_database[idx] + for cav_id, cav_content in scene.items(): + data[f'{cav_id}'] = OrderedDict() + data[f'{cav_id}']['ego'] = cav_content['ego'] + + data[f'{cav_id}']['params'] = cav_content['params'] + + # load the corresponding data into the dictionary + nbr_dims = 4 # x,y,z,intensity + scan = np.fromfile(cav_content['lidar'], dtype='float32') + points = scan.reshape((-1, 5))[:, :nbr_dims] + data[f'{cav_id}']['lidar_np'] = points + + return data + + def generate_object_center_lidar(self, cav_contents, reference_lidar_pose): + """ + Retrieve all objects in a format of (n, 7), where 7 represents + x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. + + Notice: it is a wrap of postprocessor function + + Parameters + ---------- + cav_contents : list + List of dictionary, save all cavs' information. + in fact it is used in get_item_single_car, so the list length is 1 + + reference_lidar_pose : list + The final target lidar pose with length 6. + + Returns + ------- + object_np : np.ndarray + Shape is (max_num, 7). + mask : np.ndarray + Shape is (max_num,). + object_ids : list + Length is number of bbx in current sample. 
+ """ + + return self.post_processor.generate_object_center_v2x( + cav_contents, reference_lidar_pose) + + def generate_object_center_camera(self, cav_contents, reference_lidar_pose): + raise NotImplementedError() + + def augment(self, lidar_np, object_bbx_center, object_bbx_mask): + """ + Given the raw point cloud, augment by flipping and rotation. + + Parameters + ---------- + lidar_np : np.ndarray + (n, 4) shape + + object_bbx_center : np.ndarray + (n, 7) shape to represent bbx's x, y, z, h, w, l, yaw + + object_bbx_mask : np.ndarray + Indicate which elements in object_bbx_center are padded. + """ + tmp_dict = {'lidar_np': lidar_np, + 'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask} + tmp_dict = self.data_augmentor.forward(tmp_dict) + + lidar_np = tmp_dict['lidar_np'] + object_bbx_center = tmp_dict['object_bbx_center'] + object_bbx_mask = tmp_dict['object_bbx_mask'] + + return lidar_np, object_bbx_center, object_bbx_mask \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xverse_basedataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xverse_basedataset.py new file mode 100644 index 0000000000000000000000000000000000000000..9ef2a524169daab2f87c3e210921d8a89990689c --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/basedataset/v2xverse_basedataset.py @@ -0,0 +1,1118 @@ + +import os +from collections import OrderedDict +import cv2 +import h5py +import torch +import torchvision +import numpy as np +from torch.utils.data import Dataset +from PIL import Image +import json +import random +import re +import math + +import logging +_logger = logging.getLogger(__name__) + +import opencood.utils.pcd_utils as pcd_utils +from opencood.data_utils.augmentor.data_augmentor import DataAugmentor +from opencood.hypes_yaml.yaml_utils import load_yaml +from opencood.utils.camera_utils import load_camera_data +from opencood.utils.transformation_utils import x1_to_x2 +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.data_utils.post_processor import build_postprocessor + + +class V2XVERSEBaseDataset(Dataset): + def __init__(self, params, visualize, train=True): + self.params = params + self.visualize = visualize + self.train = train + + self.pre_processor = build_preprocessor(params["preprocess"], train) + self.post_processor = build_postprocessor(params["postprocess"], train) + self.data_augmentor = DataAugmentor(params['data_augment'], + train) + + self.frame_gap = params.get('frame_gap',200) + self.time_delay = params.get('time_delay',0) + + if 'target_assigner_config' in self.params['loss']['args']: + self.det_range = self.params['loss']['args']['target_assigner_config']['cav_lidar_range'] # [-36, -36, -22, 36, 36, 14] + else: + self.det_range = [-36, -36, -22, 36, 36, 14] + + if self.time_delay % self.frame_gap != 0: + print("Time delay of v2xverse dataset should be a multiple of frame_gap !") + self.frame_delay = int(self.time_delay / self.frame_gap) + print(f'*** time_delay = {self.time_delay} ***') + + self.test_flag = False + if self.train: + root_dir = params['root_dir'] + towns = [1,2,3,4,6] + elif not visualize: + root_dir = params['validate_dir'] + towns = [7,10] # [6,7,8,9,10] + else: + root_dir = params['test_dir'] + towns = [5] + self.test_flag = True + self.root_dir = root_dir + self.clock = 0 + + print("Dataset dir:", root_dir) + + if 'train_params' not in params or \ + 
'max_cav' not in params['train_params']: + self.max_cav = 5 + else: + self.max_cav = params['train_params']['max_cav'] + + self.load_lidar_file = True if 'lidar' in params['input_source'] or self.visualize else False + self.load_camera_file = True if 'camera' in params['input_source'] else False + self.load_depth_file = True if 'depth' in params['input_source'] else False + + self.label_type = params['label_type'] # 'lidar' or 'camera' + self.generate_object_center = self.generate_object_center_lidar if self.label_type == "lidar" \ + else self.generate_object_center_camera + self.generate_object_center_single = self.generate_object_center # will it follows 'self.generate_object_center' when 'self.generate_object_center' change? + + if self.load_camera_file: + self.data_aug_conf = params["fusion"]["args"]["data_aug_conf"] + + # by default, we load lidar, camera and metadata. But users may + # define additional inputs/tasks + self.add_data_extension = \ + params['add_data_extension'] if 'add_data_extension' \ + in params else [] + + if "noise_setting" not in self.params: + self.params['noise_setting'] = OrderedDict() + self.params['noise_setting']['add_noise'] = False + + if root_dir is None: + print('Not loading from an existing dataset!') + return + if not os.path.exists(root_dir): + print('Dataset path do not exists!') + return + + # first load all paths of different scenarios + scenario_folders = sorted([os.path.join(root_dir, x) + for x in os.listdir(root_dir) if + os.path.isdir(os.path.join(root_dir, x))]) + self.scenario_folders = scenario_folders + + ################################# + ## v2xverse data load + ################################# + + self.rsu_change_frame = 25 + self.route_frames = [] + + data_index_name = 'dataset_index.txt' + if 'index_file' in self.params: + data_index_name = self.params['index_file'] + '.txt' + print('data_index_name:', data_index_name) + dataset_indexs = self._load_text(data_index_name).split('\n') + + filter_file = None + if 'filte_danger' in self.params: + if os.path.exists(os.path.join(self.root_dir,self.params['filte_danger'])): + filter_file = self._load_json(self.params['filte_danger']) + + weathers = [0,1,2,3,4,5,6,7,8,9,10] + pattern = re.compile('weather-(\d+).*town(\d\d)') + for line in dataset_indexs: + if len(line.split()) != 3: + continue + path, frames, egos = line.split() + route_path = os.path.join(self.root_dir, path) + frames = int(frames) + res = pattern.findall(path) + if len(res) != 1: + continue + weather = int(res[0][0]) + town = int(res[0][1]) + if weather not in weathers or town not in towns: + continue + + files = os.listdir(route_path) + ego_files = [file for file in files if file.startswith('ego')] + rsu_files = [file for file in files if file.startswith('rsu')] + + # recompute rsu change frames + file_len_list = [] + if len(rsu_files) > 0: + for rsu_file in ['rsu_1000', 'rsu_1001']: + if rsu_file in rsu_files: + rsu_frame_len = len(os.listdir(os.path.join(route_path,rsu_file,'measurements'))) + file_len_list.append(rsu_frame_len) + self.rsu_change_frame = max(file_len_list) + 1 + + for j, file in enumerate(ego_files): + ego_path = os.path.join(path, file) + others_list = ego_files[:j]+ego_files[j+1:] + others_path_list = [] + for others in others_list: + others_path_list.append(os.path.join(path, others)) + + for i in range(frames): + # reduce the ratio of frames not at junction + if filter_file is not None: + danger_frame_flag = False + for route_id in filter_file: + if 
route_path.endswith(filter_file[route_id]['sub_path']): + for junction_range in filter_file[route_id]['selected_frames'][file]: + if i > junction_range[0] and i < junction_range[1]+15: + danger_frame_flag = True + if (not danger_frame_flag): + continue + scene_dict = {} + scene_dict['ego'] = ego_path + scene_dict['other_egos'] = others_path_list + scene_dict['num_car'] = len(ego_files) + scene_dict['rsu'] = [] + # order of rsu + if i%self.rsu_change_frame != 0 and len(rsu_files)>0: + order = int(i/self.rsu_change_frame)+1 # int(i/10)+1 + rsu_path = 'rsu_{}00{}'.format(order, ego_path[-1]) + if True: # os.path.exists(os.path.join(route_path, rsu_path,'measurements','{}.json'.format(str(i).zfill(4)))): + scene_dict['rsu'].append(os.path.join(path, rsu_path)) + + self.route_frames.append((scene_dict, i)) # (scene_dict, i) + self.label_mode = self.params.get('label_mode', 'v2xverse') + self.first_det = False + print("Sub route dir nums: %d" % len(self.route_frames)) + + def _load_text(self, path): + text = open(os.path.join(self.root_dir,path), 'r').read() + return text + + def _load_image(self, path): + trans_totensor = torchvision.transforms.ToTensor() + trans_toPIL = torchvision.transforms.ToPILImage() + try: + img = Image.open(os.path.join(self.root_dir,path)) + img_tensor = trans_totensor(img) + img_PIL = trans_toPIL(img_tensor) + except Exception as e: + _logger.info(path) + n = path[-8:-4] + new_path = path[:-8] + "%04d.jpg" % (int(n) - 1) + img = Image.open(os.path.join(self.root_dir,new_path)) + img_tensor = trans_totensor(img) + img_PIL = trans_toPIL(img_tensor) + return img_PIL + + def _load_json(self, path): + try: + json_value = json.load(open(os.path.join(self.root_dir,path))) + except Exception as e: + _logger.info(path) + n = path[-9:-5] + new_path = path[:-9] + "%04d.json" % (int(n) - 1) + json_value = json.load(open(os.path.join(self.root_dir,new_path))) + return json_value + + def _load_npy(self, path): + try: + array = np.load(os.path.join(self.root_dir,path), allow_pickle=True) + except Exception as e: + _logger.info(path) + n = path[-8:-4] + new_path = path[:-8] + "%04d.npy" % (int(n) - 1) + array = np.load(os.path.join(self.root_dir,new_path), allow_pickle=True) + return array + + def get_one_record(self, route_dir, frame_id, agent='ego', visible_actors=None, tpe='all', extra_source=None): + ''' + Parameters + ---------- + scene_dict: str, index given by dataloader. + frame_id: int, frame id. 
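+ agent: str, one of 'ego', 'other_ego' or 'rsu'; rsu records skip the
+ BEV image and receive a few rsu-specific label fixes.
+ visible_actors: list of actor ids or None; when given, actors_data is
+ filtered down to these ids.
+ tpe: 'all' loads the full sensor suite; an int class id (0/1/3) keeps
+ only actors of that type and skips sensor loading.
+ extra_source: dict or None; when set, measurements/lidar/camera are
+ taken from this dict instead of being read from disk.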
+ + Returns + ------- + data: + structure: dict{ + #################### + # input to the model + #################### + 'agent': 'ego' or 'other_ego', # whether it is the ego car + 'rgb_[direction]': torch.Tenser, # direction in [left, right, center], shape (3, 128, 128) + 'rgb': torch.Tensor, front rgb image , # shape (3, 224, 224) + 'measurements': torch.Tensor, size [7]: the first 6 dims is the onehot vector of command, and the last dim is car speed + 'command': int, 0-5, discrete command signal 0:left, 1:right, 2:straight, + # 3: lane follow, 4:lane change left, 5: lane change right + 'pose': np.array, shape(3,), lidar pose[gps_x, gps_y, theta] + 'detmap_pose': pose for density map + 'target_point': torch.Tensor, size[2], (x,y) coordinate in the left hand coordinate system, + where X-axis towards right side of the car + 'lidar': np.ndarray, # shape (3, 224, 224), 2D projection of lidar, range x:[-28m, 28m], y:[-28m,28m] + in the right hand coordinate system with X-axis towards left of car + #################### + # target of model + #################### + 'img_traffic': not yet used in model + 'command_waypoints': torch.Tensor, size[10,2], 10 (x,y) coordinates in the same coordinate system with target point + 'is_junction': int, 0 or 1, 1 means the car is at junction + 'traffic_light_state': int, 0 or 1 + 'det_data': np.array, (400,7), flattened density map, 7 feature dims corresponds to + [prob_obj, box bias_X, box bias_Y, box_orientation, l, w, speed] + 'img_traj': not yet used in model + 'stop_sign': int, 0 or 1, exist of stop sign + }, + ''' + + output_record = OrderedDict() + + if agent == 'ego': + output_record['ego'] = True + else: + output_record['ego'] = False + + BEV = None + + if route_dir is not None: + measurements = self._load_json(os.path.join(route_dir, "measurements", "%04d.json" % frame_id)) + actors_data = self._load_json(os.path.join(route_dir, "actors_data", "%04d.json" % frame_id)) + elif extra_source is not None: + if 'actors_data' in extra_source: + actors_data = extra_source['actors_data'] + else: + actors_data = {} + measurements = extra_source['measurements'] + + ego_loc = np.array([measurements['x'], measurements['y']]) + output_record['params'] = {} + + cam_list = ['front','right','left','rear'] + cam_angle_list = [0, 60, -60, 180] + for cam_id in range(4): + output_record['params']['camera{}'.format(cam_id)] = {} + output_record['params']['camera{}'.format(cam_id)]['cords'] = [measurements['x'], measurements['y'], 1.0,\ + 0,measurements['theta']/np.pi*180+cam_angle_list[cam_id],0] + output_record['params']['camera{}'.format(cam_id)]['extrinsic'] = measurements['camera_{}_extrinsics'.format(cam_list[cam_id])] + output_record['params']['camera{}'.format(cam_id)]['intrinsic'] = measurements['camera_{}_intrinsics'.format(cam_list[cam_id])] + + if 'speed' in measurements: + output_record['params']['ego_speed'] = measurements['speed']*3.6 + else: + output_record['params']['ego_speed'] = 0 + + output_record['params']['lidar_pose'] = \ + [measurements['lidar_pose_x'], measurements['lidar_pose_y'], 0, \ + 0,measurements['theta']/np.pi*180-90,0] + self.distance_to_map_center = (self.det_range[3]-self.det_range[0])/2+self.det_range[0] + output_record['params']['map_pose'] = \ + [measurements['lidar_pose_x'] + self.distance_to_map_center*np.cos(measurements["theta"]-np.pi/2), + measurements['lidar_pose_y'] + self.distance_to_map_center*np.sin(measurements["theta"]-np.pi/2), 0, \ + 0,measurements['theta']/np.pi*180-90,0] + detmap_pose_x = measurements['lidar_pose_x'] + 
self.distance_to_map_center*np.cos(measurements["theta"]-np.pi/2) + detmap_pose_y = measurements['lidar_pose_y'] + self.distance_to_map_center*np.sin(measurements["theta"]-np.pi/2) + detmap_theta = measurements["theta"] + np.pi/2 + output_record['detmap_pose'] = np.array([-detmap_pose_y, detmap_pose_x, detmap_theta]) + output_record['params']['lidar_pose_clean'] = output_record['params']['lidar_pose'] + output_record['params']['plan_trajectory'] = [] + output_record['params']['true_ego_pos'] = \ + [measurements['lidar_pose_x'], measurements['lidar_pose_y'], 0, \ + 0,measurements['theta']/np.pi*180,0] + output_record['params']['predicted_ego_pos'] = \ + [measurements['lidar_pose_x'], measurements['lidar_pose_y'], 0, \ + 0,measurements['theta']/np.pi*180,0] + + if tpe == 'all': + if route_dir is not None: + lidar = self._load_npy(os.path.join(route_dir, "lidar", "%04d.npy" % frame_id)) + output_record['rgb_front'] = self._load_image(os.path.join(route_dir, "rgb_front", "%04d.jpg" % frame_id)) + output_record['rgb_left'] = self._load_image(os.path.join(route_dir, "rgb_left", "%04d.jpg" % frame_id)) + output_record['rgb_right'] = self._load_image(os.path.join(route_dir, "rgb_right", "%04d.jpg" % frame_id)) + output_record['rgb_rear'] = self._load_image(os.path.join(route_dir, "rgb_rear", "%04d.jpg" % frame_id)) + if agent != 'rsu': + BEV = self._load_image(os.path.join(route_dir, "birdview", "%04d.jpg" % frame_id)) + elif extra_source is not None: + lidar = extra_source['lidar'] + if 'rgb_front' in extra_source: + output_record['rgb_front'] = extra_source['rgb_front'] + output_record['rgb_left'] = extra_source['rgb_left'] + output_record['rgb_right'] = extra_source['rgb_right'] + output_record['rgb_rear'] = extra_source['rgb_rear'] + else: + output_record['rgb_front'] = None + output_record['rgb_left'] = None + output_record['rgb_right'] = None + output_record['rgb_rear'] = None + BEV = None + + output_record['lidar_np'] = lidar + lidar_transformed = np.zeros((output_record['lidar_np'].shape)) + lidar_transformed[:,0] = output_record['lidar_np'][:,1] + lidar_transformed[:,1] = -output_record['lidar_np'][:,0] + lidar_transformed[:,2:] = output_record['lidar_np'][:,2:] + output_record['lidar_np'] = lidar_transformed.astype(np.float32) + output_record['lidar_np'][:, 2] += measurements['lidar_pose_z'] + + if visible_actors is not None: + actors_data = self.filter_actors_data_according_to_visible(actors_data, visible_actors) + + ################ LSS debug TODO: clean up this function ##################### + if not self.first_det: + import copy + if True: # agent=='rsu': + measurements["affected_light_id"] = -1 + measurements["is_vehicle_present"] = [] + measurements["is_bike_present"] = [] + measurements["is_junction_vehicle_present"] = [] + measurements["is_pedestrian_present"] = [] + measurements["future_waypoints"] = [] + cop3_range = [36,12,12,12, 0.25] + heatmap = generate_heatmap_multiclass( + copy.deepcopy(measurements), copy.deepcopy(actors_data), max_distance=36 + ) + self.det_data = ( + generate_det_data_multiclass( + heatmap, copy.deepcopy(measurements), copy.deepcopy(actors_data), cop3_range + ) + .reshape(3, int((cop3_range[0]+cop3_range[1])/cop3_range[4] + *(cop3_range[2]+cop3_range[3])/cop3_range[4]), -1) #(2, H*W,7) + .astype(np.float32) + ) + self.first_det = True + if self.label_mode == 'cop3': + self.first_det = False + output_record['det_data'] = self.det_data + ############################################################## + if agent == 'rsu' : + for actor_id in 
actors_data.keys(): + if actors_data[actor_id]['tpe'] == 0: + box = actors_data[actor_id]['box'] + if abs(box[0]-0.8214) < 0.01 and abs(box[1]-0.18625) < 0.01 : + actors_data[actor_id]['tpe'] = 3 + + output_record['params']['vehicles'] = {} + for actor_id in actors_data.keys(): + + ###################### + ## debug + ###################### + # if agent == 'ego': + # continue + + if tpe in [0, 1, 3]: + if actors_data[actor_id]['tpe'] != tpe: + continue + + # exclude ego car + loc_actor = np.array(actors_data[actor_id]['loc'][0:2]) + dis = np.linalg.norm(ego_loc - loc_actor) + if dis < 0.1: + continue + + if not ('box' in actors_data[actor_id].keys() and 'ori' in actors_data[actor_id].keys() and 'loc' in actors_data[actor_id].keys()): + continue + output_record['params']['vehicles'][actor_id] = {} + output_record['params']['vehicles'][actor_id]['tpe'] = actors_data[actor_id]['tpe'] + yaw = math.degrees(math.atan(actors_data[actor_id]['ori'][1]/actors_data[actor_id]['ori'][0])) + pitch = math.degrees(math.asin(actors_data[actor_id]['ori'][2])) + output_record['params']['vehicles'][actor_id]['angle'] = [0,yaw,pitch] + output_record['params']['vehicles'][actor_id]['center'] = [0,0,actors_data[actor_id]['box'][2]] + output_record['params']['vehicles'][actor_id]['extent'] = actors_data[actor_id]['box'] + output_record['params']['vehicles'][actor_id]['location'] = [actors_data[actor_id]['loc'][0],actors_data[actor_id]['loc'][1],0] + output_record['params']['vehicles'][actor_id]['speed'] = 3.6 * math.sqrt(actors_data[actor_id]['vel'][0]**2+actors_data[actor_id]['vel'][1]**2 ) + + direction_list = ['front','left','right','rear'] + theta_list = [0,-60,60,180] + dis_list = [0,0,0,-2.6] + camera_data_list = [] + for i, direction in enumerate(direction_list): + if 'rgb_{}'.format(direction) in output_record: + camera_data_list.append(output_record['rgb_{}'.format(direction)]) + dis_to_lidar = dis_list[i] + output_record['params']['camera{}'.format(i)]['cords'] = \ + [measurements['x'] + dis_to_lidar*np.sin(measurements['theta']), measurements['y'] - dis_to_lidar*np.cos(measurements['theta']), 2.3,\ + 0,measurements['theta']/np.pi*180 - 90 + theta_list[i],0] + output_record['params']['camera{}'.format(i)]['extrinsic'] = measurements['camera_{}_extrinsics'.format(direction_list[i])] + output_record['params']['camera{}'.format(i)]['intrinsic'] = measurements['camera_{}_intrinsics'.format(direction_list[i])] + output_record['camera_data'] = camera_data_list + bev_visibility_np = 255*np.ones((256,256,3), dtype=np.uint8) + output_record['bev_visibility.png'] = bev_visibility_np + + if agent != 'rsu': + output_record['BEV'] = BEV + else: + output_record['BEV'] = None + return output_record + + def filter_actors_data_according_to_visible(self, actors_data, visible_actors): + to_del_id = [] + for actors_id in actors_data.keys(): + if actors_id in visible_actors: + continue + to_del_id.append(actors_id) + for actors_id in to_del_id: + del actors_data[actors_id] + return actors_data + + def get_visible_actors_one_term(self, route_dir, frame_id): + cur_visible_actors = [] + actors_data = self._load_json(os.path.join(route_dir, "actors_data", "%04d.json" % frame_id)) + + for actors_id in actors_data: + if actors_data[actors_id]['tpe']==2: + continue + if not 'lidar_visible' in actors_data[actors_id]: + cur_visible_actors.append(actors_id) + print('Lose of lidar_visible!') + continue + if actors_data[actors_id]['lidar_visible']==1: + cur_visible_actors.append(actors_id) + return cur_visible_actors + + def 
get_visible_actors(self, scene_dict, frame_id): + visible_actors = {} # id only + if self.test_flag: + visible_actors['car_0'] = None + for i, route_dir in enumerate(scene_dict['other_egos']): + visible_actors['car_{}'.format(i+1)] = None + for i, rsu_dir in enumerate(scene_dict['rsu']): + visible_actors['rsu_{}'.format(i)] = None + else: + visible_actors['car_0'] = self.get_visible_actors_one_term(scene_dict['ego'], frame_id) + if self.params['train_params']['max_cav'] > 1: + for i, route_dir in enumerate(scene_dict['other_egos']): + visible_actors['car_{}'.format(i+1)] = self.get_visible_actors_one_term(route_dir, frame_id) + for i, rsu_dir in enumerate(scene_dict['rsu']): + visible_actors['rsu_{}'.format(i)] = self.get_visible_actors_one_term(rsu_dir, frame_id) + for keys in visible_actors: + visible_actors[keys] = list(set(visible_actors[keys])) + return visible_actors + + def retrieve_base_data(self, idx, tpe='all', extra_source=None, data_dir=None): + if extra_source is None: + if data_dir is not None: + scene_dict, frame_id = data_dir + else: + scene_dict, frame_id = self.route_frames[idx] + frame_id_latency = frame_id - self.frame_delay + visible_actors = None + visible_actors = self.get_visible_actors(scene_dict, frame_id) + data = OrderedDict() + data['car_0'] = self.get_one_record(scene_dict['ego'], frame_id , agent='ego', visible_actors=visible_actors['car_0'], tpe=tpe) + if self.params['train_params']['max_cav'] > 1: + for i, route_dir in enumerate(scene_dict['other_egos']): + try: + data['car_{}'.format(i+1)] = self.get_one_record(route_dir, frame_id_latency , agent='other_ego', visible_actors=visible_actors['car_{}'.format(i+1)], tpe=tpe) + except: + print('load other ego failed') + continue + if self.params['train_params']['max_cav'] > 2: + for i, rsu_dir in enumerate(scene_dict['rsu']): + try: + data['rsu_{}'.format(i)] = self.get_one_record(rsu_dir, frame_id_latency, agent='rsu', visible_actors=visible_actors['rsu_{}'.format(i)], tpe=tpe) + except: + print('load rsu failed') + continue + else: + data = OrderedDict() + scene_dict = None + frame_id = None + data['car_0'] = self.get_one_record(route_dir=None, frame_id=None , agent='ego', visible_actors=None, tpe=tpe, extra_source=extra_source['car_data'][0]) + if self.params['train_params']['max_cav'] > 1: + if len(extra_source['car_data']) > 1: + for i in range(len(extra_source['car_data'])-1): + data['car_{}'.format(i+1)] = self.get_one_record(route_dir=None, frame_id=None , agent='other_ego', visible_actors=None, tpe=tpe, extra_source=extra_source['car_data'][i+1]) + for i in range(len(extra_source['rsu_data'])): + data['rsu_{}'.format(i)] = self.get_one_record(route_dir=None, frame_id=None , agent='rsu', visible_actors=None, tpe=tpe, extra_source=extra_source['rsu_data'][i]) + data['car_0']['scene_dict'] = scene_dict + data['car_0']['frame_id'] = frame_id + return data + + + def __len__(self): + return len(self.route_frames) + + def __getitem__(self, idx): + """ + Abstract method, needs to be define by the children class. + """ + pass + + @staticmethod + def extract_timestamps(yaml_files): + """ + Given the list of the yaml files, extract the mocked timestamps. + + Parameters + ---------- + yaml_files : list + The full path of all yaml files of ego vehicle + + Returns + ------- + timestamps : list + The list containing timestamps only. 
+ """ + timestamps = [] + + for file in yaml_files: + res = file.split('/')[-1] + + timestamp = res.replace('.yaml', '') + timestamps.append(timestamp) + + return timestamps + + @staticmethod + def return_timestamp_key(scenario_database, timestamp_index): + """ + Given the timestamp index, return the correct timestamp key, e.g. + 2 --> '000078'. + + Parameters + ---------- + scenario_database : OrderedDict + The dictionary contains all contents in the current scenario. + + timestamp_index : int + The index for timestamp. + + Returns + ------- + timestamp_key : str + The timestamp key saved in the cav dictionary. + """ + # get all timestamp keys + timestamp_keys = list(scenario_database.items())[0][1] + # retrieve the correct index + timestamp_key = list(timestamp_keys.items())[timestamp_index][0] + + return timestamp_key + + @staticmethod + def find_camera_files(cav_path, timestamp, sensor="camera"): + """ + Retrieve the paths to all camera files. + + Parameters + ---------- + cav_path : str + The full file path of current cav. + + timestamp : str + Current timestamp + + sensor : str + "camera" or "depth" + + Returns + ------- + camera_files : list + The list containing all camera png file paths. + """ + camera0_file = os.path.join(cav_path, + timestamp + f'_{sensor}0.png') + camera1_file = os.path.join(cav_path, + timestamp + f'_{sensor}1.png') + camera2_file = os.path.join(cav_path, + timestamp + f'_{sensor}2.png') + camera3_file = os.path.join(cav_path, + timestamp + f'_{sensor}3.png') + return [camera0_file, camera1_file, camera2_file, camera3_file] + + + def augment(self, lidar_np, object_bbx_center, object_bbx_mask): + """ + Given the raw point cloud, augment by flipping and rotation. + + Parameters + ---------- + lidar_np : np.ndarray + (n, 4) shape + + object_bbx_center : np.ndarray + (n, 7) shape to represent bbx's x, y, z, h, w, l, yaw + + object_bbx_mask : np.ndarray + Indicate which elements in object_bbx_center are padded. + """ + tmp_dict = {'lidar_np': lidar_np, + 'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask} + tmp_dict = self.data_augmentor.forward(tmp_dict) + + lidar_np = tmp_dict['lidar_np'] + object_bbx_center = tmp_dict['object_bbx_center'] + object_bbx_mask = tmp_dict['object_bbx_mask'] + + return lidar_np, object_bbx_center, object_bbx_mask + + + def generate_object_center_lidar(self, + cav_contents, + reference_lidar_pose): + """ + Retrieve all objects in a format of (n, 7), where 7 represents + x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. + The object_bbx_center is in ego coordinate. + + Notice: it is a wrap of postprocessor + + Parameters + ---------- + cav_contents : list + List of dictionary, save all cavs' information. + in fact it is used in get_item_single_car, so the list length is 1 + + reference_lidar_pose : list + The final target lidar pose with length 6. + + Returns + ------- + object_np : np.ndarray + Shape is (max_num, 7). + mask : np.ndarray + Shape is (max_num,). + object_ids : list + Length is number of bbx in current sample. + """ + return self.post_processor.generate_object_center(cav_contents, + reference_lidar_pose) + + def generate_object_center_camera(self, + cav_contents, + reference_lidar_pose): + """ + Retrieve all objects in a format of (n, 7), where 7 represents + x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. + The object_bbx_center is in ego coordinate. + + Notice: it is a wrap of postprocessor + + Parameters + ---------- + cav_contents : list + List of dictionary, save all cavs' information. 
+ in fact it is used in get_item_single_car, so the list length is 1 + + reference_lidar_pose : list + The final target lidar pose with length 6. + + visibility_map : np.ndarray + for OPV2V, its 256*256 resolution. 0.39m per pixel. heading up. + + Returns + ------- + object_np : np.ndarray + Shape is (max_num, 7). + mask : np.ndarray + Shape is (max_num,). + object_ids : list + Length is number of bbx in current sample. + """ + return self.post_processor.generate_visible_object_center( + cav_contents, reference_lidar_pose + ) + + def get_ext_int(self, params, camera_id): + if self.params['extrinsic'] == 1: + return self.get_ext_int_1(params, camera_id) + elif self.params['extrinsic'] == 2: + return self.get_ext_int_2(params, camera_id) + def get_ext_int_1(self, params, camera_id): + camera_coords = np.array(params["camera%d" % camera_id]["cords"]).astype( + np.float32) + camera_to_lidar = x1_to_x2( + camera_coords, params["lidar_pose_clean"] + ).astype(np.float32) # T_LiDAR_camera + camera_to_lidar = camera_to_lidar @ np.array( + [[0, 0, 1, 0], [1, 0, 0, 0], [0, -1, 0, 0], [0, 0, 0, 1]], + dtype=np.float32) # UE4 coord to opencv coord + camera_intrinsic = np.array(params["camera%d" % camera_id]["intrinsic"]).astype( + np.float32 + ) + return camera_to_lidar, camera_intrinsic + def get_ext_int_2(self, params, camera_id): + camera_extrinsic = np.array(params["camera%d" % camera_id]["extrinsic"]).astype( + np.float32) + camera_extrinsic = camera_extrinsic @ np.array( + [[0, 0, 1, 0], [1, 0, 0, 0], [0, -1, 0, 0], [0, 0, 0, 1]], + dtype=np.float32) # UE4 coord to opencv coord + camera_intrinsic = np.array(params["camera%d" % camera_id]["intrinsic"]).astype( + np.float32 + ) + return camera_extrinsic, camera_intrinsic +VALUES = [255] +EXTENT = [0] +def generate_heatmap_multiclass(measurements, actors_data, max_distance=30, pixels_per_meter=8): + actors_data_multiclass = { + 0: {}, 1: {}, 2:{}, 3:{} + } + for _id in actors_data.keys(): + actors_data_multiclass[actors_data[_id]['tpe']][_id] = actors_data[_id] + heatmap_0 = generate_heatmap(measurements, actors_data_multiclass[0], max_distance, pixels_per_meter) + heatmap_1 = generate_heatmap(measurements, actors_data_multiclass[1], max_distance, pixels_per_meter) + # heatmap_2 = generate_heatmap(measurements, actors_data_multiclass[2], max_distance, pixels_per_meter) # traffic light, not used + heatmap_3 = generate_heatmap(measurements, actors_data_multiclass[3], max_distance, pixels_per_meter) + return {0: heatmap_0, 1: heatmap_1, 3: heatmap_3} + +def get_yaw_angle(forward_vector): + forward_vector = forward_vector / np.linalg.norm(forward_vector) + yaw = math.acos(forward_vector[0]) + if forward_vector[1] < 0: + yaw = 2 * np.pi - yaw + return yaw + +def generate_heatmap(measurements, actors_data, max_distance=30, pixels_per_meter=8): + img_size = max_distance * pixels_per_meter * 2 + img = np.zeros((img_size, img_size, 3), np.int) + ego_x = measurements["lidar_pose_x"] + ego_y = measurements["lidar_pose_y"] + ego_theta = measurements["theta"] + R = np.array( + [ + [np.cos(ego_theta), -np.sin(ego_theta)], + [np.sin(ego_theta), np.cos(ego_theta)], + ] + ) + ego_id = None + for _id in actors_data: + color = np.array([1, 1, 1]) + if actors_data[_id]["tpe"] == 2: + if int(_id) == int(measurements["affected_light_id"]): + if actors_data[_id]["sta"] == 0: + color = np.array([1, 1, 1]) + else: + color = np.array([0, 0, 0]) + yaw = get_yaw_angle(actors_data[_id]["ori"]) + TR = np.array([[np.cos(yaw), np.sin(yaw)], [-np.sin(yaw), np.cos(yaw)]]) + 
actors_data[_id]["loc"] = np.array( + actors_data[_id]["loc"][:2] + ) + TR.T.dot(np.array(actors_data[_id]["taigger_loc"])[:2]) + actors_data[_id]["ori"] = np.array(actors_data[_id]["ori"]) + actors_data[_id]["box"] = np.array(actors_data[_id]["trigger_box"]) * 2 + else: + continue + raw_loc = actors_data[_id]["loc"] + if (raw_loc[0] - ego_x) ** 2 + (raw_loc[1] - ego_y) ** 2 <= 2: + ego_id = _id + color = np.array([0, 1, 1]) + new_loc = R.T.dot(np.array([raw_loc[0] - ego_x, raw_loc[1] - ego_y])) + actors_data[_id]["loc"] = np.array(new_loc) + raw_ori = actors_data[_id]["ori"] + new_ori = R.T.dot(np.array([raw_ori[0], raw_ori[1]])) + actors_data[_id]["ori"] = np.array(new_ori) + actors_data[_id]["box"] = np.array(actors_data[_id]["box"]) + if int(_id) in measurements["is_vehicle_present"]: + color = np.array([1, 1, 1]) + elif int(_id) in measurements["is_bike_present"]: + color = np.array([1, 1, 1]) + elif int(_id) in measurements["is_junction_vehicle_present"]: + color = np.array([1, 1, 1]) + elif int(_id) in measurements["is_pedestrian_present"]: + color = np.array([1, 1, 1]) + actors_data[_id]["color"] = color + + if ego_id is not None and ego_id in actors_data: + del actors_data[ego_id] # Do not show ego car + for _id in actors_data: + if actors_data[_id]["tpe"] == 2: + continue # FIXME donot add traffix light + if int(_id) != int(measurements["affected_light_id"]): + continue + if actors_data[_id]["sta"] != 0: + continue + act_img = np.zeros((img_size, img_size, 3), np.uint8) + loc = actors_data[_id]["loc"][:2] + ori = actors_data[_id]["ori"][:2] + box = actors_data[_id]["box"] + if box[0] < 1.5: + box = box * 1.5 # FIXME enlarge the size of pedstrian and bike + color = actors_data[_id]["color"] + for i in range(len(VALUES)): + act_img = add_rect( + act_img, + loc, + ori, + box + EXTENT[i], + VALUES[i], + pixels_per_meter, + max_distance, + color, + ) + act_img = np.clip(act_img, 0, 255) + img = img + act_img + img = np.clip(img, 0, 255) + img = img.astype(np.uint8) + img = img[:, :, 0] + return img + +def add_rect(img, loc, ori, box, value, pixels_per_meter, max_distance, color): + img_size = max_distance * pixels_per_meter * 2 + vet_ori = np.array([-ori[1], ori[0]]) + hor_offset = box[0] * ori + vet_offset = box[1] * vet_ori + left_up = (loc + hor_offset + vet_offset + max_distance) * pixels_per_meter + left_down = (loc + hor_offset - vet_offset + max_distance) * pixels_per_meter + right_up = (loc - hor_offset + vet_offset + max_distance) * pixels_per_meter + right_down = (loc - hor_offset - vet_offset + max_distance) * pixels_per_meter + left_up = np.around(left_up).astype(np.int) + left_down = np.around(left_down).astype(np.int) + right_down = np.around(right_down).astype(np.int) + right_up = np.around(right_up).astype(np.int) + left_up = list(left_up) + left_down = list(left_down) + right_up = list(right_up) + right_down = list(right_down) + color = [int(x) for x in value * color] + cv2.fillConvexPoly(img, np.array([left_up, left_down, right_down, right_up]), color) + return img + +def generate_det_data_multiclass( + heatmap, measurements, actors_data, det_range=[30,10,10,10, 0.8] +): + actors_data_multiclass = { + 0: {}, 1: {}, 2: {}, 3:{} + } + for _id in actors_data.keys(): + actors_data_multiclass[actors_data[_id]['tpe']][_id] = actors_data[_id] + det_data = [] + for _class in range(4): + if _class != 2: + det_data.append(generate_det_data(heatmap[_class], measurements, actors_data_multiclass[_class], det_range)) + + return np.array(det_data) + +from skimage.measure import 
block_reduce + +def generate_det_data( + heatmap, measurements, actors_data, det_range=[30,10,10,10, 0.8] +): + res = det_range[4] + max_distance = max(det_range) + traffic_heatmap = block_reduce(heatmap, block_size=(int(8*res), int(8*res)), func=np.mean) + traffic_heatmap = np.clip(traffic_heatmap, 0.0, 255.0) + traffic_heatmap = traffic_heatmap[:int((det_range[0]+det_range[1])/res), int((max_distance-det_range[2])/res):int((max_distance+det_range[3])/res)] + det_data = np.zeros((int((det_range[0]+det_range[1])/res), int((det_range[2]+det_range[3])/res), 7)) # (50,25,7) + vertical, horizontal = det_data.shape[:2] + + ego_x = measurements["lidar_pose_x"] + ego_y = measurements["lidar_pose_y"] + ego_theta = measurements["theta"] + R = np.array( + [ + [np.cos(ego_theta), -np.sin(ego_theta)], + [np.sin(ego_theta), np.cos(ego_theta)], + ] + ) + need_deleted_ids = [] + for _id in actors_data: + raw_loc = actors_data[_id]["loc"] + new_loc = R.T.dot(np.array([raw_loc[0] - ego_x, raw_loc[1] - ego_y])) + new_loc[1] = -new_loc[1] + actors_data[_id]["loc"] = np.array(new_loc) + raw_ori = actors_data[_id]["ori"] + new_ori = R.T.dot(np.array([raw_ori[0], raw_ori[1]])) + dis = new_loc[0] ** 2 + new_loc[1] ** 2 + if ( + dis <= 2 + or dis >= (max_distance) ** 2 * 2 + or "box" not in actors_data[_id] + or actors_data[_id]['tpe'] == 2 + ): + need_deleted_ids.append(_id) + continue + actors_data[_id]["ori"] = np.array(new_ori) + actors_data[_id]["box"] = np.array(actors_data[_id]["box"]) + + for _id in need_deleted_ids: + del actors_data[_id] + + for i in range(vertical): # 50 + for j in range(horizontal): # 25 + if traffic_heatmap[i][j] < 0.05 * 255.0: + continue + center_x, center_y = convert_grid_to_xy(i, j, det_range) + min_dis = 1000 + min_id = None + for _id in actors_data: + loc = actors_data[_id]["loc"][:2] + ori = actors_data[_id]["ori"][:2] + box = actors_data[_id]["box"] + dis = (loc[0] - center_x) ** 2 + (loc[1] - center_y) ** 2 + if dis < min_dis: + min_dis = dis + min_id = _id + + if min_id is None: + continue + + loc = actors_data[min_id]["loc"][:2] + ori = actors_data[min_id]["ori"][:2] + box = actors_data[min_id]["box"] + theta = (get_yaw_angle(ori) / np.pi + 2) % 2 + speed = np.linalg.norm(actors_data[min_id]["vel"]) + + # prob = np.power(0.5 / max(0.5, np.sqrt(min_dis)), 0.5) + + det_data[i][j] = np.array( + [ + 0, + (loc[0] - center_x) * 3.0, + (loc[1] - center_y) * 3.0, + theta / 2.0, + box[0] / 7.0, + box[1] / 4.0, + 0, + ] + ) + + heatmap = np.zeros((int((det_range[0]+det_range[1])/res), int((det_range[2]+det_range[3])/res))) # (50,25) + for _id in actors_data: + loc = actors_data[_id]["loc"][:2] + ori = actors_data[_id]["ori"][:2] + box = actors_data[_id]["box"] + try: + x,y = loc + i,j = convert_xy_to_grid(x,y,det_range) + i = int(np.around(i)) + j = int(np.around(j)) + + if i < vertical and i > 0 and j > 0 and j < horizontal: + det_data[i][j][-1] = 1.0 + + ################## Gaussian Heatmap ##################### + w, h = box[:2]/det_range[4] + heatmap = draw_heatmap(heatmap, h, w, j, i) + ######################################################### + + # theta = (get_yaw_angle(ori) / np.pi + 2) % 2 + # center_x, center_y = convert_grid_to_xy(i, j, det_range) + + # det_data[i][j] = np.array( + # [ + # 0, + # (loc[0] - center_x) * 3.0, + # (loc[1] - center_y) * 3.0, + # theta / 2.0, + # box[0] / 7.0, + # box[1] / 4.0, + # 0, + # ] + # ) + + except: + print('actor data error, skip!') + det_data[:,:,0] = heatmap + return det_data + +def convert_grid_to_xy(i, j, det_range): + x = 
det_range[4]*(j + 0.5) - det_range[2] + y = det_range[0] - det_range[4]*(i+0.5) + return x, y + +def convert_xy_to_grid(x, y, det_range): + j = (x + det_range[2]) / det_range[4] - 0.5 + i = (det_range[0] - y) / det_range[4] - 0.5 + return i, j + +def draw_heatmap(heatmap, h, w, x, y): + feature_map_size = heatmap.shape + radius = gaussian_radius( + (h, w), + min_overlap=0.1) + radius = max(2, int(radius)) + + # throw out not in range objects to avoid out of array + # area when creating the heatmap + if not (0 <= y < feature_map_size[0] + and 0 <= x < feature_map_size[1]): + return heatmap + + heatmap = draw_gaussian(heatmap, (x,y), radius) + return heatmap + +def draw_gaussian(heatmap, center, radius, k=1): + """Get gaussian masked heatmap. + + Args: + heatmap (torch.Tensor): Heatmap to be masked. + center (torch.Tensor): Center coord of the heatmap. + radius (int): Radius of gausian. + K (int): Multiple of masked_gaussian. Defaults to 1. + + Returns: + torch.Tensor: Masked heatmap. + """ + diameter = 2 * radius + 1 + gaussian = gaussian_2d((diameter, diameter), sigma=diameter / 6) + + x, y = int(center[0]), int(center[1]) + + height, width = heatmap.shape[0:2] + + left, right = min(x, radius), min(width - x, radius + 1) + top, bottom = min(y, radius), min(height - y, radius + 1) + + masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] + masked_gaussian = gaussian[radius - top:radius + bottom, + radius - left:radius + right] + + if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: + # torch.max(masked_heatmap, masked_gaussian * k, out=masked_heatmap) + np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) + # masked_heatmap = np.max([masked_heatmap[None,], (masked_gaussian * k)[None,]], axis=0)[0] + # heatmap[y - top:y + bottom, x - left:x + right] = masked_heatmap + return heatmap + +def gaussian_2d(shape, sigma=1): + """Generate gaussian map. + + Args: + shape (list[int]): Shape of the map. + sigma (float): Sigma to generate gaussian map. + Defaults to 1. + + Returns: + np.ndarray: Generated gaussian map. + """ + m, n = [(ss - 1.) / 2. for ss in shape] + y, x = np.ogrid[-m:m + 1, -n:n + 1] + + h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) + h[h < np.finfo(h.dtype).eps * h.max()] = 0 + return h + +def gaussian_radius(det_size, min_overlap=0.5): + """Get radius of gaussian. + + Args: + det_size (tuple[torch.Tensor]): Size of the detection result. + min_overlap (float): Gaussian_overlap. Defaults to 0.5. + + Returns: + torch.Tensor: Computed radius. 
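+
+ Note: this is the CornerNet/CenterNet radius heuristic: three quadratic
+ cases bound how far a box may shift while still keeping at least
+ `min_overlap` IoU with the ground truth, and the smallest root is
+ returned (here a plain NumPy float rather than a torch.Tensor).
+
+ Illustrative value (not from the source): with the min_overlap=0.1 used
+ by draw_heatmap, gaussian_radius((10.0, 20.0), min_overlap=0.1)
+ evaluates to roughly 10.9.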
+ """ + height, width = det_size + + a1 = 1 + b1 = (height + width) + c1 = width * height * (1 - min_overlap) / (1 + min_overlap) + sq1 = np.sqrt(b1**2 - 4 * a1 * c1) + r1 = (b1 + sq1) / (2 * a1) + + a2 = 4 + b2 = 2 * (height + width) + c2 = (1 - min_overlap) * width * height + sq2 = np.sqrt(b2**2 - 4 * a2 * c2) + r2 = (b2 + sq2) / (2 * a2) + + a3 = 4 * min_overlap + b3 = -2 * min_overlap * (height + width) + c3 = (min_overlap - 1) * width * height + sq3 = np.sqrt(b3**2 - 4 * a3 * c3) + r3 = (b3 + sq3) / (2 * a3) + return min(r1, r2, r3) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/early_fusion_dataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/early_fusion_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..ddac124dcebaa55b65940062d5f013580b5e234a --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/early_fusion_dataset.py @@ -0,0 +1,414 @@ +# early fusion dataset +import torch +import numpy as np +from opencood.utils.pcd_utils import downsample_lidar_minimum +import math +from collections import OrderedDict + +from opencood.utils import box_utils +from opencood.utils.common_utils import merge_features_to_dict +from opencood.data_utils.post_processor import build_postprocessor +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.hypes_yaml.yaml_utils import load_yaml +from opencood.utils.pcd_utils import \ + mask_points_by_range, mask_ego_points, shuffle_points, \ + downsample_lidar_minimum +from opencood.utils.transformation_utils import x1_to_x2 + + +def getEarlyFusionDataset(cls): + class EarlyFusionDataset(cls): + """ + This dataset is used for early fusion, where each CAV transmit the raw + point cloud to the ego vehicle. 
+ """ + def __init__(self, params, visualize, train=True): + super(EarlyFusionDataset, self).__init__(params, visualize, train) + self.supervise_single = True if ('supervise_single' in params['model']['args'] and params['model']['args']['supervise_single']) \ + else False + assert self.supervise_single is False + self.proj_first = False if 'proj_first' not in params['fusion']['args']\ + else params['fusion']['args']['proj_first'] + self.anchor_box = self.post_processor.generate_anchor_box() + self.anchor_box_torch = torch.from_numpy(self.anchor_box) + + self.heterogeneous = False + if 'heter' in params: + self.heterogeneous = True + + def __getitem__(self, idx): + base_data_dict = self.retrieve_base_data(idx) + + processed_data_dict = OrderedDict() + processed_data_dict['ego'] = {} + + ego_id = -1 + ego_lidar_pose = [] + + # first find the ego vehicle's lidar pose + for cav_id, cav_content in base_data_dict.items(): + if cav_content['ego']: + ego_id = cav_id + ego_lidar_pose = cav_content['params']['lidar_pose'] + break + + assert ego_id != -1 + assert len(ego_lidar_pose) > 0 + + projected_lidar_stack = [] + object_stack = [] + object_id_stack = [] + + # loop over all CAVs to process information + for cav_id, selected_cav_base in base_data_dict.items(): + # check if the cav is within the communication range with ego + distance = \ + math.sqrt((selected_cav_base['params']['lidar_pose'][0] - + ego_lidar_pose[0]) ** 2 + ( + selected_cav_base['params'][ + 'lidar_pose'][1] - ego_lidar_pose[ + 1]) ** 2) + if distance > self.params['comm_range']: + continue + + selected_cav_processed = self.get_item_single_car( + selected_cav_base, + ego_lidar_pose) + # all these lidar and object coordinates are projected to ego + # already. + projected_lidar_stack.append( + selected_cav_processed['projected_lidar']) + object_stack.append(selected_cav_processed['object_bbx_center']) + object_id_stack += selected_cav_processed['object_ids'] + + # exclude all repetitive objects + unique_indices = \ + [object_id_stack.index(x) for x in set(object_id_stack)] + object_stack = np.vstack(object_stack) + object_stack = object_stack[unique_indices] + + # make sure bounding boxes across all frames have the same number + object_bbx_center = \ + np.zeros((self.params['postprocess']['max_num'], 7)) + mask = np.zeros(self.params['postprocess']['max_num']) + object_bbx_center[:object_stack.shape[0], :] = object_stack + mask[:object_stack.shape[0]] = 1 + + # convert list to numpy array, (N, 4) + projected_lidar_stack = np.vstack(projected_lidar_stack) + + # data augmentation + projected_lidar_stack, object_bbx_center, mask = \ + self.augment(projected_lidar_stack, object_bbx_center, mask) + + # we do lidar filtering in the stacked lidar + projected_lidar_stack = mask_points_by_range(projected_lidar_stack, + self.params['preprocess'][ + 'cav_lidar_range']) + # augmentation may remove some of the bbx out of range + object_bbx_center_valid = object_bbx_center[mask == 1] + object_bbx_center_valid, range_mask = \ + box_utils.mask_boxes_outside_range_numpy(object_bbx_center_valid, + self.params['preprocess'][ + 'cav_lidar_range'], + self.params['postprocess'][ + 'order'], + return_mask=True + ) + mask[object_bbx_center_valid.shape[0]:] = 0 + object_bbx_center[:object_bbx_center_valid.shape[0]] = \ + object_bbx_center_valid + object_bbx_center[object_bbx_center_valid.shape[0]:] = 0 + unique_indices = list(np.array(unique_indices)[range_mask]) + + # pre-process the lidar to voxel/bev/downsampled lidar + lidar_dict = 
self.pre_processor.preprocess(projected_lidar_stack) + + # generate the anchor boxes + anchor_box = self.post_processor.generate_anchor_box() + + # generate targets label + label_dict = \ + self.post_processor.generate_label( + gt_box_center=object_bbx_center, + anchors=anchor_box, + mask=mask) + + processed_data_dict['ego'].update( + {'object_bbx_center': object_bbx_center, + 'object_bbx_mask': mask, + 'object_ids': [object_id_stack[i] for i in unique_indices], + 'anchor_box': anchor_box, + 'processed_lidar': lidar_dict, + 'label_dict': label_dict}) + + if self.visualize: + processed_data_dict['ego'].update({'origin_lidar': + projected_lidar_stack}) + + return processed_data_dict + + def get_item_single_car(self, selected_cav_base, ego_pose): + """ + Project the lidar and bbx to ego space first, and then do clipping. + + Parameters + ---------- + selected_cav_base : dict + The dictionary contains a single CAV's raw information. + ego_pose : list + The ego vehicle lidar pose under world coordinate. + + Returns + ------- + selected_cav_processed : dict + The dictionary contains the cav's processed information. + """ + selected_cav_processed = {} + + # calculate the transformation matrix + transformation_matrix = \ + x1_to_x2(selected_cav_base['params']['lidar_pose'], + ego_pose) + + # retrieve objects under ego coordinates + object_bbx_center, object_bbx_mask, object_ids = \ + self.generate_object_center([selected_cav_base], + ego_pose) + + # filter lidar + lidar_np = selected_cav_base['lidar_np'] + lidar_np = shuffle_points(lidar_np) + # remove points that hit itself + lidar_np = mask_ego_points(lidar_np) + # project the lidar to ego space + lidar_np[:, :3] = \ + box_utils.project_points_by_matrix_torch(lidar_np[:, :3], + transformation_matrix) + + selected_cav_processed.update( + {'object_bbx_center': object_bbx_center[object_bbx_mask == 1], + 'object_ids': object_ids, + 'projected_lidar': lidar_np}) + + return selected_cav_processed + + def collate_batch_test(self, batch): + """ + Customized collate function for pytorch dataloader during testing + for late fusion dataset. + + Parameters + ---------- + batch : dict + + Returns + ------- + batch : dict + Reformatted batch. + """ + # currently, we only support batch size of 1 during testing + assert len(batch) <= 1, "Batch size 1 is required during testing!" + batch = batch[0] # only ego + + output_dict = {} + + for cav_id, cav_content in batch.items(): + output_dict.update({cav_id: {}}) + # shape: (1, max_num, 7) + object_bbx_center = \ + torch.from_numpy(np.array([cav_content['object_bbx_center']])) + object_bbx_mask = \ + torch.from_numpy(np.array([cav_content['object_bbx_mask']])) + object_ids = cav_content['object_ids'] + + # the anchor box is the same for all bounding boxes usually, thus + # we don't need the batch dimension. 
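+ # NOTE: for this early-fusion dataset the batch dict only carries the
+ # 'ego' entry, and the transformation matrices filled in below are
+ # identity because every cloud was already projected to the ego frame
+ # in __getitem__.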
+ if cav_content['anchor_box'] is not None: + output_dict[cav_id].update({'anchor_box': + torch.from_numpy(np.array( + cav_content[ + 'anchor_box']))}) + if self.visualize: + origin_lidar = [cav_content['origin_lidar']] + + # processed lidar dictionary + processed_lidar_torch_dict = \ + self.pre_processor.collate_batch( + [cav_content['processed_lidar']]) + # label dictionary + label_torch_dict = \ + self.post_processor.collate_batch([cav_content['label_dict']]) + + # save the transformation matrix (4, 4) to ego vehicle + transformation_matrix_torch = \ + torch.from_numpy(np.identity(4)).float() + transformation_matrix_clean_torch = \ + torch.from_numpy(np.identity(4)).float() + + output_dict[cav_id].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'processed_lidar': processed_lidar_torch_dict, + 'label_dict': label_torch_dict, + 'object_ids': object_ids, + 'transformation_matrix': transformation_matrix_torch, + 'transformation_matrix_clean': transformation_matrix_clean_torch}) + + if self.visualize: + origin_lidar = \ + np.array( + downsample_lidar_minimum(pcd_np_list=origin_lidar)) + origin_lidar = torch.from_numpy(origin_lidar) + output_dict[cav_id].update({'origin_lidar': origin_lidar}) + + return output_dict + + def collate_batch_train(self, batch): + # Intermediate fusion is different the other two + output_dict = {'ego': {}} + + object_bbx_center = [] + object_bbx_mask = [] + object_ids = [] + processed_lidar_list = [] + image_inputs_list = [] + # used to record different scenario + label_dict_list = [] + origin_lidar = [] + + # heterogeneous + lidar_agent_list = [] + + # pairwise transformation matrix + pairwise_t_matrix_list = [] + + ### 2022.10.10 single gt #### + if self.supervise_single: + pos_equal_one_single = [] + neg_equal_one_single = [] + targets_single = [] + + for i in range(len(batch)): + ego_dict = batch[i]['ego'] + object_bbx_center.append(ego_dict['object_bbx_center']) + object_bbx_mask.append(ego_dict['object_bbx_mask']) + object_ids.append(ego_dict['object_ids']) + if self.load_lidar_file: + processed_lidar_list.append(ego_dict['processed_lidar']) + if self.load_camera_file: + image_inputs_list.append(ego_dict['image_inputs']) # different cav_num, ego_dict['image_inputs'] is dict. 
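+ # each 'processed_lidar' entry holds voxel features of the already
+ # fused cloud for one sample; they are merged across the batch below
+ # and then collated by the pre-processor in a single call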
+ + label_dict_list.append(ego_dict['label_dict']) + + if self.visualize: + origin_lidar.append(ego_dict['origin_lidar']) + + ### 2022.10.10 single gt #### + if self.supervise_single: + pos_equal_one_single.append(ego_dict['single_label_dict_torch']['pos_equal_one']) + neg_equal_one_single.append(ego_dict['single_label_dict_torch']['neg_equal_one']) + targets_single.append(ego_dict['single_label_dict_torch']['targets']) + + # heterogeneous + if self.heterogeneous: + lidar_agent_list.append(ego_dict['lidar_agent']) + + # convert to numpy, (B, max_num, 7) + object_bbx_center = torch.from_numpy(np.array(object_bbx_center)) + object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask)) + + if self.load_lidar_file: + merged_feature_dict = merge_features_to_dict(processed_lidar_list) + + if self.heterogeneous: + lidar_agent = np.concatenate(lidar_agent_list) + lidar_agent_idx = lidar_agent.nonzero()[0].tolist() + for k, v in merged_feature_dict.items(): # 'voxel_features' 'voxel_num_points' 'voxel_coords' + merged_feature_dict[k] = [v[index] for index in lidar_agent_idx] + + if not self.heterogeneous or (self.heterogeneous and sum(lidar_agent) != 0): + processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(merged_feature_dict) + output_dict['ego'].update({'processed_lidar': processed_lidar_torch_dict}) + + if self.load_camera_file: + merged_image_inputs_dict = merge_features_to_dict(image_inputs_list, merge='cat') + + if self.heterogeneous: + camera_agent = 1 - lidar_agent + camera_agent_idx = camera_agent.nonzero()[0].tolist() + if sum(camera_agent) != 0: + for k, v in merged_image_inputs_dict.items(): # 'imgs' 'rots' 'trans' ... + merged_image_inputs_dict[k] = torch.stack([v[index] for index in camera_agent_idx]) + + if not self.heterogeneous or (self.heterogeneous and sum(camera_agent) != 0): + output_dict['ego'].update({'image_inputs': merged_image_inputs_dict}) + + label_torch_dict = \ + self.post_processor.collate_batch(label_dict_list) + + # for centerpoint + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + + # (B, max_cav) + pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list)) + + # add pairwise_t_matrix to label dict + + # object id is only used during inference, where batch size is 1. + # so here we only get the first element. + output_dict['ego'].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'label_dict': label_torch_dict, + 'object_ids': object_ids[0]}) + + + if self.visualize: + origin_lidar = \ + np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar)) + origin_lidar = torch.from_numpy(origin_lidar) + output_dict['ego'].update({'origin_lidar': origin_lidar}) + + if self.supervise_single: + output_dict['ego'].update({ + "label_dict_single" : + {"pos_equal_one": torch.cat(pos_equal_one_single, dim=0), + "neg_equal_one": torch.cat(neg_equal_one_single, dim=0), + "targets": torch.cat(targets_single, dim=0)} + }) + + if self.heterogeneous: + output_dict['ego'].update({ + "lidar_agent_record": torch.from_numpy(np.concatenate(lidar_agent_list)) # [0,1,1,0,1...] + }) + + return output_dict + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. 
+ + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. + """ + pred_box_tensor, pred_score = \ + self.post_processor.post_process(data_dict, output_dict) + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + return pred_box_tensor, pred_score, gt_box_tensor + + return EarlyFusionDataset + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/early_multiclass_fusion_dataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/early_multiclass_fusion_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..e6043ef809e8e4c71cf6a8e2347d93ef33c58dd5 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/early_multiclass_fusion_dataset.py @@ -0,0 +1,899 @@ +# early fusion dataset +import random +import math +from collections import OrderedDict +import numpy as np +import torch +import copy +from icecream import ic +from PIL import Image +import pickle as pkl +from opencood.utils import box_utils as box_utils +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.data_utils.post_processor import build_postprocessor +from opencood.utils.camera_utils import ( + sample_augmentation, + img_transform, + normalize_img, + img_to_tensor, +) +# from opencood.utils.heter_utils import AgentSelector +from opencood.utils.common_utils import merge_features_to_dict +from opencood.utils.transformation_utils import x1_to_x2, x_to_world, get_pairwise_transformation +from opencood.utils.pose_utils import add_noise_data_dict, add_noise_data_dict_asymmetric +from opencood.utils.pcd_utils import ( + mask_points_by_range, + mask_ego_points, + mask_ego_points_v2, + shuffle_points, + downsample_lidar_minimum, +) +from opencood.utils.common_utils import read_json + + +def getEarlymulticlassFusionDataset(cls): + """ + cls: the Basedataset. + """ + class EarlymulticlassFusionDataset(cls): + def __init__(self, params, visualize, train=True): + super().__init__(params, visualize, train) + # supervise single + self.supervise_single = True if ('supervise_single' in params['model']['args'] and params['model']['args']['supervise_single']) \ + else False + self.proj_first = False if 'proj_first' not in params['fusion']['args']\ + else params['fusion']['args']['proj_first'] + + self.anchor_box = self.post_processor.generate_anchor_box() + self.anchor_box_torch = torch.from_numpy(self.anchor_box) + + self.heterogeneous = False + if 'heter' in params: + self.heterogeneous = True + self.selector = AgentSelector(params['heter'], self.max_cav) + self.kd_flag = params.get('kd_flag', False) + self.box_align = False + if "box_align" in params: + self.box_align = True + self.stage1_result_path = params['box_align']['train_result'] if train else params['box_align']['val_result'] + self.stage1_result = read_json(self.stage1_result_path) + self.box_align_args = params['box_align']['args'] + self.multiclass = params['model']['args']['multi_class'] + self.online_eval_only = False + + def get_item_single_car(self, selected_cav_base, ego_cav_base, base_data_dict, tpe='all', cav_id='car_0', online_eval=False): + """ + Process a single CAV's information for the train/test pipeline. + + + Parameters + ---------- + selected_cav_base : dict + The dictionary contains a single CAV's raw information. 
+ including 'params', 'camera_data' + ego_pose : list, length 6 + The ego vehicle lidar pose under world coordinate. + ego_pose_clean : list, length 6 + only used for gt box generation + + Returns + ------- + selected_cav_processed : dict + The dictionary contains the cav's processed information. + """ + selected_cav_processed = {} + ego_pose, ego_pose_clean = ego_cav_base['params']['lidar_pose'], ego_cav_base['params']['lidar_pose_clean'] + selected_pose, selected_pose_clean = selected_cav_base['params']['lidar_pose'], selected_cav_base['params']['lidar_pose_clean'] + + # calculate the transformation matrix + transformation_matrix = \ + x1_to_x2(selected_cav_base['params']['lidar_pose'], + ego_pose) # T_ego_cav + transformation_matrix_clean = \ + x1_to_x2(selected_cav_base['params']['lidar_pose_clean'], + ego_pose_clean) + + # lidar + if tpe == 'all': + if self.load_lidar_file or self.visualize: + # process lidar + lidar_np = selected_cav_base['lidar_np'] + lidar_np = shuffle_points(lidar_np) + # remove points that hit itself + if not cav_id.startswith('rsu'): + lidar_np = mask_ego_points_v2(lidar_np) + # project the lidar to ego space + # x,y,z in ego space + + project_lidar_bank = [] + lidar_bank = [] + for agent_id in base_data_dict: + collab_cav_base = base_data_dict[agent_id] + collab_lidar_np = collab_cav_base['lidar_np'] + collab_lidar_np = shuffle_points(collab_lidar_np) + # remove points that hit itself + if not agent_id.startswith('rsu'): + collab_lidar_np = mask_ego_points_v2(collab_lidar_np) + # project the lidar to ego space + # x,y,z in ego space + + # calculate the transformation matrix + transformation_matrix_for_selected = \ + x1_to_x2(collab_cav_base['params']['lidar_pose'], + selected_pose) # T_ego_cav + + projected_collab_lidar = \ + box_utils.project_points_by_matrix_torch(collab_lidar_np[:, :3], + transformation_matrix_for_selected) + project_lidar_bank.append(projected_collab_lidar) + lidar_bank.append(collab_lidar_np) + + projected_lidar = np.concatenate(project_lidar_bank, axis=0) + lidar_np = np.concatenate(lidar_bank, axis=0) + + # if self.proj_first: + lidar_np[:, :3] = projected_lidar + if self.visualize: + # filter lidar + if not selected_cav_base['ego']: + projected_lidar *= 0 + selected_cav_processed.update({'projected_lidar': projected_lidar}) + + if self.kd_flag: + lidar_proj_np = copy.deepcopy(lidar_np) + lidar_proj_np[:,:3] = projected_lidar + + selected_cav_processed.update({'projected_lidar': lidar_proj_np}) + + processed_lidar = self.pre_processor.preprocess(lidar_np) + selected_cav_processed.update({'processed_features': processed_lidar}) + + if not online_eval: + # generate targets label single GT, note the reference pose is itself. 
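+                # Shape sketch (an assumption based on the padded arrays built further
+                # below, not a guarantee): object_bbx_center is padded to
+                # params['postprocess']['max_num'] rows of 7 values (x, y, z, the three
+                # box extents, yaw), object_bbx_mask is a 0/1 vector of the same length
+                # marking the valid rows, and object_ids lists the ids of those boxes.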
+ object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center( + [selected_cav_base], selected_cav_base['params']['lidar_pose'] + ) + + label_dict = {} + if tpe == 'all': + # unused label + if False: + label_dict = self.post_processor.generate_label( + gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask + ) + selected_cav_processed.update({ + "single_label_dict": label_dict, + "single_object_bbx_center": object_bbx_center, + "single_object_bbx_mask": object_bbx_mask}) + + if tpe == 'all': + # camera + if self.load_camera_file: + camera_data_list = selected_cav_base["camera_data"] + + params = selected_cav_base["params"] + imgs = [] + rots = [] + trans = [] + intrins = [] + extrinsics = [] + post_rots = [] + post_trans = [] + + for idx, img in enumerate(camera_data_list): + camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx) + + intrin = torch.from_numpy(camera_intrinsic) + rot = torch.from_numpy( + camera_to_lidar[:3, :3] + ) # R_wc, we consider world-coord is the lidar-coord + tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc + + post_rot = torch.eye(2) + post_tran = torch.zeros(2) + + img_src = [img] + + # depth + if self.load_depth_file: + depth_img = selected_cav_base["depth_data"][idx] + img_src.append(depth_img) + else: + depth_img = None + + # data augmentation + resize, resize_dims, crop, flip, rotate = sample_augmentation( + self.data_aug_conf, self.train + ) + img_src, post_rot2, post_tran2 = img_transform( + img_src, + post_rot, + post_tran, + resize=resize, + resize_dims=resize_dims, + crop=crop, + flip=flip, + rotate=rotate, + ) + # for convenience, make augmentation matrices 3x3 + post_tran = torch.zeros(3) + post_rot = torch.eye(3) + post_tran[:2] = post_tran2 + post_rot[:2, :2] = post_rot2 + + # decouple RGB and Depth + + img_src[0] = normalize_img(img_src[0]) + if self.load_depth_file: + img_src[1] = img_to_tensor(img_src[1]) * 255 + + imgs.append(torch.cat(img_src, dim=0)) + intrins.append(intrin) + extrinsics.append(torch.from_numpy(camera_to_lidar)) + rots.append(rot) + trans.append(tran) + post_rots.append(post_rot) + post_trans.append(post_tran) + + selected_cav_processed.update( + { + "image_inputs": + { + "imgs": torch.stack(imgs), # [Ncam, 3or4, H, W] + "intrins": torch.stack(intrins), + "extrinsics": torch.stack(extrinsics), + "rots": torch.stack(rots), + "trans": torch.stack(trans), + "post_rots": torch.stack(post_rots), + "post_trans": torch.stack(post_trans), + } + } + ) + + # anchor box + selected_cav_processed.update({"anchor_box": self.anchor_box}) + + + if not online_eval: + # note the reference pose ego + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center([selected_cav_base], + ego_pose_clean) + selected_cav_processed.update( + { + "object_bbx_center": object_bbx_center[object_bbx_mask == 1], + "object_bbx_mask": object_bbx_mask, + "object_ids": object_ids, + } + ) + + selected_cav_processed.update( + { + 'transformation_matrix': transformation_matrix, + 'transformation_matrix_clean': transformation_matrix_clean + } + ) + + return selected_cav_processed + + def __getitem__(self, idx, extra_source=None, data_dir=None): + + if data_dir is not None: + extra_source=1 + + object_bbx_center_list = [] + object_bbx_mask_list = [] + object_id_dict = {} + + object_bbx_center_list_single = [] + object_bbx_mask_list_single = [] + + + output_dict = {} + for tpe in ['all', 0, 1, 3]: + output_single_class = self.__getitem_single_class__(idx, tpe, extra_source, data_dir) + 
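+                # Assembly sketch for the multiclass case: each tpe in (0, 1, 3) produces a
+                # single-class sample whose box array and mask are collected below; after
+                # the loop they are stacked along a new class axis, so the 'all' entry ends
+                # up holding roughly (num_class, max_num, 7) centers while the shared
+                # lidar/camera inputs live only in output_dict['all'].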
output_dict[tpe] = output_single_class + if tpe == 'all' and extra_source==None: + continue + elif tpe == 'all' and extra_source!=None: + break + object_bbx_center_list.append(output_single_class['ego']['object_bbx_center']) + object_bbx_mask_list.append(output_single_class['ego']['object_bbx_mask']) + if self.supervise_single: + object_bbx_center_list_single.append(output_single_class['ego']['single_object_bbx_center_torch']) + object_bbx_mask_list_single.append(output_single_class['ego']['single_object_bbx_mask_torch']) + + object_id_dict[tpe] = output_single_class['ego']['object_ids'] + + if self.multiclass and extra_source==None: + output_dict['all']['ego']['object_bbx_center'] = np.stack(object_bbx_center_list, axis=0) + output_dict['all']['ego']['object_bbx_mask'] = np.stack(object_bbx_mask_list, axis=0) + if self.supervise_single: + output_dict['all']['ego']['single_object_bbx_center_torch'] = torch.stack(object_bbx_center_list_single, axis=1) + output_dict['all']['ego']['single_object_bbx_mask_torch'] = torch.stack(object_bbx_mask_list_single, axis=1) + + output_dict['all']['ego']['object_ids'] = object_id_dict + # print('finish get item') + return output_dict['all'] + + def __getitem_single_class__(self, idx, tpe=None, extra_source=None, data_dir=None): + + if extra_source is None and data_dir is None: + base_data_dict = self.retrieve_base_data(idx, tpe) ## {id:{'ego':True/False, 'params': {'lidar_pose','speed','vehicles','ego_pos',...}, 'lidar_np': array (N,4)}} + elif data_dir is not None: + base_data_dict = self.retrieve_base_data(idx=None, tpe=tpe, data_dir=data_dir) + elif extra_source is not None: + base_data_dict = self.retrieve_base_data(idx=None, tpe=tpe, extra_source=extra_source) + + # base_data_dict = add_noise_data_dict(base_data_dict,self.params['noise_setting']) + base_data_dict = add_noise_data_dict_asymmetric(base_data_dict,self.params['noise_setting']) + processed_data_dict = OrderedDict() + processed_data_dict['ego'] = {} + + ego_id = -1 + ego_lidar_pose = [] + ego_cav_base = None + + # first find the ego vehicle's lidar pose + for cav_id, cav_content in base_data_dict.items(): + if cav_content['ego']: + ego_id = cav_id + ego_lidar_pose = cav_content['params']['lidar_pose'] + ego_cav_base = cav_content + break + + assert cav_id == list(base_data_dict.keys())[ + 0], "The first element in the OrderedDict must be ego" + assert ego_id != -1 + assert len(ego_lidar_pose) > 0 + + agents_image_inputs = [] + processed_features = [] + object_stack = [] + object_id_stack = [] + single_label_list = [] + single_object_bbx_center_list = [] + single_object_bbx_mask_list = [] + too_far = [] + lidar_pose_list = [] + lidar_pose_clean_list = [] + cav_id_list = [] + projected_lidar_clean_list = [] # disconet + + if self.visualize or self.kd_flag: + projected_lidar_stack = [] + + # loop over all CAVs to process information + for cav_id, selected_cav_base in base_data_dict.items(): + # check if the cav is within the communication range with ego + distance = \ + math.sqrt((selected_cav_base['params']['lidar_pose'][0] - + ego_lidar_pose[0]) ** 2 + ( + selected_cav_base['params'][ + 'lidar_pose'][1] - ego_lidar_pose[ + 1]) ** 2) + + # if distance is too far, we will just skip this agent + if distance > self.params['comm_range']: + too_far.append(cav_id) + continue + + lidar_pose_clean_list.append(selected_cav_base['params']['lidar_pose_clean']) + lidar_pose_list.append(selected_cav_base['params']['lidar_pose']) # 6dof pose + cav_id_list.append(cav_id) + + for cav_id in too_far: + 
base_data_dict.pop(cav_id) + + if self.box_align and str(idx) in self.stage1_result.keys(): # False + from opencood.models.sub_modules.box_align_v2 import box_alignment_relative_sample_np + stage1_content = self.stage1_result[str(idx)] + if stage1_content is not None: + all_agent_id_list = stage1_content['cav_id_list'] # include those out of range + all_agent_corners_list = stage1_content['pred_corner3d_np_list'] + all_agent_uncertainty_list = stage1_content['uncertainty_np_list'] + + cur_agent_id_list = cav_id_list + cur_agent_pose = [base_data_dict[cav_id]['params']['lidar_pose'] for cav_id in cav_id_list] + cur_agnet_pose = np.array(cur_agent_pose) + cur_agent_in_all_agent = [all_agent_id_list.index(cur_agent) for cur_agent in cur_agent_id_list] # indexing current agent in `all_agent_id_list` + + pred_corners_list = [np.array(all_agent_corners_list[cur_in_all_ind], dtype=np.float64) + for cur_in_all_ind in cur_agent_in_all_agent] + uncertainty_list = [np.array(all_agent_uncertainty_list[cur_in_all_ind], dtype=np.float64) + for cur_in_all_ind in cur_agent_in_all_agent] + + if sum([len(pred_corners) for pred_corners in pred_corners_list]) != 0: + refined_pose = box_alignment_relative_sample_np(pred_corners_list, + cur_agnet_pose, + uncertainty_list=uncertainty_list, + **self.box_align_args) + cur_agnet_pose[:,[0,1,4]] = refined_pose + + for i, cav_id in enumerate(cav_id_list): + lidar_pose_list[i] = cur_agnet_pose[i].tolist() + base_data_dict[cav_id]['params']['lidar_pose'] = cur_agnet_pose[i].tolist() + + pairwise_t_matrix = \ + get_pairwise_transformation(base_data_dict, + self.max_cav, + self.proj_first) + + lidar_poses = np.array(lidar_pose_list).reshape(-1, 6) # [N_cav, 6] + lidar_poses_clean = np.array(lidar_pose_clean_list).reshape(-1, 6) # [N_cav, 6] + + # merge preprocessed features from different cavs into the same dict + cav_num = len(cav_id_list) + + # heterogeneous + if self.heterogeneous: + lidar_agent, camera_agent = self.selector.select_agent(idx) + lidar_agent = lidar_agent[:cav_num] + processed_data_dict['ego'].update({"lidar_agent": lidar_agent}) + + for _i, cav_id in enumerate(cav_id_list): + selected_cav_base = base_data_dict[cav_id] + + # dynamic object center generator! 
for heterogeneous input + if (not self.visualize) and self.heterogeneous and lidar_agent[_i]: + self.generate_object_center = self.generate_object_center_lidar + elif (not self.visualize) and self.heterogeneous and (not lidar_agent[_i]): + self.generate_object_center = self.generate_object_center_camera + + selected_cav_processed = self.get_item_single_car( + selected_cav_base, + ego_cav_base, + base_data_dict, + tpe, + cav_id, + extra_source!=None) + + if extra_source==None: + object_stack.append(selected_cav_processed['object_bbx_center']) + object_id_stack += selected_cav_processed['object_ids'] + if tpe == 'all': + if self.load_lidar_file: + processed_features.append( + selected_cav_processed['processed_features']) + if self.load_camera_file: + agents_image_inputs.append( + selected_cav_processed['image_inputs']) + + if self.visualize or self.kd_flag: + projected_lidar_stack.append( + selected_cav_processed['projected_lidar']) + + if self.supervise_single and extra_source==None: + single_label_list.append(selected_cav_processed['single_label_dict']) + single_object_bbx_center_list.append(selected_cav_processed['single_object_bbx_center']) + single_object_bbx_mask_list.append(selected_cav_processed['single_object_bbx_mask']) + + # generate single view GT label + if self.supervise_single and extra_source==None: + single_label_dicts = {} + if tpe == 'all': + # unused label + if False: + single_label_dicts = self.post_processor.collate_batch(single_label_list) + single_object_bbx_center = torch.from_numpy(np.array(single_object_bbx_center_list)) + single_object_bbx_mask = torch.from_numpy(np.array(single_object_bbx_mask_list)) + processed_data_dict['ego'].update({ + "single_label_dict_torch": single_label_dicts, + "single_object_bbx_center_torch": single_object_bbx_center, + "single_object_bbx_mask_torch": single_object_bbx_mask, + }) + + if self.kd_flag: + stack_lidar_np = np.vstack(projected_lidar_stack) + stack_lidar_np = mask_points_by_range(stack_lidar_np, + self.params['preprocess'][ + 'cav_lidar_range']) + stack_feature_processed = self.pre_processor.preprocess(stack_lidar_np) + processed_data_dict['ego'].update({'teacher_processed_lidar': + stack_feature_processed}) + + if extra_source is None: + # exclude all repetitive objects + unique_indices = \ + [object_id_stack.index(x) for x in set(object_id_stack)] + object_stack = np.vstack(object_stack) + object_stack = object_stack[unique_indices] + + # make sure bounding boxes across all frames have the same number + object_bbx_center = \ + np.zeros((self.params['postprocess']['max_num'], 7)) + mask = np.zeros(self.params['postprocess']['max_num']) + object_bbx_center[:object_stack.shape[0], :] = object_stack + mask[:object_stack.shape[0]] = 1 + + processed_data_dict['ego'].update( + {'object_bbx_center': object_bbx_center, # (100,7) + 'object_bbx_mask': mask, # (100,) + 'object_ids': [object_id_stack[i] for i in unique_indices], + } + ) + + + # generate targets label + label_dict = {} + if tpe == 'all': + # unused label + if False: + label_dict = \ + self.post_processor.generate_label( + gt_box_center=object_bbx_center, + anchors=self.anchor_box, + mask=mask) + + processed_data_dict['ego'].update( + { + 'anchor_box': self.anchor_box, + 'label_dict': label_dict, + 'cav_num': cav_num, + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_poses_clean': lidar_poses_clean, + 'lidar_poses': lidar_poses}) + + if tpe == 'all': + if self.load_lidar_file: + merged_feature_dict = merge_features_to_dict(processed_features) + 
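+                    # Structure sketch (assuming the voxel preprocessor is used): each entry
+                    # of processed_features is a per-CAV dict with keys such as
+                    # 'voxel_features', 'voxel_coords' and 'voxel_num_points', and
+                    # merge_features_to_dict regroups them into one dict of lists keyed by
+                    # the same names so the collate step can concatenate across CAVs.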
processed_data_dict['ego'].update({'processed_lidar': merged_feature_dict}) + if self.load_camera_file: + merged_image_inputs_dict = merge_features_to_dict(agents_image_inputs, merge='stack') + processed_data_dict['ego'].update({'image_inputs': merged_image_inputs_dict}) + + if self.visualize: + processed_data_dict['ego'].update({'origin_lidar': + # projected_lidar_stack}) + np.vstack( + projected_lidar_stack)}) + processed_data_dict['ego'].update({'lidar_len': [len(projected_lidar_stack[i]) for i in range(len(projected_lidar_stack))]}) + + + processed_data_dict['ego'].update({'sample_idx': idx, + 'cav_id_list': cav_id_list}) + + img_front_list = [] + img_left_list = [] + img_right_list = [] + BEV_list = [] + + if self.visualize: + for car_id in base_data_dict: + if not base_data_dict[car_id]['ego'] == True: + continue + if 'rgb_front' in base_data_dict[car_id] and 'rgb_left' in base_data_dict[car_id] and 'rgb_right' in base_data_dict[car_id] and 'BEV' in base_data_dict[car_id] : + img_front_list.append(base_data_dict[car_id]['rgb_front']) + img_left_list.append(base_data_dict[car_id]['rgb_left']) + img_right_list.append(base_data_dict[car_id]['rgb_right']) + BEV_list.append(base_data_dict[car_id]['BEV']) + processed_data_dict['ego'].update({'img_front': img_front_list, + 'img_left': img_left_list, + 'img_right': img_right_list, + 'BEV': BEV_list}) + processed_data_dict['ego'].update({'scene_dict': base_data_dict['car_0']['scene_dict'], + 'frame_id': base_data_dict['car_0']['frame_id'], + }) + + + return processed_data_dict + + + def collate_batch_train(self, batch, online_eval_only=False): + # Intermediate fusion is different the other two + output_dict = {'ego': {}} + + object_bbx_center = [] + object_bbx_mask = [] + object_ids = [] + processed_lidar_list = [] + image_inputs_list = [] + # used to record different scenario + record_len = [] + label_dict_list = [] + lidar_pose_list = [] + origin_lidar = [] + lidar_len = [] + lidar_pose_clean_list = [] + + # heterogeneous + lidar_agent_list = [] + + # pairwise transformation matrix + pairwise_t_matrix_list = [] + + # disconet + teacher_processed_lidar_list = [] + + # image + img_front = [] + img_left = [] + img_right = [] + BEV = [] + + dict_list = [] + + ### 2022.10.10 single gt #### + if self.supervise_single: + pos_equal_one_single = [] + neg_equal_one_single = [] + targets_single = [] + object_bbx_center_single = [] + object_bbx_mask_single = [] + + for i in range(len(batch)): + ego_dict = batch[i]['ego'] + if not online_eval_only: + object_bbx_center.append(ego_dict['object_bbx_center']) + object_bbx_mask.append(ego_dict['object_bbx_mask']) + object_ids.append(ego_dict['object_ids']) + else: + object_ids.append(None) + lidar_pose_list.append(ego_dict['lidar_poses']) # ego_dict['lidar_pose'] is np.ndarray [N,6] + lidar_pose_clean_list.append(ego_dict['lidar_poses_clean']) + if self.load_lidar_file: + processed_lidar_list.append(ego_dict['processed_lidar']) + if self.load_camera_file: + image_inputs_list.append(ego_dict['image_inputs']) # different cav_num, ego_dict['image_inputs'] is dict. 
+ + record_len.append(ego_dict['cav_num']) + label_dict_list.append(ego_dict['label_dict']) + pairwise_t_matrix_list.append(ego_dict['pairwise_t_matrix']) + + dict_list.append([ego_dict['scene_dict'], ego_dict['frame_id']]) + + if self.visualize: + origin_lidar.append(ego_dict['origin_lidar']) + lidar_len.append(ego_dict['lidar_len']) + if len(ego_dict['img_front']) > 0 and len(ego_dict['img_right']) > 0 and len(ego_dict['img_left']) > 0 and len(ego_dict['BEV']) > 0: + img_front.append(ego_dict['img_front'][0]) + img_left.append(ego_dict['img_left'][0]) + img_right.append(ego_dict['img_right'][0]) + BEV.append(ego_dict['BEV'][0]) + + + if self.kd_flag: + teacher_processed_lidar_list.append(ego_dict['teacher_processed_lidar']) + + ### 2022.10.10 single gt #### + if self.supervise_single and not online_eval_only: + # unused label + if False: + pos_equal_one_single.append(ego_dict['single_label_dict_torch']['pos_equal_one']) + neg_equal_one_single.append(ego_dict['single_label_dict_torch']['neg_equal_one']) + targets_single.append(ego_dict['single_label_dict_torch']['targets']) + object_bbx_center_single.append(ego_dict['single_object_bbx_center_torch']) + object_bbx_mask_single.append(ego_dict['single_object_bbx_mask_torch']) + + # heterogeneous + if self.heterogeneous: + lidar_agent_list.append(ego_dict['lidar_agent']) + + # convert to numpy, (B, max_num, 7) + if not online_eval_only: + object_bbx_center = torch.from_numpy(np.array(object_bbx_center)) + object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask)) + else: + object_bbx_center = None + object_bbx_mask = None + + if self.load_lidar_file: + merged_feature_dict = merge_features_to_dict(processed_lidar_list) + + if self.heterogeneous: + lidar_agent = np.concatenate(lidar_agent_list) + lidar_agent_idx = lidar_agent.nonzero()[0].tolist() + for k, v in merged_feature_dict.items(): # 'voxel_features' 'voxel_num_points' 'voxel_coords' + merged_feature_dict[k] = [v[index] for index in lidar_agent_idx] + + if not self.heterogeneous or (self.heterogeneous and sum(lidar_agent) != 0): + processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(merged_feature_dict) + output_dict['ego'].update({'processed_lidar': processed_lidar_torch_dict}) + + if self.load_camera_file: + merged_image_inputs_dict = merge_features_to_dict(image_inputs_list, merge='cat') + + if self.heterogeneous: + lidar_agent = np.concatenate(lidar_agent_list) + camera_agent = 1 - lidar_agent + camera_agent_idx = camera_agent.nonzero()[0].tolist() + if sum(camera_agent) != 0: + for k, v in merged_image_inputs_dict.items(): # 'imgs' 'rots' 'trans' ... 
+ merged_image_inputs_dict[k] = torch.stack([v[index] for index in camera_agent_idx]) + + if not self.heterogeneous or (self.heterogeneous and sum(camera_agent) != 0): + output_dict['ego'].update({'image_inputs': merged_image_inputs_dict}) + + record_len = torch.from_numpy(np.array(record_len, dtype=int)) + lidar_pose = torch.from_numpy(np.concatenate(lidar_pose_list, axis=0)) + lidar_pose_clean = torch.from_numpy(np.concatenate(lidar_pose_clean_list, axis=0)) + + # unused label + label_torch_dict = {} + if False: + label_torch_dict = \ + self.post_processor.collate_batch(label_dict_list) + # for centerpoint + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + + # (B, max_cav) + pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list)) + + # add pairwise_t_matrix to label dict + label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix + label_torch_dict['record_len'] = record_len + + + # object id is only used during inference, where batch size is 1. + # so here we only get the first element. + output_dict['ego'].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'record_len': record_len, + 'label_dict': label_torch_dict, + 'object_ids': object_ids[0], + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_pose_clean': lidar_pose_clean, + 'lidar_pose': lidar_pose, + 'anchor_box': self.anchor_box_torch}) + + + output_dict['ego'].update({'dict_list': dict_list}) + + if self.visualize: + origin_lidar = torch.from_numpy(np.array(origin_lidar)) + output_dict['ego'].update({'origin_lidar': origin_lidar}) + lidar_len = np.array(lidar_len) + output_dict['ego'].update({'lidar_len': lidar_len}) + output_dict['ego'].update({'img_front': img_front}) + output_dict['ego'].update({'img_right': img_right}) + output_dict['ego'].update({'img_left': img_left}) + output_dict['ego'].update({'BEV': BEV}) + + if self.kd_flag: + teacher_processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(teacher_processed_lidar_list) + output_dict['ego'].update({'teacher_processed_lidar':teacher_processed_lidar_torch_dict}) + + + if self.supervise_single and not online_eval_only: + output_dict['ego'].update({ + "label_dict_single":{ + # "pos_equal_one": torch.cat(pos_equal_one_single, dim=0), + # "neg_equal_one": torch.cat(neg_equal_one_single, dim=0), + # "targets": torch.cat(targets_single, dim=0), + # for centerpoint + "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0), + "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0) + }, + "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0), + "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0) + }) + + if self.heterogeneous: + output_dict['ego'].update({ + "lidar_agent_record": torch.from_numpy(np.concatenate(lidar_agent_list)) # [0,1,1,0,1...] + }) + + return output_dict + + def collate_batch_test(self, batch, online_eval_only=False): + + self.online_eval_only = online_eval_only + + assert len(batch) <= 1, "Batch size 1 is required during testing!" + output_dict = self.collate_batch_train(batch, online_eval_only) + if output_dict is None: + return None + + # check if anchor box in the batch + if batch[0]['ego']['anchor_box'] is not None: + output_dict['ego'].update({'anchor_box': + self.anchor_box_torch}) + + # save the transformation matrix (4, 4) to ego vehicle + # transformation is only used in post process (no use.) + # we all predict boxes in ego coord. 
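+            # With batch size 1 at test time every predicted box is already expressed in
+            # the ego frame, so an identity 4x4 suffices here; a real CAV-to-ego transform
+            # would only matter if predictions were kept in each CAV's own frame.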
+ transformation_matrix_torch = \ + torch.from_numpy(np.identity(4)).float() + transformation_matrix_clean_torch = \ + torch.from_numpy(np.identity(4)).float() + + output_dict['ego'].update({'transformation_matrix': + transformation_matrix_torch, + 'transformation_matrix_clean': + transformation_matrix_clean_torch,}) + + output_dict['ego'].update({ + "sample_idx": batch[0]['ego']['sample_idx'], + "cav_id_list": batch[0]['ego']['cav_id_list'] + }) + + return output_dict + + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. + """ + pred_box_tensor, pred_score = \ + self.post_processor.post_process(data_dict, output_dict) + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + return pred_box_tensor, pred_score, gt_box_tensor + + def post_process_multiclass(self, data_dict, output_dict, online_eval_only=False): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. + """ + + if online_eval_only == False: + online_eval_only = self.online_eval_only + + num_class = output_dict['ego']['cls_preds'].shape[1] + + + pred_box_tensor_list = [] + pred_score_list = [] + gt_box_tensor_list = [] + + num_list = [0,1,3] + + for i in range(num_class): + data_dict_single = copy.deepcopy(data_dict) + output_dict_single = copy.deepcopy(output_dict) + if not online_eval_only: + data_dict_single['ego']['object_bbx_center'] = data_dict['ego']['object_bbx_center'][:,i,:,:] + data_dict_single['ego']['object_bbx_mask'] = data_dict['ego']['object_bbx_mask'][:,i,:] + data_dict_single['ego']['object_ids'] = data_dict['ego']['object_ids'][num_list[i]] + + output_dict_single['ego']['cls_preds'] = output_dict['ego']['cls_preds'][:,i:i+1,:,:] + output_dict_single['ego']['reg_preds'] = output_dict['ego']['reg_preds_multiclass'][:,i,:,:] + + pred_box_tensor, pred_score = \ + self.post_processor.post_process(data_dict_single, output_dict_single) + + if not online_eval_only: + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict_single) + else: + gt_box_tensor = None + + pred_box_tensor_list.append(pred_box_tensor) + pred_score_list.append(pred_score) + gt_box_tensor_list.append(gt_box_tensor) + + return pred_box_tensor_list, pred_score_list, gt_box_tensor_list + + return EarlymulticlassFusionDataset + + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_2stage_fusion_dataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_2stage_fusion_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..5e65f96b42d512b63b5ffb8c2fd5d68f178f1edb --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_2stage_fusion_dataset.py @@ -0,0 +1,603 @@ +# intermediate fusion dataset +import random +import 
math +from collections import OrderedDict +import numpy as np +import torch +import copy +from icecream import ic +from PIL import Image +import pickle as pkl +from opencood.utils import box_utils as box_utils +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.data_utils.post_processor import build_postprocessor +from opencood.utils.camera_utils import ( + sample_augmentation, + img_transform, + normalize_img, + img_to_tensor, +) +from opencood.utils.common_utils import merge_features_to_dict +from opencood.utils.transformation_utils import x1_to_x2, x_to_world, get_pairwise_transformation +from opencood.utils.pose_utils import add_noise_data_dict +from opencood.utils.pcd_utils import ( + mask_points_by_range, + mask_ego_points, + shuffle_points, + downsample_lidar_minimum, +) + +def getIntermediate2stageFusionDataset(cls): + """ + cls: the Basedataset. + """ + class Intermediate2stageFusionDataset(cls): + def __init__(self, params, visualize, train=True): + super().__init__(params, visualize, train) + # intermediate and supervise single + self.supervise_single = True if ('supervise_single' in params['model']['args'] and params['model']['args']['supervise_single']) \ + else False + # it is assert to be False but by default it will load single label for 1-stage training. + assert self.supervise_single is False + + self.proj_first = False if 'proj_first' not in params['fusion']['args']\ + else params['fusion']['args']['proj_first'] + + self.anchor_box = self.post_processor.generate_anchor_box() + self.anchor_box_torch = torch.from_numpy(self.anchor_box) + + self.heterogeneous = False + if 'heter' in params: + self.heterogeneous = True + + def get_item_single_car(self, selected_cav_base, ego_cav_base): + """ + Process a single CAV's information for the train/test pipeline. + + + Parameters + ---------- + selected_cav_base : dict + The dictionary contains a single CAV's raw information. + including 'params', 'camera_data' + ego_pose : list, length 6 + The ego vehicle lidar pose under world coordinate. + ego_pose_clean : list, length 6 + only used for gt box generation + + Returns + ------- + selected_cav_processed : dict + The dictionary contains the cav's processed information. 
+ """ + selected_cav_processed = {} + ego_pose, ego_pose_clean = ego_cav_base['params']['lidar_pose'], ego_cav_base['params']['lidar_pose_clean'] + + # calculate the transformation matrix + transformation_matrix = \ + x1_to_x2(selected_cav_base['params']['lidar_pose'], + ego_pose) # T_ego_cav + transformation_matrix_clean = \ + x1_to_x2(selected_cav_base['params']['lidar_pose_clean'], + ego_pose_clean) + + # lidar + if self.load_lidar_file or self.visualize: + # process lidar + lidar_np = selected_cav_base['lidar_np'] + lidar_np = shuffle_points(lidar_np) + # remove points that hit itself + lidar_np = mask_ego_points(lidar_np) + + # no projected lidar + no_project_lidar = copy.deepcopy(lidar_np) + + # project the lidar to ego space + # x,y,z in ego space + projected_lidar = \ + box_utils.project_points_by_matrix_torch(lidar_np[:, :3], + transformation_matrix) + if self.proj_first: # + lidar_np[:, :3] = projected_lidar + + if self.visualize: + # filter lidar + selected_cav_processed.update({'projected_lidar': projected_lidar}) + + processed_lidar = self.pre_processor.preprocess(lidar_np) + selected_cav_processed.update({'projected_lidar': projected_lidar, + 'no_projected_lidar': no_project_lidar, + 'processed_features': processed_lidar}) + + # generate targets label single GT, note the reference pose is itself. + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center( + [selected_cav_base], selected_cav_base['params']['lidar_pose'] + ) + label_dict = self.post_processor.generate_label( + gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask + ) + selected_cav_processed.update({"object_bbx_center_no_coop": object_bbx_center[object_bbx_mask==1], + "single_label_dict": label_dict}) + + # camera + if self.load_camera_file: + camera_data_list = selected_cav_base["camera_data"] + + params = selected_cav_base["params"] + imgs = [] + rots = [] + trans = [] + intrins = [] + post_rots = [] + post_trans = [] + + for idx, img in enumerate(camera_data_list): + camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx) + + intrin = torch.from_numpy(camera_intrinsic) + rot = torch.from_numpy( + camera_to_lidar[:3, :3] + ) # R_wc, we consider world-coord is the lidar-coord + tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc + + post_rot = torch.eye(2) + post_tran = torch.zeros(2) + + img_src = [img] + + # depth + if self.load_depth_file: + depth_img = selected_cav_base["depth_data"][idx] + img_src.append(depth_img) + else: + depth_img = None + + # data augmentation + resize, resize_dims, crop, flip, rotate = sample_augmentation( + self.data_aug_conf, self.train + ) + img_src, post_rot2, post_tran2 = img_transform( + img_src, + post_rot, + post_tran, + resize=resize, + resize_dims=resize_dims, + crop=crop, + flip=flip, + rotate=rotate, + ) + # for convenience, make augmentation matrices 3x3 + post_tran = torch.zeros(3) + post_rot = torch.eye(3) + post_tran[:2] = post_tran2 + post_rot[:2, :2] = post_rot2 + + # decouple RGB and Depth + + img_src[0] = normalize_img(img_src[0]) + if self.load_depth_file: + img_src[1] = img_to_tensor(img_src[1]) * 255 + + imgs.append(torch.cat(img_src, dim=0)) + intrins.append(intrin) + rots.append(rot) + trans.append(tran) + post_rots.append(post_rot) + post_trans.append(post_tran) + + selected_cav_processed.update( + { + "image_inputs": + { + "imgs": torch.stack(imgs), # [Ncam, 3or4, H, W] + "intrins": torch.stack(intrins), + "rots": torch.stack(rots), + "trans": torch.stack(trans), + "post_rots": 
torch.stack(post_rots), + "post_trans": torch.stack(post_trans), + } + } + ) + + # anchor box + selected_cav_processed.update({"anchor_box": self.anchor_box}) + + # note the reference pose ego + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center([selected_cav_base], + ego_pose_clean) + + selected_cav_processed.update( + { + "object_bbx_center": object_bbx_center[object_bbx_mask == 1], + "object_bbx_mask": object_bbx_mask, + "object_ids": object_ids, + 'transformation_matrix': transformation_matrix, + 'transformation_matrix_clean': transformation_matrix_clean + } + ) + + + return selected_cav_processed + + def __getitem__(self, idx): + base_data_dict = self.retrieve_base_data(idx) + base_data_dict = add_noise_data_dict(base_data_dict,self.params['noise_setting']) + + processed_data_dict = OrderedDict() + processed_data_dict['ego'] = {} + + ego_id = -1 + ego_lidar_pose = [] + ego_cav_base = None + + # first find the ego vehicle's lidar pose + for cav_id, cav_content in base_data_dict.items(): + if cav_content['ego']: + ego_id = cav_id + ego_lidar_pose = cav_content['params']['lidar_pose'] + ego_cav_base = cav_content + break + + assert cav_id == list(base_data_dict.keys())[ + 0], "The first element in the OrderedDict must be ego" + assert ego_id != -1 + assert len(ego_lidar_pose) > 0 + + agents_image_inputs = [] + processed_features = [] + object_stack = [] + object_id_stack = [] + single_label_list = [] + too_far = [] + lidar_pose_list = [] + lidar_pose_clean_list = [] + cav_id_list = [] + + projected_lidar_stack = [] + no_projected_lidar_stack = [] + + vsa_lidar_stack = [] + + if self.visualize: + projected_lidar_stack = [] + + # loop over all CAVs to process information + for cav_id, selected_cav_base in base_data_dict.items(): + # check if the cav is within the communication range with ego + distance = \ + math.sqrt((selected_cav_base['params']['lidar_pose'][0] - + ego_lidar_pose[0]) ** 2 + ( + selected_cav_base['params'][ + 'lidar_pose'][1] - ego_lidar_pose[ + 1]) ** 2) + + # if distance is too far, we will just skip this agent + if distance > self.params['comm_range']: + too_far.append(cav_id) + continue + + lidar_pose_clean_list.append(selected_cav_base['params']['lidar_pose_clean']) + lidar_pose_list.append(selected_cav_base['params']['lidar_pose']) # 6dof pose + cav_id_list.append(cav_id) + + for cav_id in too_far: + base_data_dict.pop(cav_id) + + + pairwise_t_matrix = \ + get_pairwise_transformation(base_data_dict, + self.max_cav, + self.proj_first) + + lidar_poses = np.array(lidar_pose_list).reshape(-1, 6) # [N_cav, 6] + lidar_poses_clean = np.array(lidar_pose_clean_list).reshape(-1, 6) # [N_cav, 6] + + # merge preprocessed features from different cavs into the same dict + cav_num = len(cav_id_list) + + # heterogeneous + if self.heterogeneous: + lidar_agent, camera_agent = self.selector.select_agent(idx) + lidar_agent = lidar_agent[:cav_num] + processed_data_dict['ego'].update({"lidar_agent": lidar_agent}) + + + for _i, cav_id in enumerate(cav_id_list): + selected_cav_base = base_data_dict[cav_id] + + # dynamic object center generator! for heterogeneous input. 
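+                # Heterogeneous-only switch (a sketch of the intent): CAVs flagged as lidar
+                # agents build their GT centers with the lidar-based generator, the
+                # remaining camera agents use the camera-based one, and when visualizing
+                # the default generator is kept unchanged.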
+ if (not self.visualize) and self.heterogeneous and lidar_agent[_i]: + self.generate_object_center = self.generate_object_center_lidar + elif (not self.visualize) and self.heterogeneous and (not lidar_agent[_i]): + self.generate_object_center = self.generate_object_center_camera + + selected_cav_processed = self.get_item_single_car( + selected_cav_base, + ego_cav_base) + + object_stack.append(selected_cav_processed['object_bbx_center']) + object_id_stack += selected_cav_processed['object_ids'] + + if self.load_lidar_file: + processed_features.append( + selected_cav_processed['processed_features']) + if self.proj_first: + vsa_lidar_stack.append(selected_cav_processed['projected_lidar']) + else: + vsa_lidar_stack.append(selected_cav_processed['no_projected_lidar']) + + if self.load_camera_file: + agents_image_inputs.append( + selected_cav_processed['image_inputs']) + + if self.visualize: + projected_lidar_stack.append( + selected_cav_processed['projected_lidar']) + + single_label_list.append(selected_cav_processed['single_label_dict']) + + # generate single view label (no coop) label + label_dict_no_coop = single_label_list # [{cav1_label}, {cav2_label}...] + + + # exclude all repetitive objects + unique_indices = \ + [object_id_stack.index(x) for x in set(object_id_stack)] + object_stack = np.vstack(object_stack) + object_stack = object_stack[unique_indices] + + # make sure bounding boxes across all frames have the same number + object_bbx_center = \ + np.zeros((self.params['postprocess']['max_num'], 7)) + mask = np.zeros(self.params['postprocess']['max_num']) + object_bbx_center[:object_stack.shape[0], :] = object_stack + mask[:object_stack.shape[0]] = 1 + + if self.load_lidar_file: + merged_feature_dict = merge_features_to_dict(processed_features) + processed_data_dict['ego'].update({'processed_lidar': merged_feature_dict, + 'vsa_lidar': vsa_lidar_stack}) + if self.load_camera_file: + merged_image_inputs_dict = merge_features_to_dict(agents_image_inputs, merge='stack') + processed_data_dict['ego'].update({'image_inputs': merged_image_inputs_dict}) + + # generate targets label + label_dict_coop = \ + self.post_processor.generate_label( + gt_box_center=object_bbx_center, + anchors=self.anchor_box, + mask=mask) + + label_dict = { + 'stage1': label_dict_no_coop, # list + 'stage2': label_dict_coop # dict + } + + processed_data_dict['ego'].update( + {'object_bbx_center': object_bbx_center, + 'object_bbx_mask': mask, + 'object_ids': [object_id_stack[i] for i in unique_indices], + 'anchor_box': self.anchor_box, + 'label_dict': label_dict, + 'cav_num': cav_num, + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_poses_clean': lidar_poses_clean, + 'lidar_poses': lidar_poses}) + + + if self.visualize: + processed_data_dict['ego'].update({'origin_lidar': + np.vstack( + projected_lidar_stack)}) + + + processed_data_dict['ego'].update({'sample_idx': idx, + 'cav_id_list': cav_id_list}) + + return processed_data_dict + + + def collate_batch_train(self, batch): + # Intermediate fusion is different the other two + output_dict = {'ego': {}} + + object_bbx_center = [] + object_bbx_mask = [] + object_ids = [] + processed_lidar_list = [] + image_inputs_list = [] + # used to record different scenario + record_len = [] + label_dict_no_coop_batch_list = [] + label_dict_list = [] + lidar_pose_list = [] + origin_lidar = [] + vsa_lidar = [] + lidar_pose_clean_list = [] + + # pairwise transformation matrix + pairwise_t_matrix_list = [] + + # heterogeneous + lidar_agent_list = [] + + for i in range(len(batch)): + 
ego_dict = batch[i]['ego'] + object_bbx_center.append(ego_dict['object_bbx_center']) + object_bbx_mask.append(ego_dict['object_bbx_mask']) + object_ids.append(ego_dict['object_ids']) + lidar_pose_list.append(ego_dict['lidar_poses']) # ego_dict['lidar_pose'] is np.ndarray [N,6] + lidar_pose_clean_list.append(ego_dict['lidar_poses_clean']) + if self.load_lidar_file: + processed_lidar_list.append(ego_dict['processed_lidar']) + vsa_lidar.append(ego_dict['vsa_lidar']) + if self.load_camera_file: + image_inputs_list.append(ego_dict['image_inputs']) # different cav_num, ego_dict['image_inputs'] is dict. + + record_len.append(ego_dict['cav_num']) + label_dict_no_coop_batch_list.append(ego_dict['label_dict']['stage1']) + label_dict_list.append(ego_dict['label_dict']['stage2']) + + pairwise_t_matrix_list.append(ego_dict['pairwise_t_matrix']) + + if self.visualize: + origin_lidar.append(ego_dict['origin_lidar']) + + # heterogeneous + if self.heterogeneous: + lidar_agent_list.append(ego_dict['lidar_agent']) + + + # convert to numpy, (B, max_num, 7) + object_bbx_center = torch.from_numpy(np.array(object_bbx_center)) + object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask)) + + # example: {'voxel_features':[np.array([1,2,3]]), + # np.array([3,5,6]), ...]} + if self.load_lidar_file: + merged_feature_dict = merge_features_to_dict(processed_lidar_list) + # [sum(record_len), C, H, W] + if self.heterogeneous: + lidar_agent = np.concatenate(lidar_agent_list) + lidar_agent_idx = lidar_agent.nonzero()[0].tolist() + for k, v in merged_feature_dict.items(): # 'voxel_features' 'voxel_num_points' 'voxel_coords' + merged_feature_dict[k] = [v[index] for index in lidar_agent_idx] + + if not self.heterogeneous or (self.heterogeneous and sum(lidar_agent) != 0): + processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(merged_feature_dict) + output_dict['ego'].update({'processed_lidar': processed_lidar_torch_dict}) + + if self.load_camera_file: + merged_image_inputs_dict = merge_features_to_dict(image_inputs_list, merge='cat') + + if self.heterogeneous: + lidar_agent = np.concatenate(lidar_agent_list) + camera_agent = 1 - lidar_agent + camera_agent_idx = camera_agent.nonzero()[0].tolist() + if sum(camera_agent) != 0: + for k, v in merged_image_inputs_dict.items(): # 'imgs' 'rots' 'trans' ... + merged_image_inputs_dict[k] = torch.stack([v[index] for index in camera_agent_idx]) + + if not self.heterogeneous or (self.heterogeneous and sum(camera_agent) != 0): + output_dict['ego'].update({'image_inputs': merged_image_inputs_dict}) + + record_len = torch.from_numpy(np.array(record_len, dtype=int)) + lidar_pose = torch.from_numpy(np.concatenate(lidar_pose_list, axis=0)) + lidar_pose_clean = torch.from_numpy(np.concatenate(lidar_pose_clean_list, axis=0)) + label_dict_no_coop_cavs_batch_list = [label_dict for label_dict_cavs_list in + label_dict_no_coop_batch_list for label_dict in + label_dict_cavs_list] + label_no_coop_torch_dict = \ + self.post_processor.collate_batch(label_dict_no_coop_cavs_batch_list) + + label_torch_dict = \ + self.post_processor.collate_batch(label_dict_list) + + # (B, max_cav) + pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list)) + + # add pairwise_t_matrix to label dict + label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix + label_torch_dict['record_len'] = record_len + + # object id is only used during inference, where batch size is 1. + # so here we only get the first element. 
+ output_dict['ego'].update({ 'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'record_len': record_len, + 'label_dict': { + 'stage1': label_no_coop_torch_dict, + 'stage2': label_torch_dict, + }, + 'object_ids': object_ids[0], + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_pose_clean': lidar_pose_clean, + 'lidar_pose': lidar_pose, + 'proj_first': self.proj_first, + 'anchor_box': self.anchor_box_torch}) + + if self.load_lidar_file: + coords = [] + idx = 0 + for b in range(len(batch)): + for points in vsa_lidar[b]: + assert len(points) != 0 + coor_pad = np.pad(points, ((0, 0), (1, 0)), + mode="constant", constant_values=idx) + coords.append(coor_pad) + idx += 1 + origin_lidar_for_vsa = np.concatenate(coords, axis=0) + origin_lidar_for_vsa = torch.from_numpy(origin_lidar_for_vsa) + output_dict['ego'].update({'origin_lidar_for_vsa': origin_lidar_for_vsa}) + + if self.visualize: + origin_lidar = \ + np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar)) + origin_lidar = torch.from_numpy(origin_lidar) + output_dict['ego'].update({'origin_lidar': origin_lidar}) + + if self.heterogeneous: + output_dict['ego'].update({ + "lidar_agent_record": torch.from_numpy(np.concatenate(lidar_agent_list)) # [0,1,1,0,1...] + }) + + return output_dict + + def collate_batch_test(self, batch): + assert len(batch) <= 1, "Batch size 1 is required during testing!" + output_dict = self.collate_batch_train(batch) + if output_dict is None: + return None + + # check if anchor box in the batch + output_dict['ego'].update({'anchor_box': self.anchor_box_torch}) + + # save the transformation matrix (4, 4) to ego vehicle + # transformation is only used in post process (no use.) + # we all predict boxes in ego coord. + transformation_matrix_torch = \ + torch.from_numpy(np.identity(4)).float() + transformation_matrix_clean_torch = \ + torch.from_numpy(np.identity(4)).float() + + output_dict['ego'].update({'transformation_matrix': + transformation_matrix_torch, + 'transformation_matrix_clean': + transformation_matrix_clean_torch,}) + + output_dict['ego'].update({ + "sample_idx": batch[0]['ego']['sample_idx'], + "cav_id_list": batch[0]['ego']['cav_id_list'] + }) + + return output_dict + + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. 
+ """ + pred_box_tensor, pred_score = \ + self.post_processor.post_process(data_dict, output_dict) + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + return pred_box_tensor, pred_score, gt_box_tensor + + + return Intermediate2stageFusionDataset \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_fusion_dataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_fusion_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..0e720e16ea91b61435f2bb147013b44b6e5bf94d --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_fusion_dataset.py @@ -0,0 +1,679 @@ +# intermediate fusion dataset +import random +import math +from collections import OrderedDict +import numpy as np +import torch +import copy +from icecream import ic +from PIL import Image +import pickle as pkl +from opencood.utils import box_utils as box_utils +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.data_utils.post_processor import build_postprocessor +from opencood.utils.camera_utils import ( + sample_augmentation, + img_transform, + normalize_img, + img_to_tensor, +) +from opencood.utils.common_utils import merge_features_to_dict +from opencood.utils.transformation_utils import x1_to_x2, x_to_world, get_pairwise_transformation +from opencood.utils.pose_utils import add_noise_data_dict +from opencood.utils.pcd_utils import ( + mask_points_by_range, + mask_ego_points, + shuffle_points, + downsample_lidar_minimum, +) +from opencood.utils.common_utils import read_json + + +def getIntermediateFusionDataset(cls): + """ + cls: the Basedataset. + """ + class IntermediateFusionDataset(cls): + def __init__(self, params, visualize, train=True): + super().__init__(params, visualize, train) + # intermediate and supervise single + self.supervise_single = True if ('supervise_single' in params['model']['args'] and params['model']['args']['supervise_single']) \ + else False + self.proj_first = False if 'proj_first' not in params['fusion']['args']\ + else params['fusion']['args']['proj_first'] + + self.anchor_box = self.post_processor.generate_anchor_box() + self.anchor_box_torch = torch.from_numpy(self.anchor_box) + + self.heterogeneous = False + if 'heter' in params: + self.heterogeneous = True + + self.kd_flag = params.get('kd_flag', False) + + self.box_align = False + if "box_align" in params: + self.box_align = True + self.stage1_result_path = params['box_align']['train_result'] if train else params['box_align']['val_result'] + self.stage1_result = read_json(self.stage1_result_path) + self.box_align_args = params['box_align']['args'] + + + + + def get_item_single_car(self, selected_cav_base, ego_cav_base): + """ + Process a single CAV's information for the train/test pipeline. + + + Parameters + ---------- + selected_cav_base : dict + The dictionary contains a single CAV's raw information. + including 'params', 'camera_data' + ego_pose : list, length 6 + The ego vehicle lidar pose under world coordinate. + ego_pose_clean : list, length 6 + only used for gt box generation + + Returns + ------- + selected_cav_processed : dict + The dictionary contains the cav's processed information. 
+ """ + selected_cav_processed = {} + ego_pose, ego_pose_clean = ego_cav_base['params']['lidar_pose'], ego_cav_base['params']['lidar_pose_clean'] + + # calculate the transformation matrix + transformation_matrix = \ + x1_to_x2(selected_cav_base['params']['lidar_pose'], + ego_pose) # T_ego_cav + transformation_matrix_clean = \ + x1_to_x2(selected_cav_base['params']['lidar_pose_clean'], + ego_pose_clean) + + # lidar + if self.load_lidar_file or self.visualize: + # process lidar + lidar_np = selected_cav_base['lidar_np'] + lidar_np = shuffle_points(lidar_np) + # remove points that hit itself + lidar_np = mask_ego_points(lidar_np) + # project the lidar to ego space + # x,y,z in ego space + projected_lidar = \ + box_utils.project_points_by_matrix_torch(lidar_np[:, :3], + transformation_matrix) + if self.proj_first: + lidar_np[:, :3] = projected_lidar + + if self.visualize: + # filter lidar + selected_cav_processed.update({'projected_lidar': projected_lidar}) + + if self.kd_flag: + lidar_proj_np = copy.deepcopy(lidar_np) + lidar_proj_np[:,:3] = projected_lidar + + selected_cav_processed.update({'projected_lidar': lidar_proj_np}) + + processed_lidar = self.pre_processor.preprocess(lidar_np) + selected_cav_processed.update({'processed_features': processed_lidar}) + + # generate targets label single GT, note the reference pose is itself. + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center( + [selected_cav_base], selected_cav_base['params']['lidar_pose'] + ) + label_dict = self.post_processor.generate_label( + gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask + ) + selected_cav_processed.update({ + "single_label_dict": label_dict, + "single_object_bbx_center": object_bbx_center, + "single_object_bbx_mask": object_bbx_mask}) + + # camera + if self.load_camera_file: + camera_data_list = selected_cav_base["camera_data"] + + params = selected_cav_base["params"] + imgs = [] + rots = [] + trans = [] + intrins = [] + extrinsics = [] + post_rots = [] + post_trans = [] + + for idx, img in enumerate(camera_data_list): + camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx) + + intrin = torch.from_numpy(camera_intrinsic) + rot = torch.from_numpy( + camera_to_lidar[:3, :3] + ) # R_wc, we consider world-coord is the lidar-coord + tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc + + post_rot = torch.eye(2) + post_tran = torch.zeros(2) + + img_src = [img] + + # depth + if self.load_depth_file: + depth_img = selected_cav_base["depth_data"][idx] + img_src.append(depth_img) + else: + depth_img = None + + # data augmentation + resize, resize_dims, crop, flip, rotate = sample_augmentation( + self.data_aug_conf, self.train + ) + img_src, post_rot2, post_tran2 = img_transform( + img_src, + post_rot, + post_tran, + resize=resize, + resize_dims=resize_dims, + crop=crop, + flip=flip, + rotate=rotate, + ) + # for convenience, make augmentation matrices 3x3 + post_tran = torch.zeros(3) + post_rot = torch.eye(3) + post_tran[:2] = post_tran2 + post_rot[:2, :2] = post_rot2 + + # decouple RGB and Depth + + img_src[0] = normalize_img(img_src[0]) + if self.load_depth_file: + img_src[1] = img_to_tensor(img_src[1]) * 255 + + imgs.append(torch.cat(img_src, dim=0)) + intrins.append(intrin) + extrinsics.append(torch.from_numpy(camera_to_lidar)) + rots.append(rot) + trans.append(tran) + post_rots.append(post_rot) + post_trans.append(post_tran) + + + selected_cav_processed.update( + { + "image_inputs": + { + "imgs": torch.stack(imgs), # [Ncam, 3or4, H, W] + 
"intrins": torch.stack(intrins), + "extrinsics": torch.stack(extrinsics), + "rots": torch.stack(rots), + "trans": torch.stack(trans), + "post_rots": torch.stack(post_rots), + "post_trans": torch.stack(post_trans), + } + } + ) + + # anchor box + selected_cav_processed.update({"anchor_box": self.anchor_box}) + + # note the reference pose ego + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center([selected_cav_base], + ego_pose_clean) + + selected_cav_processed.update( + { + "object_bbx_center": object_bbx_center[object_bbx_mask == 1], + "object_bbx_mask": object_bbx_mask, + "object_ids": object_ids, + 'transformation_matrix': transformation_matrix, + 'transformation_matrix_clean': transformation_matrix_clean + } + ) + + + return selected_cav_processed + + def __getitem__(self, idx): + base_data_dict = self.retrieve_base_data(idx) + base_data_dict = add_noise_data_dict(base_data_dict,self.params['noise_setting']) + + processed_data_dict = OrderedDict() + processed_data_dict['ego'] = {} + + ego_id = -1 + ego_lidar_pose = [] + ego_cav_base = None + + # first find the ego vehicle's lidar pose + for cav_id, cav_content in base_data_dict.items(): + if cav_content['ego']: + ego_id = cav_id + ego_lidar_pose = cav_content['params']['lidar_pose'] + ego_cav_base = cav_content + break + + assert cav_id == list(base_data_dict.keys())[ + 0], "The first element in the OrderedDict must be ego" + assert ego_id != -1 + assert len(ego_lidar_pose) > 0 + + agents_image_inputs = [] + processed_features = [] + object_stack = [] + object_id_stack = [] + single_label_list = [] + single_object_bbx_center_list = [] + single_object_bbx_mask_list = [] + too_far = [] + lidar_pose_list = [] + lidar_pose_clean_list = [] + cav_id_list = [] + projected_lidar_clean_list = [] # disconet + + if self.visualize or self.kd_flag: + projected_lidar_stack = [] + + # loop over all CAVs to process information + for cav_id, selected_cav_base in base_data_dict.items(): + # check if the cav is within the communication range with ego + distance = \ + math.sqrt((selected_cav_base['params']['lidar_pose'][0] - + ego_lidar_pose[0]) ** 2 + ( + selected_cav_base['params'][ + 'lidar_pose'][1] - ego_lidar_pose[ + 1]) ** 2) + + # if distance is too far, we will just skip this agent + if distance > self.params['comm_range']: + too_far.append(cav_id) + continue + + lidar_pose_clean_list.append(selected_cav_base['params']['lidar_pose_clean']) + lidar_pose_list.append(selected_cav_base['params']['lidar_pose']) # 6dof pose + cav_id_list.append(cav_id) + + for cav_id in too_far: + base_data_dict.pop(cav_id) + + ########## Updated by Yifan Lu 2022.1.26 ############ + # box align to correct pose. + # stage1_content contains all agent. Even out of comm range. 
+ if self.box_align and str(idx) in self.stage1_result.keys(): + from opencood.models.sub_modules.box_align_v2 import box_alignment_relative_sample_np + stage1_content = self.stage1_result[str(idx)] + if stage1_content is not None: + all_agent_id_list = stage1_content['cav_id_list'] # include those out of range + all_agent_corners_list = stage1_content['pred_corner3d_np_list'] + all_agent_uncertainty_list = stage1_content['uncertainty_np_list'] + + cur_agent_id_list = cav_id_list + cur_agent_pose = [base_data_dict[cav_id]['params']['lidar_pose'] for cav_id in cav_id_list] + cur_agnet_pose = np.array(cur_agent_pose) + cur_agent_in_all_agent = [all_agent_id_list.index(cur_agent) for cur_agent in cur_agent_id_list] # indexing current agent in `all_agent_id_list` + + pred_corners_list = [np.array(all_agent_corners_list[cur_in_all_ind], dtype=np.float64) + for cur_in_all_ind in cur_agent_in_all_agent] + uncertainty_list = [np.array(all_agent_uncertainty_list[cur_in_all_ind], dtype=np.float64) + for cur_in_all_ind in cur_agent_in_all_agent] + + if sum([len(pred_corners) for pred_corners in pred_corners_list]) != 0: + refined_pose = box_alignment_relative_sample_np(pred_corners_list, + cur_agnet_pose, + uncertainty_list=uncertainty_list, + **self.box_align_args) + cur_agnet_pose[:,[0,1,4]] = refined_pose + + for i, cav_id in enumerate(cav_id_list): + lidar_pose_list[i] = cur_agnet_pose[i].tolist() + base_data_dict[cav_id]['params']['lidar_pose'] = cur_agnet_pose[i].tolist() + + + + pairwise_t_matrix = \ + get_pairwise_transformation(base_data_dict, + self.max_cav, + self.proj_first) + + lidar_poses = np.array(lidar_pose_list).reshape(-1, 6) # [N_cav, 6] + lidar_poses_clean = np.array(lidar_pose_clean_list).reshape(-1, 6) # [N_cav, 6] + + # merge preprocessed features from different cavs into the same dict + cav_num = len(cav_id_list) + + # heterogeneous + if self.heterogeneous: + lidar_agent, camera_agent = self.selector.select_agent(idx) + lidar_agent = lidar_agent[:cav_num] + processed_data_dict['ego'].update({"lidar_agent": lidar_agent}) + + for _i, cav_id in enumerate(cav_id_list): + selected_cav_base = base_data_dict[cav_id] + + # dynamic object center generator! 
for heterogeneous input + if (not self.visualize) and self.heterogeneous and lidar_agent[_i]: + self.generate_object_center = self.generate_object_center_lidar + elif (not self.visualize) and self.heterogeneous and (not lidar_agent[_i]): + self.generate_object_center = self.generate_object_center_camera + + selected_cav_processed = self.get_item_single_car( + selected_cav_base, + ego_cav_base) + + object_stack.append(selected_cav_processed['object_bbx_center']) + object_id_stack += selected_cav_processed['object_ids'] + if self.load_lidar_file: + processed_features.append( + selected_cav_processed['processed_features']) + if self.load_camera_file: + agents_image_inputs.append( + selected_cav_processed['image_inputs']) + + if self.visualize or self.kd_flag: + projected_lidar_stack.append( + selected_cav_processed['projected_lidar']) + + if self.supervise_single: + single_label_list.append(selected_cav_processed['single_label_dict']) + single_object_bbx_center_list.append(selected_cav_processed['single_object_bbx_center']) + single_object_bbx_mask_list.append(selected_cav_processed['single_object_bbx_mask']) + + # generate single view GT label + if self.supervise_single: + single_label_dicts = self.post_processor.collate_batch(single_label_list) + single_object_bbx_center = torch.from_numpy(np.array(single_object_bbx_center_list)) + single_object_bbx_mask = torch.from_numpy(np.array(single_object_bbx_mask_list)) + processed_data_dict['ego'].update({ + "single_label_dict_torch": single_label_dicts, + "single_object_bbx_center_torch": single_object_bbx_center, + "single_object_bbx_mask_torch": single_object_bbx_mask, + }) + + if self.kd_flag: + stack_lidar_np = np.vstack(projected_lidar_stack) + stack_lidar_np = mask_points_by_range(stack_lidar_np, + self.params['preprocess'][ + 'cav_lidar_range']) + stack_feature_processed = self.pre_processor.preprocess(stack_lidar_np) + processed_data_dict['ego'].update({'teacher_processed_lidar': + stack_feature_processed}) + + + # exclude all repetitive objects + unique_indices = \ + [object_id_stack.index(x) for x in set(object_id_stack)] + object_stack = np.vstack(object_stack) + object_stack = object_stack[unique_indices] + + # make sure bounding boxes across all frames have the same number + object_bbx_center = \ + np.zeros((self.params['postprocess']['max_num'], 7)) + mask = np.zeros(self.params['postprocess']['max_num']) + object_bbx_center[:object_stack.shape[0], :] = object_stack + mask[:object_stack.shape[0]] = 1 + + if self.load_lidar_file: + merged_feature_dict = merge_features_to_dict(processed_features) + processed_data_dict['ego'].update({'processed_lidar': merged_feature_dict}) + if self.load_camera_file: + merged_image_inputs_dict = merge_features_to_dict(agents_image_inputs, merge='stack') + processed_data_dict['ego'].update({'image_inputs': merged_image_inputs_dict}) + + + # generate targets label + label_dict = \ + self.post_processor.generate_label( + gt_box_center=object_bbx_center, + anchors=self.anchor_box, + mask=mask) + + processed_data_dict['ego'].update( + {'object_bbx_center': object_bbx_center, + 'object_bbx_mask': mask, + 'object_ids': [object_id_stack[i] for i in unique_indices], + 'anchor_box': self.anchor_box, + 'label_dict': label_dict, + 'cav_num': cav_num, + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_poses_clean': lidar_poses_clean, + 'lidar_poses': lidar_poses}) + + + if self.visualize: + processed_data_dict['ego'].update({'origin_lidar': + np.vstack( + projected_lidar_stack)}) + + + 
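+            # Layout of pairwise_t_matrix stored above (assuming the usual OpenCOOD
+            # implementation of get_pairwise_transformation): shape (max_cav, max_cav, 4, 4),
+            # where entry [i, j] maps agent i's lidar frame into agent j's frame and unused
+            # slots are padded with np.eye(4).  With proj_first=True it degenerates to all
+            # identities, since each CAV's points were already projected into the ego frame.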
processed_data_dict['ego'].update({'sample_idx': idx, + 'cav_id_list': cav_id_list}) + + return processed_data_dict + + + def collate_batch_train(self, batch): + # Intermediate fusion is different the other two + output_dict = {'ego': {}} + + object_bbx_center = [] + object_bbx_mask = [] + object_ids = [] + processed_lidar_list = [] + image_inputs_list = [] + # used to record different scenario + record_len = [] + label_dict_list = [] + lidar_pose_list = [] + origin_lidar = [] + lidar_pose_clean_list = [] + + # heterogeneous + lidar_agent_list = [] + + # pairwise transformation matrix + pairwise_t_matrix_list = [] + + # disconet + teacher_processed_lidar_list = [] + + ### 2022.10.10 single gt #### + if self.supervise_single: + pos_equal_one_single = [] + neg_equal_one_single = [] + targets_single = [] + object_bbx_center_single = [] + object_bbx_mask_single = [] + + for i in range(len(batch)): + ego_dict = batch[i]['ego'] + object_bbx_center.append(ego_dict['object_bbx_center']) + object_bbx_mask.append(ego_dict['object_bbx_mask']) + object_ids.append(ego_dict['object_ids']) + lidar_pose_list.append(ego_dict['lidar_poses']) # ego_dict['lidar_pose'] is np.ndarray [N,6] + lidar_pose_clean_list.append(ego_dict['lidar_poses_clean']) + if self.load_lidar_file: + processed_lidar_list.append(ego_dict['processed_lidar']) + if self.load_camera_file: + image_inputs_list.append(ego_dict['image_inputs']) # different cav_num, ego_dict['image_inputs'] is dict. + + record_len.append(ego_dict['cav_num']) + label_dict_list.append(ego_dict['label_dict']) + pairwise_t_matrix_list.append(ego_dict['pairwise_t_matrix']) + + if self.visualize: + origin_lidar.append(ego_dict['origin_lidar']) + + if self.kd_flag: + teacher_processed_lidar_list.append(ego_dict['teacher_processed_lidar']) + + ### 2022.10.10 single gt #### + if self.supervise_single: + pos_equal_one_single.append(ego_dict['single_label_dict_torch']['pos_equal_one']) + neg_equal_one_single.append(ego_dict['single_label_dict_torch']['neg_equal_one']) + targets_single.append(ego_dict['single_label_dict_torch']['targets']) + object_bbx_center_single.append(ego_dict['single_object_bbx_center_torch']) + object_bbx_mask_single.append(ego_dict['single_object_bbx_mask_torch']) + + # heterogeneous + if self.heterogeneous: + lidar_agent_list.append(ego_dict['lidar_agent']) + + # convert to numpy, (B, max_num, 7) + object_bbx_center = torch.from_numpy(np.array(object_bbx_center)) + object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask)) + + if self.load_lidar_file: + merged_feature_dict = merge_features_to_dict(processed_lidar_list) + + if self.heterogeneous: + lidar_agent = np.concatenate(lidar_agent_list) + lidar_agent_idx = lidar_agent.nonzero()[0].tolist() + for k, v in merged_feature_dict.items(): # 'voxel_features' 'voxel_num_points' 'voxel_coords' + merged_feature_dict[k] = [v[index] for index in lidar_agent_idx] + + if not self.heterogeneous or (self.heterogeneous and sum(lidar_agent) != 0): + processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(merged_feature_dict) + output_dict['ego'].update({'processed_lidar': processed_lidar_torch_dict}) + + if self.load_camera_file: + merged_image_inputs_dict = merge_features_to_dict(image_inputs_list, merge='cat') + + if self.heterogeneous: + lidar_agent = np.concatenate(lidar_agent_list) + camera_agent = 1 - lidar_agent + camera_agent_idx = camera_agent.nonzero()[0].tolist() + if sum(camera_agent) != 0: + for k, v in merged_image_inputs_dict.items(): # 'imgs' 'rots' 'trans' ... 
+ merged_image_inputs_dict[k] = torch.stack([v[index] for index in camera_agent_idx]) + + if not self.heterogeneous or (self.heterogeneous and sum(camera_agent) != 0): + output_dict['ego'].update({'image_inputs': merged_image_inputs_dict}) + + record_len = torch.from_numpy(np.array(record_len, dtype=int)) + lidar_pose = torch.from_numpy(np.concatenate(lidar_pose_list, axis=0)) + lidar_pose_clean = torch.from_numpy(np.concatenate(lidar_pose_clean_list, axis=0)) + label_torch_dict = \ + self.post_processor.collate_batch(label_dict_list) + + # for centerpoint + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + + # (B, max_cav) + pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list)) + + # add pairwise_t_matrix to label dict + label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix + label_torch_dict['record_len'] = record_len + + + # object id is only used during inference, where batch size is 1. + # so here we only get the first element. + output_dict['ego'].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'record_len': record_len, + 'label_dict': label_torch_dict, + 'object_ids': object_ids[0], + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_pose_clean': lidar_pose_clean, + 'lidar_pose': lidar_pose, + 'anchor_box': self.anchor_box_torch}) + + + if self.visualize: + origin_lidar = \ + np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar)) + origin_lidar = torch.from_numpy(origin_lidar) + output_dict['ego'].update({'origin_lidar': origin_lidar}) + + if self.kd_flag: + teacher_processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(teacher_processed_lidar_list) + output_dict['ego'].update({'teacher_processed_lidar':teacher_processed_lidar_torch_dict}) + + + if self.supervise_single: + output_dict['ego'].update({ + "label_dict_single":{ + "pos_equal_one": torch.cat(pos_equal_one_single, dim=0), + "neg_equal_one": torch.cat(neg_equal_one_single, dim=0), + "targets": torch.cat(targets_single, dim=0), + # for centerpoint + "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0), + "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0) + }, + "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0), + "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0) + }) + + if self.heterogeneous: + output_dict['ego'].update({ + "lidar_agent_record": torch.from_numpy(np.concatenate(lidar_agent_list)) # [0,1,1,0,1...] + }) + + return output_dict + + def collate_batch_test(self, batch): + assert len(batch) <= 1, "Batch size 1 is required during testing!" + output_dict = self.collate_batch_train(batch) + if output_dict is None: + return None + + # check if anchor box in the batch + if batch[0]['ego']['anchor_box'] is not None: + output_dict['ego'].update({'anchor_box': + self.anchor_box_torch}) + + # save the transformation matrix (4, 4) to ego vehicle + # transformation is only used in post process (no use.) + # we all predict boxes in ego coord. 
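+        # Since every box is already predicted in the ego frame, the two matrices stored
+        # below are identity placeholders; an equivalent one-liner (a sketch, not the
+        # original code) would be:
+        #   transformation_matrix_torch = torch.eye(4, dtype=torch.float32)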
+ transformation_matrix_torch = \ + torch.from_numpy(np.identity(4)).float() + transformation_matrix_clean_torch = \ + torch.from_numpy(np.identity(4)).float() + + output_dict['ego'].update({'transformation_matrix': + transformation_matrix_torch, + 'transformation_matrix_clean': + transformation_matrix_clean_torch,}) + + output_dict['ego'].update({ + "sample_idx": batch[0]['ego']['sample_idx'], + "cav_id_list": batch[0]['ego']['cav_id_list'] + }) + + return output_dict + + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. + """ + pred_box_tensor, pred_score = \ + self.post_processor.post_process(data_dict, output_dict) + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + return pred_box_tensor, pred_score, gt_box_tensor + + + return IntermediateFusionDataset + + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_heter_fusion_dataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_heter_fusion_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..c4796807cff709d31cba726db34207962addb417 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_heter_fusion_dataset.py @@ -0,0 +1,752 @@ +''' +intermediate heter fusion dataset + +Note that for DAIR-V2X dataset, +Each agent should retrieve the objects itself, and merge them by iou, +instead of using the cooperative label. +''' + +import random +import math +from collections import OrderedDict +import numpy as np +import torch +import copy +from icecream import ic +from PIL import Image +import pickle as pkl +from opencood.utils import box_utils as box_utils +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.data_utils.post_processor import build_postprocessor +from opencood.utils.camera_utils import ( + sample_augmentation, + img_transform, + normalize_img, + img_to_tensor, +) +from opencood.utils.common_utils import merge_features_to_dict, compute_iou, convert_format +from opencood.utils.transformation_utils import x1_to_x2, x_to_world, get_pairwise_transformation +from opencood.utils.pose_utils import add_noise_data_dict +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.utils.pcd_utils import ( + mask_points_by_range, + mask_ego_points, + shuffle_points, + downsample_lidar_minimum, +) +from opencood.utils.common_utils import read_json +from opencood.utils.heter_utils import Adaptor + + +def getIntermediateheterFusionDataset(cls): + """ + cls: the Basedataset. 
+ """ + class IntermediateheterFusionDataset(cls): + def __init__(self, params, visualize, train=True): + super().__init__(params, visualize, train) + # intermediate and supervise single + self.supervise_single = True if ('supervise_single' in params['model']['args'] and params['model']['args']['supervise_single']) \ + else False + self.proj_first = False if 'proj_first' not in params['fusion']['args']\ + else params['fusion']['args']['proj_first'] + + self.anchor_box = self.post_processor.generate_anchor_box() + self.anchor_box_torch = torch.from_numpy(self.anchor_box) + + self.heterogeneous = True + self.modality_assignment = read_json(params['heter']['assignment_path']) + self.ego_modality = params['heter']['ego_modality'] # "m1" or "m1&m2" or "m3" + + self.modality_name_list = list(params['heter']['modality_setting'].keys()) + self.sensor_type_dict = OrderedDict() + + lidar_channels_dict = params['heter'].get('lidar_channels_dict', OrderedDict()) + mapping_dict = params['heter']['mapping_dict'] + cav_preference = params['heter'].get("cav_preference", None) + + self.adaptor = Adaptor(self.ego_modality, + self.modality_name_list, + self.modality_assignment, + lidar_channels_dict, + mapping_dict, + cav_preference, + train) + + for modality_name, modal_setting in params['heter']['modality_setting'].items(): + self.sensor_type_dict[modality_name] = modal_setting['sensor_type'] + if modal_setting['sensor_type'] == 'lidar': + setattr(self, f"pre_processor_{modality_name}", build_preprocessor(modal_setting['preprocess'], train)) + + elif modal_setting['sensor_type'] == 'camera': + setattr(self, f"data_aug_conf_{modality_name}", modal_setting['data_aug_conf']) + + else: + raise("Not support this type of sensor") + + self.reinitialize() + + + self.kd_flag = params.get('kd_flag', False) + + self.box_align = False + if "box_align" in params: + self.box_align = True + self.stage1_result_path = params['box_align']['train_result'] if train else params['box_align']['val_result'] + self.stage1_result = read_json(self.stage1_result_path) + self.box_align_args = params['box_align']['args'] + + + + def get_item_single_car(self, selected_cav_base, ego_cav_base): + """ + Process a single CAV's information for the train/test pipeline. + + + Parameters + ---------- + selected_cav_base : dict + The dictionary contains a single CAV's raw information. + including 'params', 'camera_data' + ego_pose : list, length 6 + The ego vehicle lidar pose under world coordinate. + ego_pose_clean : list, length 6 + only used for gt box generation + + Returns + ------- + selected_cav_processed : dict + The dictionary contains the cav's processed information. 
+ """ + selected_cav_processed = {} + ego_pose, ego_pose_clean = ego_cav_base['params']['lidar_pose'], ego_cav_base['params']['lidar_pose_clean'] + + # calculate the transformation matrix + transformation_matrix = \ + x1_to_x2(selected_cav_base['params']['lidar_pose'], + ego_pose) # T_ego_cav + transformation_matrix_clean = \ + x1_to_x2(selected_cav_base['params']['lidar_pose_clean'], + ego_pose_clean) + + modality_name = selected_cav_base['modality_name'] + sensor_type = self.sensor_type_dict[modality_name] + + # lidar + if sensor_type == "lidar" or self.visualize: + # process lidar + lidar_np = selected_cav_base['lidar_np'] + lidar_np = shuffle_points(lidar_np) + # remove points that hit itself + lidar_np = mask_ego_points(lidar_np) + # project the lidar to ego space + # x,y,z in ego space + projected_lidar = \ + box_utils.project_points_by_matrix_torch(lidar_np[:, :3], + transformation_matrix) + if self.proj_first: + lidar_np[:, :3] = projected_lidar + + if self.visualize: + # filter lidar + selected_cav_processed.update({'projected_lidar': projected_lidar}) + + if self.kd_flag: + lidar_proj_np = copy.deepcopy(lidar_np) + lidar_proj_np[:,:3] = projected_lidar + + selected_cav_processed.update({'projected_lidar': lidar_proj_np}) + + if sensor_type == "lidar": + processed_lidar = eval(f"self.pre_processor_{modality_name}").preprocess(lidar_np) + selected_cav_processed.update({f'processed_features_{modality_name}': processed_lidar}) + + # generate targets label single GT, note the reference pose is itself. + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center( + [selected_cav_base], selected_cav_base['params']['lidar_pose'] + ) + label_dict = self.post_processor.generate_label( + gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask + ) + selected_cav_processed.update({ + "single_label_dict": label_dict, + "single_object_bbx_center": object_bbx_center, + "single_object_bbx_mask": object_bbx_mask}) + + # camera + if sensor_type == "camera": + camera_data_list = selected_cav_base["camera_data"] + params = selected_cav_base["params"] + imgs = [] + rots = [] + trans = [] + intrins = [] + extrinsics = [] + post_rots = [] + post_trans = [] + + for idx, img in enumerate(camera_data_list): + camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx) + + intrin = torch.from_numpy(camera_intrinsic) + rot = torch.from_numpy( + camera_to_lidar[:3, :3] + ) # R_wc, we consider world-coord is the lidar-coord + tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc + + post_rot = torch.eye(2) + post_tran = torch.zeros(2) + + img_src = [img] + + # depth + if self.load_depth_file: + depth_img = selected_cav_base["depth_data"][idx] + img_src.append(depth_img) + else: + depth_img = None + + # data augmentation + resize, resize_dims, crop, flip, rotate = sample_augmentation( + eval(f"self.data_aug_conf_{modality_name}"), self.train + ) + img_src, post_rot2, post_tran2 = img_transform( + img_src, + post_rot, + post_tran, + resize=resize, + resize_dims=resize_dims, + crop=crop, + flip=flip, + rotate=rotate, + ) + # for convenience, make augmentation matrices 3x3 + post_tran = torch.zeros(3) + post_rot = torch.eye(3) + post_tran[:2] = post_tran2 + post_rot[:2, :2] = post_rot2 + + # decouple RGB and Depth + + img_src[0] = normalize_img(img_src[0]) + if self.load_depth_file: + img_src[1] = img_to_tensor(img_src[1]) * 255 + + imgs.append(torch.cat(img_src, dim=0)) + intrins.append(intrin) + extrinsics.append(torch.from_numpy(camera_to_lidar)) + 
rots.append(rot) + trans.append(tran) + post_rots.append(post_rot) + post_trans.append(post_tran) + + + selected_cav_processed.update( + { + f"image_inputs_{modality_name}": + { + "imgs": torch.stack(imgs), # [Ncam, 3or4, H, W] + "intrins": torch.stack(intrins), + "extrinsics": torch.stack(extrinsics), + "rots": torch.stack(rots), + "trans": torch.stack(trans), + "post_rots": torch.stack(post_rots), + "post_trans": torch.stack(post_trans), + } + } + ) + + # anchor box + selected_cav_processed.update({"anchor_box": self.anchor_box}) + + # note the reference pose ego + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center([selected_cav_base], + ego_pose_clean) + + selected_cav_processed.update( + { + "object_bbx_center": object_bbx_center[object_bbx_mask == 1], + "object_bbx_mask": object_bbx_mask, + "object_ids": object_ids, + 'transformation_matrix': transformation_matrix, + 'transformation_matrix_clean': transformation_matrix_clean + } + ) + + + return selected_cav_processed + + def __getitem__(self, idx): + base_data_dict = self.retrieve_base_data(idx) + base_data_dict = add_noise_data_dict(base_data_dict,self.params['noise_setting']) + + processed_data_dict = OrderedDict() + processed_data_dict['ego'] = {} + + ego_id = -1 + ego_lidar_pose = [] + ego_cav_base = None + + # first find the ego vehicle's lidar pose + for cav_id, cav_content in base_data_dict.items(): + if cav_content['ego']: + ego_id = cav_id + ego_lidar_pose = cav_content['params']['lidar_pose'] + ego_cav_base = cav_content + break + + assert cav_id == list(base_data_dict.keys())[ + 0], "The first element in the OrderedDict must be ego" + assert ego_id != -1 + assert len(ego_lidar_pose) > 0 + + + input_list_m1 = [] # can contain lidar or camera + input_list_m2 = [] + input_list_m3 = [] + input_list_m4 = [] + + agent_modality_list = [] + object_stack = [] + object_id_stack = [] + single_label_list = [] + single_object_bbx_center_list = [] + single_object_bbx_mask_list = [] + exclude_agent = [] + lidar_pose_list = [] + lidar_pose_clean_list = [] + cav_id_list = [] + projected_lidar_clean_list = [] # disconet + + if self.visualize or self.kd_flag: + projected_lidar_stack = [] + + # loop over all CAVs to process information + for cav_id, selected_cav_base in base_data_dict.items(): + # check if the cav is within the communication range with ego + distance = \ + math.sqrt((selected_cav_base['params']['lidar_pose'][0] - + ego_lidar_pose[0]) ** 2 + ( + selected_cav_base['params'][ + 'lidar_pose'][1] - ego_lidar_pose[ + 1]) ** 2) + + # if distance is too far, we will just skip this agent + if distance > self.params['comm_range']: + exclude_agent.append(cav_id) + continue + + # if modality not match + if self.adaptor.unmatched_modality(selected_cav_base['modality_name']): + exclude_agent.append(cav_id) + continue + + lidar_pose_clean_list.append(selected_cav_base['params']['lidar_pose_clean']) + lidar_pose_list.append(selected_cav_base['params']['lidar_pose']) # 6dof pose + cav_id_list.append(cav_id) + + if len(cav_id_list) == 0: + return None + + for cav_id in exclude_agent: + base_data_dict.pop(cav_id) + + ########## Updated by Yifan Lu 2022.1.26 ############ + # box align to correct pose. + # stage1_content contains all agent. Even out of comm range. 
+ if self.box_align and str(idx) in self.stage1_result.keys(): + from opencood.models.sub_modules.box_align_v2 import box_alignment_relative_sample_np + stage1_content = self.stage1_result[str(idx)] + if stage1_content is not None: + all_agent_id_list = stage1_content['cav_id_list'] # include those out of range + all_agent_corners_list = stage1_content['pred_corner3d_np_list'] + all_agent_uncertainty_list = stage1_content['uncertainty_np_list'] + + cur_agent_id_list = cav_id_list + cur_agent_pose = [base_data_dict[cav_id]['params']['lidar_pose'] for cav_id in cav_id_list] + cur_agnet_pose = np.array(cur_agent_pose) + cur_agent_in_all_agent = [all_agent_id_list.index(cur_agent) for cur_agent in cur_agent_id_list] # indexing current agent in `all_agent_id_list` + + pred_corners_list = [np.array(all_agent_corners_list[cur_in_all_ind], dtype=np.float64) + for cur_in_all_ind in cur_agent_in_all_agent] + uncertainty_list = [np.array(all_agent_uncertainty_list[cur_in_all_ind], dtype=np.float64) + for cur_in_all_ind in cur_agent_in_all_agent] + + if sum([len(pred_corners) for pred_corners in pred_corners_list]) != 0: + refined_pose = box_alignment_relative_sample_np(pred_corners_list, + cur_agnet_pose, + uncertainty_list=uncertainty_list, + **self.box_align_args) + cur_agnet_pose[:,[0,1,4]] = refined_pose + + for i, cav_id in enumerate(cav_id_list): + lidar_pose_list[i] = cur_agnet_pose[i].tolist() + base_data_dict[cav_id]['params']['lidar_pose'] = cur_agnet_pose[i].tolist() + + + + pairwise_t_matrix = \ + get_pairwise_transformation(base_data_dict, + self.max_cav, + self.proj_first) + + lidar_poses = np.array(lidar_pose_list).reshape(-1, 6) # [N_cav, 6] + lidar_poses_clean = np.array(lidar_pose_clean_list).reshape(-1, 6) # [N_cav, 6] + + # merge preprocessed features from different cavs into the same dict + cav_num = len(cav_id_list) + + for _i, cav_id in enumerate(cav_id_list): + selected_cav_base = base_data_dict[cav_id] + modality_name = selected_cav_base['modality_name'] + sensor_type = self.sensor_type_dict[selected_cav_base['modality_name']] + + # dynamic object center generator! for heterogeneous input + if not self.visualize: + self.generate_object_center = eval(f"self.generate_object_center_{sensor_type}") + # need discussion. In test phase, use lidar label. 
+ else: + self.generate_object_center = self.generate_object_center_lidar + + selected_cav_processed = self.get_item_single_car( + selected_cav_base, + ego_cav_base) + + object_stack.append(selected_cav_processed['object_bbx_center']) + object_id_stack += selected_cav_processed['object_ids'] + + + if sensor_type == "lidar": + eval(f"input_list_{modality_name}").append(selected_cav_processed[f"processed_features_{modality_name}"]) + elif sensor_type == "camera": + eval(f"input_list_{modality_name}").append(selected_cav_processed[f"image_inputs_{modality_name}"]) + else: + raise + + agent_modality_list.append(modality_name) + + if self.visualize or self.kd_flag: + projected_lidar_stack.append( + selected_cav_processed['projected_lidar']) + + if self.supervise_single or self.heterogeneous: + single_label_list.append(selected_cav_processed['single_label_dict']) + single_object_bbx_center_list.append(selected_cav_processed['single_object_bbx_center']) + single_object_bbx_mask_list.append(selected_cav_processed['single_object_bbx_mask']) + + # generate single view GT label + if self.supervise_single or self.heterogeneous: + single_label_dicts = self.post_processor.collate_batch(single_label_list) + single_object_bbx_center = torch.from_numpy(np.array(single_object_bbx_center_list)) + single_object_bbx_mask = torch.from_numpy(np.array(single_object_bbx_mask_list)) + processed_data_dict['ego'].update({ + "single_label_dict_torch": single_label_dicts, + "single_object_bbx_center_torch": single_object_bbx_center, + "single_object_bbx_mask_torch": single_object_bbx_mask, + }) + + if self.kd_flag: + stack_lidar_np = np.vstack(projected_lidar_stack) + stack_lidar_np = mask_points_by_range(stack_lidar_np, + self.params['preprocess'][ + 'cav_lidar_range']) + stack_feature_processed = self.pre_processor.preprocess(stack_lidar_np) + processed_data_dict['ego'].update({'teacher_processed_lidar': + stack_feature_processed}) + + + # exculude all repetitve objects, DAIR-V2X + if self.params['fusion']['dataset'] == 'dairv2x': + if len(object_stack) == 1: + object_stack = object_stack[0] + else: + ego_boxes_np = object_stack[0] + cav_boxes_np = object_stack[1] + order = self.params['postprocess']['order'] + ego_corners_np = box_utils.boxes_to_corners_3d(ego_boxes_np, order) + cav_corners_np = box_utils.boxes_to_corners_3d(cav_boxes_np, order) + ego_polygon_list = list(convert_format(ego_corners_np)) + cav_polygon_list = list(convert_format(cav_corners_np)) + iou_thresh = 0.05 + + + gt_boxes_from_cav = [] + for i in range(len(cav_polygon_list)): + cav_polygon = cav_polygon_list[i] + ious = compute_iou(cav_polygon, ego_polygon_list) + if (ious > iou_thresh).any(): + continue + gt_boxes_from_cav.append(cav_boxes_np[i]) + + if len(gt_boxes_from_cav): + object_stack_from_cav = np.stack(gt_boxes_from_cav) + object_stack = np.vstack([ego_boxes_np, object_stack_from_cav]) + else: + object_stack = ego_boxes_np + + unique_indices = np.arange(object_stack.shape[0]) + object_id_stack = np.arange(object_stack.shape[0]) + else: + # exclude all repetitive objects, OPV2V-H + unique_indices = \ + [object_id_stack.index(x) for x in set(object_id_stack)] + object_stack = np.vstack(object_stack) + object_stack = object_stack[unique_indices] + + # make sure bounding boxes across all frames have the same number + object_bbx_center = \ + np.zeros((self.params['postprocess']['max_num'], 7)) + mask = np.zeros(self.params['postprocess']['max_num']) + object_bbx_center[:object_stack.shape[0], :] = object_stack + 
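+            # Together with the next line this pads the ground truth to a fixed size.
+            # Example: with max_num = 100 and 7 retained boxes, rows 0..6 of
+            # object_bbx_center hold the boxes, rows 7..99 stay zero, and
+            # mask = [1, 1, 1, 1, 1, 1, 1, 0, ..., 0].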
mask[:object_stack.shape[0]] = 1 + + for modality_name in self.modality_name_list: + if self.sensor_type_dict[modality_name] == "lidar": + merged_feature_dict = merge_features_to_dict(eval(f"input_list_{modality_name}")) + processed_data_dict['ego'].update({f'input_{modality_name}': merged_feature_dict}) # maybe None + elif self.sensor_type_dict[modality_name] == "camera": + merged_image_inputs_dict = merge_features_to_dict(eval(f"input_list_{modality_name}"), merge='stack') + processed_data_dict['ego'].update({f'input_{modality_name}': merged_image_inputs_dict}) # maybe None + + processed_data_dict['ego'].update({'agent_modality_list': agent_modality_list}) + + # generate targets label + label_dict = \ + self.post_processor.generate_label( + gt_box_center=object_bbx_center, + anchors=self.anchor_box, + mask=mask) + + processed_data_dict['ego'].update( + {'object_bbx_center': object_bbx_center, + 'object_bbx_mask': mask, + 'object_ids': [object_id_stack[i] for i in unique_indices], + 'anchor_box': self.anchor_box, + 'label_dict': label_dict, + 'cav_num': cav_num, + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_poses_clean': lidar_poses_clean, + 'lidar_poses': lidar_poses}) + + + if self.visualize: + processed_data_dict['ego'].update({'origin_lidar': + np.vstack( + projected_lidar_stack)}) + + + processed_data_dict['ego'].update({'sample_idx': idx, + 'cav_id_list': cav_id_list}) + + return processed_data_dict + + + def collate_batch_train(self, batch): + # Intermediate fusion is different the other two + output_dict = {'ego': {}} + + object_bbx_center = [] + object_bbx_mask = [] + object_ids = [] + inputs_list_m1 = [] + inputs_list_m2 = [] + inputs_list_m3 = [] + inputs_list_m4 = [] + agent_modality_list = [] + # used to record different scenario + record_len = [] + label_dict_list = [] + lidar_pose_list = [] + origin_lidar = [] + lidar_pose_clean_list = [] + + # pairwise transformation matrix + pairwise_t_matrix_list = [] + + # disconet + teacher_processed_lidar_list = [] + + ### 2022.10.10 single gt #### + if self.supervise_single or self.heterogeneous: + pos_equal_one_single = [] + neg_equal_one_single = [] + targets_single = [] + object_bbx_center_single = [] + object_bbx_mask_single = [] + + for i in range(len(batch)): + ego_dict = batch[i]['ego'] + object_bbx_center.append(ego_dict['object_bbx_center']) + object_bbx_mask.append(ego_dict['object_bbx_mask']) + object_ids.append(ego_dict['object_ids']) + lidar_pose_list.append(ego_dict['lidar_poses']) # ego_dict['lidar_pose'] is np.ndarray [N,6] + lidar_pose_clean_list.append(ego_dict['lidar_poses_clean']) + + for modality_name in self.modality_name_list: + if ego_dict[f'input_{modality_name}'] is not None: + eval(f"inputs_list_{modality_name}").append(ego_dict[f'input_{modality_name}']) # OrderedDict() if empty? 
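+                # The eval(f"inputs_list_{modality_name}") indirection works because the lists
+                # inputs_list_m1 .. inputs_list_m4 are declared above.  An equivalent eval-free
+                # sketch (hypothetical variable name) would keep a dict keyed by modality:
+                #   inputs_by_modality = {m: [] for m in self.modality_name_list}
+                #   inputs_by_modality[modality_name].append(ego_dict[f'input_{modality_name}'])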
+ + agent_modality_list.extend(ego_dict['agent_modality_list']) + + record_len.append(ego_dict['cav_num']) + label_dict_list.append(ego_dict['label_dict']) + pairwise_t_matrix_list.append(ego_dict['pairwise_t_matrix']) + + if self.visualize: + origin_lidar.append(ego_dict['origin_lidar']) + + if self.kd_flag: + teacher_processed_lidar_list.append(ego_dict['teacher_processed_lidar']) + + ### 2022.10.10 single gt #### + if self.supervise_single or self.heterogeneous: + pos_equal_one_single.append(ego_dict['single_label_dict_torch']['pos_equal_one']) + neg_equal_one_single.append(ego_dict['single_label_dict_torch']['neg_equal_one']) + targets_single.append(ego_dict['single_label_dict_torch']['targets']) + object_bbx_center_single.append(ego_dict['single_object_bbx_center_torch']) + object_bbx_mask_single.append(ego_dict['single_object_bbx_mask_torch']) + + + # convert to numpy, (B, max_num, 7) + object_bbx_center = torch.from_numpy(np.array(object_bbx_center)) + object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask)) + + + # 2023.2.5 + for modality_name in self.modality_name_list: + if len(eval(f"inputs_list_{modality_name}")) != 0: + if self.sensor_type_dict[modality_name] == "lidar": + merged_feature_dict = merge_features_to_dict(eval(f"inputs_list_{modality_name}")) + processed_lidar_torch_dict = eval(f"self.pre_processor_{modality_name}").collate_batch(merged_feature_dict) + output_dict['ego'].update({f'inputs_{modality_name}': processed_lidar_torch_dict}) + + elif self.sensor_type_dict[modality_name] == "camera": + merged_image_inputs_dict = merge_features_to_dict(eval(f"inputs_list_{modality_name}"), merge='cat') + output_dict['ego'].update({f'inputs_{modality_name}': merged_image_inputs_dict}) + + + output_dict['ego'].update({"agent_modality_list": agent_modality_list}) + + record_len = torch.from_numpy(np.array(record_len, dtype=int)) + lidar_pose = torch.from_numpy(np.concatenate(lidar_pose_list, axis=0)) + lidar_pose_clean = torch.from_numpy(np.concatenate(lidar_pose_clean_list, axis=0)) + label_torch_dict = \ + self.post_processor.collate_batch(label_dict_list) + + # for centerpoint + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + + # (B, max_cav) + pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list)) + + # add pairwise_t_matrix to label dict + label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix + label_torch_dict['record_len'] = record_len + + + # object id is only used during inference, where batch size is 1. + # so here we only get the first element. 
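+            # Taking object_ids[0] below is safe because collate_batch_test asserts
+            # len(batch) <= 1, so at inference time there is exactly one sample in the batch.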
+ output_dict['ego'].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'record_len': record_len, + 'label_dict': label_torch_dict, + 'object_ids': object_ids[0], + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_pose_clean': lidar_pose_clean, + 'lidar_pose': lidar_pose, + 'anchor_box': self.anchor_box_torch}) + + + if self.visualize: + origin_lidar = \ + np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar)) + origin_lidar = torch.from_numpy(origin_lidar) + output_dict['ego'].update({'origin_lidar': origin_lidar}) + + if self.kd_flag: + teacher_processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(teacher_processed_lidar_list) + output_dict['ego'].update({'teacher_processed_lidar':teacher_processed_lidar_torch_dict}) + + + if self.supervise_single or self.heterogeneous: + output_dict['ego'].update({ + "label_dict_single":{ + "pos_equal_one": torch.cat(pos_equal_one_single, dim=0), + "neg_equal_one": torch.cat(neg_equal_one_single, dim=0), + "targets": torch.cat(targets_single, dim=0), + # for centerpoint + "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0), + "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0) + }, + "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0), + "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0) + }) + + return output_dict + + def collate_batch_test(self, batch): + assert len(batch) <= 1, "Batch size 1 is required during testing!" + if batch[0] is None: + return None + output_dict = self.collate_batch_train(batch) + if output_dict is None: + return None + + # check if anchor box in the batch + if batch[0]['ego']['anchor_box'] is not None: + output_dict['ego'].update({'anchor_box': + self.anchor_box_torch}) + + # save the transformation matrix (4, 4) to ego vehicle + # transformation is only used in post process (no use.) + # we all predict boxes in ego coord. + transformation_matrix_torch = \ + torch.from_numpy(np.identity(4)).float() + transformation_matrix_clean_torch = \ + torch.from_numpy(np.identity(4)).float() + + output_dict['ego'].update({'transformation_matrix': + transformation_matrix_torch, + 'transformation_matrix_clean': + transformation_matrix_clean_torch,}) + + output_dict['ego'].update({ + "sample_idx": batch[0]['ego']['sample_idx'], + "cav_id_list": batch[0]['ego']['cav_id_list'], + "agent_modality_list": batch[0]['ego']['agent_modality_list'] + }) + + return output_dict + + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. 
+ """ + pred_box_tensor, pred_score = \ + self.post_processor.post_process(data_dict, output_dict) + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + return pred_box_tensor, pred_score, gt_box_tensor + + + return IntermediateheterFusionDataset + + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_multiclass_fusion_dataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_multiclass_fusion_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..2a120724ef30eedc9b80202b0d25a8ff8af7be4d --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/intermediate_multiclass_fusion_dataset.py @@ -0,0 +1,892 @@ +# intermediate fusion dataset +import random +import math +from collections import OrderedDict +import numpy as np +import torch +import copy +from icecream import ic +from PIL import Image +import pickle as pkl +from opencood.utils import box_utils as box_utils +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.data_utils.post_processor import build_postprocessor +from opencood.utils.camera_utils import ( + sample_augmentation, + img_transform, + normalize_img, + img_to_tensor, +) +# from opencood.utils.heter_utils import AgentSelector +from opencood.utils.common_utils import merge_features_to_dict +from opencood.utils.transformation_utils import x1_to_x2, x_to_world, get_pairwise_transformation, get_pairwise_transformation_asymmetric +from opencood.utils.pose_utils import add_noise_data_dict, add_noise_data_dict_asymmetric +from opencood.utils.pcd_utils import ( + mask_points_by_range, + mask_ego_points, + mask_ego_points_v2, + shuffle_points, + downsample_lidar_minimum, +) +from opencood.utils.common_utils import read_json + + +def getIntermediatemulticlassFusionDataset(cls): + """ + cls: the Basedataset. + """ + class IntermediatemulticlassFusionDataset(cls): + def __init__(self, params, visualize, train=True): + super().__init__(params, visualize, train) + # intermediate and supervise single + self.supervise_single = True if ('supervise_single' in params['model']['args'] and params['model']['args']['supervise_single']) \ + else False + self.proj_first = False if 'proj_first' not in params['fusion']['args']\ + else params['fusion']['args']['proj_first'] + + self.anchor_box = self.post_processor.generate_anchor_box() + self.anchor_box_torch = torch.from_numpy(self.anchor_box) + + self.heterogeneous = False + if 'heter' in params: + self.heterogeneous = True + self.selector = AgentSelector(params['heter'], self.max_cav) + + self.kd_flag = params.get('kd_flag', False) + + self.box_align = False + if "box_align" in params: + self.box_align = True + self.stage1_result_path = params['box_align']['train_result'] if train else params['box_align']['val_result'] + self.stage1_result = read_json(self.stage1_result_path) + self.box_align_args = params['box_align']['args'] + + self.multiclass = params['model']['args']['multi_class'] + self.online_eval_only = False + + def get_item_single_car(self, selected_cav_base, ego_cav_base, tpe='all', cav_id='car_0', online_eval=False): + """ + Process a single CAV's information for the train/test pipeline. + + + Parameters + ---------- + selected_cav_base : dict + The dictionary contains a single CAV's raw information. + including 'params', 'camera_data' + ego_pose : list, length 6 + The ego vehicle lidar pose under world coordinate. 
+ ego_pose_clean : list, length 6 + only used for gt box generation + + Returns + ------- + selected_cav_processed : dict + The dictionary contains the cav's processed information. + """ + selected_cav_processed = {} + ego_pose, ego_pose_clean = ego_cav_base['params']['lidar_pose'], ego_cav_base['params']['lidar_pose_clean'] + + # calculate the transformation matrix + transformation_matrix = \ + x1_to_x2(selected_cav_base['params']['lidar_pose'], + ego_pose) # T_ego_cav + transformation_matrix_clean = \ + x1_to_x2(selected_cav_base['params']['lidar_pose_clean'], + ego_pose_clean) + + # lidar + if tpe == 'all': + if self.load_lidar_file or self.visualize: + # process lidar + lidar_np = selected_cav_base['lidar_np'] + lidar_np = shuffle_points(lidar_np) + # remove points that hit itself + if not cav_id.startswith('rsu'): + lidar_np = mask_ego_points_v2(lidar_np) + # project the lidar to ego space + # x,y,z in ego space + projected_lidar = \ + box_utils.project_points_by_matrix_torch(lidar_np[:, :3], + transformation_matrix) + if self.proj_first: + lidar_np[:, :3] = projected_lidar + + if self.visualize: + # filter lidar + selected_cav_processed.update({'projected_lidar': projected_lidar}) + + if self.kd_flag: + lidar_proj_np = copy.deepcopy(lidar_np) + lidar_proj_np[:,:3] = projected_lidar + + selected_cav_processed.update({'projected_lidar': lidar_proj_np}) + + processed_lidar = self.pre_processor.preprocess(lidar_np) + selected_cav_processed.update({'processed_features': processed_lidar}) + + if True: # not online_eval: + # generate targets label single GT, note the reference pose is itself. + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center( + [selected_cav_base], selected_cav_base['params']['lidar_pose'] + ) + label_dict = {} + if tpe == 'all': + # unused label + if False: + label_dict = self.post_processor.generate_label( + gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask + ) + selected_cav_processed.update({ + "single_label_dict": label_dict, + "single_object_bbx_center": object_bbx_center, + "single_object_bbx_mask": object_bbx_mask}) + + if tpe == 'all': + # camera + if self.load_camera_file: + camera_data_list = selected_cav_base["camera_data"] + + params = selected_cav_base["params"] + imgs = [] + rots = [] + trans = [] + intrins = [] + extrinsics = [] + post_rots = [] + post_trans = [] + + for idx, img in enumerate(camera_data_list): + camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx) + + intrin = torch.from_numpy(camera_intrinsic) + rot = torch.from_numpy( + camera_to_lidar[:3, :3] + ) # R_wc, we consider world-coord is the lidar-coord + tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc + + post_rot = torch.eye(2) + post_tran = torch.zeros(2) + + img_src = [img] + + # depth + if self.load_depth_file: + depth_img = selected_cav_base["depth_data"][idx] + img_src.append(depth_img) + else: + depth_img = None + + # data augmentation + resize, resize_dims, crop, flip, rotate = sample_augmentation( + self.data_aug_conf, self.train + ) + img_src, post_rot2, post_tran2 = img_transform( + img_src, + post_rot, + post_tran, + resize=resize, + resize_dims=resize_dims, + crop=crop, + flip=flip, + rotate=rotate, + ) + # for convenience, make augmentation matrices 3x3 + post_tran = torch.zeros(3) + post_rot = torch.eye(3) + post_tran[:2] = post_tran2 + post_rot[:2, :2] = post_rot2 + + # decouple RGB and Depth + + img_src[0] = normalize_img(img_src[0]) + if self.load_depth_file: + img_src[1] = img_to_tensor(img_src[1]) 
* 255 + + imgs.append(torch.cat(img_src, dim=0)) + intrins.append(intrin) + extrinsics.append(torch.from_numpy(camera_to_lidar)) + rots.append(rot) + trans.append(tran) + post_rots.append(post_rot) + post_trans.append(post_tran) + + + selected_cav_processed.update( + { + "image_inputs": + { + "imgs": torch.stack(imgs), # [Ncam, 3or4, H, W] + "intrins": torch.stack(intrins), + "extrinsics": torch.stack(extrinsics), + "rots": torch.stack(rots), + "trans": torch.stack(trans), + "post_rots": torch.stack(post_rots), + "post_trans": torch.stack(post_trans), + } + } + ) + + # anchor box + selected_cav_processed.update({"anchor_box": self.anchor_box}) + + if True: # not online_eval: + # note the reference pose ego + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center([selected_cav_base], + ego_pose_clean) + selected_cav_processed.update( + { + "object_bbx_center": object_bbx_center[object_bbx_mask == 1], + "object_bbx_mask": object_bbx_mask, + "object_ids": object_ids, + } + ) + selected_cav_processed.update( + { + 'transformation_matrix': transformation_matrix, + 'transformation_matrix_clean': transformation_matrix_clean + } + ) + + + return selected_cav_processed + + def __getitem__(self, idx, extra_source=None, data_dir=None, plan_without_perception_gt=True): + if (data_dir is not None) and (plan_without_perception_gt): + extra_source=1 + object_bbx_center_list = [] + object_bbx_mask_list = [] + object_id_dict = {} + + object_bbx_center_list_single = [] + object_bbx_mask_list_single = [] + + + output_dict = {} + for tpe in ['all', 0, 1, 3]: + output_single_class = self.__getitem_single_class__(idx, tpe, extra_source, data_dir) + output_dict[tpe] = output_single_class + if tpe == 'all': + continue + elif tpe == 'all' and extra_source!=None: + break + object_bbx_center_list.append(output_single_class['ego']['object_bbx_center']) + object_bbx_mask_list.append(output_single_class['ego']['object_bbx_mask']) + if self.supervise_single: + object_bbx_center_list_single.append(output_single_class['ego']['single_object_bbx_center_torch']) + object_bbx_mask_list_single.append(output_single_class['ego']['single_object_bbx_mask_torch']) + + object_id_dict[tpe] = output_single_class['ego']['object_ids'] + + if True: # self.multiclass and extra_source==None: + output_dict['all']['ego']['object_bbx_center'] = np.stack(object_bbx_center_list, axis=0) + output_dict['all']['ego']['object_bbx_mask'] = np.stack(object_bbx_mask_list, axis=0) + if self.supervise_single: + output_dict['all']['ego']['single_object_bbx_center_torch'] = torch.stack(object_bbx_center_list_single, axis=1) + output_dict['all']['ego']['single_object_bbx_mask_torch'] = torch.stack(object_bbx_mask_list_single, axis=1) + + output_dict['all']['ego']['object_ids'] = object_id_dict + # print('finish get item') + return output_dict['all'] + + def __getitem_single_class__(self, idx, tpe=None, extra_source=None, data_dir=None): + + if extra_source is None and data_dir is None: + base_data_dict = self.retrieve_base_data(idx, tpe) + elif data_dir is not None: + base_data_dict = self.retrieve_base_data(idx=None, tpe=tpe, data_dir=data_dir) + elif extra_source is not None: + base_data_dict = self.retrieve_base_data(idx=None, tpe=tpe, extra_source=extra_source) + + base_data_dict = add_noise_data_dict_asymmetric(base_data_dict,self.params['noise_setting']) + processed_data_dict = OrderedDict() + processed_data_dict['ego'] = {} + + ego_id = -1 + ego_lidar_pose = [] + ego_cav_base = None + + # first find the ego vehicle's lidar pose 
+ for cav_id, cav_content in base_data_dict.items(): + if cav_content['ego']: + ego_id = cav_id + ego_lidar_pose = cav_content['params']['lidar_pose'] + ego_cav_base = cav_content + break + + assert cav_id == list(base_data_dict.keys())[ + 0], "The first element in the OrderedDict must be ego" + assert ego_id != -1 + assert len(ego_lidar_pose) > 0 + + agents_image_inputs = [] + processed_features = [] + object_stack = [] + object_id_stack = [] + single_label_list = [] + single_object_bbx_center_list = [] + single_object_bbx_mask_list = [] + too_far = [] + lidar_pose_list = [] + lidar_pose_clean_list = [] + cav_id_list = [] + projected_lidar_clean_list = [] # disconet + + if self.visualize or self.kd_flag: + projected_lidar_stack = [] + + # loop over all CAVs to process information + for cav_id, selected_cav_base in base_data_dict.items(): + # check if the cav is within the communication range with ego + distance = \ + math.sqrt((selected_cav_base['params']['lidar_pose'][0] - + ego_lidar_pose[0]) ** 2 + ( + selected_cav_base['params'][ + 'lidar_pose'][1] - ego_lidar_pose[ + 1]) ** 2) + + # if distance is too far, we will just skip this agent + if distance > self.params['comm_range']: + too_far.append(cav_id) + continue + + lidar_pose_clean_list.append(selected_cav_base['params']['lidar_pose_clean']) + lidar_pose_list.append(selected_cav_base['params']['lidar_pose']) # 6dof pose + cav_id_list.append(cav_id) + + for cav_id in too_far: + base_data_dict.pop(cav_id) + + ########## Updated by Yifan Lu 2022.1.26 ############ + # box align to correct pose. + # stage1_content contains all agent. Even out of comm range. + if self.box_align and str(idx) in self.stage1_result.keys(): # False + from opencood.models.sub_modules.box_align_v2 import box_alignment_relative_sample_np + stage1_content = self.stage1_result[str(idx)] + if stage1_content is not None: + all_agent_id_list = stage1_content['cav_id_list'] # include those out of range + all_agent_corners_list = stage1_content['pred_corner3d_np_list'] + all_agent_uncertainty_list = stage1_content['uncertainty_np_list'] + + cur_agent_id_list = cav_id_list + cur_agent_pose = [base_data_dict[cav_id]['params']['lidar_pose'] for cav_id in cav_id_list] + cur_agnet_pose = np.array(cur_agent_pose) + cur_agent_in_all_agent = [all_agent_id_list.index(cur_agent) for cur_agent in cur_agent_id_list] # indexing current agent in `all_agent_id_list` + + pred_corners_list = [np.array(all_agent_corners_list[cur_in_all_ind], dtype=np.float64) + for cur_in_all_ind in cur_agent_in_all_agent] + uncertainty_list = [np.array(all_agent_uncertainty_list[cur_in_all_ind], dtype=np.float64) + for cur_in_all_ind in cur_agent_in_all_agent] + + if sum([len(pred_corners) for pred_corners in pred_corners_list]) != 0: + refined_pose = box_alignment_relative_sample_np(pred_corners_list, + cur_agnet_pose, + uncertainty_list=uncertainty_list, + **self.box_align_args) + cur_agnet_pose[:,[0,1,4]] = refined_pose + + for i, cav_id in enumerate(cav_id_list): + lidar_pose_list[i] = cur_agnet_pose[i].tolist() + base_data_dict[cav_id]['params']['lidar_pose'] = cur_agnet_pose[i].tolist() + + + + pairwise_t_matrix = \ + get_pairwise_transformation_asymmetric(base_data_dict, + self.max_cav, + self.proj_first) + + lidar_poses = np.array(lidar_pose_list).reshape(-1, 6) # [N_cav, 6] + lidar_poses_clean = np.array(lidar_pose_clean_list).reshape(-1, 6) # [N_cav, 6] + + # merge preprocessed features from different cavs into the same dict + cav_num = len(cav_id_list) + + # heterogeneous + if 
self.heterogeneous: + lidar_agent, camera_agent = self.selector.select_agent(idx) + lidar_agent = lidar_agent[:cav_num] + processed_data_dict['ego'].update({"lidar_agent": lidar_agent}) + + for _i, cav_id in enumerate(cav_id_list): + selected_cav_base = base_data_dict[cav_id] + + # dynamic object center generator! for heterogeneous input + if (not self.visualize) and self.heterogeneous and lidar_agent[_i]: + self.generate_object_center = self.generate_object_center_lidar + elif (not self.visualize) and self.heterogeneous and (not lidar_agent[_i]): + self.generate_object_center = self.generate_object_center_camera + + selected_cav_processed = self.get_item_single_car( + selected_cav_base, + ego_cav_base, + tpe, + cav_id, + extra_source!=None) + + if True: #extra_source==None: + object_stack.append(selected_cav_processed['object_bbx_center']) + object_id_stack += selected_cav_processed['object_ids'] + if tpe == 'all': + if self.load_lidar_file: + processed_features.append( + selected_cav_processed['processed_features']) + if self.load_camera_file: + agents_image_inputs.append( + selected_cav_processed['image_inputs']) + + if self.visualize or self.kd_flag: + projected_lidar_stack.append( + selected_cav_processed['projected_lidar']) + + if True: #self.supervise_single and extra_source==None: + single_label_list.append(selected_cav_processed['single_label_dict']) + single_object_bbx_center_list.append(selected_cav_processed['single_object_bbx_center']) + single_object_bbx_mask_list.append(selected_cav_processed['single_object_bbx_mask']) + + # generate single view GT label + if True: # self.supervise_single and extra_source==None: + single_label_dicts = {} + if tpe == 'all': + # unused label + if False: + single_label_dicts = self.post_processor.collate_batch(single_label_list) + single_object_bbx_center = torch.from_numpy(np.array(single_object_bbx_center_list)) + single_object_bbx_mask = torch.from_numpy(np.array(single_object_bbx_mask_list)) + processed_data_dict['ego'].update({ + "single_label_dict_torch": single_label_dicts, + "single_object_bbx_center_torch": single_object_bbx_center, + "single_object_bbx_mask_torch": single_object_bbx_mask, + }) + + if self.kd_flag: + stack_lidar_np = np.vstack(projected_lidar_stack) + stack_lidar_np = mask_points_by_range(stack_lidar_np, + self.params['preprocess'][ + 'cav_lidar_range']) + stack_feature_processed = self.pre_processor.preprocess(stack_lidar_np) + processed_data_dict['ego'].update({'teacher_processed_lidar': + stack_feature_processed}) + + if True: # extra_source is None: + # exclude all repetitive objects + unique_indices = \ + [object_id_stack.index(x) for x in set(object_id_stack)] + object_stack = np.vstack(object_stack) + object_stack = object_stack[unique_indices] + + # make sure bounding boxes across all frames have the same number + object_bbx_center = \ + np.zeros((self.params['postprocess']['max_num'], 7)) + mask = np.zeros(self.params['postprocess']['max_num']) + object_bbx_center[:object_stack.shape[0], :] = object_stack + mask[:object_stack.shape[0]] = 1 + + processed_data_dict['ego'].update( + {'object_bbx_center': object_bbx_center, # (100,7) + 'object_bbx_mask': mask, # (100,) + 'object_ids': [object_id_stack[i] for i in unique_indices], + } + ) + + # generate targets label + label_dict = {} + if tpe == 'all': + # unused label + if False: + label_dict = \ + self.post_processor.generate_label( + gt_box_center=object_bbx_center, + anchors=self.anchor_box, + mask=mask) + + processed_data_dict['ego'].update( + { + 
'anchor_box': self.anchor_box, + 'label_dict': label_dict, + 'cav_num': cav_num, + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_poses_clean': lidar_poses_clean, + 'lidar_poses': lidar_poses}) + + if tpe == 'all': + if self.load_lidar_file: + merged_feature_dict = merge_features_to_dict(processed_features) + processed_data_dict['ego'].update({'processed_lidar': merged_feature_dict}) + if self.load_camera_file: + merged_image_inputs_dict = merge_features_to_dict(agents_image_inputs, merge='stack') + processed_data_dict['ego'].update({'image_inputs': merged_image_inputs_dict}) + + if self.visualize: + processed_data_dict['ego'].update({'origin_lidar': + # projected_lidar_stack}) + np.vstack( + projected_lidar_stack)}) + processed_data_dict['ego'].update({'lidar_len': [len(projected_lidar_stack[i]) for i in range(len(projected_lidar_stack))]}) + + + processed_data_dict['ego'].update({'sample_idx': idx, + 'cav_id_list': cav_id_list}) + + img_front_list = [] + img_left_list = [] + img_right_list = [] + BEV_list = [] + + if self.visualize: + for car_id in base_data_dict: + if not base_data_dict[car_id]['ego'] == True: + continue + if 'rgb_front' in base_data_dict[car_id] and 'rgb_left' in base_data_dict[car_id] and 'rgb_right' in base_data_dict[car_id] and 'BEV' in base_data_dict[car_id] : + img_front_list.append(base_data_dict[car_id]['rgb_front']) + img_left_list.append(base_data_dict[car_id]['rgb_left']) + img_right_list.append(base_data_dict[car_id]['rgb_right']) + BEV_list.append(base_data_dict[car_id]['BEV']) + processed_data_dict['ego'].update({'img_front': img_front_list, + 'img_left': img_left_list, + 'img_right': img_right_list, + 'BEV': BEV_list}) + processed_data_dict['ego'].update({'scene_dict': base_data_dict['car_0']['scene_dict'], + 'frame_id': base_data_dict['car_0']['frame_id'], + }) + + + # TODO: LSS debug + processed_data_dict['ego'].update({"det_data": base_data_dict['car_0']['det_data']}) + detmap_pose_list = [] + for car_id in base_data_dict: + detmap_pose_list.append(base_data_dict[car_id]['detmap_pose']) + detmap_pose_list = torch.from_numpy(np.array(detmap_pose_list)) + processed_data_dict['ego'].update({"detmap_pose": detmap_pose_list}) + ## + + return processed_data_dict + + + def collate_batch_train(self, batch, online_eval_only=False): + # Intermediate fusion is different the other two + output_dict = {'ego': {}} + + object_bbx_center = [] + object_bbx_mask = [] + object_ids = [] + processed_lidar_list = [] + image_inputs_list = [] + # used to record different scenario + record_len = [] + label_dict_list = [] + lidar_pose_list = [] + origin_lidar = [] + lidar_len = [] + lidar_pose_clean_list = [] + + # heterogeneous + lidar_agent_list = [] + + # pairwise transformation matrix + pairwise_t_matrix_list = [] + + # disconet + teacher_processed_lidar_list = [] + + # image + img_front = [] + img_left = [] + img_right = [] + BEV = [] + + dict_list = [] + + # TODO: LSS debug + det_data = [] + detmap_pose = [] + + ### 2022.10.10 single gt #### + if self.supervise_single: + pos_equal_one_single = [] + neg_equal_one_single = [] + targets_single = [] + object_bbx_center_single = [] + object_bbx_mask_single = [] + + for i in range(len(batch)): + ego_dict = batch[i]['ego'] + det_data.append(torch.from_numpy(ego_dict['det_data']).unsqueeze(0)) + detmap_pose.append(ego_dict['detmap_pose']) + if not online_eval_only: + object_bbx_center.append(ego_dict['object_bbx_center']) + object_bbx_mask.append(ego_dict['object_bbx_mask']) + object_ids.append(ego_dict['object_ids']) + else: + 
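+                    # online_eval_only: no ground-truth labels are collated here, so a None
+                    # placeholder keeps object_ids aligned with the batch; object_bbx_center
+                    # and object_bbx_mask are likewise set to None further below.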
object_ids.append(None) + lidar_pose_list.append(ego_dict['lidar_poses']) # ego_dict['lidar_pose'] is np.ndarray [N,6] + lidar_pose_clean_list.append(ego_dict['lidar_poses_clean']) + if self.load_lidar_file: + processed_lidar_list.append(ego_dict['processed_lidar']) + if self.load_camera_file: + image_inputs_list.append(ego_dict['image_inputs']) # different cav_num, ego_dict['image_inputs'] is dict. + + record_len.append(ego_dict['cav_num']) + label_dict_list.append(ego_dict['label_dict']) + pairwise_t_matrix_list.append(ego_dict['pairwise_t_matrix']) + + dict_list.append([ego_dict['scene_dict'], ego_dict['frame_id']]) + + if self.visualize: + origin_lidar.append(ego_dict['origin_lidar']) + lidar_len.append(ego_dict['lidar_len']) + if len(ego_dict['img_front']) > 0 and len(ego_dict['img_right']) > 0 and len(ego_dict['img_left']) > 0 and len(ego_dict['BEV']) > 0: + img_front.append(ego_dict['img_front'][0]) + img_left.append(ego_dict['img_left'][0]) + img_right.append(ego_dict['img_right'][0]) + BEV.append(ego_dict['BEV'][0]) + + + if self.kd_flag: + teacher_processed_lidar_list.append(ego_dict['teacher_processed_lidar']) + + ### 2022.10.10 single gt #### + if self.supervise_single and not online_eval_only: + # unused label + if False: + pos_equal_one_single.append(ego_dict['single_label_dict_torch']['pos_equal_one']) + neg_equal_one_single.append(ego_dict['single_label_dict_torch']['neg_equal_one']) + targets_single.append(ego_dict['single_label_dict_torch']['targets']) + object_bbx_center_single.append(ego_dict['single_object_bbx_center_torch']) + object_bbx_mask_single.append(ego_dict['single_object_bbx_mask_torch']) + + # heterogeneous + if self.heterogeneous: + lidar_agent_list.append(ego_dict['lidar_agent']) + + # convert to numpy, (B, max_num, 7) + if not online_eval_only: + object_bbx_center = torch.from_numpy(np.array(object_bbx_center)) + object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask)) + else: + object_bbx_center = None + object_bbx_mask = None + + if self.load_lidar_file: + merged_feature_dict = merge_features_to_dict(processed_lidar_list) + + if self.heterogeneous: + lidar_agent = np.concatenate(lidar_agent_list) + lidar_agent_idx = lidar_agent.nonzero()[0].tolist() + for k, v in merged_feature_dict.items(): # 'voxel_features' 'voxel_num_points' 'voxel_coords' + merged_feature_dict[k] = [v[index] for index in lidar_agent_idx] + + if not self.heterogeneous or (self.heterogeneous and sum(lidar_agent) != 0): + processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(merged_feature_dict) + output_dict['ego'].update({'processed_lidar': processed_lidar_torch_dict}) + + if self.load_camera_file: + merged_image_inputs_dict = merge_features_to_dict(image_inputs_list, merge='cat') + + if self.heterogeneous: + lidar_agent = np.concatenate(lidar_agent_list) + camera_agent = 1 - lidar_agent + camera_agent_idx = camera_agent.nonzero()[0].tolist() + if sum(camera_agent) != 0: + for k, v in merged_image_inputs_dict.items(): # 'imgs' 'rots' 'trans' ... 
+ merged_image_inputs_dict[k] = torch.stack([v[index] for index in camera_agent_idx]) + + if not self.heterogeneous or (self.heterogeneous and sum(camera_agent) != 0): + output_dict['ego'].update({'image_inputs': merged_image_inputs_dict}) + + record_len = torch.from_numpy(np.array(record_len, dtype=int)) + lidar_pose = torch.from_numpy(np.concatenate(lidar_pose_list, axis=0)) + lidar_pose_clean = torch.from_numpy(np.concatenate(lidar_pose_clean_list, axis=0)) + # unused label + label_torch_dict = {} + if False: + label_torch_dict = \ + self.post_processor.collate_batch(label_dict_list) + + # for centerpoint + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + + # (B, max_cav) + pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list)) + + # add pairwise_t_matrix to label dict + label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix + label_torch_dict['record_len'] = record_len + + + # object id is only used during inference, where batch size is 1. + # so here we only get the first element. + output_dict['ego'].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'record_len': record_len, + 'label_dict': label_torch_dict, + 'object_ids': object_ids[0], + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_pose_clean': lidar_pose_clean, + 'lidar_pose': lidar_pose, + 'anchor_box': self.anchor_box_torch}) + + + output_dict['ego'].update({'dict_list': dict_list}) + + if self.visualize: + origin_lidar = torch.from_numpy(np.array(origin_lidar)) + output_dict['ego'].update({'origin_lidar': origin_lidar}) + lidar_len = np.array(lidar_len) + output_dict['ego'].update({'lidar_len': lidar_len}) + output_dict['ego'].update({'img_front': img_front}) + output_dict['ego'].update({'img_right': img_right}) + output_dict['ego'].update({'img_left': img_left}) + output_dict['ego'].update({'BEV': BEV}) + + if self.kd_flag: + teacher_processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(teacher_processed_lidar_list) + output_dict['ego'].update({'teacher_processed_lidar':teacher_processed_lidar_torch_dict}) + + + if self.supervise_single and not online_eval_only: + output_dict['ego'].update({ + "label_dict_single":{ + # for centerpoint + "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0), + "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0) + }, + "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0), + "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0) + }) + + if self.heterogeneous: + output_dict['ego'].update({ + "lidar_agent_record": torch.from_numpy(np.concatenate(lidar_agent_list)) # [0,1,1,0,1...] + }) + + # TODO: LSS debug + det_data = torch.cat(det_data, dim=0) + detmap_pose = torch.cat(detmap_pose, dim=0) + output_dict['ego'].update({'detmap_pose': detmap_pose}) + + output_dict['ego']['label_dict'].update({ + 'det_data': det_data}) + return output_dict + + def collate_batch_test(self, batch, online_eval_only=False): + + self.online_eval_only = online_eval_only + assert len(batch) <= 1, "Batch size 1 is required during testing!" + output_dict = self.collate_batch_train(batch, online_eval_only) + if output_dict is None: + return None + + # check if anchor box in the batch + if batch[0]['ego']['anchor_box'] is not None: + output_dict['ego'].update({'anchor_box': + self.anchor_box_torch}) + + # save the transformation matrix (4, 4) to ego vehicle + # transformation is only used in post process (no use.) 
+ # we all predict boxes in ego coord. + transformation_matrix_torch = \ + torch.from_numpy(np.identity(4)).float() + transformation_matrix_clean_torch = \ + torch.from_numpy(np.identity(4)).float() + + output_dict['ego'].update({'transformation_matrix': + transformation_matrix_torch, + 'transformation_matrix_clean': + transformation_matrix_clean_torch,}) + + output_dict['ego'].update({ + "sample_idx": batch[0]['ego']['sample_idx'], + "cav_id_list": batch[0]['ego']['cav_id_list'] + }) + + return output_dict + + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. + """ + pred_box_tensor, pred_score = \ + self.post_processor.post_process(data_dict, output_dict) + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + return pred_box_tensor, pred_score, gt_box_tensor + + def post_process_multiclass(self, data_dict, output_dict, online_eval_only=False): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. + """ + + if online_eval_only == False: + online_eval_only = self.online_eval_only + + num_class = output_dict['ego']['cls_preds'].shape[1] + + + pred_box_tensor_list = [] + pred_score_list = [] + gt_box_tensor_list = [] + + num_list = [0,1,3] + + for i in range(num_class): + data_dict_single = copy.deepcopy(data_dict) + output_dict_single = copy.deepcopy(output_dict) + if not online_eval_only: + data_dict_single['ego']['object_bbx_center'] = data_dict['ego']['object_bbx_center'][:,i,:,:] + data_dict_single['ego']['object_bbx_mask'] = data_dict['ego']['object_bbx_mask'][:,i,:] + data_dict_single['ego']['object_ids'] = data_dict['ego']['object_ids'][num_list[i]] + + output_dict_single['ego']['cls_preds'] = output_dict['ego']['cls_preds'][:,i:i+1,:,:] + output_dict_single['ego']['reg_preds'] = output_dict['ego']['reg_preds_multiclass'][:,i,:,:] + + pred_box_tensor, pred_score = \ + self.post_processor.post_process(data_dict_single, output_dict_single) + if not online_eval_only: + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict_single) + else: + gt_box_tensor = None + + pred_box_tensor_list.append(pred_box_tensor) + pred_score_list.append(pred_score) + gt_box_tensor_list.append(gt_box_tensor) + + return pred_box_tensor_list, pred_score_list, gt_box_tensor_list + + return IntermediatemulticlassFusionDataset + + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_fusion_dataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_fusion_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..2fd9095e5e566dcfd7b945cdacd9331e1838c29e --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_fusion_dataset.py @@ -0,0 +1,564 @@ +# late fusion dataset +import random +import math +from 
collections import OrderedDict +import cv2 +import numpy as np +import torch +import copy +from icecream import ic +from PIL import Image +import pickle as pkl +from opencood.utils import box_utils as box_utils +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.data_utils.post_processor import build_postprocessor +from opencood.utils.camera_utils import ( + sample_augmentation, + img_transform, + normalize_img, + img_to_tensor, +) +from opencood.data_utils.augmentor.data_augmentor import DataAugmentor +from opencood.utils.transformation_utils import x1_to_x2 +from opencood.utils.pose_utils import add_noise_data_dict +from opencood.utils.pcd_utils import ( + mask_points_by_range, + mask_ego_points, + shuffle_points, + downsample_lidar_minimum, +) + + +def getLateFusionDataset(cls): + """ + cls: the Basedataset. + """ + class LateFusionDataset(cls): + def __init__(self, params, visualize, train=True): + super().__init__(params, visualize, train) + self.anchor_box = self.post_processor.generate_anchor_box() + self.anchor_box_torch = torch.from_numpy(self.anchor_box) + + self.heterogeneous = False + if 'heter' in params: + self.heterogeneous = True + + def __getitem__(self, idx): + base_data_dict = self.retrieve_base_data(idx) + if self.train: + reformat_data_dict = self.get_item_train(base_data_dict) + else: + reformat_data_dict = self.get_item_test(base_data_dict, idx) + + return reformat_data_dict + + def get_item_train(self, base_data_dict): + processed_data_dict = OrderedDict() + base_data_dict = add_noise_data_dict( + base_data_dict, self.params["noise_setting"] + ) + # during training, we return a random cav's data + # only one vehicle is in processed_data_dict + if not self.visualize: + selected_cav_id, selected_cav_base = random.choice( + list(base_data_dict.items()) + ) + else: + selected_cav_id, selected_cav_base = list(base_data_dict.items())[0] + + selected_cav_processed = self.get_item_single_car(selected_cav_base) + processed_data_dict.update({"ego": selected_cav_processed}) + + return processed_data_dict + + + def get_item_test(self, base_data_dict, idx): + """ + processed_data_dict.keys() = ['ego', "650", "659", ...] + """ + base_data_dict = add_noise_data_dict(base_data_dict,self.params['noise_setting']) + + processed_data_dict = OrderedDict() + ego_id = -1 + ego_lidar_pose = [] + cav_id_list = [] + lidar_pose_list = [] + + # first find the ego vehicle's lidar pose + for cav_id, cav_content in base_data_dict.items(): + if cav_content['ego']: + ego_id = cav_id + ego_lidar_pose = cav_content['params']['lidar_pose'] + ego_lidar_pose_clean = cav_content['params']['lidar_pose_clean'] + break + + assert ego_id != -1 + assert len(ego_lidar_pose) > 0 + + # loop over all CAVs to process information + for cav_id, selected_cav_base in base_data_dict.items(): + distance = \ + math.sqrt((selected_cav_base['params']['lidar_pose'][0] - + ego_lidar_pose[0]) ** 2 + ( + selected_cav_base['params'][ + 'lidar_pose'][1] - ego_lidar_pose[ + 1]) ** 2) + if distance > self.params['comm_range']: + continue + cav_id_list.append(cav_id) + lidar_pose_list.append(selected_cav_base['params']['lidar_pose']) + + cav_id_list_newname = [] + for cav_id in cav_id_list: + selected_cav_base = base_data_dict[cav_id] + # find the transformation matrix from current cav to ego. 
+ cav_lidar_pose = selected_cav_base['params']['lidar_pose'] + transformation_matrix = x1_to_x2(cav_lidar_pose, ego_lidar_pose) + cav_lidar_pose_clean = selected_cav_base['params']['lidar_pose_clean'] + transformation_matrix_clean = x1_to_x2(cav_lidar_pose_clean, ego_lidar_pose_clean) + + selected_cav_processed = \ + self.get_item_single_car(selected_cav_base) + selected_cav_processed.update({'transformation_matrix': transformation_matrix, + 'transformation_matrix_clean': transformation_matrix_clean}) + update_cav = "ego" if cav_id == ego_id else cav_id + processed_data_dict.update({update_cav: selected_cav_processed}) + cav_id_list_newname.append(update_cav) + + # heterogeneous + if self.heterogeneous: + processed_data_dict['ego']['idx'] = idx + processed_data_dict['ego']['cav_list'] = cav_id_list_newname + + return processed_data_dict + + + def get_item_single_car(self, selected_cav_base): + """ + Process a single CAV's information for the train/test pipeline. + + + Parameters + ---------- + selected_cav_base : dict + The dictionary contains a single CAV's raw information. + including 'params', 'camera_data' + + Returns + ------- + selected_cav_processed : dict + The dictionary contains the cav's processed information. + """ + selected_cav_processed = {} + + # label + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center_single( + [selected_cav_base], selected_cav_base["params"]["lidar_pose_clean"] + ) + + # lidar + if self.load_lidar_file or self.visualize: + lidar_np = selected_cav_base['lidar_np'] + lidar_np = shuffle_points(lidar_np) + lidar_np = mask_points_by_range(lidar_np, + self.params['preprocess'][ + 'cav_lidar_range']) + # remove points that hit ego vehicle + lidar_np = mask_ego_points(lidar_np) + + # data augmentation, seems very important for single agent training, because lack of data diversity. + # only work for lidar modality in training. 
+ if not self.heterogeneous: + lidar_np, object_bbx_center, object_bbx_mask = \ + self.augment(lidar_np, object_bbx_center, object_bbx_mask) + + lidar_dict = self.pre_processor.preprocess(lidar_np) + selected_cav_processed.update({'processed_lidar': lidar_dict}) + + + + + if self.visualize: + selected_cav_processed.update({'origin_lidar': lidar_np}) + + # camera + if self.load_camera_file: + # adapted from https://github.com/nv-tlabs/lift-splat-shoot/blob/master/src/data.py + camera_data_list = selected_cav_base["camera_data"] + + params = selected_cav_base["params"] + imgs = [] + rots = [] + trans = [] + intrins = [] + extrinsics = [] # cam_to_lidar + post_rots = [] + post_trans = [] + + for idx, img in enumerate(camera_data_list): + camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx) + + intrin = torch.from_numpy(camera_intrinsic) + rot = torch.from_numpy( + camera_to_lidar[:3, :3] + ) # R_wc, we consider world-coord is the lidar-coord + tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc + + post_rot = torch.eye(2) + post_tran = torch.zeros(2) + + img_src = [img] + + # depth + if self.load_depth_file: + depth_img = selected_cav_base["depth_data"][idx] + img_src.append(depth_img) + else: + depth_img = None + + # data augmentation + resize, resize_dims, crop, flip, rotate = sample_augmentation( + self.data_aug_conf, self.train + ) + img_src, post_rot2, post_tran2 = img_transform( + img_src, + post_rot, + post_tran, + resize=resize, + resize_dims=resize_dims, + crop=crop, + flip=flip, + rotate=rotate, + ) + # for convenience, make augmentation matrices 3x3 + post_tran = torch.zeros(3) + post_rot = torch.eye(3) + post_tran[:2] = post_tran2 + post_rot[:2, :2] = post_rot2 + + img_src[0] = normalize_img(img_src[0]) + if self.load_depth_file: + img_src[1] = img_to_tensor(img_src[1]) * 255 + + imgs.append(torch.cat(img_src, dim=0)) + intrins.append(intrin) + extrinsics.append(torch.from_numpy(camera_to_lidar)) + rots.append(rot) + trans.append(tran) + post_rots.append(post_rot) + post_trans.append(post_tran) + + selected_cav_processed.update( + { + "image_inputs": + { + "imgs": torch.stack(imgs), # [N, 3or4, H, W] + "intrins": torch.stack(intrins), + "extrinsics": torch.stack(extrinsics), + "rots": torch.stack(rots), + "trans": torch.stack(trans), + "post_rots": torch.stack(post_rots), + "post_trans": torch.stack(post_trans), + } + } + ) + + + selected_cav_processed.update( + { + "object_bbx_center": object_bbx_center, + "object_bbx_mask": object_bbx_mask, + "object_ids": object_ids, + } + ) + + # generate targets label + label_dict = self.post_processor.generate_label( + gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask + ) + selected_cav_processed.update({"label_dict": label_dict}) + + return selected_cav_processed + + + def collate_batch_train(self, batch): + """ + Customized collate function for pytorch dataloader during training + for early and late fusion dataset. + + Parameters + ---------- + batch : dict + + Returns + ------- + batch : dict + Reformatted batch. + """ + # during training, we only care about ego. 
+ output_dict = {'ego': {}} + + object_bbx_center = [] + object_bbx_mask = [] + processed_lidar_list = [] + label_dict_list = [] + origin_lidar = [] + + for i in range(len(batch)): + ego_dict = batch[i]['ego'] + object_bbx_center.append(ego_dict['object_bbx_center']) + object_bbx_mask.append(ego_dict['object_bbx_mask']) + label_dict_list.append(ego_dict['label_dict']) + + if self.visualize: + origin_lidar.append(ego_dict['origin_lidar']) + + # convert to numpy, (B, max_num, 7) + object_bbx_center = torch.from_numpy(np.array(object_bbx_center)) + object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask)) + label_torch_dict = \ + self.post_processor.collate_batch(label_dict_list) + + # for centerpoint + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + + output_dict['ego'].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'anchor_box': torch.from_numpy(self.anchor_box), + 'label_dict': label_torch_dict}) + if self.visualize: + origin_lidar = \ + np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar)) + origin_lidar = torch.from_numpy(origin_lidar) + output_dict['ego'].update({'origin_lidar': origin_lidar}) + + if self.load_lidar_file: + for i in range(len(batch)): + processed_lidar_list.append(batch[i]['ego']['processed_lidar']) + processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(processed_lidar_list) + output_dict['ego'].update({'processed_lidar': processed_lidar_torch_dict}) + + if self.load_camera_file: + # collate ego camera information + imgs_batch = [] + rots_batch = [] + trans_batch = [] + intrins_batch = [] + extrinsics_batch = [] + post_trans_batch = [] + post_rots_batch = [] + for i in range(len(batch)): + ego_dict = batch[i]["ego"]["image_inputs"] + imgs_batch.append(ego_dict["imgs"]) + rots_batch.append(ego_dict["rots"]) + trans_batch.append(ego_dict["trans"]) + intrins_batch.append(ego_dict["intrins"]) + extrinsics_batch.append(ego_dict["extrinsics"]) + post_trans_batch.append(ego_dict["post_trans"]) + post_rots_batch.append(ego_dict["post_rots"]) + + output_dict["ego"].update({ + "image_inputs": + { + "imgs": torch.stack(imgs_batch), # [B, N, C, H, W] + "rots": torch.stack(rots_batch), + "trans": torch.stack(trans_batch), + "intrins": torch.stack(intrins_batch), + "post_trans": torch.stack(post_trans_batch), + "post_rots": torch.stack(post_rots_batch), + } + } + ) + + + return output_dict + + def collate_batch_test(self, batch): + """ + Customized collate function for pytorch dataloader during testing + for late fusion dataset. + + Parameters + ---------- + batch : dict + + Returns + ------- + batch : dict + Reformatted batch. + """ + # currently, we only support batch size of 1 during testing + assert len(batch) <= 1, "Batch size 1 is required during testing!" + batch = batch[0] + + output_dict = {} + + # heterogeneous + if self.heterogeneous: + idx = batch['ego']['idx'] + cav_list = batch['ego']['cav_list'] # ['ego', '650' ..] + cav_num = len(batch) + lidar_agent, camera_agent = self.selector.select_agent(idx) + lidar_agent = lidar_agent[:cav_num] # [1,0,0,1,0] + lidar_agent_idx = lidar_agent.nonzero()[0].tolist() + lidar_agent_cav_id = [cav_list[index] for index in lidar_agent_idx] # ['ego', ...] 
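The index bookkeeping in the heterogeneous branch above is easier to see in a minimal, self-contained sketch; the agent assignment values below are made up and not taken from the repo:

import numpy as np

cav_list = ['ego', '650', '659']          # cav ids after renaming, ego first
lidar_agent = np.array([1, 0, 1])         # 1 -> lidar agent, 0 -> camera agent (invented)
lidar_agent_idx = lidar_agent.nonzero()[0].tolist()            # [0, 2]
lidar_agent_cav_id = [cav_list[i] for i in lidar_agent_idx]    # ['ego', '659']
camera_agent_cav_id = [cav_list[i]
                       for i in (1 - lidar_agent).nonzero()[0].tolist()]  # ['650']
print(lidar_agent_cav_id, camera_agent_cav_id)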
+ + + # for late fusion, we also need to stack the lidar for better + # visualization + if self.visualize: + projected_lidar_list = [] + origin_lidar = [] + + for cav_id, cav_content in batch.items(): + output_dict.update({cav_id: {}}) + # shape: (1, max_num, 7) + object_bbx_center = \ + torch.from_numpy(np.array([cav_content['object_bbx_center']])) + object_bbx_mask = \ + torch.from_numpy(np.array([cav_content['object_bbx_mask']])) + object_ids = cav_content['object_ids'] + + # the anchor box is the same for all bounding boxes usually, thus + # we don't need the batch dimension. + output_dict[cav_id].update( + {"anchor_box": self.anchor_box_torch} + ) + + transformation_matrix = cav_content['transformation_matrix'] + if self.visualize: + origin_lidar = [cav_content['origin_lidar']] + if (self.params['only_vis_ego'] is False) or (cav_id=='ego'): + projected_lidar = copy.deepcopy(cav_content['origin_lidar']) + projected_lidar[:, :3] = \ + box_utils.project_points_by_matrix_torch( + projected_lidar[:, :3], + transformation_matrix) + projected_lidar_list.append(projected_lidar) + + if self.load_lidar_file: + # processed lidar dictionary + processed_lidar_torch_dict = \ + self.pre_processor.collate_batch( + [cav_content['processed_lidar']]) + output_dict[cav_id].update({'processed_lidar': processed_lidar_torch_dict}) + + if self.load_camera_file: + imgs_batch = [cav_content["image_inputs"]["imgs"]] + rots_batch = [cav_content["image_inputs"]["rots"]] + trans_batch = [cav_content["image_inputs"]["trans"]] + intrins_batch = [cav_content["image_inputs"]["intrins"]] + extrinsics_batch = [cav_content["image_inputs"]["extrinsics"]] + post_trans_batch = [cav_content["image_inputs"]["post_trans"]] + post_rots_batch = [cav_content["image_inputs"]["post_rots"]] + + output_dict[cav_id].update({ + "image_inputs": + { + "imgs": torch.stack(imgs_batch), + "rots": torch.stack(rots_batch), + "trans": torch.stack(trans_batch), + "intrins": torch.stack(intrins_batch), + "extrinsics": torch.stack(extrinsics_batch), + "post_trans": torch.stack(post_trans_batch), + "post_rots": torch.stack(post_rots_batch), + } + } + ) + + # heterogeneous + if self.heterogeneous: + if cav_id in lidar_agent_cav_id: + output_dict[cav_id].pop('image_inputs') + else: + output_dict[cav_id].pop('processed_lidar') + + # label dictionary + label_torch_dict = \ + self.post_processor.collate_batch([cav_content['label_dict']]) + + # for centerpoint + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + + # save the transformation matrix (4, 4) to ego vehicle + transformation_matrix_torch = \ + torch.from_numpy( + np.array(cav_content['transformation_matrix'])).float() + + # late fusion training, no noise + transformation_matrix_clean_torch = \ + torch.from_numpy( + np.array(cav_content['transformation_matrix_clean'])).float() + + output_dict[cav_id].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'label_dict': label_torch_dict, + 'object_ids': object_ids, + 'transformation_matrix': transformation_matrix_torch, + 'transformation_matrix_clean': transformation_matrix_clean_torch}) + + if self.visualize: + origin_lidar = \ + np.array( + downsample_lidar_minimum(pcd_np_list=origin_lidar)) + origin_lidar = torch.from_numpy(origin_lidar) + output_dict[cav_id].update({'origin_lidar': origin_lidar}) + + if self.visualize: + projected_lidar_stack = [torch.from_numpy( + np.vstack(projected_lidar_list))] + output_dict['ego'].update({'origin_lidar': 
projected_lidar_stack}) + # output_dict['ego'].update({'projected_lidar_list': projected_lidar_list}) + + return output_dict + + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. + """ + pred_box_tensor, pred_score = self.post_processor.post_process( + data_dict, output_dict + ) + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + return pred_box_tensor, pred_score, gt_box_tensor + + def post_process_no_fusion(self, data_dict, output_dict_ego): + data_dict_ego = OrderedDict() + data_dict_ego["ego"] = data_dict["ego"] + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + pred_box_tensor, pred_score = self.post_processor.post_process( + data_dict_ego, output_dict_ego + ) + return pred_box_tensor, pred_score, gt_box_tensor + + def post_process_no_fusion_uncertainty(self, data_dict, output_dict_ego): + data_dict_ego = OrderedDict() + data_dict_ego['ego'] = data_dict['ego'] + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + pred_box_tensor, pred_score, uncertainty = \ + self.post_processor.post_process(data_dict_ego, output_dict_ego, return_uncertainty=True) + return pred_box_tensor, pred_score, gt_box_tensor, uncertainty + + return LateFusionDataset \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_heter_fusion_dataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_heter_fusion_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..c518bc7e23b2b1fde59b38c052969c3c30e16b2c --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_heter_fusion_dataset.py @@ -0,0 +1,565 @@ +# late fusion dataset +import random +import math +from collections import OrderedDict +import cv2 +import numpy as np +import torch +import copy +from icecream import ic +from PIL import Image +import pickle as pkl +from opencood.utils import box_utils as box_utils +from opencood.data_utils.pre_processor import build_preprocessor +from opencood.data_utils.post_processor import build_postprocessor +from opencood.utils.camera_utils import ( + sample_augmentation, + img_transform, + normalize_img, + img_to_tensor, +) +from opencood.data_utils.augmentor.data_augmentor import DataAugmentor +from opencood.utils.transformation_utils import x1_to_x2 +from opencood.utils.pose_utils import add_noise_data_dict +from opencood.utils.pcd_utils import ( + mask_points_by_range, + mask_ego_points, + shuffle_points, + downsample_lidar_minimum, +) +from opencood.utils.common_utils import read_json +from opencood.utils.common_utils import merge_features_to_dict +from opencood.utils.heter_utils import Adaptor + +def getLateheterFusionDataset(cls): + """ + cls: the Basedataset. 
+ """ + class LateheterFusionDataset(cls): + def __init__(self, params, visualize, train=True): + super().__init__(params, visualize, train) + self.anchor_box = self.post_processor.generate_anchor_box() + self.anchor_box_torch = torch.from_numpy(self.anchor_box) + + self.heterogeneous = True + self.modality_assignment = read_json(params['heter']['assignment_path']) + self.ego_modality = params['heter']['ego_modality'] # "m1" or "m1&m2" or "m3" + + self.modality_name_list = list(params['heter']['modality_setting'].keys()) + self.sensor_type_dict = OrderedDict() + + lidar_channels_dict = params['heter'].get('lidar_channels_dict', OrderedDict()) + mapping_dict = params['heter']['mapping_dict'] + + self.adaptor = Adaptor(self.ego_modality, + self.modality_name_list, + self.modality_assignment, + lidar_channels_dict, + mapping_dict, + None, + train) + + for modality_name, modal_setting in params['heter']['modality_setting'].items(): + self.sensor_type_dict[modality_name] = modal_setting['sensor_type'] + if modal_setting['sensor_type'] == 'lidar': + setattr(self, f"pre_processor_{modality_name}", build_preprocessor(modal_setting['preprocess'], train)) + + elif modal_setting['sensor_type'] == 'camera': + setattr(self, f"data_aug_conf_{modality_name}", modal_setting['data_aug_conf']) + + else: + raise("Not support this type of sensor") + + self.reinitialize() + + def __getitem__(self, idx): + base_data_dict = self.retrieve_base_data(idx) + if self.train: + reformat_data_dict = self.get_item_train(base_data_dict) + else: + reformat_data_dict = self.get_item_test(base_data_dict, idx) + return reformat_data_dict + + def get_item_train(self, base_data_dict): + processed_data_dict = OrderedDict() + base_data_dict = add_noise_data_dict( + base_data_dict, self.params["noise_setting"] + ) + # during training, we return a random cav's data + # only one vehicle is in processed_data_dict + if not self.visualize: + options = [] + for cav_id, cav_content in base_data_dict.items(): + if cav_content['modality_name'] in self.ego_modality: + options.append(cav_id) + selected_cav_base = base_data_dict[random.choice(options)] + else: + selected_cav_id, selected_cav_base = list(base_data_dict.items())[0] + + selected_cav_processed = self.get_item_single_car(selected_cav_base) + processed_data_dict.update({"ego": selected_cav_processed}) + + return processed_data_dict + + + def get_item_test(self, base_data_dict, idx): + """ + processed_data_dict.keys() = ['ego', "650", "659", ...] 
+ """ + base_data_dict = add_noise_data_dict(base_data_dict,self.params['noise_setting']) + + processed_data_dict = OrderedDict() + ego_id = -1 + ego_lidar_pose = [] + cav_id_list = [] + lidar_pose_list = [] + + # first find the ego vehicle's lidar pose + for cav_id, cav_content in base_data_dict.items(): + if cav_content['ego']: + ego_id = cav_id + ego_lidar_pose = cav_content['params']['lidar_pose'] + ego_lidar_pose_clean = cav_content['params']['lidar_pose_clean'] + break + + assert ego_id != -1 + assert len(ego_lidar_pose) > 0 + + # loop over all CAVs to process information + for cav_id, selected_cav_base in base_data_dict.items(): + distance = \ + math.sqrt((selected_cav_base['params']['lidar_pose'][0] - + ego_lidar_pose[0]) ** 2 + ( + selected_cav_base['params'][ + 'lidar_pose'][1] - ego_lidar_pose[ + 1]) ** 2) + if distance > self.params['comm_range']: + continue + + if self.adaptor.unmatched_modality(selected_cav_base['modality_name']): + continue + + cav_id_list.append(cav_id) + lidar_pose_list.append(selected_cav_base['params']['lidar_pose']) + + cav_id_list_newname = [] + for cav_id in cav_id_list: + selected_cav_base = base_data_dict[cav_id] + # find the transformation matrix from current cav to ego. + cav_lidar_pose = selected_cav_base['params']['lidar_pose'] + transformation_matrix = x1_to_x2(cav_lidar_pose, ego_lidar_pose) + cav_lidar_pose_clean = selected_cav_base['params']['lidar_pose_clean'] + transformation_matrix_clean = x1_to_x2(cav_lidar_pose_clean, ego_lidar_pose_clean) + + # In test phase, we all use lidar label for fair comparison. (need discussion) + self.label_type = 'lidar' # DAIRV2X + self.generate_object_center = self.generate_object_center_lidar # OPV2V, V2XSET + + selected_cav_processed = \ + self.get_item_single_car(selected_cav_base) + selected_cav_processed.update({'transformation_matrix': transformation_matrix, + 'transformation_matrix_clean': transformation_matrix_clean}) + update_cav = "ego" if cav_id == ego_id else cav_id + processed_data_dict.update({update_cav: selected_cav_processed}) + cav_id_list_newname.append(update_cav) + + + return processed_data_dict + + + def get_item_single_car(self, selected_cav_base): + """ + Process a single CAV's information for the train/test pipeline. + + + Parameters + ---------- + selected_cav_base : dict + The dictionary contains a single CAV's raw information. + including 'params', 'camera_data' + + Returns + ------- + selected_cav_processed : dict + The dictionary contains the cav's processed information. + """ + selected_cav_processed = {} + modality_name = selected_cav_base['modality_name'] + sensor_type = self.sensor_type_dict[modality_name] + + # label + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center_single( + [selected_cav_base], selected_cav_base["params"]["lidar_pose_clean"] + ) + + # lidar + if sensor_type == "lidar" or self.visualize: + lidar_np = selected_cav_base['lidar_np'] + lidar_np = shuffle_points(lidar_np) + lidar_np = mask_points_by_range(lidar_np, + self.params['preprocess'][ + 'cav_lidar_range']) + # remove points that hit ego vehicle + lidar_np = mask_ego_points(lidar_np) + + # data augmentation, seems very important for single agent training, because lack of data diversity. + # only work for lidar modality in training. 
+ lidar_np, object_bbx_center, object_bbx_mask = \ + self.augment(lidar_np, object_bbx_center, object_bbx_mask) + if sensor_type == "lidar": + processed_lidar = eval(f"self.pre_processor_{modality_name}").preprocess(lidar_np) + selected_cav_processed.update({f'processed_features_{modality_name}': processed_lidar}) + + + if self.visualize: + selected_cav_processed.update({'origin_lidar': lidar_np}) + + # camera + if sensor_type == "camera": + # adapted from https://github.com/nv-tlabs/lift-splat-shoot/blob/master/src/data.py + camera_data_list = selected_cav_base["camera_data"] + + params = selected_cav_base["params"] + imgs = [] + rots = [] + trans = [] + intrins = [] + extrinsics = [] # cam_to_lidar + post_rots = [] + post_trans = [] + + for idx, img in enumerate(camera_data_list): + camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx) + + intrin = torch.from_numpy(camera_intrinsic) + rot = torch.from_numpy( + camera_to_lidar[:3, :3] + ) # R_wc, we consider world-coord is the lidar-coord + tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc + + post_rot = torch.eye(2) + post_tran = torch.zeros(2) + + img_src = [img] + + # depth + if self.load_depth_file: + depth_img = selected_cav_base["depth_data"][idx] + img_src.append(depth_img) + else: + depth_img = None + + # data augmentation + resize, resize_dims, crop, flip, rotate = sample_augmentation( + eval(f"self.data_aug_conf_{modality_name}"), self.train + ) + img_src, post_rot2, post_tran2 = img_transform( + img_src, + post_rot, + post_tran, + resize=resize, + resize_dims=resize_dims, + crop=crop, + flip=flip, + rotate=rotate, + ) + # for convenience, make augmentation matrices 3x3 + post_tran = torch.zeros(3) + post_rot = torch.eye(3) + post_tran[:2] = post_tran2 + post_rot[:2, :2] = post_rot2 + + img_src[0] = normalize_img(img_src[0]) + if self.load_depth_file: + img_src[1] = img_to_tensor(img_src[1]) * 255 + + imgs.append(torch.cat(img_src, dim=0)) + intrins.append(intrin) + extrinsics.append(torch.from_numpy(camera_to_lidar)) + rots.append(rot) + trans.append(tran) + post_rots.append(post_rot) + post_trans.append(post_tran) + + selected_cav_processed.update( + { + f"image_inputs_{modality_name}": + { + "imgs": torch.stack(imgs), # [N, 3or4, H, W] + "intrins": torch.stack(intrins), + "extrinsics": torch.stack(extrinsics), + "rots": torch.stack(rots), + "trans": torch.stack(trans), + "post_rots": torch.stack(post_rots), + "post_trans": torch.stack(post_trans), + } + } + ) + + + selected_cav_processed.update( + { + "object_bbx_center": object_bbx_center, + "object_bbx_mask": object_bbx_mask, + "object_ids": object_ids, + "modality_name": modality_name + } + ) + + # generate targets label + label_dict = self.post_processor.generate_label( + gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask + ) + selected_cav_processed.update({"label_dict": label_dict}) + + return selected_cav_processed + + + def collate_batch_train(self, batch): + """ + Customized collate function for pytorch dataloader during training + for early and late fusion dataset. + + Parameters + ---------- + batch : dict + + Returns + ------- + batch : dict + Reformatted batch. + """ + # during training, we only care about ego. 
+ output_dict = {'ego': {}} + + object_bbx_center = [] + object_bbx_mask = [] + label_dict_list = [] + origin_lidar = [] + inputs_list_m1 = [] + inputs_list_m2 = [] + inputs_list_m3 = [] + inputs_list_m4 = [] + for i in range(len(batch)): + ego_dict = batch[i]['ego'] + object_bbx_center.append(ego_dict['object_bbx_center']) + object_bbx_mask.append(ego_dict['object_bbx_mask']) + label_dict_list.append(ego_dict['label_dict']) + + if self.visualize: + origin_lidar.append(ego_dict['origin_lidar']) + + # convert to numpy, (B, max_num, 7) + object_bbx_center = torch.from_numpy(np.array(object_bbx_center)) + object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask)) + label_torch_dict = \ + self.post_processor.collate_batch(label_dict_list) + + # for centerpoint + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + + output_dict['ego'].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'anchor_box': torch.from_numpy(self.anchor_box), + 'label_dict': label_torch_dict}) + if self.visualize: + origin_lidar = \ + np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar)) + origin_lidar = torch.from_numpy(origin_lidar) + output_dict['ego'].update({'origin_lidar': origin_lidar}) + + + + + for modality_name in self.modality_name_list: + sensor_type = self.sensor_type_dict[modality_name] + for i in range(len(batch)): + ego_dict = batch[i]['ego'] + if f'processed_features_{modality_name}' in ego_dict: + eval(f"inputs_list_{modality_name}").append(ego_dict[f'processed_features_{modality_name}']) + elif f'image_inputs_{modality_name}' in ego_dict: + eval(f"inputs_list_{modality_name}").append(ego_dict[f'image_inputs_{modality_name}']) + + if self.sensor_type_dict[modality_name] == "lidar": + processed_lidar_torch_dict = eval(f"self.pre_processor_{modality_name}").collate_batch(eval(f"inputs_list_{modality_name}")) + output_dict['ego'].update({f'inputs_{modality_name}': processed_lidar_torch_dict}) + elif self.sensor_type_dict[modality_name] == "camera": + merged_image_inputs_dict = merge_features_to_dict(eval(f"inputs_list_{modality_name}"), merge='stack') + output_dict['ego'].update({f'inputs_{modality_name}': merged_image_inputs_dict}) + + return output_dict + + def collate_batch_test(self, batch): + """ + Customized collate function for pytorch dataloader during testing + for late fusion dataset. + + Parameters + ---------- + batch : dict + + Returns + ------- + batch : dict + Reformatted batch. + """ + # currently, we only support batch size of 1 during testing + assert len(batch) <= 1, "Batch size 1 is required during testing!" + batch = batch[0] + + output_dict = {} + + # for late fusion, we also need to stack the lidar for better + # visualization + if self.visualize: + projected_lidar_list = [] + origin_lidar = [] + + for cav_id, cav_content in batch.items(): + modality_name = cav_content['modality_name'] + sensor_type = self.sensor_type_dict[modality_name] + + output_dict.update({cav_id: {}}) + # shape: (1, max_num, 7) + object_bbx_center = \ + torch.from_numpy(np.array([cav_content['object_bbx_center']])) + object_bbx_mask = \ + torch.from_numpy(np.array([cav_content['object_bbx_mask']])) + object_ids = cav_content['object_ids'] + + # the anchor box is the same for all bounding boxes usually, thus + # we don't need the batch dimension. 
+ output_dict[cav_id].update( + {"anchor_box": self.anchor_box_torch} + ) + + transformation_matrix = cav_content['transformation_matrix'] + if self.visualize: + origin_lidar = [cav_content['origin_lidar']] + if (self.params.get('only_vis_ego', True) is False) or (cav_id=='ego'): + projected_lidar = copy.deepcopy(cav_content['origin_lidar']) + projected_lidar[:, :3] = \ + box_utils.project_points_by_matrix_torch( + projected_lidar[:, :3], + transformation_matrix) + projected_lidar_list.append(projected_lidar) + + if sensor_type == "lidar": + # processed lidar dictionary + processed_lidar_torch_dict = \ + eval(f"self.pre_processor_{modality_name}").collate_batch([cav_content[f'processed_features_{modality_name}']]) + output_dict[cav_id].update({f'inputs_{modality_name}': processed_lidar_torch_dict}) + + if sensor_type == 'camera': + imgs_batch = [cav_content[f"image_inputs_{modality_name}"]["imgs"]] + rots_batch = [cav_content[f"image_inputs_{modality_name}"]["rots"]] + trans_batch = [cav_content[f"image_inputs_{modality_name}"]["trans"]] + intrins_batch = [cav_content[f"image_inputs_{modality_name}"]["intrins"]] + extrinsics_batch = [cav_content[f"image_inputs_{modality_name}"]["extrinsics"]] + post_trans_batch = [cav_content[f"image_inputs_{modality_name}"]["post_trans"]] + post_rots_batch = [cav_content[f"image_inputs_{modality_name}"]["post_rots"]] + + output_dict[cav_id].update({ + f"inputs_{modality_name}": + { + "imgs": torch.stack(imgs_batch), + "rots": torch.stack(rots_batch), + "trans": torch.stack(trans_batch), + "intrins": torch.stack(intrins_batch), + "extrinsics": torch.stack(extrinsics_batch), + "post_trans": torch.stack(post_trans_batch), + "post_rots": torch.stack(post_rots_batch), + } + } + ) + + + # label dictionary + label_torch_dict = \ + self.post_processor.collate_batch([cav_content['label_dict']]) + + # for centerpoint + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + + # save the transformation matrix (4, 4) to ego vehicle + transformation_matrix_torch = \ + torch.from_numpy( + np.array(cav_content['transformation_matrix'])).float() + + # late fusion training, no noise + transformation_matrix_clean_torch = \ + torch.from_numpy( + np.array(cav_content['transformation_matrix_clean'])).float() + + output_dict[cav_id].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'label_dict': label_torch_dict, + 'object_ids': object_ids, + 'transformation_matrix': transformation_matrix_torch, + 'transformation_matrix_clean': transformation_matrix_clean_torch, + 'modality_name': modality_name}) + + if self.visualize: + origin_lidar = \ + np.array( + downsample_lidar_minimum(pcd_np_list=origin_lidar)) + origin_lidar = torch.from_numpy(origin_lidar) + output_dict[cav_id].update({'origin_lidar': origin_lidar}) + + if self.visualize: + projected_lidar_stack = [torch.from_numpy( + np.vstack(projected_lidar_list))] + output_dict['ego'].update({'origin_lidar': projected_lidar_stack}) + # output_dict['ego'].update({'projected_lidar_list': projected_lidar_list}) + + return output_dict + + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. 
+            gt_box_tensor : torch.Tensor
+                The tensor of gt bounding box.
+            """
+            pred_box_tensor, pred_score = self.post_processor.post_process(
+                data_dict, output_dict
+            )
+            gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict)
+
+            return pred_box_tensor, pred_score, gt_box_tensor
+
+        def post_process_no_fusion(self, data_dict, output_dict_ego):
+            data_dict_ego = OrderedDict()
+            data_dict_ego["ego"] = data_dict["ego"]
+            gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict)
+
+            pred_box_tensor, pred_score = self.post_processor.post_process(
+                data_dict_ego, output_dict_ego
+            )
+            return pred_box_tensor, pred_score, gt_box_tensor
+
+        def post_process_no_fusion_uncertainty(self, data_dict, output_dict_ego):
+            data_dict_ego = OrderedDict()
+            data_dict_ego['ego'] = data_dict['ego']
+            gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict)
+
+            pred_box_tensor, pred_score, uncertainty = \
+                self.post_processor.post_process(data_dict_ego, output_dict_ego, return_uncertainty=True)
+            return pred_box_tensor, pred_score, gt_box_tensor, uncertainty
+
+    return LateheterFusionDataset
\ No newline at end of file
diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_multi_fusion_dataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_multi_fusion_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..71dbbe07a881bc356351d78e6a4cc683a547e97f
--- /dev/null
+++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_multi_fusion_dataset.py
@@ -0,0 +1,631 @@
+# late fusion dataset
+import random
+import math
+from collections import OrderedDict
+import cv2
+import numpy as np
+import torch
+import copy
+from icecream import ic
+from PIL import Image
+import pickle as pkl
+from opencood.utils import box_utils as box_utils
+from opencood.data_utils.pre_processor import build_preprocessor
+from opencood.data_utils.post_processor import build_postprocessor
+from opencood.utils.camera_utils import (
+    sample_augmentation,
+    img_transform,
+    normalize_img,
+    img_to_tensor,
+)
+from opencood.data_utils.augmentor.data_augmentor import DataAugmentor
+from opencood.utils.transformation_utils import x1_to_x2
+from opencood.utils.pose_utils import add_noise_data_dict
+from opencood.utils.pcd_utils import (
+    mask_points_by_range,
+    mask_ego_points,
+    shuffle_points,
+    downsample_lidar_minimum,
+)
+
+
+
+def getLateclassFusionDataset(cls):
+    """
+    cls: the BaseDataset (or another parent dataset class) providing the basic
+    interfaces, e.g.:
+        - retrieve_base_data()
+        - generate_object_center_single()
+        - self.post_processor
+        - self.pre_processor
+        - self.selector (when the heterogeneous config is used)
+    and so on.
+    """
+    class LateclassFusionDataset(cls):
+        def __init__(self, params, visualize, train=True):
+            super().__init__(params, visualize, train)
+            self.anchor_box = self.post_processor.generate_anchor_box()
+            self.anchor_box_torch = torch.from_numpy(self.anchor_box)
+
+            # whether heterogeneous training is enabled (e.g. some agents use
+            # lidar while others use camera)
+            self.heterogeneous = False
+            if "heter" in params:
+                self.heterogeneous = True
+
+            # whether multi-class detection is enabled
+            self.multiclass = params["model"]["args"].get("multi_class", False)
+
+            # class ID list for the multi-class setting, if needed;
+            # e.g. [0, 1, 3] could correspond to car / pedestrian / cyclist
+            self.class_list = params.get("class_list", [0, 1, 3])
+            # adjust this if the project distinguishes classes as ['all', 0, 1, 3] instead
+
+            # used for visualization
+            self.visualize = visualize
+            self.train = train
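For orientation, a sketch of the configuration keys the constructor above reads; the values here are placeholders, not a real experiment config:

params_sketch = {
    "model": {"args": {"multi_class": True}},  # switches on the multi-class branch
    "class_list": [0, 1, 3],                   # optional, defaults to [0, 1, 3]
    # "heter": {...},                          # mere presence enables heterogeneous mode
}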
+        def __getitem__(self, idx):
+            """
+            Training: randomly pick one CAV for late-fusion supervision
+            (consistent with LateFusionDataset);
+            test / validation: keep every CAV within communication range.
+            """
+            base_data_dict = self.retrieve_base_data(idx)
+            if self.train:
+                reformat_data_dict = self.get_item_train(base_data_dict)
+            else:
+                reformat_data_dict = self.get_item_test(base_data_dict, idx)
+            return reformat_data_dict
+
+        def get_item_train(self, base_data_dict):
+            """
+            Training-time processing: usually only one CAV (with its label) is
+            sampled, which keeps the memory footprint low and stays close to
+            single-vehicle training.
+            """
+            from collections import OrderedDict
+            processed_data_dict = OrderedDict()
+
+            # pose noise (if configured)
+            base_data_dict = self.add_noise_data_if_needed(base_data_dict)
+
+            # randomly pick a single CAV
+            if not self.visualize:
+                selected_cav_id, selected_cav_base = random.choice(
+                    list(base_data_dict.items())
+                )
+            else:
+                # for visualization, the ego vehicle is usually chosen
+                selected_cav_id, selected_cav_base = list(base_data_dict.items())[0]
+
+            # process the single vehicle (including the multi-class bboxes)
+            cav_processed = self.get_item_single_car(selected_cav_base)
+            processed_data_dict["ego"] = cav_processed
+            return processed_data_dict
+
+        def get_item_test(self, base_data_dict, idx):
+            """
+            Test / validation: keep every CAV within comm_range; each of them
+            needs its own late-fusion label.
+            """
+            from collections import OrderedDict
+            import math
+
+            base_data_dict = self.add_noise_data_if_needed(base_data_dict)
+
+            processed_data_dict = OrderedDict()
+            ego_id, ego_pose = -1, None
+            # first find the ego vehicle
+            for cav_id, cav_content in base_data_dict.items():
+                if cav_content["ego"]:
+                    ego_id = cav_id
+                    ego_pose = cav_content["params"]["lidar_pose"]
+                    ego_pose_clean = cav_content["params"]["lidar_pose_clean"]
+                    break
+            assert ego_id != -1
+
+            cav_id_list = []
+            for cav_id, cav_content in base_data_dict.items():
+                distance = math.sqrt(
+                    (cav_content["params"]["lidar_pose"][0] - ego_pose[0]) ** 2
+                    + (cav_content["params"]["lidar_pose"][1] - ego_pose[1]) ** 2
+                )
+                if distance <= self.params["comm_range"]:
+                    cav_id_list.append(cav_id)
+
+            cav_id_list_newname = []
+            for cav_id in cav_id_list:
+                selected_cav_base = base_data_dict[cav_id]
+                transformation_matrix = self.x1_to_x2(
+                    selected_cav_base["params"]["lidar_pose"], ego_pose
+                )
+                transformation_matrix_clean = self.x1_to_x2(
+                    selected_cav_base["params"]["lidar_pose_clean"], ego_pose_clean
+                )
+                cav_processed = self.get_item_single_car(selected_cav_base)
+                cav_processed.update(
+                    {
+                        "transformation_matrix": transformation_matrix,
+                        "transformation_matrix_clean": transformation_matrix_clean,
+                    }
+                )
+                # rename the ego vehicle to "ego", keep the other cav_ids as-is
+                update_cav_key = "ego" if cav_id == ego_id else cav_id
+                processed_data_dict[update_cav_key] = cav_processed
+                cav_id_list_newname.append(update_cav_key)
+
+            # extra information for the heterogeneous setting
+            if self.heterogeneous:
+                processed_data_dict["ego"]["idx"] = idx
+                processed_data_dict["ego"]["cav_list"] = cav_id_list_newname
+
+            return processed_data_dict
+
+        def get_item_single_car(self, cav_base):
+            """
+            Process a single vehicle: generate its (multi-class) labels, lidar
+            data, camera data, etc.
+            """
+            selected_cav_processed = {}
+
+            # 1) generate multi-class or single-class object boxes.
+            #    In the multi-class case, the boxes of each class are stored
+            #    separately, or stacked at once as [num_class, max_box, 7].
+            if self.multiclass:
+                # example: parse the three classes in class_list = [0, 1, 3]
+                # separately; the simplest way is to call
+                # generate_object_center_single once per class on
+                # cav_base["params"]["lidar_pose_clean"] and stack the results
+                all_box_list, all_mask_list, all_ids_list = [], [], []
+                for cls_id in self.class_list:
+                    box_c, mask_c, ids_c = self.generate_object_center_single(
+                        [cav_base],
+                        cav_base["params"]["lidar_pose_clean"],
+                        class_type=cls_id,  # generate_object_center_single can filter by class_type
+                    )
+                    all_box_list.append(box_c)
+                    all_mask_list.append(mask_c)
+                    all_ids_list.append(ids_c)
+
+                # stack into [num_class, max_box, 7] / [num_class, max_box].
+                # Note that each call to generate_object_center_single may return
+                # a different max_box, so the per-class results have to be
+                # zero-padded (or sliced) to a common size, as in the existing
+                # Late/Intermediate fusion implementations; see the sketch below.
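The zero-padding described in the comment above (and implemented by stack_multiclass_label further down this file) can be illustrated with a toy example; the box counts are invented:

import numpy as np

# per-class outputs with 2, 5 and 0 boxes respectively
box_list = [np.ones((2, 7)), np.ones((5, 7)), np.zeros((0, 7))]
mask_list = [np.ones(2), np.ones(5), np.zeros(0)]

M = max(b.shape[0] for b in box_list)                                  # common box count: 5
boxes = np.stack([np.pad(b, ((0, M - b.shape[0]), (0, 0))) for b in box_list])
masks = np.stack([np.pad(m, (0, M - m.shape[0])) for m in mask_list])
print(boxes.shape, masks.shape)                                        # (3, 5, 7) (3, 5)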
+                object_bbx_center, object_bbx_mask = self.stack_multiclass_label(
+                    all_box_list, all_mask_list
+                )
+                # object_ids can be kept as one list per class, or stored as a
+                # single [num_class, ...] structure
+                object_ids = all_ids_list  # special handling is also possible
+            else:
+                # single-class case: one call is enough
+                object_bbx_center, object_bbx_mask, object_ids = (
+                    self.generate_object_center_single(
+                        [cav_base], cav_base["params"]["lidar_pose_clean"]
+                    )
+                )
+
+            # 2) lidar (or camera) processing;
+            #    if lidar is needed, voxelize it via self.pre_processor
+            if self.load_lidar_file or self.visualize:
+                lidar_np = cav_base["lidar_np"]
+                # basic processing such as shuffle_points, mask_points_by_range, mask_ego_points, etc.
+                lidar_np = self.basic_lidar_preprocess(lidar_np)
+                # data augmentation (if needed)
+                lidar_np, object_bbx_center, object_bbx_mask = self.augment_if_needed(
+                    lidar_np, object_bbx_center, object_bbx_mask
+                )
+                # the actual processing, e.g. voxelization / BEV projection
+                processed_lidar = self.pre_processor.preprocess(lidar_np)
+                selected_cav_processed["processed_lidar"] = processed_lidar
+
+                if self.visualize:
+                    selected_cav_processed["origin_lidar"] = lidar_np
+
+            # 3) camera processing
+            if self.load_camera_file:
+                # same logic as in LateFusionDataset
+                camera_inputs = self.process_camera_data(cav_base)
+                selected_cav_processed["image_inputs"] = camera_inputs
+
+            # 4) save the (multi-class) boxes
+            selected_cav_processed.update(
+                {
+                    "object_bbx_center": object_bbx_center,
+                    "object_bbx_mask": object_bbx_mask,
+                    "object_ids": object_ids,
+                }
+            )
+
+            # 5) generate the label; in the multi-class case the label has to be
+            #    multi-class as well
+            if self.multiclass:
+                # wrap post_processor.generate_label(...) so it supports
+                # multi-class, or call it once per class
+                label_dict = self.post_processor.generate_label_multiclass(
+                    object_bbx_center,  # [num_class, max_box, 7]
+                    self.anchor_box,
+                    object_bbx_mask,  # [num_class, max_box]
+                )
+            else:
+                label_dict = self.post_processor.generate_label(
+                    object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask
+                )
+
+            selected_cav_processed["label_dict"] = label_dict
+            return selected_cav_processed
+
+        ############################
+        #  collate_batch helpers   #
+        ############################
+        def collate_batch_train(self, batch):
+            """
+            Collate for the training set:
+            since the train stage of this example only samples one CAV, the
+            items can simply be concatenated along the batch dimension.
+            For true multi-CAV late supervision during training, follow the
+            idea of the test collate instead.
+            """
+            import torch
+            from collections import OrderedDict
+            output_dict = {"ego": {}}
+
+            object_bbx_center_list = []
+            object_bbx_mask_list = []
+            label_dict_list = []
+            origin_lidar_list = []
+
+            processed_lidar_list = []
+
+            for item in batch:
+                ego_data = item["ego"]
+                object_bbx_center_list.append(ego_data["object_bbx_center"])
+                object_bbx_mask_list.append(ego_data["object_bbx_mask"])
+                label_dict_list.append(ego_data["label_dict"])
+
+                if self.visualize and "origin_lidar" in ego_data:
+                    origin_lidar_list.append(ego_data["origin_lidar"])
+
+                if "processed_lidar" in ego_data:
+                    processed_lidar_list.append(ego_data["processed_lidar"])
+
+            # convert to tensors
+            object_bbx_center_torch = self.list_to_tensor(object_bbx_center_list)
+            object_bbx_mask_torch = self.list_to_tensor(object_bbx_mask_list)
+
+            # collate of the multi-class (or single-class) labels
+            label_torch_dict = self.post_processor.collate_batch(label_dict_list)
+            # for centerpoint, object_bbx_center_torch etc. also have to be merged
+            # into label_torch_dict
+            label_torch_dict.update(
+                {
+                    "object_bbx_center": object_bbx_center_torch,
+                    "object_bbx_mask": object_bbx_mask_torch,
+                }
+            )
+
+            output_dict["ego"].update(
+                {
+                    "object_bbx_center": object_bbx_center_torch,
+                    "object_bbx_mask": object_bbx_mask_torch,
+                    "anchor_box": torch.from_numpy(self.anchor_box),
+                    "label_dict": label_torch_dict,
+                }
+            )
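list_to_tensor is defined outside this excerpt; assuming it mirrors the torch.from_numpy(np.array(...)) pattern used by the sibling datasets in this patch, the batching above amounts to the following shape-only sketch (sizes invented):

import numpy as np
import torch

# two batch items, each holding [num_class=3, max_box=5, 7] boxes
items = [np.zeros((3, 5, 7), dtype=np.float32), np.zeros((3, 5, 7), dtype=np.float32)]
object_bbx_center = torch.from_numpy(np.stack(items))   # [B, num_class, max_box, 7]
print(object_bbx_center.shape)                          # torch.Size([2, 3, 5, 7])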
+            # lidar
+            if len(processed_lidar_list) > 0:
+                processed_lidar_torch_dict = self.pre_processor.collate_batch(
+                    processed_lidar_list
+                )
+                output_dict["ego"]["processed_lidar"] = processed_lidar_torch_dict
+
+            # camera
+            if self.load_camera_file:
+                # like LateFusionDataset: concatenate the camera information of
+                # the batch along the proper dimensions
+                camera_inputs = self.collate_camera_inputs_train(batch)
+                output_dict["ego"]["image_inputs"] = camera_inputs
+
+            # visualization
+            if self.visualize and len(origin_lidar_list) > 0:
+                # downsample here if needed
+                origin_lidar_torch = self.list_to_tensor(origin_lidar_list)
+                output_dict["ego"]["origin_lidar"] = origin_lidar_torch
+
+            return output_dict
+
+        def collate_batch_test(self, batch):
+            """
+            Collate for the test (or validation) set:
+            usually only batch_size=1 is supported (especially with multiple
+            CAVs); every CAV is then taken out and collated individually for
+            late fusion.
+            """
+            assert len(batch) == 1, "Test time batch_size must be 1 for late fusion!"
+            batch = batch[0]
+
+            output_dict = {}
+            # heterogeneous
+            if self.heterogeneous and "idx" in batch["ego"]:
+                idx = batch["ego"]["idx"]
+                cav_list = batch["ego"]["cav_list"]
+                # decide which cavs use lidar / camera
+                # lidar_agent, camera_agent = self.selector.select_agent(idx)
+                # ...
+
+            # collect and collate
+            if self.visualize:
+                import copy
+                projected_lidar_list = []
+
+            for cav_id, cav_content in batch.items():
+                output_dict[cav_id] = {}
+                # turn object_bbx_center/mask into [1, ...]
+                object_bbx_center = self.unsqueeze_to_batch(cav_content["object_bbx_center"])
+                object_bbx_mask = self.unsqueeze_to_batch(cav_content["object_bbx_mask"])
+
+                label_dict = self.post_processor.collate_batch([cav_content["label_dict"]])
+                # centerpoint needs object_bbx_center/mask inside label_dict as well
+                label_dict.update(
+                    {
+                        "object_bbx_center": object_bbx_center,
+                        "object_bbx_mask": object_bbx_mask,
+                    }
+                )
+
+                # lidar
+                if "processed_lidar" in cav_content:
+                    # only a single cav's processed_lidar
+                    processed_lidar_torch = self.pre_processor.collate_batch(
+                        [cav_content["processed_lidar"]]
+                    )
+                    output_dict[cav_id]["processed_lidar"] = processed_lidar_torch
+
+                # camera
+                if self.load_camera_file and "image_inputs" in cav_content:
+                    # likewise, only a single cav is collated
+                    cam_torch = self.collate_camera_inputs_test(cav_content)
+                    output_dict[cav_id]["image_inputs"] = cam_torch
+
+                # heterogeneous: keep or drop entries depending on cav_id
+                # if self.heterogeneous:
+                #     pass
+
+                # save the transformation matrices
+                output_dict[cav_id]["transformation_matrix"] = torch.from_numpy(
+                    cav_content["transformation_matrix"]
+                ).float()
+                output_dict[cav_id]["transformation_matrix_clean"] = torch.from_numpy(
+                    cav_content["transformation_matrix_clean"]
+                ).float()
+
+                # label + other information
+                output_dict[cav_id].update(
+                    {
+                        "object_bbx_center": object_bbx_center,
+                        "object_bbx_mask": object_bbx_mask,
+                        "label_dict": label_dict,
+                        "anchor_box": self.anchor_box_torch,
+                        "object_ids": cav_content["object_ids"],
+                    }
+                )
+
+                if self.visualize and "origin_lidar" in cav_content:
+                    output_dict[cav_id]["origin_lidar"] = torch.from_numpy(
+                        cav_content["origin_lidar"]
+                    )
+
+            # to stack the point clouds of several cavs onto the ego for
+            # visualization, do the concatenation here
+            return output_dict
+
+        ######################################
+        #    multi-class post-processing     #
+        ######################################
+        def post_process(self, data_dict, output_dict):
+            """
+            For multi-class detection, dispatch to self.post_process_multiclass;
+            otherwise this is identical to ordinary late fusion.
+            """
+            if self.multiclass:
+                # returns [list of pred_box], [list of score], [list of gt_box],
+                # one entry per class
+                return self.post_process_multiclass(data_dict, output_dict)
+            else:
+                pred_box, pred_score = self.post_processor.post_process(data_dict, output_dict)
+                gt_box = self.post_processor.generate_gt_bbx(data_dict)
+                return pred_box, pred_score, gt_box
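post_process_multiclass (next) delegates the per-class slicing of the head outputs to split_single_class; a shape-only sketch with invented dimensions shows the indexing pattern:

import torch

cls_preds = torch.rand(1, 3, 4, 4)              # [B, num_class, H, W]      (invented shapes)
reg_preds_multiclass = torch.rand(1, 3, 4, 14)  # [B, num_class, ..., Nreg]  (invented shapes)

class_index = 1
cls_preds_single = cls_preds[:, class_index:class_index + 1, :, :]  # keeps the class dim: [1, 1, 4, 4]
reg_preds_single = reg_preds_multiclass[:, class_index, :, :]       # drops it: [1, 4, 14]
print(cls_preds_single.shape, reg_preds_single.shape)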
+            """
+            Multi-class post-processing: run NMS (or similar) once per class,
+            then return the per-class results together.
+            """
+            import copy
+
+            # num_class = len(self.class_list)
+            pred_box_tensor_list = []
+            pred_score_list = []
+            gt_box_tensor_list = []
+
+            # post-process each class independently
+            for i, cls_id in enumerate(self.class_list):
+                # 1) copy out the data that belongs to this class only
+                data_dict_single, output_dict_single = self.split_single_class(
+                    data_dict, output_dict, class_index=i
+                )
+                # 2) run the post-processing
+                pred_box_tensor, pred_score = self.post_processor.post_process(
+                    data_dict_single, output_dict_single
+                )
+                gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict_single)
+
+                pred_box_tensor_list.append(pred_box_tensor)
+                pred_score_list.append(pred_score)
+                gt_box_tensor_list.append(gt_box_tensor)
+
+            return pred_box_tensor_list, pred_score_list, gt_box_tensor_list
+
+        ############################################
+        # reusable / simplified helpers below (adapt to your project) #
+        ############################################
+        def add_noise_data_if_needed(self, base_data_dict):
+            """
+            Decide from self.params["noise_setting"] whether to perturb the poses
+            with noise. Simply calls the existing add_noise_data_dict or
+            add_noise_data_dict_asymmetric.
+            """
+            from opencood.utils.pose_utils import add_noise_data_dict
+            # swap in the asymmetric variant if asymmetric noise is desired
+            return add_noise_data_dict(base_data_dict, self.params["noise_setting"])
+
+        def basic_lidar_preprocess(self, lidar_np):
+            """
+            Common point-cloud preprocessing such as range cropping, shuffling and
+            removing the ego-vehicle points.
+            """
+            from opencood.utils.pcd_utils import (
+                shuffle_points,
+                mask_points_by_range,
+                mask_ego_points,
+            )
+            lidar_np = shuffle_points(lidar_np)
+            lidar_np = mask_points_by_range(lidar_np, self.params["preprocess"]["cav_lidar_range"])
+            lidar_np = mask_ego_points(lidar_np)
+            return lidar_np
+
+        def augment_if_needed(self, lidar_np, object_bbx_center, object_bbx_mask):
+            """
+            If self.train and no heterogeneous setup is used, augment the
+            point cloud / labels.
+            """
+            if self.train and not self.heterogeneous:
+                lidar_np, object_bbx_center, object_bbx_mask = self.augment(
+                    lidar_np, object_bbx_center, object_bbx_mask
+                )
+            return lidar_np, object_bbx_center, object_bbx_mask
+
+        def process_camera_data(self, cav_base):
+            """
+            Augment the camera images according to the parameters (resizing,
+            cropping, flipping, etc.) and return them as a dict.
+            See the LateFusionDataset / LSS pipeline for reference.
+            """
+            # simplified example only; see get_item_single_car -> process_camera_data
+            # in the original LateFusionDataset for the full implementation
+            camera_data_list = cav_base["camera_data"]
+            # ... apply augmentation and transforms ...
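+            # A sketch of the structure this stub is expected to return (assuming an
+            # LSS-style camera pipeline): every value is stacked over the N cameras,
+            # e.g. imgs [N, 3, H, W], intrins [N, 3, 3], rots/trans from camera to
+            # lidar, and post_rots/post_trans recording the image augmentation.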
+ camera_inputs = {"imgs": None, "rots": None, ...} + return camera_inputs + + def collate_camera_inputs_train(self, batch): + """ + 将 train batch 里多帧图像按维度拼接,比如 [B, N, C, H, W] + """ + # 略,参考 LateFusionDataset 的 collate_batch_train + return {} + + def collate_camera_inputs_test(self, cav_content): + """ + 测试阶段只 collate 单个 cav + """ + # 参考 LateFusionDataset 的 collate_batch_test + return {} + + def stack_multiclass_label(self, box_list, mask_list): + """ + 输入是一个 list,每个元素是 (max_box, 7)/(max_box,), + 最终拼成 [num_class, max_box, 7] / [num_class, max_box]。 + 若每个类别分配的 max_box 不同,需要先找最大值再做 padding。 + """ + import numpy as np + num_class = len(box_list) + max_box_counts = [b.shape[0] for b in box_list] + M = max(max_box_counts) if max_box_counts else 0 + + # 组合 + box_array = [] + mask_array = [] + for i in range(num_class): + cur_box = box_list[i] + cur_mask = mask_list[i] + pad_size = M - cur_box.shape[0] + if pad_size > 0: + # 在 0 处 padding + cur_box = np.concatenate( + [cur_box, np.zeros((pad_size, 7), dtype=cur_box.dtype)], axis=0 + ) + cur_mask = np.concatenate( + [cur_mask, np.zeros(pad_size, dtype=cur_mask.dtype)], axis=0 + ) + box_array.append(cur_box[None, ...]) # [1, M, 7] + mask_array.append(cur_mask[None, ...]) # [1, M] + + if len(box_array) == 0: + # 说明没对象 + return np.zeros((0, 0, 7)), np.zeros((0, 0)) + + box_array = np.concatenate(box_array, axis=0) # [num_class, M, 7] + mask_array = np.concatenate(mask_array, axis=0) # [num_class, M] + return box_array, mask_array + + def split_single_class(self, data_dict, output_dict, class_index): + """ + post_process_multiclass 用到: + 将 data_dict/output_dict 中多类别的 object_bbx_center/mask + 拆分出第 class_index 个类别的子数据,以便单独跑 NMS。 + """ + import copy + data_dict_single = {"ego": {}} + output_dict_single = {} + + # 遍历所有 cav (late fusion) + for cav_id in data_dict.keys(): + cav_content = data_dict[cav_id] + cav_output = output_dict[cav_id] + + # 如果 object_bbx_center 是 [num_class, M, 7],mask 是 [num_class, M] + # 拆分出 cav_idx = class_index 这一路 + single_box_center = cav_content["object_bbx_center"][class_index, ...] + single_mask = cav_content["object_bbx_mask"][class_index, ...] + # object_ids 如果是按类别存储的list,可按 class_index 取即可 + # 如果合并一起,需要自己额外做记录 + if isinstance(cav_content["object_ids"], list): + single_ids = cav_content["object_ids"][class_index] + else: + single_ids = cav_content["object_ids"] # 或者看具体储存方式 + + # 类似地,对网络输出 cls_preds, reg_preds_multiclass 都要取第 class_index 路 + # 具体看原网络 forward 的输出 shape + cls_preds_single = cav_output["cls_preds"][ + :, class_index : class_index + 1, :, : + ] # e.g. [B,1,H,W] + reg_preds_single = cav_output["reg_preds_multiclass"][ + :, class_index, :, : + ] # [B,H,W,Nreg] + + # 构造新的 data_dict_single / output_dict_single + data_dict_single[cav_id] = copy.deepcopy(cav_content) + data_dict_single[cav_id]["object_bbx_center"] = single_box_center[None, ...] # 保留一个 batch 维 + data_dict_single[cav_id]["object_bbx_mask"] = single_mask[None, ...] 
+                data_dict_single[cav_id]["object_ids"] = single_ids
+
+                output_dict_single[cav_id] = copy.deepcopy(cav_output)
+                output_dict_single[cav_id]["cls_preds"] = cls_preds_single
+                output_dict_single[cav_id]["reg_preds"] = reg_preds_single
+
+            return data_dict_single, output_dict_single
+
+        ###################################################
+        # utility functions (same as in the original LateFusionDataset / intermediate classes) #
+        ###################################################
+        def x1_to_x2(self, lidar_pose1, lidar_pose2):
+            """
+            Pose transformation matrix, identical to
+            opencood.utils.transformation_utils.x1_to_x2.
+            """
+            return x1_to_x2(lidar_pose1, lidar_pose2)
+
+        def list_to_tensor(self, data_list):
+            """
+            Simple helper that turns a list of np.array into a torch.Tensor
+            for batch stacking.
+            """
+            import numpy as np
+            import torch
+            if len(data_list) == 0:
+                return None
+            arr = np.stack(data_list, axis=0)
+            return torch.from_numpy(arr)
+
+        def unsqueeze_to_batch(self, arr):
+            """
+            If arr is an np.ndarray, expand it to [1, ...] and convert it to torch.
+            """
+            import numpy as np
+            import torch
+            if isinstance(arr, np.ndarray):
+                arr = arr[None, ...]  # prepend a batch dimension
+                arr = torch.from_numpy(arr)
+            elif isinstance(arr, torch.Tensor) and arr.dim() == 2:
+                # [M,7] -> [1,M,7]
+                arr = arr.unsqueeze(0)
+            return arr
+
+    return LateMultiFusionDataset
\ No newline at end of file
diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_multiclass_fusion_dataset.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_multiclass_fusion_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..7cf62fac764d4722b466a966965820d638b7cbdc
--- /dev/null
+++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/datasets/late_multiclass_fusion_dataset.py
@@ -0,0 +1,1233 @@
+# late fusion dataset
+import random
+import math
+from collections import OrderedDict
+import cv2
+import numpy as np
+import torch
+import copy
+from icecream import ic
+from PIL import Image
+import pickle as pkl
+from opencood.utils import box_utils as box_utils
+from opencood.data_utils.pre_processor import build_preprocessor
+from opencood.data_utils.post_processor import build_postprocessor
+from opencood.utils.camera_utils import (
+    sample_augmentation,
+    img_transform,
+    normalize_img,
+    img_to_tensor,
+)
+from opencood.data_utils.augmentor.data_augmentor import DataAugmentor
+from opencood.utils.transformation_utils import x1_to_x2, x_to_world, get_pairwise_transformation
+from opencood.utils.pose_utils import add_noise_data_dict, add_noise_data_dict_asymmetric
+from opencood.utils.pcd_utils import (
+    mask_points_by_range,
+    mask_ego_points,
+    mask_ego_points_v2,
+    shuffle_points,
+    downsample_lidar_minimum,
+)
+from opencood.utils.common_utils import merge_features_to_dict
+
+def getLatemulticlassFusionDataset(cls):
+    """
+    cls: the Basedataset.
+ """ + class LatemulticlassFusionDataset(cls): + def __init__(self, params, visualize, train=True): + super().__init__(params, visualize, train) + self.anchor_box = self.post_processor.generate_anchor_box() + self.anchor_box_torch = torch.from_numpy(self.anchor_box) + + self.heterogeneous = False + if 'heter' in params: + self.heterogeneous = True + + self.multiclass = params['model']['args']['multi_class'] + + self.proj_first = False if 'proj_first' not in params['fusion']['args']\ + else params['fusion']['args']['proj_first'] + + # self.proj_first = False + self.supervise_single = True if ('supervise_single' in params['model']['args'] and params['model']['args']['supervise_single']) \ + else False + # self.supervise_single = False + self.online_eval_only = False + + + def __getitem__(self, idx, extra_source=None, data_dir=None): + + if data_dir is not None: + extra_source=1 + + object_bbx_center_list = [] + object_bbx_mask_list = [] + object_id_dict = {} + + object_bbx_center_list_single = [] + object_bbx_mask_list_single = [] + + gt_object_bbx_center_list = [] + gt_object_bbx_mask_list = [] + gt_object_id_dict = {} + + gt_object_bbx_center_list_single = [] + gt_object_bbx_mask_list_single = [] + + output_dict = {} + for tpe in ['all', 0, 1, 3]: + output_single_class = self.__getitem_single_class__(idx, tpe, extra_source, data_dir) + output_dict[tpe] = output_single_class + if tpe == 'all' and extra_source is None: + continue + elif tpe == 'all' and extra_source is not None: + break + object_bbx_center_list.append(output_single_class['ego']['object_bbx_center']) + object_bbx_mask_list.append(output_single_class['ego']['object_bbx_mask']) + object_id_dict[tpe] = output_single_class['ego']['object_ids'] + + gt_object_bbx_center_list.append(output_single_class['ego']['gt_object_bbx_center']) + gt_object_bbx_mask_list.append(output_single_class['ego']['gt_object_bbx_mask']) + gt_object_id_dict[tpe] = output_single_class['ego']['gt_object_ids'] + + if self.multiclass and extra_source is None: + output_dict['all']['ego']['object_bbx_center'] = np.stack(object_bbx_center_list, axis=0) + output_dict['all']['ego']['object_bbx_mask'] = np.stack(object_bbx_mask_list, axis=0) + output_dict['all']['ego']['object_ids'] = object_id_dict + + output_dict['all']['ego']['gt_object_bbx_center'] = np.stack(gt_object_bbx_center_list, axis=0) + output_dict['all']['ego']['gt_object_bbx_mask'] = np.stack(gt_object_bbx_mask_list, axis=0) + output_dict['all']['ego']['gt_object_ids'] = gt_object_id_dict + + + return output_dict['all'] + + def __getitem_single_class__(self, idx, tpe=None, extra_source=None, data_dir=None): + + if extra_source is None and data_dir is None: + base_data_dict = self.retrieve_base_data(idx, tpe) ## {id:{'ego':True/False, 'params': {'lidar_pose','speed','vehicles','ego_pos',...}, 'lidar_np': array (N,4)}} + elif data_dir is not None: + base_data_dict = self.retrieve_base_data(idx=None, tpe=tpe, data_dir=data_dir) + elif extra_source is not None: + base_data_dict = self.retrieve_base_data(idx=None, tpe=tpe, extra_source=extra_source) + + # base_data_dict = add_noise_data_dict(base_data_dict,self.params['noise_setting']) + base_data_dict = add_noise_data_dict_asymmetric(base_data_dict,self.params['noise_setting']) + processed_data_dict = OrderedDict() + processed_data_dict['ego'] = {} + ego_id = -1 + ego_lidar_pose = [] + ego_cav_base = None + cav_id_list = [] + lidar_pose_list = [] + too_far = [] + # first find the ego vehicle's lidar pose + for cav_id, cav_content in 
base_data_dict.items(): + + if cav_content['ego']: + ego_id = cav_id + ego_lidar_pose = cav_content['params']['lidar_pose'] + ego_lidar_pose_clean = cav_content['params']['lidar_pose_clean'] + ego_cav_base = cav_content + break + + assert ego_id != -1 + assert len(ego_lidar_pose) > 0 + + agents_image_inputs = [] + processed_features = [] + object_stack = [] + object_mask_stack = [] + object_id_stack = [] + + gt_object_stack = [] + gt_object_mask_stack = [] + gt_object_id_stack = [] + + single_label_list = [] + single_object_bbx_center_list = [] + single_object_bbx_mask_list = [] + too_far = [] + lidar_pose_list = [] + lidar_pose_clean_list = [] + cav_id_list = [] + projected_lidar_clean_list = [] # disconet + + if self.visualize: + projected_lidar_stack = [] + + # loop over all CAVs to process information + for cav_id, selected_cav_base in base_data_dict.items(): + distance = \ + math.sqrt((selected_cav_base['params']['lidar_pose'][0] - + ego_lidar_pose[0]) ** 2 + ( + selected_cav_base['params'][ + 'lidar_pose'][1] - ego_lidar_pose[ + 1]) ** 2) + if distance > self.params['comm_range']: + too_far.append(cav_id) + continue + cav_id_list.append(cav_id) + lidar_pose_list.append(selected_cav_base['params']['lidar_pose']) + lidar_pose_clean_list.append(selected_cav_base['params']['lidar_pose_clean']) + + for cav_id in too_far: + base_data_dict.pop(cav_id) + + pairwise_t_matrix = \ + get_pairwise_transformation(base_data_dict, + self.max_cav, + self.proj_first) + cav_num = len(cav_id_list) + cav_id_list_newname = [] + + lidar_poses = np.array(lidar_pose_list).reshape(-1, 6) # [N_cav, 6] + lidar_poses_clean = np.array(lidar_pose_clean_list).reshape(-1, 6) # [N_cav, 6] + + for cav_id in cav_id_list: + selected_cav_base = base_data_dict[cav_id] + # find the transformation matrix from current cav to ego. 
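+                # x1_to_x2 builds the 4x4 homogeneous transform that maps points from this
+                # CAV's lidar frame into the ego lidar frame; the "clean" variant below uses
+                # the noise-free poses so ground-truth boxes can still be projected exactly.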
+ cav_lidar_pose = selected_cav_base['params']['lidar_pose'] + transformation_matrix = x1_to_x2(cav_lidar_pose, ego_lidar_pose) + cav_lidar_pose_clean = selected_cav_base['params']['lidar_pose_clean'] + transformation_matrix_clean = x1_to_x2(cav_lidar_pose_clean, ego_lidar_pose_clean) + + selected_cav_processed = \ + self.get_item_single_car(selected_cav_base, + ego_cav_base, + tpe, + extra_source!=None) + selected_cav_processed.update({'transformation_matrix': transformation_matrix, + 'transformation_matrix_clean': transformation_matrix_clean}) + if extra_source is None: + object_stack.append(selected_cav_processed['object_bbx_center']) + object_mask_stack.append(selected_cav_processed['object_bbx_mask']) + object_id_stack += selected_cav_processed['object_ids'] + + + gt_object_stack.append(selected_cav_processed['gt_object_bbx_center']) + gt_object_mask_stack.append(selected_cav_processed['gt_object_bbx_mask']) + gt_object_id_stack += selected_cav_processed['gt_object_ids'] + + if tpe == 'all': + + if self.load_lidar_file: + processed_features.append( + selected_cav_processed['processed_lidar']) + + if self.load_camera_file: + agents_image_inputs.append( + selected_cav_processed['image_inputs']) + + if self.visualize: + projected_lidar_stack.append( + selected_cav_processed['projected_lidar']) + + + if self.supervise_single and extra_source is None : + single_label_list.append(selected_cav_processed['single_label_dict']) + single_object_bbx_center_list.append(selected_cav_processed['single_object_bbx_center']) + single_object_bbx_mask_list.append(selected_cav_processed['single_object_bbx_mask']) + + update_cav = "ego" if cav_id == ego_id else cav_id + processed_data_dict.update({update_cav: selected_cav_processed}) + cav_id_list_newname.append(update_cav) + + if self.supervise_single and extra_source is None: + single_label_dicts = {} + if tpe == 'all': + # unused label + if False: + single_label_dicts = self.post_processor.collate_batch(single_label_list) + single_object_bbx_center = torch.from_numpy(np.array(single_object_bbx_center_list)) + single_object_bbx_mask = torch.from_numpy(np.array(single_object_bbx_mask_list)) + processed_data_dict['ego'].update({ + "single_label_dict_torch": single_label_dicts, + "single_object_bbx_center_torch": single_object_bbx_center, + "single_object_bbx_mask_torch": single_object_bbx_mask, + }) + + # heterogeneous + if self.heterogeneous: + processed_data_dict['ego']['idx'] = idx + processed_data_dict['ego']['cav_list'] = cav_id_list_newname + + if extra_source is None: + unique_indices = \ + [object_id_stack.index(x) for x in set(object_id_stack)] + object_stack = np.vstack(object_stack) + object_mask_stack = np.concatenate(object_mask_stack) + object_stack = object_stack[unique_indices] + object_mask_stack = object_mask_stack[unique_indices] + + # make sure bounding boxes across all frames have the same number + object_bbx_center = \ + np.zeros((self.params['postprocess']['max_num'], 7)) + mask = np.zeros(self.params['postprocess']['max_num']) + object_bbx_center[:object_stack.shape[0], :] = object_stack + mask[:object_mask_stack.shape[0]] = object_mask_stack + # mask[:object_mask_stack.shape[0]] = 1 + + gt_unique_indices = \ + [gt_object_id_stack.index(x) for x in set(gt_object_id_stack)] + gt_object_stack = np.vstack(gt_object_stack) + gt_object_mask_stack = np.concatenate(gt_object_mask_stack) + gt_object_stack = gt_object_stack[gt_unique_indices] + gt_object_mask_stack = gt_object_mask_stack[unique_indices] + + # make sure bounding boxes across 
all frames have the same number + gt_object_bbx_center = \ + np.zeros((self.params['postprocess']['max_num'], 7)) + gt_mask = np.zeros(self.params['postprocess']['max_num']) + gt_object_bbx_center[:gt_object_stack.shape[0], :] = gt_object_stack + gt_mask[:gt_object_mask_stack.shape[0]] = gt_object_mask_stack + # gt_mask[:gt_object_mask_stack.shape[0]] = 1 + + processed_data_dict['ego'].update( + {'object_bbx_center': object_bbx_center, # (100,7) + 'object_bbx_mask': mask, # (100,) + 'object_ids': [object_id_stack[i] for i in unique_indices], + } + ) + + # generate targets label + label_dict = {} + # if tpe == 'all': + # unused label + if extra_source is None: + label_dict = \ + self.post_processor.generate_label( + gt_box_center=object_bbx_center, + anchors=self.anchor_box, + mask=mask) + gt_label_dict = \ + self.post_processor.generate_label( + gt_box_center=gt_object_bbx_center, + anchors=self.anchor_box, + mask=gt_mask) + + + processed_data_dict['ego'].update( + {'gt_object_bbx_center': gt_object_bbx_center, # (100,7) + 'gt_object_bbx_mask': gt_mask, # (100,) + 'gt_object_ids': [gt_object_id_stack[i] for i in gt_unique_indices], + 'gt_label_dict': gt_label_dict}) + + processed_data_dict['ego'].update( + { + 'anchor_box': self.anchor_box, + 'label_dict': label_dict, + 'cav_num': cav_num, + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_poses_clean': lidar_poses_clean, + 'lidar_poses': lidar_poses}) + + if tpe == 'all': + if self.load_lidar_file: + merged_feature_dict = merge_features_to_dict(processed_features) + processed_data_dict['ego'].update({'processed_lidar': merged_feature_dict}) + + if self.load_camera_file: + merged_image_inputs_dict = merge_features_to_dict(agents_image_inputs, merge='stack') + processed_data_dict['ego'].update({'image_inputs': merged_image_inputs_dict}) + + if self.visualize: + processed_data_dict['ego'].update({'origin_lidar': + # projected_lidar_stack}) + np.vstack( + projected_lidar_stack)}) + processed_data_dict['ego'].update({'lidar_len': [len(projected_lidar_stack[i]) for i in range(len(projected_lidar_stack))]}) + + + processed_data_dict['ego'].update({'sample_idx': idx, + 'cav_id_list': cav_id_list}) + + img_front_list = [] + img_left_list = [] + img_right_list = [] + BEV_list = [] + + if self.visualize: + for car_id in base_data_dict: + if not base_data_dict[car_id]['ego'] == True: + continue + if 'rgb_front' in base_data_dict[car_id] and 'rgb_left' in base_data_dict[car_id] and 'rgb_right' in base_data_dict[car_id] and 'BEV' in base_data_dict[car_id] : + img_front_list.append(base_data_dict[car_id]['rgb_front']) + img_left_list.append(base_data_dict[car_id]['rgb_left']) + img_right_list.append(base_data_dict[car_id]['rgb_right']) + BEV_list.append(base_data_dict[car_id]['BEV']) + processed_data_dict['ego'].update({'img_front': img_front_list, + 'img_left': img_left_list, + 'img_right': img_right_list, + 'BEV': BEV_list}) + processed_data_dict['ego'].update({'scene_dict': base_data_dict['car_0']['scene_dict'], + 'frame_id': base_data_dict['car_0']['frame_id'], + }) + + return processed_data_dict + + def get_item_single_car(self, selected_cav_base, ego_cav_base, tpe, online_eval=False): + """ + Process a single CAV's information for the train/test pipeline. + + + Parameters + ---------- + selected_cav_base : dict + The dictionary contains a single CAV's raw information. + including 'params', 'camera_data' + + Returns + ------- + selected_cav_processed : dict + The dictionary contains the cav's processed information. 
+ """ + selected_cav_processed = {} + + if not online_eval: + # label + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center_single( + [selected_cav_base], selected_cav_base["params"]["lidar_pose_clean"] + ) + + ego_pose, ego_pose_clean = ego_cav_base['params']['lidar_pose'], ego_cav_base['params']['lidar_pose_clean'] + + + # calculate the transformation matrix + transformation_matrix = \ + x1_to_x2(selected_cav_base['params']['lidar_pose'], + ego_pose) # T_ego_cav + transformation_matrix_clean = \ + x1_to_x2(selected_cav_base['params']['lidar_pose_clean'], + ego_pose_clean) + + # lidar + if tpe == 'all': + if self.load_lidar_file or self.visualize: + lidar_np = selected_cav_base['lidar_np'] + lidar_np = shuffle_points(lidar_np) + lidar_np = mask_points_by_range(lidar_np, + self.params['preprocess'][ + 'cav_lidar_range']) + # remove points that hit ego vehicle + lidar_np = mask_ego_points_v2(lidar_np) + + # data augmentation, seems very important for single agent training, because lack of data diversity. + # only work for lidar modality in training. + if not self.heterogeneous and not online_eval: + lidar_np, object_bbx_center, object_bbx_mask = \ + self.augment(lidar_np, object_bbx_center, object_bbx_mask) + + projected_lidar = \ + box_utils.project_points_by_matrix_torch(lidar_np[:, :3], transformation_matrix) + + if self.proj_first: + lidar_np[:, :3] = projected_lidar + + if self.visualize: + # filter lidar + selected_cav_processed.update({'projected_lidar': projected_lidar}) + + lidar_dict = self.pre_processor.preprocess(lidar_np) + selected_cav_processed.update({'processed_lidar': lidar_dict}) + + if self.visualize: + selected_cav_processed.update({'origin_lidar': lidar_np}) + + if not online_eval: + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center( + [selected_cav_base], selected_cav_base['params']['lidar_pose'] + ) + + gt_object_bbx_center, gt_object_bbx_mask, gt_object_ids = self.generate_object_center( + [selected_cav_base], selected_cav_base['params']['lidar_pose'] + ) + + label_dict = self.post_processor.generate_label( + gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask + ) + + gt_label_dict = self.post_processor.generate_label( + gt_box_center=gt_object_bbx_center, anchors=self.anchor_box, mask=gt_object_bbx_mask + ) + + selected_cav_processed.update({ + "single_label_dict": label_dict, + "single_object_bbx_center": object_bbx_center, + "single_object_bbx_mask": object_bbx_mask}) + + # camera + if tpe == 'all': + if self.load_camera_file: + # adapted from https://github.com/nv-tlabs/lift-splat-shoot/blob/master/src/data.py + camera_data_list = selected_cav_base["camera_data"] + + params = selected_cav_base["params"] + imgs = [] + rots = [] + trans = [] + intrins = [] + extrinsics = [] # cam_to_lidar + post_rots = [] + post_trans = [] + + for idx, img in enumerate(camera_data_list): + camera_to_lidar, camera_intrinsic = self.get_ext_int(params, idx) + + intrin = torch.from_numpy(camera_intrinsic) + rot = torch.from_numpy( + camera_to_lidar[:3, :3] + ) # R_wc, we consider world-coord is the lidar-coord + tran = torch.from_numpy(camera_to_lidar[:3, 3]) # T_wc + + post_rot = torch.eye(2) + post_tran = torch.zeros(2) + + img_src = [img] + + # depth + if self.load_depth_file: + depth_img = selected_cav_base["depth_data"][idx] + img_src.append(depth_img) + else: + depth_img = None + + # data augmentation + resize, resize_dims, crop, flip, rotate = sample_augmentation( + self.data_aug_conf, self.train + ) 
+ img_src, post_rot2, post_tran2 = img_transform( + img_src, + post_rot, + post_tran, + resize=resize, + resize_dims=resize_dims, + crop=crop, + flip=flip, + rotate=rotate, + ) + # for convenience, make augmentation matrices 3x3 + post_tran = torch.zeros(3) + post_rot = torch.eye(3) + post_tran[:2] = post_tran2 + post_rot[:2, :2] = post_rot2 + + img_src[0] = normalize_img(img_src[0]) + if self.load_depth_file: + img_src[1] = img_to_tensor(img_src[1]) * 255 + + imgs.append(torch.cat(img_src, dim=0)) + intrins.append(intrin) + extrinsics.append(torch.from_numpy(camera_to_lidar)) + rots.append(rot) + trans.append(tran) + post_rots.append(post_rot) + post_trans.append(post_tran) + + selected_cav_processed.update( + { + "image_inputs": + { + "imgs": torch.stack(imgs), # [N, 3or4, H, W] + "intrins": torch.stack(intrins), + "extrinsics": torch.stack(extrinsics), + "rots": torch.stack(rots), + "trans": torch.stack(trans), + "post_rots": torch.stack(post_rots), + "post_trans": torch.stack(post_trans), + } + } + ) + + selected_cav_processed.update({"anchor_box": self.anchor_box}) + + if not online_eval: + object_bbx_center, object_bbx_mask, object_ids = self.generate_object_center([selected_cav_base], + ego_pose_clean) + + gt_object_bbx_center, gt_object_bbx_mask, gt_object_ids = self.generate_object_center([selected_cav_base], + ego_pose_clean) + selected_cav_processed.update( + { + "object_bbx_center": object_bbx_center, + "object_bbx_mask": object_bbx_mask, + "object_ids": object_ids, + } + ) + + selected_cav_processed.update( + { + "gt_object_bbx_center": gt_object_bbx_center[gt_object_bbx_mask == 1], + "gt_object_bbx_mask": gt_object_bbx_mask, + "gt_object_ids": gt_object_ids + } + ) + + # generate targets label + label_dict = self.post_processor.generate_label( + gt_box_center=object_bbx_center, anchors=self.anchor_box, mask=object_bbx_mask + ) + selected_cav_processed.update({"label_dict": label_dict}) + + selected_cav_processed.update( + { + 'transformation_matrix': transformation_matrix, + 'transformation_matrix_clean': transformation_matrix_clean + } + ) + + return selected_cav_processed + + + def collate_batch_train(self, batch, online_eval_only=False): + """ + Customized collate function for pytorch dataloader during training + for early and late fusion dataset. + + Parameters + ---------- + batch : dict + + Returns + ------- + batch : dict + Reformatted batch. + """ + # during training, we only care about ego. 
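+            # Shapes after this collate (B = batch size): object_bbx_center is
+            # [B, num_class, max_num, 7] in the multi-class case, object_bbx_mask is
+            # [B, num_class, max_num], and record_len holds the number of CAVs per sample.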
+ output_dict = {'ego': {}} + + object_bbx_center = [] + object_bbx_mask = [] + processed_lidar_list = [] + label_dict_list = [] + origin_lidar = [] + + gt_object_bbx_center = [] + gt_object_bbx_mask = [] + gt_object_ids = [] + gt_label_dict_list = [] + record_len = [] + + object_ids = [] + image_inputs_list = [] + # used to record different scenario + record_len = [] + label_dict_list = [] + lidar_pose_list = [] + origin_lidar = [] + lidar_len = [] + lidar_pose_clean_list = [] + + # heterogeneous + lidar_agent_list = [] + + # pairwise transformation matrix + pairwise_t_matrix_list = [] + + # disconet + teacher_processed_lidar_list = [] + + # image + img_front = [] + img_left = [] + img_right = [] + BEV = [] + + dict_list = [] + + if self.supervise_single: + pos_equal_one_single = [] + neg_equal_one_single = [] + targets_single = [] + object_bbx_center_single = [] + object_bbx_mask_single = [] + + for i in range(len(batch)): + ego_dict = batch[i]['ego'] + + if not online_eval_only: + object_bbx_center.append(ego_dict['object_bbx_center']) + object_bbx_mask.append(ego_dict['object_bbx_mask']) + object_ids.append(ego_dict['object_ids']) + + gt_object_bbx_center.append(ego_dict['gt_object_bbx_center']) + gt_object_bbx_mask.append(ego_dict['gt_object_bbx_mask']) + + gt_object_ids.append(ego_dict['gt_object_ids']) + + label_dict_list.append(ego_dict['label_dict']) + + gt_label_dict_list.append(ego_dict['gt_label_dict']) + + else: + object_ids.append(None) + gt_object_ids.append(None) + + lidar_pose_list.append(ego_dict['lidar_poses']) # ego_dict['lidar_pose'] is np.ndarray [N,6] + lidar_pose_clean_list.append(ego_dict['lidar_poses_clean']) + + if self.load_lidar_file: + processed_lidar_list.append(ego_dict['processed_lidar']) + if self.load_camera_file: + image_inputs_list.append(ego_dict['image_inputs']) # different cav_num, ego_dict['image_inputs'] is dict. 
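+                # record_len stores how many CAVs contributed to each sample so the model can
+                # split flattened per-agent features back into per-sample groups; pairwise_t_matrix
+                # holds the (max_cav, max_cav, 4, 4) relative transforms between agents.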
+ + record_len.append(ego_dict['cav_num']) + pairwise_t_matrix_list.append(ego_dict['pairwise_t_matrix']) + + dict_list.append([ego_dict['scene_dict'], ego_dict['frame_id']]) + + if self.visualize: + origin_lidar.append(ego_dict['origin_lidar']) + # lidar_len.append(ego_dict['lidar_len']) + if len(ego_dict['img_front']) > 0 and len(ego_dict['img_right']) > 0 and len(ego_dict['img_left']) > 0 and len(ego_dict['BEV']) > 0: + img_front.append(ego_dict['img_front'][0]) + img_left.append(ego_dict['img_left'][0]) + img_right.append(ego_dict['img_right'][0]) + BEV.append(ego_dict['BEV'][0]) + + if self.supervise_single and not online_eval_only: + # unused label + if False: + pos_equal_one_single.append(ego_dict['single_label_dict_torch']['pos_equal_one']) + neg_equal_one_single.append(ego_dict['single_label_dict_torch']['neg_equal_one']) + targets_single.append(ego_dict['single_label_dict_torch']['targets']) + object_bbx_center_single.append(ego_dict['single_object_bbx_center_torch']) + object_bbx_mask_single.append(ego_dict['single_object_bbx_mask_torch']) + + # heterogeneous + if self.heterogeneous: + lidar_agent_list.append(ego_dict['lidar_agent']) + + # convert to numpy, (B, max_num, 7) + if not online_eval_only: + object_bbx_center = torch.from_numpy(np.array(object_bbx_center)) + object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask)) + gt_object_bbx_center = torch.from_numpy(np.array(gt_object_bbx_center)) + gt_object_bbx_mask = torch.from_numpy(np.array(gt_object_bbx_mask)) + else: + object_bbx_center = None + object_bbx_mask = None + gt_object_bbx_center = None + gt_object_bbx_mask = None + + + # unused label + label_torch_dict = {} + if False: + label_torch_dict = \ + self.post_processor.collate_batch(label_dict_list) + + record_len = torch.from_numpy(np.array(record_len)) + record_len = torch.from_numpy(np.array(record_len, dtype=int)) + pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list)) + label_torch_dict['record_len'] = record_len + label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix + # for centerpoint + if not online_eval_only: + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + output_dict['ego'].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask,}) + output_dict['ego'].update({ + 'anchor_box': torch.from_numpy(self.anchor_box), + 'label_dict': label_torch_dict, + 'record_len': record_len, + 'pairwise_t_matrix': pairwise_t_matrix}) + if self.visualize: + origin_lidar = \ + np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar)) + origin_lidar = torch.from_numpy(origin_lidar) + output_dict['ego'].update({'origin_lidar': origin_lidar}) + + if self.load_lidar_file: + merged_feature_dict = merge_features_to_dict(processed_lidar_list) + if self.heterogeneous: + lidar_agent = np.concatenate(lidar_agent_list) + lidar_agent_idx = lidar_agent.nonzero()[0].tolist() + for k, v in merged_feature_dict.items(): # 'voxel_features' 'voxel_num_points' 'voxel_coords' + merged_feature_dict[k] = [v[index] for index in lidar_agent_idx] + + if not self.heterogeneous or (self.heterogeneous and sum(lidar_agent) != 0): + processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(merged_feature_dict) + output_dict['ego'].update({'processed_lidar': processed_lidar_torch_dict}) + + if self.load_camera_file: + # collate ego camera information + imgs_batch = [] + rots_batch = [] + trans_batch = [] + intrins_batch = [] + extrinsics_batch = [] + post_trans_batch = [] + 
post_rots_batch = [] + for i in range(len(batch)): + ego_dict = batch[i]["ego"]["image_inputs"] + imgs_batch.append(ego_dict["imgs"]) + rots_batch.append(ego_dict["rots"]) + trans_batch.append(ego_dict["trans"]) + intrins_batch.append(ego_dict["intrins"]) + extrinsics_batch.append(ego_dict["extrinsics"]) + post_trans_batch.append(ego_dict["post_trans"]) + post_rots_batch.append(ego_dict["post_rots"]) + + output_dict["ego"].update({ + "image_inputs": + { + "imgs": torch.stack(imgs_batch), # [B, N, C, H, W] + "rots": torch.stack(rots_batch), + "trans": torch.stack(trans_batch), + "intrins": torch.stack(intrins_batch), + "post_trans": torch.stack(post_trans_batch), + "post_rots": torch.stack(post_rots_batch), + } + } + ) + + merged_image_inputs_dict = merge_features_to_dict(image_inputs_list, merge='cat') + + if self.heterogeneous: + lidar_agent = np.concatenate(lidar_agent_list) + camera_agent = 1 - lidar_agent + camera_agent_idx = camera_agent.nonzero()[0].tolist() + if sum(camera_agent) != 0: + for k, v in merged_image_inputs_dict.items(): # 'imgs' 'rots' 'trans' ... + merged_image_inputs_dict[k] = torch.stack([v[index] for index in camera_agent_idx]) + + if not self.heterogeneous or (self.heterogeneous and sum(camera_agent) != 0): + output_dict['ego'].update({'image_inputs': merged_image_inputs_dict}) + + record_len = torch.from_numpy(np.array(record_len, dtype=int)) + pairwise_t_matrix = torch.from_numpy(np.array(pairwise_t_matrix_list)) + label_torch_dict['record_len'] = record_len + label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix + lidar_pose = torch.from_numpy(np.concatenate(lidar_pose_list, axis=0)) + lidar_pose_clean = torch.from_numpy(np.concatenate(lidar_pose_clean_list, axis=0)) + + if not online_eval_only: + label_torch_dict = \ + self.post_processor.collate_batch(label_dict_list) + + gt_label_torch_dict = \ + self.post_processor.collate_batch(gt_label_dict_list) + + # for centerpoint + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + + gt_label_torch_dict.update({'gt_object_bbx_center': gt_object_bbx_center, + 'gt_object_bbx_mask': gt_object_bbx_mask}) + else: + gt_label_torch_dict = {} + + gt_label_torch_dict['pairwise_t_matrix'] = pairwise_t_matrix + gt_label_torch_dict['record_len'] = record_len + + # object id is only used during inference, where batch size is 1. + # so here we only get the first element. 
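+            # During training these object ids are not used; at inference the batch size is 1,
+            # so object_ids[0] below is exactly the id list of that single sample.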
+ output_dict['ego'].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'record_len': record_len, + 'label_dict': label_torch_dict, + 'object_ids': object_ids[0], + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_pose_clean': lidar_pose_clean, + 'lidar_pose': lidar_pose, + 'anchor_box': self.anchor_box_torch}) + + output_dict['ego'].update({'gt_object_bbx_center': gt_object_bbx_center, + 'gt_object_bbx_mask': gt_object_bbx_mask, + 'gt_label_dict': gt_label_torch_dict, + 'gt_object_ids': gt_object_ids[0]}) + + output_dict['ego'].update({'dict_list': dict_list}) + output_dict['ego'].update({'record_len': record_len, + 'pairwise_t_matrix': pairwise_t_matrix + }) + + if self.visualize: + origin_lidar = torch.from_numpy(np.array(origin_lidar)) + output_dict['ego'].update({'origin_lidar': origin_lidar}) + output_dict['ego'].update({'img_front': img_front}) + output_dict['ego'].update({'img_right': img_right}) + output_dict['ego'].update({'img_left': img_left}) + output_dict['ego'].update({'BEV': BEV}) + + if self.supervise_single and not online_eval_only: + output_dict['ego'].update({ + "label_dict_single":{ + # "pos_equal_one": torch.cat(pos_equal_one_single, dim=0), + # "neg_equal_one": torch.cat(neg_equal_one_single, dim=0), + # "targets": torch.cat(targets_single, dim=0), + # for centerpoint + "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0), + "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0) + }, + "object_bbx_center_single": torch.cat(object_bbx_center_single, dim=0), + "object_bbx_mask_single": torch.cat(object_bbx_mask_single, dim=0) + }) + + if self.heterogeneous: + output_dict['ego'].update({ + "lidar_agent_record": torch.from_numpy(np.concatenate(lidar_agent_list)) # [0,1,1,0,1...] + }) + + + return output_dict + + def collate_batch_test(self, batch, online_eval_only=False): + """ + Customized collate function for pytorch dataloader during testing + for late fusion dataset. + + Parameters + ---------- + batch : dict + + Returns + ------- + batch : dicn + Reformatted batch. + """ + # currently, we only support batch size of 1 during testing + assert len(batch) <= 1, "Batch size 1 is required during testing!" + + self.online_eval_only = online_eval_only + + output_dict = self.collate_batch_train(batch, online_eval_only) + if output_dict is None: + return None + + batch = batch[0] + + if batch['ego']['anchor_box'] is not None: + output_dict['ego'].update({'anchor_box': + self.anchor_box_torch}) + + record_len = torch.from_numpy(np.array([batch['ego']['cav_num']])) + pairwise_t_matrix = torch.from_numpy(np.array([batch['ego']['pairwise_t_matrix']])) + + output_dict['ego'].update({'record_len': record_len, + 'pairwise_t_matrix': pairwise_t_matrix + }) + + # heterogeneous + if self.heterogeneous: + idx = batch['ego']['idx'] + cav_list = batch['ego']['cav_list'] # ['ego', '650' ..] + cav_num = len(batch) + lidar_agent, camera_agent = self.selector.select_agent(idx) + lidar_agent = lidar_agent[:cav_num] # [1,0,0,1,0] + lidar_agent_idx = lidar_agent.nonzero()[0].tolist() + lidar_agent_cav_id = [cav_list[index] for index in lidar_agent_idx] # ['ego', ...] 
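+            # lidar_agent_cav_id now lists the CAVs that keep their lidar branch; the remaining
+            # CAVs fall back to the camera branch when the per-CAV dicts are built below.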
+ + + # for late fusion, we also need to stack the lidar for better + # visualization + if self.visualize: + projected_lidar_list = [] + origin_lidar = [] + + for cav_id, cav_content in batch.items(): + if cav_id != 'ego': + output_dict.update({cav_id: {}}) + # output_dict.update({cav_id: {}}) + + if not online_eval_only: + object_bbx_center = \ + torch.from_numpy(np.array([cav_content['object_bbx_center']])) + object_bbx_mask = \ + torch.from_numpy(np.array([cav_content['object_bbx_mask']])) + object_ids = cav_content['object_ids'] + + # the anchor box is the same for all bounding boxes usually, thus + # we don't need the batch dimension. + output_dict[cav_id].update( + {"anchor_box": self.anchor_box_torch} + ) + + transformation_matrix = cav_content['transformation_matrix'] + + if self.visualize: + origin_lidar = [cav_content['origin_lidar']] + if (self.params['only_vis_ego'] is False) or (cav_id=='ego'): + projected_lidar = copy.deepcopy(cav_content['origin_lidar']) + projected_lidar[:, :3] = \ + box_utils.project_points_by_matrix_torch( + projected_lidar[:, :3], + transformation_matrix) + projected_lidar_list.append(projected_lidar) + + + if self.load_lidar_file: + # processed lidar dictionary + #if 'processed_features' in cav_content.keys(): + + merged_feature_dict = merge_features_to_dict([cav_content['processed_lidar']]) + processed_lidar_torch_dict = \ + self.pre_processor.collate_batch(merged_feature_dict) + output_dict[cav_id].update({'processed_lidar': processed_lidar_torch_dict}) + + if self.load_camera_file: + imgs_batch = [cav_content["image_inputs"]["imgs"]] + rots_batch = [cav_content["image_inputs"]["rots"]] + trans_batch = [cav_content["image_inputs"]["trans"]] + intrins_batch = [cav_content["image_inputs"]["intrins"]] + extrinsics_batch = [cav_content["image_inputs"]["extrinsics"]] + post_trans_batch = [cav_content["image_inputs"]["post_trans"]] + post_rots_batch = [cav_content["image_inputs"]["post_rots"]] + + output_dict[cav_id].update({ + "image_inputs": + { + "imgs": torch.stack(imgs_batch), + "rots": torch.stack(rots_batch), + "trans": torch.stack(trans_batch), + "intrins": torch.stack(intrins_batch), + "extrinsics": torch.stack(extrinsics_batch), + "post_trans": torch.stack(post_trans_batch), + "post_rots": torch.stack(post_rots_batch), + } + } + ) + + # heterogeneous + if self.heterogeneous: + if cav_id in lidar_agent_cav_id: + output_dict[cav_id].pop('image_inputs') + else: + output_dict[cav_id].pop('processed_lidar') + + if not online_eval_only: + # label dictionary + label_torch_dict = \ + self.post_processor.collate_batch([cav_content['label_dict']]) + + # for centerpoint + label_torch_dict.update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask}) + + # save the transformation matrix (4, 4) to ego vehicle + transformation_matrix_torch = \ + torch.from_numpy( + np.array(cav_content['transformation_matrix'])).float() + + # late fusion training, no noise + transformation_matrix_clean_torch = \ + torch.from_numpy( + np.array(cav_content['transformation_matrix_clean'])).float() + + if not online_eval_only: + output_dict[cav_id].update({'object_bbx_center': object_bbx_center, + 'object_bbx_mask': object_bbx_mask, + 'label_dict': label_torch_dict, + # 'record_len': record_len, + 'object_ids': object_ids,}) + output_dict[cav_id].update({ + 'transformation_matrix': transformation_matrix_torch, + 'transformation_matrix_clean': transformation_matrix_clean_torch}) + + + if 'cav_num' in cav_content.keys(): + record_len = 
torch.from_numpy(np.array([cav_content['cav_num']])) + output_dict[cav_id].update({'record_len': record_len}) + + if 'pairwise_t_matrix' in cav_content.keys(): + pairwise_t_matrix = torch.from_numpy(np.array([cav_content['pairwise_t_matrix']])) + output_dict[cav_id].update({'pairwise_t_matrix': pairwise_t_matrix}) + + + + if self.visualize: + origin_lidar = \ + np.array( + downsample_lidar_minimum(pcd_np_list=origin_lidar)) + origin_lidar = torch.from_numpy(origin_lidar) + output_dict[cav_id].update({'origin_lidar': origin_lidar}) + + if self.visualize: + projected_lidar_stack = [torch.from_numpy( + np.vstack(projected_lidar_list))] + output_dict['ego'].update({'origin_lidar': projected_lidar_stack}) + + output_dict['ego'].update({ + "sample_idx": batch['ego']['sample_idx'], + "cav_id_list": batch['ego']['cav_id_list'] + }) + batch_record_len = output_dict['ego']['record_len'] + + for cav_id in output_dict.keys(): + if 'record_len' in output_dict[cav_id].keys(): + continue + output_dict[cav_id].update({'record_len': batch_record_len}) + + + return output_dict + + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. + """ + pred_box_tensor, pred_score = self.post_processor.post_process( + data_dict, output_dict + ) + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + return pred_box_tensor, pred_score, gt_box_tensor + + def post_process_no_fusion(self, data_dict, output_dict_ego): + data_dict_ego = OrderedDict() + data_dict_ego["ego"] = data_dict["ego"] + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + pred_box_tensor, pred_score = self.post_processor.post_process( + data_dict_ego, output_dict_ego + ) + return pred_box_tensor, pred_score, gt_box_tensor + + def post_process_multiclass(self, data_dict, output_dict, online_eval_only=False): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. 
+ """ + + if online_eval_only == False: + online_eval_only = self.online_eval_only + + num_class = output_dict['ego']['cls_preds'].shape[1] + pred_box_tensor_list = [] + pred_score_list = [] + gt_box_tensor_list = [] + + num_list = [0,1,3] + + for i in range(num_class): + data_dict_single = copy.deepcopy(data_dict) + gt_dict_single = {'ego': {}} + gt_dict_single['ego'] = copy.deepcopy(data_dict['ego']) + output_dict_single = copy.deepcopy(output_dict) + if not online_eval_only: + data_dict_single['ego']['object_bbx_center'] = data_dict['ego']['object_bbx_center'][:,i,:,:] + data_dict_single['ego']['object_bbx_mask'] = data_dict['ego']['object_bbx_mask'][:,i,:] + data_dict_single['ego']['object_ids'] = data_dict['ego']['object_ids'][num_list[i]] + gt_dict_single['ego']['object_bbx_center'] = data_dict['ego']['gt_object_bbx_center'][:,i,:,:] + gt_dict_single['ego']['object_bbx_mask'] = data_dict['ego']['gt_object_bbx_mask'][:,i,:] + gt_dict_single['ego']['object_ids'] = data_dict['ego']['gt_object_ids'][num_list[i]] + + + for cav in output_dict_single.keys(): + output_dict_single[cav]['cls_preds'] = output_dict[cav]['cls_preds'][:,i:i+1,:,:] + output_dict_single[cav]['reg_preds'] = output_dict[cav]['reg_preds_multiclass'][:,i,:,:] + + pred_box_tensor, pred_score = \ + self.post_processor.post_process(data_dict_single, output_dict_single) + + if not online_eval_only: + gt_box_tensor = self.post_processor.generate_gt_bbx(gt_dict_single) + else: + gt_box_tensor = None + + pred_box_tensor_list.append(pred_box_tensor) + pred_score_list.append(pred_score) + gt_box_tensor_list.append(gt_box_tensor) + + return pred_box_tensor_list, pred_score_list, gt_box_tensor_list + + def post_process_multiclass_no_fusion(self, data_dict, output_dict_ego, online_eval_only=False): + """ + Process the outputs of the model to 2D/3D bounding box. + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box_tensor : torch.Tensor + The tensor of prediction bounding box after NMS. + gt_box_tensor : torch.Tensor + The tensor of gt bounding box. 
+ """ + + online_eval_only = self.online_eval_only + + num_class = data_dict['ego']['object_bbx_center'].shape[1] + + + pred_box_tensor_list = [] + pred_score_list = [] + gt_box_tensor_list = [] + + num_list = [0,1,3] + + for i in range(num_class): + data_dict_single = copy.deepcopy(data_dict) + gt_dict_single = {'ego': {}} + gt_dict_single['ego'] = copy.deepcopy(data_dict['ego']) + output_dict_single = copy.deepcopy(output_dict_ego) + data_dict_single['ego']['object_bbx_center'] = data_dict['ego']['object_bbx_center'][:,i,:,:] + data_dict_single['ego']['object_bbx_mask'] = data_dict['ego']['object_bbx_mask'][:,i,:] + data_dict_single['ego']['object_ids'] = data_dict['ego']['object_ids'][num_list[i]] + gt_dict_single['ego']['object_bbx_center'] = data_dict['ego']['gt_object_bbx_center'][:,i,:,:] + gt_dict_single['ego']['object_bbx_mask'] = data_dict['ego']['gt_object_bbx_mask'][:,i,:] + gt_dict_single['ego']['object_ids'] = data_dict['ego']['gt_object_ids'][num_list[i]] + output_dict_single['ego']['cls_preds'] = output_dict_ego['ego']['cls_preds'][:,i:i+1,:,:] + output_dict_single['ego']['reg_preds'] = output_dict_ego['ego']['reg_preds_multiclass'][:,i,:,:] + data_dict_single_ego = OrderedDict() + data_dict_single_ego["ego"] = data_dict_single["ego"] + pred_box_tensor, pred_score = \ + self.post_processor.post_process(data_dict_single_ego, output_dict_single) + gt_box_tensor = self.post_processor.generate_gt_bbx(gt_dict_single) + + + pred_box_tensor_list.append(pred_box_tensor) + pred_score_list.append(pred_score) + gt_box_tensor_list.append(gt_box_tensor) + + return pred_box_tensor_list, pred_score_list, gt_box_tensor_list + + def post_process_no_fusion_uncertainty(self, data_dict, output_dict_ego): + data_dict_ego = OrderedDict() + data_dict_ego['ego'] = data_dict['ego'] + gt_box_tensor = self.post_processor.generate_gt_bbx(data_dict) + + pred_box_tensor, pred_score, uncertainty = \ + self.post_processor.post_process(data_dict_ego, output_dict_ego, return_uncertainty=True) + return pred_box_tensor, pred_score, gt_box_tensor, uncertainty + + return LatemulticlassFusionDataset diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__init__.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..33f9e917b452cfb68436e8f04d1b0a348dbda5ea --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__init__.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + +from opencood.data_utils.post_processor.voxel_postprocessor import VoxelPostprocessor +from opencood.data_utils.post_processor.bev_postprocessor import BevPostprocessor +from opencood.data_utils.post_processor.ciassd_postprocessor import CiassdPostprocessor +from opencood.data_utils.post_processor.fpvrcnn_postprocessor import FpvrcnnPostprocessor +from opencood.data_utils.post_processor.uncertainty_voxel_postprocessor import UncertaintyVoxelPostprocessor + +__all__ = { + 'VoxelPostprocessor': VoxelPostprocessor, + 'BevPostprocessor': BevPostprocessor, + 'CiassdPostprocessor': CiassdPostprocessor, + 'FpvrcnnPostprocessor': FpvrcnnPostprocessor, + 'UncertaintyVoxelPostprocessor': UncertaintyVoxelPostprocessor, +} + + +def build_postprocessor(anchor_cfg, train): + process_method_name = anchor_cfg['core_method'] + anchor_generator = __all__[process_method_name]( + anchor_params=anchor_cfg, + 
train=train + ) + + return anchor_generator \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/__init__.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2c6a9a3b26c0a683efd6841280e1e0ca7f35f780 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/__init__.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/base_postprocessor.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/base_postprocessor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c8a1a222ffbe82e9cb9d0fd5ceadce13429ff633 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/base_postprocessor.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/bev_postprocessor.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/bev_postprocessor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..560200cb57f4fc2819e9eb71dc359b67f60e3fc7 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/bev_postprocessor.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/ciassd_postprocessor.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/ciassd_postprocessor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..258fd8aa88cffb50439ac207f19f2a560f9030e1 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/ciassd_postprocessor.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/fpvrcnn_postprocessor.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/fpvrcnn_postprocessor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..01c5be1e2e8fffb4b64bb4041ec1305b6a015400 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/fpvrcnn_postprocessor.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/uncertainty_voxel_postprocessor.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/uncertainty_voxel_postprocessor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a7d4596eda63e5a7df33b46f8b3c3cd43c4e667a Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/uncertainty_voxel_postprocessor.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/voxel_postprocessor.cpython-37.pyc 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/voxel_postprocessor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b5be15343ef0b980ab264879719bbc710770f130 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/__pycache__/voxel_postprocessor.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/base_postprocessor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/base_postprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..80a3833e61c007d767a286d8f13a26bcdef5cf24 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/base_postprocessor.py @@ -0,0 +1,594 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + +""" +Template for AnchorGenerator +""" + +import numpy as np +import torch +import cv2 + +from opencood.utils import box_utils +from opencood.utils import common_utils +from opencood.utils.transformation_utils import x1_to_x2 + +class BasePostprocessor(object): + """ + Template for Anchor generator. + + Parameters + ---------- + anchor_params : dict + The dictionary containing all anchor-related parameters. + train : bool + Indicate train or test mode. + + Attributes + ---------- + bbx_dict : dictionary + Contain all objects information across the cav, key: id, value: bbx + coordinates (1, 7) + """ + + def __init__(self, anchor_params, train=True): + self.params = anchor_params + self.bbx_dict = {} + self.train = train + + def generate_anchor_box(self): + # needs to be overloaded + return None + + def generate_label(self, *argv): + return None + + def generate_gt_bbx(self, data_dict): + """ + The base postprocessor will generate 3d groundtruth bounding box. + + For early and intermediate fusion, + data_dict only contains ego. + + For late fusion, + data_dcit contains all cavs, so we need transformation matrix. + To generate gt boxes, transformation_matrix should be clean + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + Returns + ------- + gt_box3d_tensor : torch.Tensor + The groundtruth bounding box tensor, shape (N, 8, 3). + """ + gt_box3d_list = [] + # used to avoid repetitive bounding box + object_id_list = [] + + for cav_id, cav_content in data_dict.items(): + # used to project gt bounding box to ego space + # object_bbx_center is clean. 
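+            # Every CAV carries its own clean boxes in late fusion; each set is projected into
+            # the ego frame here and duplicates are later removed via the shared object ids
+            # before the (N, 8, 3) corner tensor is returned.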
+ transformation_matrix = cav_content['transformation_matrix_clean'] + + object_bbx_center = cav_content['object_bbx_center'] + object_bbx_mask = cav_content['object_bbx_mask'] + object_ids = cav_content['object_ids'] + object_bbx_center = object_bbx_center[object_bbx_mask == 1] + + # convert center to corner + object_bbx_corner = \ + box_utils.boxes_to_corners_3d(object_bbx_center, + self.params['order']) + projected_object_bbx_corner = \ + box_utils.project_box3d(object_bbx_corner.float(), + transformation_matrix) + gt_box3d_list.append(projected_object_bbx_corner) + # append the corresponding ids + object_id_list += object_ids + + # gt bbx 3d + gt_box3d_list = torch.vstack(gt_box3d_list) + # some of the bbx may be repetitive, use the id list to filter + gt_box3d_selected_indices = \ + [object_id_list.index(x) for x in set(object_id_list)] + gt_box3d_tensor = gt_box3d_list[gt_box3d_selected_indices] + + # filter the gt_box to make sure all bbx are in the range. with z dim + gt_box3d_np = gt_box3d_tensor.cpu().numpy() + gt_box3d_np = box_utils.mask_boxes_outside_range_numpy(gt_box3d_np, + self.params['gt_range'], + order=None) + try: + gt_box3d_tensor = torch.from_numpy(gt_box3d_np).to(device=gt_box3d_list.device) + except: + print('load gt_box3d_tensor failed') + if len(gt_box3d_list)>0: + gt_box3d_tensor = torch.from_numpy(gt_box3d_np).to(device=gt_box3d_list[0].device) + else: + gt_box3d_tensor = None + + return gt_box3d_tensor + + + def generate_gt_bbx_by_iou(self, data_dict): + """ + This function is only used by DAIR-V2X + late fusion dataset + + DAIR-V2X + late fusion dataset's label are from veh-side and inf-side + and do not have unique object id. + + So we will filter the same object by IoU + + The base postprocessor will generate 3d groundtruth bounding box. + + For early and intermediate fusion, + data_dict only contains ego. + + For late fusion, + data_dcit contains all cavs, so we need transformation matrix. + To generate gt boxes, transformation_matrix should be clean + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + Returns + ------- + gt_box3d_tensor : torch.Tensor + The groundtruth bounding box tensor, shape (N, 8, 3). + """ + gt_box3d_list = [] + + for cav_id, cav_content in data_dict.items(): + # used to project gt bounding box to ego space + # object_bbx_center is clean. 
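+            # DAIR-V2X labels have no shared object ids across agents, so instead of id-based
+            # deduplication the projected infrastructure boxes are filtered against the
+            # vehicle-side boxes by IoU (threshold 0.05) further below.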
+ transformation_matrix = cav_content['transformation_matrix_clean'] + + object_bbx_center = cav_content['object_bbx_center'] + object_bbx_mask = cav_content['object_bbx_mask'] + object_ids = cav_content['object_ids'] + object_bbx_center = object_bbx_center[object_bbx_mask == 1] + + # convert center to corner + object_bbx_corner = \ + box_utils.boxes_to_corners_3d(object_bbx_center, + self.params['order']) + projected_object_bbx_corner = \ + box_utils.project_box3d(object_bbx_corner.float(), + transformation_matrix) + gt_box3d_list.append(projected_object_bbx_corner) + + # if only ego agent + if len(data_dict) == 1: + gt_box3d_tensor = torch.vstack(gt_box3d_list) + # both veh-side and inf-side label + else: + veh_corners_np = gt_box3d_list[0].cpu().numpy() + inf_corners_np = gt_box3d_list[1].cpu().numpy() + inf_polygon_list = list(common_utils.convert_format(inf_corners_np)) + veh_polygon_list = list(common_utils.convert_format(veh_corners_np)) + iou_thresh = 0.05 + + + gt_from_inf = [] + for i in range(len(inf_polygon_list)): + inf_polygon = inf_polygon_list[i] + ious = common_utils.compute_iou(inf_polygon, veh_polygon_list) + if (ious > iou_thresh).any(): + continue + gt_from_inf.append(inf_corners_np[i]) + + if len(gt_from_inf): + gt_from_inf = np.stack(gt_from_inf) + gt_box3d = np.vstack([veh_corners_np, gt_from_inf]) + else: + gt_box3d = veh_corners_np + + gt_box3d_tensor = torch.from_numpy(gt_box3d).to(device=gt_box3d_list[0].device) + + # mask_boxes_outside_range_numpy has filtering of z-dim + # gt_box3d_np = gt_box3d_tensor.cpu().numpy() + # gt_box3d_np = box_utils.mask_boxes_outside_range_numpy(gt_box3d_np, + # self.params['gt_range'], + # self.params['order']) + # gt_box3d_tensor = torch.from_numpy(gt_box3d_np).to(device=gt_box3d_list[0].device) + + # need discussion. not filter z-dim. + mask = \ + box_utils.get_mask_for_boxes_within_range_torch(gt_box3d_tensor, self.params['gt_range']) + gt_box3d_tensor = gt_box3d_tensor[mask, :, :] + + + return gt_box3d_tensor + + def generate_object_center(self, + cav_contents, + reference_lidar_pose, + enlarge_z=False): + """ + Retrieve all objects in a format of (n, 7), where 7 represents + x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. + + Parameters + ---------- + cav_contents : list + List of dictionary, save all cavs' information. + in fact it is used in get_item_single_car, so the list length is 1 + + reference_lidar_pose : list + The final target lidar pose with length 6. + + enlarge_z : + if True, enlarge the z axis range to include more object + + Returns + ------- + object_np : np.ndarray + Shape is (max_num, 7). + mask : np.ndarray + Shape is (max_num,). + object_ids : list + Length is number of bbx in current sample. 
+ """ + tmp_object_dict = {} + for cav_content in cav_contents: + tmp_object_dict.update(cav_content['params']['vehicles']) + + output_dict = {} + filter_range = self.params['anchor_args']['cav_lidar_range'] \ + if self.train else self.params['gt_range'] + + box_utils.project_world_objects(tmp_object_dict, + output_dict, + reference_lidar_pose, + filter_range, + self.params['order'], + enlarge_z) + + object_np = np.zeros((self.params['max_num'], 7)) + mask = np.zeros(self.params['max_num']) + object_ids = [] + + for i, (object_id, object_bbx) in enumerate(output_dict.items()): + object_np[i] = object_bbx[0, :] + mask[i] = 1 + object_ids.append(object_id) + return object_np, mask, object_ids + + + def generate_object_center_v2x(self, + cav_contents, + reference_lidar_pose): + """ + Retrieve all objects in a format of (n, 7), where 7 represents + x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. + + Parameters + ---------- + cav_contents : list + List of dictionary, save all cavs' information. + In fact, only the ego vehile needs to generate object center + + reference_lidar_pose : list + The final target lidar pose with length 6. + + Returns + ------- + object_np : np.ndarray + Shape is (max_num, 7). + mask : np.ndarray + Shape is (max_num,). + object_ids : list + Length is number of bbx in current sample. + """ + # from opencood.data_utils.datasets import GT_RANGE + + assert len(cav_contents) == 1 + + """ + In old version, we only let ego agent return gt box. + Other agent return empty. + + But it's not suitable for late fusion. + Also, we should filter out boxes that don't have any lidar point hits. + + Thankfully, 'lidar_np' is in cav_contents[0].keys() + """ + + + gt_boxes = cav_contents[0]['params']['vehicles'] # notice [N,10], 10 includes [x,y,z,dx,dy,dz,w,a,b,c] + object_ids = cav_contents[0]['params']['object_ids'] + lidar_np = cav_contents[0]['lidar_np'] + + tmp_object_dict = {"gt_boxes": gt_boxes, "object_ids":object_ids} + + output_dict = {} + filter_range = self.params['anchor_args']['cav_lidar_range'] # v2x we don't use GT_RANGE. + + box_utils.project_world_objects_v2x(tmp_object_dict, + output_dict, + reference_lidar_pose, + filter_range, + self.params['order'], + lidar_np=lidar_np) + + object_np = np.zeros((self.params['max_num'], 7)) + mask = np.zeros(self.params['max_num']) + object_ids = [] + + + for i, (object_id, object_bbx) in enumerate(output_dict.items()): + object_np[i] = object_bbx[0, :] + mask[i] = 1 + object_ids.append(object_id) + + return object_np, mask, object_ids + + def generate_object_center_dairv2x(self, + cav_contents, + reference_lidar_pose): + """ + Retrieve all objects in a format of (n, 7), where 7 represents + x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. + + Parameters + ---------- + cav_contents : list + List of dictionary, save all cavs' information. + + reference_lidar_pose : list + The final target lidar pose with length 6. + + Returns + ------- + object_np : np.ndarray + Shape is (max_num, 7). + mask : np.ndarray + Shape is (max_num,). + object_ids : list + Length is number of bbx in current sample. 
+ """ + + # tmp_object_dict = {} + tmp_object_list = [] + cav_content = cav_contents[0] + tmp_object_list = cav_content['params']['vehicles'] #世界坐标系下 + + output_dict = {} + filter_range = self.params['anchor_args']['cav_lidar_range'] + + + box_utils.project_world_objects_dairv2x(tmp_object_list, + output_dict, + reference_lidar_pose, + filter_range, + self.params['order']) + + object_np = np.zeros((self.params['max_num'], 7)) + mask = np.zeros(self.params['max_num']) + object_ids = [] + + for i, (object_id, object_bbx) in enumerate(output_dict.items()): + object_np[i] = object_bbx[0, :] + mask[i] = 1 + object_ids.append(object_id) + + return object_np, mask, object_ids + + + def generate_object_center_dairv2x_single(self, + cav_contents, + suffix=""): + """ + Retrieve all objects in a format of (n, 7), where 7 represents + x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. + + Parameters + ---------- + cav_contents : list + List of dictionary, save all cavs' information. + + Returns + ------- + object_np : np.ndarray + Shape is (max_num, 7). + mask : np.ndarray + Shape is (max_num,). + object_ids : list + Length is number of bbx in current sample. + """ + + # tmp_object_dict = {} + tmp_object_list = [] + cav_content = cav_contents[0] + tmp_object_list = cav_content['params'][f'vehicles{suffix}'] # ego 坐标系下 + + output_dict = {} + filter_range = self.params['anchor_args']['cav_lidar_range'] + + + box_utils.load_single_objects_dairv2x(tmp_object_list, + output_dict, + filter_range, + self.params['order']) + + object_np = np.zeros((self.params['max_num'], 7)) + mask = np.zeros(self.params['max_num']) + object_ids = [] + + for i, (object_id, object_bbx) in enumerate(output_dict.items()): + object_np[i] = object_bbx[0, :] + mask[i] = 1 + object_ids.append(object_id) + + return object_np, mask, object_ids + + + + def generate_object_center_dairv2x_single_hetero(self, + cav_contents, + reference_lidar_pose, + suffix, + ): + """ + Retrieve all objects in a format of (n, 7), where 7 represents + x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. + + Parameters + ---------- + cav_contents : list + List of dictionary, save all cavs' information. + + Returns + ------- + object_np : np.ndarray + Shape is (max_num, 7). + mask : np.ndarray + Shape is (max_num,). + object_ids : list + Length is number of bbx in current sample. + """ + + # tmp_object_dict = {} + tmp_object_list = [] + cav_content = cav_contents[0] + tmp_object_list = cav_content['params'][f'vehicles{suffix}'] # ego 坐标系下 + + output_dict = {} + filter_range = self.params['anchor_args']['cav_lidar_range'] + + cav_coor = cav_content['params']['lidar_pose'] # T_world_cav + ego_coor = reference_lidar_pose # T_world_ego + T_ego_cav = x1_to_x2(cav_coor, ego_coor) # T_ego_cav + + box_utils.load_single_objects_dairv2x_hetero(tmp_object_list, + output_dict, + filter_range, + T_ego_cav, + self.params['order']) + + object_np = np.zeros((self.params['max_num'], 7)) + mask = np.zeros(self.params['max_num']) + object_ids = [] + + for i, (object_id, object_bbx) in enumerate(output_dict.items()): + object_np[i] = object_bbx[0, :] + mask[i] = 1 + object_ids.append(object_id) + + return object_np, mask, object_ids + + + + + + def generate_visible_object_center(self, + cav_contents, + reference_lidar_pose, + enlarge_z=False): + """ + Retrieve all objects in a format of (n, 7), where 7 represents + x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. + + Parameters + ---------- + cav_contents : list + List of dictionary, save all cavs' information. 
+ in fact it is used in get_item_single_car, so the list length is 1 + + reference_lidar_pose : list + The final target lidar pose with length 6. + + visibility_map : np.ndarray, uint8 + for OPV2V, its 256*256 resolution. 0.39m per pixel. heading up. + + enlarge_z : + if True, enlarge the z axis range to include more object + + Returns + ------- + object_np : np.ndarray + Shape is (max_num, 7). + mask : np.ndarray + Shape is (max_num,). + object_ids : list + Length is number of bbx in current sample. + """ + + tmp_object_dict = {} + for cav_content in cav_contents: + tmp_object_dict.update(cav_content['params']['vehicles']) + + output_dict = {} + filter_range = self.params['anchor_args']['cav_lidar_range'] # if self.train else GT_RANGE_OPV2V + inf_filter_range = [-1e5, -1e5, -1e5, 1e5, 1e5, 1e5] + visibility_map = np.asarray(cv2.cvtColor(cav_contents[0]["bev_visibility.png"], cv2.COLOR_BGR2GRAY)) + ego_lidar_pose = cav_contents[0]["params"]["lidar_pose_clean"] + + # 1-time filter: in ego coordinate, use visibility map to filter. + box_utils.project_world_visible_objects(tmp_object_dict, + output_dict, + ego_lidar_pose, + inf_filter_range, + self.params['order'], + visibility_map, + enlarge_z) + + updated_tmp_object_dict = {} + for k, v in tmp_object_dict.items(): + if k in output_dict: + updated_tmp_object_dict[k] = v # not visible + output_dict = {} + + # 2-time filter: use reference_lidar_pose + box_utils.project_world_objects(updated_tmp_object_dict, + output_dict, + reference_lidar_pose, + filter_range, + self.params['order'], + enlarge_z) + + object_np = np.zeros((self.params['max_num'], 7)) + mask = np.zeros(self.params['max_num']) + object_ids = [] + + for i, (object_id, object_bbx) in enumerate(output_dict.items()): + object_np[i] = object_bbx[0, :] + mask[i] = 1 + object_ids.append(object_id) + + return object_np, mask, object_ids + + def generate_object_center_v2xset_camera(self, + cav_contents, + reference_lidar_pose, + enlarge_z=False): + + tmp_object_dict = {} + for cav_content in cav_contents: + tmp_object_dict.update(cav_content['params']['vehicles']) + + output_dict = {} + filter_range = [-45, -45, -3, 45, 45, 1] + + box_utils.project_world_objects(tmp_object_dict, + output_dict, + reference_lidar_pose, + filter_range, + self.params['order'], + enlarge_z) + + object_np = np.zeros((self.params['max_num'], 7)) + mask = np.zeros(self.params['max_num']) + object_ids = [] + + for i, (object_id, object_bbx) in enumerate(output_dict.items()): + object_np[i] = object_bbx[0, :] + mask[i] = 1 + object_ids.append(object_id) + return object_np, mask, object_ids \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/bev_postprocessor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/bev_postprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..b08b189314f5307437235a3d33acd8179dc1a513 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/bev_postprocessor.py @@ -0,0 +1,451 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang haxiang@g.ucla.edu +# License: TDG-Attribution-NonCommercial-NoDistrib + +""" +Anchor-free 2d Generator +""" + +import numpy as np +import torch +import torch.nn.functional as F + +from opencood.utils.transformation_utils import dist_to_continuous +from opencood.data_utils.post_processor.base_postprocessor \ + import BasePostprocessor +from opencood.utils import box_utils +from opencood.visualization 
import vis_utils + + +class BevPostprocessor(BasePostprocessor): + def __init__(self, anchor_params, train): + super(BevPostprocessor, self).__init__(anchor_params, train) + # self.geometry_param = anchor_params["geometry"] + self.geometry_param = anchor_params["geometry_param"] + + # TODO + # Hard coded for now. Need to calculate for our own training dataset + self.target_mean = np.array([0.008, 0.001, 0.202, 0.2, 0.43, 1.368]) + self.target_std_dev = np.array([0.866, 0.5, 0.954, 0.668, 0.09, 0.111]) + + def generate_anchor_box(self): + return None + + def generate_label(self, **kwargs): + """ + Generate targets for training. + + Parameters + ---------- + kwargs : list + gt_box_center:(max_num, 7) + + Returns + ------- + label_dict : dict + Dictionary that contains all target related info. + """ + assert self.params['order'] == 'lwh', \ + 'Currently BEV only support lwh bbx order.' + # (max_num, 7) + gt_box_center = kwargs['gt_box_center'] + + # (max_num) + masks = kwargs['mask'] + + # (n, 7) + gt_box_center_valid = gt_box_center[masks == 1] + # (n, 4, 3) + bev_corners = box_utils.boxes_to_corners2d(gt_box_center_valid, + self.params['order']) + + n = gt_box_center_valid.shape[0] + # (n, 4, 2) + bev_corners = bev_corners[:, :, :2] + yaw = gt_box_center_valid[:, -1] + x, y = gt_box_center_valid[:, 0], gt_box_center_valid[:, 1] + dx, dy = gt_box_center_valid[:, 3], gt_box_center_valid[:, 4] + # (n, 6) + reg_targets = np.column_stack([np.cos(yaw), np.sin(yaw), x, y, dx, dy]) + + # target label map including classification and regression targets + label_map = np.zeros(self.geometry_param["label_shape"]) + self.update_label_map(label_map, bev_corners, reg_targets) + label_map = self.normalize_targets(label_map) + label_dict = { + # (7, label_shape[0], label_shape[1]) + "label_map": np.transpose(label_map, (2, 0, 1)).astype(np.float32), + "bev_corners": bev_corners + } + return label_dict + + def update_label_map(self, label_map, bev_corners, reg_targets): + """ + Update label_map based on bbx and regression targets. + + Parameters + ---------- + label_map : numpy.array + Targets array for classification and regression tasks with + the shape of label_shape. (H, W, 7). + + bev_corners : numpy.array + The bbx corners in lidar frame with shape (n, 4, 2) + + reg_targets : numpy.array + Array containing the regression targets information. It need to be + further processed. + + """ + res = self.geometry_param["res"] + downsample_rate = self.geometry_param["downsample_rate"] + + bev_origin = np.array([self.geometry_param["L1"], + self.geometry_param["W1"]]).reshape(1, -1) + + # discretized bbx corner representations -- (n, 4, 2) + # bev_corners is real coordinate + # bev_corners_dist is pixel coordinate + bev_corners_dist = (bev_corners - bev_origin) / res / downsample_rate + # generate the coordinates of m + x = np.arange(self.geometry_param["label_shape"][0]) # H (x in lidar coordinate) + y = np.arange(self.geometry_param["label_shape"][1]) # W (y in lidar coordinate) + xx, yy = np.meshgrid(x, y) + + # (label_shape[0]*label_shape[1], 2) + points = np.concatenate([xx.reshape(-1, 1), yy.reshape(-1, 1)], + axis=-1) # pixel + bev_origin_dist = bev_origin / res / downsample_rate + + # loop over each bbx, find the points within the bbx. 
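+        # for every pixel falling inside a rotated box, the classification channel
+        # is set to 1 and the regression channels store
+        #   [cos(yaw), sin(yaw), x - px, y - py, log(dx), log(dy)],
+        # where (px, py) is the pixel centre mapped back to continuous coordinates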
+ for i in range(bev_corners.shape[0]): + reg_target = reg_targets[i, :] + + # find discredited points in bbx + points_in_box = \ + box_utils.get_points_in_rotated_box(points, + bev_corners_dist[i, ...]) + # convert points to continuous space + points_continuous = dist_to_continuous(points_in_box, + bev_origin_dist, + res, + downsample_rate) + actual_reg_target = np.repeat(reg_target.reshape(1, -1), + points_continuous.shape[0], + axis=0) + # build learning targets + actual_reg_target[:, 2:4] = \ + actual_reg_target[:, 2:4] - points_continuous + actual_reg_target[:, 4:] = np.log(actual_reg_target[:, 4:]) + + # update label map + label_map[points_in_box[:, 0], points_in_box[:, 1], 0] = 1.0 + label_map[points_in_box[:, 0], points_in_box[:, 1], 1:] = \ + actual_reg_target + + def normalize_targets(self, label_map): + """ + Normalize label_map + + Parameters + ---------- + label_map : numpy.array + Targets array for classification and regression tasks with the + shape of label_shape. + + Returns + ------- + label_map: numpy.array + Nromalized label_map. + + """ + label_map[..., 1:] = \ + (label_map[..., 1:] - self.target_mean) / self.target_std_dev + return label_map + + def denormalize_reg_map(self, reg_map): + """ + Denormalize the regression map + + Parameters + ---------- + reg_map : np.ndarray / torch.Tensor + Regression output mapwith the shape of (label_shape[0], + label_shape[1], 6). + + Returns + ------- + reg_map : np.ndarray / torch.Tensor + Denormalized regression map. + + """ + if isinstance(reg_map, np.ndarray): + target_mean = self.target_mean + target_std_dev = self.target_std_dev + + else: + target_mean = \ + torch.from_numpy(self.target_mean).to(reg_map.device) + target_std_dev = \ + torch.from_numpy(self.target_std_dev).to(reg_map.device) + reg_map = reg_map * target_std_dev + target_mean + return reg_map + + @staticmethod + def collate_batch(label_batch_list): + """ + Customized collate function for target label generation. + + Parameters + ---------- + label_batch_list : list + The list of dictionary that contains all labels for several + frames. + + Returns + ------- + processed_batch : dict + Reformatted labels in torch tensor. + """ + label_map_list = [x["label_map"][np.newaxis, ...] for x in + label_batch_list] + processed_batch = { + # (batch_size, 7, label_shape[0], label_shape[1]) + "label_map": torch.from_numpy(np.concatenate(label_map_list, + axis=0)), + "bev_corners": [torch.from_numpy(x["bev_corners"]) for x in + label_batch_list] + } + return processed_batch + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D bounding box. + Step1: convert each cav's output to bounding box format + Step2: project the bounding boxes to ego space. + Step:3 NMS + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box2d_tensor : torch.Tensor + The prediction bounding box tensor after NMS. + + gt_box2d_tensor : torch.Tensor + The groundtruth bounding box tensor. 
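+
+        Note: the second value actually returned by this implementation is the
+        score tensor of the kept predictions (or None when no box exceeds the
+        score threshold), not a ground-truth tensor.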
+ """ + + # the final bounding box list + pred_box2d_list = [] + pred_score_list = [] + + for cav_id, cav_content in data_dict.items(): + assert cav_id in output_dict + # the transformation matrix to ego space + transformation_matrix = cav_content['transformation_matrix'] + + # classification probability -- (label_shape[0], label_shape[1]) + prob = output_dict[cav_id]['cls'].squeeze(0).squeeze(0) + prob = torch.sigmoid(prob) + # regression map -- (label_shape[0], label_shape[1], 6) + reg_map = output_dict[cav_id]['reg'].squeeze(0).permute(1, 2, 0) + reg_map = self.denormalize_reg_map(reg_map) + threshold = self.params['target_args']['score_threshold'] + mask = torch.gt(prob, threshold) + + if mask.sum() > 0: + # (number of high confidence bbx, 4, 2) + corners2d = self.reg_map_to_bbx_corners(reg_map, mask) + # assume the z-diviation in transformation_matrix is small, + # thus we can pad zeros to simulate the 3d transformation. + # (number of high confidence bbx, 4, 3) + box3d = F.pad(corners2d, (0, 1)) + # (number of high confidence bbx, 4, 2) + projected_boxes2d = \ + box_utils.project_points_by_matrix_torch(box3d.view(-1, 3), + transformation_matrix)[ + :, :2] + + projected_boxes2d = projected_boxes2d.view(-1, 4, 2) + scores = prob[mask] + pred_box2d_list.append(projected_boxes2d) + pred_score_list.append(scores) + + if len(pred_box2d_list): + pred_box2ds = torch.cat(pred_box2d_list, dim=0) + pred_scores = torch.cat(pred_score_list, dim=0) + else: + return None, None + + keep_index = box_utils.nms_rotated(pred_box2ds, pred_scores, + self.params['nms_thresh']) + if len(keep_index): + pred_box2ds = pred_box2ds[keep_index] + pred_scores = pred_scores[keep_index] + + # filter out the prediction out of the range. + mask = box_utils.get_mask_for_boxes_within_range_torch(pred_box2ds, self.params['gt_range']) + pred_box2ds = pred_box2ds[mask, :, :] + pred_scores = pred_scores[mask] + assert pred_scores.shape[0] == pred_box2ds.shape[0] + return pred_box2ds, pred_scores + + def reg_map_to_bbx_corners(self, reg_map, mask): + """ + Construct bbx from the regression output of the model. + + Parameters + ---------- + reg_map : torch.Tensor + Regression output of neural networks. + + mask : torch.Tensor + Masks used to filter bbx. + + Returns + ------- + corners : torch.Tensor + Bbx output with shape (N, 4, 2). + + """ + + assert len(reg_map.shape) == 3, \ + "only support shape of label_shape i.e. (*, *, 6)" + device = reg_map.device + + cos_t, sin_t, x, y, log_dx, log_dy = \ + [tt.squeeze(-1) for tt in torch.chunk(reg_map, 6, dim=-1)] + yaw = torch.atan2(sin_t, cos_t) + dx, dy = log_dx.exp(), log_dy.exp() + + grid_size = self.geometry_param["res"] * \ + self.geometry_param["downsample_rate"] + grid_x = torch.arange(self.geometry_param["L1"], + self.geometry_param["L2"], + grid_size, dtype=torch.float32, device=device) + grid_y = torch.arange(self.geometry_param["W1"], + self.geometry_param["W2"], + grid_size, + dtype=torch.float32, + device=device) + + xx, yy = torch.meshgrid([grid_x, grid_y]) + center_x = xx + x + center_y = yy + y + + bbx2d = torch.stack([center_x, center_y, dx, dy, yaw], dim=-1) + bbx2d = bbx2d[mask, :] + corners = box_utils.boxes2d_to_corners2d(bbx2d) + + return corners + + def post_process_debug(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D bounding box for debug purpose. + Step1: convert each cav's output to bounding box format + Step2: project the bounding boxes to ego space. 
+ Step:3 NMS + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box2d_tensor : torch.Tensor + The prediction bounding box tensor after NMS. + gt_box2d_tensor : torch.Tensor + The groundtruth bounding box tensor. + """ + # the final bounding box list + pred_box2d_list = [] + pred_score_list = [] + + # the transformation matrix to ego space + transformation_matrix = data_dict['transformation_matrix'] + + # classification probability -- (label_shape[0], label_shape[1]) + prob = output_dict['cls'].squeeze(0).squeeze(0) + prob = torch.sigmoid(prob) + + # regression map -- (label_shape[0], label_shape[1], 6) + reg_map = output_dict['reg'].squeeze(0).permute(1, 2, 0) + reg_map = self.denormalize_reg_map(reg_map) + + threshold = 0.5 + mask = torch.gt(prob, threshold) + + if mask.sum() > 0: + # (number of high confidence bbx, 4, 2) + corners2d = self.reg_map_to_bbx_corners(reg_map, mask) + # assume the z-diviation in transformation_matrix is small, + # thus we can pad zeros to simulate the 3d transformation. + # (number of high confidence bbx, 4, 3) + box3d = F.pad(corners2d, (0, 1)) + + # (number of high confidence bbx, 4, 2) + projected_boxes2d = \ + box_utils.project_points_by_matrix_torch(box3d.view(-1, 3), + transformation_matrix)[:, :2] + projected_boxes2d = projected_boxes2d.view(-1, 4, 2) + scores = prob[mask] + pred_box2d_list.append(projected_boxes2d) + pred_score_list.append(scores) + + pred_box2ds = torch.cat(pred_box2d_list, dim=0) + pred_scores = torch.cat(pred_score_list, dim=0) + + keep_index = box_utils.nms_rotated(pred_box2ds, + pred_scores, + self.params['nms_thresh']) + pred_box2ds = pred_box2ds[keep_index] + + # filter out the prediction out of the range. + mask = box_utils.get_mask_for_boxes_within_range_torch(pred_box2ds, self.params['gt_range']) + pred_box2ds = pred_box2ds[mask, :, :] + return pred_box2ds + + @staticmethod + def visualize(pred_box_tensor, gt_tensor, pcd, show_vis, save_path, + dataset=None): + """ + Visualize the BEV 2D prediction, ground truth with point cloud together. + + Parameters + ---------- + pred_box_tensor : torch.Tensor + (N, 8, 3) prediction. + + gt_tensor : torch.Tensor + (N, 8, 3) groundtruth bbx + + pcd : torch.Tensor + PointCloud, (N, 4). + + show_vis : bool + Whether to show visualization. + + save_path : str + Save the visualization results to given path. + + dataset : BaseDataset + opencood dataset object. 
+ """ + assert dataset is not None, "dataset argument can't be None" + vis_utils.visualize_single_sample_output_bev(pred_box_tensor, + gt_tensor, + pcd, + dataset, + show_vis, + save_path) diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/ciassd_postprocessor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/ciassd_postprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..9a5abd8176d3819d3323ab2b620d5a58acfb6527 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/ciassd_postprocessor.py @@ -0,0 +1,168 @@ +""" +3D Anchor Generator for Voxel +""" +import math +import sys + +import numpy as np +import torch +import torch.nn.functional as F + +from opencood.data_utils.post_processor.voxel_postprocessor \ + import VoxelPostprocessor +from opencood.utils import box_utils + + +class CiassdPostprocessor(VoxelPostprocessor): + def __init__(self, anchor_params, train): + super(CiassdPostprocessor, self).__init__(anchor_params, train) + self.train = train + self.anchor_num = self.params['anchor_args']['num'] + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + Step1: convert each cav's output to bounding box format + Step2: project the bounding boxes to ego space. + Step:3 NMS + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box3d_tensor : torch.Tensor + The prediction bounding box tensor after NMS. + gt_box3d_tensor : torch.Tensor + The groundtruth bounding box tensor. + """ + # the final bounding box list + global batch_num_box_count + pred_box3d_original_list = [] + pred_box3d_list = [] + pred_box2d_list = [] + + for cav_id, cav_content in data_dict.items(): + assert cav_id in output_dict + # the transformation matrix to ego space + if 'transformation_matrix' in cav_content: + transformation_matrix = cav_content['transformation_matrix'] + else: + transformation_matrix = torch.from_numpy(np.identity(4)).float().\ + to(cav_content['anchor_box'].device) + + # (H, W, anchor_num, 7) + anchor_box = cav_content['anchor_box'] + + # prediction result + preds_dict = output_dict[cav_id]['preds_dict_stage1'] + + # preds + prob = preds_dict['cls_preds'] + prob = torch.sigmoid(prob.permute(0, 2, 3, 1).contiguous()) + reg = preds_dict['box_preds'].permute(0, 2, 3, 1).contiguous() + iou = preds_dict['iou_preds'].permute(0, 2, 3, 1).contiguous().reshape(1, -1) + dir = preds_dict['dir_cls_preds'].permute(0, 2, 3, 1).contiguous().reshape(1, -1, 2) # [N, H*W*2, 2] + + # convert regression map back to bounding box + # (N, W*L*anchor_num, 7) + batch_box3d = self.delta_to_boxes3d(reg, anchor_box) + mask = torch.gt(prob, self.params['target_args']['score_threshold']) # [N, H, W, 2] + batch_num_box_count = [int(m.sum()) for m in mask] + mask = mask.view(1, -1) # [1,N*H*W*2] + mask_reg = mask.unsqueeze(2).repeat(1, 1, 7) + + # during validation/testing, the batch size should be 1 + if not self.train: + assert batch_box3d.shape[0] == 1 + + boxes3d = torch.masked_select(batch_box3d.view(-1, 7), mask_reg[0]).view(-1, 7) + scores = torch.masked_select(prob.view(-1), mask[0]) + + dir_labels = torch.max(dir, dim=-1)[1] # indices. shape [N, H*W*2]. 
value 0 or 1 + dir_labels = dir_labels[mask] # sum(mask==1) + # top_labels = torch.zeros([scores.shape[0]], dtype=torch.long).cuda() + if scores.shape[0] != 0: + iou = (iou + 1) * 0.5 + scores = scores * torch.pow(iou.masked_select(mask), 4) + # correct_direction + top_labels = (boxes3d[..., -1] > 0) ^ (dir_labels.byte() == 1) + boxes3d[..., -1] += torch.where(top_labels, torch.tensor(np.pi).type_as(boxes3d), + torch.tensor(0.0).type_as(boxes3d)) + pred_box3d_original_list.append(boxes3d.detach()) + + # convert output to bounding box + if len(boxes3d) != 0: + # (N, 8, 3) + boxes3d_corner = box_utils.boxes_to_corners_3d(boxes3d, order=self.params['order']) + # (N, 8, 3) + projected_boxes3d = box_utils.project_box3d(boxes3d_corner, transformation_matrix) + # convert 3d bbx to 2d, (N,4) + projected_boxes2d = box_utils.corner_to_standup_box_torch(projected_boxes3d) + # (N, 5) + boxes2d_score = torch.cat((projected_boxes2d, scores.unsqueeze(1)), dim=1) + + pred_box2d_list.append(boxes2d_score) + pred_box3d_list.append(projected_boxes3d) + + if len(pred_box2d_list) ==0 or len(pred_box3d_list) == 0: + return None, None + # shape: (N, 5) + pred_box2d_list = torch.vstack(pred_box2d_list) + # scores + scores = pred_box2d_list[:, -1] + # predicted 3d bbx + pred_box3d_tensor = torch.vstack(pred_box3d_list) + pred_box3d_original = torch.vstack(pred_box3d_original_list) + + if not self.train: + # remove large bbx + keep_index_1 = box_utils.remove_large_pred_bbx(pred_box3d_tensor) + keep_index_2 = box_utils.remove_bbx_abnormal_z(pred_box3d_tensor) + keep_index = torch.logical_and(keep_index_1, keep_index_2) + + pred_box3d_tensor = pred_box3d_tensor[keep_index] + scores = scores[keep_index] + + # nms + keep_index = box_utils.nms_rotated(pred_box3d_tensor, + scores, + self.params['nms_thresh'] + ) + + pred_box3d_tensor = pred_box3d_tensor[keep_index] + + # select cooresponding score + scores = scores[keep_index] + + # filter out the prediction out of the range. 
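+            # keep only boxes whose corners fall inside the detection range so
+            # predictions and ground truth are compared over the same region
+            # (no explicit range is passed here, unlike the gt_range used by
+            # the other postprocessors in this folder)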
+ mask = \ + box_utils.get_mask_for_boxes_within_range_torch(pred_box3d_tensor) + pred_box3d_tensor = pred_box3d_tensor[mask, :, :] + scores = scores[mask] + + assert scores.shape[0] == pred_box3d_tensor.shape[0] + return pred_box3d_tensor, scores + else: + cur_idx = 0 + batch_pred_boxes3d = [] + batch_scores = [] + for n in batch_num_box_count: + cur_boxes = pred_box3d_tensor[cur_idx:cur_idx+n] + cur_scores = scores[cur_idx:cur_idx+n] + # nms + keep_index = box_utils.nms_rotated(cur_boxes, + cur_scores, + self.params['nms_thresh'] + ) + cur_boxes = pred_box3d_original[cur_idx:cur_idx+n] # [:, [0, 1, 2, 5, 4, 3, 6]] # hwl -> lwh + batch_pred_boxes3d.append(cur_boxes[keep_index]) + batch_scores.append(cur_scores[keep_index]) + cur_idx += n + + return batch_pred_boxes3d, batch_scores diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/fpvrcnn_postprocessor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/fpvrcnn_postprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..e5e7dbde30f4578d7574411b56701ceae39de735 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/fpvrcnn_postprocessor.py @@ -0,0 +1,247 @@ +""" +3D Anchor Generator for Voxel +""" +import numpy as np +import torch + +from opencood.data_utils.post_processor.voxel_postprocessor \ + import VoxelPostprocessor +from opencood.utils import box_utils +from opencood.utils import common_utils +from opencood.utils.common_utils import limit_period +from icecream import ic + +class FpvrcnnPostprocessor(VoxelPostprocessor): + def __init__(self, anchor_params, train): + super(FpvrcnnPostprocessor, self).__init__(anchor_params, train) + # redetect box in stage2 + self.redet = True if 'redet' in anchor_params and anchor_params['redet'] else False + print("Postprocessor Stage2 ReDetect: ", self.redet) + + def post_process(self, data_dict, output_dict, stage1=False): + if stage1: + return self.post_process_stage1(data_dict, output_dict) + elif not self.redet: # stage2 refinement + return self.post_process_stage2(data_dict) + else: # stage2 redetect + return self.post_process_stage2_redet(data_dict, output_dict) + + def post_process_stage1(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + No NMS + + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box3d_tensor : torch.Tensor + The prediction bounding box tensor after NMS. + gt_box3d_tensor : torch.Tensor + The groundtruth bounding box tensor. 
+ """ + # the final bounding box list + pred_corners_list = [] + pred_box3d_list = [] + score_list = [] + + for cav_id, cav_content in data_dict.items(): + assert cav_id in output_dict + + # (H, W, anchor_num, 7) + anchor_box = cav_content['anchor_box'] + + # prediction result + preds_dict = output_dict[cav_id]['stage1_out'] + + # preds + prob = preds_dict['cls_preds'] + prob = torch.sigmoid(prob.permute(0, 2, 3, 1).contiguous()) + reg = preds_dict['reg_preds'] # .permute(0, 2, 3, 1).contiguous() + dir = preds_dict['dir_preds'].permute(0, 2, 3, 1).contiguous().reshape(1, -1, 2) + + batch_box3d = self.delta_to_boxes3d(reg, anchor_box) # hwl + mask = torch.gt(prob, self.params['target_args']['score_threshold']) + batch_num_box_count = [int(m.sum()) for m in mask] + mask = mask.view(1, -1) + mask_reg = mask.unsqueeze(2).repeat(1, 1, 7) + + boxes3d = torch.masked_select(batch_box3d.view(-1, 7), mask_reg[0]).view(-1, 7) # hwl. right + scores = torch.masked_select(prob.view(-1), mask[0]) + + dir_labels = torch.max(dir, dim=-1)[1] + dir_labels = dir_labels[mask] + + if scores.shape[0] != 0: + if 'iou_preds' in preds_dict: + iou = torch.sigmoid(preds_dict['iou_preds'].permute(0, 2, 3, 1).contiguous()).reshape(1, -1) + iou = torch.clamp(iou, min=0.0, max=1.0) + iou = (iou + 1) * 0.5 + scores = scores * torch.pow(iou.masked_select(mask), 4) + + # correct_direction + dir_offset = self.params['dir_args']['dir_offset'] + num_bins = self.params['dir_args']['num_bins'] + + dir = preds_dict['dir_preds'].permute(0, 2, 3, 1).contiguous().reshape(1, -1, 2) + dir_cls_preds = dir[mask] + # if rot_gt > 0, then the label is 1, then the regression target is [0, 1] + dir_labels = torch.max(dir_cls_preds, dim=-1)[1] # indices. shape [1, N*H*W*2]. value 0 or 1. If value is 1, then rot_gt > 0 + + period = (2 * np.pi / num_bins) # pi + dir_rot = limit_period( + boxes3d[..., 6] - dir_offset, 0, period + ) # 限制在0到pi之间 + boxes3d[..., 6] = dir_rot + dir_offset + period * dir_labels.to(dir_cls_preds.dtype) # 转化0.25pi到2.5pi + boxes3d[..., 6] = limit_period(boxes3d[..., 6], 0.5, 2 * np.pi) # limit to [-pi, pi] + + + # filter invalid boxes + keep_idx = torch.logical_and((boxes3d[:, 3:6] > 1).all(dim=1), (boxes3d[:, 3:6] < 10).all(dim=1)) + idx_start = 0 + count = [] + for i, n in enumerate(batch_num_box_count): + count.append(int(keep_idx[idx_start:idx_start+n].sum())) + batch_num_box_count = count + boxes3d = boxes3d[keep_idx] # hwl + scores = scores[keep_idx] + + # if the number of boxes is too huge, this would consume a lot of memory in the second stage + # therefore, randomly select some boxes if the box number is too big at the beginning of the training + + # if len(boxes3d) > 300: + # keep_idx = torch.multinomial(scores, 300) + # idx_start = 0 + # count = [] + # for i, n in enumerate(batch_num_box_count): + # count.append(int(torch.logical_and(keep_idx>=idx_start, keep_idx= 0 + + detections = detections[mask] + scores = rcnn_score[mask] + # gt_boxes = label_dict['gt_of_rois_src'][mask] + mask = nms_gpu(detections, scores, thresh=0.01)[0] + boxes3d = detections[mask] # keep hwl + + projected_boxes3d = None + if len(boxes3d) != 0: + # (N, 8, 3) + boxes3d_corner = \ + box_utils.boxes_to_corners_3d(boxes3d, + order="lwh") # in stage 2, box encoding is dxdydz order + # (N, 8, 3) + projected_boxes3d = \ + box_utils.project_box3d(boxes3d_corner, + data_dict['ego']['transformation_matrix']) + + ## Added by Yifan Lu, filter box outside of GT range + if projected_boxes3d is None: + return None, None + scores = scores[mask] + 
cav_range = self.params['gt_range'] + mask = box_utils.get_mask_for_boxes_within_range_torch(projected_boxes3d, cav_range) + projected_boxes3d = projected_boxes3d[mask] + scores = scores[mask] + + + return projected_boxes3d, scores + + # def post_process_stage2(self, data_dict): + # """ + # it's a pseduo stage2 process, but only output the stage1 rpn result. + # """ + # from opencood.pcdet_utils.iou3d_nms.iou3d_nms_utils import nms_gpu + # if 'stage2_out' not in data_dict['ego'].keys(): + # return None, None + # output_dict = data_dict['ego']['stage2_out'] + # label_dict = data_dict['ego']['rcnn_label_dict'] + # rcnn_score = label_dict['rois_scores_stage1'] + # rois = label_dict['rois'][:,[0,1,2,5,4,3,6]] + + # boxes3d_corner = \ + # box_utils.boxes_to_corners_3d(rois, + # order=self.params['order']) + # mask = box_utils.get_mask_for_boxes_within_range_torch(boxes3d_corner, self.params['gt_range']) + # boxes3d_corner = boxes3d_corner[mask] + # rcnn_score = rcnn_score[mask] + + # return boxes3d_corner, rcnn_score.flatten() + + + def post_process_stage2_redet(self, data_dict, output_dict): + return super().post_process(data_dict, output_dict) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/uncertainty_voxel_postprocessor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/uncertainty_voxel_postprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..c18e1f77b8a2ca2449591b86b54028262b809e27 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/uncertainty_voxel_postprocessor.py @@ -0,0 +1,251 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , OpenPCDet +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +3D Anchor Generator for Voxel +""" +import math +import sys + +import numpy as np +import torch +from torch.nn.functional import sigmoid +import torch.nn.functional as F + +from opencood.data_utils.post_processor.base_postprocessor \ + import BasePostprocessor +from opencood.data_utils.post_processor.voxel_postprocessor \ + import VoxelPostprocessor +from opencood.utils import box_utils +from opencood.utils.box_overlaps import bbox_overlaps +from opencood.visualization import vis_utils +from opencood.utils.common_utils import limit_period + + +class UncertaintyVoxelPostprocessor(VoxelPostprocessor): + def __init__(self, anchor_params, train): + super(UncertaintyVoxelPostprocessor, self).__init__(anchor_params, train) + + def post_process_stage1(self, stage1_output_dict, anchor_box): + """ + This function is used to calculate the detections in advance + and save them(after return) for CoAlign box alignment. 
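+
+        Returns
+        -------
+        batch_pred_corners3d : list of torch.Tensor
+            Per-CAV box corners after NMS, each of shape (Ni, 8, 3).
+        batch_pred_boxes3d : list of torch.Tensor
+            Per-CAV boxes in the original (Ni, 7) encoding, aligned with the corners.
+        batch_uncertainty : list of torch.Tensor
+            Per-CAV regression uncertainties for the kept boxes.
+            All three are None when no anchor exceeds the score threshold.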
+ """ + cls_preds = stage1_output_dict['cls_preds'] + reg_preds = stage1_output_dict['reg_preds'] + unc_preds = stage1_output_dict['unc_preds'] + + # the final bounding box list + uncertainty_dim = unc_preds.shape[1] // cls_preds.shape[1] + cls_preds = F.sigmoid(cls_preds.permute(0, 2, 3, 1).contiguous()) # [N, H, W, anchor_num] + unc_preds = unc_preds.permute(0,2,3,1).contiguous() #[N, H, W, anchor_num * 2] + + # convert regression map back to bounding box + batch_box3d = self.delta_to_boxes3d(reg_preds, anchor_box) # (N, W*L*2, 7) + mask = torch.gt(cls_preds, self.params['target_args']['score_threshold']) + batch_num_box_count = [int(m.sum()) for m in mask] + mask = mask.view(1, -1) + mask_reg = mask.unsqueeze(2).repeat(1, 1, 7) + mask_sm = mask.unsqueeze(2).repeat(1, 1, uncertainty_dim) + + + boxes3d = torch.masked_select(batch_box3d.view(-1, 7), mask_reg[0]).view(-1, 7) + uncertainty = torch.masked_select(unc_preds.view(-1,uncertainty_dim), mask_sm[0]).view(-1,uncertainty_dim) # [N*H*W*#anchor_num, 2] -> [num_select, 2] + scores = torch.masked_select(cls_preds.view(-1), mask[0]) + if 'dir_preds' in stage1_output_dict and len(boxes3d) != 0: + dir_preds = stage1_output_dict['dir_preds'] + dir_offset = self.params['dir_args']['dir_offset'] + num_bins = self.params['dir_args']['num_bins'] + + dir_cls_preds = dir_preds.permute(0, 2, 3, 1).contiguous().reshape(1, -1, num_bins) # [1, N*H*W*2, 2] + dir_cls_preds = dir_cls_preds[mask] + # if rot_gt > 0, then the label is 1, then the regression target is [0, 1] + dir_labels = torch.max(dir_cls_preds, dim=-1)[1] # indices. shape [1, N*H*W*2]. value 0 or 1. If value is 1, then rot_gt > 0 + + period = (2 * np.pi / num_bins) # pi + dir_rot = limit_period( + boxes3d[..., 6] - dir_offset, 0, period + ) # 限制在0到pi之间 + boxes3d[..., 6] = dir_rot + dir_offset + period * dir_labels.to(boxes3d.dtype) # 转化0.25pi到2.5pi + boxes3d[..., 6] = limit_period(boxes3d[..., 6], 0.5, 2 * np.pi) # limit to [-pi, pi] + + # convert output to bounding box + if len(boxes3d) != 0: + # save origianl format box. [N, 7] + pred_box3d_original = boxes3d.detach() + # (N, 8, 3) + boxes3d_corner = box_utils.boxes_to_corners_3d(boxes3d, order=self.params['order']) + # (N, 8, 3) + pred_corners_tensor = boxes3d_corner # box_utils.project_box3d(boxes3d_corner, transformation_matrix) + # convert 3d bbx to 2d, (N,4) + projected_boxes2d = box_utils.corner_to_standup_box_torch(pred_corners_tensor) + # (N, 5) + pred_box2d_score_tensor = torch.cat((projected_boxes2d, scores.unsqueeze(1)), dim=1) + scores = pred_box2d_score_tensor[:, -1] + + else: + return None, None, None + + # divide boxes to each cav + + cur_idx = 0 + batch_pred_corners3d = [] # [[N1, 8, 3], [N2, 8, 3], ...] + batch_pred_boxes3d = [] # [[N1, 7], [N2, 7], ...] + batch_uncertainty = [] # [[N1, 2], [N2, 2], ...] 
+ batch_scores = [] + for n in batch_num_box_count: + cur_corners = pred_corners_tensor[cur_idx: cur_idx+n] + cur_boxes = pred_box3d_original[cur_idx: cur_idx+n] + cur_scores = scores[cur_idx:cur_idx+n] + cur_uncertainty = uncertainty[cur_idx: cur_idx+n] + # nms + keep_index = box_utils.nms_rotated(cur_corners, + cur_scores, + self.params['nms_thresh'] + ) + batch_pred_corners3d.append(cur_corners[keep_index]) + batch_pred_boxes3d.append(cur_boxes[keep_index]) + batch_scores.append(cur_scores[keep_index]) + batch_uncertainty.append(cur_uncertainty[keep_index]) + cur_idx += n + + return batch_pred_corners3d, batch_pred_boxes3d, batch_uncertainty + + + def post_process(self, data_dict, output_dict, return_uncertainty=False): + """ + For fusion_method: no_w_uncertainty + """ + # the final bounding box list + pred_box3d_list = [] + pred_box2d_list = [] + uncertainty_list = [] + for cav_id, cav_content in data_dict.items(): + if cav_id not in output_dict: + continue + # the transformation matrix to ego space + transformation_matrix = cav_content['transformation_matrix'] # no clean + + # (H, W, anchor_num, 7) + anchor_box = cav_content['anchor_box'] + + # classification probability + uncertainty_dim = output_dict[cav_id]['unc_preds'].shape[1] // output_dict[cav_id]['cls_preds'].shape[1] + prob = output_dict[cav_id]['cls_preds'] + prob = F.sigmoid(prob.permute(0, 2, 3, 1)) + prob = prob.reshape(1, -1) + + # regression map + reg = output_dict[cav_id]['reg_preds'] + + # uncertainty map + unc_preds = output_dict[cav_id]['unc_preds'].permute(0, 2, 3, 1).contiguous() + unc_preds = unc_preds.view(unc_preds.shape[0], -1, uncertainty_dim) # [N, H*W*#anchor_num, 2] + + # convert regression map back to bounding box + batch_box3d = self.delta_to_boxes3d(reg, anchor_box) # (N, H*W*#anchor_num, 7) + mask = \ + torch.gt(prob, self.params['target_args']['score_threshold']) + mask = mask.view(1, -1) + mask_reg = mask.unsqueeze(2).repeat(1, 1, 7) + mask_sm = mask.unsqueeze(2).repeat(1, 1, uncertainty_dim) + + # during validation/testing, the batch size should be 1 + assert batch_box3d.shape[0] == 1 + boxes3d = torch.masked_select(batch_box3d[0], + mask_reg[0]).view(-1, 7) + scores = torch.masked_select(prob[0], mask[0]) + uncertainty = torch.masked_select(unc_preds[0], mask_sm[0]).view(-1, uncertainty_dim) + + + # adding dir classifier + if 'dir_preds' in output_dict[cav_id].keys() and len(boxes3d) != 0: + dir_offset = self.params['dir_args']['dir_offset'] + num_bins = self.params['dir_args']['num_bins'] + + + dir_preds = output_dict[cav_id]['dir_preds'] # [N, H, W, 4] + dir_cls_preds = dir_preds.permute(0, 2, 3, 1).contiguous().reshape(1, -1, num_bins) # [1, N*H*W*2, 2] + dir_cls_preds = dir_cls_preds[mask] + # if rot_gt > 0, then the label is 1, then the regression target is [0, 1] + dir_labels = torch.max(dir_cls_preds, dim=-1)[1] # indices. shape [1, N*H*W*2]. value 0 or 1. 
If value is 1, then rot_gt > 0 + + period = (2 * np.pi / num_bins) # pi + dir_rot = limit_period( + boxes3d[..., 6] - dir_offset, 0, period + ) # 限制在0到pi之间 + boxes3d[..., 6] = dir_rot + dir_offset + period * dir_labels.to(dir_cls_preds.dtype) # 转化0.25pi到2.5pi + boxes3d[..., 6] = limit_period(boxes3d[..., 6], 0.5, 2 * np.pi) # limit to [-pi, pi] + + + # convert output to bounding box + if len(boxes3d) != 0: + # (N, 8, 3) + boxes3d_corner = \ + box_utils.boxes_to_corners_3d(boxes3d, + order=self.params['order']) + # (N, 8, 3) + projected_boxes3d = \ + box_utils.project_box3d(boxes3d_corner, + transformation_matrix) + # convert 3d bbx to 2d, (N,4) + projected_boxes2d = \ + box_utils.corner_to_standup_box_torch(projected_boxes3d) + # (N, 5) + boxes2d_score = \ + torch.cat((projected_boxes2d, scores.unsqueeze(1)), dim=1) + + pred_box2d_list.append(boxes2d_score) + pred_box3d_list.append(projected_boxes3d) + uncertainty_list.append(uncertainty) + + + if len(pred_box2d_list) ==0 or len(pred_box3d_list) == 0: + if return_uncertainty: + return None, None, None + return None, None + # shape: (N, 5) + pred_box2d_list = torch.vstack(pred_box2d_list) + uncertainty_list = torch.vstack(uncertainty_list) + uncertainty = uncertainty_list + # scores + scores = pred_box2d_list[:, -1] + # predicted 3d bbx + pred_box3d_tensor = torch.vstack(pred_box3d_list) + # remove large bbx + keep_index_1 = box_utils.remove_large_pred_bbx(pred_box3d_tensor) + keep_index_2 = box_utils.remove_bbx_abnormal_z(pred_box3d_tensor) + keep_index = torch.logical_and(keep_index_1, keep_index_2) + + pred_box3d_tensor = pred_box3d_tensor[keep_index] + scores = scores[keep_index] + uncertainty = uncertainty[keep_index] + + # nms + keep_index = box_utils.nms_rotated(pred_box3d_tensor, + scores, + self.params['nms_thresh'] + ) + + pred_box3d_tensor = pred_box3d_tensor[keep_index] + + # select cooresponding score + scores = scores[keep_index] + uncertainty = uncertainty[keep_index] + + # filter out the prediction out of the range. 
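+        # the per-box uncertainty is kept aligned with the surviving boxes: it is
+        # indexed with the same keep_index / mask as the boxes and scores at every
+        # filtering step (large-box removal, NMS, and the range filter below)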
+ mask = \ + box_utils.get_mask_for_boxes_within_range_torch(pred_box3d_tensor, self.params['gt_range']) + pred_box3d_tensor = pred_box3d_tensor[mask, :, :] + scores = scores[mask] + uncertainty = uncertainty[mask] + + assert scores.shape[0] == pred_box3d_tensor.shape[0] + + if return_uncertainty: + return pred_box3d_tensor, scores, uncertainty + + return pred_box3d_tensor, scores + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/voxel_postprocessor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/voxel_postprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..479dc274d15ed3a3ec4f6c53aa2df4468e855e6b --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/post_processor/voxel_postprocessor.py @@ -0,0 +1,484 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , OpenPCDet +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +3D Anchor Generator for Voxel +""" +import math +import sys + +import numpy as np +import torch +from torch.nn.functional import sigmoid +import torch.nn.functional as F + +from opencood.data_utils.post_processor.base_postprocessor \ + import BasePostprocessor +from opencood.utils import box_utils +from opencood.utils.box_overlaps import bbox_overlaps +from opencood.visualization import vis_utils +from opencood.utils.common_utils import limit_period + + +class VoxelPostprocessor(BasePostprocessor): + def __init__(self, anchor_params, train): + super(VoxelPostprocessor, self).__init__(anchor_params, train) + self.anchor_num = self.params['anchor_args']['num'] + + def generate_anchor_box(self): + # load_voxel_params and load_point_pillar_params leads to the same anchor + # if voxel_size * feature stride is the same. + W = self.params['anchor_args']['W'] + H = self.params['anchor_args']['H'] + + l = self.params['anchor_args']['l'] + w = self.params['anchor_args']['w'] + h = self.params['anchor_args']['h'] + r = self.params['anchor_args']['r'] + + assert self.anchor_num == len(r) + r = [math.radians(ele) for ele in r] + + vh = self.params['anchor_args']['vh'] # voxel_size + vw = self.params['anchor_args']['vw'] + + xrange = [self.params['anchor_args']['cav_lidar_range'][0], + self.params['anchor_args']['cav_lidar_range'][3]] + yrange = [self.params['anchor_args']['cav_lidar_range'][1], + self.params['anchor_args']['cav_lidar_range'][4]] + + if 'feature_stride' in self.params['anchor_args']: + feature_stride = self.params['anchor_args']['feature_stride'] + else: + feature_stride = 2 + + + x = np.linspace(xrange[0] + vw, xrange[1] - vw, W // feature_stride) # vw is not precise, vw * feature_stride / 2 should be better? + y = np.linspace(yrange[0] + vh, yrange[1] - vh, H // feature_stride) + + + cx, cy = np.meshgrid(x, y) + cx = np.tile(cx[..., np.newaxis], self.anchor_num) # center + cy = np.tile(cy[..., np.newaxis], self.anchor_num) + cz = np.ones_like(cx) * -1.0 + + w = np.ones_like(cx) * w + l = np.ones_like(cx) * l + h = np.ones_like(cx) * h + + r_ = np.ones_like(cx) + for i in range(self.anchor_num): + r_[..., i] = r[i] + + if self.params['order'] == 'hwl': # pointpillar + anchors = np.stack([cx, cy, cz, h, w, l, r_], axis=-1) # (50, 176, 2, 7) + + elif self.params['order'] == 'lhw': + anchors = np.stack([cx, cy, cz, l, h, w, r_], axis=-1) + else: + sys.exit('Unknown bbx order.') + + return anchors + + def generate_label(self, **kwargs): + """ + Generate targets for training. 
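+
+        Positive anchors are those whose 2D IoU with a ground-truth box exceeds
+        target_args.pos_threshold (plus the highest-IoU anchor of each ground-truth
+        box); anchors below neg_threshold for every ground-truth box are negatives.
+        Regression targets are the usual normalized residuals: xy offsets divided
+        by the anchor's 2D diagonal, z offset divided by the anchor height, log
+        ratios for the dimensions, and the raw yaw difference.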
+ + Parameters + ---------- + argv : list + gt_box_center:(max_num, 7), anchor:(H, W, anchor_num, 7) + + Returns + ------- + label_dict : dict + Dictionary that contains all target related info. + """ + assert self.params['order'] == 'hwl', 'Currently Voxel only support' \ + 'hwl bbx order.' + # (max_num, 7) + gt_box_center = kwargs['gt_box_center'] + # (H, W, anchor_num, 7) + anchors = kwargs['anchors'] + # (max_num) + masks = kwargs['mask'] + + # (H, W) + feature_map_shape = anchors.shape[:2] + + # (H*W*anchor_num, 7) + anchors = anchors.reshape(-1, 7) + # normalization factor, (H * W * anchor_num) + anchors_d = np.sqrt(anchors[:, 4] ** 2 + anchors[:, 5] ** 2) + + # (H, W, 2) + pos_equal_one = np.zeros((*feature_map_shape, self.anchor_num)) + neg_equal_one = np.zeros((*feature_map_shape, self.anchor_num)) + # (H, W, self.anchor_num * 7) + targets = np.zeros((*feature_map_shape, self.anchor_num * 7)) + + # (n, 7) + gt_box_center_valid = gt_box_center[masks == 1] + # (n, 8, 3) + gt_box_corner_valid = \ + box_utils.boxes_to_corners_3d(gt_box_center_valid, + self.params['order']) + # (H*W*anchor_num, 8, 3) + anchors_corner = \ + box_utils.boxes_to_corners_3d(anchors, + order=self.params['order']) + # (H*W*anchor_num, 4) + anchors_standup_2d = \ + box_utils.corner2d_to_standup_box(anchors_corner) + # (n, 4) + gt_standup_2d = \ + box_utils.corner2d_to_standup_box(gt_box_corner_valid) + + # (H*W*anchor_n) + iou = bbox_overlaps( + np.ascontiguousarray(anchors_standup_2d).astype(np.float32), + np.ascontiguousarray(gt_standup_2d).astype(np.float32), + ) + + # the anchor boxes has the largest iou across + # shape: (n) + id_highest = np.argmax(iou.T, axis=1) + # [0, 1, 2, ..., n-1] + id_highest_gt = np.arange(iou.T.shape[0]) + # make sure all highest iou is larger than 0 + mask = iou.T[id_highest_gt, id_highest] > 0 + id_highest, id_highest_gt = id_highest[mask], id_highest_gt[mask] + + + # find anchors iou > params['pos_iou'] + id_pos, id_pos_gt = \ + np.where(iou > + self.params['target_args']['pos_threshold']) + # find anchors iou params['neg_iou'] + id_neg = np.where(np.sum(iou < + self.params['target_args']['neg_threshold'], + axis=1) == iou.shape[1])[0] + id_pos = np.concatenate([id_pos, id_highest]) + id_pos_gt = np.concatenate([id_pos_gt, id_highest_gt]) + id_pos, index = np.unique(id_pos, return_index=True) + id_pos_gt = id_pos_gt[index] + id_neg.sort() + + # cal the target and set the equal one + index_x, index_y, index_z = np.unravel_index( + id_pos, (*feature_map_shape, self.anchor_num)) + pos_equal_one[index_x, index_y, index_z] = 1 + + # calculate the targets + targets[index_x, index_y, np.array(index_z) * 7] = \ + (gt_box_center_valid[id_pos_gt, 0] - anchors[id_pos, 0]) / anchors_d[id_pos] + targets[index_x, index_y, np.array(index_z) * 7 + 1] = \ + (gt_box_center_valid[id_pos_gt, 1] - anchors[id_pos, 1]) / anchors_d[id_pos] + targets[index_x, index_y, np.array(index_z) * 7 + 2] = \ + (gt_box_center_valid[id_pos_gt, 2] - anchors[id_pos, 2]) / anchors[id_pos, 3] + targets[index_x, index_y, np.array(index_z) * 7 + 3] = np.log( + gt_box_center_valid[id_pos_gt, 3] / anchors[id_pos, 3]) + targets[index_x, index_y, np.array(index_z) * 7 + 4] = np.log( + gt_box_center_valid[id_pos_gt, 4] / anchors[id_pos, 4]) + targets[index_x, index_y, np.array(index_z) * 7 + 5] = np.log( + gt_box_center_valid[id_pos_gt, 5] / anchors[id_pos, 5]) + targets[index_x, index_y, np.array(index_z) * 7 + 6] = ( + gt_box_center_valid[id_pos_gt, 6] - anchors[id_pos, 6]) + + index_x, index_y, index_z = 
np.unravel_index( + id_neg, (*feature_map_shape, self.anchor_num)) + neg_equal_one[index_x, index_y, index_z] = 1 + + # to avoid a box be pos/neg in the same time + index_x, index_y, index_z = np.unravel_index( + id_highest, (*feature_map_shape, self.anchor_num)) + neg_equal_one[index_x, index_y, index_z] = 0 + + + label_dict = {'pos_equal_one': pos_equal_one, + 'neg_equal_one': neg_equal_one, + 'targets': targets} + + return label_dict + + @staticmethod + def collate_batch(label_batch_list): + """ + Customized collate function for target label generation. + + Parameters + ---------- + label_batch_list : list + The list of dictionary that contains all labels for several + frames. + + Returns + ------- + target_batch : dict + Reformatted labels in torch tensor. + """ + pos_equal_one = [] + neg_equal_one = [] + targets = [] + + for i in range(len(label_batch_list)): + pos_equal_one.append(label_batch_list[i]['pos_equal_one']) + neg_equal_one.append(label_batch_list[i]['neg_equal_one']) + targets.append(label_batch_list[i]['targets']) + + pos_equal_one = \ + torch.from_numpy(np.array(pos_equal_one)) + neg_equal_one = \ + torch.from_numpy(np.array(neg_equal_one)) + targets = \ + torch.from_numpy(np.array(targets)) + + return {'targets': targets, + 'pos_equal_one': pos_equal_one, + 'neg_equal_one': neg_equal_one} + + def post_process(self, data_dict, output_dict): + """ + Process the outputs of the model to 2D/3D bounding box. + Step1: convert each cav's output to bounding box format + Step2: project the bounding boxes to ego space. + Step:3 NMS + + For early and intermediate fusion, + data_dict only contains ego. + + For late fusion, + data_dcit contains all cavs, so we need transformation matrix. + + + Parameters + ---------- + data_dict : dict + The dictionary containing the origin input data of model. + + output_dict :dict + The dictionary containing the output of the model. + + Returns + ------- + pred_box3d_tensor : torch.Tensor + The prediction bounding box tensor after NMS. + gt_box3d_tensor : torch.Tensor + The groundtruth bounding box tensor. + """ + # the final bounding box list + pred_box3d_list = [] + pred_box2d_list = [] + for cav_id, cav_content in data_dict.items(): + assert cav_id in output_dict + # the transformation matrix to ego space + transformation_matrix = cav_content['transformation_matrix'] # no clean + + # rename variable + if 'psm' in output_dict[cav_id]: + output_dict[cav_id]['cls_preds'] = output_dict[cav_id]['psm'] + if 'rm' in output_dict: + output_dict[cav_id]['reg_preds'] = output_dict[cav_id]['rm'] + if 'dm' in output_dict: + output_dict[cav_id]['dir_preds'] = output_dict[cav_id]['dm'] + + # (H, W, anchor_num, 7) + anchor_box = cav_content['anchor_box'] + + # classification probability + prob = output_dict[cav_id]['cls_preds'] + prob = F.sigmoid(prob.permute(0, 2, 3, 1)) + # for multi-class, we need to select the class with the highest prob + if prob.shape[-1] > 1: + prob = torch.max(prob, dim=-1)[0] + prob = prob.reshape(1, -1) + + # regression map + reg = output_dict[cav_id]['reg_preds'] + + # convert regression map back to bounding box + if len(reg.shape) == 4: # anchor-based. PointPillars, SECOND + batch_box3d = self.delta_to_boxes3d(reg, anchor_box) + else: # anchor-free. 
CenterPoint
+                batch_box3d = reg.view(1, -1, 7)
+
+            mask = \
+                torch.gt(prob, self.params['target_args']['score_threshold'])
+            mask = mask.view(1, -1)
+            mask_reg = mask.unsqueeze(2).repeat(1, 1, 7)
+
+            # during validation/testing, the batch size should be 1
+            assert batch_box3d.shape[0] == 1
+            boxes3d = torch.masked_select(batch_box3d[0],
+                                          mask_reg[0]).view(-1, 7)
+            scores = torch.masked_select(prob[0], mask[0])
+
+            # adding dir classifier
+            if 'dir_preds' in output_dict[cav_id].keys() and len(boxes3d) != 0:
+                dir_offset = self.params['dir_args']['dir_offset']
+                num_bins = self.params['dir_args']['num_bins']
+
+                dm = output_dict[cav_id]['dir_preds'] # [N, H, W, 4]
+                dir_cls_preds = dm.permute(0, 2, 3, 1).contiguous().reshape(1, -1, num_bins) # [1, N*H*W*2, 2]
+                dir_cls_preds = dir_cls_preds[mask]
+                # if rot_gt > 0, then the label is 1, then the regression target is [0, 1]
+                dir_labels = torch.max(dir_cls_preds, dim=-1)[1] # indices. shape [1, N*H*W*2]. value 0 or 1. If value is 1, then rot_gt > 0
+
+                period = (2 * np.pi / num_bins) # pi
+                dir_rot = limit_period(
+                    boxes3d[..., 6] - dir_offset, 0, period
+                ) # limit to the range [0, period)
+                boxes3d[..., 6] = dir_rot + dir_offset + period * dir_labels.to(dir_cls_preds.dtype) # map back to roughly [0.25*pi, 2.5*pi]
+                boxes3d[..., 6] = limit_period(boxes3d[..., 6], 0.5, 2 * np.pi) # limit to [-pi, pi]
+
+            if 'iou_preds' in output_dict[cav_id].keys() and len(boxes3d) != 0:
+                iou = torch.sigmoid(output_dict[cav_id]['iou_preds'].permute(0, 2, 3, 1).contiguous()).reshape(1, -1)
+                iou = torch.clamp(iou, min=0.0, max=1.0)
+                iou = (iou + 1) * 0.5
+                scores = scores * torch.pow(iou.masked_select(mask), 4)
+
+            # convert output to bounding box
+            if len(boxes3d) != 0:
+                # (N, 8, 3)
+                boxes3d_corner = \
+                    box_utils.boxes_to_corners_3d(boxes3d,
+                                                  order=self.params['order'])
+
+                # STEP 2
+                # (N, 8, 3)
+                projected_boxes3d = \
+                    box_utils.project_box3d(boxes3d_corner,
+                                            transformation_matrix)
+                # convert 3d bbx to 2d, (N,4)
+                projected_boxes2d = \
+                    box_utils.corner_to_standup_box_torch(projected_boxes3d)
+                # (N, 5)
+                boxes2d_score = \
+                    torch.cat((projected_boxes2d, scores.unsqueeze(1)), dim=1)
+
+                pred_box2d_list.append(boxes2d_score)
+                pred_box3d_list.append(projected_boxes3d)
+
+        if len(pred_box2d_list) == 0 or len(pred_box3d_list) == 0:
+            return None, None
+        # shape: (N, 5)
+        pred_box2d_list = torch.vstack(pred_box2d_list)
+        # scores
+        scores = pred_box2d_list[:, -1]
+        # predicted 3d bbx
+        pred_box3d_tensor = torch.vstack(pred_box3d_list)
+        # remove large bbx
+        keep_index_1 = box_utils.remove_large_pred_bbx(pred_box3d_tensor)
+        keep_index_2 = box_utils.remove_bbx_abnormal_z(pred_box3d_tensor)
+        keep_index = torch.logical_and(keep_index_1, keep_index_2)
+
+        pred_box3d_tensor = pred_box3d_tensor[keep_index]
+        scores = scores[keep_index]
+
+        # STEP3
+        # nms
+        keep_index = box_utils.nms_rotated(pred_box3d_tensor,
+                                           scores,
+                                           self.params['nms_thresh']
+                                           )
+
+        pred_box3d_tensor = pred_box3d_tensor[keep_index]
+
+        # select corresponding score
+        scores = scores[keep_index]
+
+        # filter out the prediction out of the range.
with z-dim + pred_box3d_np = pred_box3d_tensor.cpu().numpy() + pred_box3d_np, mask = box_utils.mask_boxes_outside_range_numpy(pred_box3d_np, + self.params['gt_range'], + order=None, + return_mask=True) + pred_box3d_tensor = torch.from_numpy(pred_box3d_np).to(device=pred_box3d_tensor.device) + scores = scores[mask] + + assert scores.shape[0] == pred_box3d_tensor.shape[0] + + return pred_box3d_tensor, scores + + @staticmethod + def delta_to_boxes3d(deltas, anchors): + """ + Convert the output delta to 3d bbx. + + Parameters + ---------- + deltas : torch.Tensor + (N, 14, H, W) + anchors : torch.Tensor + (W, L, 2, 7) -> xyzhwlr + + Returns + ------- + box3d : torch.Tensor + (N, W*L*2, 7) + """ + # batch size + N = deltas.shape[0] + deltas = deltas.permute(0, 2, 3, 1).contiguous().view(N, -1, 7) + boxes3d = torch.zeros_like(deltas) + + if deltas.is_cuda: + anchors = anchors.cuda() + boxes3d = boxes3d.cuda() + + # (W*L*2, 7) + anchors_reshaped = anchors.view(-1, 7).float() + # the diagonal of the anchor 2d box, (W*L*2) + anchors_d = torch.sqrt( + anchors_reshaped[:, 4] ** 2 + anchors_reshaped[:, 5] ** 2) + anchors_d = anchors_d.repeat(N, 2, 1).transpose(1, 2) + anchors_reshaped = anchors_reshaped.repeat(N, 1, 1) + + # Inv-normalize to get xyz + boxes3d[..., [0, 1]] = torch.mul(deltas[..., [0, 1]], anchors_d) + \ + anchors_reshaped[..., [0, 1]] + boxes3d[..., [2]] = torch.mul(deltas[..., [2]], + anchors_reshaped[..., [3]]) + \ + anchors_reshaped[..., [2]] + # hwl + boxes3d[..., [3, 4, 5]] = torch.exp( + deltas[..., [3, 4, 5]]) * anchors_reshaped[..., [3, 4, 5]] + # yaw angle + boxes3d[..., 6] = deltas[..., 6] + anchors_reshaped[..., 6] + + return boxes3d + + @staticmethod + def visualize(pred_box_tensor, gt_tensor, pcd, show_vis, save_path, dataset=None): + """ + Visualize the prediction, ground truth with point cloud together. + + Parameters + ---------- + pred_box_tensor : torch.Tensor + (N, 8, 3) prediction. + + gt_tensor : torch.Tensor + (N, 8, 3) groundtruth bbx + + pcd : torch.Tensor + PointCloud, (N, 4). + + show_vis : bool + Whether to show visualization. + + save_path : str + Save the visualization results to given path. + + dataset : BaseDataset + opencood dataset object. + + """ + vis_utils.visualize_single_sample_output_gt(pred_box_tensor, + gt_tensor, + pcd, + show_vis, + save_path) diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__init__.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4096f05d0d07d4adc11e232097768d0d4f169a24 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__init__.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + +from opencood.data_utils.pre_processor.base_preprocessor import BasePreprocessor +from opencood.data_utils.pre_processor.voxel_preprocessor import VoxelPreprocessor +from opencood.data_utils.pre_processor.bev_preprocessor import BevPreprocessor +from opencood.data_utils.pre_processor.sp_voxel_preprocessor import SpVoxelPreprocessor + +__all__ = { + 'BasePreprocessor': BasePreprocessor, + 'VoxelPreprocessor': VoxelPreprocessor, + 'BevPreprocessor': BevPreprocessor, + 'SpVoxelPreprocessor': SpVoxelPreprocessor +} + + +def build_preprocessor(preprocess_cfg, train): + process_method_name = preprocess_cfg['core_method'] + error_message = f"{process_method_name} is not found. 
" \ + f"Please add your processor file's name in opencood/" \ + f"data_utils/processor/init.py" + assert process_method_name in ['BasePreprocessor', 'VoxelPreprocessor', + 'BevPreprocessor', 'SpVoxelPreprocessor'], \ + error_message + + processor = __all__[process_method_name]( + preprocess_params=preprocess_cfg, + train=train + ) + + return processor diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/__init__.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..681acc1dc7d70e93a280a6c31384f7bc509e4047 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/__init__.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/base_preprocessor.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/base_preprocessor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3639e6c5518f5cd7c3ce14029804896303729ecd Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/base_preprocessor.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/bev_preprocessor.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/bev_preprocessor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..45ff28b233651916134c0052dfc7c98d646d3657 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/bev_preprocessor.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/sp_voxel_preprocessor.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/sp_voxel_preprocessor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..526a8985aa1ba1252d6040b4de3cccef98d44b52 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/sp_voxel_preprocessor.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/voxel_preprocessor.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/voxel_preprocessor.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..397b3ac2789fde44dc5053a7511028ab1a033508 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/__pycache__/voxel_preprocessor.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/base_preprocessor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/base_preprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..34215183a3f67bbb4c3801f0e072e004734b2264 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/base_preprocessor.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + 
+import numpy as np + +from opencood.utils import pcd_utils + + +class BasePreprocessor(object): + """ + Basic Lidar pre-processor. + + Parameters + ---------- + preprocess_params : dict + The dictionary containing all parameters of the preprocessing. + + train : bool + Train or test mode. + """ + + def __init__(self, preprocess_params, train): + self.params = preprocess_params + self.train = train + + def preprocess(self, pcd_np): + """ + Preprocess the lidar points by simple sampling. + + Parameters + ---------- + pcd_np : np.ndarray + The raw lidar. + + Returns + ------- + data_dict : the output dictionary. + """ + data_dict = {} + sample_num = self.params['args']['sample_num'] + + pcd_np = pcd_utils.downsample_lidar(pcd_np, sample_num) + data_dict['downsample_lidar'] = pcd_np + + return data_dict + + def project_points_to_bev_map(self, points, ratio=0.1): + """ + Project points to BEV occupancy map with default ratio=0.1. + + Parameters + ---------- + points : np.ndarray + (N, 3) / (N, 4) + + ratio : float + Discretization parameters. Default is 0.1. + + Returns + ------- + bev_map : np.ndarray + BEV occupancy map including projected points with shape + (img_row, img_col). + + """ + L1, W1, H1, L2, W2, H2 = self.params["cav_lidar_range"] + img_row = int((L2 - L1) / ratio) + img_col = int((W2 - W1) / ratio) + bev_map = np.zeros((img_row, img_col)) + bev_origin = np.array([L1, W1, H1]).reshape(1, -1) + # (N, 3) + indices = ((points[:, :3] - bev_origin) / ratio).astype(int) + mask = np.logical_and(indices[:, 0] > 0, indices[:, 0] < img_row) + mask = np.logical_and(mask, np.logical_and(indices[:, 1] > 0, + indices[:, 1] < img_col)) + indices = indices[mask, :] + bev_map[indices[:, 0], indices[:, 1]] = 1 + return bev_map diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/bev_preprocessor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/bev_preprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..8b80ee04b24c69513a5e13964939e28b360584b8 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/bev_preprocessor.py @@ -0,0 +1,125 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +Convert lidar to bev +""" + +import numpy as np +import torch +from opencood.data_utils.pre_processor.base_preprocessor import \ + BasePreprocessor + + +class BevPreprocessor(BasePreprocessor): + def __init__(self, preprocess_params, train): + super(BevPreprocessor, self).__init__(preprocess_params, train) + self.lidar_range = self.params['cav_lidar_range'] + self.geometry_param = preprocess_params["geometry_param"] + + def preprocess(self, pcd_raw): + """ + Preprocess the lidar points to BEV representations. + + Parameters + ---------- + pcd_raw : np.ndarray + The raw lidar. + + Returns + ------- + data_dict : the structured output dictionary. 
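+
+        Examples
+        --------
+        Illustrative sketch only; the actual shape of ``bev_input`` is set by
+        ``geometry_param['input_shape']`` in the yaml config, and
+        ``bev_preprocessor`` is assumed to be an already-built instance::
+
+            >>> pcd = np.random.rand(1000, 4).astype(np.float32)
+            >>> out = bev_preprocessor.preprocess(pcd)
+            >>> out['bev_input'].shape   # (C, H, W), channels first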
+ """ + bev = np.zeros(self.geometry_param['input_shape'], dtype=np.float32) + intensity_map_count = np.zeros((bev.shape[0], bev.shape[1]), + dtype=np.int) + bev_origin = np.array( + [self.geometry_param["L1"], self.geometry_param["W1"], + self.geometry_param["H1"]]).reshape(1, -1) + + indices = ((pcd_raw[:, :3] - bev_origin) / self.geometry_param[ + "res"]).astype(int) + + # if any point hit this voxel, set the voxel to 1 + for i in range(indices.shape[0]): + bev[indices[i, 0], indices[i, 1], indices[i, 2]] = 1 + bev[indices[i, 0], indices[i, 1], -1] += pcd_raw[i, 3] # intensity + intensity_map_count[indices[i, 0], indices[i, 1]] += 1 + divide_mask = intensity_map_count != 0 + bev[divide_mask, -1] = np.divide(bev[divide_mask, -1], + intensity_map_count[divide_mask]) + + data_dict = { + "bev_input": np.transpose(bev, (2, 0, 1)) # (C,H,W) + } + return data_dict + + @staticmethod + def collate_batch_list(batch): + """ + Customized pytorch data loader collate function. + + Parameters + ---------- + batch : list + List of dictionary. Each dictionary represent a single frame. + + Returns + ------- + processed_batch : dict + Updated lidar batch. + """ + bev_input_list = [ + x["bev_input"][np.newaxis, ...] for x in batch + ] + processed_batch = { + "bev_input": torch.from_numpy( + np.concatenate(bev_input_list, axis=0)) + } + return processed_batch + + @staticmethod + def collate_batch_dict(batch): + """ + Customized pytorch data loader collate function. + + Parameters + ---------- + batch : dict + Dict of list. Each element represents a CAV. + + Returns + ------- + processed_batch : dict + Updated lidar batch. + """ + bev_input_list = [ + x[np.newaxis, ...] for x in batch["bev_input"] + ] + processed_batch = { + "bev_input": torch.from_numpy( + np.concatenate(bev_input_list, axis=0)) + } + return processed_batch + + def collate_batch(self, batch): + """ + Customized pytorch data loader collate function. + + Parameters + ---------- + batch : list / dict + Batched data. + Returns + ------- + processed_batch : dict + Updated lidar batch. 
+ """ + if isinstance(batch, list): + return self.collate_batch_list(batch) + elif isinstance(batch, dict): + return self.collate_batch_dict(batch) + else: + raise NotImplemented diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/sp_voxel_preprocessor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/sp_voxel_preprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..068965ee3300cb5d3d320dce881ac4ea5f03170f --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/sp_voxel_preprocessor.py @@ -0,0 +1,174 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , OpenPCDet +# License: TDG-Attribution-NonCommercial-NoDistrib + +""" +Transform points to voxels using sparse conv library +""" +import sys + +import numpy as np +import torch +from icecream import ic + +from opencood.data_utils.pre_processor.base_preprocessor import \ + BasePreprocessor + + +class SpVoxelPreprocessor(BasePreprocessor): + def __init__(self, preprocess_params, train): + super(SpVoxelPreprocessor, self).__init__(preprocess_params, + train) + self.spconv = 1 + try: + # spconv v1.x + from spconv.utils import VoxelGeneratorV2 as VoxelGenerator + except: + # spconv v2.x + from cumm import tensorview as tv + from spconv.utils import Point2VoxelCPU3d as VoxelGenerator + self.tv = tv + self.spconv = 2 + self.lidar_range = self.params['cav_lidar_range'] + self.voxel_size = self.params['args']['voxel_size'] + self.max_points_per_voxel = self.params['args']['max_points_per_voxel'] + + if train: + self.max_voxels = self.params['args']['max_voxel_train'] + else: + self.max_voxels = self.params['args']['max_voxel_test'] + + grid_size = (np.array(self.lidar_range[3:6]) - + np.array(self.lidar_range[0:3])) / np.array(self.voxel_size) + self.grid_size = np.round(grid_size).astype(np.int64) + + # use sparse conv library to generate voxel + if self.spconv == 1: + self.voxel_generator = VoxelGenerator( + voxel_size=self.voxel_size, + point_cloud_range=self.lidar_range, + max_num_points=self.max_points_per_voxel, + max_voxels=self.max_voxels + ) + else: + self.voxel_generator = VoxelGenerator( + vsize_xyz=self.voxel_size, + coors_range_xyz=self.lidar_range, + max_num_points_per_voxel=self.max_points_per_voxel, + num_point_features=4, + max_num_voxels=self.max_voxels + ) + + def preprocess(self, pcd_np): + data_dict = {} + if self.spconv == 1: + voxel_output = self.voxel_generator.generate(pcd_np) + else: + pcd_tv = self.tv.from_numpy(pcd_np) + voxel_output = self.voxel_generator.point_to_voxel(pcd_tv) + if isinstance(voxel_output, dict): + voxels, coordinates, num_points = \ + voxel_output['voxels'], voxel_output['coordinates'], \ + voxel_output['num_points_per_voxel'] + else: + voxels, coordinates, num_points = voxel_output + + if self.spconv == 2: + voxels = voxels.numpy() + coordinates = coordinates.numpy() + num_points = num_points.numpy() + + data_dict['voxel_features'] = voxels + data_dict['voxel_coords'] = coordinates + data_dict['voxel_num_points'] = num_points + + return data_dict + + def collate_batch(self, batch): + """ + Customized pytorch data loader collate function. + + Parameters + ---------- + batch : list or dict + List or dictionary. + + Returns + ------- + processed_batch : dict + Updated lidar batch. 
+ """ + + if isinstance(batch, list): + return self.collate_batch_list(batch) + elif isinstance(batch, dict): + return self.collate_batch_dict(batch) + else: + sys.exit('Batch has too be a list or a dictionarn') + + @staticmethod + def collate_batch_list(batch): + """ + Customized pytorch data loader collate function. + + Parameters + ---------- + batch : list + List of dictionary. Each dictionary represent a single frame. + + Returns + ------- + processed_batch : dict + Updated lidar batch. + """ + voxel_features = [] + voxel_num_points = [] + voxel_coords = [] + + for i in range(len(batch)): + voxel_features.append(batch[i]['voxel_features']) + voxel_num_points.append(batch[i]['voxel_num_points']) + coords = batch[i]['voxel_coords'] + voxel_coords.append( + np.pad(coords, ((0, 0), (1, 0)), + mode='constant', constant_values=i)) + + voxel_num_points = torch.from_numpy(np.concatenate(voxel_num_points)) + voxel_features = torch.from_numpy(np.concatenate(voxel_features)) + voxel_coords = torch.from_numpy(np.concatenate(voxel_coords)) + + return {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points} + + @staticmethod + def collate_batch_dict(batch: dict): + """ + Collate batch if the batch is a dictionary, + eg: {'voxel_features': [feature1, feature2...., feature n]} + + Parameters + ---------- + batch : dict + + Returns + ------- + processed_batch : dict + Updated lidar batch. + """ + voxel_features = \ + torch.from_numpy(np.concatenate(batch['voxel_features'])) + voxel_num_points = \ + torch.from_numpy(np.concatenate(batch['voxel_num_points'])) + coords = batch['voxel_coords'] + voxel_coords = [] + + for i in range(len(coords)): + voxel_coords.append( + np.pad(coords[i], ((0, 0), (1, 0)), + mode='constant', constant_values=i)) + voxel_coords = torch.from_numpy(np.concatenate(voxel_coords)) + + return {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points} diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/voxel_preprocessor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/voxel_preprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..2149ce39bb8c72fe4d8287ec5956a4817e111917 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/data_utils/pre_processor/voxel_preprocessor.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +Convert lidar to voxel. This class was manually designed, and we recommend +to use sp_voxel_preprocessor. +""" +import sys + +import numpy as np +import torch + +from opencood.data_utils.pre_processor.base_preprocessor import \ + BasePreprocessor + + +class VoxelPreprocessor(BasePreprocessor): + def __init__(self, preprocess_params, train): + super(VoxelPreprocessor, self).__init__(preprocess_params, train) + # TODO: add intermediate lidar range later + self.lidar_range = self.params['cav_lidar_range'] + + self.vw = self.params['args']['vw'] + self.vh = self.params['args']['vh'] + self.vd = self.params['args']['vd'] + self.T = self.params['args']['T'] + + def preprocess(self, pcd_np): + """ + Preprocess the lidar points by voxelization. + + Parameters + ---------- + pcd_np : np.ndarray + The raw lidar. + + Returns + ------- + data_dict : the structured output dictionary. 
+ """ + data_dict = {} + + # calculate the voxel coordinates + voxel_coords = ((pcd_np[:, :3] - + np.floor(np.array([self.lidar_range[0], + self.lidar_range[1], + self.lidar_range[2]])) / ( + self.vw, self.vh, self.vd))).astype(np.int32) + + # convert to (D, H, W) as the paper + voxel_coords = voxel_coords[:, [2, 1, 0]] + voxel_coords, inv_ind, voxel_counts = np.unique(voxel_coords, axis=0, + return_inverse=True, + return_counts=True) + + voxel_features = [] + + for i in range(len(voxel_coords)): + voxel = np.zeros((self.T, 7), dtype=np.float32) + pts = pcd_np[inv_ind == i] + if voxel_counts[i] > self.T: + pts = pts[:self.T, :] + voxel_counts[i] = self.T + + # augment the points + voxel[:pts.shape[0], :] = np.concatenate((pts, pts[:, :3] - + np.mean(pts[:, :3], 0)), + axis=1) + voxel_features.append(voxel) + + data_dict['voxel_features'] = np.array(voxel_features) + data_dict['voxel_coords'] = voxel_coords + + return data_dict + + def collate_batch(self, batch): + """ + Customized pytorch data loader collate function. + + Parameters + ---------- + batch : list or dict + List or dictionary. + + Returns + ------- + processed_batch : dict + Updated lidar batch. + """ + + if isinstance(batch, list): + return self.collate_batch_list(batch) + elif isinstance(batch, dict): + return self.collate_batch_dict(batch) + else: + sys.exit('Batch has too be a list or a dictionarn') + + @staticmethod + def collate_batch_list(batch): + """ + Customized pytorch data loader collate function. + + Parameters + ---------- + batch : list + List of dictionary. Each dictionary represent a single frame. + + Returns + ------- + processed_batch : dict + Updated lidar batch. + """ + voxel_features = [] + voxel_coords = [] + + for i in range(len(batch)): + voxel_features.append(batch[i]['voxel_features']) + coords = batch[i]['voxel_coords'] + voxel_coords.append( + np.pad(coords, ((0, 0), (1, 0)), + mode='constant', constant_values=i)) + + voxel_features = torch.from_numpy(np.concatenate(voxel_features)) + voxel_coords = torch.from_numpy(np.concatenate(voxel_coords)) + + return {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords} + + @staticmethod + def collate_batch_dict(batch: dict): + """ + Collate batch if the batch is a dictionary, + eg: {'voxel_features': [feature1, feature2...., feature n]} + + Parameters + ---------- + batch : dict + + Returns + ------- + processed_batch : dict + Updated lidar batch. 
+ """ + voxel_features = \ + torch.from_numpy(np.concatenate(batch['voxel_features'])) + coords = batch['voxel_coords'] + voxel_coords = [] + + for i in range(len(coords)): + voxel_coords.append( + np.pad(coords[i], ((0, 0), (1, 0)), + mode='constant', constant_values=i)) + voxel_coords = torch.from_numpy(np.concatenate(voxel_coords)) + + return {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords} diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/__init__.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/__pycache__/__init__.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f7f2a0f9b3496c783b4ac624cf0c3fc2fa294ce1 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/__pycache__/__init__.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/__pycache__/center_point_loss_multiclass.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/__pycache__/center_point_loss_multiclass.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..676be070edf9eef84df7f22f8c639c49d8cba455 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/__pycache__/center_point_loss_multiclass.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/heterception_loss.py.2stage b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/heterception_loss.py.2stage new file mode 100644 index 0000000000000000000000000000000000000000..e4cec2839134406692d0f5575e1c2bb7c39a8385 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/heterception_loss.py.2stage @@ -0,0 +1,251 @@ +import torch +from torch import nn +import numpy as np +from opencood.loss.ciassd_loss import CiassdLoss, weighted_smooth_l1_loss +from icecream import ic + +class HeterceptionLoss(nn.Module): + def __init__(self, args): + super(HeterceptionLoss, self).__init__() + # self.ciassd_loss = CiassdLoss(args['stage1']) + self.ciassd_loss = CiassdLoss(args['shared_head_out'], keyname='shared_head_out') + + self.cls = args['stage2']['cls'] + self.reg = args['stage2']['reg'] + self.iou = args['stage2']['iou'] + self.kd = args['stage2']['kd'] + self.cons = args['stage2']['cons'] + self.kd_fn = nn.MSELoss(reduce='mean') + + self.loss_dict = {} + + def forward(self, output_dict, label_dict): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + ciassd_loss = self.ciassd_loss(output_dict, label_dict) + + # only update ciassd if no bbox is detected in the first stage + if 'stage2_out' not in output_dict: + self.loss_dict = { + 'loss': ciassd_loss, + } + return ciassd_loss + + loss = 0 + self.loss_dict = {} + + # rcnn out + rcnn_cls = output_dict['stage2_out']['rcnn_cls'].view(1, -1, 1) + rcnn_iou = output_dict['stage2_out']['rcnn_iou'].view(1, -1, 1) + rcnn_reg = output_dict['stage2_out']['rcnn_reg'].view(1, -1, 7) + + tgt_cls = output_dict['rcnn_label_dict']['cls_tgt'].view(1, -1, 1) + tgt_iou = output_dict['rcnn_label_dict']['iou_tgt'].view(1, -1, 1) + tgt_reg = output_dict['rcnn_label_dict']['reg_tgt'].view(1, -1, 7) + + pos_norm = tgt_cls.sum() 
+ # cls loss + loss_cls = weighted_sigmoid_binary_cross_entropy(rcnn_cls, tgt_cls) + + + # iou loss + # TODO: also count the negative samples + tgt_iou = 2 * (tgt_iou - 0.5) # normalize to -1, 1 + loss_iou = weighted_smooth_l1_loss(rcnn_iou, tgt_iou, + weights=tgt_cls).mean() + + # regression loss + # [deprecated by Yifan Lu] Target resampling : Generate a weights mask to force the regressor concentrate on low iou predictions + # sample 50% with iou>0.7 and 50% < 0.7 + weights = torch.ones(tgt_iou.shape, device=tgt_iou.device) + weights[tgt_cls == 0] = 0 + # neg = torch.logical_and(tgt_iou < 0.7, tgt_cls != 0) + # pos = torch.logical_and(tgt_iou >= 0.7, tgt_cls != 0) + # num_neg = int(neg.sum(dim=1)) + # num_pos = int(pos.sum(dim=1)) + # num_pos_smps = max(num_neg, 2) + # pos_indices = torch.where(pos)[1] + # not_selsected = torch.randperm(num_pos)[:num_pos - num_pos_smps] + # # not_selsected_indices = pos_indices[not_selsected] + # weights[:, pos_indices[not_selsected]] = 0 + loss_reg = weighted_smooth_l1_loss(rcnn_reg, tgt_reg, + weights=weights / max(weights.sum(), + 1)).sum() + + loss_cls_reduced = loss_cls * self.cls['weight'] + loss_iou_reduced = loss_iou * self.iou['weight'] + loss_reg_reduced = loss_reg * self.reg['weight'] + + # if torch.isnan(loss_reg_reduced): + # print('debug') + + rcnn_loss = loss_cls_reduced + loss_iou_reduced + loss_reg_reduced + + # knowledge distillation + if 'kd_items' in output_dict: + lidar_features = output_dict['kd_items']["lidar_roi_features"] # [C, sum(bev_grids)] + camera_features = output_dict['kd_items']["camera_roi_features"] # [C, sum(bev_grids)] + kd_loss_reduced = self.kd_fn(lidar_features, camera_features) * self.kd['weight'] + loss += kd_loss_reduced + self.loss_dict.update({'kd_loss': kd_loss_reduced}) + + + # transformer-based consistency + if 'cons_items' in output_dict: + random_cav_mask = output_dict['cons_items']['random_cav_mask'] # [sum(RoI_Hi*RoI_Wi), max_cav] + valid_mask = random_cav_mask[:, 0] == 0 + ego_feature = output_dict['cons_items']['fused_roi_feature'][:,0,:][valid_mask] # [sum(RoI_Hi*RoI_Wi), C] + cav_feature = output_dict['cons_items']['fused_roi_feature'][random_cav_mask==1][valid_mask] + cons_loss_reduced = self.kd_fn(ego_feature, cav_feature) * self.cons['weight'] + loss += cons_loss_reduced + self.loss_dict.update({'cons_loss': cons_loss_reduced}) + + loss += rcnn_loss + ciassd_loss + + self.loss_dict.update({ + 'loss': loss, + 'rcnn_loss': rcnn_loss, + 'cls_loss': loss_cls_reduced, + 'iou_loss': loss_iou_reduced, + 'reg_loss': loss_reg_reduced, + }) + + return loss + + def logging(self, epoch, batch_id, batch_len, writer=None): + """ + Print out the loss function for current iteration. + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. 
+ batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + ciassd_loss_dict = self.ciassd_loss.loss_dict + ciassd_total_loss = ciassd_loss_dict['total_loss'] + reg_loss = ciassd_loss_dict['reg_loss'] + cls_loss = ciassd_loss_dict['cls_loss'] + dir_loss = ciassd_loss_dict['dir_loss'] + if 'iou_loss' in ciassd_loss_dict: + iou_loss = ciassd_loss_dict['iou_loss'].item() + else: + iou_loss = 0 + + + if (batch_id + 1) % 1 == 0: + str_to_print = "[epoch %d][%d/%d], || Loss: %.4f || Ciassd: %.4f " \ + "|| Cls1: %.4f || Loc1: %.4f || Dir1: %.4f || Iou1: %.4f" % ( + epoch, batch_id + 1, batch_len, self.loss_dict['loss'], + ciassd_total_loss.item(), cls_loss.item(), reg_loss.item(), + dir_loss.item(), iou_loss, + ) + if 'rcnn_loss' in self.loss_dict: + str_to_print += " || Rcnn: %.4f || Cls2: %.4f || Loc2: %.4f || Iou2: %.4f" % ( + self.loss_dict['rcnn_loss'], + self.loss_dict['cls_loss'].item(), + self.loss_dict['reg_loss'].item(), + self.loss_dict['iou_loss'].item(), + ) + if 'kd_loss' in self.loss_dict: + str_to_print += " || Heter kd: %.4f " % ( + self.loss_dict['kd_loss'].item(), + ) + if 'cons_loss' in self.loss_dict: + str_to_print += " || Heter cons: %.4f " % ( + self.loss_dict['cons_loss'].item(), + ) + if 'sh_loss' in self.loss_dict: + str_to_print += " || Shared Head ciassd: %.4f " % ( + self.loss_dict['sh_loss'].item(), + ) + print(str_to_print) + + if writer: + writer.add_scalar('Ciassd_regression_loss', reg_loss.item(), + epoch * batch_len + batch_id) + writer.add_scalar('Ciassd_Confidence_loss', cls_loss.item(), + epoch * batch_len + batch_id) + writer.add_scalar('Ciassd_Direction_loss', dir_loss.item(), + epoch * batch_len + batch_id) + writer.add_scalar('Ciassd_Iou_loss', iou_loss, + epoch * batch_len + batch_id) + writer.add_scalar('Ciassd_loss', ciassd_total_loss.item(), + epoch * batch_len + batch_id) + + if 'rcnn_loss' in self.loss_dict: + writer.add_scalar('Rcnn_regression_loss', + self.loss_dict['reg_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('Rcnn_Confidence_loss', + self.loss_dict['cls_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('Rcnn_Iou_loss', + self.loss_dict['iou_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('Rcnn_loss', self.loss_dict['rcnn_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('Total_loss', self.loss_dict['loss'].item(), + epoch * batch_len + batch_id) + + if 'kd_loss' in self.loss_dict: + writer.add_scalar('Heter_kd_loss', + self.loss_dict['kd_loss'].item(), + epoch * batch_len + batch_id) + if 'cons_loss' in self.loss_dict: + writer.add_scalar('Heter_cons_loss', + self.loss_dict['cons_loss'].item(), + epoch * batch_len + batch_id) + if 'sh_loss' in self.loss_dict: + writer.add_scalar('shared head ciassd_loss', + self.loss_dict['sh_loss'].item(), + epoch * batch_len + batch_id) + + +def weighted_sigmoid_binary_cross_entropy(preds, tgts, weights=None, + class_indices=None): + if weights is not None: + weights = weights.unsqueeze(-1) + if class_indices is not None: + weights *= ( + indices_to_dense_vector(class_indices, preds.shape[2]) + .view(1, 1, -1) + .type_as(preds) + ) + per_entry_cross_ent = nn.functional.binary_cross_entropy_with_logits(preds, + tgts, + weights) + return per_entry_cross_ent + + +def indices_to_dense_vector( + indices, size, indices_value=1.0, default_value=0, dtype=np.float32 +): + """Creates dense vector with indices set to specific value and rest to zeros. 
+ This function exists because it is unclear if it is safe to use + tf.sparse_to_dense(indices, [size], 1, validate_indices=False) + with indices which are not ordered. + This function accepts a dynamic size (e.g. tf.shape(tensor)[0]) + Args: + indices: 1d Tensor with integer indices which are to be set to + indices_values. + size: scalar with size (integer) of output Tensor. + indices_value: values of elements specified by indices in the output vector + default_value: values of other elements in the output vector. + dtype: data type. + Returns: + dense 1D Tensor of shape [size] with indices set to indices_values and the + rest set to default_value. + """ + dense = torch.zeros(size).fill_(default_value) + dense[indices] = indices_value + + return dense \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/heterception_redet_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/heterception_redet_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..b0fd06567cabb2ec8cbcb5591208c649622fa2a0 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/heterception_redet_loss.py @@ -0,0 +1,171 @@ +import torch +from torch import nn +import numpy as np +from opencood.loss.ciassd_loss import CiassdLoss, weighted_smooth_l1_loss +from icecream import ic + +class HeterceptionReDetLoss(nn.Module): + def __init__(self, args): + super(HeterceptionReDetLoss, self).__init__() + # self.ciassd_loss = CiassdLoss(args['stage1']) + self.ciassd_loss = CiassdLoss(args['shared_head_out'], keyname='shared_head_out') + self.redet_loss = CiassdLoss(args['stage2'], keyname='stage2_out') + + + self.kd = args['stage2']['kd'] + self.kd_fn = nn.MSELoss(reduce='mean') + + self.loss_dict = {} + + def forward(self, output_dict, label_dict): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + ciassd_loss = self.ciassd_loss(output_dict, label_dict['stage1']) + + # only update ciassd if no bbox is detected in the first stage + if 'stage2_out' not in output_dict: + self.loss_dict = { + 'loss': ciassd_loss, + } + return ciassd_loss + + output_dict['batch_size'] = len(output_dict['record_len']) + output_dict.pop('record_len') + + redet_loss = self.redet_loss(output_dict, label_dict['stage2']) + loss = redet_loss + ciassd_loss + + # knowledge distillation + if 'kd_items' in output_dict: + lidar_features = output_dict['kd_items']["lidar_roi_features"] # [C, sum(bev_grids)] + camera_features = output_dict['kd_items']["camera_roi_features"] # [C, sum(bev_grids)] + kd_loss_reduced = self.kd_fn(lidar_features, camera_features) * self.kd['weight'] + loss += kd_loss_reduced + self.loss_dict.update({'kd_loss': kd_loss_reduced}) + + self.loss_dict.update({ + 'loss': loss, + 'redet_loss': redet_loss, + }) + + return loss + + def logging(self, epoch, batch_id, batch_len, writer=None): + """ + Print out the loss function for current iteration. + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. 
+ batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + ciassd_loss_dict = self.ciassd_loss.loss_dict + ciassd_total_loss = ciassd_loss_dict['total_loss'] + reg_loss = ciassd_loss_dict['reg_loss'] + cls_loss = ciassd_loss_dict['cls_loss'] + dir_loss = ciassd_loss_dict['dir_loss'] + + + if (batch_id + 1) % 1 == 0: + str_to_print = "[epoch %d][%d/%d], || Loss: %.4f || Ciassd: %.4f " \ + "|| Cls1: %.4f || Loc1: %.4f || Dir1: %.4f " % ( + epoch, batch_id + 1, batch_len, self.loss_dict['loss'], + ciassd_total_loss.item(), cls_loss.item(), reg_loss.item(), + dir_loss.item() + ) + if 'redet_loss' in self.loss_dict: + str_to_print += " || redet_loss: %.4f || Cls2: %.4f || Loc2: %.4f || Dir2: %.4f" % ( + self.loss_dict['redet_loss'].item(), + self.redet_loss.loss_dict['cls_loss'].item(), + self.redet_loss.loss_dict['reg_loss'].item(), + self.redet_loss.loss_dict['dir_loss'].item(), + ) + if 'kd_loss' in self.loss_dict: + str_to_print += " || Heter kd: %.4f " % ( + self.loss_dict['kd_loss'].item(), + ) + + + print(str_to_print) + + if writer: + writer.add_scalar('Ciassd_regression_loss', reg_loss.item(), + epoch * batch_len + batch_id) + writer.add_scalar('Ciassd_Confidence_loss', cls_loss.item(), + epoch * batch_len + batch_id) + writer.add_scalar('Ciassd_Direction_loss', dir_loss.item(), + epoch * batch_len + batch_id) + writer.add_scalar('Ciassd_loss', ciassd_total_loss.item(), + epoch * batch_len + batch_id) + + if 'redet_loss' in self.loss_dict: + writer.add_scalar('ReDet_loss', + self.loss_dict['redet_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('ReDet_Confidence_loss', + self.redet_loss.loss_dict['cls_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('ReDet_regression_loss', + self.redet_loss.loss_dict['reg_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('ReDet_direction_loss', + self.redet_loss.loss_dict['dir_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('Total_loss', self.loss_dict['loss'].item(), + epoch * batch_len + batch_id) + + if 'kd_loss' in self.loss_dict: + writer.add_scalar('Heter_kd_loss', + self.loss_dict['kd_loss'].item(), + epoch * batch_len + batch_id) + + + +def weighted_sigmoid_binary_cross_entropy(preds, tgts, weights=None, + class_indices=None): + if weights is not None: + weights = weights.unsqueeze(-1) + if class_indices is not None: + weights *= ( + indices_to_dense_vector(class_indices, preds.shape[2]) + .view(1, 1, -1) + .type_as(preds) + ) + per_entry_cross_ent = nn.functional.binary_cross_entropy_with_logits(preds, + tgts, + weights) + return per_entry_cross_ent + + +def indices_to_dense_vector( + indices, size, indices_value=1.0, default_value=0, dtype=np.float32 +): + """Creates dense vector with indices set to specific value and rest to zeros. + This function exists because it is unclear if it is safe to use + tf.sparse_to_dense(indices, [size], 1, validate_indices=False) + with indices which are not ordered. + This function accepts a dynamic size (e.g. tf.shape(tensor)[0]) + Args: + indices: 1d Tensor with integer indices which are to be set to + indices_values. + size: scalar with size (integer) of output Tensor. + indices_value: values of elements specified by indices in the output vector + default_value: values of other elements in the output vector. + dtype: data type. + Returns: + dense 1D Tensor of shape [size] with indices set to indices_values and the + rest set to default_value. 
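+    Example (illustrative):
+        indices_to_dense_vector(torch.tensor([1, 3]), 5)
+        # -> tensor([0., 1., 0., 1., 0.])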
+ """ + dense = torch.zeros(size).fill_(default_value) + dense[indices] = indices_value + + return dense \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_dir_depth_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_dir_depth_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..6f136edfd23c7b77d026194716d5c051471c864f --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_dir_depth_loss.py @@ -0,0 +1,423 @@ +# -*- coding: utf-8 -*- +# Author: Yifan Lu +# Add direction classification loss +# The originally point_pillar_loss.py, can not determine if the box heading is opposite to the GT. +# Add depth loss (optional for camera based perception) + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +from opencood.utils.common_utils import limit_period +from icecream import ic + +class FocalLoss(nn.Module): + r"""Criterion that computes Focal loss. + + According to :cite:`lin2018focal`, the Focal loss is computed as follows: + + .. math:: + + \text{FL}(p_t) = -\alpha_t (1 - p_t)^{\gamma} \, \text{log}(p_t) + + Where: + - :math:`p_t` is the model's estimated probability for each class. + + Args: + alpha: Weighting factor :math:`\alpha \in [0, 1]`. + gamma: Focusing parameter :math:`\gamma >= 0`. + reduction: Specifies the reduction to apply to the + output: ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction + will be applied, ``'mean'``: the sum of the output will be divided by + the number of elements in the output, ``'sum'``: the output will be + summed. + eps: Deprecated: scalar to enforce numerical stability. This is no longer + used. + + Shape: + - Input: :math:`(N, C, *)` where C = number of classes. + - Target: :math:`(N, *)` where each value is + :math:`0 ≤ targets[i] ≤ C−1`. 
+ + Example: + >>> N = 5 # num_classes + >>> kwargs = {"alpha": 0.5, "gamma": 2.0, "reduction": 'mean'} + >>> criterion = FocalLoss(**kwargs) + >>> input = torch.randn(1, N, 3, 5, requires_grad=True) + >>> target = torch.empty(1, 3, 5, dtype=torch.long).random_(N) + >>> output = criterion(input, target) + >>> output.backward() + """ + + def __init__(self, alpha, gamma = 2.0, reduction= 'none', smooth_target = False , eps = None) -> None: + super().__init__() + self.alpha = alpha + self.gamma = gamma + self.reduction = reduction + self.smooth_target = smooth_target + self.eps = eps + if self.smooth_target: + self.smooth_kernel = nn.Conv1d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=1, bias=False) + self.smooth_kernel.weight = torch.nn.Parameter(torch.tensor([[[0.2, 0.9, 0.2]]]), requires_grad=False) + self.smooth_kernel = self.smooth_kernel.to(torch.device("cuda")) + + def forward(self, input, target): + n = input.shape[0] + out_size = (n,) + input.shape[2:] + + # compute softmax over the classes axis + input_soft = input.softmax(1) + log_input_soft = input.log_softmax(1) + + # create the labels one hot tensor + D = input.shape[1] + if self.smooth_target: + target_one_hot = F.one_hot(target, num_classes=D).to(input).view(-1, D) # [N*H*W, D] + target_one_hot = self.smooth_kernel(target_one_hot.float().unsqueeze(1)).squeeze(1) # [N*H*W, D] + target_one_hot = target_one_hot.view(*target.shape, D).permute(0, 3, 1, 2) + else: + target_one_hot = F.one_hot(target, num_classes=D).to(input).permute(0, 3, 1, 2) + # compute the actual focal loss + weight = torch.pow(-input_soft + 1.0, self.gamma) + + focal = -self.alpha * weight * log_input_soft + loss_tmp = torch.einsum('bc...,bc...->b...', (target_one_hot, focal)) + + if self.reduction == 'none': + loss = loss_tmp + elif self.reduction == 'mean': + loss = torch.mean(loss_tmp) + elif self.reduction == 'sum': + loss = torch.sum(loss_tmp) + else: + raise NotImplementedError(f"Invalid reduction mode: {self.reduction}") + return loss + +class WeightedSmoothL1Loss(nn.Module): + """ + Code-wise Weighted Smooth L1 Loss modified based on fvcore.nn.smooth_l1_loss + https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/smooth_l1_loss.py + | 0.5 * x ** 2 / beta if abs(x) < beta + smoothl1(x) = | + | abs(x) - 0.5 * beta otherwise, + where x = input - target. + """ + def __init__(self, beta: float = 1.0 / 9.0, code_weights: list = None): + """ + Args: + beta: Scalar float. + L1 to L2 change point. + For beta values < 1e-5, L1 loss is computed. + code_weights: (#codes) float list if not None. + Code-wise weights. + """ + super(WeightedSmoothL1Loss, self).__init__() + self.beta = beta + if code_weights is not None: + self.code_weights = np.array(code_weights, dtype=np.float32) + self.code_weights = torch.from_numpy(self.code_weights).cuda() + + @staticmethod + def smooth_l1_loss(diff, beta): + if beta < 1e-5: + loss = torch.abs(diff) + else: + n = torch.abs(diff) + loss = torch.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta) + + return loss + + def forward(self, input: torch.Tensor, + target: torch.Tensor, weights: torch.Tensor = None): + """ + Args: + input: (B, #anchors, #codes) float tensor. + Ecoded predicted locations of objects. + target: (B, #anchors, #codes) float tensor. + Regression targets. + weights: (B, #anchors) float tensor if not None. + + #anchors = H * W * anchor_num + + Returns: + loss: (B, #anchors) float tensor. + Weighted smooth l1 loss without reduction. 
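+
+        Example (illustrative; shapes are hypothetical):
+            loss_fn = WeightedSmoothL1Loss()
+            pred, tgt = torch.zeros(2, 100, 7), torch.zeros(2, 100, 7)
+            loss_fn(pred, tgt).shape   # torch.Size([2, 100, 7]); element-wise, no reduction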
+ """ + target = torch.where(torch.isnan(target), input, target) # ignore nan targets + + diff = input - target + loss = self.smooth_l1_loss(diff, self.beta) + + # anchor-wise weighting + if weights is not None: + assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1] + loss = loss * weights.unsqueeze(-1) + + return loss + + + +class PointPillarDirDepthLoss(nn.Module): + def __init__(self, args): + super(PointPillarDirDepthLoss, self).__init__() + self.reg_loss_func = WeightedSmoothL1Loss() + self.alpha = 0.25 + self.gamma = 2.0 + + self.cls_weight = args['cls_weight'] + self.reg_coe = args['reg'] + + self.dir_weight = args['dir_args']['dir_weight'] + self.dir_offset = args['dir_args']['args']['dir_offset'] + self.num_bins = args['dir_args']['args']['num_bins'] + anchor_yaw = np.deg2rad(np.array(args['dir_args']['anchor_yaw'])) # for direction classification + self.anchor_yaw_map = torch.from_numpy(anchor_yaw).view(1,-1,1) # [1,2,1] + self.anchor_num = self.anchor_yaw_map.shape[1] + + self.depth_weight = args['depth_weight'] + self.smooth_target = True if 'smooth_target' in args and args['smooth_target'] else False + self.use_fg_mask = True if 'use_fg_mask' in args and args['use_fg_mask'] else False + self.fg_weight = 3.25 + self.bg_weight = 0.25 + if self.smooth_target: + self.depth_loss_func = FocalLoss(alpha=self.alpha, gamma=self.gamma, reduction="none", smooth_target=True) + else: + self.depth_loss_func = FocalLoss(alpha=self.alpha, gamma=self.gamma, reduction="none") + + self.loss_dict = {} + + def forward(self, output_dict, target_dict, suffix=""): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + self.loss_dict = {} + rm = output_dict[f'rm{suffix}'] # [B, 14, 50, 176] + psm = output_dict[f'psm{suffix}'] # [B, 2, 50, 176] + targets = target_dict['targets'] + + cls_preds = psm.permute(0, 2, 3, 1).contiguous() # N, C, H, W -> N, H, W, C + + box_cls_labels = target_dict['pos_equal_one'] # [B, 50, 176, 2] + """ + Visualize + """ + box_cls_labels = box_cls_labels.view(psm.shape[0], -1).contiguous() # [B, 50*176*2] + + positives = box_cls_labels > 0 + negatives = box_cls_labels == 0 + negative_cls_weights = negatives * 1.0 + cls_weights = (negative_cls_weights + 1.0 * positives).float() + reg_weights = positives.float() + + pos_normalizer = positives.sum(1, keepdim=True).float() + reg_weights /= torch.clamp(pos_normalizer, min=1.0) # [N, H*W*anchor_num] + cls_weights /= torch.clamp(pos_normalizer, min=1.0) + cls_targets = box_cls_labels + cls_targets = cls_targets.unsqueeze(dim=-1) + + cls_targets = cls_targets.squeeze(dim=-1) + one_hot_targets = torch.zeros( + *list(cls_targets.shape), 2, + dtype=cls_preds.dtype, device=cls_targets.device + ) # [B, H*W*C, 2], C=#anchor=2 + one_hot_targets.scatter_(-1, cls_targets.unsqueeze(dim=-1).long(), 1.0) + cls_preds = cls_preds.view(psm.shape[0], -1, 1) # [B, H*W*C, 1], C=#anchor=2 + one_hot_targets = one_hot_targets[..., 1:] + + cls_loss_src = self.cls_loss_func(cls_preds, + one_hot_targets, + weights=cls_weights) # [N, M] + cls_loss = cls_loss_src.sum() / psm.shape[0] + conf_loss = cls_loss * self.cls_weight + + # regression + rm = rm.permute(0, 2, 3, 1).contiguous() + rm = rm.view(rm.size(0), -1, 7) + targets = targets.view(targets.size(0), -1, 7) + box_preds_sin, reg_targets_sin = self.add_sin_difference(rm, + targets) + loc_loss_src =\ + self.reg_loss_func(box_preds_sin, + reg_targets_sin, + weights=reg_weights) + reg_loss = loc_loss_src.sum() / rm.shape[0] + reg_loss *= self.reg_coe + + 
######## direction ########## + dir_targets = self.get_direction_target(targets) + N = output_dict[f"dm{suffix}"].shape[0] + dir_logits = output_dict[f"dm{suffix}"].permute(0, 2, 3, 1).contiguous().view(N, -1, 2) # [N, H*W*#anchor, 2] + + + dir_loss = softmax_cross_entropy_with_logits(dir_logits.view(-1, self.anchor_num), dir_targets.view(-1, self.anchor_num)) + dir_loss = dir_loss.view(dir_logits.shape[:2]) * reg_weights # [N, H*W*anchor_num] + dir_loss = dir_loss.sum() * self.dir_weight / N + + total_loss = reg_loss + conf_loss + dir_loss + + ########## depth ############# + if f"depth_items{suffix}" in output_dict and output_dict[f'depth_items{suffix}'] is not None: + # depth logdit: [N, D, H, W] + # depth gt indices: [N, H, W] + # fg_mask: [N, H, W] + depth_logit, depth_gt_indices = output_dict[f'depth_items{suffix}'][0], output_dict[f'depth_items{suffix}'][1] + depth_loss = self.depth_loss_func(depth_logit, depth_gt_indices) + if self.use_fg_mask: + fg_mask = output_dict[f'depth_items{suffix}'][-1] + weight_mask = (fg_mask > 0) * self.fg_weight + (fg_mask == 0) * self.bg_weight + depth_loss *= weight_mask + + depth_loss = depth_loss.mean() * self.depth_weight + + total_loss += depth_loss + self.loss_dict.update({'depth_loss': depth_loss}) + + self.loss_dict.update({'total_loss': total_loss, + 'reg_loss': reg_loss, + 'conf_loss': conf_loss, + 'dir_loss': dir_loss}) + + return total_loss + + def cls_loss_func(self, input: torch.Tensor, + target: torch.Tensor, + weights: torch.Tensor): + """ + Args: + input: (B, #anchors, #classes) float tensor. + Predicted logits for each class + target: (B, #anchors, #classes) float tensor. + One-hot encoded classification targets + weights: (B, #anchors) float tensor. + Anchor-wise weights. + + Returns: + weighted_loss: (B, #anchors, #classes) float tensor after weighting. + """ + pred_sigmoid = torch.sigmoid(input) + alpha_weight = target * self.alpha + (1 - target) * (1 - self.alpha) + pt = target * (1.0 - pred_sigmoid) + (1.0 - target) * pred_sigmoid + focal_weight = alpha_weight * torch.pow(pt, self.gamma) + + bce_loss = self.sigmoid_cross_entropy_with_logits(input, target) + + loss = focal_weight * bce_loss + + if weights.shape.__len__() == 2 or \ + (weights.shape.__len__() == 1 and target.shape.__len__() == 2): + weights = weights.unsqueeze(-1) + + assert weights.shape.__len__() == loss.shape.__len__() + + return loss * weights + + @staticmethod + def sigmoid_cross_entropy_with_logits(input: torch.Tensor, target: torch.Tensor): + """ PyTorch Implementation for tf.nn.sigmoid_cross_entropy_with_logits: + max(x, 0) - x * z + log(1 + exp(-abs(x))) in + https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits + + Args: + input: (B, #anchors, #classes) float tensor. + Predicted logits for each class + target: (B, #anchors, #classes) float tensor. + One-hot encoded classification targets + + Returns: + loss: (B, #anchors, #classes) float tensor. 
+ Sigmoid cross entropy loss without reduction + """ + loss = torch.clamp(input, min=0) - input * target + \ + torch.log1p(torch.exp(-torch.abs(input))) + return loss + + @staticmethod + def add_sin_difference(boxes1, boxes2, dim=6): + assert dim != -1 + rad_pred_encoding = torch.sin(boxes1[..., dim:dim + 1]) * \ + torch.cos(boxes2[..., dim:dim + 1]) + rad_tg_encoding = torch.cos(boxes1[..., dim:dim + 1]) * \ + torch.sin(boxes2[..., dim:dim + 1]) + + boxes1 = torch.cat([boxes1[..., :dim], rad_pred_encoding, + boxes1[..., dim + 1:]], dim=-1) + boxes2 = torch.cat([boxes2[..., :dim], rad_tg_encoding, + boxes2[..., dim + 1:]], dim=-1) + return boxes1, boxes2 + + def get_direction_target(self, reg_targets): + """ + Args: + reg_targets: [N, H * W * #anchor_num, 7] + The last term is (theta_gt - theta_a) + + Returns: + dir_targets: + theta_gt: [N, H * W * #anchor_num, NUM_BIN] + NUM_BIN = 2 + """ + # (1, 2, 1) + H_times_W_times_anchor_num = reg_targets.shape[1] + anchor_map = self.anchor_yaw_map.repeat(1, H_times_W_times_anchor_num//self.anchor_num, 1).to(reg_targets.device) # [1, H * W * #anchor_num, 1] + rot_gt = reg_targets[..., -1] + anchor_map[..., -1] # [N, H*W*anchornum] + offset_rot = limit_period(rot_gt - self.dir_offset, 0, 2 * np.pi) + dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / self.num_bins)).long() # [N, H*W*anchornum] + dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=self.num_bins - 1) + # one_hot: + # if rot_gt > 0, then the label is 1, then the regression target is [0, 1] + dir_cls_targets = one_hot_f(dir_cls_targets, self.num_bins) + return dir_cls_targets + + + + def logging(self, epoch, batch_id, batch_len, writer = None, suffix=""): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. 
+ batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict['total_loss'] + reg_loss = self.loss_dict['reg_loss'] + conf_loss = self.loss_dict['conf_loss'] + dir_loss = self.loss_dict['dir_loss'] + depth_loss = 0 if 'depth_loss' not in self.loss_dict else self.loss_dict['depth_loss'].item() + + print("[epoch %d][%d/%d]%s || Loss: %.4f || Conf Loss: %.4f" + " || Loc Loss: %.4f || Dir Loss: %.4f || depth loss %.4f" % ( + epoch, batch_id + 1, batch_len, suffix, + total_loss.item(), conf_loss.item(), reg_loss.item(), dir_loss.item(), depth_loss)) + + if not writer is None: + writer.add_scalar('Regression_loss'+suffix, reg_loss.item(), + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss'+suffix, conf_loss.item(), + epoch*batch_len + batch_id) + writer.add_scalar('Dir_loss'+suffix, dir_loss.item(), + epoch*batch_len + batch_id) + writer.add_scalar('depth_loss'+suffix, depth_loss, + epoch*batch_len + batch_id) + +def one_hot_f(tensor, num_bins, dim=-1, on_value=1.0, dtype=torch.float32): + tensor_onehot = torch.zeros(*list(tensor.shape), num_bins, dtype=dtype, device=tensor.device) + tensor_onehot.scatter_(dim, tensor.unsqueeze(dim).long(), on_value) + return tensor_onehot + +def softmax_cross_entropy_with_logits(logits, labels): + param = list(range(len(logits.shape))) + transpose_param = [0] + [param[-1]] + param[1:-1] + logits = logits.permute(*transpose_param) + loss_ftor = torch.nn.CrossEntropyLoss(reduction="none") + loss = loss_ftor(logits, labels.max(dim=-1)[1]) + return loss diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_mash_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_mash_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..ed2ec6fdc872b1d35153f14964571b947d3db1c2 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_mash_loss.py @@ -0,0 +1,298 @@ +# -*- coding: utf-8 -*- +# Author: OpenPCDet, Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + + +class WeightedSmoothL1Loss(nn.Module): + """ + Code-wise Weighted Smooth L1 Loss modified based on fvcore.nn.smooth_l1_loss + https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/smooth_l1_loss.py + | 0.5 * x ** 2 / beta if abs(x) < beta + smoothl1(x) = | + | abs(x) - 0.5 * beta otherwise, + where x = input - target. + """ + def __init__(self, beta: float = 1.0 / 9.0, code_weights: list = None): + """ + Args: + beta: Scalar float. + L1 to L2 change point. + For beta values < 1e-5, L1 loss is computed. + code_weights: (#codes) float list if not None. + Code-wise weights. + """ + super(WeightedSmoothL1Loss, self).__init__() + self.beta = beta + if code_weights is not None: + self.code_weights = np.array(code_weights, dtype=np.float32) + self.code_weights = torch.from_numpy(self.code_weights).cuda() + + @staticmethod + def smooth_l1_loss(diff, beta): + if beta < 1e-5: + loss = torch.abs(diff) + else: + n = torch.abs(diff) + loss = torch.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta) + + return loss + + def forward(self, input: torch.Tensor, + target: torch.Tensor, weights: torch.Tensor = None): + """ + Args: + input: (B, #anchors, #codes) float tensor. + Ecoded predicted locations of objects. + target: (B, #anchors, #codes) float tensor. 
+ Regression targets. + weights: (B, #anchors) float tensor if not None. + + Returns: + loss: (B, #anchors) float tensor. + Weighted smooth l1 loss without reduction. + """ + target = torch.where(torch.isnan(target), input, target) # ignore nan targets + + diff = input - target + loss = self.smooth_l1_loss(diff, self.beta) + + # anchor-wise weighting + if weights is not None: + assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1] + loss = loss * weights.unsqueeze(-1) + + return loss + + +class PointPillarMashLoss(nn.Module): + def __init__(self, args): + super(PointPillarMashLoss, self).__init__() + self.reg_loss_func = WeightedSmoothL1Loss() + self.grid_loss_func = nn.CrossEntropyLoss() + self.alpha = 0.25 + self.gamma = 2.0 + + self.cls_weight = args['cls_weight'] + self.grid_weight = args['grid_weight'] + self.reg_coe = args['reg'] + self.H = args['H'] + self.W = args['W'] + self.downsample_rate = args['downsample_rate'] + self.discrete_ratio = args['voxel_size'][0] + + self.loss_dict = {} + + def forward(self, output_dict, target_dict): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + rm = output_dict['rm'] # [B, 14, 50, 176] + psm = output_dict['psm'] # [B, 2, 50, 176] + targets = target_dict['targets'] + + cls_preds = psm.permute(0, 2, 3, 1).contiguous() # N, C, H, W -> N, H, W, C + + box_cls_labels = target_dict['pos_equal_one'] # [B, 50, 176, 2] + box_cls_labels = box_cls_labels.view(psm.shape[0], -1).contiguous() + + positives = box_cls_labels > 0 + negatives = box_cls_labels == 0 + negative_cls_weights = negatives * 1.0 + cls_weights = (negative_cls_weights + 1.0 * positives).float() + reg_weights = positives.float() + + pos_normalizer = positives.sum(1, keepdim=True).float() + reg_weights /= torch.clamp(pos_normalizer, min=1.0) + cls_weights /= torch.clamp(pos_normalizer, min=1.0) + cls_targets = box_cls_labels + cls_targets = cls_targets.unsqueeze(dim=-1) + + cls_targets = cls_targets.squeeze(dim=-1) + one_hot_targets = torch.zeros( + *list(cls_targets.shape), 2, + dtype=cls_preds.dtype, device=cls_targets.device + ) + one_hot_targets.scatter_(-1, cls_targets.unsqueeze(dim=-1).long(), 1.0) + cls_preds = cls_preds.view(psm.shape[0], -1, 1) + one_hot_targets = one_hot_targets[..., 1:] + + cls_loss_src = self.cls_loss_func(cls_preds, + one_hot_targets, + weights=cls_weights) # [N, M] + cls_loss = cls_loss_src.sum() / psm.shape[0] + conf_loss = cls_loss * self.cls_weight + + # regression + rm = rm.permute(0, 2, 3, 1).contiguous() + rm = rm.view(rm.size(0), -1, 7) + targets = targets.view(targets.size(0), -1, 7) + box_preds_sin, reg_targets_sin = self.add_sin_difference(rm, + targets) + loc_loss_src =\ + self.reg_loss_func(box_preds_sin, + reg_targets_sin, + weights=reg_weights) + reg_loss = loc_loss_src.sum() / rm.shape[0] + reg_loss *= self.reg_coe + + total_loss = reg_loss + conf_loss + self.loss_dict.update({'total_loss': total_loss.item(), + 'reg_loss': reg_loss.item(), + 'conf_loss': conf_loss.item()}) + + corr_vol = output_dict['corr_vol'] + # grid loss + if corr_vol is not None: + pairwise_t_matrix = target_dict['pairwise_t_matrix'] + record_len = target_dict['record_len'] + + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * self.H / self.W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * self.W / self.H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio 
* self.W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * self.H) * 2 + + B = len(record_len) + t_matrix_list = [] + for b in range(B): + if(record_len[b] == 1): + continue + t_matrix_list.append(pairwise_t_matrix[b, 0, 1:record_len[b]]) # [N-1, 2, 3] + + t_matrix = torch.cat(t_matrix_list, 0) + N_ = t_matrix.shape[0] # N_ is sum_i{Ni-1} + + grid_gt = F.affine_grid(t_matrix, (N_, 1, self.H, self.W)) # (N_, H, W, 2) + X = grid_gt[...,0] # (N_, H, W) + Y = grid_gt[...,1] # (N_, H, W) + X_idx = ((X / 2 + 0.5) * self.W).to(torch.long) # (N_, H, W) + Y_idx = ((Y / 2 + 0.5) * self.H).to(torch.long) # (N_, H, W) + + idx = (Y_idx * self.W + X_idx) # (N_, H, W) + + # out of boundary + mask = torch.gt(grid_gt, -1) * torch.lt(grid_gt, 1) # (N_, H, W,2) + mask = mask[...,0] * mask[...,1] # (N_, H, W) + mask = ~ mask + idx[mask] = self.H * self.W # No matching, set to empty, last dimension + corr_vol_gt = idx + + grid_loss = self.grid_loss_func(corr_vol, corr_vol_gt) + grid_loss *= self.grid_weight + + total_loss += grid_loss + self.loss_dict.update({"total_loss": total_loss.item(), + "grid_loss": grid_loss.item()}) + + + + return total_loss + + def cls_loss_func(self, input: torch.Tensor, + target: torch.Tensor, + weights: torch.Tensor): + """ + Args: + input: (B, #anchors, #classes) float tensor. + Predicted logits for each class + target: (B, #anchors, #classes) float tensor. + One-hot encoded classification targets + weights: (B, #anchors) float tensor. + Anchor-wise weights. + + Returns: + weighted_loss: (B, #anchors, #classes) float tensor after weighting. + """ + pred_sigmoid = torch.sigmoid(input) + alpha_weight = target * self.alpha + (1 - target) * (1 - self.alpha) + pt = target * (1.0 - pred_sigmoid) + (1.0 - target) * pred_sigmoid + focal_weight = alpha_weight * torch.pow(pt, self.gamma) + + bce_loss = self.sigmoid_cross_entropy_with_logits(input, target) + + loss = focal_weight * bce_loss + + if weights.shape.__len__() == 2 or \ + (weights.shape.__len__() == 1 and target.shape.__len__() == 2): + weights = weights.unsqueeze(-1) + + assert weights.shape.__len__() == loss.shape.__len__() + + return loss * weights + + @staticmethod + def sigmoid_cross_entropy_with_logits(input: torch.Tensor, target: torch.Tensor): + """ PyTorch Implementation for tf.nn.sigmoid_cross_entropy_with_logits: + max(x, 0) - x * z + log(1 + exp(-abs(x))) in + https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits + + Args: + input: (B, #anchors, #classes) float tensor. + Predicted logits for each class + target: (B, #anchors, #classes) float tensor. + One-hot encoded classification targets + + Returns: + loss: (B, #anchors, #classes) float tensor. + Sigmoid cross entropy loss without reduction + """ + loss = torch.clamp(input, min=0) - input * target + \ + torch.log1p(torch.exp(-torch.abs(input))) + return loss + + @staticmethod + def add_sin_difference(boxes1, boxes2, dim=6): + assert dim != -1 + rad_pred_encoding = torch.sin(boxes1[..., dim:dim + 1]) * \ + torch.cos(boxes2[..., dim:dim + 1]) + rad_tg_encoding = torch.cos(boxes1[..., dim:dim + 1]) * \ + torch.sin(boxes2[..., dim:dim + 1]) + + boxes1 = torch.cat([boxes1[..., :dim], rad_pred_encoding, + boxes1[..., dim + 1:]], dim=-1) + boxes2 = torch.cat([boxes2[..., :dim], rad_tg_encoding, + boxes2[..., dim + 1:]], dim=-1) + return boxes1, boxes2 + + + def logging(self, epoch, batch_id, batch_len, writer = None): + """ + Print out the loss function for current iteration. 
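The grid-loss target built above turns the normalized sampling coordinates from F.affine_grid (each in [-1, 1]) into flat cell indices y*W + x, and routes out-of-range samples to the extra index H*W so they fall into a dedicated "no match" class. A standalone sketch of that index construction with made-up shapes (align_corners is passed explicitly here only to avoid the default-value warning):

# Illustrative sketch: normalized affine_grid coords -> flat cell indices,
# with out-of-range locations mapped to the extra "no match" label H*W.
import torch
import torch.nn.functional as F

H, W = 4, 6
theta = torch.tensor([[[1.0, 0.0, 0.25],      # toy 2x3 affine matrix
                       [0.0, 1.0, 0.0]]])
grid = F.affine_grid(theta, (1, 1, H, W), align_corners=False)   # (1, H, W, 2), values in [-1, 1]

X, Y = grid[..., 0], grid[..., 1]
X_idx = ((X / 2 + 0.5) * W).long()            # column of the matching source cell
Y_idx = ((Y / 2 + 0.5) * H).long()            # row of the matching source cell
idx = Y_idx * W + X_idx                       # one label per source cell

in_range = (grid > -1).all(dim=-1) & (grid < 1).all(dim=-1)
idx[~in_range] = H * W                        # "no matching cell" label
print(idx)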
+ + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. + batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict['total_loss'] + reg_loss = self.loss_dict['reg_loss'] + conf_loss = self.loss_dict['conf_loss'] + grid_loss = self.loss_dict['grid_loss'] + + print("[epoch %d][%d/%d], || Loss: %.4f || Conf Loss: %.4f" + " || Loc Loss: %.4f || Grid Loss: %.4f" % ( + epoch, batch_id + 1, batch_len, + total_loss, conf_loss, reg_loss, grid_loss)) + + if not writer is None: + writer.add_scalar('Regression_loss', reg_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss', conf_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Grid_loss', grid_loss, + epoch*batch_len + batch_id) diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_uncertainty_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_uncertainty_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..d04c9da93edc949e70b6baeefa7c73d8ad33773c --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_uncertainty_loss.py @@ -0,0 +1,485 @@ +# -*- coding: utf-8 -*- +# Author: OpenPCDet, Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +from tabnanny import verbose +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +import d3d.mathh as mathh +from opencood.utils.common_utils import limit_period +from functools import partial + +class WeightedSmoothL1Loss(nn.Module): + """ + Code-wise Weighted Smooth L1 Loss modified based on fvcore.nn.smooth_l1_loss + https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/smooth_l1_loss.py + | 0.5 * x ** 2 / beta if abs(x) < beta + smoothl1(x) = | + | abs(x) - 0.5 * beta otherwise, + where x = input - target. + """ + def __init__(self, beta: float = 1.0 / 9.0, code_weights: list = None): + """ + Args: + beta: Scalar float. + L1 to L2 change point. + For beta values < 1e-5, L1 loss is computed. + code_weights: (#codes) float list if not None. + Code-wise weights. + """ + super(WeightedSmoothL1Loss, self).__init__() + self.beta = beta + if code_weights is not None: + self.code_weights = np.array(code_weights, dtype=np.float32) + self.code_weights = torch.from_numpy(self.code_weights).cuda() + + @staticmethod + def smooth_l1_loss(diff, beta): + if beta < 1e-5: + loss = torch.abs(diff) + else: + n = torch.abs(diff) + loss = torch.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta) + + return loss + + def forward(self, input: torch.Tensor, + target: torch.Tensor, weights: torch.Tensor = None): + """ + Args: + input: (B, #anchors, #codes) float tensor. + Ecoded predicted locations of objects. + target: (B, #anchors, #codes) float tensor. + Regression targets. + weights: (B, #anchors) float tensor if not None. + + Returns: + loss: (B, #anchors) float tensor. + Weighted smooth l1 loss without reduction. 
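The hand-rolled smooth_l1_loss above is the usual Huber-style piecewise form (quadratic below beta, linear above). In recent PyTorch releases F.smooth_l1_loss accepts the same beta parameter, so an element-wise comparison can serve as a sanity check; a minimal sketch under that assumption:

# Illustrative sketch: the piecewise 0.5 * x**2 / beta vs |x| - 0.5 * beta form
# agrees with PyTorch's built-in smooth_l1_loss when given the same beta.
import torch
import torch.nn.functional as F

beta = 1.0 / 9.0
diff = torch.randn(1000)

n = diff.abs()
manual = torch.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta)
builtin = F.smooth_l1_loss(diff, torch.zeros_like(diff), reduction="none", beta=beta)

assert torch.allclose(manual, builtin, atol=1e-6)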
+ """ + target = torch.where(torch.isnan(target), input, target) # ignore nan targets + + diff = input - target + loss = self.smooth_l1_loss(diff, self.beta) + + # anchor-wise weighting + if weights is not None: + assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1] + loss = loss * weights.unsqueeze(-1) + + return loss + + + + +class KLLoss(nn.Module): + def __init__(self, args): + super(KLLoss, self).__init__() + + self.angle_weight = args['angle_weight'] + self.uncertainty_dim = args['uncertainty_dim'] + if args['xy_loss_type'] == "l2": + self.xy_loss = self.kl_loss_l2 + elif args['xy_loss_type'] == "l1": + self.xy_loss = self.kl_loss_l1 + else: + raise "not implemented" + + if args['angle_loss_type'] == "l2": + self.angle_loss = self.kl_loss_l2 + elif args['angle_loss_type'] == "von": + lambda_V = args['lambda_V'] + s0 = args['s0'] + limit_period = args['limit_period'] + self.angle_loss = partial(self.kl_loss_angular, lambda_V=lambda_V, s0=s0, limit_period=limit_period) + else: + raise "not implemented" + + + + + @staticmethod + def kl_loss_l2(diff, s): + """ + Args: + diff: [B, 2] + s: [B, 2] + Returns: + loss: [B, 2] + """ + loss = 0.5*(torch.exp(-s) * (diff**2) + s) + return loss + + @staticmethod + def kl_loss_l1(diff, s): + """ + Args: + diff: [B, 2] + s: [B, 2] + Returns: + loss: [B, 2] + """ + loss = 0.5*torch.exp(-s) * torch.abs(diff) + s + return loss + + @staticmethod + def kl_loss_angular(diff, s, lambda_V=1, s0=1, limit_period=False): + """ + Args: + diff: [B, 1] + s: [B, 1] + if limit_period, + diff + 180 ~ diff. + Returns: + loss: [B, 1] + """ + exp_minus_s = torch.exp(-s) + if limit_period: + cos_abs = torch.abs(torch.cos(diff)) + loss = loss = torch.log(mathh.i0e_cuda(exp_minus_s)*torch.exp(exp_minus_s)) - exp_minus_s * cos_abs.detach() + lambda_V * F.elu(s-s0) + else: + loss = torch.log(mathh.i0e_cuda(exp_minus_s)*torch.exp(exp_minus_s)) - exp_minus_s * torch.cos(diff) + lambda_V * F.elu(s-s0) + + return loss + + + def forward(self, input: torch.Tensor, + target: torch.Tensor, + sm: torch.Tensor, + weights: torch.Tensor = None): + target = torch.where(torch.isnan(target), input, target) # ignore nan targets + + + + if self.uncertainty_dim == 3: + xy_diff = input[...,:2] - target[...,:2] + loss1 = self.xy_loss(xy_diff, sm[...,:2]) + + theta_diff = input[...,7:8] - target[...,7:8] + + loss2 = self.angle_weight * self.angle_loss(theta_diff, sm[...,2:3]) + + loss = torch.cat((loss1, loss2), dim=-1) + + elif self.uncertainty_dim == 7: + ## is this right? 
+ other_diff = input[...,:6] - target[...,:6] + theta_diff = input[...,7:8] - target[...,7:8] + + diff = torch.cat((other_diff, theta_diff), dim=-1) + loss = self.xy_loss(diff, sm) + + elif self.uncertainty_dim == 2: + xy_diff = input[...,:2] - target[...,:2] + loss = self.xy_loss(xy_diff, sm[...,:2]) + else: + raise "not implemented" + + # anchor-wise weighting + if weights is not None: + assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1] + loss = loss * weights.unsqueeze(-1) + + return loss + + + +class PointPillarUncertaintyLoss(nn.Module): + def __init__(self, args): + super(PointPillarUncertaintyLoss, self).__init__() + self.reg_loss_func = WeightedSmoothL1Loss() + self.alpha = 0.25 + self.gamma = 2.0 + + self.cls_weight = args['cls_weight'] + self.kl_weight = args['kl_weight'] + self.reg_coe = args['reg'] + self.uncertainty_dim = args['kl_args']['uncertainty_dim'] + + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_weight = args['dir_args']['dir_weight'] + self.dir_offset = args['dir_args']['args']['dir_offset'] + self.num_bins = args['dir_args']['args']['num_bins'] + anchor_yaw = np.deg2rad(np.array(args['dir_args']['anchor_yaw'])) # for direction classification + self.anchor_yaw_map = torch.from_numpy(anchor_yaw).view(1,-1,1) # [1,2,1] + self.anchor_num = self.anchor_yaw_map.shape[1] + + else: + self.use_dir =False + + + self.kl_loss_func = KLLoss(args['kl_args']) + + self.loss_dict = {} + + def forward(self, output_dict, target_dict): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + rm = output_dict['rm'] # [B, 14, 50, 176] + psm = output_dict['psm'] # [B, 2, 50, 176] + sm = output_dict['sm'] # log of sigma^2 / scale [B, 6, 50 176] + targets = target_dict['targets'] + + cls_preds = psm.permute(0, 2, 3, 1).contiguous() # N, C, H, W -> N, H, W, C + + box_cls_labels = target_dict['pos_equal_one'] # [B, 50, 176, 2] + box_cls_labels = box_cls_labels.view(psm.shape[0], -1).contiguous() # -> [B, 50*176*2], two types of anchor + + positives = box_cls_labels > 0 + negatives = box_cls_labels == 0 + negative_cls_weights = negatives * 1.0 + cls_weights = (negative_cls_weights + 1.0 * positives).float() # all 1 + reg_weights = positives.float() + + pos_normalizer = positives.sum(1, keepdim=True).float() # positive number per sample + reg_weights /= torch.clamp(pos_normalizer, min=1.0) + cls_weights /= torch.clamp(pos_normalizer, min=1.0) + cls_targets = box_cls_labels + cls_targets = cls_targets.unsqueeze(dim=-1) + + cls_targets = cls_targets.squeeze(dim=-1) + one_hot_targets = torch.zeros( + *list(cls_targets.shape), 2, + dtype=cls_preds.dtype, device=cls_targets.device + ) + one_hot_targets.scatter_(-1, cls_targets.unsqueeze(dim=-1).long(), 1.0) + cls_preds = cls_preds.view(psm.shape[0], -1, 1) + one_hot_targets = one_hot_targets[..., 1:] + + cls_loss_src = self.cls_loss_func(cls_preds, + one_hot_targets, + weights=cls_weights) # [N, M] + cls_loss = cls_loss_src.sum() / psm.shape[0] + conf_loss = cls_loss * self.cls_weight + + ########## regression ########## + rm = rm.permute(0, 2, 3, 1).contiguous() + rm = rm.view(rm.size(0), -1, 7) + targets = targets.view(targets.size(0), -1, 7) + + box_preds_sin, reg_targets_sin = self.add_sin_difference_dim(rm, + targets) + loc_loss_src =\ + self.reg_loss_func(box_preds_sin[...,:7], + reg_targets_sin[...,:7], + weights=reg_weights) + reg_loss = loc_loss_src.sum() / rm.shape[0] + reg_loss *= self.reg_coe + + + ######## direction ########## + if self.use_dir: + dir_targets = 
self.get_direction_target(targets) + N = output_dict["dm"].shape[0] + dir_logits = output_dict["dm"].permute(0, 2, 3, 1).contiguous().view(N, -1, 2) # [N, H*W*#anchor, 2] + + + dir_loss = softmax_cross_entropy_with_logits(dir_logits.view(-1, self.anchor_num), dir_targets.view(-1, self.anchor_num)) + + dir_loss = dir_loss.view(dir_logits.shape[:2]) * reg_weights # [N, H*W*anchor_num] + + dir_loss = dir_loss.sum() * self.dir_weight / N + + ######## kl ######### + sm = sm.permute(0, 2, 3, 1).contiguous() # [N, H, W, #anchor_num * 3] + sm = sm.view(sm.size(0), -1, self.uncertainty_dim) + + kl_loss_src = \ + self.kl_loss_func(box_preds_sin, + reg_targets_sin, + sm, + reg_weights) + + kl_loss = kl_loss_src.sum() / sm.shape[0] + kl_loss *= self.kl_weight + + # total_loss = reg_loss + conf_loss + kl_loss + total_loss = reg_loss + conf_loss + + self.loss_dict.update({'total_loss': total_loss, + 'reg_loss': reg_loss, + 'conf_loss': conf_loss, + 'kl_loss': kl_loss}) + + if self.use_dir: + # total_loss += dir_loss + self.loss_dict.update({'dir_loss': dir_loss}) + + + return total_loss + + def get_direction_target(self, reg_targets): + """ + Args: + reg_targets: [N, H * W * #anchor_num, 7] + The last term is (theta_gt - theta_a) + + Returns: + dir_targets: + theta_gt: [N, H * W * #anchor_num, NUM_BIN] + NUM_BIN = 2 + """ + # (1, 2, 1) + H_times_W_times_anchor_num = reg_targets.shape[1] + anchor_map = self.anchor_yaw_map.repeat(1, H_times_W_times_anchor_num//self.anchor_num, 1).to(reg_targets.device) # [1, H * W * #anchor_num, 1] + rot_gt = reg_targets[..., -1] + anchor_map[..., -1] # [N, H*W*anchornum] + offset_rot = limit_period(rot_gt - self.dir_offset, 0, 2 * np.pi) + dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / self.num_bins)).long() # [N, H*W*anchornum] + dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=self.num_bins - 1) + # one_hot: + # if rot_gt > 0, then the label is 1, then the regression target is [0, 1] + dir_cls_targets = one_hot_f(dir_cls_targets, self.num_bins) + return dir_cls_targets + + + + def cls_loss_func(self, input: torch.Tensor, + target: torch.Tensor, + weights: torch.Tensor): + """ + Args: + input: (B, #anchors, #classes) float tensor. + Predicted logits for each class + target: (B, #anchors, #classes) float tensor. + One-hot encoded classification targets + weights: (B, #anchors) float tensor. + Anchor-wise weights. + + Returns: + weighted_loss: (B, #anchors, #classes) float tensor after weighting. + """ + pred_sigmoid = torch.sigmoid(input) + alpha_weight = target * self.alpha + (1 - target) * (1 - self.alpha) + pt = target * (1.0 - pred_sigmoid) + (1.0 - target) * pred_sigmoid + focal_weight = alpha_weight * torch.pow(pt, self.gamma) + + bce_loss = self.sigmoid_cross_entropy_with_logits(input, target) + + loss = focal_weight * bce_loss + + if weights.shape.__len__() == 2 or \ + (weights.shape.__len__() == 1 and target.shape.__len__() == 2): + weights = weights.unsqueeze(-1) + + assert weights.shape.__len__() == loss.shape.__len__() + + return loss * weights + + @staticmethod + def sigmoid_cross_entropy_with_logits(input: torch.Tensor, target: torch.Tensor): + """ PyTorch Implementation for tf.nn.sigmoid_cross_entropy_with_logits: + max(x, 0) - x * z + log(1 + exp(-abs(x))) in + https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits + + Args: + input: (B, #anchors, #classes) float tensor. + Predicted logits for each class + target: (B, #anchors, #classes) float tensor. 
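get_direction_target above adds the anchor yaw back onto the regression residual, wraps the result into [0, 2*pi) with limit_period, and quantizes it into num_bins direction classes that are then supervised with softmax cross-entropy. The sketch below walks through that binning on a few yaw values; limit_period is re-implemented locally as a stand-in assumed to match the imported helper, dir_offset and num_bins are illustrative values, and F.one_hot stands in for the file's one_hot_f:

# Illustrative sketch: absolute yaw -> direction-bin label, as in get_direction_target.
import numpy as np
import torch
import torch.nn.functional as F

def limit_period(val, offset, period):
    # stand-in: with offset=0 this wraps val into [0, period)
    return val - torch.floor(val / period + offset) * period

num_bins = 2
dir_offset = 0.7853                                   # roughly pi / 4

rot_gt = torch.tensor([0.1, 1.7, 3.3, -2.9])          # absolute yaw in radians
offset_rot = limit_period(rot_gt - dir_offset, 0, 2 * np.pi)
dir_cls = torch.clamp((offset_rot / (2 * np.pi / num_bins)).floor().long(),
                      min=0, max=num_bins - 1)
dir_onehot = F.one_hot(dir_cls, num_bins).float()
print(dir_cls, dir_onehot)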
+ One-hot encoded classification targets + + Returns: + loss: (B, #anchors, #classes) float tensor. + Sigmoid cross entropy loss without reduction + """ + loss = torch.clamp(input, min=0) - input * target + \ + torch.log1p(torch.exp(-torch.abs(input))) + return loss + + @staticmethod + def add_sin_difference_dim(boxes1, boxes2, dim=6): + """ + This is different with other loss function. + Here we especially retain the angel + + Add sin difference ? + Replace sin difference ! + + Returns: + [B, H*W, 7] -> [B, H*W, 8] + """ + assert dim != -1 + + # sin(theta1 - theta2) = sin(theta1)*cos(theta2) - cos(theta1)*sin(theta2) + + rad_pred_encoding = torch.sin(boxes1[..., dim:dim + 1]) * \ + torch.cos(boxes2[..., dim:dim + 1]) + + rad_tg_encoding = torch.cos(boxes1[..., dim: dim + 1]) * \ + torch.sin(boxes2[..., dim: dim + 1]) + + # boxes1 = torch.cat([boxes1[..., :dim], rad_pred_encoding, + # boxes1[..., dim + 1:]], dim=-1) + # boxes2 = torch.cat([boxes2[..., :dim], rad_tg_encoding, + # boxes2[..., dim + 1:]], dim=-1) + + boxes1_encoded = torch.cat([boxes1[..., :dim], rad_pred_encoding, + boxes1[..., dim:]], dim=-1) + boxes2_encoded = torch.cat([boxes2[..., :dim], rad_tg_encoding, + boxes2[..., dim:]], dim=-1) + + return boxes1_encoded, boxes2_encoded + + + def logging(self, epoch, batch_id, batch_len, writer = None): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. + batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict['total_loss'] + reg_loss = self.loss_dict['reg_loss'] + conf_loss = self.loss_dict['conf_loss'] + kl_loss = self.loss_dict['kl_loss'] + + + print_msg = ("[epoch %d][%d/%d], || Loss: %.4f || Conf Loss: %.4f" + " || Loc Loss: %.4f || KL Loss: %.4f" % ( + epoch, batch_id + 1, batch_len, + total_loss.item(), conf_loss.item(), reg_loss.item(), kl_loss.item())) + + if self.use_dir: + dir_loss = self.loss_dict['dir_loss'] + print_msg += " || Dir Loss: %.4f" % dir_loss.item() + + print(print_msg) + + if not writer is None: + writer.add_scalar('Regression_loss', reg_loss.item(), + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss', conf_loss.item(), + epoch*batch_len + batch_id) + writer.add_scalar('kl_loss', kl_loss.item(), + epoch*batch_len + batch_id) + if self.use_dir: + writer.add_scalar('dir_loss', dir_loss.item(), + epoch*batch_len + batch_id) + +def one_hot_f(tensor, depth, dim=-1, on_value=1.0, dtype=torch.float32): + tensor_onehot = torch.zeros(*list(tensor.shape), depth, dtype=dtype, device=tensor.device) # [4, 70400, 2] + tensor_onehot.scatter_(dim, tensor.unsqueeze(dim).long(), on_value) # [4, 70400, 2] + return tensor_onehot + +def softmax_cross_entropy_with_logits(logits, labels): + param = list(range(len(logits.shape))) + transpose_param = [0] + [param[-1]] + param[1:-1] + logits = logits.permute(*transpose_param) + loss_ftor = torch.nn.CrossEntropyLoss(reduction="none") + loss = loss_ftor(logits, labels.max(dim=-1)[1]) + return loss diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_v2v_robust_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_v2v_robust_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..64c041713a7e69f7456a638d619f552f94e58567 --- /dev/null +++ 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/backup/point_pillar_v2v_robust_loss.py @@ -0,0 +1,387 @@ +# -*- coding: utf-8 -*- +# Author: OpenPCDet, Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +from icecream import ic +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +from opencood.models.sub_modules.v2v_robust_module import regroup +from opencood.utils.transformation_utils import tfm_to_pose, tfm_to_pose_torch +torch.set_printoptions(precision=3, sci_mode=False) + +class WeightedSmoothL1Loss(nn.Module): + """ + Code-wise Weighted Smooth L1 Loss modified based on fvcore.nn.smooth_l1_loss + https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/smooth_l1_loss.py + | 0.5 * x ** 2 / beta if abs(x) < beta + smoothl1(x) = | + | abs(x) - 0.5 * beta otherwise, + where x = input - target. + """ + def __init__(self, beta: float = 1.0 / 9.0, code_weights: list = None): + """ + Args: + beta: Scalar float. + L1 to L2 change point. + For beta values < 1e-5, L1 loss is computed. + code_weights: (#codes) float list if not None. + Code-wise weights. + """ + super(WeightedSmoothL1Loss, self).__init__() + self.beta = beta + if code_weights is not None: + self.code_weights = np.array(code_weights, dtype=np.float32) + self.code_weights = torch.from_numpy(self.code_weights).cuda() + + @staticmethod + def smooth_l1_loss(diff, beta): + if beta < 1e-5: + loss = torch.abs(diff) + else: + n = torch.abs(diff) + loss = torch.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta) + + return loss + + def forward(self, input: torch.Tensor, + target: torch.Tensor, weights: torch.Tensor = None): + """ + Args: + input: (B, #anchors, #codes) float tensor. + Ecoded predicted locations of objects. + target: (B, #anchors, #codes) float tensor. + Regression targets. + weights: (B, #anchors) float tensor if not None. + + Returns: + loss: (B, #anchors) float tensor. + Weighted smooth l1 loss without reduction. 
+ """ + target = torch.where(torch.isnan(target), input, target) # ignore nan targets + + diff = input - target + loss = self.smooth_l1_loss(diff, self.beta) + + # anchor-wise weighting + if weights is not None: + assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1] + loss = loss * weights.unsqueeze(-1) + + return loss + + +class PointPillarV2VRobustLoss(nn.Module): + def __init__(self, args): + super(PointPillarV2VRobustLoss, self).__init__() + self.reg_loss_func = WeightedSmoothL1Loss() + self.score_loss_func = nn.BCELoss(reduce=True, reduction="mean") + self.pose_loss_func = nn.SmoothL1Loss(reduce=True, reduction="mean", beta=1.0/9) + self.alpha = 0.25 + self.gamma = 2.0 + + self.cls_weight = args['cls_weight'] + self.reg_coe = args['reg'] + self.score_weight = args['score_weight'] + self.pose_weight = args['pose_weight'] + self.loss_dict = {} + + def forward(self, output_dict, target_dict): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + stage = output_dict['stage'] + + if stage == 0 or stage == 2: + rm = output_dict['rm'] # [B, 14, 50, 176] + psm = output_dict['psm'] # [B, 2, 50, 176] + targets = target_dict['targets'] + + cls_preds = psm.permute(0, 2, 3, 1).contiguous() # N, C, H, W -> N, H, W, C + + box_cls_labels = target_dict['pos_equal_one'] # [B, 50, 176, 2] + box_cls_labels = box_cls_labels.view(psm.shape[0], -1).contiguous() + + positives = box_cls_labels > 0 + negatives = box_cls_labels == 0 + negative_cls_weights = negatives * 1.0 + cls_weights = (negative_cls_weights + 1.0 * positives).float() + reg_weights = positives.float() + + pos_normalizer = positives.sum(1, keepdim=True).float() + reg_weights /= torch.clamp(pos_normalizer, min=1.0) + cls_weights /= torch.clamp(pos_normalizer, min=1.0) + cls_targets = box_cls_labels + cls_targets = cls_targets.unsqueeze(dim=-1) + + cls_targets = cls_targets.squeeze(dim=-1) + one_hot_targets = torch.zeros( + *list(cls_targets.shape), 2, + dtype=cls_preds.dtype, device=cls_targets.device + ) + one_hot_targets.scatter_(-1, cls_targets.unsqueeze(dim=-1).long(), 1.0) + cls_preds = cls_preds.view(psm.shape[0], -1, 1) + one_hot_targets = one_hot_targets[..., 1:] + + cls_loss_src = self.cls_loss_func(cls_preds, + one_hot_targets, + weights=cls_weights) # [N, M] + cls_loss = cls_loss_src.sum() / psm.shape[0] + conf_loss = cls_loss * self.cls_weight + + # regression + rm = rm.permute(0, 2, 3, 1).contiguous() + rm = rm.view(rm.size(0), -1, 7) + targets = targets.view(targets.size(0), -1, 7) + box_preds_sin, reg_targets_sin = self.add_sin_difference(rm, + targets) + loc_loss_src =\ + self.reg_loss_func(box_preds_sin, + reg_targets_sin, + weights=reg_weights) + reg_loss = loc_loss_src.sum() / rm.shape[0] + reg_loss *= self.reg_coe + + total_loss = reg_loss + conf_loss + + self.loss_dict.update({'total_loss': total_loss, + 'reg_loss': reg_loss, + 'conf_loss': conf_loss}) + else: + total_loss = 0 + + + + # robust v2vnet part + record_len = target_dict['record_len'] # we can also put this in output_dict + if stage == 0: + scores = output_dict['scores'] + choice = output_dict['choice'] + + score_loss = self.attention_loss(scores, choice, record_len) + total_loss += self.score_weight * score_loss + self.loss_dict.update({'total_loss': total_loss, + 'score_loss': score_loss}) + + elif stage == 1 or stage == 2: + pairwise_corr = output_dict['pairwise_corr'] + pairwise_t_matrix = output_dict['pairwise_t_matrix'] + pairwise_t_matrix_gt = target_dict['pairwise_t_matrix'] + + pose_loss = 
self.pose_loss(pairwise_corr, pairwise_t_matrix, pairwise_t_matrix_gt, record_len) + total_loss += self.pose_weight * pose_loss + self.loss_dict.update({'total_loss': total_loss, + 'pose_loss': pose_loss}) + + + return total_loss + + def attention_loss(self, scores, choices, record_len): + """ + Args: + scores: (B, L, L) + scores[b,i,i] is already 0. + choices: (sum(N_cav), 1) + 0 is strong noise, 1 is weak noise + record_len: + list, shape [B] + """ + # first build gt label from choice + B = scores.shape[0] + choice_split = regroup(choices, record_len) + label = torch.zeros_like(scores, device=scores.device) + mask = torch.zeros_like(scores, device=scores.device) + for b in range(B): + N = record_len[b] + choice = choice_split[b].float() # [N, 1] + choice = choice @ choice.T # [N, N] + + gamma = 0.85 + label[b,:N,:N] = choice * gamma + (1-choice) * (1-gamma) # [N, N] + + mask[b,:N,:N] = 1 + mask[b,range(N),range(N)] = 0 + + mask = mask.bool() + + input = torch.masked_select(scores, mask) + target = torch.masked_select(label, mask) + print("input:", input) + print("target:", target) + + return self.score_loss_func(input, target) + + def pose_loss(self, pairwise_corr, pairwise_t_matrix, pairwise_t_matrix_gt, record_len): + """ + Args: + pairwise_corr: [B, L, L, 3] + pairwise_t_matrix/pairwise_t_matrix_gt: [B,L,L,4,4] + record_len: list, shape [B] + """ + + pairwise_t_matrix_gt = pairwise_t_matrix_gt.float() + B, L = pairwise_t_matrix.shape[:2] + mask = torch.zeros((B, L, L), device = pairwise_t_matrix.device) + + for b in range(B): + N = record_len[b] + mask[b,:N,:N] = 1 + mask[b,range(N), range(N)] = 0 + + pair_corr_gt = torch.linalg.solve(pairwise_t_matrix.transpose(-2,-1), pairwise_t_matrix_gt.transpose(-2,-1)).transpose(-2,-1) + + yaw = pairwise_corr[..., 2] # [B,L,L] + yaw_gt = torch.rad2deg(torch.atan2(pair_corr_gt[...,1,0], pair_corr_gt[...,0,0])) # [B,L,L] + + x = pairwise_corr[..., 0] # [B,L,L] + x_gt = pair_corr_gt[..., 0,3] + + y = pairwise_corr[..., 1] # [B,L,L] + y_gt = pair_corr_gt[..., 1,3] + + mask = mask.bool() + mask = mask.view(B,L,L) # [B, L, L, ] + + input_x = torch.masked_select(x, mask) + target_x = torch.masked_select(x_gt, mask) + + input_y = torch.masked_select(y, mask) + target_y = torch.masked_select(y_gt, mask) + + input_yaw = torch.masked_select(yaw, mask) + target_yaw = torch.masked_select(yaw_gt, mask) + + loss_x = self.pose_loss_func(input_x, target_x) + loss_y = self.pose_loss_func(input_y, target_y) + loss_yaw = self.pose_loss_func(input_yaw, target_yaw) + + lambda_trans = 2/3 + lambda_rot = 1/3 + print("x:\n", input_x, "\n", target_x) + print("y:\n", input_y, "\n", target_y) + print("yaw:\n", input_yaw, "\n", target_yaw) + + return lambda_trans * (loss_x + loss_y) + lambda_rot * loss_yaw + + + + def cls_loss_func(self, input: torch.Tensor, + target: torch.Tensor, + weights: torch.Tensor): + """ + Args: + input: (B, #anchors, #classes) float tensor. + Predicted logits for each class + target: (B, #anchors, #classes) float tensor. + One-hot encoded classification targets + weights: (B, #anchors) float tensor. + Anchor-wise weights. + + Returns: + weighted_loss: (B, #anchors, #classes) float tensor after weighting. 
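In pose_loss above, torch.linalg.solve(T_est.transpose(-2, -1), T_gt.transpose(-2, -1)).transpose(-2, -1) is just T_gt @ inv(T_est): the residual transform that corrects the estimated pairwise pose to the ground truth, whose rotation angle and translation become the yaw / x / y regression targets. A small sketch with a made-up planar transform confirming the algebra:

# Illustrative sketch: solve(T_est^T, T_gt^T)^T == T_gt @ inv(T_est), the pose residual
# whose yaw / x / y components are supervised against the predicted correction.
import math
import torch

def se2(yaw_deg, x, y):
    yaw = math.radians(yaw_deg)
    return torch.tensor([[math.cos(yaw), -math.sin(yaw), 0.0, x],
                         [math.sin(yaw),  math.cos(yaw), 0.0, y],
                         [0.0,            0.0,           1.0, 0.0],
                         [0.0,            0.0,           0.0, 1.0]])

T_est = se2(10.0, 1.0, 2.0)
T_gt = se2(13.0, 1.5, 1.0)

corr = torch.linalg.solve(T_est.transpose(-2, -1), T_gt.transpose(-2, -1)).transpose(-2, -1)
assert torch.allclose(corr, T_gt @ torch.linalg.inv(T_est), atol=1e-4)

yaw_residual = torch.rad2deg(torch.atan2(corr[1, 0], corr[0, 0]))
x_residual, y_residual = corr[0, 3], corr[1, 3]
print(yaw_residual, x_residual, y_residual)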
+ """ + pred_sigmoid = torch.sigmoid(input) + alpha_weight = target * self.alpha + (1 - target) * (1 - self.alpha) + pt = target * (1.0 - pred_sigmoid) + (1.0 - target) * pred_sigmoid + focal_weight = alpha_weight * torch.pow(pt, self.gamma) + + bce_loss = self.sigmoid_cross_entropy_with_logits(input, target) + + loss = focal_weight * bce_loss + + if weights.shape.__len__() == 2 or \ + (weights.shape.__len__() == 1 and target.shape.__len__() == 2): + weights = weights.unsqueeze(-1) + + assert weights.shape.__len__() == loss.shape.__len__() + + return loss * weights + + @staticmethod + def sigmoid_cross_entropy_with_logits(input: torch.Tensor, target: torch.Tensor): + """ PyTorch Implementation for tf.nn.sigmoid_cross_entropy_with_logits: + max(x, 0) - x * z + log(1 + exp(-abs(x))) in + https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits + + Args: + input: (B, #anchors, #classes) float tensor. + Predicted logits for each class + target: (B, #anchors, #classes) float tensor. + One-hot encoded classification targets + + Returns: + loss: (B, #anchors, #classes) float tensor. + Sigmoid cross entropy loss without reduction + """ + loss = torch.clamp(input, min=0) - input * target + \ + torch.log1p(torch.exp(-torch.abs(input))) + return loss + + @staticmethod + def add_sin_difference(boxes1, boxes2, dim=6): + assert dim != -1 + rad_pred_encoding = torch.sin(boxes1[..., dim:dim + 1]) * \ + torch.cos(boxes2[..., dim:dim + 1]) + rad_tg_encoding = torch.cos(boxes1[..., dim:dim + 1]) * \ + torch.sin(boxes2[..., dim:dim + 1]) + + boxes1 = torch.cat([boxes1[..., :dim], rad_pred_encoding, + boxes1[..., dim + 1:]], dim=-1) + boxes2 = torch.cat([boxes2[..., :dim], rad_tg_encoding, + boxes2[..., dim + 1:]], dim=-1) + return boxes1, boxes2 + + + def logging(self, epoch, batch_id, batch_len, writer = None): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. 
+ batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict['total_loss'].item() + if 'reg_loss' in self.loss_dict: + reg_loss = self.loss_dict['reg_loss'].item() + else: + reg_loss = 0 + if 'conf_loss' in self.loss_dict: + conf_loss = self.loss_dict['conf_loss'].item() + else: + conf_loss = 0 + if "score_loss" in self.loss_dict: + score_loss = self.loss_dict['score_loss'] + else: + score_loss = 0 + if "pose_loss" in self.loss_dict: + pose_loss = self.loss_dict['pose_loss'] + else: + pose_loss = 0 + + print("[epoch %d][%d/%d], || Loss: %.4f || Conf Loss: %.4f" + " || Loc Loss: %.4f || Score Loss: %.4f || Pose Loss: %.4f" % ( + epoch, batch_id + 1, batch_len, + total_loss, conf_loss, reg_loss, score_loss, pose_loss)) + + if not writer is None: + writer.add_scalar('Regression_loss', reg_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss', conf_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Score_loss', score_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Pose_loss', pose_loss, + epoch*batch_len + batch_id) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/center_point_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/center_point_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..d56357f43807611fd80ad27be91025dc8d6bcc34 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/center_point_loss.py @@ -0,0 +1,697 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +import math + + +""" +Gaussian Loss +""" +class GaussianFocalLoss(nn.Module): + """GaussianFocalLoss is a variant of focal loss. + + More details can be found in the `paper + `_ + Code is modified from `kp_utils.py + `_ # noqa: E501 + Please notice that the target in GaussianFocalLoss is a gaussian heatmap, + not 0/1 binary target. + + Args: + alpha (float): Power of prediction. + gamma (float): Power of target for negtive samples. + reduction (str): Options are "none", "mean" and "sum". + loss_weight (float): Loss weight of current loss. + """ + + def __init__(self, + alpha=2.0, + gamma=4.0, + reduction='mean', + loss_weight=1.0): + super(GaussianFocalLoss, self).__init__() + self.alpha = alpha + self.gamma = gamma + self.reduction = reduction + self.loss_weight = loss_weight + + def forward(self, + pred, + target, + weight=None, + avg_factor=None, + reduction_override=None): + """Forward function. + + Args: + pred (torch.Tensor): The prediction. + target (torch.Tensor): The learning target of the prediction + in gaussian distribution. + weight (torch.Tensor, optional): The weight of loss for each + prediction. Defaults to None. + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + reduction_override (str, optional): The reduction method used to + override the original reduction method of the loss. + Defaults to None. + """ + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + loss_reg = self.loss_weight * gaussian_focal_loss( + pred, + target, + weight, + alpha=self.alpha, + gamma=self.gamma, + reduction=reduction, + avg_factor=avg_factor) + return loss_reg + +def gaussian_focal_loss(pred, gaussian_target, alpha=2.0, gamma=4.0): + """`Focal Loss `_ for targets in gaussian + distribution. 
+ + Args: + pred (torch.Tensor): The prediction. + gaussian_target (torch.Tensor): The learning target of the prediction + in gaussian distribution. + alpha (float, optional): A balanced form for Focal Loss. + Defaults to 2.0. + gamma (float, optional): The gamma for calculating the modulating + factor. Defaults to 4.0. + """ + eps = 1e-12 + device = pred.device + pos_weights = gaussian_target.eq(1) + pos_weights = pos_weights.to(device) + neg_weights = (1 - gaussian_target).pow(gamma) + neg_weights = neg_weights.to(device) + pos_loss = -(pred + eps).log() * (1 - pred).pow(alpha) * pos_weights + neg_loss = -(1 - pred + eps).log() * pred.pow(alpha) * neg_weights + return pos_loss + neg_loss + + +def clip_sigmoid(x, eps=1e-4): + """Sigmoid function for input feature. + + Args: + x (torch.Tensor): Input feature map with the shape of [B, N, H, W]. + eps (float): Lower bound of the range to be clamped to. Defaults + to 1e-4. + + Returns: + torch.Tensor: Feature map after sigmoid. + """ + y = torch.clamp(torch.sigmoid(x), min=eps, max=1 - eps) + # y = torch.clamp(x.sigmoid_(), min=eps, max=1 - eps) + return y + +def _gather_feat(feat, ind, mask=None): + # feat : [bs, wxh, c] + dim = feat.size(2) + # ind : [bs, index, c] + ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) + feat = feat.gather(1, ind) # 按照dim=1获取ind + if mask is not None: + mask = mask.unsqueeze(2).expand_as(feat) + feat = feat[mask] + feat = feat.view(-1, dim) + return feat + + +def _transpose_and_gather_feat(feat, ind): + feat = feat.permute(0, 2, 3, 1).contiguous() ## # from [bs c h w] to [bs, h, w, c] + feat = feat.view(feat.size(0), -1, feat.size(3)) # to [bs, wxh, c] + feat = _gather_feat(feat, ind) + return feat + + + +class RegLoss(nn.Module): + '''Regression loss for an output tensor + Arguments: + output (batch x dim x h x w) + mask (batch x max_objects) + ind (batch x max_objects) + target (batch x max_objects x dim) + ''' + def __init__(self): + super(RegLoss, self).__init__() + + def forward(self, output, mask, ind, target): + pred = _transpose_and_gather_feat(output, ind) + mask = mask.float().unsqueeze(2) + + loss = F.l1_loss(pred*mask, target*mask, reduction='none') + loss = loss / (mask.sum() + 1e-4) + loss = loss.transpose(2 ,0).sum(dim=2).sum(dim=1) + return loss + + + +class FastFocalLoss(nn.Module): + ''' + Reimplemented focal loss, exactly the same as the CornerNet version. + Faster and costs much less memory. 
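gaussian_focal_loss above is the penalty-reduced focal loss used by CornerNet/CenterNet heads: only cells where the gaussian heatmap is exactly 1 count as positives, and every other cell is a negative whose contribution is scaled down by (1 - target)**gamma, so cells right next to a peak are barely penalized. A tiny numeric sketch with illustrative values:

# Illustrative sketch: penalty-reduced focal loss on a gaussian heatmap target.
import torch

eps, alpha, gamma = 1e-12, 2.0, 4.0
pred = torch.tensor([[0.9, 0.4],
                     [0.2, 0.1]])
target = torch.tensor([[1.0, 0.6],         # one peak and one "soft" neighbour
                       [0.0, 0.0]])

pos_weights = target.eq(1).float()
neg_weights = (1 - target).pow(gamma)      # 0.6 -> 0.0256, 0.0 -> 1.0
pos_loss = -(pred + eps).log() * (1 - pred).pow(alpha) * pos_weights
neg_loss = -(1 - pred + eps).log() * pred.pow(alpha) * neg_weights
print(pos_loss + neg_loss)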
+ ''' + def __init__(self): + super(FastFocalLoss, self).__init__() + + def forward(self, out, target, ind, mask, cat): + ''' + Arguments: + out, target: B x C x H x W + ind, mask: B x M + cat (category id for peaks): B x M + ''' + mask = mask.float() + gt = torch.pow(1 - target, 4) + neg_loss = torch.log(1 - out) * torch.pow(out, 2) * gt + neg_loss = neg_loss.sum() + + pos_pred_pix = _transpose_and_gather_feat(out, ind) # B x M x C + pos_pred = pos_pred_pix.gather(2, cat.unsqueeze(2)) # B x M + num_pos = mask.sum() + pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) * \ + mask.unsqueeze(2) + pos_loss = pos_loss.sum() + if num_pos == 0: + return - neg_loss + return - (pos_loss + neg_loss) / num_pos + +class CenterPointLoss(nn.Module): + def __init__(self, args): + super(CenterPointLoss, self).__init__() + + self.cls_weight = args['cls_weight'] + self.loc_weight = args['loc_weight'] + self.code_weights = args['code_weights'] + self.target_cfg = args['target_assigner_config'] + self.lidar_range = self.target_cfg['cav_lidar_range'] + self.voxel_size = self.target_cfg['voxel_size'] + + self.loss_cls = GaussianFocalLoss(reduction='mean') + self.crit = FastFocalLoss() + self.crit_reg = RegLoss() + + self.loss_dict = {} + + def forward(self, output_dict, target_dict, suffix=""): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + # Predictions + box_preds = output_dict['bbox_preds{}'.format(suffix)].permute(0, 2, 3, 1).contiguous() # [B, H, W, C] + cls_preds = clip_sigmoid(output_dict['cls_preds{}'.format(suffix)]) + + # GTs + bbox_center = target_dict['object_bbx_center{}'.format(suffix)].cpu().numpy() + bbox_mask = target_dict['object_bbx_mask{}'.format(suffix)].cpu().numpy() + batch_size = bbox_mask.shape[0] + + max_gt = int(max(bbox_mask.sum(axis=1))) + gt_boxes3d = np.zeros((batch_size, max_gt, bbox_center[0].shape[-1]), dtype=np.float32) # [B, max_anchor_num, 7] + for k in range(batch_size): + gt_boxes3d[k, :int(bbox_mask[k].sum()), :] = bbox_center[k, :int(bbox_mask[k].sum()), :] + gt_boxes3d = torch.from_numpy(gt_boxes3d).to(box_preds.device) + + targets_dict = self.assign_targets( + gt_boxes=gt_boxes3d # [B, max_anchor_num, 7 + C ] heatmap [2,1,h,w] anno_boxes [2,100,8] inds [2, 100] + ) + + cls_gt = targets_dict['heatmaps'] + box_gt = (targets_dict['anno_boxes'], targets_dict['inds'], targets_dict['masks']) + + cls_loss = self.get_cls_layer_loss(cls_preds, cls_gt) + box_loss = self.get_box_reg_layer_loss(box_preds, box_gt) + rpn_loss = cls_loss + box_loss + + self.loss_dict.update({ 'total_loss': rpn_loss.item(), + 'reg_loss': box_loss.item(), + 'cls_loss': cls_loss.item()}) + + return rpn_loss + + def logging(self, epoch, batch_id, batch_len, writer = None, suffix=""): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. 
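Both RegLoss and FastFocalLoss above rely on _transpose_and_gather_feat to pull per-object predictions out of a dense head output: the B x C x H x W map is flattened to B x (H*W) x C and indexed with the flat center indices ind = y * W + x produced by the target assigner. A small standalone sketch of that gather with made-up shapes:

# Illustrative sketch: gather per-object feature vectors from a B x C x H x W map
# using flat center indices ind = y * W + x.
import torch

B, C, H, W = 2, 8, 4, 6
feat = torch.randn(B, C, H, W)
ind = torch.tensor([[5, 17],               # two object centers per sample
                    [0, 23]])

flat = feat.permute(0, 2, 3, 1).reshape(B, H * W, C)                      # B x (H*W) x C
gathered = flat.gather(1, ind.unsqueeze(-1).expand(B, ind.size(1), C))    # B x M x C

# sanity check against direct indexing
y, x = ind[0, 1] // W, ind[0, 1] % W
assert torch.allclose(gathered[0, 1], feat[0, :, y, x])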
+ batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict.get('total_loss', 0) + reg_loss = self.loss_dict.get('reg_loss', 0) + cls_loss = self.loss_dict.get('cls_loss', 0) + + print("[epoch %d][%d/%d]%s, || Loss: %.4f || Conf Loss: %.4f" + " || Loc Loss: %.4f" % ( + epoch, batch_id + 1, batch_len, suffix, + total_loss, cls_loss, reg_loss)) + + if not writer is None: + writer.add_scalar('Regression_loss', reg_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss', cls_loss, + epoch*batch_len + batch_id) + + + def get_cls_layer_loss(self, pred_heatmaps, gt_heatmaps): + num_pos = gt_heatmaps.eq(1).float().sum().item() + + cls_loss = self.loss_cls( + pred_heatmaps, + gt_heatmaps, + avg_factor=max(num_pos, 1)) + + cls_loss = cls_loss * self.cls_weight + return cls_loss + + + def _gather_feat(self, feat, ind, mask=None): + """Gather feature map. + + Given feature map and index, return indexed feature map. + + Args: + feat (torch.tensor): Feature map with the shape of [B, H*W, 10]. + ind (torch.Tensor): Index of the ground truth boxes with the + shape of [B, max_obj]. + mask (torch.Tensor): Mask of the feature map with the shape + of [B, max_obj]. Default: None. + + Returns: + torch.Tensor: Feature map after gathering with the shape + of [B, max_obj, 10]. + """ + device = feat.device + dim = feat.size(2) + ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) # 把 ind 和 dim 拼接在一起 + feat = feat.gather(1, ind.to(device)) + if mask is not None: + mask = mask.unsqueeze(2).expand_as(feat) + feat = feat[mask] + feat = feat.view(-1, dim) + return feat + + + def get_box_reg_layer_loss(self, bbox_preds, bbox_gt): + target_box, inds, masks = bbox_gt + pred = bbox_preds + ind = inds + num = masks.float().sum() + pred = pred.view(pred.size(0), -1, pred.size(3)) # [n, h*w, 8 ] + pred = self._gather_feat(pred, ind) + mask = masks.unsqueeze(2).expand_as(target_box).float() ## 把 mask 的维度进行扩展 + isnotnan = (~torch.isnan(target_box)).float() + mask *= isnotnan + + code_weights = self.code_weights + bbox_weights = mask * mask.new_tensor(code_weights) + + loc_loss = l1_loss( + pred, target_box, bbox_weights, avg_factor=(num + 1e-4)) + + loc_loss = loc_loss * self.loc_weight + return loc_loss + + + def assign_targets(self, gt_boxes): + """Generate targets. + + Args: + gt_boxes: ( M, 7+c) box + cls ## 这个地方函数和centerpoint-kitti 那个不太一样,这里是分开进行计算的 + + Returns: + Returns: + tuple[list[torch.Tensor]]: Tuple of target including \ + the following results in order. + + - list[torch.Tensor]: Heatmap scores. + - list[torch.Tensor]: Ground truth boxes. + - list[torch.Tensor]: Indexes indicating the \ + position of the valid boxes. + - list[torch.Tensor]: Masks indicating which \ + boxes are valid. + """ + if gt_boxes.shape[-1] == 8: + gt_bboxes_3d, gt_labels_3d = gt_boxes[..., :-1], gt_boxes[..., -1] # gt_box [2,14,8] batch_size * bbox_num * 8 + heatmaps, anno_boxes, inds, masks = self.get_targets_single(gt_bboxes_3d, gt_labels_3d) + elif gt_boxes.shape[-1] == 7: + gt_bboxes_3d = gt_boxes + heatmaps, anno_boxes, inds, masks = self.get_targets_single(gt_bboxes_3d) + + # transpose heatmaps, because the dimension of tensors in each task is + # different, we have to use numpy instead of torch to do the transpose. 
+ # heatmaps = np.array(heatmaps).transpose(1, 0).tolist() + # heatmaps = [torch.stack(hms_) for hms_ in heatmaps] + # # heatmaps = torch.from_numpy(np.array(heatmaps)) + # # transpose anno_boxes + # anno_boxes = np.array(anno_boxes).transpose(1, 0).tolist() + # anno_boxes = [torch.stack(anno_boxes_) for anno_boxes_ in anno_boxes] + # # transpose inds + # inds = np.array(inds).transpose(1, 0).tolist() + # inds = [torch.stack(inds_) for inds_ in inds] + # # transpose inds + # masks = np.array(masks).transpose(1, 0).tolist() + # masks = [torch.stack(masks_) for masks_ in masks] + + all_targets_dict = { + 'heatmaps': heatmaps, + 'anno_boxes': anno_boxes, + 'inds': inds, + 'masks': masks + } + + return all_targets_dict + + + def get_targets_single(self, gt_bbox_3d, gt_labels_3d=None): + + batch_size = gt_bbox_3d.shape[0] + device = gt_bbox_3d.device + max_objs = self.target_cfg['max_objs'] + pc_range = self.lidar_range + voxel_size = self.voxel_size + + grid_size = (np.array(self.lidar_range[3:6]) - + np.array(self.lidar_range[0:3])) / np.array(self.voxel_size) + grid_size = np.round(grid_size).astype(np.int64) + feature_map_size = grid_size[:2] // self.target_cfg['out_size_factor'] + + draw_gaussian = draw_heatmap_gaussian + heatmaps, anno_boxes, inds, masks = [], [], [], [] + + for batch in range(batch_size): + task_boxes = gt_bbox_3d[batch, :, :] + if not gt_labels_3d is None: + task_classes = gt_labels_3d[batch, :] + + heatmap = gt_bbox_3d.new_zeros( # 辅助gt_bboxes_3d的属性 + (1, feature_map_size[1],feature_map_size[0])) + + anno_box = gt_bbox_3d.new_zeros((max_objs, 8), + dtype = torch.float32) + + ind = gt_bbox_3d.new_zeros((max_objs), dtype=torch.int64) + mask = gt_bbox_3d.new_zeros((max_objs), dtype=torch.uint8) + + num_objs = min(task_boxes.shape[0], max_objs) + + for k in range(num_objs): + # 计算x的heatmap坐标 + coor_x = (task_boxes[k][0] - pc_range[0]) / voxel_size[0] / self.target_cfg['out_size_factor'] + coor_y = (task_boxes[k][1] - pc_range[1]) / voxel_size[1] / self.target_cfg['out_size_factor'] + coor_z = (task_boxes[k][2] - pc_range[2]) / voxel_size[2] / self.target_cfg['out_size_factor'] + h = task_boxes[k][3] / voxel_size[0] / self.target_cfg['out_size_factor'] + w = task_boxes[k][4] / voxel_size[1] / self.target_cfg['out_size_factor'] + l = task_boxes[k][5] / voxel_size[2] / self.target_cfg['out_size_factor'] + rot = task_boxes[k][6] + + if h > 0 and w > 0: + radius = gaussian_radius( + (h, w), + min_overlap=self.target_cfg['gaussian_overlap']) + radius = max(self.target_cfg['min_radius'], int(radius)) + + center = torch.tensor([coor_x, coor_y], + dtype=torch.float32, + device=device) + center_int = center.to(torch.int32) ## bbox 的中心在heatmap 中的位置 + + # throw out not in range objects to avoid out of array + # area when creating the heatmap + if not (0 <= center_int[0] < feature_map_size[0].item() + and 0 <= center_int[1] < feature_map_size[1].item()): + continue + + draw_gaussian(heatmap[0], center_int, radius) + + x, y = center_int[0], center_int[1] + assert (center_int[1] * feature_map_size[0] + center_int[0] < + feature_map_size[0] * feature_map_size[1]) + ind[k] = y * feature_map_size[0] + x + mask[k] = 1 + # box_dim = task_boxes[k][3:6] + # box_dim = box_dim.log() + box_dim = torch.cat([h.unsqueeze(0), w.unsqueeze(0), l.unsqueeze(0)], dim=0) + anno_box[k] = torch.cat([ + center - torch.tensor([x, y], device=device), + coor_z.unsqueeze(0), box_dim, + torch.sin(rot).unsqueeze(0), + torch.cos(rot).unsqueeze(0), + ]) # [x,y,z, w, h, l, sin(heading), cos(heading)] + + 
heatmaps.append(heatmap) + anno_boxes.append(anno_box) + inds.append(ind) + masks.append(mask) + # import cv2; cv2.imwrite('test_{}.png'.format(batch), heatmap.cpu().numpy()[0]*255) + heatmaps = torch.stack(heatmaps) + anno_boxes = torch.stack(anno_boxes) + inds = torch.stack(inds) + masks = torch.stack(masks) + return heatmaps, anno_boxes, inds, masks # [B, H, W] + + +def gaussian_2d(shape, sigma=1): + """Generate gaussian map. + + Args: + shape (list[int]): Shape of the map. + sigma (float): Sigma to generate gaussian map. + Defaults to 1. + + Returns: + np.ndarray: Generated gaussian map. + """ + m, n = [(ss - 1.) / 2. for ss in shape] + y, x = np.ogrid[-m:m + 1, -n:n + 1] + + h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) + h[h < np.finfo(h.dtype).eps * h.max()] = 0 + return h + + + +def draw_heatmap_gaussian(heatmap, center, radius, k=1): + """Get gaussian masked heatmap. + + Args: + heatmap (torch.Tensor): Heatmap to be masked. + center (torch.Tensor): Center coord of the heatmap. + radius (int): Radius of gausian. + K (int): Multiple of masked_gaussian. Defaults to 1. + + Returns: + torch.Tensor: Masked heatmap. + """ + diameter = 2 * radius + 1 + gaussian = gaussian_2d((diameter, diameter), sigma=diameter / 6) + + x, y = int(center[0]), int(center[1]) + + height, width = heatmap.shape[0:2] + + left, right = min(x, radius), min(width - x, radius + 1) + top, bottom = min(y, radius), min(height - y, radius + 1) + + masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] + masked_gaussian = torch.from_numpy( + gaussian[radius - top:radius + bottom, + radius - left:radius + right]).to(heatmap.device, + torch.float32) + if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: + torch.max(masked_heatmap, masked_gaussian * k, out=masked_heatmap) + return heatmap + + + +def gaussian_radius(det_size, min_overlap=0.5): + """Get radius of gaussian. + + Args: + det_size (tuple[torch.Tensor]): Size of the detection result. + min_overlap (float): Gaussian_overlap. Defaults to 0.5. + + Returns: + torch.Tensor: Computed radius. + """ + height, width = det_size + + a1 = 1 + b1 = (height + width) + c1 = width * height * (1 - min_overlap) / (1 + min_overlap) + sq1 = torch.sqrt(b1**2 - 4 * a1 * c1) + r1 = (b1 + sq1) / (2 * a1) + + a2 = 4 + b2 = 2 * (height + width) + c2 = (1 - min_overlap) * width * height + sq2 = torch.sqrt(b2**2 - 4 * a2 * c2) + r2 = (b2 + sq2) / (2 * a2) + + a3 = 4 * min_overlap + b3 = -2 * min_overlap * (height + width) + c3 = (min_overlap - 1) * width * height + sq3 = torch.sqrt(b3**2 - 4 * a3 * c3) + r3 = (b3 + sq3) / (2 * a3) + return min(r1, r2, r3) + + + +import functools + +import torch.nn.functional as F + + +def reduce_loss(loss, reduction): + """Reduce loss as specified. + + Args: + loss (Tensor): Elementwise loss tensor. + reduction (str): Options are "none", "mean" and "sum". + + Return: + Tensor: Reduced loss tensor. + """ + reduction_enum = F._Reduction.get_enum(reduction) + # none: 0, elementwise_mean:1, sum: 2 + if reduction_enum == 0: + return loss + elif reduction_enum == 1: + return loss.mean() + elif reduction_enum == 2: + return loss.sum() + + +def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): + """Apply element-wise weight and reduce loss. + + Args: + loss (Tensor): Element-wise loss. + weight (Tensor): Element-wise weights. + reduction (str): Same as built-in losses of PyTorch. + avg_factor (float): Avarage factor when computing the mean of losses. + + Returns: + Tensor: Processed loss values. 
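The helpers above implement the usual CenterNet-style target construction: gaussian_radius chooses a radius such that a box shifted by up to that radius still overlaps the ground-truth box by at least min_overlap, and draw_heatmap_gaussian splats a 2-D gaussian of that radius onto the heatmap with an element-wise torch.max so overlapping objects keep the larger score. The sketch below re-implements the splatting inline (rather than calling the file's helpers) so it runs standalone; the radius value is hard-coded for illustration:

# Illustrative sketch: splat one gaussian peak onto a toy heatmap, CenterNet-style.
import numpy as np
import torch

def toy_gaussian_2d(shape, sigma):
    m, n = [(s - 1.0) / 2.0 for s in shape]
    y, x = np.ogrid[-m:m + 1, -n:n + 1]
    return np.exp(-(x * x + y * y) / (2 * sigma * sigma))

H, W = 32, 32
heatmap = torch.zeros(H, W)
x, y = 20, 12                 # object center cell (column, row)
radius = 3                    # in practice: max(min_radius, int(gaussian_radius((h, w), min_overlap)))

diameter = 2 * radius + 1
g = torch.from_numpy(toy_gaussian_2d((diameter, diameter), sigma=diameter / 6)).float()

left, right = min(x, radius), min(W - x, radius + 1)
top, bottom = min(y, radius), min(H - y, radius + 1)
patch = heatmap[y - top:y + bottom, x - left:x + right]
torch.max(patch, g[radius - top:radius + bottom, radius - left:radius + right], out=patch)

assert heatmap[y, x] == 1.0   # the peak lands exactly on the center cell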
+ """ + # if weight is specified, apply element-wise weight + + if weight is not None: + device = loss.device + weight = weight.to(device) + loss = loss * weight + + # if avg_factor is not specified, just reduce the loss + if avg_factor is None: + loss = reduce_loss(loss, reduction) + else: + # if reduction is mean, then average the loss by avg_factor + if reduction == 'mean': + loss = loss.sum() / avg_factor + # if reduction is 'none', then do nothing, otherwise raise an error + elif reduction != 'none': + raise ValueError('avg_factor can not be used with reduction="sum"') + return loss + + +def weighted_loss(loss_func): + """Create a weighted version of a given loss function. + + To use this decorator, the loss function must have the signature like + `loss_func(pred, target, **kwargs)`. The function only needs to compute + element-wise loss without any reduction. This decorator will add weight + and reduction arguments to the function. The decorated function will have + the signature like `loss_func(pred, target, weight=None, reduction='mean', + avg_factor=None, **kwargs)`. + + :Example: + + >>> import torch + >>> @weighted_loss + >>> def l1_loss(pred, target): + >>> return (pred - target).abs() + + >>> pred = torch.Tensor([0, 2, 3]) + >>> target = torch.Tensor([1, 1, 1]) + >>> weight = torch.Tensor([1, 0, 1]) + + >>> l1_loss(pred, target) + tensor(1.3333) + >>> l1_loss(pred, target, weight) + tensor(1.) + >>> l1_loss(pred, target, reduction='none') + tensor([1., 1., 2.]) + >>> l1_loss(pred, target, weight, avg_factor=2) + tensor(1.5000) + """ + + @functools.wraps(loss_func) + def wrapper(pred, + target, + weight=None, + reduction='mean', + avg_factor=None, + **kwargs): + # get element-wise loss + loss = loss_func(pred, target, **kwargs) + loss = weight_reduce_loss(loss, weight, reduction, avg_factor) + return loss + + return wrapper + + +@weighted_loss +def gaussian_focal_loss(pred, gaussian_target, alpha=2.0, gamma=4.0): + """`Focal Loss `_ for targets in gaussian + distribution. + + Args: + pred (torch.Tensor): The prediction. + gaussian_target (torch.Tensor): The learning target of the prediction + in gaussian distribution. + alpha (float, optional): A balanced form for Focal Loss. + Defaults to 2.0. + gamma (float, optional): The gamma for calculating the modulating + factor. Defaults to 4.0. + """ + eps = 1e-12 + device = pred.device + pos_weights = gaussian_target.eq(1) + pos_weights = pos_weights.to(device) + neg_weights = (1 - gaussian_target).pow(gamma) + neg_weights = neg_weights.to(device) + pos_loss = -(pred + eps).log() * (1 - pred).pow(alpha) * pos_weights + neg_loss = -(1 - pred + eps).log() * pred.pow(alpha) * neg_weights + return pos_loss + neg_loss + +@weighted_loss +def l1_loss(pred, target): + """L1 loss. + + Args: + pred (torch.Tensor): The prediction. + target (torch.Tensor): The learning target of the prediction. 
+ + Returns: + torch.Tensor: Calculated loss + """ + device = pred.device + target = target.to(device) + assert pred.size() == target.size() and target.numel() > 0 + loss = torch.abs(pred - target) + return loss \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/center_point_loss_multiclass.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/center_point_loss_multiclass.py new file mode 100644 index 0000000000000000000000000000000000000000..c3fc1669a30939b8565903adfe980fc6fa94016b --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/center_point_loss_multiclass.py @@ -0,0 +1,755 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +import math + + +class GaussianFocalLoss(nn.Module): + """GaussianFocalLoss is a variant of focal loss. + + More details can be found in the `paper + `_ + Code is modified from `kp_utils.py + `_ # noqa: E501 + Please notice that the target in GaussianFocalLoss is a gaussian heatmap, + not 0/1 binary target. + + Args: + alpha (float): Power of prediction. + gamma (float): Power of target for negtive samples. + reduction (str): Options are "none", "mean" and "sum". + loss_weight (float): Loss weight of current loss. + """ + + def __init__(self, + alpha=2.0, + gamma=4.0, + reduction='mean', + loss_weight=1.0): + super(GaussianFocalLoss, self).__init__() + self.alpha = alpha + self.gamma = gamma + self.reduction = reduction + self.loss_weight = loss_weight + + def forward(self, + pred, + target, + weight=None, + avg_factor=None, + reduction_override=None): + """Forward function. + + Args: + pred (torch.Tensor): The prediction. + target (torch.Tensor): The learning target of the prediction + in gaussian distribution. + weight (torch.Tensor, optional): The weight of loss for each + prediction. Defaults to None. + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + reduction_override (str, optional): The reduction method used to + override the original reduction method of the loss. + Defaults to None. + """ + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + loss_reg = self.loss_weight * gaussian_focal_loss( + pred, # [11, 1, 100, 100] + target, # [11, 1, 100, 100] + weight, + alpha=self.alpha, + gamma=self.gamma, + reduction=reduction, + avg_factor=avg_factor) + return loss_reg + +def gaussian_focal_loss(pred, gaussian_target, alpha=2.0, gamma=4.0): + """`Focal Loss `_ for targets in gaussian + distribution. + + Args: + pred (torch.Tensor): The prediction. + gaussian_target (torch.Tensor): The learning target of the prediction + in gaussian distribution. + alpha (float, optional): A balanced form for Focal Loss. + Defaults to 2.0. + gamma (float, optional): The gamma for calculating the modulating + factor. Defaults to 4.0. + """ + eps = 1e-12 + device = pred.device + pos_weights = gaussian_target.eq(1) + pos_weights = pos_weights.to(device) + neg_weights = (1 - gaussian_target).pow(gamma) + neg_weights = neg_weights.to(device) + pos_loss = -(pred + eps).log() * (1 - pred).pow(alpha) * pos_weights + neg_loss = -(1 - pred + eps).log() * pred.pow(alpha) * neg_weights + return pos_loss + neg_loss + + +def clip_sigmoid(x, eps=1e-4): + """Sigmoid function for input feature. + + Args: + x (torch.Tensor): Input feature map with the shape of [B, N, H, W]. + eps (float): Lower bound of the range to be clamped to. 
Defaults + to 1e-4. + + Returns: + torch.Tensor: Feature map after sigmoid. + """ + y = torch.clamp(torch.sigmoid(x), min=eps, max=1 - eps) + # y = torch.clamp(x.sigmoid_(), min=eps, max=1 - eps) + return y + +def _gather_feat(feat, ind, mask=None): + # feat : [bs, wxh, c] + dim = feat.size(2) + # ind : [bs, index, c] + ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) + feat = feat.gather(1, ind) # 按照dim=1获取ind + if mask is not None: + mask = mask.unsqueeze(2).expand_as(feat) + feat = feat[mask] + feat = feat.view(-1, dim) + return feat + + +def _transpose_and_gather_feat(feat, ind): + feat = feat.permute(0, 2, 3, 1).contiguous() ## # from [bs c h w] to [bs, h, w, c] + feat = feat.view(feat.size(0), -1, feat.size(3)) # to [bs, wxh, c] + feat = _gather_feat(feat, ind) + return feat + + + +class RegLoss(nn.Module): + '''Regression loss for an output tensor + Arguments: + output (batch x dim x h x w) + mask (batch x max_objects) + ind (batch x max_objects) + target (batch x max_objects x dim) + ''' + def __init__(self): + super(RegLoss, self).__init__() + + def forward(self, output, mask, ind, target): + pred = _transpose_and_gather_feat(output, ind) + mask = mask.float().unsqueeze(2) + + loss = F.l1_loss(pred*mask, target*mask, reduction='none') + loss = loss / (mask.sum() + 1e-4) + loss = loss.transpose(2 ,0).sum(dim=2).sum(dim=1) + return loss + + + +class FastFocalLoss(nn.Module): + ''' + Reimplemented focal loss, exactly the same as the CornerNet version. + Faster and costs much less memory. + ''' + def __init__(self): + super(FastFocalLoss, self).__init__() + + def forward(self, out, target, ind, mask, cat): + ''' + Arguments: + out, target: B x C x H x W + ind, mask: B x M + cat (category id for peaks): B x M + ''' + mask = mask.float() + gt = torch.pow(1 - target, 4) + neg_loss = torch.log(1 - out) * torch.pow(out, 2) * gt + neg_loss = neg_loss.sum() + + pos_pred_pix = _transpose_and_gather_feat(out, ind) # B x M x C + pos_pred = pos_pred_pix.gather(2, cat.unsqueeze(2)) # B x M + num_pos = mask.sum() + pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) * \ + mask.unsqueeze(2) + pos_loss = pos_loss.sum() + if num_pos == 0: + return - neg_loss + return - (pos_loss + neg_loss) / num_pos + +class CenterPointLossmulticlass(nn.Module): + def __init__(self, args): + super(CenterPointLossmulticlass, self).__init__() + + self.cls_weight = args['cls_weight'] + self.loc_weight = args['loc_weight'] + self.code_weights = args['code_weights'] + self.target_cfg = args['target_assigner_config'] + self.lidar_range = self.target_cfg['cav_lidar_range'] + self.voxel_size = self.target_cfg['voxel_size'] + + self.loss_cls = GaussianFocalLoss(reduction='mean') + self.crit = FastFocalLoss() + self.crit_reg = RegLoss() + + self.loss_dict = {} + + self.detail_loss = {} + self.cls_output = {} + + def forward(self, output_dict, target_dict, suffix=""): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + # Predictions + box_preds = output_dict['bbox_preds{}'.format(suffix)].permute(0, 2, 3, 1).contiguous() # [B, H, W, C*K] C=8 + cls_preds = clip_sigmoid(output_dict['cls_preds{}'.format(suffix)]) # [B, 1, H, W] -> [B, 1*K, H, W] + + + # GTs + bbox_center_all = target_dict['object_bbx_center{}'.format(suffix)].cpu().numpy() # (4,100,7) -> (4,3,100,7) + bbox_mask_all = target_dict['object_bbx_mask{}'.format(suffix)].cpu().numpy() # (4,100) -> (4,3,100) + batch_size = bbox_mask_all.shape[0] + num_class = bbox_center_all.shape[1] + cls_gt_list = [] + box_gt_list = [] 
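To make the gather pattern in `_transpose_and_gather_feat` concrete, here is a standalone sketch with made-up shapes (not the shapes used by this model): the head output is flattened to [B, H*W, C] and per-object rows are pulled out at the flattened index y*W + x.

import torch

B, C, H, W = 2, 8, 4, 6
out = torch.randn(B, C, H, W)                       # dense head output
xs = torch.tensor([[1, 3], [0, 5]])                 # hypothetical object centre columns
ys = torch.tensor([[2, 0], [1, 3]])                 # hypothetical object centre rows
ind = ys * W + xs                                   # flattened spatial index, [B, M]

feat = out.permute(0, 2, 3, 1).contiguous().view(B, H * W, C)          # [B, H*W, C]
gathered = feat.gather(1, ind.unsqueeze(2).expand(B, ind.size(1), C))  # [B, M, C]

# gathered[b, m] is the C-dim prediction at pixel (ys[b, m], xs[b, m])
assert torch.allclose(gathered[0, 0], out[0, :, 2, 1])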
+ for i in range(num_class): + + bbox_center = bbox_center_all[:,i,:,:] + bbox_mask = bbox_mask_all[:,i,:] + + max_gt = int(max(bbox_mask.sum(axis=1))) + gt_boxes3d = np.zeros((batch_size, max_gt, bbox_center[0].shape[-1]), dtype=np.float32) # [B, max_anchor_num, 7] + for k in range(batch_size): + gt_boxes3d[k, :int(bbox_mask[k].sum()), :] = bbox_center[k, :int(bbox_mask[k].sum()), :] + gt_boxes3d = torch.from_numpy(gt_boxes3d).to(box_preds.device) + + targets_dict = self.assign_targets( + gt_boxes=gt_boxes3d # [B, max_anchor_num, 7 + C ] heatmap [2,1,h,w] anno_boxes [2,100,8] inds [2, 100] + ) + + cls_gt_list.append(targets_dict['heatmaps']) # [B, 1, H, W] + box_gt_list.append((targets_dict['anno_boxes'], targets_dict['inds'], targets_dict['masks'])) + + cls_gt = torch.stack(cls_gt_list, dim=1) + cls_preds = cls_preds.unsqueeze(2) + + cls_loss = self.get_cls_layer_loss(cls_preds, cls_gt) + + box_loss = 0 + box_preds = box_preds.view(box_preds.shape[0], box_preds.shape[1], box_preds.shape[2], int(box_preds.shape[3]/8), 8) + + dim_list = [] + for i in range(num_class): + box_gt = box_gt_list[i] + loss_all, loss_dim = self.get_box_reg_layer_loss(box_preds[:,:,:,i,:], box_gt) + box_loss += loss_all + # box_loss += self.get_box_reg_layer_loss(box_preds[:,:,:,i,:], box_gt) + self.detail_loss.update({'box_loss_{}'.format(i): loss_dim}) + + rpn_loss = cls_loss + box_loss + + self.loss_dict.update({ 'total_loss': rpn_loss.item(), + 'reg_loss': box_loss.item(), + 'cls_loss': cls_loss.item()}) + + self.cls_output.update({'output_cls_{}'.format(suffix): cls_preds, + 'target_cls_{}'.format(suffix): cls_gt}) + + + return rpn_loss + + def logging(self, epoch, batch_id, batch_len, writer = None, suffix=""): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. 
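A minimal sketch (made-up counts) of the padding step above: each class's variable number of valid boxes, given by the mask sums, is copied into one zero-padded [B, max_gt, 7] tensor before target assignment.

import numpy as np
import torch

batch_size, box_slots, box_dim = 2, 5, 7
bbox_center = np.random.rand(batch_size, box_slots, box_dim).astype(np.float32)
bbox_mask = np.array([[1, 1, 1, 0, 0],        # sample 0: 3 valid boxes
                      [1, 1, 0, 0, 0]])       # sample 1: 2 valid boxes

max_gt = int(bbox_mask.sum(axis=1).max())
gt_boxes3d = np.zeros((batch_size, max_gt, box_dim), dtype=np.float32)
for k in range(batch_size):
    n = int(bbox_mask[k].sum())
    gt_boxes3d[k, :n] = bbox_center[k, :n]

gt_boxes3d = torch.from_numpy(gt_boxes3d)     # [2, 3, 7], zero-padded per sample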
+ batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict.get('total_loss', 0) + reg_loss = self.loss_dict.get('reg_loss', 0) + cls_loss = self.loss_dict.get('cls_loss', 0) + + print("[epoch %d][%d/%d]%s, || Loss: %.4f || Conf Loss: %.4f" + " || Loc Loss: %.4f" % ( + epoch, batch_id + 1, batch_len, suffix, + total_loss, cls_loss, reg_loss)) + + # for i in range(3): + # print('class {} reg loss: '.format(i), self.detail_loss['box_loss_{}'.format(i)]) + + if not writer is None: + writer.add_scalar('Regression_loss', reg_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss', cls_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Total_loss', total_loss, + epoch*batch_len + batch_id) + + for key in self.detail_loss.keys(): + loss_list = self.detail_loss.get(key, []) + if len(loss_list) > 0: + for k in range(len(loss_list)): + writer.add_scalar('{}_dim{}'.format(key,k), loss_list[k], + epoch*batch_len + batch_id) + + # for i in range(self.cls_output['output_cls_{}'.format(suffix)].shape[0]): + # writer.add_image('agent_{}_{}_output'.format(i, suffix), self.cls_output['output_cls_{}'.format(suffix)][i, 0:1, 0], 1, dataformats='CHW') + # writer.add_image('agent_{}_{}_target'.format(i, suffix), self.cls_output['target_cls_{}'.format(suffix)][i, 0:1, 0], 1, dataformats='CHW') + + + + def get_cls_layer_loss(self, pred_heatmaps, gt_heatmaps): + num_pos = gt_heatmaps.eq(1).float().sum().item() + + cls_loss = self.loss_cls( + pred_heatmaps, + gt_heatmaps, + avg_factor=max(num_pos, 1)) + + cls_loss = cls_loss * self.cls_weight + return cls_loss + + + def _gather_feat(self, feat, ind, mask=None): + """Gather feature map. + + Given feature map and index, return indexed feature map. + + Args: + feat (torch.tensor): Feature map with the shape of [B, H*W, 10]. + ind (torch.Tensor): Index of the ground truth boxes with the + shape of [B, max_obj]. + mask (torch.Tensor): Mask of the feature map with the shape + of [B, max_obj]. Default: None. + + Returns: + torch.Tensor: Feature map after gathering with the shape + of [B, max_obj, 10]. 
+ """ + device = feat.device + dim = feat.size(2) + ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) # 把 ind 和 dim 拼接在一起 + feat = feat.gather(1, ind.to(device)) + if mask is not None: + mask = mask.unsqueeze(2).expand_as(feat) + feat = feat[mask] + feat = feat.view(-1, dim) + return feat + + + def get_box_reg_layer_loss(self, bbox_preds, bbox_gt): + target_box, inds, masks = bbox_gt + pred = bbox_preds + ind = inds + num = masks.float().sum() + pred = pred.view(pred.size(0), -1, pred.size(3)) # [n, h*w, 8 ] + pred = self._gather_feat(pred, ind) + mask = masks.unsqueeze(2).expand_as(target_box).float() ## 把 mask 的维度进行扩展 + isnotnan = (~torch.isnan(target_box)).float() + mask *= isnotnan + + code_weights = self.code_weights + bbox_weights = mask * mask.new_tensor(code_weights) + ## pred, target_box [4,100,8] + loc_loss = l1_loss( + pred, target_box, bbox_weights, avg_factor=(num + 1e-4)) + + loc_loss = loc_loss * self.loc_weight + + loss_dim = [] + import copy + for i in range(8): + code_weights2 = code_weights.copy() + for j in range(8): + if j != i: + code_weights2[j] = 0 + bbox_weights = mask * mask.new_tensor(code_weights2) + + loc_loss_2 = l1_loss( + pred, target_box, bbox_weights, avg_factor=(num + 1e-4)) + loc_loss_single = loc_loss_2 * self.loc_weight + + loss_dim.append(loc_loss_single.item()) + + return loc_loss, loss_dim + + def assign_targets(self, gt_boxes): + """Generate targets. + + Args: + gt_boxes: ( M, 7+c) box + cls ## 这个地方函数和centerpoint-kitti 那个不太一样,这里是分开进行计算的 + + Returns: + Returns: + tuple[list[torch.Tensor]]: Tuple of target including \ + the following results in order. + + - list[torch.Tensor]: Heatmap scores. + - list[torch.Tensor]: Ground truth boxes. + - list[torch.Tensor]: Indexes indicating the \ + position of the valid boxes. + - list[torch.Tensor]: Masks indicating which \ + boxes are valid. + """ + if gt_boxes.shape[-1] == 8: + gt_bboxes_3d, gt_labels_3d = gt_boxes[..., :-1], gt_boxes[..., -1] # gt_box [2,14,8] batch_size * bbox_num * 8 + heatmaps, anno_boxes, inds, masks = self.get_targets_single(gt_bboxes_3d, gt_labels_3d) + elif gt_boxes.shape[-1] == 7: + gt_bboxes_3d = gt_boxes + heatmaps, anno_boxes, inds, masks = self.get_targets_single(gt_bboxes_3d) + + # transpose heatmaps, because the dimension of tensors in each task is + # different, we have to use numpy instead of torch to do the transpose. 
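The box regression term above boils down to a masked, channel-weighted L1; a standalone sketch with illustrative weights follows, and the per-dimension breakdown mirrors the zeroed-out copies of code_weights used for logging.

import torch

pred   = torch.randn(2, 4, 8)                 # gathered predictions, [B, max_objs, 8]
target = torch.randn(2, 4, 8)
obj_mask = torch.tensor([[1., 1., 0., 0.],
                         [1., 0., 0., 0.]])   # which object slots are real
mask = obj_mask.unsqueeze(2).expand_as(target)

code_weights = torch.tensor([1., 1., 1., 1., 1., 1., 5., 5.])  # hypothetical channel weights
bbox_weights = mask * code_weights            # broadcast over the last dimension

num = obj_mask.sum()
loc_loss = (torch.abs(pred - target) * bbox_weights).sum() / (num + 1e-4)

# zeroing every channel weight except i isolates channel i's share of the loss
per_dim = [(torch.abs(pred - target)[..., i] * mask[..., i] * code_weights[i]).sum() / (num + 1e-4)
           for i in range(8)]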
+ # heatmaps = np.array(heatmaps).transpose(1, 0).tolist() + # heatmaps = [torch.stack(hms_) for hms_ in heatmaps] + # # heatmaps = torch.from_numpy(np.array(heatmaps)) + # # transpose anno_boxes + # anno_boxes = np.array(anno_boxes).transpose(1, 0).tolist() + # anno_boxes = [torch.stack(anno_boxes_) for anno_boxes_ in anno_boxes] + # # transpose inds + # inds = np.array(inds).transpose(1, 0).tolist() + # inds = [torch.stack(inds_) for inds_ in inds] + # # transpose inds + # masks = np.array(masks).transpose(1, 0).tolist() + # masks = [torch.stack(masks_) for masks_ in masks] + + all_targets_dict = { + 'heatmaps': heatmaps, + 'anno_boxes': anno_boxes, + 'inds': inds, + 'masks': masks + } + + return all_targets_dict + + + def get_targets_single(self, gt_bbox_3d, gt_labels_3d=None): + + batch_size = gt_bbox_3d.shape[0] + device = gt_bbox_3d.device + max_objs = self.target_cfg['max_objs'] + pc_range = self.lidar_range + voxel_size = self.voxel_size + + grid_size = (np.array(self.lidar_range[3:6]) - + np.array(self.lidar_range[0:3])) / np.array(self.voxel_size) + grid_size = np.round(grid_size).astype(np.int64) + feature_map_size = grid_size[:2] // self.target_cfg['out_size_factor'] + + draw_gaussian = draw_heatmap_gaussian + heatmaps, anno_boxes, inds, masks = [], [], [], [] + + for batch in range(batch_size): + task_boxes = gt_bbox_3d[batch, :, :] + if not gt_labels_3d is None: + task_classes = gt_labels_3d[batch, :] + + heatmap = gt_bbox_3d.new_zeros( # 辅助gt_bboxes_3d的属性 + (1, feature_map_size[1],feature_map_size[0])) + + anno_box = gt_bbox_3d.new_zeros((max_objs, 8), + dtype = torch.float32) + + ind = gt_bbox_3d.new_zeros((max_objs), dtype=torch.int64) + mask = gt_bbox_3d.new_zeros((max_objs), dtype=torch.uint8) + + num_objs = min(task_boxes.shape[0], max_objs) + + for k in range(num_objs): + # 计算x的heatmap坐标 + coor_x = (task_boxes[k][0] - pc_range[0]) / voxel_size[0] / self.target_cfg['out_size_factor'] + coor_y = (task_boxes[k][1] - pc_range[1]) / voxel_size[1] / self.target_cfg['out_size_factor'] + coor_z = (task_boxes[k][2] - pc_range[2]) / voxel_size[2] / self.target_cfg['out_size_factor'] + h = task_boxes[k][3] / voxel_size[0] / self.target_cfg['out_size_factor'] + w = task_boxes[k][4] / voxel_size[1] / self.target_cfg['out_size_factor'] + l = task_boxes[k][5] / voxel_size[2] / self.target_cfg['out_size_factor'] + rot = task_boxes[k][6] + + if h > 0 and w > 0: + radius = gaussian_radius( + (h, w), + min_overlap=self.target_cfg['gaussian_overlap']) + radius = max(self.target_cfg['min_radius'], int(radius)) + + center = torch.tensor([coor_x, coor_y], + dtype=torch.float32, + device=device) + center_int = center.to(torch.int32) ## bbox 的中心在heatmap 中的位置 + + # throw out not in range objects to avoid out of array + # area when creating the heatmap + if not (0 <= center_int[0] < feature_map_size[0].item() + and 0 <= center_int[1] < feature_map_size[1].item()): + continue + + draw_gaussian(heatmap[0], center_int, radius) + + x, y = center_int[0], center_int[1] + assert (center_int[1] * feature_map_size[0] + center_int[0] < + feature_map_size[0] * feature_map_size[1]) + ind[k] = y * feature_map_size[0] + x + mask[k] = 1 + # box_dim = task_boxes[k][3:6] + # box_dim = box_dim.log() + box_dim = torch.cat([h.unsqueeze(0), w.unsqueeze(0), l.unsqueeze(0)], dim=0) + anno_box[k] = torch.cat([ + center - torch.tensor([x, y], device=device), + coor_z.unsqueeze(0), box_dim, + torch.sin(rot).unsqueeze(0), + torch.cos(rot).unsqueeze(0), + ]) # [x,y,z, w, h, l, sin(heading), cos(heading)] + + 
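A worked example of the centre-to-cell arithmetic above, using hypothetical range/voxel settings rather than the ones in this run:

# Hypothetical values, only to show the mapping from metric coordinates to heatmap cells.
pc_range = [-40.0, -40.0, -3.0]        # assumed lower bounds (x, y, z) in metres
voxel_size = [0.1, 0.1, 0.15]
out_size_factor = 2
W = int((40.0 - (-40.0)) / voxel_size[0]) // out_size_factor   # assumed heatmap width = 400

x_world, y_world = 4.0, -1.5
coor_x = (x_world - pc_range[0]) / voxel_size[0] / out_size_factor   # (4 + 40)/0.1/2 = 220.0
coor_y = (y_world - pc_range[1]) / voxel_size[1] / out_size_factor   # (-1.5 + 40)/0.1/2 = 192.5

x_cell, y_cell = int(coor_x), int(coor_y)      # (220, 192): integer peak location
ind = y_cell * W + x_cell                      # 77020: flattened index stored in `ind`
# anno_box regresses the sub-cell offset (coor_x - x_cell, coor_y - y_cell) = (0.0, 0.5),
# plus z, the cell-scaled box dims and (sin, cos) of the heading.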
heatmaps.append(heatmap) + anno_boxes.append(anno_box) + inds.append(ind) + masks.append(mask) + # import cv2; cv2.imwrite('test_{}.png'.format(batch), heatmap.cpu().numpy()[0]*255) + heatmaps = torch.stack(heatmaps) + anno_boxes = torch.stack(anno_boxes) + inds = torch.stack(inds) + masks = torch.stack(masks) + return heatmaps, anno_boxes, inds, masks # [B, H, W] + + +def gaussian_2d(shape, sigma=1): + """Generate gaussian map. + + Args: + shape (list[int]): Shape of the map. + sigma (float): Sigma to generate gaussian map. + Defaults to 1. + + Returns: + np.ndarray: Generated gaussian map. + """ + m, n = [(ss - 1.) / 2. for ss in shape] + y, x = np.ogrid[-m:m + 1, -n:n + 1] + + h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) + h[h < np.finfo(h.dtype).eps * h.max()] = 0 + return h + + + +def draw_heatmap_gaussian(heatmap, center, radius, k=1): + """Get gaussian masked heatmap. + + Args: + heatmap (torch.Tensor): Heatmap to be masked. + center (torch.Tensor): Center coord of the heatmap. + radius (int): Radius of gausian. + K (int): Multiple of masked_gaussian. Defaults to 1. + + Returns: + torch.Tensor: Masked heatmap. + """ + diameter = 2 * radius + 1 + gaussian = gaussian_2d((diameter, diameter), sigma=diameter / 6) + + x, y = int(center[0]), int(center[1]) + + height, width = heatmap.shape[0:2] + + left, right = min(x, radius), min(width - x, radius + 1) + top, bottom = min(y, radius), min(height - y, radius + 1) + + masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] + masked_gaussian = torch.from_numpy( + gaussian[radius - top:radius + bottom, + radius - left:radius + right]).to(heatmap.device, + torch.float32) + if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: + torch.max(masked_heatmap, masked_gaussian * k, out=masked_heatmap) + return heatmap + + + +def gaussian_radius(det_size, min_overlap=0.5): + """Get radius of gaussian. + + Args: + det_size (tuple[torch.Tensor]): Size of the detection result. + min_overlap (float): Gaussian_overlap. Defaults to 0.5. + + Returns: + torch.Tensor: Computed radius. + """ + height, width = det_size + + a1 = 1 + b1 = (height + width) + c1 = width * height * (1 - min_overlap) / (1 + min_overlap) + sq1 = torch.sqrt(b1**2 - 4 * a1 * c1) + r1 = (b1 + sq1) / (2 * a1) + + a2 = 4 + b2 = 2 * (height + width) + c2 = (1 - min_overlap) * width * height + sq2 = torch.sqrt(b2**2 - 4 * a2 * c2) + r2 = (b2 + sq2) / (2 * a2) + + a3 = 4 * min_overlap + b3 = -2 * min_overlap * (height + width) + c3 = (min_overlap - 1) * width * height + sq3 = torch.sqrt(b3**2 - 4 * a3 * c3) + r3 = (b3 + sq3) / (2 * a3) + return min(r1, r2, r3) + + + +import functools + +import torch.nn.functional as F + + +def reduce_loss(loss, reduction): + """Reduce loss as specified. + + Args: + loss (Tensor): Elementwise loss tensor. + reduction (str): Options are "none", "mean" and "sum". + + Return: + Tensor: Reduced loss tensor. + """ + reduction_enum = F._Reduction.get_enum(reduction) + # none: 0, elementwise_mean:1, sum: 2 + if reduction_enum == 0: + return loss + elif reduction_enum == 1: + return loss.mean() + elif reduction_enum == 2: + return loss.sum() + + +def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): + """Apply element-wise weight and reduce loss. + + Args: + loss (Tensor): Element-wise loss. + weight (Tensor): Element-wise weights. + reduction (str): Same as built-in losses of PyTorch. + avg_factor (float): Avarage factor when computing the mean of losses. + + Returns: + Tensor: Processed loss values. 
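For intuition, a short usage sketch of the two helpers above (assuming gaussian_radius and draw_heatmap_gaussian from this file are in scope; the sizes and thresholds are made up): a box extent in cells yields a radius, and a unit-peak gaussian is splatted at the box's centre cell.

import torch

heatmap = torch.zeros(1, 20, 30)                     # [1, H, W] canvas, heatmap-sized
box_h, box_w = torch.tensor(6.0), torch.tensor(3.0)  # box extent in heatmap cells
radius = max(2, int(gaussian_radius((box_h, box_w), min_overlap=0.3)))  # floor of 2 for illustration
center = torch.tensor([12, 7], dtype=torch.int32)    # (x, y) cell of the box centre

draw_heatmap_gaussian(heatmap[0], center, radius)
print(heatmap[0, 7, 12])                             # tensor(1.) at the peak, decaying around it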
+ """ + # if weight is specified, apply element-wise weight + + if weight is not None: + device = loss.device + weight = weight.to(device) + loss = loss * weight + + # if avg_factor is not specified, just reduce the loss + if avg_factor is None: + loss = reduce_loss(loss, reduction) + else: + # if reduction is mean, then average the loss by avg_factor + if reduction == 'mean': + loss = loss.sum() / avg_factor + # if reduction is 'none', then do nothing, otherwise raise an error + elif reduction != 'none': + raise ValueError('avg_factor can not be used with reduction="sum"') + return loss + + +def weighted_loss(loss_func): + """Create a weighted version of a given loss function. + + To use this decorator, the loss function must have the signature like + `loss_func(pred, target, **kwargs)`. The function only needs to compute + element-wise loss without any reduction. This decorator will add weight + and reduction arguments to the function. The decorated function will have + the signature like `loss_func(pred, target, weight=None, reduction='mean', + avg_factor=None, **kwargs)`. + + :Example: + + >>> import torch + >>> @weighted_loss + >>> def l1_loss(pred, target): + >>> return (pred - target).abs() + + >>> pred = torch.Tensor([0, 2, 3]) + >>> target = torch.Tensor([1, 1, 1]) + >>> weight = torch.Tensor([1, 0, 1]) + + >>> l1_loss(pred, target) + tensor(1.3333) + >>> l1_loss(pred, target, weight) + tensor(1.) + >>> l1_loss(pred, target, reduction='none') + tensor([1., 1., 2.]) + >>> l1_loss(pred, target, weight, avg_factor=2) + tensor(1.5000) + """ + + @functools.wraps(loss_func) + def wrapper(pred, + target, + weight=None, + reduction='mean', + avg_factor=None, + **kwargs): + # get element-wise loss + loss = loss_func(pred, target, **kwargs) + loss = weight_reduce_loss(loss, weight, reduction, avg_factor) + return loss + + return wrapper + + +@weighted_loss +def gaussian_focal_loss(pred, gaussian_target, alpha=2.0, gamma=4.0): + """`Focal Loss `_ for targets in gaussian + distribution. + + Args: + pred (torch.Tensor): The prediction. + gaussian_target (torch.Tensor): The learning target of the prediction + in gaussian distribution. + alpha (float, optional): A balanced form for Focal Loss. + Defaults to 2.0. + gamma (float, optional): The gamma for calculating the modulating + factor. Defaults to 4.0. + """ + eps = 1e-12 + device = pred.device + pos_weights = gaussian_target.eq(1) + pos_weights = pos_weights.to(device) + neg_weights = (1 - gaussian_target).pow(gamma) + neg_weights = neg_weights.to(device) + pos_loss = -(pred + eps).log() * (1 - pred).pow(alpha) * pos_weights + neg_loss = -(1 - pred + eps).log() * pred.pow(alpha) * neg_weights + return pos_loss + neg_loss + +@weighted_loss +def l1_loss(pred, target): + """L1 loss. + + Args: + pred (torch.Tensor): The prediction. + target (torch.Tensor): The learning target of the prediction. 
+ + Returns: + torch.Tensor: Calculated loss + """ + device = pred.device + target = target.to(device) + assert pred.size() == target.size() and target.numel() > 0 + loss = torch.abs(pred - target) + return loss \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/ciassd_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/ciassd_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..46e2bf79449c5eb8ac65ca609498cb283f8c1305 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/ciassd_loss.py @@ -0,0 +1,254 @@ +import torch +import torch.nn as nn +import numpy as np +from opencood.utils.common_utils import limit_period +from opencood.data_utils.post_processor.voxel_postprocessor import VoxelPostprocessor +from opencood.pcdet_utils.iou3d_nms.iou3d_nms_utils import aligned_boxes_iou3d_gpu +from icecream import ic + +class CiassdLoss(nn.Module): + def __init__(self, args, keyname='stage1_out'): + super(CiassdLoss, self).__init__() + self.pos_cls_weight = args['pos_cls_weight'] + self.encode_rad_error_by_sin = args['encode_rad_error_by_sin'] + self.cls = args['cls'] + self.reg = args['reg'] + self.dir = args['dir'] + self.iou = None if 'iou' not in args else args['iou'] + self.keyname = keyname + self.loss_dict = {} + ## + self.num_cls = 2 + self.box_codesize = 7 + + def forward(self, output_dict, label_dict): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + preds_dict = output_dict[self.keyname] + + if 'stage1' in label_dict.keys(): + target_dict = label_dict['stage1'] + else: # for PointPillars + target_dict = label_dict + + if 'record_len' in output_dict: + batch_size = int(output_dict['record_len'].sum()) + else: + batch_size = output_dict['batch_size'] + + cls_labls = target_dict['pos_equal_one'].view(batch_size, -1, self.num_cls - 1) + positives = cls_labls > 0 + negatives = target_dict['neg_equal_one'].view(batch_size, -1, self.num_cls - 1) > 0 + cared = torch.logical_or(positives, negatives) + cls_labls = cls_labls * cared.type_as(cls_labls) + # num_normalizer = cared.sum(1, keepdim=True) + pos_normalizer = positives.sum(1, keepdim=True).float() + + # cls loss + cls_preds = preds_dict["cls_preds"].permute(0, 2, 3, 1).contiguous() \ + .view(batch_size, -1, self.num_cls - 1) + cls_weights = positives * self.pos_cls_weight + negatives * 1.0 + cls_weights /= torch.clamp(pos_normalizer, min=1.0) + cls_loss = sigmoid_focal_loss(cls_preds, cls_labls, weights=cls_weights, **self.cls) + cls_loss_reduced = cls_loss.sum() * self.cls['weight'] / batch_size + + # reg loss + reg_weights = positives / torch.clamp(pos_normalizer, min=1.0) + reg_preds = preds_dict['reg_preds'].permute(0, 2, 3, 1).contiguous().view(batch_size, -1, self.box_codesize) + reg_targets = target_dict['targets'].view(batch_size, -1, self.box_codesize) + if self.encode_rad_error_by_sin: + reg_preds, reg_targets = add_sin_difference(reg_preds, reg_targets) + reg_loss = weighted_smooth_l1_loss(reg_preds, reg_targets, weights=reg_weights, sigma=self.reg['sigma']) + reg_loss_reduced = reg_loss.sum() * self.reg['weight'] / batch_size + + + # dir loss + dir_targets = self.get_direction_target(target_dict['targets'].view(batch_size, -1, self.box_codesize)) + dir_logits = preds_dict[f"dir_preds"].permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2) # [N, H*W*#anchor, 2] + + dir_loss = softmax_cross_entropy_with_logits(dir_logits.view(-1, self.anchor_num), dir_targets.view(-1, self.anchor_num)) + 
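The add_sin_difference encoding used for the heading channel above can be checked numerically (standalone, arbitrary angles): an L1 gap between sin(p)cos(t) and cos(p)sin(t) equals |sin(p - t)|, so headings that differ by a full turn incur almost no loss.

import torch

p = torch.tensor([0.3, 3.0, -2.9])      # predicted headings (rad)
t = torch.tensor([0.1, -3.1, 2.9])      # target headings (rad)

enc_pred = torch.sin(p) * torch.cos(t)  # what add_sin_difference puts in the pred box
enc_tgt  = torch.cos(p) * torch.sin(t)  # ... and in the target box
l1 = torch.abs(enc_pred - enc_tgt)

assert torch.allclose(l1, torch.abs(torch.sin(p - t)), atol=1e-6)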
dir_loss = dir_loss.flatten() * reg_weights.flatten() # [N, H*W*anchor_num] + dir_loss_reduced = dir_loss.sum() * self.dir['weight'] / batch_size + + loss = cls_loss_reduced + reg_loss_reduced + dir_loss_reduced + + # iou loss + if self.iou is not None: + iou_preds = preds_dict["iou_preds"].permute(0, 2, 3, 1).contiguous() + pos_pred_mask = reg_weights.squeeze(dim=-1) > 0 # (4, 70400) + iou_pos_preds = iou_preds.view(batch_size, -1)[pos_pred_mask] + boxes3d_pred = VoxelPostprocessor.delta_to_boxes3d(preds_dict['reg_preds'].permute(0, 2, 3, 1).contiguous().detach(), + output_dict['anchor_box'])[pos_pred_mask] + boxes3d_tgt = VoxelPostprocessor.delta_to_boxes3d(target_dict['targets'], + output_dict['anchor_box'])[pos_pred_mask] + + iou_weights = reg_weights[pos_pred_mask].view(-1) + iou_pos_targets = aligned_boxes_iou3d_gpu(boxes3d_pred.float()[:, [0, 1, 2, 5, 4, 3, 6]], + boxes3d_tgt.float()[:, [0, 1, 2, 5, 4, 3, 6]]).detach().squeeze() + iou_pos_targets = 2 * iou_pos_targets.view(-1) - 1 + iou_loss = weighted_smooth_l1_loss(iou_pos_preds, iou_pos_targets, weights=iou_weights, sigma=self.iou['sigma']) + iou_loss_reduced = iou_loss.sum() * self.iou['weight'] / batch_size + + loss += iou_loss_reduced + self.loss_dict.update({ + 'iou_loss': iou_loss_reduced + }) + + + self.loss_dict.update({ + 'total_loss': loss, + 'cls_loss': cls_loss_reduced, + 'reg_loss': reg_loss_reduced, + 'dir_loss': dir_loss_reduced, + }) + + return loss + + def logging(self, epoch, batch_id, batch_len, writer = None): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. + batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict['total_loss'] + reg_loss = self.loss_dict['reg_loss'] + cls_loss = self.loss_dict['cls_loss'] + dir_loss = self.loss_dict['dir_loss'] + if 'iou_loss' in self.loss_dict: + iou_loss = self.loss_dict['iou_loss'] + if (batch_id + 1) % 10 == 0: + print("[epoch %d][%d/%d], || Loss: %.4f || Cls: %.4f" + " || Loc: %.4f || Dir: %.4f || Iou: %.4f" % ( + epoch, batch_id + 1, batch_len, + total_loss.item(), cls_loss.item(), reg_loss.item(), dir_loss.item(), iou_loss.item())) + if writer is not None: + writer.add_scalar('Regression_loss', reg_loss.item(), + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss', cls_loss.item(), + epoch*batch_len + batch_id) + writer.add_scalar('Direction_loss', dir_loss.item(), + epoch*batch_len + batch_id) + if 'iou_loss' in self.loss_dict: + writer.add_scalar('Iou_loss', iou_loss.item(), + epoch*batch_len + batch_id) + + + def get_direction_target(self, reg_targets): + """ + Args: + reg_targets: [N, H * W * #anchor_num, 7] + The last term is (theta_gt - theta_a) + + Returns: + dir_targets: + theta_gt: [N, H * W * #anchor_num, NUM_BIN] + NUM_BIN = 2 + """ + num_bins = self.dir['args']['num_bins'] + dir_offset = self.dir['args']['dir_offset'] + anchor_yaw = np.deg2rad(np.array(self.dir['args']['anchor_yaw'])) # for direction classification + self.anchor_yaw_map = torch.from_numpy(anchor_yaw).view(1,-1,1) # [1,2,1] + self.anchor_num = self.anchor_yaw_map.shape[1] + + H_times_W_times_anchor_num = reg_targets.shape[1] + anchor_map = self.anchor_yaw_map.repeat(1, H_times_W_times_anchor_num//self.anchor_num, 1).to(reg_targets.device) # [1, H * W * #anchor_num, 1] + + rot_gt = reg_targets[..., -1] + anchor_map[..., -1] # [N, H*W*anchornum] + 
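Continuing the direction-target computation at this point, a standalone sketch with hypothetical settings (two bins, an offset of pi/4); torch.remainder plays the role of the limit_period wrap used above.

import math
import torch
import torch.nn.functional as F

num_bins = 2
dir_offset = math.pi / 4                                 # hypothetical offset
rot_gt = torch.tensor([0.1, 1.9, 3.3, -2.0])             # heading plus anchor yaw, in radians

offset_rot = torch.remainder(rot_gt - dir_offset, 2 * math.pi)   # wrap into [0, 2*pi)
dir_cls = torch.clamp(torch.floor(offset_rot / (2 * math.pi / num_bins)).long(),
                      0, num_bins - 1)
dir_onehot = F.one_hot(dir_cls, num_bins).float()
print(dir_cls)    # tensor([1, 0, 0, 1]): which half-turn each heading falls into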
offset_rot = limit_period(rot_gt - dir_offset, 0, 2 * np.pi) + dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / num_bins)).long() # [N, H*W*anchornum] + dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=num_bins - 1) + # one_hot: + # if rot_gt > 0, then the label is 1, then the regression target is [0, 1] + dir_cls_targets = one_hot_f(dir_cls_targets, num_bins) + return dir_cls_targets + + + +def add_sin_difference(boxes1, boxes2): + rad_pred_encoding = torch.sin(boxes1[..., -1:]) * torch.cos(boxes2[..., -1:]) # ry -> sin(pred_ry)*cos(gt_ry) + rad_gt_encoding = torch.cos(boxes1[..., -1:]) * torch.sin(boxes2[..., -1:]) # ry -> cos(pred_ry)*sin(gt_ry) + res_boxes1 = torch.cat([boxes1[..., :-1], rad_pred_encoding], dim=-1) + res_boxes2 = torch.cat([boxes2[..., :-1], rad_gt_encoding], dim=-1) + return res_boxes1, res_boxes2 + + +def get_direction_target(reg_targets, anchors, one_hot=True, dir_offset=0.0): + """ + Generate targets for bounding box direction classification. + + Parameters + ---------- + anchors: torch.Tensor + shape as (H*W*2, 7) or (H, W, 2, 7) + reg_targets: torch.Tensor + shape as (B, H*W*2, 7) + + Returns + ------- + dir_cls_targets : torch.Tensor + [batch_size, w*h*num_anchor_per_pos, 2] + """ + batch_size = reg_targets.shape[0] + anchors = anchors.view(1, -1, anchors.shape[-1]).repeat(batch_size, 1, 1) + rot_gt = reg_targets[..., -1] + anchors[..., -1] # [4, 70400] + dir_cls_targets = ((rot_gt - dir_offset) > 0).long() # [4, 70400] + if one_hot: + dir_cls_targets = one_hot_f(dir_cls_targets, 2, dtype=anchors.dtype) + return dir_cls_targets + + +def one_hot_f(tensor, depth, dim=-1, on_value=1.0, dtype=torch.float32): + tensor_onehot = torch.zeros(*list(tensor.shape), depth, dtype=dtype, device=tensor.device) # [4, 70400, 2] + tensor_onehot.scatter_(dim, tensor.unsqueeze(dim).long(), on_value) # [4, 70400, 2] + return tensor_onehot + + +def sigmoid_focal_loss(preds, targets, weights=None, **kwargs): + assert 'gamma' in kwargs and 'alpha' in kwargs + # sigmoid cross entropy with logits + # more details: https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits + per_entry_cross_ent = torch.clamp(preds, min=0) - preds * targets.type_as(preds) + per_entry_cross_ent += torch.log1p(torch.exp(-torch.abs(preds))) + # focal loss + prediction_probabilities = torch.sigmoid(preds) + p_t = (targets * prediction_probabilities) + ((1 - targets) * (1 - prediction_probabilities)) + modulating_factor = torch.pow(1.0 - p_t, kwargs['gamma']) + alpha_weight_factor = targets * kwargs['alpha'] + (1 - targets) * (1 - kwargs['alpha']) + + loss = modulating_factor * alpha_weight_factor * per_entry_cross_ent + if weights is not None: + loss *= weights + return loss + + +def softmax_cross_entropy_with_logits(logits, labels): + param = list(range(len(logits.shape))) + transpose_param = [0] + [param[-1]] + param[1:-1] + logits = logits.permute(*transpose_param) + loss_ftor = torch.nn.CrossEntropyLoss(reduction="none") + loss = loss_ftor(logits, labels.max(dim=-1)[1]) + return loss + + +def weighted_smooth_l1_loss(preds, targets, sigma=3.0, weights=None): + diff = preds - targets + abs_diff = torch.abs(diff) + abs_diff_lt_1 = torch.le(abs_diff, 1 / (sigma ** 2)).type_as(abs_diff) + loss = abs_diff_lt_1 * 0.5 * torch.pow(abs_diff * sigma, 2) + \ + (abs_diff - 0.5 / (sigma ** 2)) * (1.0 - abs_diff_lt_1) + if weights is not None: + loss *= weights + return loss \ No newline at end of file diff --git 
a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/fpvrcnn_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/fpvrcnn_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..d62c2d608094f040a9a84ca2b86cd51b47506db9 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/fpvrcnn_loss.py @@ -0,0 +1,192 @@ +import torch +from torch import nn +import numpy as np +from opencood.loss.ciassd_loss import CiassdLoss, weighted_smooth_l1_loss +from icecream import ic + +class FpvrcnnLoss(nn.Module): + def __init__(self, args): + super(FpvrcnnLoss, self).__init__() + self.ciassd_loss = CiassdLoss(args['stage1']) + self.cls = args['stage2']['cls'] + self.reg = args['stage2']['reg'] + self.iou = args['stage2']['iou'] + self.loss_dict = {} + + def forward(self, output_dict, label_dict): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + ciassd_loss = self.ciassd_loss(output_dict, label_dict) + + # only update ciassd if no bbox is detected in the first stage + if 'stage2_out' not in output_dict: + self.loss_dict = { + 'loss': ciassd_loss, + } + return ciassd_loss + + # rcnn out + rcnn_cls = output_dict['stage2_out']['rcnn_cls'].view(1, -1, 1) + rcnn_iou = output_dict['stage2_out']['rcnn_iou'].view(1, -1, 1) + rcnn_reg = output_dict['stage2_out']['rcnn_reg'].view(1, -1, 7) + + tgt_cls = output_dict['rcnn_label_dict']['cls_tgt'].view(1, -1, 1) + tgt_iou = output_dict['rcnn_label_dict']['iou_tgt'].view(1, -1, 1) + tgt_reg = output_dict['rcnn_label_dict']['reg_tgt'].view(1, -1, 7) + + pos_norm = tgt_cls.sum() + # cls loss + loss_cls = weighted_sigmoid_binary_cross_entropy(rcnn_cls, tgt_cls) + + + # iou loss + # TODO: also count the negative samples + tgt_iou = 2 * (tgt_iou - 0.5) # normalize to -1, 1 + loss_iou = weighted_smooth_l1_loss(rcnn_iou, tgt_iou, + weights=tgt_cls).mean() + + # regression loss + # [deprecated by Yifan Lu] Target resampling : Generate a weights mask to force the regressor concentrate on low iou predictions + # sample 50% with iou>0.7 and 50% < 0.7 + weights = torch.ones(tgt_iou.shape, device=tgt_iou.device) + weights[tgt_cls == 0] = 0 + # neg = torch.logical_and(tgt_iou < 0.7, tgt_cls != 0) + # pos = torch.logical_and(tgt_iou >= 0.7, tgt_cls != 0) + # num_neg = int(neg.sum(dim=1)) + # num_pos = int(pos.sum(dim=1)) + # num_pos_smps = max(num_neg, 2) + # pos_indices = torch.where(pos)[1] + # not_selsected = torch.randperm(num_pos)[:num_pos - num_pos_smps] + # # not_selsected_indices = pos_indices[not_selsected] + # weights[:, pos_indices[not_selsected]] = 0 + loss_reg = weighted_smooth_l1_loss(rcnn_reg, tgt_reg, + weights=weights / max(weights.sum(), + 1)).sum() + + loss_cls_reduced = loss_cls * self.cls['weight'] + loss_iou_reduced = loss_iou * self.iou['weight'] + loss_reg_reduced = loss_reg * self.reg['weight'] + + # if torch.isnan(loss_reg_reduced): + # print('debug') + + rcnn_loss = loss_cls_reduced + loss_iou_reduced + loss_reg_reduced + loss = rcnn_loss + ciassd_loss + + self.loss_dict.update({ + 'loss': loss, + 'rcnn_loss': rcnn_loss, + 'cls_loss': loss_cls_reduced, + 'iou_loss': loss_iou_reduced, + 'reg_loss': loss_reg_reduced, + }) + + return loss + + def logging(self, epoch, batch_id, batch_len, writer=None): + """ + Print out the loss function for current iteration. + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. 
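A small numeric sketch (illustrative values) of the second-stage target handling above: proposal IoUs are rescaled from [0, 1] to [-1, 1] for the IoU head, and the regression weights keep only foreground proposals, normalised by their count.

import torch

tgt_iou = torch.tensor([[[0.9], [0.55], [0.2]]])   # proposal IoU with its GT box
tgt_cls = torch.tensor([[[1.0], [1.0], [0.0]]])    # 1 = foreground proposal

iou_reg_target = 2 * (tgt_iou - 0.5)               # -> [[[0.8], [0.1], [-0.6]]]

weights = torch.ones_like(tgt_iou)
weights[tgt_cls == 0] = 0                          # ignore background proposals
weights = weights / max(weights.sum(), 1)          # each positive contributes equally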
+ batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + ciassd_loss_dict = self.ciassd_loss.loss_dict + ciassd_total_loss = ciassd_loss_dict['total_loss'] + reg_loss = ciassd_loss_dict['reg_loss'] + cls_loss = ciassd_loss_dict['cls_loss'] + dir_loss = ciassd_loss_dict['dir_loss'] + iou_loss = ciassd_loss_dict['iou_loss'] + + if (batch_id + 1) % 10 == 0: + str_to_print = "[epoch %d][%d/%d], || Loss: %.4f || Ciassd: %.4f " \ + "|| Cls1: %.4f || Loc1: %.4f || Dir1: %.4f || Iou1: %.4f" % ( + epoch, batch_id + 1, batch_len, self.loss_dict['loss'], + ciassd_total_loss.item(), cls_loss.item(), reg_loss.item(), + dir_loss.item(), iou_loss.item(), + ) + if 'rcnn_loss' in self.loss_dict: + str_to_print += " || Rcnn: %.4f || Cls2: %.4f || Loc2: %.4f || Iou2: %.4f" % ( + self.loss_dict['rcnn_loss'], + self.loss_dict['cls_loss'].item(), + self.loss_dict['reg_loss'].item(), + self.loss_dict['iou_loss'].item(), + ) + print(str_to_print) + + if writer: + writer.add_scalar('Ciassd_regression_loss', reg_loss.item(), + epoch * batch_len + batch_id) + writer.add_scalar('Ciassd_Confidence_loss', cls_loss.item(), + epoch * batch_len + batch_id) + writer.add_scalar('Ciassd_Direction_loss', dir_loss.item(), + epoch * batch_len + batch_id) + writer.add_scalar('Ciassd_Iou_loss', iou_loss.item(), + epoch * batch_len + batch_id) + writer.add_scalar('Ciassd_loss', ciassd_total_loss.item(), + epoch * batch_len + batch_id) + if 'rcnn_loss' in self.loss_dict: + writer.add_scalar('Rcnn_regression_loss', + self.loss_dict['reg_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('Rcnn_Confidence_loss', + self.loss_dict['cls_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('Rcnn_Iou_loss', + self.loss_dict['iou_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('Rcnn_loss', self.loss_dict['rcnn_loss'].item(), + epoch * batch_len + batch_id) + writer.add_scalar('Total_loss', self.loss_dict['loss'].item(), + epoch * batch_len + batch_id) + + +def weighted_sigmoid_binary_cross_entropy(preds, tgts, weights=None, + class_indices=None): + if weights is not None: + weights = weights.unsqueeze(-1) + if class_indices is not None: + weights *= ( + indices_to_dense_vector(class_indices, preds.shape[2]) + .view(1, 1, -1) + .type_as(preds) + ) + per_entry_cross_ent = nn.functional.binary_cross_entropy_with_logits(preds, + tgts, + weights) + return per_entry_cross_ent + + +def indices_to_dense_vector( + indices, size, indices_value=1.0, default_value=0, dtype=np.float32 +): + """Creates dense vector with indices set to specific value and rest to zeros. + This function exists because it is unclear if it is safe to use + tf.sparse_to_dense(indices, [size], 1, validate_indices=False) + with indices which are not ordered. + This function accepts a dynamic size (e.g. tf.shape(tensor)[0]) + Args: + indices: 1d Tensor with integer indices which are to be set to + indices_values. + size: scalar with size (integer) of output Tensor. + indices_value: values of elements specified by indices in the output vector + default_value: values of other elements in the output vector. + dtype: data type. + Returns: + dense 1D Tensor of shape [size] with indices set to indices_values and the + rest set to default_value. 
+ """ + dense = torch.zeros(size).fill_(default_value) + dense[indices] = indices_value + + return dense \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/pixor_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/pixor_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..2473960706d0b8c00b2ed3dd13665ffd82ed1859 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/pixor_loss.py @@ -0,0 +1,110 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + + +from functools import reduce + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class PixorLoss(nn.Module): + def __init__(self, args): + super(PixorLoss, self).__init__() + self.alpha = args["alpha"] + self.beta = args["beta"] + self.loss_dict = {} + + def forward(self, output_dict, target_dict): + """ + Compute loss for pixor network + Parameters + ---------- + output_dict : dict + The dictionary that contains the output. + + target_dict : dict + The dictionary that contains the target. + + Returns + ------- + total_loss : torch.Tensor + Total loss. + + """ + targets = target_dict["label_map"] + cls_preds, loc_preds = output_dict["cls"], output_dict["reg"] + + cls_targets, loc_targets = targets.split([1, 6], dim=1) + pos_count = cls_targets.sum() + neg_count = (cls_targets == 0).sum() + w1, w2 = neg_count / (pos_count + neg_count), pos_count / ( + pos_count + neg_count) + weights = torch.ones_like(cls_preds.reshape(-1)) + weights[cls_targets.reshape(-1) == 1] = w1 + weights[cls_targets.reshape(-1) == 0] = w2 + # cls_targets = cls_targets.float() + # cls_loss = F.binary_cross_entropy_with_logits(input=cls_preds.reshape(-1), target=cls_targets.reshape(-1), weight=weights, + # reduction='mean') + cls_loss = F.binary_cross_entropy_with_logits( + input=cls_preds, target=cls_targets, + reduction='mean') + pos_pixels = cls_targets.sum() + + loc_loss = F.smooth_l1_loss(cls_targets * loc_preds, + cls_targets * loc_targets, + reduction='sum') + loc_loss = loc_loss / pos_pixels if pos_pixels > 0 else loc_loss + + total_loss = self.alpha * cls_loss + self.beta * loc_loss + + self.loss_dict.update({'total_loss': total_loss, + 'reg_loss': loc_loss, + 'cls_loss': cls_loss}) + + return total_loss + + def logging(self, epoch, batch_id, batch_len, writer = None): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. 
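The inverse-frequency weighting computed in the PIXOR forward above, as a standalone sketch with a tiny illustrative label map; note the rarer class receives the larger weight.

import torch

cls_targets = torch.zeros(1, 1, 4, 5)
cls_targets[0, 0, 1, 2] = 1.0
cls_targets[0, 0, 3, 0] = 1.0                      # 2 positives out of 20 pixels

pos_count = cls_targets.sum()
neg_count = (cls_targets == 0).sum()
w_pos = neg_count / (pos_count + neg_count)        # 0.9: up-weight the rare positives
w_neg = pos_count / (pos_count + neg_count)        # 0.1: down-weight the negatives

weights = torch.ones_like(cls_targets.reshape(-1))
weights[cls_targets.reshape(-1) == 1] = w_pos
weights[cls_targets.reshape(-1) == 0] = w_neg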
+ batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict['total_loss'] + reg_loss = self.loss_dict['reg_loss'] + cls_loss = self.loss_dict['cls_loss'] + + print("[epoch %d][%d/%d], || Loss: %.4f || cls Loss: %.4f" + " || reg Loss: %.4f" % ( + epoch, batch_id + 1, batch_len, + total_loss.item(), cls_loss.item(), reg_loss.item())) + + if not writer is None: + writer.add_scalar('Regression_loss', reg_loss.item(), + epoch * batch_len + batch_id) + writer.add_scalar('Confidence_loss', cls_loss.item(), + epoch * batch_len + batch_id) + + +def test(): + torch.manual_seed(0) + loss = PixorLoss(None) + pred = torch.sigmoid(torch.randn(1, 7, 2, 3)) + label = torch.zeros(1, 7, 2, 3) + loss = loss(pred, label) + print(loss) + + +if __name__ == "__main__": + test() diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_depth_adv_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_depth_adv_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..668cc51203d322f02157a2a3fc61f47c16a41386 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_depth_adv_loss.py @@ -0,0 +1,208 @@ +""" +Deigned for camera modality with depth supervision. +""" +import torch +import torch.nn as nn +import torch.nn.functional as F +from opencood.loss.point_pillar_loss import PointPillarLoss + +class PointPillarDepthAdvLoss(PointPillarLoss): + def __init__(self, args): + super().__init__(args) + self.depth = args['depth'] + self.adv = args['adv'] + self.adv_criterion = nn.BCELoss() + + + self.depth_weight = self.depth['weight'] + self.smooth_target = True if 'smooth_target' in self.depth and self.depth['smooth_target'] else False + self.use_fg_mask = True if 'use_fg_mask' in self.depth and self.depth['use_fg_mask'] else False + self.fg_weight = 3.25 + self.bg_weight = 0.25 + if self.smooth_target: + self.depth_loss_func = FocalLoss(alpha=0.25, gamma=2.0, reduction="none", smooth_target=True) + else: + self.depth_loss_func = FocalLoss(alpha=0.25, gamma=2.0, reduction="none") + + def forward(self, output_dict, target_dict, suffix=""): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + + total_loss = super().forward(output_dict, target_dict, suffix) + all_depth_loss = 0 + all_adv_loss = 0 + + depth_items_list = [x for x in output_dict.keys() if x.startswith(f"depth_items{suffix}")] + ######## Depth Supervision ######## + for depth_item_name in depth_items_list: + depth_item = output_dict[depth_item_name] + # depth logdit: [N, D, H, W] + # depth gt indices: [N, H, W] + # fg_mask: [N, H, W] + depth_logit, depth_gt_indices = depth_item[0], depth_item[1] + depth_loss = self.depth_loss_func(depth_logit, depth_gt_indices) + if self.use_fg_mask: + fg_mask = depth_item[-1] + weight_mask = (fg_mask > 0) * self.fg_weight + (fg_mask == 0) * self.bg_weight + depth_loss *= weight_mask + + depth_loss = depth_loss.mean() * self.depth_weight + all_depth_loss += depth_loss + + total_loss += all_depth_loss + + adv_list = [x for x in output_dict.keys() if x.startswith(f"real_pred{suffix}")] + ######## adv in forground object ######## + for real_pred_keyname in adv_list: + fake_pred_keyname = real_pred_keyname.replace("real", "fake") + real = output_dict[real_pred_keyname].view(-1) + fake = output_dict[fake_pred_keyname].view(-1) + nsample = real.shape[0] + + real_label = torch.full((nsample,), 1, 
dtype=torch.float, device=real.device) + errD_real = self.adv_criterion(real, real_label) + + fake_label = torch.full((nsample,), 0, dtype=torch.float, device=real.device) + errD_fake = self.adv_criterion(fake, fake_label) + + all_adv_loss += errD_real * self.adv['real_weight'] + errD_fake * self.adv['fake_weight'] + + all_adv_loss *= self.adv['weight'] + total_loss += all_adv_loss + + self.loss_dict.update({'depth_loss': all_depth_loss}) + self.loss_dict.update({'adv_loss': all_adv_loss}) + self.loss_dict.update({'total_loss': total_loss}) + + return total_loss + + + def logging(self, epoch, batch_id, batch_len, writer = None, suffix=""): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. + batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict.get('total_loss', 0) + reg_loss = self.loss_dict.get('reg_loss', 0) + cls_loss = self.loss_dict.get('cls_loss', 0) + dir_loss = self.loss_dict.get('dir_loss', 0) + iou_loss = self.loss_dict.get('iou_loss', 0) + depth_loss = self.loss_dict.get('depth_loss', 0) + adv_loss = self.loss_dict.get('adv_loss', 0) + + + print("[epoch %d][%d/%d]%s || Loss: %.4f || Conf Loss: %.4f" + " || Loc Loss: %.4f || Dir Loss: %.4f || IoU Loss: %.4f || Depth Loss: %.4f || Adv Loss: %.4f " % ( + epoch, batch_id + 1, batch_len, suffix, + total_loss, cls_loss, reg_loss, dir_loss, iou_loss, depth_loss, adv_loss)) + + if not writer is None: + writer.add_scalar('Regression_loss' + suffix, reg_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss' + suffix, cls_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Dir_loss' + suffix, dir_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Iou_loss' + suffix, iou_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Depth_loss' + suffix, depth_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Adv_loss' + suffix, adv_loss, + epoch*batch_len + batch_id) + + +class FocalLoss(nn.Module): + r"""Criterion that computes Focal loss. + + According to :cite:`lin2018focal`, the Focal loss is computed as follows: + + .. math:: + + \text{FL}(p_t) = -\alpha_t (1 - p_t)^{\gamma} \, \text{log}(p_t) + + Where: + - :math:`p_t` is the model's estimated probability for each class. + + Args: + alpha: Weighting factor :math:`\alpha \in [0, 1]`. + gamma: Focusing parameter :math:`\gamma >= 0`. + reduction: Specifies the reduction to apply to the + output: ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction + will be applied, ``'mean'``: the sum of the output will be divided by + the number of elements in the output, ``'sum'``: the output will be + summed. + eps: Deprecated: scalar to enforce numerical stability. This is no longer + used. + + Shape: + - Input: :math:`(N, C, *)` where C = number of classes. + - Target: :math:`(N, *)` where each value is + :math:`0 ≤ targets[i] ≤ C−1`. 
+ + Example: + >>> N = 5 # num_classes + >>> kwargs = {"alpha": 0.5, "gamma": 2.0, "reduction": 'mean'} + >>> criterion = FocalLoss(**kwargs) + >>> input = torch.randn(1, N, 3, 5, requires_grad=True) + >>> target = torch.empty(1, 3, 5, dtype=torch.long).random_(N) + >>> output = criterion(input, target) + >>> output.backward() + """ + + def __init__(self, alpha, gamma = 2.0, reduction= 'none', smooth_target = False , eps = None) -> None: + super().__init__() + self.alpha = alpha + self.gamma = gamma + self.reduction = reduction + self.smooth_target = smooth_target + self.eps = eps + if self.smooth_target: + self.smooth_kernel = nn.Conv1d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=1, bias=False) + self.smooth_kernel.weight = torch.nn.Parameter(torch.tensor([[[0.2, 0.9, 0.2]]]), requires_grad=False) + self.smooth_kernel = self.smooth_kernel.to(torch.device("cuda")) + + def forward(self, input, target): + n = input.shape[0] + out_size = (n,) + input.shape[2:] + + # compute softmax over the classes axis + input_soft = input.softmax(1) + log_input_soft = input.log_softmax(1) + + # create the labels one hot tensor + D = input.shape[1] + if self.smooth_target: + target_one_hot = F.one_hot(target, num_classes=D).to(input).view(-1, D) # [N*H*W, D] + target_one_hot = self.smooth_kernel(target_one_hot.float().unsqueeze(1)).squeeze(1) # [N*H*W, D] + target_one_hot = target_one_hot.view(*target.shape, D).permute(0, 3, 1, 2) + else: + target_one_hot = F.one_hot(target, num_classes=D).to(input).permute(0, 3, 1, 2) + # compute the actual focal loss + weight = torch.pow(-input_soft + 1.0, self.gamma) + + focal = -self.alpha * weight * log_input_soft + loss_tmp = torch.einsum('bc...,bc...->b...', (target_one_hot, focal)) + + if self.reduction == 'none': + loss = loss_tmp + elif self.reduction == 'mean': + loss = torch.mean(loss_tmp) + elif self.reduction == 'sum': + loss = torch.sum(loss_tmp) + else: + raise NotImplementedError(f"Invalid reduction mode: {self.reduction}") + return loss \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_depth_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_depth_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..e2f7c9ab1f5e7f607dd91491eccc3c042547d103 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_depth_loss.py @@ -0,0 +1,209 @@ +""" +Deigned for camera modality with depth supervision. 
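A minimal standalone illustration of the smooth_target path above (hypothetical bin count): the fixed [0.2, 0.9, 0.2] kernel turns a one-hot depth bin into a soft target that leaks a little probability mass into the neighbouring bins.

import torch
import torch.nn as nn
import torch.nn.functional as F

D = 6                                              # hypothetical number of depth bins
target = torch.tensor([2, 4])                      # ground-truth bin indices
one_hot = F.one_hot(target, num_classes=D).float() # [2, D]

smooth = nn.Conv1d(1, 1, kernel_size=3, padding=1, bias=False)
smooth.weight = nn.Parameter(torch.tensor([[[0.2, 0.9, 0.2]]]), requires_grad=False)

soft = smooth(one_hot.unsqueeze(1)).squeeze(1)     # [2, D]
print(soft[0])   # tensor([0.0000, 0.2000, 0.9000, 0.2000, 0.0000, 0.0000])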
+""" +import torch +import torch.nn as nn +import torch.nn.functional as F +from opencood.loss.point_pillar_loss import PointPillarLoss + +class PointPillarDepthLoss(PointPillarLoss): + def __init__(self, args): + super().__init__(args) + self.depth = args['depth'] + + + self.depth_weight = self.depth['weight'] + self.smooth_target = True if 'smooth_target' in self.depth and self.depth['smooth_target'] else False + self.use_fg_mask = True if 'use_fg_mask' in self.depth and self.depth['use_fg_mask'] else False + self.fg_weight = 3.25 + self.bg_weight = 0.25 + if self.smooth_target: + self.depth_loss_func = FocalLoss(alpha=0.25, gamma=2.0, reduction="none", smooth_target=True) + else: + self.depth_loss_func = FocalLoss(alpha=0.25, gamma=2.0, reduction="none") + + # def forward(self, output_dict, target_dict, suffix=""): + # """ + # Parameters + # ---------- + # output_dict : dict + # target_dict : dict + # """ + + # total_loss = super().forward(output_dict, target_dict, suffix) + + # ######## Depth Supervision ######## + # if f"depth_items{suffix}" in output_dict and output_dict[f'depth_items{suffix}'] is not None: + # # depth logdit: [N, D, H, W] + # # depth gt indices: [N, H, W] + # # fg_mask: [N, H, W] + # depth_logit, depth_gt_indices = output_dict[f'depth_items{suffix}'][0], output_dict[f'depth_items{suffix}'][1] + # depth_loss = self.depth_loss_func(depth_logit, depth_gt_indices) + # if self.use_fg_mask: + # fg_mask = output_dict[f'depth_items{suffix}'][-1] + # weight_mask = (fg_mask > 0) * self.fg_weight + (fg_mask == 0) * self.bg_weight + # depth_loss *= weight_mask + + # depth_loss = depth_loss.mean() * self.depth_weight + + # total_loss += depth_loss + # self.loss_dict.update({'depth_loss': depth_loss}) + + # return total_loss + + def forward(self, output_dict, target_dict, suffix=""): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + + total_loss = super().forward(output_dict, target_dict, suffix) + all_depth_loss = 0 + depth_items_list = [x for x in output_dict.keys() if x.startswith(f"depth_items{suffix}")] + ######## Depth Supervision ######## + for depth_item_name in depth_items_list: + depth_item = output_dict[depth_item_name] + + # depth logdit: [N, D, H, W] + # depth gt indices: [N, H, W] + # fg_mask: [N, H, W] + depth_logit, depth_gt_indices = depth_item[0], depth_item[1] + depth_loss = self.depth_loss_func(depth_logit, depth_gt_indices) + if self.use_fg_mask: + fg_mask = depth_item[-1] + weight_mask = (fg_mask > 0) * self.fg_weight + (fg_mask == 0) * self.bg_weight + depth_loss *= weight_mask + + depth_loss = depth_loss.mean() * self.depth_weight + all_depth_loss += depth_loss + + total_loss += all_depth_loss + self.loss_dict.update({'depth_loss': all_depth_loss}) # no update the total loss in dict + + return total_loss + + + def logging(self, epoch, batch_id, batch_len, writer = None, suffix=""): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. 
+ batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict.get('total_loss', 0) + reg_loss = self.loss_dict.get('reg_loss', 0) + cls_loss = self.loss_dict.get('cls_loss', 0) + dir_loss = self.loss_dict.get('dir_loss', 0) + iou_loss = self.loss_dict.get('iou_loss', 0) + depth_loss = self.loss_dict.get('depth_loss', 0) + + + print("[epoch %d][%d/%d]%s || Loss: %.4f || Conf Loss: %.4f" + " || Loc Loss: %.4f || Dir Loss: %.4f || IoU Loss: %.4f || Depth Loss: %.4f" % ( + epoch, batch_id + 1, batch_len, suffix, + total_loss, cls_loss, reg_loss, dir_loss, iou_loss, depth_loss)) + + if not writer is None: + writer.add_scalar('Regression_loss' + suffix, reg_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss' + suffix, cls_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Dir_loss' + suffix, dir_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Iou_loss' + suffix, iou_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Depth_loss' + suffix, depth_loss, + epoch*batch_len + batch_id) + + +class FocalLoss(nn.Module): + r"""Criterion that computes Focal loss. + + According to :cite:`lin2018focal`, the Focal loss is computed as follows: + + .. math:: + + \text{FL}(p_t) = -\alpha_t (1 - p_t)^{\gamma} \, \text{log}(p_t) + + Where: + - :math:`p_t` is the model's estimated probability for each class. + + Args: + alpha: Weighting factor :math:`\alpha \in [0, 1]`. + gamma: Focusing parameter :math:`\gamma >= 0`. + reduction: Specifies the reduction to apply to the + output: ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction + will be applied, ``'mean'``: the sum of the output will be divided by + the number of elements in the output, ``'sum'``: the output will be + summed. + eps: Deprecated: scalar to enforce numerical stability. This is no longer + used. + + Shape: + - Input: :math:`(N, C, *)` where C = number of classes. + - Target: :math:`(N, *)` where each value is + :math:`0 ≤ targets[i] ≤ C−1`. 
+
+    Example:
+        >>> N = 5  # num_classes
+        >>> kwargs = {"alpha": 0.5, "gamma": 2.0, "reduction": 'mean'}
+        >>> criterion = FocalLoss(**kwargs)
+        >>> input = torch.randn(1, N, 3, 5, requires_grad=True)
+        >>> target = torch.empty(1, 3, 5, dtype=torch.long).random_(N)
+        >>> output = criterion(input, target)
+        >>> output.backward()
+    """
+
+    def __init__(self, alpha, gamma=2.0, reduction='none', smooth_target=False, eps=None) -> None:
+        super().__init__()
+        self.alpha = alpha
+        self.gamma = gamma
+        self.reduction = reduction
+        self.smooth_target = smooth_target
+        self.eps = eps
+        if self.smooth_target:
+            self.smooth_kernel = nn.Conv1d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=1, bias=False)
+            self.smooth_kernel.weight = torch.nn.Parameter(torch.tensor([[[0.2, 0.9, 0.2]]]), requires_grad=False)
+            self.smooth_kernel = self.smooth_kernel.to(torch.device("cuda"))
+
+    def forward(self, input, target):
+        n = input.shape[0]
+        out_size = (n,) + input.shape[2:]
+
+        # compute softmax over the classes axis
+        input_soft = input.softmax(1)
+        log_input_soft = input.log_softmax(1)
+
+        # create the labels one hot tensor
+        D = input.shape[1]
+        if self.smooth_target:
+            target_one_hot = F.one_hot(target, num_classes=D).to(input).view(-1, D)  # [N*H*W, D]
+            target_one_hot = self.smooth_kernel(target_one_hot.float().unsqueeze(1)).squeeze(1)  # [N*H*W, D]
+            target_one_hot = target_one_hot.view(*target.shape, D).permute(0, 3, 1, 2)
+        else:
+            target_one_hot = F.one_hot(target, num_classes=D).to(input).permute(0, 3, 1, 2)
+        # compute the actual focal loss
+        weight = torch.pow(-input_soft + 1.0, self.gamma)
+
+        focal = -self.alpha * weight * log_input_soft
+        loss_tmp = torch.einsum('bc...,bc...->b...', (target_one_hot, focal))
+
+        if self.reduction == 'none':
+            loss = loss_tmp
+        elif self.reduction == 'mean':
+            loss = torch.mean(loss_tmp)
+        elif self.reduction == 'sum':
+            loss = torch.sum(loss_tmp)
+        else:
+            raise NotImplementedError(f"Invalid reduction mode: {self.reduction}")
+        return loss
\ No newline at end of file
diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_disconet_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_disconet_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..52441644cf7b3e47ffad03806a6fae6fae39f546
--- /dev/null
+++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_disconet_loss.py
@@ -0,0 +1,108 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+from opencood.loss.point_pillar_loss import PointPillarLoss
+
+class PointPillarDiscoNetLoss(PointPillarLoss):
+    def __init__(self, args):
+        super(PointPillarDiscoNetLoss, self).__init__(args)
+        self.kd = args['kd']
+
+    def forward(self, output_dict, target_dict):
+        """
+        Parameters
+        ----------
+        output_dict : dict
+        target_dict : dict
+        """
+        total_loss = super().forward(output_dict, target_dict)
+
+        ########## KL loss ############
+        rm = output_dict['reg_preds']   # [B, 14, 50, 176]
+        psm = output_dict['cls_preds']  # [B, 2, 50, 176]
+        feature = output_dict['feature']
+
+        teacher_rm = output_dict['teacher_reg_preds']
+        teacher_psm = output_dict['teacher_cls_preds']
+
+        teacher_feature = output_dict['teacher_feature']
+        kl_loss_mean = nn.KLDivLoss(size_average=True, reduce=True)
+
+        N, C, H, W = teacher_feature.shape
+        teacher_feature = teacher_feature.permute(0,2,3,1).reshape(N*H*W, C)
+        student_feature = feature.permute(0,2,3,1).reshape(N*H*W, C)
+        kd_loss_feature = kl_loss_mean(
+            F.log_softmax(student_feature, dim=1), F.softmax(teacher_feature, dim=1)
+        )
+
+        kd_loss = kd_loss_feature
+
+        if self.kd.get('decoder_kd', False):
+            N, C, H, W = teacher_rm.shape
+            teacher_rm = teacher_rm.permute(0,2,3,1).reshape(N*H*W, C)
+            student_rm = rm.permute(0,2,3,1).reshape(N*H*W, C)
+            kd_loss_rm = kl_loss_mean(
+                F.log_softmax(student_rm, dim=1), F.softmax(teacher_rm, dim=1)
+            )
+
+            N, C, H, W = teacher_psm.shape
+            teacher_psm = teacher_psm.permute(0,2,3,1).reshape(N*H*W, C)
+            student_psm = psm.permute(0,2,3,1).reshape(N*H*W, C)
+            kd_loss_psm = kl_loss_mean(
+                F.log_softmax(student_psm, dim=1), F.softmax(teacher_psm, dim=1)
+            )
+
+            kd_loss += kd_loss_rm + kd_loss_psm
+
+        kd_loss *= self.kd['weight']
+        total_loss += kd_loss
+        self.loss_dict.update({'total_loss': total_loss.item(),
+                               'kd_loss': kd_loss.item()})
+
+        return total_loss
+
+    def logging(self, epoch, batch_id, batch_len, writer=None):
+        """
+        Print out the loss function for current iteration.
+
+        Parameters
+        ----------
+        epoch : int
+            Current epoch for training.
+        batch_id : int
+            The current batch.
+        batch_len : int
+            Total batch length in one iteration of training.
+        writer : SummaryWriter
+            Used to visualize on tensorboard
+        """
+        total_loss = self.loss_dict.get('total_loss', 0)
+        reg_loss = self.loss_dict.get('reg_loss', 0)
+        cls_loss = self.loss_dict.get('cls_loss', 0)
+        dir_loss = self.loss_dict.get('dir_loss', 0)
+        iou_loss = self.loss_dict.get('iou_loss', 0)
+        kd_loss = self.loss_dict.get('kd_loss', 0)
+
+        print("[epoch %d][%d/%d] || Loss: %.4f || Conf Loss: %.4f"
+              " || Loc Loss: %.4f || Dir Loss: %.4f || IoU Loss: %.4f || KD Loss: %.4f" % (
+                  epoch, batch_id + 1, batch_len,
+                  total_loss, cls_loss, reg_loss, dir_loss, iou_loss, kd_loss))
+
+        if writer is not None:
+            writer.add_scalar('Regression_loss', reg_loss,
+                              epoch*batch_len + batch_id)
+            writer.add_scalar('Confidence_loss', cls_loss,
+                              epoch*batch_len + batch_id)
+            writer.add_scalar('Dir_loss', dir_loss,
+                              epoch*batch_len + batch_id)
+            writer.add_scalar('Iou_loss', iou_loss,
+                              epoch*batch_len + batch_id)
+            writer.add_scalar('Kd_loss', kd_loss,
+                              epoch*batch_len + batch_id)
\ No newline at end of file
diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c4fc65c55920a16d33d2f9351492dcc6cfda903
--- /dev/null
+++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_loss.py
@@ -0,0 +1,245 @@
+# -*- coding: utf-8 -*-
+# Author: Yifan Lu
+# Add direction classification loss
+# The original point_pillar_loss.py cannot determine whether the box heading is opposite to the GT.
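+#
+# Editor's note (illustrative, not from the original author): the sin-difference trick encodes the
+# heading residual as sin(theta_pred - theta_gt), which is numerically the same for a perfect
+# prediction and for one flipped by pi, e.g. with a hypothetical ground-truth yaw of 0.3 rad:
+#   >>> import math
+#   >>> round(math.sin(0.3 - 0.3), 6), round(math.sin((0.3 + math.pi) - 0.3), 6)
+#   (0.0, 0.0)
+# The 2-bin direction classifier added in this file recovers that lost half-turn by classifying
+# which pi-wide bin the (offset) ground-truth yaw falls into (see get_direction_target).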
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+from opencood.utils.common_utils import limit_period
+from opencood.data_utils.post_processor.voxel_postprocessor import VoxelPostprocessor
+from icecream import ic
+
+class PointPillarLoss(nn.Module):
+    def __init__(self, args):
+        super(PointPillarLoss, self).__init__()
+        self.pos_cls_weight = args['pos_cls_weight']
+
+        self.cls = args['cls']
+        self.reg = args['reg']
+
+        if 'dir' in args:
+            self.dir = args['dir']
+        else:
+            self.dir = None
+
+        if 'iou' in args:
+            from opencood.pcdet_utils.iou3d_nms.iou3d_nms_utils import aligned_boxes_iou3d_gpu
+            self.iou_loss_func = aligned_boxes_iou3d_gpu
+            self.iou = args['iou']
+        else:
+            self.iou = None
+
+        self.loss_dict = {}
+
+    def forward(self, output_dict, target_dict, suffix=""):
+        """
+        Parameters
+        ----------
+        output_dict : dict
+        target_dict : dict
+        """
+        if 'record_len' in output_dict:
+            batch_size = int(output_dict['record_len'].sum())
+        elif 'batch_size' in output_dict:
+            batch_size = output_dict['batch_size']
+        else:
+            batch_size = target_dict['pos_equal_one'].shape[0]
+
+        cls_labls = target_dict['pos_equal_one'].view(batch_size, -1, 1)
+        positives = cls_labls > 0
+        negatives = target_dict['neg_equal_one'].view(batch_size, -1, 1) > 0
+        # cared = torch.logical_or(positives, negatives)
+        # cls_labls = cls_labls * cared.type_as(cls_labls)
+        # num_normalizer = cared.sum(1, keepdim=True)
+        pos_normalizer = positives.sum(1, keepdim=True).float()
+
+        # rename variable
+        if f'psm{suffix}' in output_dict:
+            output_dict[f'cls_preds{suffix}'] = output_dict[f'psm{suffix}']
+        if f'rm{suffix}' in output_dict:
+            output_dict[f'reg_preds{suffix}'] = output_dict[f'rm{suffix}']
+        if f'dm{suffix}' in output_dict:
+            output_dict[f'dir_preds{suffix}'] = output_dict[f'dm{suffix}']
+
+        total_loss = 0
+
+        # cls loss
+        cls_preds = output_dict[f'cls_preds{suffix}'].permute(0, 2, 3, 1).contiguous() \
+                    .view(batch_size, -1, 1)
+        cls_weights = positives * self.pos_cls_weight + negatives * 1.0
+        cls_weights /= torch.clamp(pos_normalizer, min=1.0)
+        cls_loss = sigmoid_focal_loss(cls_preds, cls_labls, weights=cls_weights, **self.cls)
+        cls_loss = cls_loss.sum() * self.cls['weight'] / batch_size
+
+        # reg loss
+        reg_weights = positives / torch.clamp(pos_normalizer, min=1.0)
+        reg_preds = output_dict[f'reg_preds{suffix}'].permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 7)
+        reg_targets = target_dict['targets'].view(batch_size, -1, 7)
+        reg_preds, reg_targets = self.add_sin_difference(reg_preds, reg_targets)
+        reg_loss = weighted_smooth_l1_loss(reg_preds, reg_targets, weights=reg_weights, sigma=self.reg['sigma'])
+        reg_loss = reg_loss.sum() * self.reg['weight'] / batch_size
+
+        ######## direction ##########
+        if self.dir:
+            dir_targets = self.get_direction_target(target_dict['targets'].view(batch_size, -1, 7))
+            dir_logits = output_dict[f"dir_preds{suffix}"].permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2)  # [N, H*W*#anchor, 2]
+
+            dir_loss = softmax_cross_entropy_with_logits(dir_logits.view(-1, self.anchor_num), dir_targets.view(-1, self.anchor_num))
+            dir_loss = dir_loss.flatten() * reg_weights.flatten()
+            dir_loss = dir_loss.sum() * self.dir['weight'] / batch_size
+            total_loss += dir_loss
+            self.loss_dict.update({'dir_loss': dir_loss.item()})
+
+        ######## IoU ###########
+        if self.iou:
+            iou_preds = output_dict[f"iou_preds{suffix}"].permute(0, 2, 3, 1).contiguous()
+            pos_pred_mask = reg_weights.squeeze(dim=-1) > 0  # (4, 70400)
+            iou_pos_preds = 
iou_preds.view(batch_size, -1)[pos_pred_mask] + boxes3d_pred = VoxelPostprocessor.delta_to_boxes3d(output_dict[f'reg_preds{suffix}'].permute(0, 2, 3, 1).contiguous().detach(), + output_dict['anchor_box'])[pos_pred_mask] + boxes3d_tgt = VoxelPostprocessor.delta_to_boxes3d(target_dict['targets'], + output_dict['anchor_box'])[pos_pred_mask] + iou_weights = reg_weights[pos_pred_mask].view(-1) + iou_pos_targets = self.iou_loss_func(boxes3d_pred.float()[:, [0, 1, 2, 5, 4, 3, 6]], # hwl -> dx dy dz + boxes3d_tgt.float()[:, [0, 1, 2, 5, 4, 3, 6]]).detach().squeeze() + iou_pos_targets = 2 * iou_pos_targets.view(-1) - 1 + iou_loss = weighted_smooth_l1_loss(iou_pos_preds, iou_pos_targets, weights=iou_weights, sigma=self.iou['sigma']) + + iou_loss = iou_loss.sum() * self.iou['weight'] / batch_size + total_loss += iou_loss + self.loss_dict.update({'iou_loss': iou_loss.item()}) + + total_loss += reg_loss + cls_loss + + self.loss_dict.update({'total_loss': total_loss.item(), + 'reg_loss': reg_loss.item(), + 'cls_loss': cls_loss.item()}) + + return total_loss + + + @staticmethod + def add_sin_difference(boxes1, boxes2, dim=6): + assert dim != -1 + rad_pred_encoding = torch.sin(boxes1[..., dim:dim + 1]) * \ + torch.cos(boxes2[..., dim:dim + 1]) + rad_tg_encoding = torch.cos(boxes1[..., dim:dim + 1]) * \ + torch.sin(boxes2[..., dim:dim + 1]) + + boxes1 = torch.cat([boxes1[..., :dim], rad_pred_encoding, + boxes1[..., dim + 1:]], dim=-1) + boxes2 = torch.cat([boxes2[..., :dim], rad_tg_encoding, + boxes2[..., dim + 1:]], dim=-1) + return boxes1, boxes2 + + def get_direction_target(self, reg_targets): + """ + Args: + reg_targets: [N, H * W * #anchor_num, 7] + The last term is (theta_gt - theta_a) + + Returns: + dir_targets: + theta_gt: [N, H * W * #anchor_num, NUM_BIN] + NUM_BIN = 2 + """ + num_bins = self.dir['args']['num_bins'] + dir_offset = self.dir['args']['dir_offset'] + anchor_yaw = np.deg2rad(np.array(self.dir['args']['anchor_yaw'])) # for direction classification + self.anchor_yaw_map = torch.from_numpy(anchor_yaw).view(1,-1,1) # [1,2,1] + self.anchor_num = self.anchor_yaw_map.shape[1] + + H_times_W_times_anchor_num = reg_targets.shape[1] + anchor_map = self.anchor_yaw_map.repeat(1, H_times_W_times_anchor_num//self.anchor_num, 1).to(reg_targets.device) # [1, H * W * #anchor_num, 1] + rot_gt = reg_targets[..., -1] + anchor_map[..., -1] # [N, H*W*anchornum] + offset_rot = limit_period(rot_gt - dir_offset, 0, 2 * np.pi) + dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / num_bins)).long() # [N, H*W*anchornum] + dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=num_bins - 1) + # one_hot: + # if rot_gt > 0, then the label is 1, then the regression target is [0, 1] + dir_cls_targets = one_hot_f(dir_cls_targets, num_bins) + return dir_cls_targets + + + + def logging(self, epoch, batch_id, batch_len, writer = None, suffix=""): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. 
+ batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict.get('total_loss', 0) + reg_loss = self.loss_dict.get('reg_loss', 0) + cls_loss = self.loss_dict.get('cls_loss', 0) + dir_loss = self.loss_dict.get('dir_loss', 0) + iou_loss = self.loss_dict.get('iou_loss', 0) + + + print("[epoch %d][%d/%d]%s || Loss: %.4f || Conf Loss: %.4f" + " || Loc Loss: %.4f || Dir Loss: %.4f || IoU Loss: %.4f" % ( + epoch, batch_id + 1, batch_len, suffix, + total_loss, cls_loss, reg_loss, dir_loss, iou_loss)) + + if not writer is None: + writer.add_scalar('Regression_loss'+suffix, reg_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss'+suffix, cls_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Dir_loss'+suffix, dir_loss, + epoch*batch_len + batch_id) + writer.add_scalar('Iou_loss'+suffix, iou_loss, + epoch*batch_len + batch_id) + +def one_hot_f(tensor, num_bins, dim=-1, on_value=1.0, dtype=torch.float32): + tensor_onehot = torch.zeros(*list(tensor.shape), num_bins, dtype=dtype, device=tensor.device) + tensor_onehot.scatter_(dim, tensor.unsqueeze(dim).long(), on_value) + return tensor_onehot + +def softmax_cross_entropy_with_logits(logits, labels): + param = list(range(len(logits.shape))) + transpose_param = [0] + [param[-1]] + param[1:-1] + logits = logits.permute(*transpose_param) + loss_ftor = torch.nn.CrossEntropyLoss(reduction="none") + loss = loss_ftor(logits, labels.max(dim=-1)[1]) + return loss + +def weighted_smooth_l1_loss(preds, targets, sigma=3.0, weights=None): + diff = preds - targets + abs_diff = torch.abs(diff) + abs_diff_lt_1 = torch.le(abs_diff, 1 / (sigma ** 2)).type_as(abs_diff) + loss = abs_diff_lt_1 * 0.5 * torch.pow(abs_diff * sigma, 2) + \ + (abs_diff - 0.5 / (sigma ** 2)) * (1.0 - abs_diff_lt_1) + if weights is not None: + loss *= weights + return loss + + +def sigmoid_focal_loss(preds, targets, weights=None, **kwargs): + assert 'gamma' in kwargs and 'alpha' in kwargs + # sigmoid cross entropy with logits + # more details: https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits + per_entry_cross_ent = torch.clamp(preds, min=0) - preds * targets.type_as(preds) + per_entry_cross_ent += torch.log1p(torch.exp(-torch.abs(preds))) + # focal loss + prediction_probabilities = torch.sigmoid(preds) + p_t = (targets * prediction_probabilities) + ((1 - targets) * (1 - prediction_probabilities)) + modulating_factor = torch.pow(1.0 - p_t, kwargs['gamma']) + alpha_weight_factor = targets * kwargs['alpha'] + (1 - targets) * (1 - kwargs['alpha']) + + loss = modulating_factor * alpha_weight_factor * per_entry_cross_ent + if weights is not None: + loss *= weights + return loss \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_uncertainty_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_uncertainty_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..0b9d2f9fbc04b457249b8a09811e7b73f4296d92 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/point_pillar_uncertainty_loss.py @@ -0,0 +1,289 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +from opencood.loss.point_pillar_loss import PointPillarLoss, \ + one_hot_f, softmax_cross_entropy_with_logits, weighted_smooth_l1_loss, sigmoid_focal_loss +import d3d.mathh as mathh +from opencood.utils.common_utils import 
limit_period +from opencood.data_utils.post_processor.voxel_postprocessor import VoxelPostprocessor +from functools import partial + +class PointPillarUncertaintyLoss(PointPillarLoss): + def __init__(self, args): + super(PointPillarUncertaintyLoss, self).__init__(args) + self.uncertainty = args['uncertainty'] + self.uncertainty_dim = args['uncertainty']['dim'] # 2 means x, y; 3 means x, y, yaw; 7 means x y z dh dw dl yaw + self.unc_loss_func = KLLoss(args['uncertainty']) + + + def forward(self, output_dict, target_dict, suffix=""): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + if 'record_len' in output_dict: + batch_size = int(output_dict['record_len'].sum()) + elif 'batch_size' in output_dict: + batch_size = output_dict['batch_size'] + else: + batch_size = target_dict['pos_equal_one'].shape[0] + + cls_labls = target_dict['pos_equal_one'].view(batch_size, -1, 1) + positives = cls_labls > 0 + negatives = target_dict['neg_equal_one'].view(batch_size, -1, 1) > 0 + + pos_normalizer = positives.sum(1, keepdim=True).float() + + # rename variable + if f'psm{suffix}' in output_dict: + output_dict[f'cls_preds{suffix}'] = output_dict[f'psm{suffix}'] + if f'rm{suffix}' in output_dict: + output_dict[f'reg_preds{suffix}'] = output_dict[f'rm{suffix}'] + if f'dm{suffix}' in output_dict: + output_dict[f'dir_preds{suffix}'] = output_dict[f'dm{suffix}'] + if f'sm{suffix}' in output_dict: + output_dict[f'unc_preds{suffix}'] = output_dict[f'sm{suffix}'] + + total_loss = 0 + + # cls loss + cls_preds = output_dict[f'cls_preds{suffix}'].permute(0, 2, 3, 1).contiguous() \ + .view(batch_size, -1, 1) + cls_weights = positives * self.pos_cls_weight + negatives * 1.0 + cls_weights /= torch.clamp(pos_normalizer, min=1.0) + cls_loss = sigmoid_focal_loss(cls_preds, cls_labls, weights=cls_weights, **self.cls) + cls_loss = cls_loss.sum() * self.cls['weight'] / batch_size + + # reg loss + reg_weights = positives / torch.clamp(pos_normalizer, min=1.0) + reg_preds = output_dict[f'reg_preds{suffix}'].permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 7) + reg_targets = target_dict['targets'].view(batch_size, -1, 7) + reg_preds_w_angle, reg_targets_w_angle = self.add_sin_difference_and_angle(reg_preds, reg_targets) # note the difference + reg_loss = weighted_smooth_l1_loss(reg_preds_w_angle[...,:7], reg_targets_w_angle[...,:7], weights=reg_weights, sigma=self.reg['sigma']) + reg_loss = reg_loss.sum() * self.reg['weight'] / batch_size + + # uncertainty loss + ######## kl ######### + unc_preds = output_dict[f'unc_preds{suffix}'].permute(0, 2, 3, 1).contiguous() # [N, H, W, #anchor_num * 3] + unc_preds = unc_preds.view(unc_preds.size(0), -1, self.uncertainty_dim) + + unc_loss = self.unc_loss_func(reg_preds_w_angle, + reg_targets_w_angle, + unc_preds, + reg_weights) + + unc_loss = unc_loss.sum() / unc_preds.shape[0] + unc_loss *= self.uncertainty['weight'] + + + ######## direction ########## + if self.dir: + dir_targets = self.get_direction_target(target_dict['targets'].view(batch_size, -1, 7)) + dir_logits = output_dict[f"dir_preds{suffix}"].permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2) # [N, H*W*#anchor, 2] + + dir_loss = softmax_cross_entropy_with_logits(dir_logits.view(-1, self.anchor_num), dir_targets.view(-1, self.anchor_num)) + dir_loss = dir_loss.flatten() * reg_weights.flatten() + dir_loss = dir_loss.sum() * self.dir['weight'] / batch_size + total_loss += dir_loss + self.loss_dict.update({'dir_loss': dir_loss.item()}) + + + ######## IoU ########### + if self.iou: + 
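+            # Editor's note: this IoU branch decodes the predicted and ground-truth deltas of the
+            # positive anchors into 3D boxes, computes their aligned 3D IoU, rescales it from [0, 1]
+            # to [-1, 1], and regresses iou_preds toward that value with a smooth-L1 loss.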
+            iou_preds = output_dict[f"iou_preds{suffix}"].permute(0, 2, 3, 1).contiguous()
+            pos_pred_mask = reg_weights.squeeze(dim=-1) > 0  # (4, 70400)
+            iou_pos_preds = iou_preds.view(batch_size, -1)[pos_pred_mask]
+            boxes3d_pred = VoxelPostprocessor.delta_to_boxes3d(output_dict[f'reg_preds{suffix}'].permute(0, 2, 3, 1).contiguous().detach(),
+                                                               output_dict['anchor_box'])[pos_pred_mask]
+            boxes3d_tgt = VoxelPostprocessor.delta_to_boxes3d(target_dict['targets'],
+                                                              output_dict['anchor_box'])[pos_pred_mask]
+            iou_weights = reg_weights[pos_pred_mask].view(-1)
+            iou_pos_targets = self.iou_loss_func(boxes3d_pred.float()[:, [0, 1, 2, 5, 4, 3, 6]],  # hwl -> dx dy dz
+                                                 boxes3d_tgt.float()[:, [0, 1, 2, 5, 4, 3, 6]]).detach().squeeze()
+            iou_pos_targets = 2 * iou_pos_targets.view(-1) - 1
+            iou_loss = weighted_smooth_l1_loss(iou_pos_preds, iou_pos_targets, weights=iou_weights, sigma=self.iou['sigma'])
+
+            iou_loss = iou_loss.sum() * self.iou['weight'] / batch_size
+            total_loss += iou_loss
+            self.loss_dict.update({'iou_loss': iou_loss.item()})
+
+        total_loss += reg_loss + cls_loss + unc_loss
+
+        self.loss_dict.update({'total_loss': total_loss.item(),
+                               'reg_loss': reg_loss.item(),
+                               'cls_loss': cls_loss.item(),
+                               'unc_loss': unc_loss.item()})
+
+        return total_loss
+
+    def logging(self, epoch, batch_id, batch_len, writer=None):
+        """
+        Print out the loss function for current iteration.
+
+        Parameters
+        ----------
+        epoch : int
+            Current epoch for training.
+        batch_id : int
+            The current batch.
+        batch_len : int
+            Total batch length in one iteration of training.
+        writer : SummaryWriter
+            Used to visualize on tensorboard
+        """
+        total_loss = self.loss_dict.get('total_loss', 0)
+        reg_loss = self.loss_dict.get('reg_loss', 0)
+        cls_loss = self.loss_dict.get('cls_loss', 0)
+        dir_loss = self.loss_dict.get('dir_loss', 0)
+        iou_loss = self.loss_dict.get('iou_loss', 0)
+        unc_loss = self.loss_dict.get('unc_loss', 0)
+
+        print("[epoch %d][%d/%d] || Loss: %.4f || Conf Loss: %.4f"
+              " || Loc Loss: %.4f || Dir Loss: %.4f || IoU Loss: %.4f || Unc Loss: %.4f" % (
+                  epoch, batch_id + 1, batch_len,
+                  total_loss, cls_loss, reg_loss, dir_loss, iou_loss, unc_loss))
+
+        if writer is not None:
+            writer.add_scalar('Regression_loss', reg_loss,
+                              epoch*batch_len + batch_id)
+            writer.add_scalar('Confidence_loss', cls_loss,
+                              epoch*batch_len + batch_id)
+            writer.add_scalar('Dir_loss', dir_loss,
+                              epoch*batch_len + batch_id)
+            writer.add_scalar('Iou_loss', iou_loss,
+                              epoch*batch_len + batch_id)
+            writer.add_scalar('Unc_loss', unc_loss,
+                              epoch*batch_len + batch_id)
+
+    @staticmethod
+    def add_sin_difference_and_angle(boxes1, boxes2, dim=6):
+        """
+        This is different from the base PointPillarLoss's add_sin_difference function.
+        We retain the angle and put it at the last dimension.
+
+        add_sin_difference returns [B, H*W, 7]
+        ->
+        add_sin_difference_and_angle returns [B, H*W, 8]
+
+        """
+        assert dim != -1
+
+        # sin(theta1 - theta2) = sin(theta1)*cos(theta2) - cos(theta1)*sin(theta2)
+        rad_pred_encoding = torch.sin(boxes1[..., dim:dim + 1]) * \
+                            torch.cos(boxes2[..., dim:dim + 1])
+
+        rad_tg_encoding = torch.cos(boxes1[..., dim: dim + 1]) * \
+                          torch.sin(boxes2[..., dim: dim + 1])
+
+        boxes1_w_angle = torch.cat([boxes1[..., :dim], rad_pred_encoding,
+                                    boxes1[..., dim:]], dim=-1)  # originally, boxes1[..., dim + 1:]], dim=-1)
+        boxes2_w_angle = torch.cat([boxes2[..., :dim], rad_tg_encoding,
+                                    boxes2[..., dim:]], dim=-1)  # originally, boxes2[..., dim + 1:]], dim=-1)
+
+        return boxes1_w_angle, boxes2_w_angle
+
+
+class KLLoss(nn.Module):
+    def __init__(self, args):
+        super(KLLoss, self).__init__()
+
+        self.angle_weight = args['angle_weight']
+        self.uncertainty_dim = args['dim']
+        if args['xy_loss_type'] == "l2":
+            self.xy_loss = self.kl_loss_l2
+        elif args['xy_loss_type'] == "l1":
+            self.xy_loss = self.kl_loss_l1
+        else:
+            raise NotImplementedError(f"xy_loss_type {args['xy_loss_type']} is not implemented")
+
+        if args['angle_loss_type'] == "l2":
+            self.angle_loss = self.kl_loss_l2
+        elif args['angle_loss_type'] == "von-mise":
+            lambda_V = args['lambda_V']
+            s0 = args['s0']
+            limit_period = args['limit_period']
+            self.angle_loss = partial(self.kl_loss_angular, lambda_V=lambda_V, s0=s0, limit_period=limit_period)
+        else:
+            raise NotImplementedError(f"angle_loss_type {args['angle_loss_type']} is not implemented")
+
+    @staticmethod
+    def kl_loss_l2(diff, s):
+        """
+        Args:
+            diff: [B, 2]
+            s:    [B, 2]
+        Returns:
+            loss: [B, 2]
+        """
+        loss = 0.5*(torch.exp(-s) * (diff**2) + s)
+        return loss
+
+    @staticmethod
+    def kl_loss_l1(diff, s):
+        """
+        Args:
+            diff: [B, 2]
+            s:    [B, 2]
+        Returns:
+            loss: [B, 2]
+        """
+        loss = 0.5*torch.exp(-s) * torch.abs(diff) + s
+        return loss
+
+    @staticmethod
+    def kl_loss_angular(diff, s, lambda_V=1, s0=1, limit_period=False):
+        """
+        Args:
+            diff: [B, 1]
+            s:    [B, 1]
+            if limit_period, a heading off by 180 degrees (diff + pi) is treated the same as diff.
+        Returns:
+            loss: [B, 1]
+        """
+        exp_minus_s = torch.exp(-s)
+        if limit_period:
+            cos_abs = torch.abs(torch.cos(diff))
+            loss = torch.log(mathh.i0e_cuda(exp_minus_s)*torch.exp(exp_minus_s)) - exp_minus_s * cos_abs.detach() + lambda_V * F.elu(s-s0)
+        else:
+            loss = torch.log(mathh.i0e_cuda(exp_minus_s)*torch.exp(exp_minus_s)) - exp_minus_s * torch.cos(diff) + lambda_V * F.elu(s-s0)
+
+        return loss
+
+    def forward(self, input: torch.Tensor,
+                target: torch.Tensor,
+                sm: torch.Tensor,
+                weights: torch.Tensor = None):
+        target = torch.where(torch.isnan(target), input, target)  # ignore nan targets
+
+        if self.uncertainty_dim == 3:  # x,y,yaw
+            xy_diff = input[...,:2] - target[...,:2]
+            loss1 = self.xy_loss(xy_diff, sm[...,:2])
+            theta_diff = input[...,7:8] - target[...,7:8]
+            loss2 = self.angle_weight * self.angle_loss(theta_diff, sm[...,2:3])
+            loss = torch.cat((loss1, loss2), dim=-1)
+
+        elif self.uncertainty_dim == 7:  # all regression target
+            other_diff = input[...,:6] - target[...,:6]
+            theta_diff = input[...,7:8] - target[...,7:8]
+            diff = torch.cat((other_diff, theta_diff), dim=-1)
+            loss = self.xy_loss(diff, sm)
+
+        elif self.uncertainty_dim == 2:  # x,y
+            xy_diff = input[...,:2] - target[...,:2]
+            loss = self.xy_loss(xy_diff, sm[...,:2])
+        else:
+            raise NotImplementedError(f"uncertainty dim {self.uncertainty_dim} is not implemented")
+
+        # anchor-wise weighting
+        if weights is not None:
+            assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1]
+
+            loss = loss * weights
+
+        return loss
\ No newline at end of file
diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/uncertainty_loss_old.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/uncertainty_loss_old.py
new file mode 100644
index 0000000000000000000000000000000000000000..e4ca42c8a7070f94ea9094728a68bf28c52fcfc0
--- /dev/null
+++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/uncertainty_loss_old.py
@@ -0,0 +1,482 @@
+# -*- coding: utf-8 -*-
+# Author: Yifan Lu
+# License: TDG-Attribution-NonCommercial-NoDistrib
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+import d3d.mathh as mathh
+from opencood.utils.common_utils import limit_period
+from functools import partial
+
+class WeightedSmoothL1Loss(nn.Module):
+    """
+    Code-wise Weighted Smooth L1 Loss modified based on fvcore.nn.smooth_l1_loss
+    https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/smooth_l1_loss.py
+                  | 0.5 * x ** 2 / beta   if abs(x) < beta
+    smoothl1(x) = |
+                  | abs(x) - 0.5 * beta   otherwise,
+    where x = input - target.
+    """
+    def __init__(self, beta: float = 1.0 / 9.0, code_weights: list = None):
+        """
+        Args:
+            beta: Scalar float.
+                L1 to L2 change point.
+                For beta values < 1e-5, L1 loss is computed.
+            code_weights: (#codes) float list if not None.
+                Code-wise weights.
+        """
+        super(WeightedSmoothL1Loss, self).__init__()
+        self.beta = beta
+        if code_weights is not None:
+            self.code_weights = np.array(code_weights, dtype=np.float32)
+            self.code_weights = torch.from_numpy(self.code_weights).cuda()
+
+    @staticmethod
+    def smooth_l1_loss(diff, beta):
+        if beta < 1e-5:
+            loss = torch.abs(diff)
+        else:
+            n = torch.abs(diff)
+            loss = torch.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta)
+
+        return loss
+
+    def forward(self, input: torch.Tensor,
+                target: torch.Tensor, weights: torch.Tensor = None):
+        """
+        Args:
+            input: (B, #anchors, #codes) float tensor.
+                Encoded predicted locations of objects.
+            target: (B, #anchors, #codes) float tensor.
+                Regression targets.
+ weights: (B, #anchors) float tensor if not None. + + Returns: + loss: (B, #anchors) float tensor. + Weighted smooth l1 loss without reduction. + """ + target = torch.where(torch.isnan(target), input, target) # ignore nan targets + + diff = input - target + loss = self.smooth_l1_loss(diff, self.beta) + + # anchor-wise weighting + if weights is not None: + assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1] + loss = loss * weights.unsqueeze(-1) + + return loss + + + + +class KLLoss(nn.Module): + def __init__(self, args): + super(KLLoss, self).__init__() + + self.angle_weight = args['angle_weight'] + self.uncertainty_dim = args['uncertainty_dim'] + if args['xy_loss_type'] == "l2": + self.xy_loss = self.kl_loss_l2 + elif args['xy_loss_type'] == "l1": + self.xy_loss = self.kl_loss_l1 + else: + raise "not implemented" + + if args['angle_loss_type'] == "l2": + self.angle_loss = self.kl_loss_l2 + elif args['angle_loss_type'] == "von": + lambda_V = args['lambda_V'] + s0 = args['s0'] + limit_period = args['limit_period'] + self.angle_loss = partial(self.kl_loss_angular, lambda_V=lambda_V, s0=s0, limit_period=limit_period) + else: + raise "not implemented" + + + + + @staticmethod + def kl_loss_l2(diff, s): + """ + Args: + diff: [B, 2] + s: [B, 2] + Returns: + loss: [B, 2] + """ + loss = 0.5*(torch.exp(-s) * (diff**2) + s) + return loss + + @staticmethod + def kl_loss_l1(diff, s): + """ + Args: + diff: [B, 2] + s: [B, 2] + Returns: + loss: [B, 2] + """ + loss = 0.5*torch.exp(-s) * torch.abs(diff) + s + return loss + + @staticmethod + def kl_loss_angular(diff, s, lambda_V=1, s0=1, limit_period=False): + """ + Args: + diff: [B, 1] + s: [B, 1] + if limit_period, + diff + 180 ~ diff. + Returns: + loss: [B, 1] + """ + exp_minus_s = torch.exp(-s) + if limit_period: + cos_abs = torch.abs(torch.cos(diff)) + loss = loss = torch.log(mathh.i0e_cuda(exp_minus_s)*torch.exp(exp_minus_s)) - exp_minus_s * cos_abs.detach() + lambda_V * F.elu(s-s0) + else: + loss = torch.log(mathh.i0e_cuda(exp_minus_s)*torch.exp(exp_minus_s)) - exp_minus_s * torch.cos(diff) + lambda_V * F.elu(s-s0) + + return loss + + + def forward(self, input: torch.Tensor, + target: torch.Tensor, + sm: torch.Tensor, + weights: torch.Tensor = None): + target = torch.where(torch.isnan(target), input, target) # ignore nan targets + + + + if self.uncertainty_dim == 3: + xy_diff = input[...,:2] - target[...,:2] + loss1 = self.xy_loss(xy_diff, sm[...,:2]) + + theta_diff = input[...,7:8] - target[...,7:8] + + loss2 = self.angle_weight * self.angle_loss(theta_diff, sm[...,2:3]) + + loss = torch.cat((loss1, loss2), dim=-1) + + elif self.uncertainty_dim == 7: + ## is this right? 
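+            # Editor's note: this appears consistent with add_sin_difference_dim below -- index 6 holds
+            # the sin-encoded residual and index 7 the retained raw angle, so the seven regressed codes
+            # are the first six box codes plus the raw angle difference.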
+ other_diff = input[...,:6] - target[...,:6] + theta_diff = input[...,7:8] - target[...,7:8] + + diff = torch.cat((other_diff, theta_diff), dim=-1) + loss = self.xy_loss(diff, sm) + + elif self.uncertainty_dim == 2: + xy_diff = input[...,:2] - target[...,:2] + loss = self.xy_loss(xy_diff, sm[...,:2]) + else: + raise "not implemented" + + # anchor-wise weighting + if weights is not None: + assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1] + loss = loss * weights.unsqueeze(-1) + + return loss + + + +class PointPillarUncertaintyLoss(nn.Module): + def __init__(self, args): + super(PointPillarUncertaintyLoss, self).__init__() + self.reg_loss_func = WeightedSmoothL1Loss() + self.alpha = 0.25 + self.gamma = 2.0 + + self.cls_weight = args['cls_weight'] + self.kl_weight = args['kl_weight'] + self.reg_coe = args['reg'] + self.uncertainty_dim = args['kl_args']['uncertainty_dim'] + + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_weight = args['dir_args']['dir_weight'] + self.dir_offset = args['dir_args']['args']['dir_offset'] + self.num_bins = args['dir_args']['args']['num_bins'] + anchor_yaw = np.deg2rad(np.array(args['dir_args']['anchor_yaw'])) # for direction classification + self.anchor_yaw_map = torch.from_numpy(anchor_yaw).view(1,-1,1) # [1,2,1] + self.anchor_num = self.anchor_yaw_map.shape[1] + + else: + self.use_dir =False + + + self.kl_loss_func = KLLoss(args['kl_args']) + + self.loss_dict = {} + + def forward(self, output_dict, target_dict): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + rm = output_dict['rm'] # [B, 14, 50, 176] + psm = output_dict['psm'] # [B, 2, 50, 176] + sm = output_dict['sm'] # log of sigma^2 / scale [B, 6, 50 176] + targets = target_dict['targets'] + + cls_preds = psm.permute(0, 2, 3, 1).contiguous() # N, C, H, W -> N, H, W, C + + box_cls_labels = target_dict['pos_equal_one'] # [B, 50, 176, 2] + box_cls_labels = box_cls_labels.view(psm.shape[0], -1).contiguous() # -> [B, 50*176*2], two types of anchor + + positives = box_cls_labels > 0 + negatives = box_cls_labels == 0 + negative_cls_weights = negatives * 1.0 + cls_weights = (negative_cls_weights + 1.0 * positives).float() # all 1 + reg_weights = positives.float() + + pos_normalizer = positives.sum(1, keepdim=True).float() # positive number per sample + reg_weights /= torch.clamp(pos_normalizer, min=1.0) + cls_weights /= torch.clamp(pos_normalizer, min=1.0) + cls_targets = box_cls_labels + cls_targets = cls_targets.unsqueeze(dim=-1) + + cls_targets = cls_targets.squeeze(dim=-1) + one_hot_targets = torch.zeros( + *list(cls_targets.shape), 2, + dtype=cls_preds.dtype, device=cls_targets.device + ) + one_hot_targets.scatter_(-1, cls_targets.unsqueeze(dim=-1).long(), 1.0) + cls_preds = cls_preds.view(psm.shape[0], -1, 1) + one_hot_targets = one_hot_targets[..., 1:] + + cls_loss_src = self.cls_loss_func(cls_preds, + one_hot_targets, + weights=cls_weights) # [N, M] + cls_loss = cls_loss_src.sum() / psm.shape[0] + conf_loss = cls_loss * self.cls_weight + + ########## regression ########## + rm = rm.permute(0, 2, 3, 1).contiguous() + rm = rm.view(rm.size(0), -1, 7) + targets = targets.view(targets.size(0), -1, 7) + + box_preds_sin, reg_targets_sin = self.add_sin_difference_dim(rm, + targets) + loc_loss_src =\ + self.reg_loss_func(box_preds_sin[...,:7], + reg_targets_sin[...,:7], + weights=reg_weights) + reg_loss = loc_loss_src.sum() / rm.shape[0] + reg_loss *= self.reg_coe + + + ######## direction ########## + if self.use_dir: + dir_targets = 
self.get_direction_target(targets) + N = output_dict["dm"].shape[0] + dir_logits = output_dict["dm"].permute(0, 2, 3, 1).contiguous().view(N, -1, 2) # [N, H*W*#anchor, 2] + + + dir_loss = softmax_cross_entropy_with_logits(dir_logits.view(-1, self.anchor_num), dir_targets.view(-1, self.anchor_num)) + + dir_loss = dir_loss.view(dir_logits.shape[:2]) * reg_weights # [N, H*W*anchor_num] + + dir_loss = dir_loss.sum() * self.dir_weight / N + + ######## kl ######### + sm = sm.permute(0, 2, 3, 1).contiguous() # [N, H, W, #anchor_num * 3] + sm = sm.view(sm.size(0), -1, self.uncertainty_dim) + + kl_loss_src = \ + self.kl_loss_func(box_preds_sin, + reg_targets_sin, + sm, + reg_weights) + + kl_loss = kl_loss_src.sum() / sm.shape[0] + kl_loss *= self.kl_weight + + # total_loss = reg_loss + conf_loss + kl_loss + total_loss = reg_loss + conf_loss + + self.loss_dict.update({'total_loss': total_loss, + 'reg_loss': reg_loss, + 'conf_loss': conf_loss, + 'kl_loss': kl_loss}) + + if self.use_dir: + # total_loss += dir_loss + self.loss_dict.update({'dir_loss': dir_loss}) + + + return total_loss + + def get_direction_target(self, reg_targets): + """ + Args: + reg_targets: [N, H * W * #anchor_num, 7] + The last term is (theta_gt - theta_a) + + Returns: + dir_targets: + theta_gt: [N, H * W * #anchor_num, NUM_BIN] + NUM_BIN = 2 + """ + # (1, 2, 1) + H_times_W_times_anchor_num = reg_targets.shape[1] + anchor_map = self.anchor_yaw_map.repeat(1, H_times_W_times_anchor_num//self.anchor_num, 1).to(reg_targets.device) # [1, H * W * #anchor_num, 1] + rot_gt = reg_targets[..., -1] + anchor_map[..., -1] # [N, H*W*anchornum] + offset_rot = limit_period(rot_gt - self.dir_offset, 0, 2 * np.pi) + dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / self.num_bins)).long() # [N, H*W*anchornum] + dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=self.num_bins - 1) + # one_hot: + # if rot_gt > 0, then the label is 1, then the regression target is [0, 1] + dir_cls_targets = one_hot_f(dir_cls_targets, self.num_bins) + return dir_cls_targets + + + + def cls_loss_func(self, input: torch.Tensor, + target: torch.Tensor, + weights: torch.Tensor): + """ + Args: + input: (B, #anchors, #classes) float tensor. + Predicted logits for each class + target: (B, #anchors, #classes) float tensor. + One-hot encoded classification targets + weights: (B, #anchors) float tensor. + Anchor-wise weights. + + Returns: + weighted_loss: (B, #anchors, #classes) float tensor after weighting. + """ + pred_sigmoid = torch.sigmoid(input) + alpha_weight = target * self.alpha + (1 - target) * (1 - self.alpha) + pt = target * (1.0 - pred_sigmoid) + (1.0 - target) * pred_sigmoid + focal_weight = alpha_weight * torch.pow(pt, self.gamma) + + bce_loss = self.sigmoid_cross_entropy_with_logits(input, target) + + loss = focal_weight * bce_loss + + if weights.shape.__len__() == 2 or \ + (weights.shape.__len__() == 1 and target.shape.__len__() == 2): + weights = weights.unsqueeze(-1) + + assert weights.shape.__len__() == loss.shape.__len__() + + return loss * weights + + @staticmethod + def sigmoid_cross_entropy_with_logits(input: torch.Tensor, target: torch.Tensor): + """ PyTorch Implementation for tf.nn.sigmoid_cross_entropy_with_logits: + max(x, 0) - x * z + log(1 + exp(-abs(x))) in + https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits + + Args: + input: (B, #anchors, #classes) float tensor. + Predicted logits for each class + target: (B, #anchors, #classes) float tensor. 
+ One-hot encoded classification targets + + Returns: + loss: (B, #anchors, #classes) float tensor. + Sigmoid cross entropy loss without reduction + """ + loss = torch.clamp(input, min=0) - input * target + \ + torch.log1p(torch.exp(-torch.abs(input))) + return loss + + @staticmethod + def add_sin_difference_dim(boxes1, boxes2, dim=6): + """ + This is different with other loss function. + Here we especially retain the angel + + Add sin difference ? + Replace sin difference ! + + Returns: + [B, H*W, 7] -> [B, H*W, 8] + """ + assert dim != -1 + + # sin(theta1 - theta2) = sin(theta1)*cos(theta2) - cos(theta1)*sin(theta2) + + rad_pred_encoding = torch.sin(boxes1[..., dim:dim + 1]) * \ + torch.cos(boxes2[..., dim:dim + 1]) + + rad_tg_encoding = torch.cos(boxes1[..., dim: dim + 1]) * \ + torch.sin(boxes2[..., dim: dim + 1]) + + # boxes1 = torch.cat([boxes1[..., :dim], rad_pred_encoding, + # boxes1[..., dim + 1:]], dim=-1) + # boxes2 = torch.cat([boxes2[..., :dim], rad_tg_encoding, + # boxes2[..., dim + 1:]], dim=-1) + + boxes1_encoded = torch.cat([boxes1[..., :dim], rad_pred_encoding, + boxes1[..., dim:]], dim=-1) + boxes2_encoded = torch.cat([boxes2[..., :dim], rad_tg_encoding, + boxes2[..., dim:]], dim=-1) + + return boxes1_encoded, boxes2_encoded + + + def logging(self, epoch, batch_id, batch_len, writer = None): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. + batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict['total_loss'] + reg_loss = self.loss_dict['reg_loss'] + conf_loss = self.loss_dict['conf_loss'] + kl_loss = self.loss_dict['kl_loss'] + + + print_msg = ("[epoch %d][%d/%d], || Loss: %.4f || Conf Loss: %.4f" + " || Loc Loss: %.4f || KL Loss: %.4f" % ( + epoch, batch_id + 1, batch_len, + total_loss.item(), conf_loss.item(), reg_loss.item(), kl_loss.item())) + + if self.use_dir: + dir_loss = self.loss_dict['dir_loss'] + print_msg += " || Dir Loss: %.4f" % dir_loss.item() + + print(print_msg) + + if not writer is None: + writer.add_scalar('Regression_loss', reg_loss.item(), + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss', conf_loss.item(), + epoch*batch_len + batch_id) + writer.add_scalar('kl_loss', kl_loss.item(), + epoch*batch_len + batch_id) + if self.use_dir: + writer.add_scalar('dir_loss', dir_loss.item(), + epoch*batch_len + batch_id) + +def one_hot_f(tensor, depth, dim=-1, on_value=1.0, dtype=torch.float32): + tensor_onehot = torch.zeros(*list(tensor.shape), depth, dtype=dtype, device=tensor.device) # [4, 70400, 2] + tensor_onehot.scatter_(dim, tensor.unsqueeze(dim).long(), on_value) # [4, 70400, 2] + return tensor_onehot + +def softmax_cross_entropy_with_logits(logits, labels): + param = list(range(len(logits.shape))) + transpose_param = [0] + [param[-1]] + param[1:-1] + logits = logits.permute(*transpose_param) + loss_ftor = torch.nn.CrossEntropyLoss(reduction="none") + loss = loss_ftor(logits, labels.max(dim=-1)[1]) + return loss diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/voxel_net_loss.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/voxel_net_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..a257fa9edbc4f81322eee5e3f73d8f8b0e8d7e50 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/loss/voxel_net_loss.py @@ -0,0 +1,92 @@ +# -*- coding: 
utf-8 -*- +# Author: Runsheng Xu , Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + + + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class VoxelNetLoss(nn.Module): + def __init__(self, args): + super(VoxelNetLoss, self).__init__() + self.smoothl1loss = nn.SmoothL1Loss(size_average=False) + self.alpha = args['alpha'] + self.beta = args['beta'] + self.reg_coe = args['reg'] + self.loss_dict = {} + + def forward(self, output_dict, target_dict): + """ + Parameters + ---------- + output_dict : dict + target_dict : dict + """ + rm = output_dict['rm'] + psm = output_dict['psm'] + + pos_equal_one = target_dict['pos_equal_one'] + neg_equal_one = target_dict['neg_equal_one'] + targets = target_dict['targets'] + + p_pos = F.sigmoid(psm.permute(0, 2, 3, 1)) + rm = rm.permute(0, 2, 3, 1).contiguous() + rm = rm.view(rm.size(0), rm.size(1), rm.size(2), -1, 7) + targets = targets.view(targets.size(0), targets.size(1), + targets.size(2), -1, 7) + pos_equal_one_for_reg = pos_equal_one.unsqueeze( + pos_equal_one.dim()).expand(-1, -1, -1, -1, 7) + + rm_pos = rm * pos_equal_one_for_reg + targets_pos = targets * pos_equal_one_for_reg + + cls_pos_loss = -pos_equal_one * torch.log(p_pos + 1e-6) + cls_pos_loss = cls_pos_loss.sum() / (pos_equal_one.sum() + 1e-6) + + cls_neg_loss = -neg_equal_one * torch.log(1 - p_pos + 1e-6) + cls_neg_loss = cls_neg_loss.sum() / (neg_equal_one.sum() + 1e-6) + + reg_loss = self.smoothl1loss(rm_pos, targets_pos) + reg_loss = reg_loss / (pos_equal_one.sum() + 1e-6) + conf_loss = self.alpha * cls_pos_loss + self.beta * cls_neg_loss + + total_loss = self.reg_coe * reg_loss + conf_loss + + self.loss_dict.update({'total_loss': total_loss, + 'reg_loss': reg_loss, + 'conf_loss': conf_loss}) + + return total_loss + + def logging(self, epoch, batch_id, batch_len, writer = None): + """ + Print out the loss function for current iteration. + + Parameters + ---------- + epoch : int + Current epoch for training. + batch_id : int + The current batch. 
+ batch_len : int + Total batch length in one iteration of training, + writer : SummaryWriter + Used to visualize on tensorboard + """ + total_loss = self.loss_dict['total_loss'] + reg_loss = self.loss_dict['reg_loss'] + conf_loss = self.loss_dict['conf_loss'] + + print("[epoch %d][%d/%d], || Loss: %.4f || Conf Loss: %.4f" + " || Loc Loss: %.4f" % ( + epoch, batch_id + 1, batch_len, + total_loss.item(), conf_loss.item(), reg_loss.item())) + + if not writer is None: + writer.add_scalar('Regression_loss', reg_loss.item(), + epoch*batch_len + batch_id) + writer.add_scalar('Confidence_loss', conf_loss.item(), + epoch*batch_len + batch_id) diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__init__.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/__init__.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d1abc5fb11a40ad42f073d24eeef31aaef94fb5a Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/__init__.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/center_point_codriving.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/center_point_codriving.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7be5a81881416b375dbc97a555af57c205a7ef50 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/center_point_codriving.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/point_pillar_multiclass.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/point_pillar_multiclass.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e0397ae7179f77b8786a1d413973b97ca9acc9e6 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/point_pillar_multiclass.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/point_pillar_single_multiclass.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/point_pillar_single_multiclass.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..edfe74f8dceee0ea598ad9fe9125c48ac4d3691a Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/__pycache__/point_pillar_single_multiclass.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point.py new file mode 100644 index 0000000000000000000000000000000000000000..34c657370fe11b637894a2efc8ab43c9d5d17b2c --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point.py @@ -0,0 +1,148 @@ +# -*- coding: utf-8 -*- + + +import torch +import torch.nn as nn +import numpy as np + + +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import 
BaseBEVBackbone
+from opencood.models.sub_modules.downsample_conv import DownsampleConv
+
+
+class CenterPoint(nn.Module):
+    def __init__(self, args):
+        super(CenterPoint, self).__init__()
+
+        # Pillar VFE
+        self.pillar_vfe = PillarVFE(args['pillar_vfe'],
+                                    num_point_features=4,
+                                    voxel_size=args['voxel_size'],
+                                    point_cloud_range=args['lidar_range'])
+        self.scatter = PointPillarScatter(args['point_pillar_scatter'])
+        self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64)
+        # self.out_size_factor = args['']
+        self.voxel_size = args['voxel_size']
+        self.out_size_factor = args['out_size_factor']
+        self.cav_lidar_range = args['lidar_range']
+
+        self.shrink_flag = False
+        if 'shrink_header' in args:
+            self.shrink_flag = True
+            self.shrink_conv = DownsampleConv(args['shrink_header'])
+        input_channels = 128*2 if self.shrink_flag else 128*3
+        self.cls_head = nn.Conv2d(input_channels, args['anchor_number'],
+                                  kernel_size=1)
+        self.reg_head = nn.Conv2d(input_channels, 8 * args['anchor_number'],
+                                  kernel_size=1)
+        # self.conv_cls = nn.Conv2d(input_channels, args['anchor_number'],
+        #                           kernel_size=1)
+        # self.conv_box = nn.Conv2d(input_channels, 8 * args['anchor_number'],
+        #                           kernel_size=1)
+        self.init_weight()
+
+    def init_weight(self):
+        pi = 0.01
+        nn.init.constant_(self.cls_head.bias, -np.log((1 - pi) / pi))
+        nn.init.normal_(self.reg_head.weight, mean=0, std=0.001)
+
+    def forward(self, data_dict):
+        voxel_features = data_dict['processed_lidar']['voxel_features']
+        voxel_coords = data_dict['processed_lidar']['voxel_coords']
+        voxel_num_points = data_dict['processed_lidar']['voxel_num_points']
+
+        batch_dict = {'voxel_features': voxel_features,
+                      'voxel_coords': voxel_coords,
+                      'voxel_num_points': voxel_num_points}
+
+        batch_dict = self.pillar_vfe(batch_dict)
+        batch_dict = self.scatter(batch_dict)
+        batch_dict = self.backbone(batch_dict)
+
+        spatial_features_2d = batch_dict['spatial_features_2d']
+
+        if self.shrink_flag:
+            spatial_features_2d = self.shrink_conv(spatial_features_2d)
+
+        cls = self.cls_head(spatial_features_2d)
+        bbox = self.reg_head(spatial_features_2d)
+        # cls = self.conv_cls(spatial_features_2d)
+        # bbox = self.conv_box(spatial_features_2d)
+
+        # reshape the second dimension of bbox to the 7 box parameters
+        _, bbox_temp = self.generate_predicted_boxes(cls, bbox)
+
+        # print(bbox.equal(bbox_temp))
+        output_dict = {'cls_preds': cls,
+                       'reg_preds': bbox_temp,
+                       'bbox_preds': bbox}  # 'bbox' is used when computing the loss, 'rm' when generating the output
+
+        return output_dict
+
+    def generate_predicted_boxes(self, cls_preds, box_preds, dir_cls_preds=None):
+        """
+        Args:
+            batch_size:
+            cls_preds: (N, H, W, C1)
+            box_preds: (N, H, W, C2)
+            dir_cls_preds: (N, H, W, C3)
+
+        Returns:
+            batch_cls_preds: (B, num_boxes, num_classes)
+            batch_box_preds: (B, num_boxes, 7+C)
+
+        """
+        box_preds = box_preds.permute(0, 2, 3, 1).contiguous()
+
+        batch, H, W, code_size = box_preds.size()  ## code_size is the size of the predicted box encoding
+
+        # batch_reg = box_preds[:, 0:2, :, :]  # x,y,z
+        # batch_hei = box_preds[:, 2:3, :, :]
+        # batch_dim = torch.exp(box_preds[:, 3:6, :, :])
+        # # batch_dim = box_preds[:, 3:6, :, :]  # w h l
+        # batch_rots = box_preds[:, 6:7, :, :]
+        # batch_rotc = box_preds[:, 7:8, :, :]
+        # rot = torch.atan2(batch_rots, batch_rotc)
+
+        box_preds = box_preds.reshape(batch, H*W, code_size)
+
+        batch_reg = box_preds[..., 0:2]
+        # batch_hei = box_preds[..., 2:3]
+        # batch_dim = torch.exp(box_preds[..., 3:6])
+
+        h = box_preds[..., 3:4] * self.out_size_factor * self.voxel_size[0]
+        w = box_preds[..., 4:5] * self.out_size_factor * self.voxel_size[1]
+        l = box_preds[..., 5:6] * self.out_size_factor * self.voxel_size[2]
+        batch_dim = torch.cat([h,w,l], dim=-1)
+        batch_hei = box_preds[..., 2:3] * self.out_size_factor * self.voxel_size[2] + self.cav_lidar_range[2]
+
+        batch_rots = box_preds[..., 6:7]
+        batch_rotc = box_preds[..., 7:8]
+
+        rot = torch.atan2(batch_rots, batch_rotc)
+
+        ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)])
+        ys = ys.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device)
+        xs = xs.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device)
+
+        xs = xs.view(batch, -1, 1) + batch_reg[:, :, 0:1]
+        ys = ys.view(batch, -1, 1) + batch_reg[:, :, 1:2]
+
+        xs = xs * self.out_size_factor * self.voxel_size[0] + self.cav_lidar_range[0]  ## recover real-world coordinates from the feature-map grid
+        ys = ys * self.out_size_factor * self.voxel_size[1] + self.cav_lidar_range[1]
+
+        batch_box_preds = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=2)
+        # batch_box_preds = batch_box_preds.reshape(batch, H, W, batch_box_preds.shape[-1])
+        # batch_box_preds = batch_box_preds.permute(0, 3, 1, 2).contiguous()
+
+        # batch_box_preds_temp = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=1)
+        # box_preds = box_preds.permute(0, 3, 1, 2).contiguous()
+
+        # batch_cls_preds = cls_preds.view(batch, H*W, -1)
+        return cls_preds, batch_box_preds
diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_baseline.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_baseline.py
new file mode 100644
index 0000000000000000000000000000000000000000..f4478bc7fb7939b5a11461b89eeb38949fcbc435
--- /dev/null
+++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_baseline.py
@@ -0,0 +1,217 @@
+# Author: Yifan Lu
+# a class that integrates multiple simple fusion methods (Single Scale)
+# Supports F-Cooper, Self-Att, DiscoNet(wo KD), V2VNet, V2XViT, When2comm
+import torch
+import torch.nn as nn
+from icecream import ic
+import numpy as np
+from opencood.models.sub_modules.pillar_vfe import PillarVFE
+from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter
+from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone
+from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone
+from opencood.models.sub_modules.downsample_conv import DownsampleConv
+from opencood.models.sub_modules.naive_compress import NaiveCompressor
+from opencood.models.fuse_modules.f_cooper_fuse import SpatialFusion
+from opencood.models.fuse_modules.fusion_in_one import MaxFusion, AttFusion, DiscoFusion, V2VNetFusion, V2XViTFusion, When2commFusion
+from opencood.utils.transformation_utils import normalize_pairwise_tfm
+
+class CenterPointBaseline(nn.Module):
+    """
+    Single-scale fusion baseline (F-Cooper, Self-Att, DiscoNet, V2VNet, V2XViT, When2comm) with a point pillar backbone.
+ """ + def __init__(self, args): + super(CenterPointBaseline, self).__init__() + + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + is_resnet = args['base_bev_backbone'].get("resnet", False) + if is_resnet: + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + else: + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + self.voxel_size = args['voxel_size'] + self.out_size_factor = args['out_size_factor'] + self.cav_lidar_range = args['lidar_range'] + + if args['fusion_method'] == "max": + self.fusion_net = MaxFusion() + if args['fusion_method'] == "att": + self.fusion_net = AttFusion(args['att']['feat_dim']) + if args['fusion_method'] == "disconet": + self.fusion_net = DiscoFusion(args['disconet']['feat_dim']) + if args['fusion_method'] == "v2vnet": + self.fusion_net = V2VNetFusion(args['v2vnet']) + if args['fusion_method'] == 'v2xvit': + self.fusion_net = V2XViTFusion(args['v2xvit']) + if args['fusion_method'] == 'when2comm': + self.fusion_net = When2commFusion(args['when2comm']) + + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.compression = False + if "compression" in args: + self.compression = True + self.naive_compressor = NaiveCompressor(self.out_channel, args['compression']) + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 8 * args['anchor_number'], + kernel_size=1) + self.use_dir = False + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(self.out_channel, args['dir_args']['num_bins'] * args['anchor_number'], + kernel_size=1) # BIN_NUM = 2 + + if 'backbone_fix' in args.keys() and args['backbone_fix']: + self.backbone_fix() + + self.init_weight() + + def init_weight(self): + pi = 0.01 + nn.init.constant_(self.cls_head.bias, -np.log((1 - pi) / pi) ) + nn.init.normal_(self.reg_head.weight, mean=0, std=0.001) + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = 
self.scatter(batch_dict) + # calculate pairwise affine transformation matrix + _, _, H0, W0 = batch_dict['spatial_features'].shape # original feature map shape H0, W0 + t_matrix = normalize_pairwise_tfm(data_dict['pairwise_t_matrix'], H0, W0, self.voxel_size[0]) + batch_dict = self.backbone(batch_dict) + + spatial_features_2d = batch_dict['spatial_features_2d'] + + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + + if self.compression: + spatial_features_2d = self.naive_compressor(spatial_features_2d) + + fused_feature = self.fusion_net(spatial_features_2d, record_len, t_matrix) + + cls = self.cls_head(fused_feature) + bbox = self.reg_head(fused_feature) + + # 把bbox 的第二维度变成7 + _, bbox_temp = self.generate_predicted_boxes(cls, bbox) + + # print(bbox.equal(bbox_temp)) + output_dict = {'cls_preds': cls, + 'reg_preds': bbox_temp, + 'bbox_preds': bbox} # 计算loss的时候使用 'bbox', 在生成output的时候 'rm' + + if self.use_dir: + output_dict.update({'dir_preds': self.dir_head(fused_feature)}) + + return output_dict + + def generate_predicted_boxes(self, cls_preds, box_preds, dir_cls_preds=None): + """ + Args: + batch_size: + cls_preds: (N, H, W, C1) + box_preds: (N, H, W, C2) + dir_cls_preds: (N, H, W, C3) + + Returns: + batch_cls_preds: (B, num_boxes, num_classes) + batch_box_preds: (B, num_boxes, 7+C) + + """ + box_preds = box_preds.permute(0, 2, 3, 1).contiguous() + + batch, H, W, code_size = box_preds.size() ## code_size 表示的是预测的尺寸 + + # batch_reg = box_preds[:, 0:2, :, :] # x,y,z + # batch_hei = box_preds[:, 2:3, :, :] + # batch_dim = torch.exp(box_preds[:, 3:6, :, :]) + # # batch_dim = box_preds[:, 3:6, :, :] # w h l + # batch_rots = box_preds[:, 6:7, :, :] + # batch_rotc = box_preds[:, 7:8, :, :] + # rot = torch.atan2(batch_rots, batch_rotc) + + + box_preds = box_preds.reshape(batch, H*W, code_size) + + batch_reg = box_preds[..., 0:2] + # batch_hei = box_preds[..., 2:3] + # batch_dim = torch.exp(box_preds[..., 3:6]) + + h = box_preds[..., 3:4] * self.out_size_factor * self.voxel_size[0] + w = box_preds[..., 4:5] * self.out_size_factor * self.voxel_size[1] + l = box_preds[..., 5:6] * self.out_size_factor * self.voxel_size[2] + batch_dim = torch.cat([h,w,l], dim=-1) + batch_hei = box_preds[..., 2:3] * self.out_size_factor * self.voxel_size[2] + self.cav_lidar_range[2] + + batch_rots = box_preds[..., 6:7] + batch_rotc = box_preds[..., 7:8] + + rot = torch.atan2(batch_rots, batch_rotc) + + ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)]) + ys = ys.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + xs = xs.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + + xs = xs.view(batch, -1, 1) + batch_reg[:, :, 0:1] + ys = ys.view(batch, -1, 1) + batch_reg[:, :, 1:2] + + xs = xs * self.out_size_factor * self.voxel_size[0] + self.cav_lidar_range[0] ## 基于feature_map 的size求解真实的坐标 + ys = ys * self.out_size_factor * self.voxel_size[1] + self.cav_lidar_range[1] + + + batch_box_preds = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=2) + # batch_box_preds = batch_box_preds.reshape(batch, H, W, batch_box_preds.shape[-1]) + # batch_box_preds = batch_box_preds.permute(0, 3, 1, 2).contiguous() + + # batch_box_preds_temp = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=1) + # box_preds = box_preds.permute(0, 3, 1, 2).contiguous() + + # batch_cls_preds = cls_preds.view(batch, H*W, -1) + return cls_preds, batch_box_preds \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_baseline_multiclass.py 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_baseline_multiclass.py new file mode 100644 index 0000000000000000000000000000000000000000..e6657de7f59e464d213b62820c2a17dfd76f0bdc --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_baseline_multiclass.py @@ -0,0 +1,262 @@ +import torch.nn as nn +import numpy as np +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +# from opencood.models.sub_modules.compress_core import CompressCore +from opencood.models.sub_modules.naive_compress import NaiveCompressor +# from opencood.models.sub_modules.dcn_net import DCNNet +# from opencood.models.fuse_modules.where2comm import Where2comm +from opencood.models.fuse_modules.where2comm_attn import Where2comm +import torch +import torch.nn.functional as F + +class centerpointbaselinemulticlass(nn.Module): + def __init__(self, args): + super(centerpointbaselinemulticlass, self).__init__() + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + + + if 'resnet' in args['base_bev_backbone']: + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) + else: + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) + + self.voxel_size = args['voxel_size'] + self.out_size_factor = args['out_size_factor'] + self.cav_lidar_range = args['lidar_range'] + + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + # used to downsample the feature map for efficient computation + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.compression = False + if 'compression' in args and args['compression'] > 0: + self.compression = True + self.naive_compressor = NaiveCompressor(self.out_channel, args['compression']) + + self.dcn = False + if 'dcn' in args: + self.dcn = True + self.dcn_net = DCNNet(args['dcn']) + + # self.fusion_net = TransformerFusion(args['fusion_args']) + self.fusion_net = Where2comm(args['fusion_args']) + self.multi_scale = args['fusion_args']['multi_scale'] + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 8 * args['anchor_number'], + kernel_size=1) + if 'backbone_fix' in args.keys() and args['backbone_fix']: + self.backbone_fix() + + self.init_weight() + + def init_weight(self): + pi = 0.01 + nn.init.constant_(self.cls_head.bias, -np.log((1 - pi) / pi) ) + nn.init.normal_(self.reg_head.weight, mean=0, std=0.001) + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + 
p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, data_dict): + if type(data_dict) == dict: + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + # pairwise_t_matrix = data_dict['pairwise_t_matrix'] + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + batch_dict = self.backbone(batch_dict) + # N, C, H', W'. [N, 384, 100, 352] + spatial_features_2d = batch_dict['spatial_features_2d'] + # print(spatial_features_2d) + elif type(data_dict) == list: + spatial_features_2d = [] + for data in data_dict: + voxel_features = data['processed_lidar']['voxel_features'] + voxel_coords = data['processed_lidar']['voxel_coords'] + voxel_num_points = data['processed_lidar']['voxel_num_points'] + record_len = data['record_len'] + # pairwise_t_matrix = data_dict['pairwise_t_matrix'] + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + + batch_dict = self.backbone(batch_dict) + # N, C, H', W'. 
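+ # Rough shape walkthrough of the pillar pipeline above (illustrative; exact
+ # sizes depend on this run's voxelization settings):
+ #   voxel_features (M, 32, 4) --PillarVFE--> pillar features (M, 64)
+ #   --PointPillarScatter--> dense BEV canvas 'spatial_features' (N, 64, H, W)
+ #   --backbone--> downsampled BEV features 'spatial_features_2d' (N, C', H', W')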
+ spatial_feature_2d = batch_dict['spatial_features_2d'] + spatial_features_2d.append(spatial_feature_2d) + spatial_features_2d = torch.cat(spatial_features_2d) + else: + print("wrong type of data_dict") + + + # downsample feature to reduce memory + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + # compressor + if self.compression: + spatial_features_2d = self.naive_compressor(spatial_features_2d) + # dcn + if self.dcn: + spatial_features_2d = self.dcn_net(spatial_features_2d) + # spatial_features_2d is [sum(cav_num), 256, 50, 176] + # output only contains ego + # [B, 256, 50, 176] + psm_single = self.cls_head(spatial_features_2d) + # print(spatial_features_2d) + rm_single = self.reg_head(spatial_features_2d) + + fused_feature = spatial_features_2d + cls = self.cls_head(fused_feature) # fused_feature [B, 128, 96, 288] -> [B, 3, 96, 288] + bbox = self.reg_head(fused_feature) # fused_feature [B, 128, 96, 288] -> [B, 24, 96, 288] + + if not self.training: + _, C, H, W = cls.shape + cls = psm_single[0].unsqueeze(0).contiguous().view(1, -1, H, W) + bbox = rm_single[0].unsqueeze(0).contiguous().view(1, -1, H, W) + + + box_preds_for_infer = bbox.permute(0, 2, 3, 1).contiguous() + bbox_temp_list = [] + num_class = int(box_preds_for_infer.shape[3]/8) + box_preds_for_infer = box_preds_for_infer.view(box_preds_for_infer.shape[0], box_preds_for_infer.shape[1], box_preds_for_infer.shape[2], num_class, 8) + for i in range(num_class): + box_preds_for_infer_singleclass = box_preds_for_infer[:,:,:,i,:] + box_preds_for_infer_singleclass = box_preds_for_infer_singleclass.permute(0, 3, 1, 2) + _, bbox_temp = self.generate_predicted_boxes(cls[:, i, :, :], box_preds_for_infer_singleclass) + bbox_temp_list.append(bbox_temp) + bbox_temp_list = torch.stack(bbox_temp_list, dim=1) + + + _, bbox_temp = self.generate_predicted_boxes(cls, bbox) + + feature_list = [] + feature_regroup = self.regroup(spatial_features_2d, record_len) + for ego_id in range(len(feature_regroup)): + feature_list.append(feature_regroup[ego_id][0:1]) + feature_egos = torch.cat(feature_list, dim=0) + result_dict = {'fused_feature':feature_egos} + + output_dict = {'cls_preds': cls, + 'reg_preds': bbox_temp, + 'reg_preds_multiclass': bbox_temp_list, + 'bbox_preds': bbox + } + output_dict.update(result_dict) + + + + _, bbox_temp_single = self.generate_predicted_boxes(psm_single, rm_single) + + output_dict.update({'cls_preds_single': psm_single, + 'reg_preds_single': bbox_temp_single, + 'bbox_preds_single': rm_single, + # 'comm_rate': communication_rates + }) + + + return output_dict + + def generate_predicted_boxes(self, cls_preds, box_preds, dir_cls_preds=None): + """ + Args: + batch_size: + cls_preds: (N, H, W, C1) + box_preds: (N, H, W, C2) + dir_cls_preds: (N, H, W, C3) + + Returns: + batch_cls_preds: (B, num_boxes, num_classes) + batch_box_preds: (B, num_boxes, 7+C) + + """ + box_preds = box_preds.permute(0, 2, 3, 1).contiguous() + + batch, H, W, code_size = box_preds.size() ## code_size 表示的是预测的尺寸 + + box_preds = box_preds.reshape(batch, H*W, code_size) + + batch_reg = box_preds[..., 0:2] + # batch_hei = box_preds[..., 2:3] + # batch_dim = torch.exp(box_preds[..., 3:6]) + + h = box_preds[..., 3:4] * self.out_size_factor * self.voxel_size[0] + w = box_preds[..., 4:5] * self.out_size_factor * self.voxel_size[1] + l = box_preds[..., 5:6] * self.out_size_factor * self.voxel_size[2] + batch_dim = torch.cat([h,w,l], dim=-1) + batch_hei = box_preds[..., 2:3] * self.out_size_factor * self.voxel_size[2] + 
self.cav_lidar_range[2] + + batch_rots = box_preds[..., 6:7] + batch_rotc = box_preds[..., 7:8] + + rot = torch.atan2(batch_rots, batch_rotc) + + ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)]) + ys = ys.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + xs = xs.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + + xs = xs.view(batch, -1, 1) + batch_reg[:, :, 0:1] + ys = ys.view(batch, -1, 1) + batch_reg[:, :, 1:2] + + xs = xs * self.out_size_factor * self.voxel_size[0] + self.cav_lidar_range[0] ## recover real-world coordinates from the feature-map grid + ys = ys * self.out_size_factor * self.voxel_size[1] + self.cav_lidar_range[1] + + + batch_box_preds = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=2) + + return cls_preds, batch_box_preds diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_baseline_multiscale.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_baseline_multiscale.py new file mode 100644 index 0000000000000000000000000000000000000000..18b16a6723353ee9bad972a78e3dcdc6a101460e --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_baseline_multiscale.py @@ -0,0 +1,209 @@ +# Author: Yifan Lu +# a class that integrates multiple simple fusion methods (multi-scale) +# Supports F-Cooper, Self-Att, DiscoNet(wo KD), V2VNet, V2XViT, When2comm + +import torch.nn as nn +from icecream import ic +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.fuse_modules.f_cooper_fuse import SpatialFusion +from opencood.models.fuse_modules.fusion_in_one import MaxFusion, AttFusion, DiscoFusion, V2VNetFusion, V2XViTFusion, When2commFusion +from opencood.utils.transformation_utils import normalize_pairwise_tfm +import numpy as np +import torch + +class CenterPointBaselineMultiscale(nn.Module): + """ + F-Cooper implementation with point pillar backbone.
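+ 
+ Despite the F-Cooper wording above, this variant fuses features per backbone
+ level (see the get_multiscale_feature / decode_multiscale_feature calls in
+ forward); only the 'max' and 'att' fusion methods are wired up in __init__.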
+ """ + def __init__(self, args): + super(CenterPointBaselineMultiscale, self).__init__() + + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) + self.voxel_size = args['voxel_size'] + self.out_size_factor = args['out_size_factor'] + self.cav_lidar_range = args['lidar_range'] + + self.fusion_net = nn.ModuleList() + for i in range(len(args['base_bev_backbone']['layer_nums'])): + if args['fusion_method'] == "max": + self.fusion_net.append(MaxFusion()) + if args['fusion_method'] == "att": + self.fusion_net.append(AttFusion(args['att']['feat_dim'][i])) + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.compression = False + if "compression" in args: + self.compression = True + self.naive_compressor = NaiveCompressor(64, args['compression']) + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 8 * args['anchor_number'], + kernel_size=1) + self.use_dir = False + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(self.out_channel, args['dir_args']['num_bins'] * args['anchor_number'], + kernel_size=1) # BIN_NUM = 2 + + if 'backbone_fix' in args.keys() and args['backbone_fix']: + self.backbone_fix() + + self.init_weight() + + def init_weight(self): + pi = 0.01 + nn.init.constant_(self.cls_head.bias, -np.log((1 - pi) / pi) ) + nn.init.normal_(self.reg_head.weight, mean=0, std=0.001) + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + # calculate pairwise affine transformation matrix + _, _, H0, W0 = batch_dict['spatial_features'].shape # original feature map shape H0, W0 + t_matrix = normalize_pairwise_tfm(data_dict['pairwise_t_matrix'], H0, W0, self.voxel_size[0]) + + spatial_features = batch_dict['spatial_features'] + + if self.compression: + spatial_features = self.naive_compressor(spatial_features) + + # multiscale fusion + feature_list = self.backbone.get_multiscale_feature(spatial_features) + fused_feature_list = [] + for i, fuse_module 
in enumerate(self.fusion_net): + fused_feature_list.append(fuse_module(feature_list[i], record_len, t_matrix)) + fused_feature = self.backbone.decode_multiscale_feature(fused_feature_list) + + if self.shrink_flag: + fused_feature = self.shrink_conv(fused_feature) + + cls = self.cls_head(fused_feature) + bbox = self.reg_head(fused_feature) + # 把bbox 的第二维度变成7 + _, bbox_temp = self.generate_predicted_boxes(cls, bbox) + + # print(bbox.equal(bbox_temp)) + output_dict = {'cls_preds': cls, + 'reg_preds': bbox_temp, + 'bbox_preds': bbox} # 计算loss的时候使用 'bbox', 在生成output的时候 'rm' + + if self.use_dir: + output_dict.update({'dir_preds': self.dir_head(fused_feature)}) + + return output_dict + + def generate_predicted_boxes(self, cls_preds, box_preds, dir_cls_preds=None): + """ + Args: + batch_size: + cls_preds: (N, H, W, C1) + box_preds: (N, H, W, C2) + dir_cls_preds: (N, H, W, C3) + + Returns: + batch_cls_preds: (B, num_boxes, num_classes) + batch_box_preds: (B, num_boxes, 7+C) + + """ + box_preds = box_preds.permute(0, 2, 3, 1).contiguous() + + batch, H, W, code_size = box_preds.size() ## code_size 表示的是预测的尺寸 + + # batch_reg = box_preds[:, 0:2, :, :] # x,y,z + # batch_hei = box_preds[:, 2:3, :, :] + # batch_dim = torch.exp(box_preds[:, 3:6, :, :]) + # # batch_dim = box_preds[:, 3:6, :, :] # w h l + # batch_rots = box_preds[:, 6:7, :, :] + # batch_rotc = box_preds[:, 7:8, :, :] + # rot = torch.atan2(batch_rots, batch_rotc) + + + box_preds = box_preds.reshape(batch, H*W, code_size) + + batch_reg = box_preds[..., 0:2] + # batch_hei = box_preds[..., 2:3] + # batch_dim = torch.exp(box_preds[..., 3:6]) + + h = box_preds[..., 3:4] * self.out_size_factor * self.voxel_size[0] + w = box_preds[..., 4:5] * self.out_size_factor * self.voxel_size[1] + l = box_preds[..., 5:6] * self.out_size_factor * self.voxel_size[2] + batch_dim = torch.cat([h,w,l], dim=-1) + batch_hei = box_preds[..., 2:3] * self.out_size_factor * self.voxel_size[2] + self.cav_lidar_range[2] + + batch_rots = box_preds[..., 6:7] + batch_rotc = box_preds[..., 7:8] + + rot = torch.atan2(batch_rots, batch_rotc) + + ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)]) + ys = ys.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + xs = xs.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + + xs = xs.view(batch, -1, 1) + batch_reg[:, :, 0:1] + ys = ys.view(batch, -1, 1) + batch_reg[:, :, 1:2] + + xs = xs * self.out_size_factor * self.voxel_size[0] + self.cav_lidar_range[0] ## 基于feature_map 的size求解真实的坐标 + ys = ys * self.out_size_factor * self.voxel_size[1] + self.cav_lidar_range[1] + + + batch_box_preds = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=2) + # batch_box_preds = batch_box_preds.reshape(batch, H, W, batch_box_preds.shape[-1]) + # batch_box_preds = batch_box_preds.permute(0, 3, 1, 2).contiguous() + + # batch_box_preds_temp = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=1) + # box_preds = box_preds.permute(0, 3, 1, 2).contiguous() + + # batch_cls_preds = cls_preds.view(batch, H*W, -1) + return cls_preds, batch_box_preds \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_codriving.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_codriving.py new file mode 100644 index 0000000000000000000000000000000000000000..e6b3fb2e35e440db238cc28e436804c7a753a0bf --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_codriving.py @@ -0,0 +1,301 @@ +import torch.nn as nn +import numpy as np +from 
opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.fuse_modules.codriving_attn import Where2comm +import torch + +class centerpointcodriving(nn.Module): + def __init__(self, args): + super(centerpointcodriving, self).__init__() + + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + if 'resnet' in args['base_bev_backbone']: + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) + else: + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) + + self.voxel_size = args['voxel_size'] + self.out_size_factor = args['out_size_factor'] + self.cav_lidar_range = args['lidar_range'] + + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + # used to downsample the feature map for efficient computation + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.compression = False + if 'compression' in args and args['compression'] > 0: + self.compression = True + self.naive_compressor = NaiveCompressor(self.out_channel, args['compression']) + + self.dcn = False + if 'dcn' in args: + self.dcn = True + self.dcn_net = DCNNet(args['dcn']) + + # self.fusion_net = TransformerFusion(args['fusion_args']) + self.fusion_net = Where2comm(args['fusion_args']) + self.multi_scale = args['fusion_args']['multi_scale'] + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 8 * args['anchor_number'], + kernel_size=1) + if 'backbone_fix' in args.keys() and args['backbone_fix']: + self.backbone_fix() + + if 'early_fusion' in args: + self.early_flag = args['early_fusion'] + else: + self.early_flag = False + + self.init_weight() + + def init_weight(self): + pi = 0.01 + nn.init.constant_(self.cls_head.bias, -np.log((1 - pi) / pi) ) + nn.init.normal_(self.reg_head.weight, mean=0, std=0.001) + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, data_dict, waypoints=None): + voxel_features = data_dict['processed_lidar']['voxel_features'] # e.g. 
(34814,32,4) + voxel_coords = data_dict['processed_lidar']['voxel_coords'] # e.g (34814,4) + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] # e.g (34814) + record_len = data_dict['record_len'] + + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + batch_dict = self.backbone(batch_dict) + # N, C, H', W'. [N, 384, 100, 352] + spatial_features_2d = batch_dict['spatial_features_2d'] + + + + # feat_3d = batch_dict['spatial_features'][0].detach().cpu().numpy() # shape: (C, H, W) + # feat_2d = batch_dict['spatial_features_2d'][0].detach().cpu().numpy() # shape: (C, H, W) + + # # 2) Convert each to a grayscale image by averaging across channels + # feat_3d_gray = np.mean(feat_3d, axis=0) # shape: (H, W) + # feat_2d_gray = np.mean(feat_2d, axis=0) # shape: (H, W) + + # # 3) Normalize each to [0, 255] + # def normalize_to_uint8(img): + # img_min, img_max = img.min(), img.max() + # if img_max - img_min < 1e-6: + # # Edge case if everything is the same value + # return np.zeros_like(img, dtype=np.uint8) + # normalized = (img - img_min) / (img_max - img_min) + # return (normalized * 255).astype(np.uint8) + + # feat_3d_gray = normalize_to_uint8(feat_3d_gray) + # feat_2d_gray = normalize_to_uint8(feat_2d_gray) + # import cv2 + # # 4) Save as images + # cv2.imwrite("debug/spatial_features.jpg", feat_3d_gray) + # cv2.imwrite("debug/spatial_features_2d.jpg", feat_2d_gray) + # import pdb; pdb.set_trace() + + + + + # downsample feature to reduce memory + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) # [B, 384, 96, 288] -> [B, 128, 96, 288] + # compressor + if self.compression: + spatial_features_2d = self.naive_compressor(spatial_features_2d) + # dcn + if self.dcn: + spatial_features_2d = self.dcn_net(spatial_features_2d) + # spatial_features_2d is [sum(cav_num), 256, 50, 176] + # output only contains ego + # [B, 256, 50, 176] + psm_single = self.cls_head(spatial_features_2d) # spatial_features_2d: [B, 128, 96, 288] + rm_single = self.reg_head(spatial_features_2d) + + # print('spatial_features_2d: ', spatial_features_2d.shape) + if self.multi_scale: + fused_feature, communication_rates, result_dict = self.fusion_net(batch_dict['spatial_features'], # [BN, 64, 192, 576] + psm_single, + record_len, + pairwise_t_matrix, + self.backbone, + waypoints) + # downsample feature to reduce memory + if self.shrink_flag: + fused_feature = self.shrink_conv(fused_feature) + elif self.early_flag: + fused_feature_tuple = self.regroup(spatial_features_2d, record_len) + feature_bank = [] + for feature_ in fused_feature_tuple: + feature_bank.append(feature_[0]) + fused_feature = torch.stack(feature_bank, dim=0) + result_dict = {} + communication_rates = 0 + else: + fused_feature, communication_rates, result_dict = self.fusion_net(spatial_features_2d, + psm_single, + record_len, + pairwise_t_matrix) + + + cls = self.cls_head(fused_feature) + bbox = self.reg_head(fused_feature) + + box_preds_for_infer = bbox.permute(0, 2, 3, 1).contiguous() + bbox_temp_list = [] + num_class = int(box_preds_for_infer.shape[3]/8) + box_preds_for_infer = box_preds_for_infer.view(box_preds_for_infer.shape[0], box_preds_for_infer.shape[1], box_preds_for_infer.shape[2], num_class, 8) + for i in range(num_class): + 
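# The loop below decodes each class separately: class i owns one group of 8
+ # regression channels (x/y offset, z, three sizes, sin/cos of yaw), which is
+ # decoded with its heatmap cls[:, i, :, :] into (B, H*W, 7) boxes.
+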
box_preds_for_infer_singleclass = box_preds_for_infer[:,:,:,i,:] + box_preds_for_infer_singleclass = box_preds_for_infer_singleclass.permute(0, 3, 1, 2) + _, bbox_temp = self.generate_predicted_boxes(cls[:, i, :, :], box_preds_for_infer_singleclass) + bbox_temp_list.append(bbox_temp) + bbox_temp_list = torch.stack(bbox_temp_list, dim=1) + + + _, bbox_temp = self.generate_predicted_boxes(cls, bbox) + + output_dict = {'cls_preds': cls, + 'reg_preds': bbox_temp, + 'reg_preds_multiclass': bbox_temp_list, + 'bbox_preds': bbox + } + + result_dict.update({'fused_feature':fused_feature}) + + output_dict.update(result_dict) + _, bbox_temp_single = self.generate_predicted_boxes(psm_single, rm_single) + output_dict.update({'cls_preds_single': psm_single, + 'reg_preds_single': bbox_temp_single, + 'bbox_preds_single': rm_single, + 'comm_rate': communication_rates, + }) + + ####### output box for single head ########## + psm_single_regroup = self.regroup(psm_single, record_len) + rm_single_regroup = self.regroup(rm_single, record_len) + psm_single_ego_list = [] + rm_single_ego_list = [] + for b in range(len(record_len)): + psm_single_ego_list.append(psm_single_regroup[b][0:1]) + rm_single_ego_list.append(rm_single_regroup[b][0:1]) + psm_single_ego = torch.cat((psm_single_ego_list), 0) + rm_single_ego = torch.cat((rm_single_ego_list), 0) + # generate box + box_preds_for_infer = rm_single_ego.permute(0, 2, 3, 1).contiguous() + bbox_temp_list_single = [] + num_class = int(box_preds_for_infer.shape[3]/8) + box_preds_for_infer = box_preds_for_infer.view(box_preds_for_infer.shape[0], box_preds_for_infer.shape[1], box_preds_for_infer.shape[2], num_class, 8) + for i in range(num_class): + box_preds_for_infer_singleclass = box_preds_for_infer[:,:,:,i,:] + box_preds_for_infer_singleclass = box_preds_for_infer_singleclass.permute(0, 3, 1, 2) + _, bbox_temp = self.generate_predicted_boxes(psm_single_ego[:, i, :, :], box_preds_for_infer_singleclass) + bbox_temp_list_single.append(bbox_temp) + bbox_temp_list_single = torch.stack(bbox_temp_list_single, dim=1) + output_dict.update({'cls_preds_single_ego': psm_single_ego, + 'reg_preds_multiclass_single_ego': bbox_temp_list_single, + 'bbox_preds_single_ego': rm_single_ego + }) + + return output_dict + + def generate_predicted_boxes(self, cls_preds, box_preds, dir_cls_preds=None): + """ + Args: + batch_size: + cls_preds: (N, H, W, C1) + box_preds: (N, H, W, C2) + dir_cls_preds: (N, H, W, C3) + + Returns: + batch_cls_preds: (B, num_boxes, num_classes) + batch_box_preds: (B, num_boxes, 7+C) + + """ + box_preds = box_preds.permute(0, 2, 3, 1).contiguous() + + batch, H, W, code_size = box_preds.size() ## code_size 表示的是预测的尺寸 + + box_preds = box_preds.reshape(batch, H*W, code_size) + + batch_reg = box_preds[..., 0:2] + # batch_hei = box_preds[..., 2:3] + # batch_dim = torch.exp(box_preds[..., 3:6]) + + h = box_preds[..., 3:4] * self.out_size_factor * self.voxel_size[0] + w = box_preds[..., 4:5] * self.out_size_factor * self.voxel_size[1] + l = box_preds[..., 5:6] * self.out_size_factor * self.voxel_size[2] + batch_dim = torch.cat([h,w,l], dim=-1) + batch_hei = box_preds[..., 2:3] * self.out_size_factor * self.voxel_size[2] + self.cav_lidar_range[2] + + batch_rots = box_preds[..., 6:7] + batch_rotc = box_preds[..., 7:8] + + rot = torch.atan2(batch_rots, batch_rotc) + + ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)]) + ys = ys.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + xs = xs.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + + xs = 
xs.view(batch, -1, 1) + batch_reg[:, :, 0:1] + ys = ys.view(batch, -1, 1) + batch_reg[:, :, 1:2] + + xs = xs * self.out_size_factor * self.voxel_size[0] + self.cav_lidar_range[0] ## 基于feature_map 的size求解真实的坐标 + ys = ys * self.out_size_factor * self.voxel_size[1] + self.cav_lidar_range[1] + + + batch_box_preds = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=2) + + return cls_preds, batch_box_preds \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_intermediate.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_intermediate.py new file mode 100644 index 0000000000000000000000000000000000000000..5b31cb8d3abbab5e6c152d24d69cb4a945f86719 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_intermediate.py @@ -0,0 +1,143 @@ +# -*- coding: utf-8 -*- + + +import torch +import torch.nn as nn +import numpy as np + + +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.att_bev_backbone import AttBEVBackbone + + +class CenterPointIntermediate(nn.Module): + def __init__(self, args): + super(CenterPointIntermediate, self).__init__() + + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + self.backbone = AttBEVBackbone(args['base_bev_backbone'], 64) + # self.out_size_factor = args[''] + self.voxel_size = args['voxel_size'] + self.out_size_factor = args['out_size_factor'] + self.cav_lidar_range = args['lidar_range'] + self.conv_cls = nn.Conv2d( + 128*3, 1, + kernel_size=1 + ) + self.conv_box = nn.Conv2d( + 128*3, 8, ## xyz,hwl,sin(r),cos(r) + kernel_size=1 + ) # in_channels out_channels + self.init_weight() + + def init_weight(self): + pi = 0.01 + nn.init.constant_(self.conv_cls.bias, -np.log((1 - pi) / pi) ) + nn.init.normal_(self.conv_box.weight, mean=0, std=0.001) + + + + def forward(self, data_dict): + + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + lidar_pose = data_dict['lidar_pose'] + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len, + 'pairwise_t_matrix': pairwise_t_matrix} + + batch_dict = self.pillar_vfe(batch_dict) + batch_dict = self.scatter(batch_dict) + batch_dict = self.backbone(batch_dict) + + spatial_features_2d = batch_dict['spatial_features_2d'] + + # if self.dcn and not self.before_backbone: + # spatial_features_2d = self.dcn_net(spatial_features_2d) + + cls = self.conv_cls(spatial_features_2d) ## [2, 1, h, w] + bbox = self.conv_box(spatial_features_2d) ## [2, 8, h, w] + + + # 把bbox 的第二维度变成7 + _, bbox_temp = self.generate_predicted_boxes(cls, bbox) + + # print(bbox.equal(bbox_temp)) + output_dict = {'cls_preds': cls, + 'reg_preds': bbox_temp, + 'cls': cls, + 'bbox_preds':bbox} # 计算loss的时候使用 'bbox', 在生成output的时候 'rm' + + return output_dict + + + def generate_predicted_boxes(self, cls_preds, box_preds, dir_cls_preds=None): + """ + Args: + batch_size: + cls_preds: (N, H, W, C1) + box_preds: (N, H, W, C2) + dir_cls_preds: 
(N, H, W, C3) + + Returns: + batch_cls_preds: (B, num_boxes, num_classes) + batch_box_preds: (B, num_boxes, 7+C) + + """ + box_preds = box_preds.permute(0, 2, 3, 1).contiguous() + + batch, H, W, code_size = box_preds.size() ## code_size 表示的是预测的尺寸 + + # batch_reg = box_preds[:, 0:2, :, :] # x,y,z + # batch_hei = box_preds[:, 2:3, :, :] + # batch_dim = torch.exp(box_preds[:, 3:6, :, :]) + # # batch_dim = box_preds[:, 3:6, :, :] # w h l + # batch_rots = box_preds[:, 6:7, :, :] + # batch_rotc = box_preds[:, 7:8, :, :] + # rot = torch.atan2(batch_rots, batch_rotc) + + + box_preds = box_preds.reshape(batch, H*W, code_size) + + batch_reg = box_preds[..., 0:2] + batch_hei = box_preds[..., 2:3] + + batch_dim = torch.exp(box_preds[..., 3:6]) + + batch_rots = box_preds[..., 6:7] + batch_rotc = box_preds[..., 7:8] + + rot = torch.atan2(batch_rots, batch_rotc) + + ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)]) + ys = ys.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + xs = xs.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + + xs = xs.view(batch, -1, 1) + batch_reg[:, :, 0:1] + ys = ys.view(batch, -1, 1) + batch_reg[:, :, 1:2] + + xs = xs * self.out_size_factor * self.voxel_size[0] + self.cav_lidar_range[0] ## 基于feature_map 的size求解真实的坐标 + ys = ys * self.out_size_factor * self.voxel_size[1] + self.cav_lidar_range[1] + + + batch_box_preds = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=2) + # batch_box_preds = batch_box_preds.reshape(batch, H, W, batch_box_preds.shape[-1]) + # batch_box_preds = batch_box_preds.permute(0, 3, 1, 2).contiguous() + + # batch_box_preds_temp = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=1) + # box_preds = box_preds.permute(0, 3, 1, 2).contiguous() + + # batch_cls_preds = cls_preds.view(batch, H*W, -1) + return cls_preds, batch_box_preds diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_where2comm.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_where2comm.py new file mode 100644 index 0000000000000000000000000000000000000000..57f9d040ed2cf42d830b229983e9184def345290 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_where2comm.py @@ -0,0 +1,226 @@ +import torch.nn as nn +import numpy as np +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.sub_modules.dcn_net import DCNNet +# from opencood.models.fuse_modules.where2comm import Where2comm +from opencood.models.fuse_modules.where2comm_attn import Where2comm +import torch + +class CenterPointWhere2comm(nn.Module): + def __init__(self, args): + super(CenterPointWhere2comm, self).__init__() + + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + if 'resnet' in args['base_bev_backbone']: + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) + else: + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) + + self.voxel_size = args['voxel_size'] + self.out_size_factor = 
args['out_size_factor'] + self.cav_lidar_range = args['lidar_range'] + + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + # used to downsample the feature map for efficient computation + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.compression = False + if 'compression' in args and args['compression'] > 0: + self.compression = True + self.naive_compressor = NaiveCompressor(self.out_channel, args['compression']) + + self.dcn = False + if 'dcn' in args: + self.dcn = True + self.dcn_net = DCNNet(args['dcn']) + + # self.fusion_net = TransformerFusion(args['fusion_args']) + self.fusion_net = Where2comm(args['fusion_args']) + self.multi_scale = args['fusion_args']['multi_scale'] + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 8 * args['anchor_number'], + kernel_size=1) + if 'backbone_fix' in args.keys() and args['backbone_fix']: + self.backbone_fix() + + self.init_weight() + + def init_weight(self): + pi = 0.01 + nn.init.constant_(self.cls_head.bias, -np.log((1 - pi) / pi) ) + nn.init.normal_(self.reg_head.weight, mean=0, std=0.001) + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + batch_dict = self.backbone(batch_dict) + # N, C, H', W'. 
[N, 384, 100, 352] + spatial_features_2d = batch_dict['spatial_features_2d'] + + # downsample feature to reduce memory + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + # compressor + if self.compression: + spatial_features_2d = self.naive_compressor(spatial_features_2d) + # dcn + if self.dcn: + spatial_features_2d = self.dcn_net(spatial_features_2d) + # spatial_features_2d is [sum(cav_num), 256, 50, 176] + # output only contains ego + # [B, 256, 50, 176] + psm_single = self.cls_head(spatial_features_2d) + rm_single = self.reg_head(spatial_features_2d) + + # print('spatial_features_2d: ', spatial_features_2d.shape) + if self.multi_scale: + fused_feature, communication_rates, result_dict = self.fusion_net(batch_dict['spatial_features'], + psm_single, + record_len, + pairwise_t_matrix, + self.backbone) + # downsample feature to reduce memory + if self.shrink_flag: + fused_feature = self.shrink_conv(fused_feature) + else: + fused_feature, communication_rates, result_dict = self.fusion_net(spatial_features_2d, + psm_single, + record_len, + pairwise_t_matrix) + + + # print('fused_feature: ', fused_feature.shape) + cls = self.cls_head(fused_feature) + bbox = self.reg_head(fused_feature) + + _, bbox_temp = self.generate_predicted_boxes(cls, bbox) + + output_dict = {'cls_preds': cls, + 'reg_preds': bbox_temp, + 'bbox_preds': bbox + } + output_dict.update(result_dict) + + _, bbox_temp_single = self.generate_predicted_boxes(psm_single, rm_single) + + output_dict.update({'cls_preds_single': psm_single, + 'reg_preds_single': bbox_temp_single, + 'bbox_preds_single': rm_single, + 'comm_rate': communication_rates + }) + return output_dict + + def generate_predicted_boxes(self, cls_preds, box_preds, dir_cls_preds=None): + """ + Args: + batch_size: + cls_preds: (N, H, W, C1) + box_preds: (N, H, W, C2) + dir_cls_preds: (N, H, W, C3) + + Returns: + batch_cls_preds: (B, num_boxes, num_classes) + batch_box_preds: (B, num_boxes, 7+C) + + """ + box_preds = box_preds.permute(0, 2, 3, 1).contiguous() + + batch, H, W, code_size = box_preds.size() ## code_size 表示的是预测的尺寸 + + box_preds = box_preds.reshape(batch, H*W, code_size) + + batch_reg = box_preds[..., 0:2] + # batch_hei = box_preds[..., 2:3] + # batch_dim = torch.exp(box_preds[..., 3:6]) + + h = box_preds[..., 3:4] * self.out_size_factor * self.voxel_size[0] + w = box_preds[..., 4:5] * self.out_size_factor * self.voxel_size[1] + l = box_preds[..., 5:6] * self.out_size_factor * self.voxel_size[2] + batch_dim = torch.cat([h,w,l], dim=-1) + batch_hei = box_preds[..., 2:3] * self.out_size_factor * self.voxel_size[2] + self.cav_lidar_range[2] + + batch_rots = box_preds[..., 6:7] + batch_rotc = box_preds[..., 7:8] + + rot = torch.atan2(batch_rots, batch_rotc) + + ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)]) + ys = ys.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + xs = xs.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + + xs = xs.view(batch, -1, 1) + batch_reg[:, :, 0:1] + ys = ys.view(batch, -1, 1) + batch_reg[:, :, 1:2] + + xs = xs * self.out_size_factor * self.voxel_size[0] + self.cav_lidar_range[0] ## 基于feature_map 的size求解真实的坐标 + ys = ys * self.out_size_factor * self.voxel_size[1] + self.cav_lidar_range[1] + + + batch_box_preds = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=2) + # batch_box_preds = batch_box_preds.reshape(batch, H, W, batch_box_preds.shape[-1]) + # batch_box_preds = batch_box_preds.permute(0, 3, 1, 2).contiguous() + + # batch_box_preds_temp = torch.cat([xs, ys, 
batch_hei, batch_dim, rot], dim=1) + # box_preds = box_preds.permute(0, 3, 1, 2).contiguous() + + # batch_cls_preds = cls_preds.view(batch, H*W, -1) + return cls_preds, batch_box_preds \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_where2comm_multiclass.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_where2comm_multiclass.py new file mode 100644 index 0000000000000000000000000000000000000000..10a4961e69f13a0444814c9f5a9d6c39dd740a9a --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/center_point_where2comm_multiclass.py @@ -0,0 +1,246 @@ +import torch.nn as nn +import numpy as np +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +# from opencood.models.sub_modules.dcn_net import DCNNet +# from opencood.models.fuse_modules.where2comm import Where2comm +from opencood.models.fuse_modules.where2comm_attn import Where2comm +import torch + +class centerpointwhere2commmulticlass(nn.Module): + def __init__(self, args): + super(centerpointwhere2commmulticlass, self).__init__() + + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + if 'resnet' in args['base_bev_backbone']: + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) + else: + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) + + self.voxel_size = args['voxel_size'] + self.out_size_factor = args['out_size_factor'] + self.cav_lidar_range = args['lidar_range'] + + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + # used to downsample the feature map for efficient computation + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.compression = False + if 'compression' in args and args['compression'] > 0: + self.compression = True + self.naive_compressor = NaiveCompressor(self.out_channel, args['compression']) + + self.dcn = False + if 'dcn' in args: + self.dcn = True + self.dcn_net = DCNNet(args['dcn']) + + # self.fusion_net = TransformerFusion(args['fusion_args']) + self.fusion_net = Where2comm(args['fusion_args']) + self.multi_scale = args['fusion_args']['multi_scale'] + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 8 * args['anchor_number'], + kernel_size=1) + if 'backbone_fix' in args.keys() and args['backbone_fix']: + self.backbone_fix() + + if 'early_fusion' in args: + self.early_flag = args['early_fusion'] + else: + self.early_flag = False + + self.init_weight() + + def init_weight(self): + pi = 0.01 + nn.init.constant_(self.cls_head.bias, -np.log((1 - pi) / pi) ) + nn.init.normal_(self.reg_head.weight, mean=0, std=0.001) + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + 
p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] # (34814,32,4) + voxel_coords = data_dict['processed_lidar']['voxel_coords'] # (34814,4) + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] # (34814) + record_len = data_dict['record_len'] + + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + batch_dict = self.backbone(batch_dict) + # N, C, H', W'. [N, 384, 100, 352] + spatial_features_2d = batch_dict['spatial_features_2d'] + + # downsample feature to reduce memory + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + # compressor + if self.compression: + spatial_features_2d = self.naive_compressor(spatial_features_2d) + # dcn + if self.dcn: + spatial_features_2d = self.dcn_net(spatial_features_2d) + # spatial_features_2d is [sum(cav_num), 256, 50, 176] + # output only contains ego + # [B, 256, 50, 176] + psm_single = self.cls_head(spatial_features_2d) + rm_single = self.reg_head(spatial_features_2d) + + # print('spatial_features_2d: ', spatial_features_2d.shape) + if self.multi_scale: + fused_feature, communication_rates, result_dict = self.fusion_net(batch_dict['spatial_features'], + psm_single, + record_len, + pairwise_t_matrix, + self.backbone) + # downsample feature to reduce memory + if self.shrink_flag: + fused_feature = self.shrink_conv(fused_feature) + elif self.early_flag: + fused_feature_tuple = self.regroup(spatial_features_2d, record_len) + feature_bank = [] + for feature_ in fused_feature_tuple: + feature_bank.append(feature_[0]) + fused_feature = torch.stack(feature_bank, dim=0) + result_dict = {} + communication_rates = 0 + else: + fused_feature, communication_rates, result_dict = self.fusion_net(spatial_features_2d, + psm_single, + record_len, + pairwise_t_matrix) + + + # print('fused_feature: ', fused_feature.shape) + cls = self.cls_head(fused_feature) # fused_feature [12, 128, 96, 288] -> [12, 3, 96, 288] + bbox = self.reg_head(fused_feature) # fused_feature [12, 128, 96, 288] -> [12, 24, 96, 288] + + box_preds_for_infer = bbox.permute(0, 2, 3, 1).contiguous() + bbox_temp_list = [] + num_class = int(box_preds_for_infer.shape[3]/8) + box_preds_for_infer = box_preds_for_infer.view(box_preds_for_infer.shape[0], box_preds_for_infer.shape[1], box_preds_for_infer.shape[2], num_class, 8) + for i in range(num_class): # num_class + box_preds_for_infer_singleclass = box_preds_for_infer[:,:,:,i,:] + box_preds_for_infer_singleclass = box_preds_for_infer_singleclass.permute(0, 3, 1, 2) + _, bbox_temp = self.generate_predicted_boxes(cls[:, i, :, :], box_preds_for_infer_singleclass) + 
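# bbox_temp is (B, H*W, 7): (x, y, z, h, w, l, yaw). x/y are recovered as
+ # (grid_index + predicted_offset) * out_size_factor * voxel_size + lidar_range_min,
+ # and yaw = atan2(sin_pred, cos_pred); e.g. assuming out_size_factor=2 and
+ # voxel_size[0]=0.125 (illustrative values), grid x=100 with offset 0.3 maps to
+ # 100.3 * 0.25 + x_min = 25.075 + x_min.
+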
bbox_temp_list.append(bbox_temp) + bbox_temp_list = torch.stack(bbox_temp_list, dim=1) + + + _, bbox_temp = self.generate_predicted_boxes(cls, bbox) + + output_dict = {'cls_preds': cls, # (4,1,100,100) -> [1, 3, 92, 92] + 'reg_preds': bbox_temp, #(4,10000,7) + 'reg_preds_multiclass': bbox_temp_list, # [1, 3, 27648, 7] + 'bbox_preds': bbox #(4,8,100,100) -> [1, 24, 92, 92] + } + output_dict.update(result_dict) + + _, bbox_temp_single = self.generate_predicted_boxes(psm_single, rm_single) + + output_dict.update({'cls_preds_single': psm_single, # [12, 1, 100, 100] + 'reg_preds_single': bbox_temp_single, # [12, 10000, 7] + 'bbox_preds_single': rm_single, # [12, 8, 100, 100] + 'comm_rate': communication_rates + }) + return output_dict + + def generate_predicted_boxes(self, cls_preds, box_preds, dir_cls_preds=None): + """ + Args: + batch_size: + cls_preds: (N, H, W, C1) + box_preds: (N, H, W, C2) + dir_cls_preds: (N, H, W, C3) + + Returns: + batch_cls_preds: (B, num_boxes, num_classes) + batch_box_preds: (B, num_boxes, 7+C) + + """ + box_preds = box_preds.permute(0, 2, 3, 1).contiguous() + + batch, H, W, code_size = box_preds.size() ## code_size 表示的是预测的尺寸 + + box_preds = box_preds.reshape(batch, H*W, code_size) + + batch_reg = box_preds[..., 0:2] + # batch_hei = box_preds[..., 2:3] + # batch_dim = torch.exp(box_preds[..., 3:6]) + + h = box_preds[..., 3:4] * self.out_size_factor * self.voxel_size[0] + w = box_preds[..., 4:5] * self.out_size_factor * self.voxel_size[1] + l = box_preds[..., 5:6] * self.out_size_factor * self.voxel_size[2] + batch_dim = torch.cat([h,w,l], dim=-1) + batch_hei = box_preds[..., 2:3] * self.out_size_factor * self.voxel_size[2] + self.cav_lidar_range[2] + + batch_rots = box_preds[..., 6:7] + batch_rotc = box_preds[..., 7:8] + + rot = torch.atan2(batch_rots, batch_rotc) + + ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)]) + ys = ys.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + xs = xs.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + + xs = xs.view(batch, -1, 1) + batch_reg[:, :, 0:1] + ys = ys.view(batch, -1, 1) + batch_reg[:, :, 1:2] + + xs = xs * self.out_size_factor * self.voxel_size[0] + self.cav_lidar_range[0] ## 基于feature_map 的size求解真实的坐标 + ys = ys * self.out_size_factor * self.voxel_size[1] + self.cav_lidar_range[1] + + + batch_box_preds = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=2) + + return cls_preds, batch_box_preds \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/ciassd.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/ciassd.py new file mode 100644 index 0000000000000000000000000000000000000000..8b69e455b399f793969364527f463c9c6b1e7e18 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/ciassd.py @@ -0,0 +1,51 @@ +import torch +from torch import nn +import numpy as np + +from opencood.models.sub_modules.mean_vfe import MeanVFE +from opencood.models.sub_modules.sparse_backbone_3d import VoxelBackBone8x +from opencood.models.sub_modules.height_compression import HeightCompression +from opencood.models.sub_modules.cia_ssd_utils import SSFA, Head + + +class CIASSD(nn.Module): + def __init__(self, args): + super(CIASSD, self).__init__() + lidar_range = np.array(args['lidar_range']) + grid_size = np.round((lidar_range[3:6] - lidar_range[:3]) / + np.array(args['voxel_size'])).astype(np.int64) + self.vfe = MeanVFE(args['mean_vfe'], args['mean_vfe']['num_point_features']) + self.spconv_block = VoxelBackBone8x(args['spconv'], + 
input_channels=args['spconv']['num_features_in'], + grid_size=grid_size) + self.map_to_bev = HeightCompression(args['map2bev']) + self.ssfa = SSFA(args['ssfa']) + self.head = Head(**args['head']) + + def forward(self, batch_dict): + voxel_features = batch_dict['processed_lidar']['voxel_features'] + voxel_coords = batch_dict['processed_lidar']['voxel_coords'] + voxel_num_points = batch_dict['processed_lidar']['voxel_num_points'] + + # save memory + batch_dict.pop('processed_lidar') + batch_dict.update({'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points}) + + batch_dict['batch_size'] = batch_dict['object_bbx_center'].shape[0] + + batch_dict = self.vfe(batch_dict) + batch_dict = self.spconv_block(batch_dict) + batch_dict = self.map_to_bev(batch_dict) + out = self.ssfa(batch_dict['spatial_features']) + out = self.head(out) + batch_dict['preds_dict_stage1'] = out + + return batch_dict + + + +if __name__=="__main__": + model = SSFA(None) + print(model) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/__pycache__/codriving.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/__pycache__/codriving.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2efbf59deef2733bb821ecab028ba455cc668834 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/__pycache__/codriving.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/__pycache__/where2comm.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/__pycache__/where2comm.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9bd17f146ddb2f0435d47717e318b3c00e18c88f Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/__pycache__/where2comm.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/codriving.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/codriving.py new file mode 100644 index 0000000000000000000000000000000000000000..aa8cbafc0c6e9577fca1c1a32ec052e5bfb2ddbc --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/codriving.py @@ -0,0 +1,143 @@ +# -*- coding: utf-8 -*- +# Author: Yue Hu , Genjia Liu +# License: TDG-Attribution-NonCommercial-NoDistrib + +import torch +import torch.nn as nn +import numpy as np +import copy +import random + +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple + +class Communication(nn.Module): + def __init__(self, args): + super(Communication, self).__init__() + + self.smooth = False + self.thre = args['thre'] + if 'gaussian_smooth' in args: + # Gaussian Smooth + self.smooth = True + kernel_size = args['gaussian_smooth']['k_size'] + c_sigma = args['gaussian_smooth']['c_sigma'] + self.gaussian_filter = nn.Conv2d(1, 1, kernel_size=kernel_size, stride=1, padding=(kernel_size-1)//2) + self.init_gaussian_filter(kernel_size, c_sigma) + self.gaussian_filter.requires_grad = False + self.det_range = args['cav_lidar_range'] + self.use_driving_request = args['driving_request'] + + self.args = args + + def init_gaussian_filter(self, k_size=5, sigma=1): + def _gen_gaussian_kernel(k_size=5, sigma=1): + center = k_size // 2 + x, y = np.mgrid[0 - center : k_size - center, 0 - 
center : k_size - center] + g = 1 / (2 * np.pi * sigma) * np.exp(-(np.square(x) + np.square(y)) / (2 * np.square(sigma))) + return g + gaussian_kernel = _gen_gaussian_kernel(k_size, sigma) + self.gaussian_filter.weight.data = torch.Tensor(gaussian_kernel).to(self.gaussian_filter.weight.device).unsqueeze(0).unsqueeze(0) + self.gaussian_filter.bias.data.zero_() + + def forward(self, batch_confidence_maps, record_len, pairwise_t_matrix, waypoints=None): + # batch_confidence_maps:[(L1, H, W), (L2, H, W), ...] + # pairwise_t_matrix: (B,L,L,2,3) + # thre: threshold of objectiveness + # a_ji = (1 - q_i)*q_ji + B, L, _, _, _ = pairwise_t_matrix.shape + _, _, H, W = batch_confidence_maps[0].shape + + ### get matrix for inverse transform + pairwise_t_matrix_inverse = pairwise_t_matrix.clone() + + pairwise_t_matrix_inverse[...,0,1] = pairwise_t_matrix_inverse[...,0,1] / (H / W) + pairwise_t_matrix_inverse[...,1,0] = pairwise_t_matrix_inverse[...,1,0] / (W / H) + + pairwise_t_matrix_inverse[...,0,2] *= -1 + pairwise_t_matrix_inverse[...,1,2] *= -1 + + pairwise_t_matrix_inverse_2 = pairwise_t_matrix_inverse.clone() + + pairwise_t_matrix_inverse[...,0,1] = pairwise_t_matrix_inverse_2[...,1,0] + pairwise_t_matrix_inverse[...,1,0] = pairwise_t_matrix_inverse_2[...,0,1] + + pairwise_t_matrix_inverse[...,0,1] = pairwise_t_matrix_inverse[...,0,1] * (H / W) + pairwise_t_matrix_inverse[...,1,0] = pairwise_t_matrix_inverse[...,1,0] * (W / H) + + communication_masks = [] + communication_rates = [] + batch_communication_maps = [] + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + # t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + ori_communication_maps = batch_confidence_maps[b].sigmoid().max(dim=1)[0].unsqueeze(1) # dim1=2 represents the confidence of two anchors + + if False: # self.smooth: + processed_communication_maps = self.gaussian_filter(ori_communication_maps) + # normalize to 0-1 + if processed_communication_maps.max() >0: + processed_communication_maps = processed_communication_maps/processed_communication_maps.max()*ori_communication_maps.max() + else: + processed_communication_maps = ori_communication_maps + + ########## driving request ############ + if waypoints is not None: # only used with waypoints prediction model + # assert B==1 # waypoints.size(0)==len(record_len) + + from opencood.utils.waypoint2map import waypoints2map_radius # radius=40 sigma_reverse=5 + bev_grad_cam = waypoints2map_radius( waypoints.cpu().numpy(), radius=self.args.get('radius',160), sigma_reverse=self.args.get('sigma_reverse',2), \ + grid_coord=[batch_confidence_maps[b].size(2),batch_confidence_maps[b].size(3), \ + self.det_range[4]/(self.det_range[4]-self.det_range[1]),\ + self.det_range[3]/(self.det_range[3]-self.det_range[0])] \ + , det_range=self.det_range) # (1,10,2) -> (1,192,576) + + bev_grad_cam_tensor = torch.tensor(bev_grad_cam).to(batch_confidence_maps[0].device) + # warp request map + N = record_len[b].item() + grad_cam_repeat = bev_grad_cam_tensor[0][None, None].repeat(N,1,1,1) # bev_grad_cam_tensor[b][None, None].repeat(N,1,1,1) + t_matrix = pairwise_t_matrix_inverse[b][:N, :N, :, :] + warpped_grad_cam = warp_affine_simple(grad_cam_repeat, + t_matrix[0, :, :, :], + (H, W)).clamp(0,1) + + processed_communication_maps = processed_communication_maps * torch.clamp((warpped_grad_cam.to(processed_communication_maps.dtype)*5/(warpped_grad_cam.max()+1e-7)), min=1e-4, max=1 - 1e-4) + + ############################################ + + communication_maps 
= processed_communication_maps + + ones_mask = torch.ones_like(communication_maps).to(communication_maps.device) + zeros_mask = torch.zeros_like(communication_maps).to(communication_maps.device) + + if self.args.get('random_thre',False): + thre_list = [0.001,0.003,0.01,0.02,0.1] + thre = random.choice(thre_list) + thre = np.random.uniform(0.5*thre, 1.5*thre) + else: + thre = self.thre + + + communication_mask = torch.where(communication_maps>= thre, ones_mask, zeros_mask) + + communication_rate = communication_mask[1:N].sum()/(H*W) + + # communication_mask = warp_affine_simple(communication_mask, + # t_matrix[0, :, :, :], + # (H, W)) + + communication_mask_nodiag = communication_mask.clone() + ones_mask = torch.ones_like(communication_mask).to(communication_mask.device) + communication_mask_nodiag[0] = ones_mask[0] + + communication_masks.append(communication_mask_nodiag) + communication_rates.append(communication_rate) + batch_communication_maps.append(ori_communication_maps*communication_mask_nodiag) + communication_rates = sum(communication_rates)/B + # communication_masks = torch.stack(communication_masks, dim=0) ## torch.concat + communication_masks = torch.concat(communication_masks, dim=0) + + return batch_communication_maps, communication_masks, communication_rates \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/where2comm.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/where2comm.py new file mode 100644 index 0000000000000000000000000000000000000000..26db8147c65ff5a060f2f53b9f11360325d5308d --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/where2comm.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +# Author: Yue Hu +# License: TDG-Attribution-NonCommercial-NoDistrib + +import torch +import torch.nn as nn +import numpy as np + +class Communication(nn.Module): + def __init__(self, args): + super(Communication, self).__init__() + + self.smooth = False + self.thre = args['thre'] + if 'gaussian_smooth' in args: + # Gaussian Smooth + self.smooth = True + kernel_size = args['gaussian_smooth']['k_size'] + c_sigma = args['gaussian_smooth']['c_sigma'] + self.gaussian_filter = nn.Conv2d(1, 1, kernel_size=kernel_size, stride=1, padding=(kernel_size-1)//2) + self.init_gaussian_filter(kernel_size, c_sigma) + self.gaussian_filter.requires_grad = False + + def init_gaussian_filter(self, k_size=5, sigma=1): + def _gen_gaussian_kernel(k_size=5, sigma=1): + center = k_size // 2 + x, y = np.mgrid[0 - center : k_size - center, 0 - center : k_size - center] + g = 1 / (2 * np.pi * sigma) * np.exp(-(np.square(x) + np.square(y)) / (2 * np.square(sigma))) + return g + gaussian_kernel = _gen_gaussian_kernel(k_size, sigma) + self.gaussian_filter.weight.data = torch.Tensor(gaussian_kernel).to(self.gaussian_filter.weight.device).unsqueeze(0).unsqueeze(0) + self.gaussian_filter.bias.data.zero_() + + def forward(self, batch_confidence_maps, record_len, pairwise_t_matrix): + # batch_confidence_maps:[(L1, H, W), (L2, H, W), ...] 
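# A minimal standalone sketch (illustrative only, assumed shapes, not part of
# this module) of the frozen Gaussian-smoothing convolution that
# init_gaussian_filter above builds: a k x k Gaussian kernel is generated with
# numpy and copied into a single-channel Conv2d whose weights stay fixed.
import numpy as np
import torch
import torch.nn as nn

def make_gaussian_conv(k_size=5, sigma=1.0):
    center = k_size // 2
    x, y = np.mgrid[-center:k_size - center, -center:k_size - center]
    g = np.exp(-(x ** 2 + y ** 2) / (2 * sigma ** 2)) / (2 * np.pi * sigma ** 2)
    conv = nn.Conv2d(1, 1, kernel_size=k_size, stride=1, padding=(k_size - 1) // 2)
    conv.weight.data = torch.tensor(g, dtype=torch.float32).view(1, 1, k_size, k_size)
    conv.bias.data.zero_()
    conv.requires_grad_(False)  # fixed smoothing filter, not learned
    return conv

# Example: smooth a batch of (N, 1, H, W) confidence maps.
smooth = make_gaussian_conv()
smoothed = smooth(torch.rand(2, 1, 192, 576))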
+ # pairwise_t_matrix: (B,L,L,2,3) + # thre: threshold of objectiveness + # a_ji = (1 - q_i)*q_ji + B, L, _, _, _ = pairwise_t_matrix.shape + _, _, H, W = batch_confidence_maps[0].shape + + communication_masks = [] + communication_rates = [] + batch_communication_maps = [] + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + # t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + ori_communication_maps = batch_confidence_maps[b].sigmoid().max(dim=1)[0].unsqueeze(1) # dim1=2 represents the confidence of two anchors + + if self.smooth: + communication_maps = self.gaussian_filter(ori_communication_maps) + else: + communication_maps = ori_communication_maps + + ones_mask = torch.ones_like(communication_maps).to(communication_maps.device) + zeros_mask = torch.zeros_like(communication_maps).to(communication_maps.device) + communication_mask = torch.where(communication_maps>self.thre, ones_mask, zeros_mask) + + communication_rate = communication_mask[0].sum()/(H*W) + + # communication_mask = warp_affine_simple(communication_mask, + # t_matrix[0, :, :, :], + # (H, W)) + + communication_mask_nodiag = communication_mask.clone() + ones_mask = torch.ones_like(communication_mask).to(communication_mask.device) + communication_mask_nodiag[0] = ones_mask[0] # [::2] + + communication_masks.append(communication_mask_nodiag) + communication_rates.append(communication_rate) + batch_communication_maps.append(ori_communication_maps*communication_mask_nodiag) + communication_rates = sum(communication_rates)/B + communication_masks = torch.stack(communication_masks, dim=0) ## torch.concat + return batch_communication_maps, communication_masks, communication_rates \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/where2comm_v0.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/where2comm_v0.py new file mode 100644 index 0000000000000000000000000000000000000000..3bf6c9f274845b3c185b17c111bec2edcb898d56 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/comm_modules/where2comm_v0.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +# Author: Yue Hu , Genjia Liu +# License: TDG-Attribution-NonCommercial-NoDistrib + +import torch +import torch.nn as nn +import numpy as np + +class Communication(nn.Module): + def __init__(self, args): + super(Communication, self).__init__() + + self.smooth = False + self.thre = args['thre'] + if 'gaussian_smooth' in args: + # Gaussian Smooth + self.smooth = True + kernel_size = args['gaussian_smooth']['k_size'] + c_sigma = args['gaussian_smooth']['c_sigma'] + self.gaussian_filter = nn.Conv2d(1, 1, kernel_size=kernel_size, stride=1, padding=(kernel_size-1)//2) + self.init_gaussian_filter(kernel_size, c_sigma) + self.gaussian_filter.requires_grad = False + + def init_gaussian_filter(self, k_size=5, sigma=1): + def _gen_gaussian_kernel(k_size=5, sigma=1): + center = k_size // 2 + x, y = np.mgrid[0 - center : k_size - center, 0 - center : k_size - center] + g = 1 / (2 * np.pi * sigma) * np.exp(-(np.square(x) + np.square(y)) / (2 * np.square(sigma))) + return g + gaussian_kernel = _gen_gaussian_kernel(k_size, sigma) + self.gaussian_filter.weight.data = torch.Tensor(gaussian_kernel).to(self.gaussian_filter.weight.device).unsqueeze(0).unsqueeze(0) + self.gaussian_filter.bias.data.zero_() + + def forward(self, batch_confidence_maps, record_len, pairwise_t_matrix): + # batch_confidence_maps:[(L1, H, W), (L2, H, W), ...] 
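# Standalone sketch (assumed shapes, illustrative rather than the module's own
# code path) of the masking step performed below: each agent's confidence map
# is thresholded into a binary "send this pixel" mask, the ego row keeps its
# full feature map, and the fraction of transmitted pixels from the other
# agents is reported as the communication rate.
import torch

def confidence_to_mask(conf_map: torch.Tensor, thre: float = 0.01):
    # conf_map: (N, anchors, H, W) raw scores for the N agents of one sample
    conf = conf_map.sigmoid().max(dim=1, keepdim=True)[0]  # (N, 1, H, W)
    mask = (conf > thre).float()                           # binary communication mask
    mask[0] = 1.0                                          # ego always keeps its own full map
    _, _, H, W = mask.shape
    rate = mask[1:].sum() / (H * W)                        # bandwidth proxy
    return mask, rate

mask, rate = confidence_to_mask(torch.randn(3, 2, 192, 576))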
+ # pairwise_t_matrix: (B,L,L,2,3) + # thre: threshold of objectiveness + # a_ji = (1 - q_i)*q_ji + B, L, _, _, _ = pairwise_t_matrix.shape + _, _, H, W = batch_confidence_maps[0].shape + + communication_masks = [] + communication_rates = [] + batch_communication_maps = [] + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + # t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + ori_communication_maps = batch_confidence_maps[b].sigmoid().max(dim=1)[0].unsqueeze(1) # dim1=2 represents the confidence of two anchors + + if self.smooth: + communication_maps = self.gaussian_filter(ori_communication_maps) + else: + communication_maps = ori_communication_maps + + ones_mask = torch.ones_like(communication_maps).to(communication_maps.device) + zeros_mask = torch.zeros_like(communication_maps).to(communication_maps.device) + communication_mask = torch.where(communication_maps>self.thre, ones_mask, zeros_mask) + + communication_rate = communication_mask[1:N].sum()/(H*W) + + # communication_mask = warp_affine_simple(communication_mask, + # t_matrix[0, :, :, :], + # (H, W)) + + communication_mask_nodiag = communication_mask.clone() + ones_mask = torch.ones_like(communication_mask).to(communication_mask.device) + communication_mask_nodiag[::N] = ones_mask[::N] # [::2] + + communication_masks.append(communication_mask_nodiag) + communication_rates.append(communication_rate) + batch_communication_maps.append(ori_communication_maps*communication_mask_nodiag) + communication_rates = sum(communication_rates)/B + communication_masks = torch.concat(communication_masks, dim=0) ## torch.concat + return batch_communication_maps, communication_masks, communication_rates \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/da_modules/gsl.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/da_modules/gsl.py new file mode 100644 index 0000000000000000000000000000000000000000..0f1ddf068d7493b7627344836beb920d27a40387 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/da_modules/gsl.py @@ -0,0 +1,35 @@ +""" +https://github.com/DerrickXuNu/MPDA/blob/9879d4b615/opencood/models/da_modules/gradient_layer.py +""" + +import torch + + +class _GradientScalarLayer(torch.autograd.Function): + @staticmethod + def forward(ctx, input, weight): + ctx.weight = weight + return input.view_as(input) + + @staticmethod + def backward(ctx, grad_output): + grad_input = grad_output.clone() + return ctx.weight * grad_input, None + + +gradient_scalar = _GradientScalarLayer.apply + + +class GradientScalarLayer(torch.nn.Module): + def __init__(self, weight): + super(GradientScalarLayer, self).__init__() + self.weight = weight + + def forward(self, input): + return gradient_scalar(input, self.weight) + + def __repr__(self): + tmpstr = self.__class__.__name__ + "(" + tmpstr += "weight=" + str(self.weight) + tmpstr += ")" + return tmpstr \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fpvrcnn.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fpvrcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..05e114b817904dbd31ea87ec8b3d93441f549b44 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fpvrcnn.py @@ -0,0 +1,90 @@ +import random, os + +import torch +from torch import nn +import numpy as np + +from opencood.models.sub_modules.mean_vfe import MeanVFE +from 
opencood.models.sub_modules.sparse_backbone_3d import VoxelBackBone8x +from opencood.models.sub_modules.height_compression import HeightCompression +from opencood.models.sub_modules.cia_ssd_utils import SSFA, Head +from opencood.models.sub_modules.vsa import VoxelSetAbstraction +from opencood.models.sub_modules.roi_head import RoIHead +from opencood.models.sub_modules.matcher import Matcher +from opencood.data_utils.post_processor.fpvrcnn_postprocessor import \ + FpvrcnnPostprocessor +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple + +class FPVRCNN(nn.Module): + def __init__(self, args): + super(FPVRCNN, self).__init__() + lidar_range = np.array(args['lidar_range']) + grid_size = np.round((lidar_range[3:6] - lidar_range[:3]) / + np.array(args['voxel_size'])).astype(np.int64) + self.vfe = MeanVFE(args['mean_vfe'], + args['mean_vfe']['num_point_features']) + self.spconv_block = VoxelBackBone8x(args['spconv'], + input_channels=args['spconv'][ + 'num_features_in'], + grid_size=grid_size) + self.map_to_bev = HeightCompression(args['map2bev']) + self.ssfa = SSFA(args['ssfa']) + self.head = Head(**args['head']) + self.post_processor = FpvrcnnPostprocessor(args['post_processer'], + train=True) + self.vsa = VoxelSetAbstraction(args['vsa'], args['voxel_size'], + args['lidar_range'], + num_bev_features=128, + num_rawpoint_features=3) + self.matcher = Matcher(args['matcher'], args['lidar_range']) + self.roi_head = RoIHead(args['roi_head']) + self.train_stage2 = args['activate_stage2'] + self.discrete_ratio = args['voxel_size'][0] + + def forward(self, batch_dict): + voxel_features = batch_dict['processed_lidar']['voxel_features'] + voxel_coords = batch_dict['processed_lidar']['voxel_coords'] + voxel_num_points = batch_dict['processed_lidar']['voxel_num_points'] + + # save memory + batch_dict.pop('processed_lidar') + batch_dict.update({'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'batch_size': int(batch_dict['record_len'].sum()), + 'proj_first': batch_dict['proj_first'], + 'lidar_pose': batch_dict['lidar_pose']}) + + batch_dict = self.vfe(batch_dict) + batch_dict = self.spconv_block(batch_dict) + batch_dict = self.map_to_bev(batch_dict) + + out = self.ssfa(batch_dict['spatial_features']) + batch_dict['stage1_out'] = self.head(out) + + data_dict, output_dict = {}, {} + data_dict['ego'], output_dict['ego'] = batch_dict, batch_dict + + pred_box3d_list, scores_list = \ + self.post_processor.post_process(data_dict, output_dict, + stage1=True) + + # if proj_first is False + # the boxes are predicted in each coordinate + batch_dict['det_boxes'] = pred_box3d_list + batch_dict['det_scores'] = scores_list + + if pred_box3d_list is not None and self.train_stage2: + batch_dict = self.vsa(batch_dict) + batch_dict = self.matcher(batch_dict) + batch_dict = self.roi_head(batch_dict) + + return batch_dict + + + + + +if __name__ == "__main__": + model = SSFA(None) + print(model) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__init__.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/__init__.cpython-37.pyc 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3727a0cd3647dcfd06a2cda11b904fdae01ea3ca Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/__init__.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/att_fuse.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/att_fuse.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2cefe8da1e1971d2c15651acec68a9a8669c1bc9 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/att_fuse.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/codriving_attn.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/codriving_attn.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f2c4efac4a61c8400257332f0ea79111825d46c6 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/codriving_attn.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/f_cooper_fuse.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/f_cooper_fuse.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d78c180a80fb1e2c8e58a9ceb9d102de45061fdf Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/f_cooper_fuse.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/fuse_utils.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/fuse_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d985eb540048cb448161ab7a55ce030015987cd1 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/fuse_utils.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/fusion_in_one.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/fusion_in_one.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7ab039a409e093d4a7f60551422d2d522fe54351 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/fusion_in_one.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/where2comm_attn.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/where2comm_attn.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7ca5bd7b1d959f56bbc3b78e0b731219fa975334 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/__pycache__/where2comm_attn.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/att_fuse.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/att_fuse.py 
new file mode 100644 index 0000000000000000000000000000000000000000..a5e173110da6c413f4ccb542b7437dd6b4dfde0b --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/att_fuse.py @@ -0,0 +1,223 @@ +""" +Implementation of Attn Fusion +""" + +import torch +import torch.nn as nn + +from opencood.models.sub_modules.torch_transformation_utils import \ + get_discretized_transformation_matrix, get_transformation_matrix, \ + warp_affine_simple, get_rotated_roi +from matplotlib import pyplot as plt +from icecream import ic +import torch.nn.functional as F +import numpy as np + +class ScaledDotProductAttention(nn.Module): + """ + Scaled Dot-Product Attention proposed in "Attention Is All You Need" + Compute the dot products of the query with all keys, divide each by sqrt(dim), + and apply a softmax function to obtain the weights on the values + Args: dim, mask + dim (int): dimention of attention + mask (torch.Tensor): tensor containing indices to be masked + Inputs: query, key, value, mask + - **query** (batch, q_len, d_model): tensor containing projection + vector for decoder. + - **key** (batch, k_len, d_model): tensor containing projection + vector for encoder. + - **value** (batch, v_len, d_model): tensor containing features of the + encoded input sequence. + - **mask** (-): tensor containing indices to be masked + Returns: context, attn + - **context**: tensor containing the context vector from + attention mechanism. + - **attn**: tensor containing the attention (alignment) from the + encoder outputs. + """ + + def __init__(self, dim): + super(ScaledDotProductAttention, self).__init__() + self.sqrt_dim = np.sqrt(dim) + + def forward(self, query, key, value): + score = torch.bmm(query, key.transpose(1, 2)) / self.sqrt_dim + attn = F.softmax(score, -1) + context = torch.bmm(attn, value) + return context + +class AttFusion(nn.Module): + def __init__(self, args): + super(AttFusion, self).__init__() + + self.discrete_ratio = args['voxel_size'][0] # voxel_size[0]=0.4 + self.downsample_rate = args['downsample_rate'] + self.att = ScaledDotProductAttention(args['in_channels']) + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, xx, record_len, pairwise_t_matrix): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + Returns + ------- + Fused feature. 
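        Note: the raw (B, L, L, 4, 4) pose matrices are sliced below into a
        (B, L, L, 2, 3) affine form and their translations are normalized by
        the (downsampled) feature-map size before the neighbor features are
        warped into the ego frame.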
+ """ + _, C, H, W = xx.shape + B, L = pairwise_t_matrix.shape[:2] + + split_x = self.regroup(xx, record_len) + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + batch_node_features = split_x + # iteratively update the features for num_iteration times + + out = [] + # iterate each batch + for b in range(B): + + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + updated_node_features = [] + + # update each node i + i = 0 # ego + x = warp_affine_simple(batch_node_features[b], t_matrix[i, :, :, :], (H, W)) + + cav_num = x.shape[0] + x = x.view(cav_num, C, -1).permute(2, 0, 1) # (H*W, cav_num, C), perform self attention on each pixel. + h = self.att(x, x, x) + h = h.permute(1, 2, 0).view(cav_num, C, H, W)[0, ...] # C, W, H before + out.append(h) + + out = torch.stack(out) + + return out + + + # def forward_debug(self, x, origin_x, record_len, pairwise_t_matrix): + # """ + # Fusion forwarding + # Used for debug and visualization + + + # Parameters + # ---------- + # x : torch.Tensor + # input data, (sum(n_cav), C, H, W) + + # origin_x: torch.Tensor + # pillars (sum(n_cav), C, H * downsample_rate, W * downsample_rate) + + # record_len : list + # shape: (B) + + # pairwise_t_matrix : torch.Tensor + # The transformation matrix from each cav to ego, + # shape: (B, L, L, 4, 4) + + # Returns + # ------- + # Fused feature. + # """ + # from matplotlib import pyplot as plt + + # _, C, H, W = x.shape + # B, L = pairwise_t_matrix.shape[:2] + + # split_x = self.regroup(x, record_len) + # split_origin_x = self.regroup(origin_x, record_len) + + # # (B,L,L,2,3) + # pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + # pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + # pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + # pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + # pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + + # # (B*L,L,1,H,W) + # roi_mask = torch.zeros((B, L, L, 1, H, W)).to(x) + # for b in range(B): + # N = record_len[b] + # for i in range(N): + # one_tensor = torch.ones((L,1,H,W)).to(x) + # roi_mask[b,i] = warp_affine_simple(one_tensor, pairwise_t_matrix[b][i, :, :, :],(H, W)) + + # batch_node_features = split_x + # # iteratively update the features for num_iteration times + + # # visualize warped feature map + # for b in range(B): + # # number of valid agent + # N = record_len[b] + # # (N,N,4,4) + # # t_matrix[i, j]-> from i to j + # t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + # # update each node i + # i = 0 # ego + # mask = roi_mask[b, i, :N, ...] + # # (N,C,H,W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # # Notice we put i one the first dim of t_matrix. Different from original. 
+ # # t_matrix[i,j] = Tji + # neighbor_feature = warp_affine_simple(batch_node_features[b], + # t_matrix[i, :, :, :], + # (H, W)) + # for idx in range(N): + # plt.imshow(torch.max(neighbor_feature[idx],0)[0].detach().cpu().numpy()) + # plt.savefig(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/vis_result/debug_warp_feature/feature_{b}_{idx}") + # plt.clf() + # plt.imshow(mask[idx][0].detach().cpu().numpy()) + # plt.savefig(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/vis_result/debug_warp_feature/mask_feature_{b}_{idx}") + # plt.clf() + + + + # # visualize origin pillar feature + # origin_node_features = split_origin_x + + # for b in range(B): + # N = record_len[b] + # # (N,N,4,4) + # # t_matrix[i, j]-> from i to j + # t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + # i = 0 # ego + # # (N,C,H,W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # # Notice we put i one the first dim of t_matrix. Different from original. + # # t_matrix[i,j] = Tji + # neighbor_feature = warp_affine_simple(origin_node_features[b], + # t_matrix[i, :, :, :], + # (H*self.downsample_rate, W*self.downsample_rate)) + + # for idx in range(N): + # plt.imshow(torch.max(neighbor_feature[idx],0)[0].detach().cpu().numpy()) + # plt.savefig(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/vis_result/debug_warp_feature/origin_{b}_{idx}") + # plt.clf() \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/codriving_attn.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/codriving_attn.py new file mode 100644 index 0000000000000000000000000000000000000000..15868495726f36cf56b0017e1b895269130a8c82 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/codriving_attn.py @@ -0,0 +1,349 @@ +from turtle import update +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple +from opencood.models.comm_modules.codriving import Communication + + +class ScaledDotProductAttention(nn.Module): + """ + Scaled Dot-Product Attention proposed in "Attention Is All You Need" + Compute the dot products of the query with all keys, divide each by sqrt(dim), + and apply a softmax function to obtain the weights on the values + Args: dim, mask + dim (int): dimention of attention + mask (torch.Tensor): tensor containing indices to be masked + Inputs: query, key, value, mask + - **query** (batch, q_len, d_model): tensor containing projection + vector for decoder. + - **key** (batch, k_len, d_model): tensor containing projection + vector for encoder. + - **value** (batch, v_len, d_model): tensor containing features of the + encoded input sequence. + - **mask** (-): tensor containing indices to be masked + Returns: context, attn + - **context**: tensor containing the context vector from + attention mechanism. + - **attn**: tensor containing the attention (alignment) from the + encoder outputs. 
+ """ + + def __init__(self, dim): + super(ScaledDotProductAttention, self).__init__() + self.sqrt_dim = np.sqrt(dim) + + def forward(self, query, key, value): + score = torch.bmm(query, key.transpose(1, 2)) / self.sqrt_dim + attn = F.softmax(score, -1) + context = torch.bmm(attn, value) + return context + +class AttenFusion(nn.Module): + def __init__(self, feature_dim): + super(AttenFusion, self).__init__() + self.att = ScaledDotProductAttention(feature_dim) + + def forward(self, x): + cav_num, C, H, W = x.shape + x = x.view(cav_num, C, -1).permute(2, 0, 1) # (H*W, cav_num, C), perform self attention on each pixel. + x = self.att(x, x, x) + x = x.permute(1, 2, 0).view(cav_num, C, H, W)[0] # C, W, H before + return x + +class MaxFusion(nn.Module): + def __init__(self): + super(MaxFusion, self).__init__() + + def forward(self, x): + return torch.max(x, dim=0)[0] + + +class EncodeLayer(nn.Module): + def __init__(self, channels, n_head=8, dropout=0): + super(EncodeLayer, self).__init__() + self.attn = nn.MultiheadAttention(channels, n_head, dropout) + self.linear1 = nn.Linear(channels, channels) + self.linear2 = nn.Linear(channels, channels) + + self.norm1 = nn.LayerNorm(channels) + self.norm2 = nn.LayerNorm(channels) + + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + + self.relu = nn.ReLU() + + def forward(self, q, k, v, confidence_map=None): + """ + order (seq, batch, feature) + Args: + q: (1, H*W, C) + k: (N, H*W, C) + v: (N, H*W, C) + Returns: + outputs: () + """ + residual = q + if confidence_map is not None: + context, weight = self.attn(q,k,v, quality_map=confidence_map) # (1, H*W, C) + else: + context, weight = self.attn(q,k,v) # (1, H*W, C) + context = self.dropout1(context) + output1 = self.norm1(residual + context) + + # feed forward net + residual = output1 # (1, H*W, C) + context = self.linear2(self.relu(self.linear1(output1))) + context = self.dropout2(context) + output2 = self.norm2(residual + context) + + return output2 + +class TransformerFusion(nn.Module): + def __init__(self, channels=256, n_head=8, with_spe=True, with_scm=True, dropout=0): + super(TransformerFusion, self).__init__() + + self.encode_layer = EncodeLayer(channels, n_head, dropout) + self.with_spe = with_spe + self.with_scm = with_scm + + def forward(self, batch_neighbor_feature, batch_neighbor_feature_pe, batch_confidence_map, record_len): + x_fuse = [] + B = len(record_len) + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + neighbor_feature = batch_neighbor_feature[b] + _, C, H, W = neighbor_feature.shape + neighbor_feature_flat = neighbor_feature.view(N,C,H*W) # (N, C, H*W) + + if self.with_spe: + neighbor_feature_pe = batch_neighbor_feature_pe[b] + neighbor_feature_flat_pe = neighbor_feature_pe.view(N,C,H*W) # (N, C, H*W) + query = neighbor_feature_flat_pe[0:1,...].permute(0,2,1) # (1, H*W, C) + key = neighbor_feature_flat_pe.permute(0,2,1) # (N, H*W, C) + else: + query = neighbor_feature_flat[0:1,...].permute(0,2,1) # (1, H*W, C) + key = neighbor_feature_flat.permute(0,2,1) # (N, H*W, C) + + value = neighbor_feature_flat.permute(0,2,1) + + if self.with_scm: + confidence_map = batch_confidence_map[b] + fused_feature = self.encode_layer(query, key, value, confidence_map) # (1, H*W, C) + else: + fused_feature = self.encode_layer(query, key, value) # (1, H*W, C) + + fused_feature = fused_feature.permute(0,2,1).reshape(1, C, H, W) + + x_fuse.append(fused_feature) + x_fuse = torch.concat(x_fuse, dim=0) + return x_fuse 
+ +def add_pe_map(x): + # scale = 2 * math.pi + temperature = 10000 + num_pos_feats = x.shape[-3] // 2 # positional encoding dimension. C = 2d + + mask = torch.zeros([x.shape[-2], x.shape[-1]], dtype=torch.bool, device=x.device) #[H, W] + not_mask = ~mask + y_embed = not_mask.cumsum(0, dtype=torch.float32) # [H, W] + x_embed = not_mask.cumsum(1, dtype=torch.float32) # [H, W] + + dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=x.device) # [0,1,2,...,d] + dim_t = temperature ** (2 * (dim_t // 2) / num_pos_feats) # 10000^(2k/d), k is [0,0,1,1,...,d/2,d/2] + + pos_x = x_embed[:, :, None] / dim_t + pos_y = y_embed[:, :, None] / dim_t + pos_x = torch.stack((pos_x[:, :, 0::2].sin(), pos_x[:, :, 1::2].cos()), dim=3).flatten(2) + pos_y = torch.stack((pos_y[:, :, 0::2].sin(), pos_y[:, :, 1::2].cos()), dim=3).flatten(2) + pos = torch.cat((pos_y, pos_x), dim=2).permute(2, 0, 1) # [C, H, W] + + if len(x.shape) == 4: + x_pe = x + pos[None,:,:,:] + elif len(x.shape) == 5: + x_pe = x + pos[None,None,:,:,:] + return x_pe + + +class Where2comm(nn.Module): + def __init__(self, args): + super(Where2comm, self).__init__() + + self.communication = False + self.round = 1 + if 'communication' in args: + self.communication = True + self.naive_communication = Communication(args['communication']) + if 'round' in args['communication']: + self.round = args['communication']['round'] + self.discrete_ratio = args['voxel_size'][0] # voxel_size[0]=0.4 + self.downsample_rate = args['downsample_rate'] # 2/4, downsample rate from original feature map [200, 704] + + self.agg_mode = args['agg_operator']['mode'] + self.multi_scale = args['multi_scale'] + if self.multi_scale: + layer_nums = args['layer_nums'] + num_filters = args['num_filters'] + self.num_levels = len(layer_nums) + self.fuse_modules = nn.ModuleList() + for idx in range(self.num_levels): + if self.agg_mode == 'ATTEN': + fuse_network = AttenFusion(num_filters[idx]) + elif self.agg_mode == 'MAX': + fuse_network = MaxFusion() + elif self.agg_mode == 'Transformer': + fuse_network = TransformerFusion( + channels=num_filters[idx], + n_head=args['agg_operator']['n_head'], + with_spe=args['agg_operator']['with_spe'], + with_scm=args['agg_operator']['with_scm']) + self.fuse_modules.append(fuse_network) + else: + if self.agg_mode == 'ATTEN': + self.fuse_modules = AttenFusion(args['agg_operator']['feature_dim']) + elif self.agg_mode == 'MAX': + self.fuse_modules = MaxFusion() + elif self.agg_mode == 'Transformer': + self.fuse_network = TransformerFusion( + channels=args['agg_operator']['feature_dim'], + n_head=args['agg_operator']['n_head'], + with_spe=args['agg_operator']['with_spe'], + with_scm=args['agg_operator']['with_scm']) + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, x, rm, record_len, pairwise_t_matrix, backbone=None, waypoints=None): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + Returns + ------- + Fused feature. 
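        Note: in addition to the fused feature, the body below returns the
        batch-averaged communication rate and a dict holding the per-scale
        neighbor features before fusion ('features_before_fusion').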
+ """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + feature_list = [] + for b in range(B): + feature_list.append({}) + + if self.multi_scale: + ups = [] + # backbone.__dict__() + with_resnet = True if hasattr(backbone, 'resnet') else False + if with_resnet: + feats = backbone.resnet(x) # e.g. x: [2, 64, 192, 576] -> ([2, 64, 96, 288], [2, 128, 48, 144], [2, 256, 24, 72]) + + for i in range(self.num_levels): + x = feats[i] if with_resnet else backbone.blocks[i](x) + + ############ 1. Communication (Mask the features) ######### + if i==0: + if self.communication: + batch_confidence_maps = self.regroup(rm, record_len) + _, communication_masks, communication_rates = self.naive_communication(batch_confidence_maps, record_len, pairwise_t_matrix, waypoints=waypoints) + # communication_masks = communication_masks.squeeze(0) + x = x * communication_masks + else: + communication_rates = torch.tensor(0).to(x.device) + else: + if self.communication: + communication_masks = F.max_pool2d(communication_masks, kernel_size=2) + x = x * communication_masks + + ############ 2. Split the confidence map ####################### + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] + batch_node_features = self.regroup(x, record_len) + + ############ 3. Fusion #################################### + x_fuse = [] + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + node_features = batch_node_features[b] + C, H, W = node_features.shape[1:] + neighbor_feature = warp_affine_simple(node_features, + t_matrix[0, :, :, :], + (H, W)) + x_fuse.append(self.fuse_modules[i](neighbor_feature)) # [N,C,H,W] + + feature_list[b][i] = neighbor_feature + + x_fuse = torch.stack(x_fuse) + + ############ 4. Deconv #################################### + if len(backbone.deblocks) > 0: + ups.append(backbone.deblocks[i](x_fuse)) + else: + ups.append(x_fuse) + + if len(ups) > 1: + x_fuse = torch.cat(ups, dim=1) # ups[0],ups[1],ups[2] [1, 128, 96, 288] + elif len(ups) == 1: + x_fuse = ups[0] + + if len(backbone.deblocks) > self.num_levels: + x_fuse = backbone.deblocks[-1](x_fuse) + else: + ############ 1. Split the features ####################### + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] + batch_node_features = self.regroup(x, record_len) + batch_confidence_maps = self.regroup(rm, record_len) + + ############ 2. Communication (Mask the features) ######### + if self.communication: + _, communication_masks, communication_rates = self.naive_communication(batch_confidence_maps, record_len, pairwise_t_matrix) + else: + communication_rates = torch.tensor(0).to(x.device) + + ############ 3. 
Fusion #################################### + x_fuse = [] + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + node_features = batch_node_features[b] + if self.communication: + node_features = node_features * communication_masks[b] + neighbor_feature = warp_affine_simple(node_features, + t_matrix[0, :, :, :], + (H, W)) + x_fuse.append(self.fuse_modules(neighbor_feature)) + x_fuse = torch.stack(x_fuse) + + return x_fuse, communication_rates, {'features_before_fusion':feature_list} # ms_atten x:[1, 384, 96, 288] diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/deform_fuse.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/deform_fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..31491ec6363f85e6147703884ea4c0d726f1b369 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/deform_fuse.py @@ -0,0 +1,251 @@ +""" +Implementation of deformable fusion + +The design is: for ego agent f_0 and collaborative agent f_1. + +f_0[x0,y0] may not correspond to f_1[x0,y0] + +So it will learn an offset (delta_x and delta_y) for this pixel position. +Then f_0[x0,y0] will fuse with f_1[x0+delta_x, y0+delta_y] +""" + +from this import d +import torch +import torch.nn as nn +import torch.nn.functional as F +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple +from icecream import ic + +def regroup(x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + +class RigidOffset(nn.Module): + """ Learn a rigid transformation grid for the whole feature map + """ + + def __init__(self, in_ch, hidden_ch=32): + super(RigidOffset, self).__init__() + self.model = nn.Sequential( + nn.Conv2d(in_ch, hidden_ch, kernel_size=(3, 3), padding=1), + nn.LeakyReLU(negative_slope=0.01), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.Conv2d(hidden_ch, hidden_ch, kernel_size=(3, 3), padding=1), + nn.LeakyReLU(negative_slope=0.01), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.Conv2d(hidden_ch, hidden_ch, kernel_size=(3, 3), padding=1), + nn.LeakyReLU(negative_slope=0.01), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.AdaptiveAvgPool2d(output_size=1), + nn.Flatten(), + nn.Linear(in_features=hidden_ch, out_features=hidden_ch, bias=True), + nn.LeakyReLU(negative_slope=0.01), + nn.Linear(in_features=hidden_ch, out_features=3, bias=True), + ) + + def forward(self, x, return_M=False): + """ + Args: + x.shape:(sum(record_len_minus1), 2C, H, W) + Returns: + out.shape: (sum(record_len_minus1), H, W, 2) + """ + N, _, H, W = x.shape + xytheta = self.model(x) # [sum(record_len_minus1), 3], 3 corresponds to x, y, theta + + + cos = torch.cos(xytheta[:, 2]) + sin = torch.sin(xytheta[:, 2]) + + M = torch.zeros((N, 2, 3), device=x.device) + M[:, 0, 0] = cos + M[:, 0, 1] = sin + M[:, 1, 0] = -sin + M[:, 1, 1] = cos + M[:, 0, 2] = xytheta[:, 0] + M[:, 1, 2] = xytheta[:, 1] + + grid = F.affine_grid(M, size=x.shape) + + if return_M: + return grid, M + + return grid + + +class ArbitraryOffset(nn.Module): + """ Learn a offset/residual grid for each pixel + """ + + def __init__(self, in_ch, out_ch=2, hidden_ch=32): + """ + Args: + in_ch: is 2 times feature channel, since they concat together + """ + super(ArbitraryOffset, self).__init__() + self.model = nn.Sequential( + nn.Conv2d(in_ch, hidden_ch, 3, 1, 1), + 
nn.InstanceNorm2d(hidden_ch), + nn.LeakyReLU(negative_slope=0.01), + nn.Conv2d(hidden_ch, hidden_ch // 2, 3, 1, 1), + nn.InstanceNorm2d(hidden_ch // 2), + nn.LeakyReLU(negative_slope=0.01), + nn.Conv2d(hidden_ch // 2, hidden_ch // 4, 1, 1, 0), + nn.InstanceNorm2d(hidden_ch // 4), + nn.LeakyReLU(negative_slope=0.01), + nn.Conv2d(hidden_ch // 4, 2, 1, 1, 0) + ) + + def forward(self, x): + """ + Args: + x.shape:(sum(record_len_minus1), 2C, H, W) + Returns: + out.shape: (sum(record_len_minus1), H, W, 2) + """ + N, _, H, W = x.shape + + x = self.model(x) + + grid_residual = x.reshape(N, H, W, 2) + + M_origin = torch.Tensor([[[1, 0, 0], [0, 1, 0]]]) + grid_origin = F.affine_grid(M_origin, size=(1, 1, H, W)).to(x.device) + + grid = grid_residual + grid_origin + return grid + + +class DeformFusion(nn.Module): + """ deformable fusion for multiscale feature map + For each pixel in ego agent's feature map, + it will learn a offset to fuse the feature. + """ + + def __init__(self, in_ch, deform_method, cycle_consist_loss=False): + """ + Args: + in_ch: channels num of one agent's feature map. + """ + super(DeformFusion, self).__init__() + self.cycle_consistency_loss = cycle_consist_loss + + if deform_method == "rigid": + self.grid_net = RigidOffset(in_ch * 2) + elif deform_method == "arbitrary": + self.grid_net = ArbitraryOffset(in_ch * 2) + + + def forward(self, features, record_len, pairwise_t_matrix, lidar_pose=None): + """ + Args: + features: List[torch.Tensor] + multiscale features. features[i] is (sum(cav), C, H, W), different i, different C, H, W + record_len: torch.tensor + record cav number + pairwise_t_matrix: torch.Tensor, + already normalized. shape [B, N_max, N_max, 2, 3] + lidar_pose: torch.Tensor + shape [(sum(cav), 6)], this is only used to calculate intersection. If proj_first=False, then equal to pairwise_t_matrix + """ + + ##### first align them to ego coordinate, espeically when proj_first = False. + device = features[0].device + record_len_minus1 = record_len - 1 + + if(torch.sum(record_len_minus1)==0): + return features + + ms_split_x = [regroup(features[i], record_len) for i in range(len(features))] + ms_split_x_warp = [] + + for split_x in ms_split_x: # different scale + split_x_warp = [] + H, W = split_x[0].shape[2:] + for b, xx in enumerate(split_x): # different samples + N = xx.shape[0] + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + i = 0 # ego + split_x_warp.append(warp_affine_simple(xx, t_matrix[i, :, :, :], (H, W))) # [N_,C,H,W], N_ varies + ms_split_x_warp.append(split_x_warp) + + + ##### we caculate the grid by scale=1 feature, and share it with all scales. + split_x = ms_split_x_warp[0] # first scale + H, W = split_x[0].shape[:2] + + cat_features = [] + for b, xx in enumerate(split_x): + N = xx.shape[0] + cat_feature = torch.cat([xx[0:1].expand(N - 1, -1, -1, -1), xx[1:]], dim=1) # (N-1, 2C, H, W) + cat_features.append(cat_feature) + + cat_feature = torch.cat(cat_features, dim=0) # (sum(record_len_minus1), 2C, H, W) + + grid_offset = self.grid_net(cat_feature) # (sum(record_len_minus1), H, W, 2) + + grid = grid_offset # (sum(record_len_minus1),H,W,2) + ms_grid = [grid[:,::2**i,::2**i,:] for i in range(len(features))] + + ms_split_grid = [regroup(grid, record_len_minus1) for grid in ms_grid] # [[N1-1,H,W,2], [N2-1,H,W,2],...], shared for all scales. 
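# Illustrative standalone sketch (hypothetical helper, not the module's own
# code) of how a predicted rigid correction (dx, dy, theta) becomes a sampling
# grid via affine_grid and is applied to a neighbor feature map with
# grid_sample, mirroring what the grid produced by RigidOffset is used for.
import math
import torch
import torch.nn.functional as F

def apply_rigid_offset(feat: torch.Tensor, dx: float, dy: float, theta: float):
    # feat: (1, C, H, W); dx, dy are offsets in normalized [-1, 1] coordinates
    cos, sin = math.cos(theta), math.sin(theta)
    M = torch.tensor([[cos, sin, dx],
                      [-sin, cos, dy]], dtype=torch.float32).unsqueeze(0)  # (1, 2, 3)
    grid = F.affine_grid(M, size=feat.shape, align_corners=False)
    return F.grid_sample(feat, grid, align_corners=False)

warped = apply_rigid_offset(torch.randn(1, 64, 96, 288), dx=0.05, dy=-0.02, theta=0.1)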
+ + ##### fusion + ms_fused_features = [] + for scale, split_x in enumerate(ms_split_x_warp): + fused_features = [] + for b, xx in enumerate(split_x): + if xx.shape[0] == 1: + fused_features.append(xx[0]) + else: + neighbor_feature_deform = torch.cat([F.grid_sample(xx[1:], ms_split_grid[scale][b]), xx[0:1]], dim=0) # (N-1, C, H, W) + fuesd_feature = torch.max(neighbor_feature_deform, dim=0)[0] + fused_features.append(fuesd_feature) + ms_fused_features.append(torch.stack(fused_features)) + + + if self.cycle_consistency_loss: + split_x = ms_split_x[0] # before warping to the ego agent, scale = 1 + H, W = split_x[0].shape[2:] + + cat_features = [] + for b, xx in enumerate(split_x): + N = xx.shape[0] + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + """ + [agent0, agent1] -> M_0 + [agent1, agent2] -> M_1 + ... + [agentN-1,agent0] -> M_N-1 + M_0@M_1@...@M_N-1 = I + + The latter should align to the former agent. + """ + latter_agent = torch.cat([xx[1:],xx[:1]], dim=0) # [agent1,agent2,..., agent0] + t_matrix_adj = torch.stack([t_matrix[i,(i+1)%N] for i in range(N)]) + latter_agent_warp = warp_affine_simple(latter_agent, t_matrix_adj, dsize=(H,W)) + cat_feature = torch.cat([xx, latter_agent_warp], dim=1) + cat_features.append(cat_feature) + + cat_feature = torch.cat(cat_features, dim=0) # (sum(record_len), 2C, H, W) + _, M = self.grid_net(cat_feature, return_M=True) # (sum(record_len)*H*W, 2) + + M_homo = F.pad(M, (0, 0, 0, 1), "constant", 0) # pad 2nd to last by (0, 1) + M_homo[:, 2, 2] = 1 + + split_M = regroup(M_homo, record_len) + + return ms_fused_features + + +if __name__ == "__main__": + features = [torch.randn(4,64,200,704), torch.randn(4,128,100,352), torch.randn(4,256,50,176)] + record_len = torch.tensor([1,3]) + pairwise_t_matirx = torch.eye(4).view(1,1,1,4,4).expand(2,5,5,4,4) + + model = DeformFusion(in_ch=64, deform_method='rigid', cycle_consist_loss=True) + + out = model(features, record_len, pairwise_t_matirx) + for xx in out: + print(xx.shape) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/deform_transformer_fuse.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/deform_transformer_fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..0e13e58bf68f04dfee64e4ed01948644251e7aa4 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/deform_transformer_fuse.py @@ -0,0 +1,205 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +Implementation of transformer encoder fusion. +It is only a method to fuse features +Not rely on specific backbone. 
+""" + +import torch +import torch.nn as nn + +from opencood.models.sub_modules.torch_transformation_utils import \ + get_discretized_transformation_matrix, get_transformation_matrix, \ + warp_affine_simple, get_rotated_roi +import torch.nn.functional as F +from icecream import ic +from mmcv.ops import DeformConv2dPack as DCN2d + +class MultiheadAttBlock(nn.Module): + def __init__(self, channels, n_head=8, dropout=0): + super(MultiheadAttBlock, self).__init__() + self.attn = nn.MultiheadAttention(channels, n_head, dropout) + + def forward(self, q, k, v): + """ + order (seq, batch, feature) + Args: + q: (1, H*W, C) + k: (N, H*W, C) + v: (N, H*W, C) + Returns: + outputs: () + """ + context, weight = self.attn(q,k,v) # (1, H*W, C) + + return context + +class TransformerBlock(nn.Module): + def __init__(self, channels, n_head=8, dropout=0): + super(TransformerBlock, self).__init__() + self.attn = nn.MultiheadAttention(channels, n_head, dropout) + self.linear1 = nn.Linear(channels, channels) + self.linear2 = nn.Linear(channels, channels) + + self.norm1 = nn.LayerNorm(channels) + self.norm2 = nn.LayerNorm(channels) + + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + + self.relu = nn.ReLU() + + def forward(self, q, k, v): + """ + order (seq, batch, feature) + Args: + q: (1, H*W, C) + k: (N, H*W, C) + v: (N, H*W, C) + Returns: + outputs: () + """ + residual = q + context, weight = self.attn(q,k,v) # (1, H*W, C) + context = self.dropout1(context) + output1 = self.norm1(residual + context) + + # feed forward net + residual = output1 # (1, H*W, C) + context = self.linear2(self.relu(self.linear1(output1))) + context = self.dropout2(context) + output2 = self.norm2(residual + context) + + return output2 + + + +class DeformTransformerFusion(nn.Module): + def __init__(self, args): + super(DeformTransformerFusion, self).__init__() + + self.channels = args['in_channels'] + self.n_head = args['n_head'] + self.dropout = args['dropout_rate'] + self.discrete_ratio = args['voxel_size'][0] # voxel_size[0]=0.4 0.16m one pixel? I think it's 0.4m per pixel, according to [200, 704] + self.downsample_rate = args['downsample_rate'] # 4, downsample rate from original feature map [200, 704] + + self.deform_conv1 = DCN2d(self.channels, self.channels, kernel_size=1, stride=1, padding=0) + self.deform_conv2 = DCN2d(self.channels, self.channels, kernel_size=3, stride=1, padding=1) + if args['only_attention']: + self.transformer_block = MultiheadAttBlock(self.channels, self.n_head, self.dropout) + else: + self.transformer_block = TransformerBlock(self.channels, self.n_head, self.dropout) + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, x, record_len, pairwise_t_matrix): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + Returns + ------- + Fused feature. + """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] 
+ split_x = self.regroup(x, record_len) + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + + # (B*L,L,1,H,W) + roi_mask = torch.zeros((B, L, L, 1, H, W)).to(x) + for b in range(B): + N = record_len[b] + for i in range(N): + one_tensor = torch.ones((L,1,H,W)).to(x) + roi_mask[b,i] = warp_affine_simple(one_tensor, pairwise_t_matrix[b][i, :, :, :],(H, W)) + + batch_node_features = split_x + # iteratively update the features for num_iteration times + + out = [] + # iterate each batch + for b in range(B): + + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + updated_node_features = [] + + # update each node i + i = 0 # ego + # (N,1,H,W) + mask = roi_mask[b, i, :N, ...] + + # (N,C,H,W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # Notice we put i one the first dim of t_matrix. Different from original. + # t_matrix[i,j] = Tji + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[i, :, :, :], + (H, W)) + + neighbor_feature_deform1_flat = self.deform_conv1(neighbor_feature).view(N,C,H*W) + neighbor_feature_deform2_flat = self.deform_conv2(neighbor_feature).view(N,C,H*W) + + neighbor_feature_flat = neighbor_feature.view(N,C,H*W) # (N, C, H*W) + + + + query = neighbor_feature_flat[0:1,...].permute(0,2,1) # (1, H*W, C) + + key0 = neighbor_feature_flat.permute(0,2,1) # (N, H*W, C) + key1 = neighbor_feature_deform1_flat.permute(0,2,1) # (N, H*W, C) + key2 = neighbor_feature_deform2_flat.permute(0,2,1) # (N, H*W, C) + + key = torch.cat((key0,key1,key2), dim=0) # (3N, H*W, C) + + value = key + + fusion_result = self.transformer_block(query, key, value) # (1, H*W, C) + fusion_result = fusion_result.permute(0,2,1).reshape(1, C, H, W)[0] + + out.append(fusion_result) + + out = torch.stack(out) + + return out + + + + + + + + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/disco_fuse.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/disco_fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..6222d72128d0b08557484155fa86f68d6cf7ae01 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/disco_fuse.py @@ -0,0 +1,96 @@ +# fusion method by disconet +# no kd loss +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple + +def regroup(x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + +class DiscoFusion(nn.Module): + def __init__(self, args): + super(DiscoFusion, self).__init__() + self.discrete_ratio = args['voxel_size'][0] # voxel_size[0]=0.4 + self.downsample_rate = args['downsample_rate'] # 2/4, downsample rate from original feature map [200, 704] + self.pixel_weight_layer = PixelWeightLayer(args['in_channels']) + + def forward(self, x, record_len, pairwise_t_matrix): + ########## FUSION START ########## + # we concat 
ego's feature with other agent + # first transform feature to ego's coordinate + split_x = regroup(x, record_len) + + B = pairwise_t_matrix.shape[0] + _, C, H, W = x.shape + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + out = [] + + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + # update each node i + i = 0 # ego + # (N, C, H, W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # Notice we put i one the first dim of t_matrix. Different from original. + # t_matrix[i,j] = Tji + neighbor_feature = warp_affine_simple(split_x[b], + t_matrix[i, :, :, :], + (H, W)) + + # (N, C, H, W) + ego_feature = split_x[b][0].view(1, C, H, W).expand(N, -1, -1, -1) + # (N, 2C, H, W) + neighbor_feature_cat = torch.cat((neighbor_feature, ego_feature), dim=1) + # (N, 1, H, W) + agent_weight = self.pixel_weight_layer(neighbor_feature_cat) + # (N, 1, H, W) + agent_weight = F.softmax(agent_weight, dim=0) + + agent_weight = agent_weight.expand(-1, C, -1, -1) + # (N, C, H, W) + feature_fused = torch.sum(agent_weight * neighbor_feature, dim=0) + out.append(feature_fused) + + return torch.stack(out) + + + +class PixelWeightLayer(nn.Module): + def __init__(self, channel): + super(PixelWeightLayer, self).__init__() + + self.conv1_1 = nn.Conv2d(channel * 2, 128, kernel_size=1, stride=1, padding=0) + self.bn1_1 = nn.BatchNorm2d(128) + + self.conv1_2 = nn.Conv2d(128, 32, kernel_size=1, stride=1, padding=0) + self.bn1_2 = nn.BatchNorm2d(32) + + self.conv1_3 = nn.Conv2d(32, 8, kernel_size=1, stride=1, padding=0) + self.bn1_3 = nn.BatchNorm2d(8) + + self.conv1_4 = nn.Conv2d(8, 1, kernel_size=1, stride=1, padding=0) + # self.bn1_4 = nn.BatchNorm2d(1) + + def forward(self, x): + x = x.view(-1, x.size(-3), x.size(-2), x.size(-1)) + x_1 = F.relu(self.bn1_1(self.conv1_1(x))) + x_1 = F.relu(self.bn1_2(self.conv1_2(x_1))) + x_1 = F.relu(self.bn1_3(self.conv1_3(x_1))) + x_1 = F.relu(self.conv1_4(x_1)) + + return x_1 \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/f_cooper_fuse.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/f_cooper_fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..0527e7694fb74911287af0daf11937354e991604 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/f_cooper_fuse.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +Implementation of F-cooper maxout fusing. 
+""" +import torch +import torch.nn as nn + + +class SpatialFusion(nn.Module): + def __init__(self): + super(SpatialFusion, self).__init__() + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, x, record_len): + # x: B, C, H, W, split x:[(B1, C, W, H), (B2, C, W, H)] + split_x = self.regroup(x, record_len) + out = [] + + for xx in split_x: + xx = torch.max(xx, dim=0, keepdim=True)[0] + out.append(xx) + return torch.cat(out, dim=0) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/fuse_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/fuse_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a42d172cc5b33044d81a899b89a4b75d700a704a --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/fuse_utils.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch +import numpy as np + +from einops import rearrange +from opencood.utils.common_utils import torch_tensor_to_numpy + + +def regroup(dense_feature, record_len, max_len): + """ + Regroup the data based on the record_len. + Parameters + ---------- + dense_feature : torch.Tensor + N, C, H, W + record_len : list + [sample1_len, sample2_len, ...] + max_len : int + Maximum cav number + Returns + ------- + regroup_feature : torch.Tensor + B, L, C, H, W + """ + cum_sum_len = list(np.cumsum(torch_tensor_to_numpy(record_len))) + split_features = torch.tensor_split(dense_feature, + cum_sum_len[:-1]) + regroup_features = [] + mask = [] + + for split_feature in split_features: + # M, C, H, W + feature_shape = split_feature.shape + + # the maximum M is 5 as most 5 cavs + padding_len = max_len - feature_shape[0] + mask.append([1] * feature_shape[0] + [0] * padding_len) + + padding_tensor = torch.zeros(padding_len, feature_shape[1], + feature_shape[2], feature_shape[3]) + padding_tensor = padding_tensor.to(split_feature.device) + + split_feature = torch.cat([split_feature, padding_tensor], + dim=0) + + # 1, 5C, H, W + split_feature = split_feature.view(-1, + feature_shape[2], + feature_shape[3]).unsqueeze(0) + regroup_features.append(split_feature) + + # B, 5C, H, W + regroup_features = torch.cat(regroup_features, dim=0) + # B, L, C, H, W + regroup_features = rearrange(regroup_features, + 'b (l c) h w -> b l c h w', + l=max_len) + mask = torch.from_numpy(np.array(mask)).to(regroup_features.device) + + return regroup_features, mask \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/fusion_in_one.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/fusion_in_one.py new file mode 100644 index 0000000000000000000000000000000000000000..32afb3fcc00d6700c432e50a32d0ac0ad689cd23 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/fusion_in_one.py @@ -0,0 +1,505 @@ +""" +A model zoo for intermediate fusion. +Please make sure your pairwise_t_matrix is normalized before using it. +Enjoy it. 
+""" + +import torch +from torch import nn +from icecream import ic +from opencood.models.fuse_modules.att_fuse import ScaledDotProductAttention +from opencood.models.sub_modules.torch_transformation_utils import \ + warp_affine_simple +from opencood.models.fuse_modules.fuse_utils import regroup as Regroup +from opencood.models.fuse_modules.att_fuse import ScaledDotProductAttention +from opencood.models.comm_modules.where2comm import Communication +from opencood.models.fuse_modules.where2comm_attn import TransformerFusion +import torch.nn.functional as F + +def regroup(x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + +def warp_feature(x, record_len, pairwise_t_matrix): + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + split_x = regroup(x, record_len) + batch_node_features = split_x + out = [] + # iterate each batch + for b in range(B): + N = record_len[b] + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + # update each node i + i = 0 # ego + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[i, :, :, :], + (H, W)) + out.append(neighbor_feature) + + out = torch.cat(out, dim=0) + + return out + +class MaxFusion(nn.Module): + def __init__(self): + super(MaxFusion, self).__init__() + + def forward(self, x, record_len, pairwise_t_matrix): + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + split_x = regroup(x, record_len) + batch_node_features = split_x + out = [] + # iterate each batch + for b in range(B): + N = record_len[b] + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + # update each node i + i = 0 # ego + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[i, :, :, :], + (H, W)) + out.append(torch.max(neighbor_feature, dim=0)[0]) + out = torch.stack(out) + + return out + +class AttFusion(nn.Module): + def __init__(self, feature_dims): + super(AttFusion, self).__init__() + self.att = ScaledDotProductAttention(feature_dims) + + def forward(self, xx, record_len, pairwise_t_matrix): + _, C, H, W = xx.shape + B, L = pairwise_t_matrix.shape[:2] + split_x = regroup(xx, record_len) + batch_node_features = split_x + out = [] + # iterate each batch + for b in range(B): + N = record_len[b] + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + # update each node i + i = 0 # ego + x = warp_affine_simple(batch_node_features[b], t_matrix[i, :, :, :], (H, W)) + cav_num = x.shape[0] + x = x.view(cav_num, C, -1).permute(2, 0, 1) # (H*W, cav_num, C), perform self attention on each pixel. + h = self.att(x, x, x) + h = h.permute(1, 2, 0).view(cav_num, C, H, W)[0, ...] 
# C, W, H before + out.append(h) + + out = torch.stack(out) + return out + +class DiscoFusion(nn.Module): + def __init__(self, feature_dims): + super(DiscoFusion, self).__init__() + from opencood.models.fuse_modules.disco_fuse import PixelWeightLayer + self.pixel_weight_layer = PixelWeightLayer(feature_dims) + + def forward(self, xx, record_len, pairwise_t_matrix): + _, C, H, W = xx.shape + B, L = pairwise_t_matrix.shape[:2] + split_x = regroup(xx, record_len) + out = [] + + for b in range(B): + N = record_len[b] + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + i = 0 # ego + neighbor_feature = warp_affine_simple(split_x[b], + t_matrix[i, :, :, :], + (H, W)) + # (N, C, H, W) + ego_feature = split_x[b][0].view(1, C, H, W).expand(N, -1, -1, -1) + # (N, 2C, H, W) + neighbor_feature_cat = torch.cat((neighbor_feature, ego_feature), dim=1) + # (N, 1, H, W) + agent_weight = self.pixel_weight_layer(neighbor_feature_cat) + # (N, 1, H, W) + agent_weight = F.softmax(agent_weight, dim=0) + + agent_weight = agent_weight.expand(-1, C, -1, -1) + # (N, C, H, W) + feature_fused = torch.sum(agent_weight * neighbor_feature, dim=0) + out.append(feature_fused) + + return torch.stack(out) + +class V2VNetFusion(nn.Module): + def __init__(self, args): + super(V2VNetFusion, self).__init__() + from opencood.models.sub_modules.convgru import ConvGRU + in_channels = args['in_channels'] + H, W = args['conv_gru']['H'], args['conv_gru']['W'] # remember to modify for v2xsim dataset + kernel_size = args['conv_gru']['kernel_size'] + num_gru_layers = args['conv_gru']['num_layers'] + self.num_iteration = args['num_iteration'] + self.gru_flag = args['gru_flag'] + self.agg_operator = args['agg_operator'] + + self.msg_cnn = nn.Conv2d(in_channels * 2, in_channels, kernel_size=3, + stride=1, padding=1) + self.conv_gru = ConvGRU(input_size=(H, W), + input_dim=in_channels * 2, + hidden_dim=[in_channels] * num_gru_layers, + kernel_size=kernel_size, + num_layers=num_gru_layers, + batch_first=True, + bias=True, + return_all_layers=False) + self.mlp = nn.Linear(in_channels, in_channels) + + def forward(self, x, record_len, pairwise_t_matrix): + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + split_x = regroup(x, record_len) + # (B*L,L,1,H,W) + roi_mask = torch.zeros((B, L, L, 1, H, W)).to(x) + for b in range(B): + N = record_len[b] + for i in range(N): + one_tensor = torch.ones((L,1,H,W)).to(x) + roi_mask[b,i] = warp_affine_simple(one_tensor, pairwise_t_matrix[b][i, :, :, :],(H, W)) + + batch_node_features = split_x + # iteratively update the features for num_iteration times + for l in range(self.num_iteration): + + batch_updated_node_features = [] + # iterate each batch + for b in range(B): + + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + updated_node_features = [] + + # update each node i + for i in range(N): + # (N,1,H,W) + mask = roi_mask[b, i, :N, ...] + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[i, :, :, :], + (H, W)) + + # (N,C,H,W) + ego_agent_feature = batch_node_features[b][i].unsqueeze( + 0).repeat(N, 1, 1, 1) + #(N,2C,H,W) + neighbor_feature = torch.cat( + [neighbor_feature, ego_agent_feature], dim=1) + # (N,C,H,W) + # message contains all feature map from j to ego i. 
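+                    # msg_cnn compresses the concatenated (neighbor, ego) maps from 2C
+                    # back to C channels; the ROI mask zeroes out locations that fall
+                    # outside the neighbor's field of view after warping.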
+ message = self.msg_cnn(neighbor_feature) * mask + + # (C,H,W) + if self.agg_operator=="avg": + agg_feature = torch.mean(message, dim=0) + elif self.agg_operator=="max": + agg_feature = torch.max(message, dim=0)[0] + else: + raise ValueError("agg_operator has wrong value") + # (2C, H, W) + cat_feature = torch.cat( + [batch_node_features[b][i, ...], agg_feature], dim=0) + # (C,H,W) + if self.gru_flag: + gru_out = \ + self.conv_gru(cat_feature.unsqueeze(0).unsqueeze(0))[ + 0][ + 0].squeeze(0).squeeze(0) + else: + gru_out = batch_node_features[b][i, ...] + agg_feature + updated_node_features.append(gru_out.unsqueeze(0)) + # (N,C,H,W) + batch_updated_node_features.append( + torch.cat(updated_node_features, dim=0)) + batch_node_features = batch_updated_node_features + # (B,C,H,W) + out = torch.cat( + [itm[0, ...].unsqueeze(0) for itm in batch_node_features], dim=0) + # (B,C,H,W) -> (B, H, W, C) -> (B,C,H,W) + out = self.mlp(out.permute(0, 2, 3, 1)).permute(0, 3, 1, 2) + + return out + +class V2XViTFusion(nn.Module): + def __init__(self, args): + super(V2XViTFusion, self).__init__() + from opencood.models.sub_modules.v2xvit_basic import V2XTransformer + self.fusion_net = V2XTransformer(args['transformer']) + + def forward(self, x, record_len, pairwise_t_matrix): + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + regroup_feature, mask = Regroup(x, record_len, L) + prior_encoding = \ + torch.zeros(len(record_len), L, 3, 1, 1).to(record_len.device) + + # prior encoding should include [velocity, time_delay, infra], but it is not supported by all basedataset. + # it is possible to modify the xxx_basedataset.py and intermediatefusiondataset.py to retrieve these information + prior_encoding = prior_encoding.repeat(1, 1, 1, + regroup_feature.shape[3], + regroup_feature.shape[4]) + + regroup_feature = torch.cat([regroup_feature, prior_encoding], dim=2) + regroup_feature_new = [] + + for b in range(B): + ego = 0 + regroup_feature_new.append(warp_affine_simple(regroup_feature[b], pairwise_t_matrix[b, ego], (H, W))) + regroup_feature = torch.stack(regroup_feature_new) + + # b l c h w -> b l h w c + regroup_feature = regroup_feature.permute(0, 1, 3, 4, 2) + # transformer fusion. In perfect setting, there is no delay. + # it is possible to modify the xxx_basedataset.py and intermediatefusiondataset.py to retrieve these information + spatial_correction_matrix = torch.eye(4).expand(len(record_len), L, 4, 4).to(record_len.device) + fused_feature = self.fusion_net(regroup_feature, mask, spatial_correction_matrix) + # b h w c -> b c h w + fused_feature = fused_feature.permute(0, 3, 1, 2) + + return fused_feature + +class When2commFusion(nn.Module): + def __init__(self, args): + super(When2commFusion, self).__init__() + import numpy as np + from opencood.models.fuse_modules.when2com_fuse import policy_net4, km_generator_v2, MIMOGeneralDotProductAttention, AdditiveAttentin + + self.in_channels = args['in_channels'] + self.feat_H = args['H'] + self.feat_W = args['W'] + self.query_size = args['query_size'] + self.key_size = args['key_size'] + + + self.query_key_net = policy_net4(self.in_channels) + self.key_net = km_generator_v2(out_size=self.key_size) + self.query_net = km_generator_v2(out_size=self.query_size) + # self.attention_net = MIMOGeneralDotProductAttention(self.query_size, self.key_size) + self.attention_net = AdditiveAttentin(self.key_size, self.query_size) + + def forward(self, x, record_len, pairwise_t_matrix): + """ + Fusion forwarding. 
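+
+        When2com builds a query from the ego feature and keys from every warped
+        neighbor feature, then attends over the neighbors to produce the fused
+        map for the ego agent.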
+ + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + weight: torch.Tensor + Weight of aggregating coming message + shape: (B, L, L) + + Returns + ------- + Fused feature. + """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] + split_x = regroup(x, record_len) + batch_node_features = split_x + updated_node_features = [] + for b in range(B): + + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + # update each node i + # (N,1,H,W) + # (N,C,H,W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # Notice we put i one the first dim of t_matrix. Different from original. + # t_matrix[i,j] = Tji + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[0, :, :, :], + (H, W)) + query_key_maps = self.query_key_net(neighbor_feature) + + keys = self.key_net(query_key_maps).unsqueeze(0) # [N, C_k] + query = self.query_net(query_key_maps[0].unsqueeze(0)).unsqueeze(0) # [1, C_q] + + neighbor_feature = neighbor_feature.unsqueeze(0) # [1, N, C, H, W] + + feat_fuse, prob_action = self.attention_net(query, keys, neighbor_feature, sparse=False) + + updated_node_features.append(feat_fuse) + + out = torch.cat(updated_node_features, dim=0) + + return out + + + +class Where2commFusion(nn.Module): + def __init__(self, args): + super(Where2commFusion, self).__init__() + + self.communication = False + self.round = 1 + if 'communication' in args: + self.communication = True + self.naive_communication = Communication(args['communication']) + if 'round' in args['communication']: + self.round = args['communication']['round'] + + self.agg_mode = args['agg_operator']['mode'] + self.multi_scale = args['multi_scale'] + if self.multi_scale: + layer_nums = args['layer_nums'] + num_filters = args['num_filters'] + self.num_levels = len(layer_nums) + self.fuse_modules = nn.ModuleList() + for idx in range(self.num_levels): + if self.agg_mode == 'ATTEN': + fuse_network = AttFusion(num_filters[idx]) + elif self.agg_mode == 'MAX': + fuse_network = MaxFusion() + elif self.agg_mode == 'Transformer': + fuse_network = TransformerFusion( + channels=num_filters[idx], + n_head=args['agg_operator']['n_head'], + with_spe=args['agg_operator']['with_spe'], + with_scm=args['agg_operator']['with_scm']) + self.fuse_modules.append(fuse_network) + else: + if self.agg_mode == 'ATTEN': + self.fuse_modules = AttFusion(args['agg_operator']['feature_dim']) + elif self.agg_mode == 'MAX': + self.fuse_modules = MaxFusion() + elif self.agg_mode == 'Transformer': + self.fuse_network = TransformerFusion( + channels=args['agg_operator']['feature_dim'], + n_head=args['agg_operator']['n_head'], + with_spe=args['agg_operator']['with_spe'], + with_scm=args['agg_operator']['with_scm']) + + def forward(self, x, rm, record_len, pairwise_t_matrix, backbone=None, heads=None): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + Returns + ------- + Fused feature. 
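+
+        Notes
+        -----
+        `rm` holds the per-agent confidence maps that the communication module
+        turns into spatial masks deciding which locations are transmitted;
+        `backbone` is only needed for the multi-scale path. Besides the fused
+        feature, the method also returns the communication rate and an empty dict.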
+ """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + if self.multi_scale: + ups = [] + # backbone.__dict__() + with_resnet = True if hasattr(backbone, 'resnet') else False + if with_resnet: + feats = backbone.resnet(x) + + for i in range(self.num_levels): + x = feats[i] if with_resnet else backbone.blocks[i](x) + + ############ 1. Communication (Mask the features) ######### + if i==0: + if self.communication: + batch_confidence_maps = regroup(rm, record_len) + _, communication_masks, communication_rates = self.naive_communication(batch_confidence_maps, record_len, pairwise_t_matrix) + x = x * communication_masks + else: + communication_rates = torch.tensor(0).to(x.device) + + ############ 2. Split the confidence map ####################### + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] + batch_node_features = regroup(x, record_len) + + ############ 3. Fusion #################################### + x_fuse = [] + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + node_features = batch_node_features[b] + C, H, W = node_features.shape[1:] + neighbor_feature = warp_affine_simple(node_features, + t_matrix[0, :, :, :], + (H, W)) + x_fuse.append(self.fuse_modules[i](neighbor_feature)) + x_fuse = torch.stack(x_fuse) + + ############ 4. Deconv #################################### + if len(backbone.deblocks) > 0: + ups.append(backbone.deblocks[i](x_fuse)) + else: + ups.append(x_fuse) + + if len(ups) > 1: + x_fuse = torch.cat(ups, dim=1) + elif len(ups) == 1: + x_fuse = ups[0] + + if len(backbone.deblocks) > self.num_levels: + x_fuse = backbone.deblocks[-1](x_fuse) + else: + ############ 1. Split the features ####################### + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] + batch_node_features = self.regroup(x, record_len) + batch_confidence_maps = self.regroup(rm, record_len) + + ############ 2. Communication (Mask the features) ######### + if self.communication: + _, communication_masks, communication_rates = self.naive_communication(batch_confidence_maps, record_len, pairwise_t_matrix) + else: + communication_rates = torch.tensor(0).to(x.device) + + ############ 3. 
Fusion #################################### + x_fuse = [] + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + node_features = batch_node_features[b] + if self.communication: + node_features = node_features * communication_masks[b] + neighbor_feature = warp_affine_simple(node_features, + t_matrix[0, :, :, :], + (H, W)) + x_fuse.append(self.fuse_modules(neighbor_feature)) + x_fuse = torch.stack(x_fuse) + + return x_fuse, communication_rates, {} \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/max_fuse.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/max_fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..538cf4f2039a6bcc6a9df728c355326a19af6f3d --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/max_fuse.py @@ -0,0 +1,200 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + +import torch +import torch.nn as nn + +from opencood.models.sub_modules.torch_transformation_utils import \ + get_discretized_transformation_matrix, get_transformation_matrix, \ + warp_affine_simple, get_rotated_roi +from opencood.models.sub_modules.convgru import ConvGRU +from icecream import ic +from matplotlib import pyplot as plt + +class MaxFusion(nn.Module): + def __init__(self, args): + super(MaxFusion, self).__init__() + self.discrete_ratio = args['voxel_size'][0] # voxel_size[0]=0.4 + self.downsample_rate = args['downsample_rate'] # 2/4, downsample rate from original feature map [200, 704] + + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, x, record_len, pairwise_t_matrix): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + Returns + ------- + Fused feature. + """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] 
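+        # Regroup the stacked features by sample, then convert each 4x4 transform
+        # into the normalized 2x3 affine expected by warp_affine_simple: rotation
+        # terms are rescaled by the feature map aspect ratio and translations are
+        # mapped onto the [-1, 1] sampling grid.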
+ split_x = self.regroup(x, record_len) + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + + # (B*L,L,1,H,W) + roi_mask = torch.zeros((B, L, L, 1, H, W)).to(x) + for b in range(B): + N = record_len[b] + for i in range(N): + one_tensor = torch.ones((L,1,H,W)).to(x) + roi_mask[b,i] = warp_affine_simple(one_tensor, pairwise_t_matrix[b][i, :, :, :],(H, W)) + + batch_node_features = split_x + # iteratively update the features for num_iteration times + + out = [] + # iterate each batch + for b in range(B): + + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + updated_node_features = [] + + # update each node i + i = 0 # ego + # (N,1,H,W) + mask = roi_mask[b, i, :N, ...] + # (N,C,H,W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # Notice we put i one the first dim of t_matrix. Different from original. + # t_matrix[i,j] = Tji + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[i, :, :, :], + (H, W)) + out.append(torch.max(neighbor_feature, dim=0)[0]) + out = torch.stack(out) + + return out + + + def forward_debug(self, x, origin_x, record_len, pairwise_t_matrix): + """ + Fusion forwarding + Used for debug and visualization + + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + origin_x: torch.Tensor + pillars (sum(n_cav), C, H * downsample_rate, W * downsample_rate) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + Returns + ------- + Fused feature. + """ + from matplotlib import pyplot as plt + + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + split_x = self.regroup(x, record_len) + split_origin_x = self.regroup(origin_x, record_len) + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + + # (B*L,L,1,H,W) + roi_mask = torch.zeros((B, L, L, 1, H, W)).to(x) + for b in range(B): + N = record_len[b] + for i in range(N): + one_tensor = torch.ones((L,1,H,W)).to(x) + roi_mask[b,i] = warp_affine_simple(one_tensor, pairwise_t_matrix[b][i, :, :, :],(H, W)) + + batch_node_features = split_x + # iteratively update the features for num_iteration times + + # visualize warped feature map + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + # update each node i + i = 0 # ego + mask = roi_mask[b, i, :N, ...] + # (N,C,H,W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # Notice we put i one the first dim of t_matrix. Different from original. 
+ # t_matrix[i,j] = Tji + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[i, :, :, :], + (H, W)) + for idx in range(N): + plt.imshow(torch.max(neighbor_feature[idx],0)[0].detach().cpu().numpy()) + plt.savefig(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/vis_result/debug_warp_feature/feature_{b}_{idx}") + plt.clf() + plt.imshow(mask[idx][0].detach().cpu().numpy()) + plt.savefig(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/vis_result/debug_warp_feature/mask_feature_{b}_{idx}") + plt.clf() + + + + # visualize origin pillar feature + origin_node_features = split_origin_x + + for b in range(B): + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + i = 0 # ego + # (N,C,H,W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # Notice we put i one the first dim of t_matrix. Different from original. + # t_matrix[i,j] = Tji + neighbor_feature = warp_affine_simple(origin_node_features[b], + t_matrix[i, :, :, :], + (H*self.downsample_rate, W*self.downsample_rate)) + + for idx in range(N): + plt.imshow(torch.max(neighbor_feature[idx],0)[0].detach().cpu().numpy()) + plt.savefig(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/vis_result/debug_warp_feature/origin_{b}_{idx}") + plt.clf() \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/mean_fuse.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/mean_fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..61d33d787e0ceaa256d6045d07d7a3b13d68e988 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/mean_fuse.py @@ -0,0 +1,129 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +Implementation of V2VNet Fusion +""" + +import torch +import torch.nn as nn + +from opencood.models.sub_modules.torch_transformation_utils import \ + get_discretized_transformation_matrix, get_transformation_matrix, \ + warp_affine_simple, get_rotated_roi +from opencood.models.sub_modules.convgru import ConvGRU +from icecream import ic +from matplotlib import pyplot as plt +from icecream import ic + +class MeanFusion(nn.Module): + def __init__(self, args): + super(MeanFusion, self).__init__() + + self.discrete_ratio = args['voxel_size'][0] # voxel_size[0]=0.4 0.16m one pixel? I think it's 0.4m per pixel, according to [200, 704] + self.downsample_rate = args['downsample_rate'] # 4, downsample rate from original feature map [200, 704] + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, x, record_len, pairwise_t_matrix): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + Returns + ------- + Fused feature. + """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] 
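+        # Unlike max fusion, averaging also counts the zero padding outside each
+        # neighbor's warped field of view, so responses near the range boundary
+        # are diluted as the number of agents grows.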
+ split_x = self.regroup(x, record_len) + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + + # (B*L,L,1,H,W) + roi_mask = torch.zeros((B, L, L, 1, H, W)).to(x) + for b in range(B): + N = record_len[b] + for i in range(N): + one_tensor = torch.ones((L,1,H,W)).to(x) + roi_mask[b,i] = warp_affine_simple(one_tensor, pairwise_t_matrix[b][i, :, :, :],(H, W)) + + batch_node_features = split_x + # iteratively update the features for num_iteration times + + out = [] + # iterate each batch + for b in range(B): + + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + # update each node i + i = 0 # ego + # (N,1,H,W) + mask = roi_mask[b, i, :N, ...] + # (N,C,H,W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # Notice we put i one the first dim of t_matrix. Different from original. + # t_matrix[i,j] = Tji + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[i, :, :, :], + (H, W)) + + out.append(torch.mean(neighbor_feature, dim=0)) + out = torch.stack(out) + + return out + + + + + + + + + + + + + + + + +# from matplotlib import pyplot as plt +# for idx in range(3): +# plt.imshow(torch.max(neighbor_feature[idx],0)[0].detach().cpu().numpy()) +# plt.savefig(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/vis_result/in_mean/agent{idx}") +# plt.clf() +# plt.imshow(mask[idx][0].detach().cpu().numpy()) +# plt.savefig(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/vis_result/in_mean/mask{idx}") +# plt.clf() + +# raise \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/modality_aware_fusion.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/modality_aware_fusion.py new file mode 100644 index 0000000000000000000000000000000000000000..fa2b1efbeb98b185f5e65d5824262c84a7f5ba28 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/modality_aware_fusion.py @@ -0,0 +1,74 @@ +import torch +import torch.nn as nn +from opencood.models.fuse_modules.fusion_in_one import regroup, warp_feature +from opencood.models.fuse_modules.att_fuse import ScaledDotProductAttention +from opencood.models.sub_modules.torch_transformation_utils import \ + warp_affine_simple + +# TODO +# https://github.com/microsoft/Swin-Transformer/tree/f92123a0035930d89cf53fcb8257199481c4428d/kernels/window_process + + +class MAttFusion(nn.Module): + def __init__(self, feature_dims): + super().__init__() + print(feature_dims) + print(type(feature_dims)) + self.att = ScaledDotProductAttention(feature_dims) + + def forward(self, x, record_len, pairwise_t_matrix, lidar_agent_indicator): + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + split_x = regroup(x, record_len) + split_lidar_indicator = regroup(lidar_agent_indicator, record_len) + + batch_node_features = split_x + batch_node_lidar_agent = split_lidar_indicator + + out = [] + # iterate each batch + for b in range(B): + N = record_len[b] + lidar_agent = batch_node_lidar_agent[b] + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] 
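+            # lidar_agent marks which of the N agents carry LiDAR; when both
+            # modalities are present, the branch below cross-attends 3x3-shifted
+            # camera features with the max-pooled LiDAR feature, otherwise it
+            # falls back to plain per-pixel self-attention.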
+ + # update each node i + i = 0 # ego + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[i, :, :, :], + (H, W)) + if sum(lidar_agent) !=0 and sum(lidar_agent) != N: + # multi modality aware + lidar_feature = torch.max(neighbor_feature[lidar_agent], dim=0)[0] # [C, H, W] + camera_feature = torch.max(neighbor_feature[1-lidar_agent], dim=0)[0] # [C, H, W] + N_lidar = sum(lidar_agent) + N_camera = N - N_lidar + + # spatial attention 3x3 + camera_feature_3x3 = [] + x_offsets = [-1, 0, 1] + y_offsets = [-1, 0, 1] + for x_offset in x_offsets: + for y_offset in y_offsets: + camera_feature_3x3.append(torch.roll(camera_feature, (x_offset, y_offset), (0,1))) + camera_feature_3x3 = torch.stack(camera_feature_3x3, dim=0) # 9, C, H, W + + key = lidar_feature.view(1, C, -1).permute(2, 0, 1) # [H*W, 1, C] + query = camera_feature_3x3.view(9, C, -1).permute(2, 0, 1) # [H*W, N_camera, C] + value = query + h = self.att(key, query, value) + h = h.permute(1, 2, 0).view(1, C, H, W)[0, ...] # [C, H, W] + out.append(torch.maximum(h, lidar_feature)) + + else: + # single modality + cav_num = neighbor_feature.shape[0] + x = neighbor_feature.view(cav_num, C, -1).permute(2, 0, 1) # (H*W, cav_num, C), perform self attention on each pixel. + h = self.att(x, x, x) + h = h.permute(1, 2, 0).view(cav_num, C, H, W)[0, ...] # C, W, H before + out.append(h) + + + out = torch.stack(out) + + return out diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/ms_max_fuse.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/ms_max_fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..99550922a2534ac916342e462d24b284e43df1ef --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/ms_max_fuse.py @@ -0,0 +1,178 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + +import torch +import torch.nn as nn +from opencood.models.sub_modules.resblock import ResNetModified, BasicBlock, Bottleneck +from opencood.models.sub_modules.torch_transformation_utils import \ + get_discretized_transformation_matrix, get_transformation_matrix, \ + warp_affine_simple, get_rotated_roi +from opencood.models.sub_modules.convgru import ConvGRU +from icecream import ic +from matplotlib import pyplot as plt + +class MSMaxFusion(nn.Module): + def __init__(self, args): + super(MSMaxFusion, self).__init__() + self.discrete_ratio = args['voxel_size'][0] # voxel_size[0]=0.4 + self.downsample_rate = args['downsample_rate'] # 2/4, downsample rate from original feature map [200, 704] + self.outC = args['outC'] + layer_nums = args['layer_nums'] + num_filters = args['num_filters'] + layer_strides = args['layer_strides'] + upsample_strides = args['upsample_strides'] + num_upsample_filters = args['num_upsample_filter'] + self.level_num = len(layer_nums) + + self.resnet = ResNetModified(BasicBlock, + layer_nums, + layer_strides, + num_filters) + num_levels = len(layer_nums) + + self.fuse_modules = nn.ModuleList() + self.deblocks = nn.ModuleList() + + for idx in range(num_levels): + + fuse_network = MaxFusion() + self.fuse_modules.append(fuse_network) + + if len(upsample_strides) > 0: + stride = upsample_strides[idx] + self.deblocks.append(nn.Sequential( + nn.ConvTranspose2d( + num_filters[idx], num_upsample_filters[idx], + upsample_strides[idx], + stride=upsample_strides[idx], bias=False + ), + nn.BatchNorm2d(num_upsample_filters[idx], + eps=1e-3, momentum=0.01), + nn.ReLU() + )) + + + 
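+        # Features upsampled from every pyramid level are concatenated along the
+        # channel dimension before the final 3x3 conv head.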
c_in = sum(num_upsample_filters) + if len(upsample_strides) > num_levels: + self.deblocks.append(nn.Sequential( + nn.ConvTranspose2d(c_in, c_in, upsample_strides[-1], + stride=upsample_strides[-1], bias=False), + nn.BatchNorm2d(c_in, eps=1e-3, momentum=0.01), + nn.ReLU(), + )) + + self.num_bev_features = c_in + + self.conv_last = nn.Conv2d(sum(num_upsample_filters), outC, kernel_size=3, stride=1, padding=1) + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, x, record_len, pairwise_t_matrix): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + Returns + ------- + Fused feature. + """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + + features = self.resnet(x) + + ups = [] + + for i in range(self.level_num): + x_fuse = self.fuse_modules[i](features[i], record_len, pairwise_t_matrix) + if len(self.deblocks) > 0: + ups.append(self.deblocks[i](x_fuse)) + else: + ups.append(x_fuse) + + if len(ups) > 1: + x = torch.cat(ups, dim=1) + elif len(ups) == 1: + x = ups[0] + + x = self.conv_last(x) + + return x + + + +class MaxFusion(nn.Module): + def __init__(self): + super(MaxFusion, self).__init__() + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, x, record_len, pairwise_t_matrix): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The affine transformation matrix from each cav to ego, already normalized + shape: (B, L, L, 2, 3) + + Returns + ------- + Fused feature. 
+ """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + split_x = self.regroup(x, record_len) + + batch_node_features = split_x + + out = [] + # iterate each batch + for b in range(B): + + N = record_len[b] + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + # update each node i + i = 0 # ego + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[i, :, :, :], + (H, W)) + out.append(torch.max(neighbor_feature, dim=0)[0]) + out = torch.stack(out) + + return out \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/self_attn.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/self_attn.py new file mode 100644 index 0000000000000000000000000000000000000000..a6d468a17689e2117f70d7b254902e1019413635 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/self_attn.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang , Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple + +DEBUG=False + +class ScaledDotProductAttention(nn.Module): + """ + Scaled Dot-Product Attention proposed in "Attention Is All You Need" + Compute the dot products of the query with all keys, divide each by sqrt(dim), + and apply a softmax function to obtain the weights on the values + Args: dim, mask + dim (int): dimention of attention + mask (torch.Tensor): tensor containing indices to be masked + Inputs: query, key, value, mask + - **query** (batch, q_len, d_model): tensor containing projection + vector for decoder. + - **key** (batch, k_len, d_model): tensor containing projection + vector for encoder. + - **value** (batch, v_len, d_model): tensor containing features of the + encoded input sequence. + - **mask** (-): tensor containing indices to be masked + Returns: context, attn + - **context**: tensor containing the context vector from + attention mechanism. + - **attn**: tensor containing the attention (alignment) from the + encoder outputs. + """ + + def __init__(self, dim): + super(ScaledDotProductAttention, self).__init__() + self.sqrt_dim = np.sqrt(dim) + + def forward(self, query, key, value): + score = torch.bmm(query, key.transpose(1, 2)) / self.sqrt_dim + attn = F.softmax(score, -1) + context = torch.bmm(attn, value) + return context + + +class AttFusion(nn.Module): + def __init__(self, feature_dim): + super(AttFusion, self).__init__() + self.att = ScaledDotProductAttention(feature_dim) + + def forward(self, x, record_len, pairwise_t_matrix): + """ + pairwise_t_matrix : [N,N,2,3] + """ + split_x = self.regroup(x, record_len) + batch_size = len(record_len) + C, H, W = split_x[0].shape[1:] # C, W, H before + out = [] + for b, xx in enumerate(split_x): + N = xx.shape[0] + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + i = 0 # ego + xx = warp_affine_simple(xx, t_matrix[i, :, :, :], (H, W)) + + cav_num = xx.shape[0] + xx = xx.view(cav_num, C, -1).permute(2, 0, 1) # (H*W, cav_num, C), perform self attention on each pixel. 
+ h = self.att(xx, xx, xx) + h = h.permute(1, 2, 0).view(cav_num, C, H, W)[0, ...].unsqueeze(0) # C, W, H before + out.append(h) + return torch.cat(out, dim=0) + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + + + def forward_debug(self, x, origin_x, record_len, pairwise_t_matrix): + split_x = self.regroup(x, record_len) + split_origin_x = self.regroup(origin_x, record_len) + batch_size = len(record_len) + C, H, W = split_x[0].shape[1:] # C, W, H before + H_origin, W_origin = split_origin_x[0].shape[2:] + out = [] + from matplotlib import pyplot as plt + for b, xx in enumerate(split_x): + N = xx.shape[0] + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + i = 0 + xx = warp_affine_simple(xx, t_matrix[i, :, :, :], (H, W)) + origin_xx = warp_affine_simple(split_origin_x[b], t_matrix[i, :, :, :], (H_origin, W_origin)) + + for idx in range(N): + plt.imshow(torch.max(xx[idx],0)[0].detach().cpu().numpy()) + plt.savefig(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/vis_result/debug_warp_feature/feature_{b}_{idx}") + plt.clf() + plt.imshow(torch.max(origin_xx[idx],0)[0].detach().cpu().numpy()) + plt.savefig(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/vis_result/debug_warp_feature/origin_feature_{b}_{idx}") + plt.clf() + raise \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/transformer.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..8578dc41ee877f144ebe568ee9c829f3d52ca53d --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/transformer.py @@ -0,0 +1,146 @@ +""" +Implementation of Simple transformer fusion. +""" + +import torch +import torch.nn as nn +import torch.nn.functional as F +import kornia + + +class TransformerMessage(nn.Module): + def __init__(self, + in_channels=64, + trans_layer=[3]): + super(TransformerMessage, self).__init__() + self.in_channels = in_channels + + self.trans_layer = trans_layer + + dropout = 0 + nhead = 8 + for c_layer in self.trans_layer: + d_model = in_channels + cross_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) + # Implementation of Feedforward model + linear1 = nn.Linear(d_model, d_model) + linear2 = nn.Linear(d_model, d_model) + + norm1 = nn.LayerNorm(d_model) + norm2 = nn.LayerNorm(d_model) + dropout0 = nn.Dropout(dropout) + dropout1 = nn.Dropout(dropout) + dropout2 = nn.Dropout(dropout) + self.__setattr__('cross_attn'+str(c_layer), cross_attn) + self.__setattr__('linear1_'+str(c_layer), linear1) + self.__setattr__('linear2_'+str(c_layer), linear2) + self.__setattr__('norm1_'+str(c_layer), norm1) + self.__setattr__('norm2_'+str(c_layer), norm2) + self.__setattr__('dropout0_'+str(c_layer), dropout0) + self.__setattr__('dropout1_'+str(c_layer), dropout1) + self.__setattr__('dropout2_'+str(c_layer), dropout2) + + def add_pe_map(self, x, normalized=True): + """ Add positional encoding to feature map. 
+ Args: + x: torch.Tensor + [N, C, H, W] + + """ + # scale = 2 * math.pi + temperature = 10000 + num_pos_feats = x.shape[-3] // 2 # d + + mask = torch.zeros([x.shape[-2], x.shape[-1]], dtype=torch.bool, device=x.device) + not_mask = ~mask + y_embed = not_mask.cumsum(0, dtype=torch.float32) + x_embed = not_mask.cumsum(1, dtype=torch.float32) + + dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=x.device) + dim_t = temperature ** (2 * (dim_t // 2) / num_pos_feats) + + pos_x = x_embed[:, :, None] / dim_t + pos_y = y_embed[:, :, None] / dim_t + pos_x = torch.stack((pos_x[:, :, 0::2].sin(), pos_x[:, :, 1::2].cos()), dim=3).flatten(2) + pos_y = torch.stack((pos_y[:, :, 0::2].sin(), pos_y[:, :, 1::2].cos()), dim=3).flatten(2) + pos = torch.cat((pos_y, pos_x), dim=2).permute(2, 0, 1) + + if len(x.shape) == 5: + x = x + pos[None,None,:,:,:] + elif len(x.shape) == 6: + x = x + pos[None,None,None,:,:,:] + return x + + def forward(self, x, shift_mats, shift_mats_rev, agent_mask): + batch, max_agent_num, c, h, w = x.shape + + # ================================ + # First, transform to each coord + feat_shifted = [] + for agent_i in range(max_agent_num): + # shift_mat_i = shift_mats[:, agent_i, :, :] + shift_mat_rev_i = shift_mats_rev[:, agent_i, :, :] + feat_i = x[:, agent_i, :, :, :] + feat_shifted_i = [] + for agent_j in range(max_agent_num): + shift_mat_j = shift_mats[:, agent_j, :, :] + shift_mat = shift_mat_j.view(batch, 3, 3) @ shift_mat_rev_i.view(batch, 3, 3) + feat = kornia.warp_perspective(feat_i, shift_mat, dsize=(100 * 2, 100 * 2), align_corners=False) + feat_shifted_i.append(feat) + feat_shifted_i = torch.cat([f.unsqueeze(1) for f in feat_shifted_i], dim=1) + feat_shifted.append(feat_shifted_i) + feat_shifted = torch.cat([f.unsqueeze(1) for f in feat_shifted], dim=1) + + + # ================================ + # x_fuse, _, _ = self.TRANSFORMER_MESSAGE([[],[],[],local_com_mat], [transformed_feature], num_agent_tensor) + + for i, c_layer in enumerate(self.trans_layer): + batch_updated_features = torch.zeros(batch, max_agent_num, c, h, w).to(shift_mats.device) + for batch_i in range(batch): + N = int(torch.sum(agent_mask[batch_i])) + feat_map = x[batch_i:batch_i+1, :N, :, :, :] + val_feat = feat_shifted[batch_i:batch_i+1, :N, :N, :, :, :] + + feat_map = self.add_pe_map(feat_map) + # [b,N,C,H,W] -> [b,N,H,W,C] + # [b,N,N,C,H,W] -> [N,b,N,H,W,C] + src = feat_map.permute(0,1,3,4,2).contiguous().view(N*h*w,c).contiguous().unsqueeze(0) + tgt = val_feat.permute(1,0,2,4,5,3).contiguous().view(N, N*h*w,c).contiguous() + # print(src.shape) # torch.Size([1, 120000, 64]) + # print(tgt.shape) # torch.Size([N, 120000, 64]) + + src2, weight_mat = eval('self.cross_attn'+str(c_layer))(src, tgt, value=tgt, attn_mask=None, key_padding_mask=None) + src = src + eval('self.dropout1_'+str(c_layer))(src2) + src = eval('self.norm1_'+str(c_layer))(src) + src2 = eval('self.linear2_'+str(c_layer))(eval('self.dropout0_'+str(c_layer))(F.relu(eval('self.linear1_'+str(c_layer))(src)))) + src = src + eval('self.dropout2_'+str(c_layer))(src2) + src = eval('self.norm2_'+str(c_layer))(src) + + feat_fuse = src.view(1, N, h, w, c).contiguous().permute(0, 1, 4, 2, 3).contiguous() + # print(feat_fuse.shape) # torch.Size([1, N, 64, 200, 200]) + batch_updated_features[batch_i, :N, :, :, :] = feat_fuse.squeeze(0) + + return batch_updated_features, None + + +if __name__=="__main__": + from icecream import ic + x = torch.rand((64,6,8)) # [C,H,W] + temperature = 10000 + num_pos_feats = x.shape[-3] // 2 # [d] + + mask = 
torch.zeros([x.shape[-2], x.shape[-1]], dtype=torch.bool, device=x.device) #[H, W] + not_mask = ~mask + y_embed = not_mask.cumsum(0, dtype=torch.float32) # [H, W] + x_embed = not_mask.cumsum(1, dtype=torch.float32) # [H, W] + + dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=x.device) # [0,1,2,...,d] + dim_t = temperature ** (2 * (dim_t // 2) / num_pos_feats) # 10000^(2k/d), k is [0,0,1,1,...,d/2,d/2] + + pos_x = x_embed[:, :, None] / dim_t + pos_y = y_embed[:, :, None] / dim_t + + pos_x = torch.stack((pos_x[:, :, 0::2].sin(), pos_x[:, :, 1::2].cos()), dim=3).flatten(2) + pos_y = torch.stack((pos_y[:, :, 0::2].sin(), pos_y[:, :, 1::2].cos()), dim=3).flatten(2) + pos = torch.cat((pos_y, pos_x), dim=2).permute(2, 0, 1) diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/transformer_fuse.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/transformer_fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..cf915d078837ba4614472be541b07809426543d4 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/transformer_fuse.py @@ -0,0 +1,219 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +Implementation of transformer encoder fusion. +""" + +import torch +import torch.nn as nn + +from opencood.models.sub_modules.torch_transformation_utils import \ + get_discretized_transformation_matrix, get_transformation_matrix, \ + warp_affine_simple, get_rotated_roi +import torch.nn.functional as F +from icecream import ic +from matplotlib import pyplot as plt + +# class MultiheadAttBlock(nn.Module): +# def __init__(self, channels, n_head=8, dropout=0): +# super(MultiheadAttBlock, self).__init__() +# self.attn = nn.MultiheadAttention(channels, n_head, dropout) + +# def forward(self, q, k, v): +# """ +# order (seq, batch, feature) +# Args: +# q: (1, H*W, C) +# k: (N, H*W, C) +# v: (N, H*W, C) +# Returns: +# outputs: () +# """ +# context, weight = self.attn(q,k,v) # (1, H*W, C) + +# return context + + +class EncodeLayer(nn.Module): + def __init__(self, channels, n_head=8, dropout=0): + super(EncodeLayer, self).__init__() + self.attn = nn.MultiheadAttention(channels, n_head, dropout) + self.linear1 = nn.Linear(channels, channels) + self.linear2 = nn.Linear(channels, channels) + + self.norm1 = nn.LayerNorm(channels) + self.norm2 = nn.LayerNorm(channels) + + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + + self.relu = nn.ReLU() + + def forward(self, q, k, v): + """ + order (seq, batch, feature) + Args: + q: (1, H*W, C) + k: (N, H*W, C) + v: (N, H*W, C) + Returns: + outputs: () + """ + residual = q + context, weight = self.attn(q,k,v) # (1, H*W, C) + context = self.dropout1(context) + output1 = self.norm1(residual + context) + + # feed forward net + residual = output1 # (1, H*W, C) + context = self.linear2(self.relu(self.linear1(output1))) + context = self.dropout2(context) + output2 = self.norm2(residual + context) + + return output2 + + + + + +class TransformerFusion(nn.Module): + def __init__(self, args): + super(TransformerFusion, self).__init__() + + self.channels = args['in_channels'] + self.n_head = args['n_head'] + self.dropout = args['dropout_rate'] + + self.discrete_ratio = args['voxel_size'][0] + self.downsample_rate = args['downsample_rate'] + + self.encode_layer = EncodeLayer(self.channels, self.n_head, self.dropout) + + def add_pe_map(self, x, normalized=True): + # scale = 2 * 
math.pi + temperature = 10000 + num_pos_feats = x.shape[-3] // 2 # positional encoding dimension. C = 2d + + mask = torch.zeros([x.shape[-2], x.shape[-1]], dtype=torch.bool, device=x.device) #[H, W] + not_mask = ~mask + y_embed = not_mask.cumsum(0, dtype=torch.float32) # [H, W] + x_embed = not_mask.cumsum(1, dtype=torch.float32) # [H, W] + + dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=x.device) # [0,1,2,...,d] + dim_t = temperature ** (2 * (dim_t // 2) / num_pos_feats) # 10000^(2k/d), k is [0,0,1,1,...,d/2,d/2] + + pos_x = x_embed[:, :, None] / dim_t + pos_y = y_embed[:, :, None] / dim_t + pos_x = torch.stack((pos_x[:, :, 0::2].sin(), pos_x[:, :, 1::2].cos()), dim=3).flatten(2) + pos_y = torch.stack((pos_y[:, :, 0::2].sin(), pos_y[:, :, 1::2].cos()), dim=3).flatten(2) + pos = torch.cat((pos_y, pos_x), dim=2).permute(2, 0, 1) # [C, H, W] + + if len(x.shape) == 4: + x = x + pos[None,:,:,:] + elif len(x.shape) == 5: + x = x + pos[None,None,:,:,:] + return x + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, x, record_len, pairwise_t_matrix): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + Returns + ------- + Fused feature. + """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] + split_x = self.regroup(x, record_len) + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + + # (B*L,L,1,H,W) + roi_mask = torch.zeros((B, L, L, 1, H, W)).to(x) + for b in range(B): + N = record_len[b] + for i in range(N): + one_tensor = torch.ones((L,1,H,W)).to(x) + roi_mask[b,i] = warp_affine_simple(one_tensor, pairwise_t_matrix[b][i, :, :, :],(H, W)) + + batch_node_features = split_x + # iteratively update the features for num_iteration times + + out = [] + # iterate each batch + for b in range(B): + + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + updated_node_features = [] + + # update each node i + i = 0 # ego + # (N,1,H,W) + mask = roi_mask[b, i, :N, ...] + + # (N,C,H,W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # Notice we put i one the first dim of t_matrix. Different from original. 
+ # t_matrix[i,j] = Tji + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[i, :, :, :], + (H, W)) + + neighbor_feature_flat = neighbor_feature.view(N,C,H*W) # (N, C, H*W) + neighbor_feature_flat_pe = self.add_pe_map(neighbor_feature).view(N,C,H*W) # (N, C, H*W) + + query = neighbor_feature_flat_pe[0:1,...].permute(0,2,1) # (1, H*W, C) + key = neighbor_feature_flat_pe.permute(0,2,1) # (N, H*W, C) + value = neighbor_feature_flat.permute(0,2,1) + + + + fusion_result = self.encode_layer(query, key, value) # (1, H*W, C) + fusion_result = fusion_result.permute(0,2,1).reshape(1, C, H, W)[0] + + out.append(fusion_result) + + out = torch.stack(out) + + return out + + + + + + + + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/v2v_fuse.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/v2v_fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..689631882edf6489126ba363f4435729af616d88 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/v2v_fuse.py @@ -0,0 +1,179 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +Implementation of V2VNet Fusion +""" + +from email import message_from_binary_file +import torch +import torch.nn as nn + +from opencood.models.sub_modules.torch_transformation_utils import \ + get_discretized_transformation_matrix, get_transformation_matrix, \ + warp_affine_simple, get_rotated_roi +from opencood.models.sub_modules.convgru import ConvGRU +from icecream import ic +from matplotlib import pyplot as plt +from icecream import ic + +class V2VNetFusion(nn.Module): + def __init__(self, args): + super(V2VNetFusion, self).__init__() + + in_channels = args['in_channels'] + H, W = args['conv_gru']['H'], args['conv_gru']['W'] # remember to modify for v2xsim dataset + kernel_size = args['conv_gru']['kernel_size'] + num_gru_layers = args['conv_gru']['num_layers'] + + self.discrete_ratio = args['voxel_size'][0] + self.downsample_rate = args['downsample_rate'] + self.num_iteration = args['num_iteration'] + self.gru_flag = args['gru_flag'] + self.agg_operator = args['agg_operator'] + + self.msg_cnn = nn.Conv2d(in_channels * 2, in_channels, kernel_size=3, + stride=1, padding=1) + self.conv_gru = ConvGRU(input_size=(H, W), + input_dim=in_channels * 2, + hidden_dim=[in_channels] * num_gru_layers, + kernel_size=kernel_size, + num_layers=num_gru_layers, + batch_first=True, + bias=True, + return_all_layers=False) + self.mlp = nn.Linear(in_channels, in_channels) + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, x, record_len, pairwise_t_matrix, weight=None): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + weight: torch.Tensor + Weight of aggregating coming message + shape: (B, L, L) + + Returns + ------- + Fused feature. + """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] 
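+        # V2VNet runs `num_iteration` rounds of message passing: each round warps the
+        # neighbors into node i's frame, builds messages with msg_cnn, aggregates them
+        # (avg / max / weighted sum) and updates the node state with a ConvGRU.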
+ split_x = self.regroup(x, record_len) + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + + # (B*L,L,1,H,W) + roi_mask = torch.zeros((B, L, L, 1, H, W)).to(x) + for b in range(B): + N = record_len[b] + for i in range(N): + one_tensor = torch.ones((L,1,H,W)).to(x) + roi_mask[b,i] = warp_affine_simple(one_tensor, pairwise_t_matrix[b][i, :, :, :],(H, W)) + + batch_node_features = split_x + # iteratively update the features for num_iteration times + for l in range(self.num_iteration): + + batch_updated_node_features = [] + # iterate each batch + for b in range(B): + + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + updated_node_features = [] + + # update each node i + for i in range(N): + # (N,1,H,W) + mask = roi_mask[b, i, :N, ...] + # (N,C,H,W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # Notice we put i one the first dim of t_matrix. Different from original. + # t_matrix[i,j] = Tji + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[i, :, :, :], + (H, W)) + + # (N,C,H,W) + ego_agent_feature = batch_node_features[b][i].unsqueeze( + 0).repeat(N, 1, 1, 1) + #(N,2C,H,W) + neighbor_feature = torch.cat( + [neighbor_feature, ego_agent_feature], dim=1) + # (N,C,H,W) + # message contains all feature map from j to ego i. + message = self.msg_cnn(neighbor_feature) * mask + + # (C,H,W) + if self.agg_operator=="avg": + agg_feature = torch.mean(message, dim=0) + elif self.agg_operator=="max": + agg_feature = torch.max(message, dim=0)[0] + elif self.agg_operator=='weight': + agg_feature = torch.sum(message * weight[b][i,:N].view(-1,1,1,1), dim=0) + else: + raise ValueError("agg_operator has wrong value") + # (2C, H, W) + cat_feature = torch.cat( + [batch_node_features[b][i, ...], agg_feature], dim=0) + # (C,H,W) + if self.gru_flag: + gru_out = \ + self.conv_gru(cat_feature.unsqueeze(0).unsqueeze(0))[ + 0][ + 0].squeeze(0).squeeze(0) + else: + gru_out = batch_node_features[b][i, ...] 
+ agg_feature + updated_node_features.append(gru_out.unsqueeze(0)) + # (N,C,H,W) + batch_updated_node_features.append( + torch.cat(updated_node_features, dim=0)) + batch_node_features = batch_updated_node_features + # (B,C,H,W) + out = torch.cat( + [itm[0, ...].unsqueeze(0) for itm in batch_node_features], dim=0) + # (B,C,H,W) -> (B, H, W, C) -> (B,C,H,W) + out = self.mlp(out.permute(0, 2, 3, 1)).permute(0, 3, 1, 2) + + return out + + + +# from matplotlib import pyplot as plt +# neighbor_feature = neighbor_feature.detach().cpu().numpy() +# for j in range(N): +# plt.imshow(neighbor_feature[j].max(axis=0)) +# plt.savefig(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/v2x_fuse_{j}") +# plt.clf() +# raise \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/v2xvit_fuse[not_use].py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/v2xvit_fuse[not_use].py new file mode 100644 index 0000000000000000000000000000000000000000..ba3cb0b3f3fdab10af925c6d321db6b8c3356ae7 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/v2xvit_fuse[not_use].py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + +import torch +import torch.nn as nn +from opencood.models.fuse_modules.fuse_utils import regroup +from opencood.models.sub_modules.v2xvit_basic import V2XTransformer +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple + + +class V2XViTFusion(nn.Module): + def __init__(self, args): + super(V2XViTFusion, self).__init__() + self.discrete_ratio = args['voxel_size'][0] # voxel_size[0]=0.4 + self.downsample_rate = args['downsample_rate'] # 2/4, downsample rate from original feature map [200, 704] + self.fusion_net = V2XTransformer(args['transformer']) + + def forward(self, x, record_len, pairwise_t_matrix): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + Returns + ------- + Fused feature. 
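+
+        Notes
+        -----
+        The prior encoding (velocity, time delay, infra flag) is filled with zeros
+        and the spatial correction matrix is the identity, i.e. a perfect-pose,
+        zero-delay setting.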
+ """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + regroup_feature, mask = regroup(x, record_len, L) + prior_encoding = \ + torch.zeros(len(record_len), L, 3, 1, 1).to(record_len.device) + + # prior encoding added + prior_encoding = prior_encoding.repeat(1, 1, 1, + regroup_feature.shape[3], + regroup_feature.shape[4]) + + regroup_feature = torch.cat([regroup_feature, prior_encoding], dim=2) + regroup_feature_new = [] + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + for b in range(B): + # (B,L,L,2,3) + ego = 0 + regroup_feature_new.append(warp_affine_simple(regroup_feature[b], pairwise_t_matrix[b, ego], (H, W))) + regroup_feature = torch.stack(regroup_feature_new) + + # b l c h w -> b l h w c + regroup_feature = regroup_feature.permute(0, 1, 3, 4, 2) + # transformer fusion + spatial_correction_matrix = torch.eye(4).expand(len(record_len), L, 4, 4).to(record_len.device) + fused_feature = self.fusion_net(regroup_feature, mask, spatial_correction_matrix) + # b h w c -> b c h w + fused_feature = fused_feature.permute(0, 3, 1, 2) + + return fused_feature diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/when2com_fuse.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/when2com_fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..3a3ffe71fcc902d9547b09eaa6833e35919980c0 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/when2com_fuse.py @@ -0,0 +1,363 @@ +# -*- coding: utf-8 -*- +# Author: Yue Hu <18671129361@sjtu.edu.cn> +# License: TDG-Attribution-NonCommercial-NoDistrib + +""" +Implementation of When2com Fusion +""" + +import torch +import torch.nn as nn +import numpy as np + +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple + + +class When2comFusion(nn.Module): + def __init__(self, args): + super(When2comFusion, self).__init__() + + self.discrete_ratio = args['voxel_size'][0] + self.downsample_rate = args['downsample_rate'] + + self.in_channels = args['in_channels'] + self.feat_H = args['H'] + self.feat_W = args['W'] + self.query_size = args['query_size'] + self.key_size = args['key_size'] + self.mode = args['mode'] + self.agent_num = 2 + + self.query_key_net = policy_net4(self.in_channels) + self.key_net = km_generator(out_size=self.key_size, input_feat_h=self.feat_H//4, input_feat_w=self.feat_W//4) + self.query_net = km_generator(out_size=self.query_size, input_feat_h=self.feat_H//4, input_feat_w=self.feat_W//4) + self.attention_net = MIMOGeneralDotProductAttention(self.query_size, self.key_size) + + def activated_select(self, val_mat, prob_action, thres=0.2): + coef_act = torch.mul(prob_action, (prob_action > thres).float()) + attn_shape = coef_act.shape + bats, key_num, query_num = attn_shape[0], attn_shape[1], attn_shape[2] + coef_act_exp = coef_act.view(bats, key_num, query_num, 1, 1, 1) + + output = coef_act_exp * val_mat # (batch,4,channel,size,size) + feat_act = output.sum(1) # (batch,1,channel,size,size) + + # compute connect + count_coef = coef_act.clone() + ind = 
np.diag_indices(self.agent_num) + count_coef[:, ind[0], ind[1]] = 0 + num_connect = torch.nonzero(count_coef).shape[0] / ( + self.agent_num * count_coef.shape[0] + ) + return feat_act, coef_act, num_connect + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, x, record_len, pairwise_t_matrix, weight=None): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + weight: torch.Tensor + Weight of aggregating coming message + shape: (B, L, L) + + Returns + ------- + Fused feature. + """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] + split_x = self.regroup(x, record_len) + batch_node_features = split_x + updated_node_features = [] + for b in range(B): + + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + + # update each node i + # (N,1,H,W) + # (N,C,H,W) neighbor_feature is agent i's neighborhood warping to agent i's perspective + # Notice we put i one the first dim of t_matrix. Different from original. + # t_matrix[i,j] = Tji + neighbor_feature = warp_affine_simple(batch_node_features[b], + t_matrix[0, :, :, :], + (H, W)) + query_key_maps = self.query_key_net(neighbor_feature) + keys = self.key_net(query_key_maps) + query = self.query_net(query_key_maps[0].unsqueeze(0)) + + query = query.unsqueeze(0) + keys = keys.unsqueeze(0) + neighbor_feature = neighbor_feature.unsqueeze(1).unsqueeze(0) + + feat_fuse, prob_action = self.attention_net(query, keys, neighbor_feature, sparse=True) + + if self.mode == "activated": + feat_fuse, connect_mat, num_connect = self.activated_select(neighbor_feature, prob_action) + + updated_node_features.append(feat_fuse.squeeze(0)) + + out = torch.cat(updated_node_features, dim=0) + + return out + +class conv2DBatchNormRelu(nn.Module): + def __init__( + self, + in_channels, + n_filters, + k_size, + stride, + padding, + bias=True, + dilation=1, + is_batchnorm=True, + ): + super(conv2DBatchNormRelu, self).__init__() + + conv_mod = nn.Conv2d( + int(in_channels), + int(n_filters), + kernel_size=k_size, + padding=padding, + stride=stride, + bias=bias, + dilation=dilation, + ) + + if is_batchnorm: + self.cbr_unit = nn.Sequential( + conv_mod, nn.BatchNorm2d(int(n_filters)), nn.ReLU(inplace=True) + ) + else: + self.cbr_unit = nn.Sequential(conv_mod, nn.ReLU(inplace=True)) + + def forward(self, inputs): + outputs = self.cbr_unit(inputs) + return outputs + + +class Sparsemax(nn.Module): + """Sparsemax function.""" + + def __init__(self, dim=None): + """Initialize sparsemax activation + + Args: + dim (int, optional): The dimension over which to apply the sparsemax function. 
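+                Defaults to -1 (the last dimension) when not given.
+
+        Note: sparsemax (Martins & Astudillo, 2016) projects the logits onto the
+        probability simplex, so many output weights become exactly zero.
+        Illustrative, hand-computed example: sparsemax([1.0, 2.0, 3.0]) = [0.0, 0.0, 1.0],
+        whereas softmax would keep every entry non-zero.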
+ """ + super(Sparsemax, self).__init__() + + self.dim = -1 if dim is None else dim + + def forward(self, input): + """Forward function. + Args: + input (torch.Tensor): Input tensor. First dimension should be the batch size + Returns: + torch.Tensor: [batch_size x number_of_logits] Output tensor + """ + # Sparsemax currently only handles 2-dim tensors, + # so we reshape and reshape back after sparsemax + original_size = input.size() + input = input.view(-1, input.size(self.dim)) + + dim = 1 + number_of_logits = input.size(dim) + + # Translate input by max for numerical stability + input = input - torch.max(input, dim=dim, keepdim=True)[0].expand_as(input) + + # Sort input in descending order. + # (NOTE: Can be replaced with linear time selection method described here: + # http://stanford.edu/~jduchi/projects/DuchiShSiCh08.html) + zs = torch.sort(input=input, dim=dim, descending=True)[0] + range = torch.range(start=1, end=number_of_logits, device=input.device).view(1, -1) + range = range.expand_as(zs) + + # Determine sparsity of projection + bound = 1 + range * zs + cumulative_sum_zs = torch.cumsum(zs, dim) + is_gt = torch.gt(bound, cumulative_sum_zs).type(input.type()) + k = torch.max(is_gt * range, dim, keepdim=True)[0] + + # Compute threshold function + zs_sparse = is_gt * zs + + # Compute taus + taus = (torch.sum(zs_sparse, dim, keepdim=True) - 1) / k + taus = taus.expand_as(input) + + # Sparsemax + self.output = torch.max(torch.zeros_like(input), input - taus) + + output = self.output.view(original_size) + + return output + + def backward(self, grad_output): + """Backward function.""" + dim = 1 + + nonzeros = torch.ne(self.output, 0) + sum = torch.sum(grad_output * nonzeros, dim=dim) / torch.sum(nonzeros, dim=dim) + self.grad_input = nonzeros * (grad_output - sum.expand_as(grad_output)) + + return self.grad_input + +class km_generator(nn.Module): + def __init__(self, out_size=128, input_feat_h=25, input_feat_w=63): + super(km_generator, self).__init__() + # self.n_feat = int(256 * (input_feat_h//4 + 1) * (input_feat_w//4 + 1)) + self.n_feat = int(256 * input_feat_h * input_feat_w) + self.fc = nn.Sequential( + nn.Linear(self.n_feat, 256), # + nn.ReLU(inplace=True), + nn.Linear(256, 128), # + nn.ReLU(inplace=True), + nn.Linear(128, out_size)) # + + def forward(self, feat_map): + outputs = self.fc(feat_map.view(-1, self.n_feat)) + return outputs + +class km_generator_v2(nn.Module): + def __init__(self, out_size=128): + super(km_generator_v2, self).__init__() + # N, C = 256, H, W + self.conv1 = conv2DBatchNormRelu(256, 128, k_size=3, stride=2, padding=1) + self.avgp = nn.AdaptiveAvgPool2d((5, 7)) + self.n_feat = int(128*5*7) + self.fc = nn.Sequential( + nn.Linear(self.n_feat, 256), # + nn.ReLU(inplace=True), + nn.Linear(256, 128), # + nn.ReLU(inplace=True), + nn.Linear(128, out_size)) # + + def forward(self, feat_map): + feat_map = self.avgp(self.conv1(feat_map)) + outputs = self.fc(feat_map.view(-1, self.n_feat)) + return outputs + +class policy_net4(nn.Module): + def __init__(self, in_channel): + super(policy_net4, self).__init__() + # Encoder + # down 1 + self.conv1 = conv2DBatchNormRelu(in_channel, 512, k_size=3, stride=1, padding=1) + self.conv2 = conv2DBatchNormRelu(512, 256, k_size=3, stride=1, padding=1) + self.conv3 = conv2DBatchNormRelu(256, 256, k_size=3, stride=2, padding=1) + + # down 2 + self.conv4 = conv2DBatchNormRelu(256, 256, k_size=3, stride=1, padding=1) + self.conv5 = conv2DBatchNormRelu(256, 256, k_size=3, stride=2, padding=1) + + def forward(self, x): + outputs = 
self.conv1(x) + outputs = self.conv2(outputs) + outputs = self.conv3(outputs) + outputs = self.conv4(outputs) + outputs = self.conv5(outputs) + return outputs + +class MIMOGeneralDotProductAttention(nn.Module): + ''' Scaled Dot-Product Attention ''' + + def __init__(self, query_size, key_size, warp_flag=True, attn_dropout=0.1): + super().__init__() + self.sparsemax = Sparsemax(dim=1) + self.softmax = nn.Softmax(dim=1) + self.linear = nn.Linear(query_size, key_size) + self.warp_flag = warp_flag + print('Msg size: ',query_size,' Key size: ', key_size) + + def forward(self, qu, k, v, sparse=True): + # qu (b, q_agents, query_size) + # k (b, k_agents, key_size) + # v (b, k_agents, q_agents, c, h, w) + query = self.linear(qu) # (b, q_agents, key_size) + + # normalization + # query_norm = query.norm(p=2,dim=2).unsqueeze(2).expand_as(query) + # query = query.div(query_norm + 1e-9) + + # k_norm = k.norm(p=2,dim=2).unsqueeze(2).expand_as(k) + # k = k.div(k_norm + 1e-9) + # generate the + attn_orig = torch.bmm(k, query.transpose(2, 1)) # (b, k_agents, q_agents) column: differnt keys and the same query + + # scaling [not sure] + # scaling = torch.sqrt(torch.tensor(k.shape[2],dtype=torch.float32)).cuda() + # attn_orig = attn_orig/ scaling # (b,5,5) column: differnt keys and the same query + + attn_orig_softmax = self.softmax(attn_orig) # (b, k_agents, q_agents) + # attn_orig_softmax = self.sparsemax(attn_orig) + + attn_shape = attn_orig_softmax.shape + bats, key_num, query_num = attn_shape[0], attn_shape[1], attn_shape[2] + attn_orig_softmax_exp = attn_orig_softmax.view(bats, key_num, query_num, 1, 1, 1) + + if self.warp_flag: + v_exp = v + else: + v_exp = torch.unsqueeze(v, 2) + v_exp = v_exp.expand(-1, -1, query_num, -1, -1, -1) + + output = attn_orig_softmax_exp * v_exp # (b, k_agents, q_agents, c, h, w) + output_sum = output.sum(1) # (b, q_agents, c, h, w) + + return output_sum, attn_orig_softmax + + +class AdditiveAttentin(nn.Module): + def __init__(self, c_k, c_q): + super().__init__() + # self.dropout = nn.Dropout(attn_dropout) + self.softmax = nn.Softmax(dim=1) + self.sparsemax = Sparsemax(dim=1) + self.linear_feat = nn.Linear(c_k, 128) + self.linear_context = nn.Linear(c_q, 128) + self.linear_out = nn.Linear(128, 1) + + def forward(self, q, k, v, sparse=True): + temp1 = self.linear_feat(k) # [b, N, 128] + temp2 = self.linear_context(q) # [b, 1, 128] + attn_orig = torch.bmm(temp1, temp2.transpose(2, 1)) + if sparse: + attn_orig = self.sparsemax(attn_orig) # [b, N, 1] + else: + attn_orig = self.softmax(attn_orig) # [b, N, 1] + attn = attn_orig.unsqueeze(-1).unsqueeze(-1) # [b, N, 1, 1, 1] + output = attn * v # [b, N, C, H, W] + output = output.sum(1) # (b, C, H, W) + return output, attn \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/where2comm_attn.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/where2comm_attn.py new file mode 100644 index 0000000000000000000000000000000000000000..7fcb589c0b872333463f30746e4b5ec5a6f1e060 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fuse_modules/where2comm_attn.py @@ -0,0 +1,341 @@ +from turtle import update +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple +from opencood.models.comm_modules.where2comm import Communication + + +class ScaledDotProductAttention(nn.Module): + """ + Scaled Dot-Product 
Attention proposed in "Attention Is All You Need" + Compute the dot products of the query with all keys, divide each by sqrt(dim), + and apply a softmax function to obtain the weights on the values + Args: dim, mask + dim (int): dimention of attention + mask (torch.Tensor): tensor containing indices to be masked + Inputs: query, key, value, mask + - **query** (batch, q_len, d_model): tensor containing projection + vector for decoder. + - **key** (batch, k_len, d_model): tensor containing projection + vector for encoder. + - **value** (batch, v_len, d_model): tensor containing features of the + encoded input sequence. + - **mask** (-): tensor containing indices to be masked + Returns: context, attn + - **context**: tensor containing the context vector from + attention mechanism. + - **attn**: tensor containing the attention (alignment) from the + encoder outputs. + """ + + def __init__(self, dim): + super(ScaledDotProductAttention, self).__init__() + self.sqrt_dim = np.sqrt(dim) + + def forward(self, query, key, value): + score = torch.bmm(query, key.transpose(1, 2)) / self.sqrt_dim + attn = F.softmax(score, -1) + context = torch.bmm(attn, value) + return context + +class AttenFusion(nn.Module): + def __init__(self, feature_dim): + super(AttenFusion, self).__init__() + self.att = ScaledDotProductAttention(feature_dim) + + def forward(self, x): + cav_num, C, H, W = x.shape + x = x.view(cav_num, C, -1).permute(2, 0, 1) # (H*W, cav_num, C), perform self attention on each pixel. + x = self.att(x, x, x) + x = x.permute(1, 2, 0).view(cav_num, C, H, W)[0] # C, W, H before + return x + +class MaxFusion(nn.Module): + def __init__(self): + super(MaxFusion, self).__init__() + + def forward(self, x): + return torch.max(x, dim=0)[0] + + +class EncodeLayer(nn.Module): + def __init__(self, channels, n_head=8, dropout=0): + super(EncodeLayer, self).__init__() + self.attn = nn.MultiheadAttention(channels, n_head, dropout) + self.linear1 = nn.Linear(channels, channels) + self.linear2 = nn.Linear(channels, channels) + + self.norm1 = nn.LayerNorm(channels) + self.norm2 = nn.LayerNorm(channels) + + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + + self.relu = nn.ReLU() + + def forward(self, q, k, v, confidence_map=None): + """ + order (seq, batch, feature) + Args: + q: (1, H*W, C) + k: (N, H*W, C) + v: (N, H*W, C) + Returns: + outputs: () + """ + residual = q + if confidence_map is not None: + context, weight = self.attn(q,k,v, quality_map=confidence_map) # (1, H*W, C) + else: + context, weight = self.attn(q,k,v) # (1, H*W, C) + context = self.dropout1(context) + output1 = self.norm1(residual + context) + + # feed forward net + residual = output1 # (1, H*W, C) + context = self.linear2(self.relu(self.linear1(output1))) + context = self.dropout2(context) + output2 = self.norm2(residual + context) + + return output2 + +class TransformerFusion(nn.Module): + def __init__(self, channels=256, n_head=8, with_spe=True, with_scm=True, dropout=0): + super(TransformerFusion, self).__init__() + + self.encode_layer = EncodeLayer(channels, n_head, dropout) + self.with_spe = with_spe + self.with_scm = with_scm + + def forward(self, batch_neighbor_feature, batch_neighbor_feature_pe, batch_confidence_map, record_len): + x_fuse = [] + B = len(record_len) + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + neighbor_feature = batch_neighbor_feature[b] + _, C, H, W = neighbor_feature.shape + neighbor_feature_flat = 
neighbor_feature.view(N,C,H*W) # (N, C, H*W) + + if self.with_spe: + neighbor_feature_pe = batch_neighbor_feature_pe[b] + neighbor_feature_flat_pe = neighbor_feature_pe.view(N,C,H*W) # (N, C, H*W) + query = neighbor_feature_flat_pe[0:1,...].permute(0,2,1) # (1, H*W, C) + key = neighbor_feature_flat_pe.permute(0,2,1) # (N, H*W, C) + else: + query = neighbor_feature_flat[0:1,...].permute(0,2,1) # (1, H*W, C) + key = neighbor_feature_flat.permute(0,2,1) # (N, H*W, C) + + value = neighbor_feature_flat.permute(0,2,1) + + if self.with_scm: + confidence_map = batch_confidence_map[b] + fused_feature = self.encode_layer(query, key, value, confidence_map) # (1, H*W, C) + else: + fused_feature = self.encode_layer(query, key, value) # (1, H*W, C) + + fused_feature = fused_feature.permute(0,2,1).reshape(1, C, H, W) + + x_fuse.append(fused_feature) + x_fuse = torch.concat(x_fuse, dim=0) + return x_fuse + +def add_pe_map(x): + # scale = 2 * math.pi + temperature = 10000 + num_pos_feats = x.shape[-3] // 2 # positional encoding dimension. C = 2d + + mask = torch.zeros([x.shape[-2], x.shape[-1]], dtype=torch.bool, device=x.device) #[H, W] + not_mask = ~mask + y_embed = not_mask.cumsum(0, dtype=torch.float32) # [H, W] + x_embed = not_mask.cumsum(1, dtype=torch.float32) # [H, W] + + dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=x.device) # [0,1,2,...,d] + dim_t = temperature ** (2 * (dim_t // 2) / num_pos_feats) # 10000^(2k/d), k is [0,0,1,1,...,d/2,d/2] + + pos_x = x_embed[:, :, None] / dim_t + pos_y = y_embed[:, :, None] / dim_t + pos_x = torch.stack((pos_x[:, :, 0::2].sin(), pos_x[:, :, 1::2].cos()), dim=3).flatten(2) + pos_y = torch.stack((pos_y[:, :, 0::2].sin(), pos_y[:, :, 1::2].cos()), dim=3).flatten(2) + pos = torch.cat((pos_y, pos_x), dim=2).permute(2, 0, 1) # [C, H, W] + + if len(x.shape) == 4: + x_pe = x + pos[None,:,:,:] + elif len(x.shape) == 5: + x_pe = x + pos[None,None,:,:,:] + return x_pe + + +class Where2comm(nn.Module): + def __init__(self, args): + super(Where2comm, self).__init__() + + self.communication = False + self.round = 1 + if 'communication' in args: + self.communication = True + self.naive_communication = Communication(args['communication']) + if 'round' in args['communication']: + self.round = args['communication']['round'] + self.discrete_ratio = args['voxel_size'][0] # voxel_size[0]=0.4 + self.downsample_rate = args['downsample_rate'] # 2/4, downsample rate from original feature map [200, 704] + + self.agg_mode = args['agg_operator']['mode'] + self.multi_scale = args['multi_scale'] + if self.multi_scale: + layer_nums = args['layer_nums'] + num_filters = args['num_filters'] + self.num_levels = len(layer_nums) + self.fuse_modules = nn.ModuleList() + for idx in range(self.num_levels): + if self.agg_mode == 'ATTEN': + fuse_network = AttenFusion(num_filters[idx]) + elif self.agg_mode == 'MAX': + fuse_network = MaxFusion() + elif self.agg_mode == 'Transformer': + fuse_network = TransformerFusion( + channels=num_filters[idx], + n_head=args['agg_operator']['n_head'], + with_spe=args['agg_operator']['with_spe'], + with_scm=args['agg_operator']['with_scm']) + self.fuse_modules.append(fuse_network) + else: + if self.agg_mode == 'ATTEN': + self.fuse_modules = AttenFusion(args['agg_operator']['feature_dim']) + elif self.agg_mode == 'MAX': + self.fuse_modules = MaxFusion() + elif self.agg_mode == 'Transformer': + self.fuse_network = TransformerFusion( + channels=args['agg_operator']['feature_dim'], + n_head=args['agg_operator']['n_head'], + 
with_spe=args['agg_operator']['with_spe'], + with_scm=args['agg_operator']['with_scm']) + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, x, rm, record_len, pairwise_t_matrix, backbone=None): + """ + Fusion forwarding. + + Parameters + ---------- + x : torch.Tensor + input data, (sum(n_cav), C, H, W) + + record_len : list + shape: (B) + + pairwise_t_matrix : torch.Tensor + The transformation matrix from each cav to ego, + shape: (B, L, L, 4, 4) + + Returns + ------- + Fused feature. + """ + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # (B,L,L,2,3) + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + if self.multi_scale: + ups = [] + # backbone.__dict__() + with_resnet = True if hasattr(backbone, 'resnet') else False + if with_resnet: + feats = backbone.resnet(x) + + for i in range(self.num_levels): + x = feats[i] if with_resnet else backbone.blocks[i](x) + + ############ 1. Communication (Mask the features) ######### + if i==0: + if self.communication: + batch_confidence_maps = self.regroup(rm, record_len) + _, communication_masks, communication_rates = self.naive_communication(batch_confidence_maps, record_len, pairwise_t_matrix) + x = x * communication_masks + else: + communication_rates = torch.tensor(0).to(x.device) + else: + if self.communication: + communication_masks = F.max_pool2d(communication_masks, kernel_size=2) + x = x * communication_masks + + ############ 2. Split the confidence map ####################### + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] + batch_node_features = self.regroup(x, record_len) + + ############ 3. Fusion #################################### + x_fuse = [] + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + node_features = batch_node_features[b] + C, H, W = node_features.shape[1:] + neighbor_feature = warp_affine_simple(node_features, + t_matrix[0, :, :, :], + (H, W)) + x_fuse.append(self.fuse_modules[i](neighbor_feature)) + x_fuse = torch.stack(x_fuse) + + ############ 4. Deconv #################################### + if len(backbone.deblocks) > 0: + ups.append(backbone.deblocks[i](x_fuse)) + else: + ups.append(x_fuse) + + if len(ups) > 1: + x_fuse = torch.cat(ups, dim=1) + elif len(ups) == 1: + x_fuse = ups[0] + + if len(backbone.deblocks) > self.num_levels: + x_fuse = backbone.deblocks[-1](x_fuse) + else: + ############ 1. Split the features ####################### + # split x:[(L1, C, H, W), (L2, C, H, W), ...] + # for example [[2, 256, 50, 176], [1, 256, 50, 176], ...] + batch_node_features = self.regroup(x, record_len) + batch_confidence_maps = self.regroup(rm, record_len) + + ############ 2. 
Communication (Mask the features) ######### + if self.communication: + _, communication_masks, communication_rates = self.naive_communication(batch_confidence_maps, record_len, pairwise_t_matrix) + else: + communication_rates = torch.tensor(0).to(x.device) + + ############ 3. Fusion #################################### + x_fuse = [] + for b in range(B): + # number of valid agent + N = record_len[b] + # (N,N,4,4) + # t_matrix[i, j]-> from i to j + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + node_features = batch_node_features[b] + if self.communication: + node_features = node_features * communication_masks[b] + neighbor_feature = warp_affine_simple(node_features, + t_matrix[0, :, :, :], + (H, W)) + x_fuse.append(self.fuse_modules(neighbor_feature)) + x_fuse = torch.stack(x_fuse) + + return x_fuse, communication_rates, {} diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fvoxelrcnn.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fvoxelrcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..5e9742cd514f9a7f39d3cf8174a9c5cb4645ec53 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/fvoxelrcnn.py @@ -0,0 +1,82 @@ +import random, os + +import torch +from torch import nn +import numpy as np +from icecream import ic +from opencood.models.sub_modules.mean_vfe import MeanVFE +from opencood.models.sub_modules.sparse_backbone_3d import VoxelBackBone8x +from opencood.models.sub_modules.height_compression import HeightCompression +from opencood.models.sub_modules.cia_ssd_utils import SSFA, Head +from opencood.models.sub_modules.matcher_v2 import MatcherV2 +from opencood.models.sub_modules.voxel_rcnn_head import VoxelRCNNHead +from opencood.data_utils.post_processor.fpvrcnn_postprocessor import \ + FpvrcnnPostprocessor +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple + +class FVoxelRCNN(nn.Module): + def __init__(self, args): + super(FVoxelRCNN, self).__init__() + lidar_range = np.array(args['lidar_range']) + grid_size = np.round((lidar_range[3:6] - lidar_range[:3]) / + np.array(args['voxel_size'])).astype(np.int64) + self.vfe = MeanVFE(args['mean_vfe'], + args['mean_vfe']['num_point_features']) + self.spconv_block = VoxelBackBone8x(args['spconv'], + input_channels=args['spconv'][ + 'num_features_in'], + grid_size=grid_size) + self.map_to_bev = HeightCompression(args['map2bev']) + # set experiment to validate the ssfa module + self.ssfa = SSFA(args['ssfa']) + self.head = Head(**args['head']) + self.post_processor = FpvrcnnPostprocessor(args['post_processer'], + train=self.training) + self.matcher = MatcherV2(args['matcher'], args['lidar_range']) + self.roi_head = VoxelRCNNHead(args['roi_head'], self.spconv_block.backbone_channels) + self.train_stage2 = args['activate_stage2'] + + def forward(self, batch_dict): + # lidar + voxel_features = batch_dict['processed_lidar']['voxel_features'] + voxel_coords = batch_dict['processed_lidar']['voxel_coords'] + voxel_num_points = batch_dict['processed_lidar']['voxel_num_points'] + # cemera + + # save memory + batch_dict.pop('processed_lidar') + batch_dict.update({'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'batch_size': int(batch_dict['record_len'].sum()), + 'proj_first': batch_dict['proj_first'], + 'lidar_pose': batch_dict['lidar_pose']}) + + batch_dict = self.vfe(batch_dict) + batch_dict = self.spconv_block(batch_dict) + batch_dict = self.map_to_bev(batch_dict) + + out = 
self.ssfa(batch_dict['spatial_features']) + batch_dict['stage1_out'] = self.head(out) + ### stage 1 ### finished + + data_dict, output_dict = {}, {} + data_dict['ego'], output_dict['ego'] = batch_dict, batch_dict + + pred_box3d_list, scores_list = \ + self.post_processor.post_process(data_dict, output_dict, + stage1=True) + + batch_dict['det_boxes'] = pred_box3d_list + batch_dict['det_scores'] = scores_list + + if pred_box3d_list is not None and self.train_stage2: + batch_dict = self.matcher(batch_dict) + batch_dict = self.roi_head(batch_dict) + return batch_dict + + + +if __name__ == "__main__": + model = SSFA(None) + print(model) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/heter_encoders.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/heter_encoders.py new file mode 100644 index 0000000000000000000000000000000000000000..1d7db9bdae228302d4a2c974fb44647aa96a6edf --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/heter_encoders.py @@ -0,0 +1,301 @@ +# -*- coding: utf-8 -*- +# Author: Yifan Lu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch +import torch.nn as nn +import numpy as np +from opencood.models.sub_modules.lss_submodule import Up, CamEncode, BevEncode, CamEncode_Resnet101 +from opencood.utils.camera_utils import gen_dx_bx, cumsum_trick, QuickCumsum, depth_discretization +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.mean_vfe import MeanVFE +from opencood.models.sub_modules.sparse_backbone_3d import VoxelBackBone8x +from opencood.models.sub_modules.height_compression import HeightCompression + + + +class PointPillar(nn.Module): + def __init__(self, args): + super(PointPillar, self).__init__() + grid_size = (np.array(args['lidar_range'][3:6]) - np.array(args['lidar_range'][0:3])) / \ + np.array(args['voxel_size']) + grid_size = np.round(grid_size).astype(np.int64) + args['point_pillar_scatter']['grid_size'] = grid_size + + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + + + def forward(self, data_dict, modality_name): + voxel_features = data_dict[f'inputs_{modality_name}']['voxel_features'] + voxel_coords = data_dict[f'inputs_{modality_name}']['voxel_coords'] + voxel_num_points = data_dict[f'inputs_{modality_name}']['voxel_num_points'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points} + + batch_dict = self.pillar_vfe(batch_dict) + batch_dict = self.scatter(batch_dict) + lidar_feature_2d = batch_dict['spatial_features'] # H0, W0 + return lidar_feature_2d + +class SECOND(nn.Module): + def __init__(self, args): + super(SECOND, self).__init__() + lidar_range = np.array(args['lidar_range']) + grid_size = np.round((lidar_range[3:6] - lidar_range[:3]) / + np.array(args['voxel_size'])).astype(np.int64) + self.vfe = MeanVFE(args['mean_vfe'], + args['mean_vfe']['num_point_features']) + self.spconv_block = VoxelBackBone8x(args['spconv'], + 
input_channels=args['spconv'][ + 'num_features_in'], + grid_size=grid_size) + self.map_to_bev = HeightCompression(args['map2bev']) + + def forward(self, data_dict, modality_name): + voxel_features = data_dict[f'inputs_{modality_name}']['voxel_features'] + voxel_coords = data_dict[f'inputs_{modality_name}']['voxel_coords'] + voxel_num_points = data_dict[f'inputs_{modality_name}']['voxel_num_points'] + batch_size = voxel_coords[:,0].max() + 1 + + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'batch_size': batch_size} + + batch_dict = self.vfe(batch_dict) + batch_dict = self.spconv_block(batch_dict) + batch_dict = self.map_to_bev(batch_dict) + return batch_dict['spatial_features'] + +class LiftSplatShoot(nn.Module): + def __init__(self, args): + super(LiftSplatShoot, self).__init__() + self.grid_conf = args['grid_conf'] # 网格配置参数 + self.data_aug_conf = args['data_aug_conf'] # 数据增强配置参数 + dx, bx, nx = gen_dx_bx(self.grid_conf['xbound'], + self.grid_conf['ybound'], + self.grid_conf['zbound'], + ) # 划分网格 + + self.dx = dx.clone().detach().requires_grad_(False).to(torch.device("cuda")) # [0.4,0.4,20] + self.bx = bx.clone().detach().requires_grad_(False).to(torch.device("cuda")) # [-49.8,-49.8,0] + self.nx = nx.clone().detach().requires_grad_(False).to(torch.device("cuda")) # [250,250,1] + self.depth_supervision = args['depth_supervision'] + self.downsample = args['img_downsample'] # 下采样倍数 + self.camC = args['img_features'] # 图像特征维度 + self.frustum = self.create_frustum().clone().detach().requires_grad_(False).to(torch.device("cuda")) # frustum: DxfHxfWx3(41x8x16x3) + self.use_quickcumsum = True + self.D, _, _, _ = self.frustum.shape # D: 41 + self.camera_encoder_type = args['camera_encoder'] + if self.camera_encoder_type == 'EfficientNet': + self.camencode = CamEncode(self.D, self.camC, self.downsample, \ + self.grid_conf['ddiscr'], self.grid_conf['mode'], args['use_depth_gt'], args['depth_supervision']) + elif self.camera_encoder_type == 'Resnet101': + self.camencode = CamEncode_Resnet101(self.D, self.camC, self.downsample, \ + self.grid_conf['ddiscr'], self.grid_conf['mode'], args['use_depth_gt'], args['depth_supervision']) + + def create_frustum(self): + # make grid in image plane + ogfH, ogfW = self.data_aug_conf['final_dim'] # 原始图片大小 ogfH:128 ogfW:288 + fH, fW = ogfH // self.downsample, ogfW // self.downsample # 下采样16倍后图像大小 fH: 12 fW: 22 + # ds = torch.arange(*self.grid_conf['dbound'], dtype=torch.float).view(-1, 1, 1).expand(-1, fH, fW) # 在深度方向上划分网格 ds: DxfHxfW(41x12x22) + ds = torch.tensor(depth_discretization(*self.grid_conf['ddiscr'], self.grid_conf['mode']), dtype=torch.float).view(-1,1,1).expand(-1, fH, fW) + + D, _, _ = ds.shape # D: 41 表示深度方向上网格的数量 + xs = torch.linspace(0, ogfW - 1, fW, dtype=torch.float).view(1, 1, fW).expand(D, fH, fW) # 在0到288上划分18个格子 xs: DxfHxfW(41x12x22) + ys = torch.linspace(0, ogfH - 1, fH, dtype=torch.float).view(1, fH, 1).expand(D, fH, fW) # 在0到127上划分8个格子 ys: DxfHxfW(41x12x22) + + # D x H x W x 3 + frustum = torch.stack((xs, ys, ds), -1) # 堆积起来形成网格坐标, frustum[i,j,k,0]就是(i,j)位置,深度为k的像素的宽度方向上的栅格坐标 frustum: DxfHxfWx3 + return frustum + + def get_geometry(self, rots, trans, intrins, post_rots, post_trans): + """Determine the (x,y,z) locations (in the ego frame) + of the points in the point cloud. 
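+        Each frustum point (u, v, d) is first corrected for the image-space
+        augmentation (post_rots, post_trans) and then unprojected via
+        X_ego = rots @ intrins^{-1} @ (d * [u, v, 1]^T) + trans.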
+ Returns B x N x D x H/downsample x W/downsample x 3 + """ + B, N, _ = trans.shape # B:4(batchsize) N: 4(相机数目) + + # undo post-transformation + # B x N x D x H x W x 3 + # 抵消数据增强及预处理对像素的变化 + points = self.frustum - post_trans.view(B, N, 1, 1, 1, 3) + points = torch.inverse(post_rots).view(B, N, 1, 1, 1, 3, 3).matmul(points.unsqueeze(-1)) + + # cam_to_ego + points = torch.cat((points[:, :, :, :, :, :2] * points[:, :, :, :, :, 2:3], # points[:, :, :, :, :, 2:3] ranges from [4, 45) meters + points[:, :, :, :, :, 2:3] + ), 5) # 将像素坐标(u,v,d)变成齐次坐标(du,dv,d) + # d[u,v,1]^T=intrins*rots^(-1)*([x,y,z]^T-trans) + combine = rots.matmul(torch.inverse(intrins)) + points = combine.view(B, N, 1, 1, 1, 3, 3).matmul(points).squeeze(-1) + points += trans.view(B, N, 1, 1, 1, 3) # 将像素坐标d[u,v,1]^T转换到车体坐标系下的[x,y,z] + + return points # B x N x D x H x W x 3 (4 x 4 x 41 x 16 x 22 x 3) + + def get_cam_feats(self, x): + """Return B x N x D x H/downsample x W/downsample x C + """ + B, N, C, imH, imW = x.shape # B: 4 N: 4 C: 3 imH: 256 imW: 352 + + x = x.view(B*N, C, imH, imW) # B和N两个维度合起来 x: 16 x 4 x 256 x 352 + depth_items, x = self.camencode(x) # 进行图像编码 x: B*N x C x D x fH x fW(24 x 64 x 41 x 16 x 22) + x = x.view(B, N, self.camC, self.D, imH//self.downsample, imW//self.downsample) #将前两维拆开 x: B x N x C x D x fH x fW(4 x 6 x 64 x 41 x 16 x 22) + x = x.permute(0, 1, 3, 4, 5, 2) # x: B x N x D x fH x fW x C(4 x 6 x 41 x 16 x 22 x 64) + + return x, depth_items + + def voxel_pooling(self, geom_feats, x): + # geom_feats: B x N x D x H x W x 3 (4 x 6 x 41 x 16 x 22 x 3), D is discretization in "UD" or "LID" + # x: B x N x D x fH x fW x C(4 x 6 x 41 x 16 x 22 x 64), D is num_bins + + B, N, D, H, W, C = x.shape # B: 4 N: 6 D: 41 H: 16 W: 22 C: 64 + Nprime = B*N*D*H*W # Nprime + + # flatten x + x = x.reshape(Nprime, C) # 将图像展平,一共有 B*N*D*H*W 个点 + + # flatten indices + + geom_feats = ((geom_feats - (self.bx - self.dx/2.)) / self.dx).long() # 将[-48,48] [-10 10]的范围平移到 [0, 240), [0, 1) 计算栅格坐标并取整 + geom_feats = geom_feats.view(Nprime, 3) # 将像素映射关系同样展平 geom_feats: B*N*D*H*W x 3 + batch_ix = torch.cat([torch.full([Nprime//B, 1], ix, + device=x.device, dtype=torch.long) for ix in range(B)]) # 每个点对应于哪个batch + geom_feats = torch.cat((geom_feats, batch_ix), 1) # geom_feats: B*N*D*H*W x 4, geom_feats[:,3]表示batch_id + + # filter out points that are outside box + # 过滤掉在边界线之外的点 x:0~240 y: 0~240 z: 0 + kept = (geom_feats[:, 0] >= 0) & (geom_feats[:, 0] < self.nx[0])\ + & (geom_feats[:, 1] >= 0) & (geom_feats[:, 1] < self.nx[1])\ + & (geom_feats[:, 2] >= 0) & (geom_feats[:, 2] < self.nx[2]) + x = x[kept] + geom_feats = geom_feats[kept] + + # get tensors from the same voxel next to each other + ranks = geom_feats[:, 0] * (self.nx[1] * self.nx[2] * B)\ + + geom_feats[:, 1] * (self.nx[2] * B)\ + + geom_feats[:, 2] * B\ + + geom_feats[:, 3] # 给每一个点一个rank值,rank相等的点在同一个batch,并且在在同一个格子里面 + sorts = ranks.argsort() + x, geom_feats, ranks = x[sorts], geom_feats[sorts], ranks[sorts] # 按照rank排序,这样rank相近的点就在一起了 + # x: 168648 x 64 geom_feats: 168648 x 4 ranks: 168648 + + # cumsum trick + if not self.use_quickcumsum: + x, geom_feats = cumsum_trick(x, geom_feats, ranks) + else: + x, geom_feats = QuickCumsum.apply(x, geom_feats, ranks) # 一个batch的一个格子里只留一个点 x: 29072 x 64 geom_feats: 29072 x 4 + + # griddify (B x C x Z x X x Y) + # final = torch.zeros((B, C, self.nx[2], self.nx[0], self.nx[1]), device=x.device) # final: 4 x 64 x Z x X x Y + # final[geom_feats[:, 3], :, geom_feats[:, 2], geom_feats[:, 0], geom_feats[:, 1]] = x # 将x按照栅格坐标放到final中 + + # modify 
griddify (B x C x Z x Y x X) by Yifan Lu 2022.10.7 + # ------> x + # | + # | + # y + final = torch.zeros((B, C, self.nx[2], self.nx[1], self.nx[0]), device=x.device) # final: 4 x 64 x Z x Y x X + final[geom_feats[:, 3], :, geom_feats[:, 2], geom_feats[:, 1], geom_feats[:, 0]] = x # 将x按照栅格坐标放到final中 + + # collapse Z + final = torch.cat(final.unbind(dim=2), 1) # 消除掉z维 + + return final # final: 4 x 64 x 240 x 240 # B, C, H, W + + def get_voxels(self, x, rots, trans, intrins, post_rots, post_trans): + geom = self.get_geometry(rots, trans, intrins, post_rots, post_trans) # 像素坐标到自车中坐标的映射关系 geom: B x N x D x H x W x 3 (4 x N x 42 x 16 x 22 x 3) + x_img, depth_items = self.get_cam_feats(x) # 提取图像特征并预测深度编码 x: B x N x D x fH x fW x C(4 x N x 42 x 16 x 22 x 64) + x = self.voxel_pooling(geom, x_img) # x: 4 x 64 x 240 x 240 + + return x, depth_items + + def forward(self, data_dict, modality_name): + # x: [4,4,3,256, 352] + # rots: [4,4,3,3] + # trans: [4,4,3] + # intrins: [4,4,3,3] + # post_rots: [4,4,3,3] + # post_trans: [4,4,3] + image_inputs_dict = data_dict[f'inputs_{modality_name}'] + x, rots, trans, intrins, post_rots, post_trans = \ + image_inputs_dict['imgs'], image_inputs_dict['rots'], image_inputs_dict['trans'], image_inputs_dict['intrins'], image_inputs_dict['post_rots'], image_inputs_dict['post_trans'] + x, depth_items = self.get_voxels(x, rots, trans, intrins, post_rots, post_trans) # 将图像转换到BEV下,x: B x C x 240 x 240 (4 x 64 x 240 x 240) + + if self.depth_supervision: + self.depth_items = depth_items + + return x + + +class LiftSplatShootVoxel(LiftSplatShoot): + def voxel_pooling(self, geom_feats, x): + # geom_feats: B x N x D x H x W x 3 (4 x 6 x 41 x 16 x 22 x 3), D is discretization in "UD" or "LID" + # x: B x N x D x fH x fW x C(4 x 6 x 41 x 16 x 22 x 64), D is num_bins + + B, N, D, H, W, C = x.shape # B: 4 N: 6 D: 41 H: 16 W: 22 C: 64 + Nprime = B*N*D*H*W # Nprime + + # flatten x + x = x.reshape(Nprime, C) # 将图像展平,一共有 B*N*D*H*W 个点 + + # flatten indices + + geom_feats = ((geom_feats - (self.bx - self.dx/2.)) / self.dx).long() # 将[-48,48] [-10 10]的范围平移到 [0, 240), [0, 1) 计算栅格坐标并取整 + geom_feats = geom_feats.view(Nprime, 3) # 将像素映射关系同样展平 geom_feats: B*N*D*H*W x 3 + batch_ix = torch.cat([torch.full([Nprime//B, 1], ix, + device=x.device, dtype=torch.long) for ix in range(B)]) # 每个点对应于哪个batch + geom_feats = torch.cat((geom_feats, batch_ix), 1) # geom_feats: B*N*D*H*W x 4, geom_feats[:,3]表示batch_id + + # filter out points that are outside box + # 过滤掉在边界线之外的点 x:0~240 y: 0~240 z: 0 + kept = (geom_feats[:, 0] >= 0) & (geom_feats[:, 0] < self.nx[0])\ + & (geom_feats[:, 1] >= 0) & (geom_feats[:, 1] < self.nx[1])\ + & (geom_feats[:, 2] >= 0) & (geom_feats[:, 2] < self.nx[2]) + x = x[kept] + geom_feats = geom_feats[kept] + + # get tensors from the same voxel next to each other + ranks = geom_feats[:, 0] * (self.nx[1] * self.nx[2] * B)\ + + geom_feats[:, 1] * (self.nx[2] * B)\ + + geom_feats[:, 2] * B\ + + geom_feats[:, 3] # 给每一个点一个rank值,rank相等的点在同一个batch,并且在在同一个格子里面 + sorts = ranks.argsort() + x, geom_feats, ranks = x[sorts], geom_feats[sorts], ranks[sorts] # 按照rank排序,这样rank相近的点就在一起了 + # x: 168648 x 64 geom_feats: 168648 x 4 ranks: 168648 + + # cumsum trick + if not self.use_quickcumsum: + x, geom_feats = cumsum_trick(x, geom_feats, ranks) + else: + x, geom_feats = QuickCumsum.apply(x, geom_feats, ranks) # 一个batch的一个格子里只留一个点 x: 29072 x 64 geom_feats: 29072 x 4 + + # griddify (B x C x Z x X x Y) + # final = torch.zeros((B, C, self.nx[2], self.nx[0], self.nx[1]), device=x.device) # final: 4 x 64 x Z x X 
x Y + # final[geom_feats[:, 3], :, geom_feats[:, 2], geom_feats[:, 0], geom_feats[:, 1]] = x # 将x按照栅格坐标放到final中 + + # modify griddify (B x C x Z x Y x X) by Yifan Lu 2022.10.7 + # ------> x + # | + # | + # y + final = torch.zeros((B, C, self.nx[2], self.nx[1], self.nx[0]), device=x.device) # final: 4 x 64 x Z x Y x X + final[geom_feats[:, 3], :, geom_feats[:, 2], geom_feats[:, 1], geom_feats[:, 0]] = x # 将x按照栅格坐标放到final中 + + # collapse Z + #final = torch.max(final.unbind(dim=2), 1)[0] # 消除掉z维 + final = torch.max(final, 2)[0] # 消除掉z维 + return final # final: 4 x 64 x 240 x 240 # B, C, H, W \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/heter_model_late.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/heter_model_late.py new file mode 100644 index 0000000000000000000000000000000000000000..5bd48c15ffcd64f3f5657d051947a539414eccaa --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/heter_model_late.py @@ -0,0 +1,110 @@ +# Author: Yifan Lu +# In this heterogeneous version, feature align start before backbone. + +import torch +import torch.nn as nn +import numpy as np +from icecream import ic +import torchvision +from collections import OrderedDict, Counter +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +import importlib + +class HeterModelLate(nn.Module): + def __init__(self, args): + super(HeterModelLate, self).__init__() + modality_name_list = list(args.keys()) + modality_name_list = [x for x in modality_name_list if x.startswith("m") and x[1:].isdigit()] + self.modality_name_list = modality_name_list + self.cav_range = args['lidar_range'] + self.sensor_type_dict = OrderedDict() + + # setup each modality model + for modality_name in self.modality_name_list: + model_setting = args[modality_name] + sensor_name = model_setting['sensor_type'] + self.sensor_type_dict[modality_name] = sensor_name + + # import model + encoder_filename = "opencood.models.heter_encoders" + encoder_lib = importlib.import_module(encoder_filename) + encoder_class = None + target_model_name = model_setting['core_method'].replace('_', '') + + for name, cls in encoder_lib.__dict__.items(): + if name.lower() == target_model_name.lower(): + encoder_class = cls + + # build encoder + setattr(self, f"encoder_{modality_name}", encoder_class(model_setting['encoder_args'])) + # depth supervision for camera + if model_setting['encoder_args'].get("depth_supervision", False) : + setattr(self, f"depth_supervision_{modality_name}", True) + else: + setattr(self, f"depth_supervision_{modality_name}", False) + + # setup backbone (very light-weight) + setattr(self, f"backbone_{modality_name}", ResNetBEVBackbone(model_setting['backbone_args'])) + if sensor_name == "camera": + camera_mask_args = model_setting['camera_mask_args'] + setattr(self, f"crop_ratio_W_{modality_name}", (self.cav_range[3]) / (camera_mask_args['grid_conf']['xbound'][1])) + setattr(self, f"crop_ratio_H_{modality_name}", (self.cav_range[4]) / (camera_mask_args['grid_conf']['ybound'][1])) + + # setup layers (actual backbone) + setattr(self, f"layers_{modality_name}", ResNetBEVBackbone(model_setting['layers_args'])) + setattr(self, f"layers_num_{modality_name}", len(model_setting['layers_args']['num_upsample_filter'])) + + # setup shrink head + setattr(self, f"shrink_conv_{modality_name}", DownsampleConv(model_setting['shrink_header'])) + + # setup detection head + 
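+            # Per-anchor output channels (matching the conv layers defined below):
+            #   cls_head: anchor_number             -> objectness score per anchor
+            #   reg_head: anchor_number * 7         -> box residuals (typically x, y, z, h, w, l, yaw)
+            #   dir_head: anchor_number * num_bins  -> direction-classification bins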
in_head = model_setting['head_args']['in_head'] + setattr(self, f'cls_head_{modality_name}', nn.Conv2d(in_head, args['anchor_number'], kernel_size=1)) + setattr(self, f'reg_head_{modality_name}', nn.Conv2d(in_head, args['anchor_number'] * 7, kernel_size=1)) + setattr(self, f'dir_head_{modality_name}', nn.Conv2d(in_head, args['anchor_number'] * args['dir_args']['num_bins'], kernel_size=1)) + + + def forward(self, data_dict): + output_dict = {} + modality_name = [x for x in list(data_dict.keys()) if x.startswith("inputs_")] + assert len(modality_name) == 1 + modality_name = modality_name[0].lstrip('inputs_') + + feature = eval(f"self.encoder_{modality_name}")(data_dict, modality_name) + feature = eval(f"self.backbone_{modality_name}")({"spatial_features": feature})['spatial_features_2d'] + + if self.sensor_type_dict[modality_name] == "camera": + # should be padding. Instead of masking + _, _, H, W = feature.shape + feature = torchvision.transforms.CenterCrop( + (int(H*eval(f"self.crop_ratio_H_{modality_name}")), int(W*eval(f"self.crop_ratio_W_{modality_name}"))) + )(feature) + + if eval(f"self.depth_supervision_{modality_name}"): + output_dict.update({ + f"depth_items_{modality_name}": eval(f"self.encoder_{modality_name}").depth_items + }) + + # multiscale fusion. + # Here we do not use layer0 of the "self.layers_{modality_name}" + # We assume feature from the "self.backbone_{modality_name}" is the first-scale feature + feature_list = [feature] + + for i in range(1, eval(f"self.layers_num_{modality_name}")): + feature = eval(f"self.layers_{modality_name}").get_layer_i_feature(feature, layer_i=i) + feature_list.append(feature) + + feature = eval(f"self.layers_{modality_name}").decode_multiscale_feature(feature_list) + + feature = eval(f"self.shrink_conv_{modality_name}")(feature) + + cls_preds = eval(f"self.cls_head_{modality_name}")(feature) + reg_preds = eval(f"self.reg_head_{modality_name}")(feature) + dir_preds = eval(f"self.dir_head_{modality_name}")(feature) + + output_dict.update({'cls_preds': cls_preds, + 'reg_preds': reg_preds, + 'dir_preds': dir_preds}) + + return output_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/heter_model_sharedhead.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/heter_model_sharedhead.py new file mode 100644 index 0000000000000000000000000000000000000000..62e79e941f5585cb00643d2ac87970f7d7b83607 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/heter_model_sharedhead.py @@ -0,0 +1,294 @@ +# Author: Yifan Lu +# In this heterogeneous version, feature align start before backbone. 
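+# Rough data flow of HeterModelSharedhead (see forward() below):
+#   per modality: encoder_mX -> backbone_mX (ResNetBEVBackbone) -> aligner_mX,
+#   camera branches center-cropped to the lidar BEV range;
+#   all agents' features are then stacked, fused per scale (Max/Att), decoded,
+#   optionally shrunk, and fed to the shared cls/reg/dir heads.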
+ +import torch +import torch.nn as nn +import numpy as np +from icecream import ic +from collections import OrderedDict, Counter +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.feature_alignnet import AlignNet +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.fuse_modules.f_cooper_fuse import SpatialFusion +from opencood.models.fuse_modules.fusion_in_one import MaxFusion, AttFusion, DiscoFusion, V2VNetFusion, V2XViTFusion, When2commFusion, warp_feature +from opencood.utils.transformation_utils import normalize_pairwise_tfm +from opencood.utils.model_utils import check_trainable_module, fix_bn, unfix_bn +import importlib +import torchvision + +class HeterModelSharedhead(nn.Module): + def __init__(self, args): + super(HeterModelSharedhead, self).__init__() + self.args = args + modality_name_list = list(args.keys()) + modality_name_list = [x for x in modality_name_list if x.startswith("m") and x[1:].isdigit()] + self.modality_name_list = modality_name_list + + self.ego_modality = args['ego_modality'] + self.stage2_added_modality = args.get('stage2_added_modality', None) + + self.cav_range = args['lidar_range'] + self.sensor_type_dict = OrderedDict() + + # setup each modality model + for modality_name in self.modality_name_list: + model_setting = args[modality_name] + sensor_name = model_setting['sensor_type'] + self.sensor_type_dict[modality_name] = sensor_name + + # import model + encoder_filename = "opencood.models.heter_encoders" + encoder_lib = importlib.import_module(encoder_filename) + encoder_class = None + target_model_name = model_setting['core_method'].replace('_', '') + + for name, cls in encoder_lib.__dict__.items(): + if name.lower() == target_model_name.lower(): + encoder_class = cls + + """ + Encoder building + """ + setattr(self, f"encoder_{modality_name}", encoder_class(model_setting['encoder_args'])) + if model_setting['encoder_args'].get("depth_supervision", False): + setattr(self, f"depth_supervision_{modality_name}", True) + else: + setattr(self, f"depth_supervision_{modality_name}", False) + + """ + Backbone building + """ + setattr(self, f"backbone_{modality_name}", ResNetBEVBackbone(model_setting['backbone_args'])) + + """ + Aligner building + """ + setattr(self, f"aligner_{modality_name}", AlignNet(model_setting['aligner_args'])) + if sensor_name == "camera": + camera_mask_args = model_setting['camera_mask_args'] + setattr(self, f"crop_ratio_W_{modality_name}", (self.cav_range[3]) / (camera_mask_args['grid_conf']['xbound'][1])) + setattr(self, f"crop_ratio_H_{modality_name}", (self.cav_range[4]) / (camera_mask_args['grid_conf']['ybound'][1])) + setattr(self, f"xdist_{modality_name}", (camera_mask_args['grid_conf']['xbound'][1] - camera_mask_args['grid_conf']['xbound'][0])) + setattr(self, f"ydist_{modality_name}", (camera_mask_args['grid_conf']['ybound'][1] - camera_mask_args['grid_conf']['ybound'][0])) + + """For feature transformation""" + self.H = (self.cav_range[4] - self.cav_range[1]) + self.W = (self.cav_range[3] - self.cav_range[0]) + self.fake_voxel_size = 1 + + """ + single supervision + """ + self.supervise_single = False + if args.get("supervise_single", False): + self.supervise_single = True + in_head_single = 
args['in_head_single'] + setattr(self, f'cls_head_single', nn.Conv2d(in_head_single, args['anchor_number'], kernel_size=1)) + setattr(self, f'reg_head_single', nn.Conv2d(in_head_single, args['anchor_number'] * 7, kernel_size=1)) + setattr(self, f'dir_head_single', nn.Conv2d(in_head_single, args['anchor_number'] * args['dir_args']['num_bins'], kernel_size=1)) + + + """ + Fusion, by default multiscale fusion: + """ + self.backbone = ResNetBEVBackbone(args['fusion_backbone']) + self.fusion_net = nn.ModuleList() + + for i in range(len(args['fusion_backbone']['layer_nums'])): + if args['fusion_method'] == "max": + self.fusion_net.append(MaxFusion()) + if args['fusion_method'] == "att": + self.fusion_net.append(AttFusion(args['att']['feat_dim'][i])) + + + """ + Shrink header + """ + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + + """ + Shared Heads + """ + self.cls_head = nn.Conv2d(args['in_head'], args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(args['in_head'], 7 * args['anchor_number'], + kernel_size=1) + self.dir_head = nn.Conv2d(args['in_head'], args['dir_args']['num_bins'] * args['anchor_number'], + kernel_size=1) # BIN_NUM = 2 + + self.model_train_init() + + # check again which module is not fixed. + check_trainable_module(self) + + def model_train_init(self): + if self.stage2_added_modality is None: + return + """ + In stage 2, only ONE modality's aligner is trainable. + We first fix all modules, and set the aligner trainable. + """ + # fix all modules + self.eval() + for p in self.parameters(): + p.requires_grad_(False) + + # unfix aligner module + for p in eval(f"self.aligner_{self.stage2_added_modality}").parameters(): + p.requires_grad_(True) + eval(f"self.aligner_{self.stage2_added_modality}").apply(unfix_bn) + + + def forward(self, data_dict): + output_dict = {} + agent_modality_list = data_dict['agent_modality_list'] + t_matrix = normalize_pairwise_tfm(data_dict['pairwise_t_matrix'], self.H, self.W, self.fake_voxel_size) + record_len = data_dict['record_len'] + # print(agent_modality_list) + + modality_count_dict = Counter(agent_modality_list) + modality_feature_dict = {} + + for modality_name in self.modality_name_list: + if modality_name not in modality_count_dict: + continue + feature = eval(f"self.encoder_{modality_name}")(data_dict, modality_name) + feature = eval(f"self.backbone_{modality_name}")({"spatial_features": feature})['spatial_features_2d'] + feature = eval(f"self.aligner_{modality_name}")(feature) + modality_feature_dict[modality_name] = feature + + + """ + Spatial Align + """ + if len(self.ego_modality) == 2 and eval(f"self.aligner_{self.ego_modality}.spatial_align_flag"): + """ + e.g. + self.ego_modality = 'm4'. The length of string is 2. + record_len = [2, 3, 3] + agent_modality_list = [m4, m1, m4, m4, m1, m4, m1, m1]. + ego_idx_in_allcav = [0, 2, 5] + student_idx_in_allcav = [0, 2, 3, 5] + ego_idx_in_student = [0, 1, 3] + + in eval, ego can be non-student. only student ego will perform spatial align. 
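+            Continuing the example above (here every ego happens to be a student):
+            student_ego_idx_in_allcav  = [0, 2, 5]
+            student_ego_idx_in_student = [0, 1, 3]   (named ego_idx_in_student above)
+            student_ego_idx_in_ego     = [0, 1, 2]   -> spatial_align_sample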
+ """ + record_len_list = record_len.detach().cpu().numpy().tolist() + ego_idx_in_allcav = [0] + np.cumsum(record_len_list)[:-1].tolist() + + student_idx_in_allcav = [i for i, x in enumerate(agent_modality_list) if x == self.ego_modality] + student_ego_idx_in_allcav = [i for i in ego_idx_in_allcav if i in student_idx_in_allcav] + + student_ego_idx_in_student = [student_idx_in_allcav.index(x) for x in student_ego_idx_in_allcav] + student_ego_idx_in_ego = [ego_idx_in_allcav.index(x) for x in student_ego_idx_in_allcav] + spatial_align_sample = student_ego_idx_in_ego # within a batch, which samples will perform spatial align? only ego is student. + + if(len(spatial_align_sample)): + student_feature = modality_feature_dict[self.ego_modality][student_ego_idx_in_student] # ego in all student modality + + counting_dict = {modality_name: 0 for modality_name in self.modality_name_list} + teacher_feature_2d_list = [] # the same shape as 'feature', but replace ego modality feature with all zero. + + ego_aligner = eval(f"self.aligner_{self.ego_modality}") + + for modality_name in agent_modality_list: + feat_idx = counting_dict[modality_name] + agent_feature = modality_feature_dict[modality_name][feat_idx] + if modality_name in ego_aligner.teacher: + teacher_feature_2d_list.append(agent_feature) + else: + teacher_feature_2d_list.append(torch.zeros_like(agent_feature, device=agent_feature.device)) + counting_dict[modality_name] += 1 + + # unify the feature shape + _, _, H, W = modality_feature_dict[self.ego_modality].shape + target_H = int(H*eval(f"self.crop_ratio_H_{self.ego_modality}")) + target_W = int(W*eval(f"self.crop_ratio_W_{self.ego_modality}")) + crop_func = torchvision.transforms.CenterCrop((target_H, target_W)) + teacher_feature_2d_list = [crop_func(feat) for feat in teacher_feature_2d_list] + + teacher_feature_full = torch.stack(teacher_feature_2d_list) + teacher_feature = MaxFusion()(teacher_feature_full, record_len, t_matrix) + teacher_feature = torchvision.transforms.CenterCrop((H, W))(teacher_feature) + teacher_feature = teacher_feature[spatial_align_sample] + + modality_feature_dict[self.ego_modality][student_ego_idx_in_student] = \ + ego_aligner.spatail_align(student_feature, teacher_feature, + (eval(f"self.xdist_{self.ego_modality}"), eval(f"self.ydist_{self.ego_modality}"))) + + """ + Crop/Padd camera feature map. + """ + for modality_name in self.modality_name_list: + if modality_name in modality_count_dict: + if self.sensor_type_dict[modality_name] == "camera": + # should be padding. 
Instead of masking + feature = modality_feature_dict[modality_name] + _, _, H, W = feature.shape + target_H = int(H*eval(f"self.crop_ratio_H_{modality_name}")) + target_W = int(W*eval(f"self.crop_ratio_W_{modality_name}")) + + crop_func = torchvision.transforms.CenterCrop((target_H, target_W)) + modality_feature_dict[modality_name] = crop_func(feature) + if eval(f"self.depth_supervision_{modality_name}"): + output_dict.update({ + f"depth_items_{modality_name}": eval(f"self.encoder_{modality_name}").depth_items + }) + + """ + Assemble heter features + """ + counting_dict = {modality_name:0 for modality_name in self.modality_name_list} + heter_feature_2d_list = [] + for modality_name in agent_modality_list: + feat_idx = counting_dict[modality_name] + heter_feature_2d_list.append(modality_feature_dict[modality_name][feat_idx]) + counting_dict[modality_name] += 1 + + heter_feature_2d = torch.stack(heter_feature_2d_list) + + """ + Single supervision + """ + if self.supervise_single: + cls_preds_before_fusion = self.cls_head_single(heter_feature_2d) + reg_preds_before_fusion = self.reg_head_single(heter_feature_2d) + dir_preds_before_fusion = self.dir_head_single(heter_feature_2d) + output_dict.update({'cls_preds_single': cls_preds_before_fusion, + 'reg_preds_single': reg_preds_before_fusion, + 'dir_preds_single': dir_preds_before_fusion}) + + """ + Feature Fusion (multiscale). + + we omit self.backbone's first layer. + """ + + feature_list = [heter_feature_2d] + for i in range(1, len(self.fusion_net)): + heter_feature_2d = self.backbone.get_layer_i_feature(heter_feature_2d, layer_i=i) + feature_list.append(heter_feature_2d) + + fused_feature_list = [] + for i, fuse_module in enumerate(self.fusion_net): + fused_feature_list.append(fuse_module(feature_list[i], record_len, t_matrix)) + fused_feature = self.backbone.decode_multiscale_feature(fused_feature_list) + + if self.shrink_flag: + fused_feature = self.shrink_conv(fused_feature) + + cls_preds = self.cls_head(fused_feature) + reg_preds = self.reg_head(fused_feature) + dir_preds = self.dir_head(fused_feature) + + output_dict.update({'cls_preds': cls_preds, + 'reg_preds': reg_preds, + 'dir_preds': dir_preds}) + + return output_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/lift_splat_shoot.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/lift_splat_shoot.py new file mode 100644 index 0000000000000000000000000000000000000000..d3b4c4ead5541cd70ff4b054e5d27e13e468c405 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/lift_splat_shoot.py @@ -0,0 +1,220 @@ +""" +Copyright (C) 2020 NVIDIA Corporation. All rights reserved. +Licensed under the NVIDIA Source Code License. See LICENSE at https://github.com/nv-tlabs/lift-splat-shoot. 
+Authors: Jonah Philion and Sanja Fidler +""" + +import torch +from torch import nn +from efficientnet_pytorch import EfficientNet +from torchvision.models.resnet import resnet18 +from icecream import ic + +from opencood.utils.camera_utils import gen_dx_bx, cumsum_trick, QuickCumsum, depth_discretization +from opencood.models.sub_modules.lss_submodule import Up, CamEncode, BevEncode, CamEncode_Resnet101 +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from matplotlib import pyplot as plt + + +class LiftSplatShoot(nn.Module): + def __init__(self, args): + super(LiftSplatShoot, self).__init__() + self.grid_conf = args['grid_conf'] # 网格配置参数 + self.data_aug_conf = args['data_aug_conf'] # 数据增强配置参数 + self.bevout_feature = args['bevout_feature'] + dx, bx, nx = gen_dx_bx(self.grid_conf['xbound'], + self.grid_conf['ybound'], + self.grid_conf['zbound'], + ) # 划分网格 + + self.dx = dx.clone().detach().requires_grad_(False).to(torch.device("cuda")) # [0.4,0.4,20] + self.bx = bx.clone().detach().requires_grad_(False).to(torch.device("cuda")) # [-49.8,-49.8,0] + self.nx = nx.clone().detach().requires_grad_(False).to(torch.device("cuda")) # [250,250,1] + + self.downsample = args['img_downsample'] # 下采样倍数 + self.camC = args['img_features'] # 图像特征维度 + self.frustum = self.create_frustum().clone().detach().requires_grad_(False).to(torch.device("cuda")) # frustum: DxfHxfWx3(41x8x16x3) + + self.D, _, _, _ = self.frustum.shape # D: 41 + self.camera_encoder_type = args['camera_encoder'] + if self.camera_encoder_type == 'EfficientNet': + self.camencode = CamEncode(self.D, self.camC, self.downsample, \ + self.grid_conf['ddiscr'], self.grid_conf['mode'], args['use_depth_gt'], args['depth_supervision']) + elif self.camera_encoder_type == 'Resnet101': + self.camencode = CamEncode_Resnet101(self.D, self.camC, self.downsample, \ + self.grid_conf['ddiscr'], self.grid_conf['mode'], args['use_depth_gt'], args['depth_supervision']) + + self.bevencode = BevEncode(inC=self.camC, outC=self.bevout_feature) + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + + self.cls_head = nn.Conv2d(self.bevout_feature, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.bevout_feature, 7 * args['anchor_number'], + kernel_size=1) + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(self.bevout_feature, args['dir_args']['num_bins'] * args['anchor_number'], + kernel_size=1) # BIN_NUM = 2 + else: + self.use_dir = False + + # toggle using QuickCumsum vs. 
autograd
+        self.use_quickcumsum = True
+
+        # for p in self.parameters():
+        #     p.requires_grad = False
+        # for p in self.camencode.depth_head.parameters():
+        #     p.requires_grad = True
+        #     print("freeze ",p)
+
+    def create_frustum(self):
+        # make grid in image plane
+        ogfH, ogfW = self.data_aug_conf['final_dim']  # original image size ogfH: 128 ogfW: 288
+        fH, fW = ogfH // self.downsample, ogfW // self.downsample  # image size after 16x downsampling fH: 12 fW: 22
+        # ds = torch.arange(*self.grid_conf['dbound'], dtype=torch.float).view(-1, 1, 1).expand(-1, fH, fW)  # grid along the depth direction ds: DxfHxfW (41x12x22)
+        ds = torch.tensor(depth_discretization(*self.grid_conf['ddiscr'], self.grid_conf['mode']), dtype=torch.float).view(-1,1,1).expand(-1, fH, fW)
+
+        D, _, _ = ds.shape  # D: 41, number of grid cells along the depth direction
+        xs = torch.linspace(0, ogfW - 1, fW, dtype=torch.float).view(1, 1, fW).expand(D, fH, fW)  # split 0~288 into fW cells, xs: DxfHxfW (41x12x22)
+        ys = torch.linspace(0, ogfH - 1, fH, dtype=torch.float).view(1, fH, 1).expand(D, fH, fW)  # split 0~127 into fH cells, ys: DxfHxfW (41x12x22)
+
+        # D x H x W x 3
+        frustum = torch.stack((xs, ys, ds), -1)  # stack into grid coordinates; frustum[i,j,k,0] is the width-direction grid coordinate of pixel (i,j) at depth index k, frustum: DxfHxfWx3
+        return frustum
+
+    def get_geometry(self, rots, trans, intrins, post_rots, post_trans):
+        """Determine the (x,y,z) locations (in the ego frame)
+        of the points in the point cloud.
+        Returns B x N x D x H/downsample x W/downsample x 3
+        """
+        B, N, _ = trans.shape  # B: 4 (batch size) N: 4 (number of cameras)
+
+        # undo post-transformation
+        # B x N x D x H x W x 3
+        # undo the pixel changes introduced by data augmentation and preprocessing
+        points = self.frustum - post_trans.view(B, N, 1, 1, 1, 3)
+        points = torch.inverse(post_rots).view(B, N, 1, 1, 1, 3, 3).matmul(points.unsqueeze(-1))
+
+        # cam_to_ego
+        points = torch.cat((points[:, :, :, :, :, :2] * points[:, :, :, :, :, 2:3],  # points[:, :, :, :, :, 2:3] ranges from [4, 45) meters
+                            points[:, :, :, :, :, 2:3]
+                            ), 5)  # turn the pixel coordinates (u,v,d) into homogeneous-style coordinates (du,dv,d)
+        # d[u,v,1]^T = intrins * rots^(-1) * ([x,y,z]^T - trans)
+        combine = rots.matmul(torch.inverse(intrins))
+        points = combine.view(B, N, 1, 1, 1, 3, 3).matmul(points).squeeze(-1)
+        points += trans.view(B, N, 1, 1, 1, 3)  # map the pixel coordinates d[u,v,1]^T to [x,y,z] in the ego (vehicle) frame
+
+        return points  # B x N x D x H x W x 3 (4 x 4 x 41 x 16 x 22 x 3)
+
+    def get_cam_feats(self, x):
+        """Return B x N x D x H/downsample x W/downsample x C
+        """
+        B, N, C, imH, imW = x.shape  # B: 4 N: 4 C: 3 imH: 256 imW: 352
+
+        x = x.view(B*N, C, imH, imW)  # merge the B and N dimensions, x: 16 x 4 x 256 x 352
+        depth_items, x = self.camencode(x)  # run the image encoder, x: B*N x C x D x fH x fW (24 x 64 x 41 x 16 x 22)
+        x = x.view(B, N, self.camC, self.D, imH//self.downsample, imW//self.downsample)  # split the first dimension back into B and N, x: B x N x C x D x fH x fW (4 x 6 x 64 x 41 x 16 x 22)
+        x = x.permute(0, 1, 3, 4, 5, 2)  # x: B x N x D x fH x fW x C (4 x 6 x 41 x 16 x 22 x 64)
+
+        return x, depth_items
+
+    def voxel_pooling(self, geom_feats, x):
+        # geom_feats: B x N x D x H x W x 3 (4 x 6 x 41 x 16 x 22 x 3), D is discretization in "UD" or "LID"
+        # x: B x N x D x fH x fW x C (4 x 6 x 41 x 16 x 22 x 64), D is num_bins
+
+        B, N, D, H, W, C = x.shape  # B: 4 N: 6 D: 41 H: 16 W: 22 C: 64
+        Nprime = B*N*D*H*W  # Nprime
+
+        # flatten x
+        x = x.reshape(Nprime, C)  # flatten the features; there are B*N*D*H*W points in total
+
+        # flatten indices
+
+        geom_feats = ((geom_feats - (self.bx - self.dx/2.)) / self.dx).long()  # shift the ranges [-48,48] / [-10,10] to [0, 240) / [0, 1); compute voxel indices and floor
+        geom_feats = geom_feats.view(Nprime, 3)  # flatten the pixel-to-voxel mapping as well, geom_feats: B*N*D*H*W x 3
+        batch_ix = torch.cat([torch.full([Nprime//B, 1], ix,
+                                         device=x.device, dtype=torch.long) for ix in range(B)])  # which batch each point belongs to
+        geom_feats = torch.cat((geom_feats, batch_ix), 1)  # geom_feats: B*N*D*H*W x 4, geom_feats[:,3] is the batch id
+
+        # filter out points that are outside box
+        # drop points that fall outside the grid, x: 0~240 y: 0~240 z: 0
+        kept = (geom_feats[:, 0] >= 0) & (geom_feats[:, 0] < self.nx[0])\
+            & (geom_feats[:, 1] >= 0) & (geom_feats[:, 1] < self.nx[1])\
+            & (geom_feats[:, 2] >= 0) & (geom_feats[:, 2] < self.nx[2])
+        x = x[kept]
+        geom_feats = geom_feats[kept]
+
+        # get tensors from the same voxel next to each other
+        ranks = geom_feats[:, 0] * (self.nx[1] * self.nx[2] * B)\
+            + geom_feats[:, 1] * (self.nx[2] * B)\
+            + geom_feats[:, 2] * B\
+            + geom_feats[:, 3]  # assign each point a rank; points with equal rank belong to the same batch and the same voxel
+        sorts = ranks.argsort()
+        x, geom_feats, ranks = x[sorts], geom_feats[sorts], ranks[sorts]  # sort by rank so that points in the same voxel become adjacent
+        # x: 168648 x 64 geom_feats: 168648 x 4 ranks: 168648
+
+        # cumsum trick
+        if not self.use_quickcumsum:
+            x, geom_feats = cumsum_trick(x, geom_feats, ranks)
+        else:
+            x, geom_feats = QuickCumsum.apply(x, geom_feats, ranks)  # keep a single reduced point per voxel per batch, x: 29072 x 64 geom_feats: 29072 x 4
+
+        # griddify (B x C x Z x X x Y)
+        # final = torch.zeros((B, C, self.nx[2], self.nx[0], self.nx[1]), device=x.device)  # final: 4 x 64 x Z x X x Y
+        # final[geom_feats[:, 3], :, geom_feats[:, 2], geom_feats[:, 0], geom_feats[:, 1]] = x  # scatter x into final at its voxel coordinates
+
+        # modify griddify (B x C x Z x Y x X) by Yifan Lu 2022.10.7
+        # ------> x
+        # |
+        # |
+        # y
+        final = torch.zeros((B, C, self.nx[2], self.nx[1], self.nx[0]), device=x.device)  # final: 4 x 64 x Z x Y x X
+        final[geom_feats[:, 3], :, geom_feats[:, 2], geom_feats[:, 1], geom_feats[:, 0]] = x  # scatter x into final at its voxel coordinates
+
+        # collapse Z
+        final = torch.cat(final.unbind(dim=2), 1)  # collapse the z dimension
+
+        return final  # final: 4 x 64 x 240 x 240 # B, C, H, W
+
+    def get_voxels(self, x, rots, trans, intrins, post_rots, post_trans):
+        geom = self.get_geometry(rots, trans, intrins, post_rots, post_trans)  # mapping from pixel coordinates to ego-frame coordinates, geom: B x N x D x H x W x 3 (4 x N x 42 x 16 x 22 x 3)
+        x_img, depth_items = self.get_cam_feats(x)  # extract image features and predict the depth distribution, x: B x N x D x fH x fW x C (4 x N x 42 x 16 x 22 x 64)
+        x = self.voxel_pooling(geom, x_img)  # x: 4 x 64 x 240 x 240
+
+        return x, depth_items
+
+    def forward(self, data_dict):
+        # x: [4,4,3,256, 352]
+        # rots: [4,4,3,3]
+        # trans: [4,4,3]
+        # intrins: [4,4,3,3]
+        # post_rots: [4,4,3,3]
+        # post_trans: [4,4,3]
+        image_inputs_dict = data_dict['image_inputs']
+        x, rots, trans, intrins, post_rots, post_trans = \
+            image_inputs_dict['imgs'], image_inputs_dict['rots'], image_inputs_dict['trans'], image_inputs_dict['intrins'], image_inputs_dict['post_rots'], image_inputs_dict['post_trans']
+        x, depth_items = self.get_voxels(x, rots, trans, intrins, post_rots, post_trans)  # project the images into BEV, x: B x C x 240 x 240 (4 x 64 x 240 x 240)
+
+        x = self.bevencode(x)  # extract BEV features with a ResNet-18 encoder, x: 4 x C x 240 x 240
+
+        if self.shrink_flag:
+            x = self.shrink_conv(x)
+        # 4 x C x 120 x 120
+        psm = self.cls_head(x)
+        rm = self.reg_head(x)
+        output_dict = {'cls_preds': psm,
+                       'reg_preds': rm,
+                       'depth_items': depth_items}
+
+        if self.use_dir:
+            dm = self.dir_head(x)
+            output_dict.update({"dir_preds": dm})
+
+        return output_dict
+
+
+def compile_model(grid_conf, data_aug_conf, outC):
+    return LiftSplatShoot(grid_conf, data_aug_conf, outC)
\ No newline at end of file
diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/lift_splat_shoot_intermediate.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/lift_splat_shoot_intermediate.py
new file mode 100644
index
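(Editor's note: the `voxel_pooling` above relies on a sort-by-rank plus prefix-sum reduction that is easy to misread. The snippet below is a minimal, self-contained toy version of that trick with invented tensors and a sum reduction only; it is a sketch of the idea, not code from this repository, and `QuickCumsum` is assumed to compute the same per-voxel sums with a custom backward pass.)

```python
import torch

# Toy version of the sort-by-rank + cumsum reduction used in voxel_pooling above.
feats = torch.tensor([[1.], [2.], [3.], [4.]])   # per-point features, C = 1 (invented)
ranks = torch.tensor([3, 1, 3, 1])               # voxel id of each point (invented)

order = ranks.argsort()
feats, ranks = feats[order], ranks[order]        # points of the same voxel become adjacent

feats = feats.cumsum(0)                          # inclusive prefix sum over all points
kept = torch.ones(feats.shape[0], dtype=torch.bool)
kept[:-1] = ranks[1:] != ranks[:-1]              # True at the last point of each voxel
feats = feats[kept]                              # prefix sums at voxel boundaries
feats = torch.cat((feats[:1], feats[1:] - feats[:-1]))  # differences -> per-voxel sums
print(feats)                                     # tensor([[6.], [4.]]): voxel 1 = 2+4, voxel 3 = 1+3
```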
0000000000000000000000000000000000000000..44c0ad669a4f4b7191be81d85ad77191ce8b0b98 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/lift_splat_shoot_intermediate.py @@ -0,0 +1,77 @@ +""" +Copyright (C) 2020 NVIDIA Corporation. All rights reserved. +Licensed under the NVIDIA Source Code License. See LICENSE at https://github.com/nv-tlabs/lift-splat-shoot. +Authors: Jonah Philion and Sanja Fidler + +Intermediate fusion for camera based collaboration +""" + +from numpy import record +import torch +from torch import nn +from efficientnet_pytorch import EfficientNet +from torchvision.models.resnet import resnet18 +from icecream import ic +from opencood.models.lift_splat_shoot import LiftSplatShoot +from opencood.utils.camera_utils import gen_dx_bx, cumsum_trick, QuickCumsum +from opencood.models.sub_modules.lss_submodule import BevEncodeMSFusion, BevEncodeSSFusion, Up, CamEncode, BevEncode +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from matplotlib import pyplot as plt + + +class LiftSplatShootIntermediate(LiftSplatShoot): + def __init__(self, args): + super(LiftSplatShootIntermediate, self).__init__(args) + + fusion_args = args['fusion_args'] + self.ms = args['fusion_args']['core_method'].endswith("ms") + if self.ms: + self.bevencode = BevEncodeMSFusion(fusion_args) + else: + self.bevencode = BevEncodeSSFusion(fusion_args) + self.supervise_single = args['supervise_single'] + + for p in self.camencode.parameters(): + p.requires_grad_(False) + + if self.supervise_single: + self.cls_head_before_fusion = nn.Conv2d(self.bevout_feature, args['anchor_number'], kernel_size=1) + self.reg_head_before_fusion = nn.Conv2d(self.bevout_feature, 7 * args['anchor_number'], kernel_size=1) + if self.use_dir: + self.dir_head_before_fusion = nn.Conv2d(self.bevout_feature, args['dir_args']['num_bins'] * args['anchor_number'], kernel_size=1) # BIN_NUM = 2 + + + def forward(self, data_dict): + return self._forward(data_dict) + + def _forward(self, data_dict): + image_inputs_dict = data_dict['image_inputs'] + record_len = data_dict['record_len'] + x, rots, trans, intrins, post_rots, post_trans = \ + image_inputs_dict['imgs'], image_inputs_dict['rots'], image_inputs_dict['trans'], image_inputs_dict['intrins'], image_inputs_dict['post_rots'], image_inputs_dict['post_trans'] + x, depth_items = self.get_voxels(x, rots, trans, intrins, post_rots, post_trans) # 将图像转换到BEV下,x: sum(record_len) x C x 240 x 240 (4 x 64 x 240 x 240) + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + x_single, x_fuse = self.bevencode(x, record_len, pairwise_t_matrix) + psm = self.cls_head(x_fuse) + rm = self.reg_head(x_fuse) + output_dict = {'cls_preds': psm, + 'reg_preds': rm, + 'depth_items': depth_items} + if self.use_dir: + dm = self.dir_head(x_fuse) + output_dict.update({"dir_preds": dm}) + + if self.supervise_single: + psm_single = self.cls_head_before_fusion(x_single) + rm_single = self.reg_head_before_fusion(x_single) + output_dict.update({'cls_preds_single': psm_single, + 'reg_preds_single': rm_single}) + if self.use_dir: + dm_single = self.dir_head_before_fusion(x_single) + output_dict.update({"dir_preds_single": dm_single}) + + return output_dict + + +def compile_model(grid_conf, data_aug_conf, outC): + return LiftSplatShootIntermediate(grid_conf, data_aug_conf, outC) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/lift_splat_shoot_voxel.py 
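(Editor's note: in `LiftSplatShootIntermediate` above, the single-scale vs. multi-scale BEV fusion encoder is selected by `core_method.endswith("ms")`. A tiny illustration of that naming convention follows; the method-name strings are hypothetical examples, not values taken from this run's config.)

```python
# Hypothetical core_method strings; only the endswith("ms") convention comes from the code above.
for core_method in ("lss_intermediate", "lss_intermediate_ms"):
    encoder = "BevEncodeMSFusion" if core_method.endswith("ms") else "BevEncodeSSFusion"
    print(f"{core_method} -> {encoder}")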
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/lift_splat_shoot_voxel.py new file mode 100644 index 0000000000000000000000000000000000000000..91d750f2991ce3742b1f7432d0f1ad663eb32d30 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/lift_splat_shoot_voxel.py @@ -0,0 +1,220 @@ +""" +Copyright (C) 2020 NVIDIA Corporation. All rights reserved. +Licensed under the NVIDIA Source Code License. See LICENSE at https://github.com/nv-tlabs/lift-splat-shoot. +Authors: Jonah Philion and Sanja Fidler +""" + +import torch +from torch import nn +from efficientnet_pytorch import EfficientNet +from torchvision.models.resnet import resnet18 +from icecream import ic + +from opencood.utils.camera_utils import gen_dx_bx, cumsum_trick, QuickCumsum, depth_discretization +from opencood.models.sub_modules.lss_submodule import Up, CamEncode, BevEncode, CamEncode_Resnet101 +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from matplotlib import pyplot as plt + + +class LiftSplatShootVoxel(nn.Module): + def __init__(self, args): + super(LiftSplatShootVoxel, self).__init__() + self.grid_conf = args['grid_conf'] # 网格配置参数 + self.data_aug_conf = args['data_aug_conf'] # 数据增强配置参数 + self.bevout_feature = args['bevout_feature'] + dx, bx, nx = gen_dx_bx(self.grid_conf['xbound'], + self.grid_conf['ybound'], + self.grid_conf['zbound'], + ) # 划分网格 + + self.dx = dx.clone().detach().requires_grad_(False).to(torch.device("cuda")) # [0.4,0.4,20] + self.bx = bx.clone().detach().requires_grad_(False).to(torch.device("cuda")) # [-49.8,-49.8,0] + self.nx = nx.clone().detach().requires_grad_(False).to(torch.device("cuda")) # [250,250,1] + + self.downsample = args['img_downsample'] # 下采样倍数 + self.camC = args['img_features'] # 图像特征维度 + self.frustum = self.create_frustum().clone().detach().requires_grad_(False).to(torch.device("cuda")) # frustum: DxfHxfWx3(41x8x16x3) + + self.D, _, _, _ = self.frustum.shape # D: 41 + self.camera_encoder_type = args['camera_encoder'] + if self.camera_encoder_type == 'EfficientNet': + self.camencode = CamEncode(self.D, self.camC, self.downsample, \ + self.grid_conf['ddiscr'], self.grid_conf['mode'], args['use_depth_gt'], args['depth_supervision']) + elif self.camera_encoder_type == 'Resnet101': + self.camencode = CamEncode_Resnet101(self.D, self.camC, self.downsample, \ + self.grid_conf['ddiscr'], self.grid_conf['mode'], args['use_depth_gt'], args['depth_supervision']) + + self.bevencode = BevEncode(inC=self.camC, outC=self.bevout_feature) + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + + self.cls_head = nn.Conv2d(self.bevout_feature, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.bevout_feature, 7 * args['anchor_number'], + kernel_size=1) + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(self.bevout_feature, args['dir_args']['num_bins'] * args['anchor_number'], + kernel_size=1) # BIN_NUM = 2 + else: + self.use_dir = False + + # toggle using QuickCumsum vs. 
autograd
+        self.use_quickcumsum = True
+
+        # for p in self.parameters():
+        #     p.requires_grad = False
+        # for p in self.camencode.depth_head.parameters():
+        #     p.requires_grad = True
+        #     print("freeze ",p)
+
+    def create_frustum(self):
+        # make grid in image plane
+        ogfH, ogfW = self.data_aug_conf['final_dim']  # original image size ogfH: 128 ogfW: 288
+        fH, fW = ogfH // self.downsample, ogfW // self.downsample  # image size after 16x downsampling fH: 12 fW: 22
+        # ds = torch.arange(*self.grid_conf['dbound'], dtype=torch.float).view(-1, 1, 1).expand(-1, fH, fW)  # grid along the depth direction ds: DxfHxfW (41x12x22)
+        ds = torch.tensor(depth_discretization(*self.grid_conf['ddiscr'], self.grid_conf['mode']), dtype=torch.float).view(-1,1,1).expand(-1, fH, fW)
+
+        D, _, _ = ds.shape  # D: 41, number of grid cells along the depth direction
+        xs = torch.linspace(0, ogfW - 1, fW, dtype=torch.float).view(1, 1, fW).expand(D, fH, fW)  # split 0~288 into fW cells, xs: DxfHxfW (41x12x22)
+        ys = torch.linspace(0, ogfH - 1, fH, dtype=torch.float).view(1, fH, 1).expand(D, fH, fW)  # split 0~127 into fH cells, ys: DxfHxfW (41x12x22)
+
+        # D x H x W x 3
+        frustum = torch.stack((xs, ys, ds), -1)  # stack into grid coordinates; frustum[i,j,k,0] is the width-direction grid coordinate of pixel (i,j) at depth index k, frustum: DxfHxfWx3
+        return frustum
+
+    def get_geometry(self, rots, trans, intrins, post_rots, post_trans):
+        """Determine the (x,y,z) locations (in the ego frame)
+        of the points in the point cloud.
+        Returns B x N x D x H/downsample x W/downsample x 3
+        """
+        B, N, _ = trans.shape  # B: 4 (batch size) N: 4 (number of cameras)
+
+        # undo post-transformation
+        # B x N x D x H x W x 3
+        # undo the pixel changes introduced by data augmentation and preprocessing
+        points = self.frustum - post_trans.view(B, N, 1, 1, 1, 3)
+        points = torch.inverse(post_rots).view(B, N, 1, 1, 1, 3, 3).matmul(points.unsqueeze(-1))
+
+        # cam_to_ego
+        points = torch.cat((points[:, :, :, :, :, :2] * points[:, :, :, :, :, 2:3],  # points[:, :, :, :, :, 2:3] ranges from [4, 45) meters
+                            points[:, :, :, :, :, 2:3]
+                            ), 5)  # turn the pixel coordinates (u,v,d) into homogeneous-style coordinates (du,dv,d)
+        # d[u,v,1]^T = intrins * rots^(-1) * ([x,y,z]^T - trans)
+        combine = rots.matmul(torch.inverse(intrins))
+        points = combine.view(B, N, 1, 1, 1, 3, 3).matmul(points).squeeze(-1)
+        points += trans.view(B, N, 1, 1, 1, 3)  # map the pixel coordinates d[u,v,1]^T to [x,y,z] in the ego (vehicle) frame
+
+        return points  # B x N x D x H x W x 3 (4 x 4 x 41 x 16 x 22 x 3)
+
+    def get_cam_feats(self, x):
+        """Return B x N x D x H/downsample x W/downsample x C
+        """
+        B, N, C, imH, imW = x.shape  # B: 4 N: 4 C: 3 imH: 256 imW: 352
+
+        x = x.view(B*N, C, imH, imW)  # merge the B and N dimensions, x: 16 x 4 x 256 x 352
+        depth_items, x = self.camencode(x)  # run the image encoder, x: B*N x C x D x fH x fW (24 x 64 x 41 x 16 x 22)
+        x = x.view(B, N, self.camC, self.D, imH//self.downsample, imW//self.downsample)  # split the first dimension back into B and N, x: B x N x C x D x fH x fW (4 x 6 x 64 x 41 x 16 x 22)
+        x = x.permute(0, 1, 3, 4, 5, 2)  # x: B x N x D x fH x fW x C (4 x 6 x 41 x 16 x 22 x 64)
+
+        return x, depth_items
+
+    def voxel_pooling(self, geom_feats, x):
+        # geom_feats: B x N x D x H x W x 3 (4 x 6 x 41 x 16 x 22 x 3), D is discretization in "UD" or "LID"
+        # x: B x N x D x fH x fW x C (4 x 6 x 41 x 16 x 22 x 64), D is num_bins
+
+        B, N, D, H, W, C = x.shape  # B: 4 N: 6 D: 41 H: 16 W: 22 C: 64
+        Nprime = B*N*D*H*W  # Nprime
+
+        # flatten x
+        x = x.reshape(Nprime, C)  # flatten the features; there are B*N*D*H*W points in total
+
+        # flatten indices
+
+        geom_feats = ((geom_feats - (self.bx - self.dx/2.)) / self.dx).long()  # shift the ranges [-48,48] / [-10,10] to [0, 240) / [0, 1); compute voxel indices and floor
+        geom_feats = geom_feats.view(Nprime, 3)  # flatten the pixel-to-voxel mapping as well, geom_feats: B*N*D*H*W x 3
+        batch_ix = torch.cat([torch.full([Nprime//B, 1], ix,
+                                         device=x.device, dtype=torch.long) for ix in range(B)])  # which batch each point belongs to
+        geom_feats = torch.cat((geom_feats, batch_ix), 1)  # geom_feats: B*N*D*H*W x 4, geom_feats[:,3] is the batch id
+
+        # filter out points that are outside box
+        # drop points that fall outside the grid, x: 0~240 y: 0~240 z: 0
+        kept = (geom_feats[:, 0] >= 0) & (geom_feats[:, 0] < self.nx[0])\
+            & (geom_feats[:, 1] >= 0) & (geom_feats[:, 1] < self.nx[1])\
+            & (geom_feats[:, 2] >= 0) & (geom_feats[:, 2] < self.nx[2])
+        x = x[kept]
+        geom_feats = geom_feats[kept]
+
+        # get tensors from the same voxel next to each other
+        ranks = geom_feats[:, 0] * (self.nx[1] * self.nx[2] * B)\
+            + geom_feats[:, 1] * (self.nx[2] * B)\
+            + geom_feats[:, 2] * B\
+            + geom_feats[:, 3]  # assign each point a rank; points with equal rank belong to the same batch and the same voxel
+        sorts = ranks.argsort()
+        x, geom_feats, ranks = x[sorts], geom_feats[sorts], ranks[sorts]  # sort by rank so that points in the same voxel become adjacent
+        # x: 168648 x 64 geom_feats: 168648 x 4 ranks: 168648
+
+        # cumsum trick
+        if not self.use_quickcumsum:
+            x, geom_feats = cumsum_trick(x, geom_feats, ranks)
+        else:
+            x, geom_feats = QuickCumsum.apply(x, geom_feats, ranks)  # keep a single reduced point per voxel per batch, x: 29072 x 64 geom_feats: 29072 x 4
+
+        # griddify (B x C x Z x X x Y)
+        # final = torch.zeros((B, C, self.nx[2], self.nx[0], self.nx[1]), device=x.device)  # final: 4 x 64 x Z x X x Y
+        # final[geom_feats[:, 3], :, geom_feats[:, 2], geom_feats[:, 0], geom_feats[:, 1]] = x  # scatter x into final at its voxel coordinates
+
+        # modify griddify (B x C x Z x Y x X) by Yifan Lu 2022.10.7
+        # ------> x
+        # |
+        # |
+        # y
+        final = torch.zeros((B, C, self.nx[2], self.nx[1], self.nx[0]), device=x.device)  # final: 4 x 64 x Z x Y x X
+        final[geom_feats[:, 3], :, geom_feats[:, 2], geom_feats[:, 1], geom_feats[:, 0]] = x  # scatter x into final at its voxel coordinates
+
+        # collapse Z
+        # final = torch.max(final.unbind(dim=2), 1)[0]  # collapse the z dimension
+        final = torch.max(final, 2)[0]  # collapse the z dimension
+        return final  # final: 4 x 64 x 240 x 240 # B, C, H, W
+
+    def get_voxels(self, x, rots, trans, intrins, post_rots, post_trans):
+        geom = self.get_geometry(rots, trans, intrins, post_rots, post_trans)  # mapping from pixel coordinates to ego-frame coordinates, geom: B x N x D x H x W x 3 (4 x N x 42 x 16 x 22 x 3)
+        x_img, depth_items = self.get_cam_feats(x)  # extract image features and predict the depth distribution, x: B x N x D x fH x fW x C (4 x N x 42 x 16 x 22 x 64)
+        x = self.voxel_pooling(geom, x_img)  # x: 4 x 64 x 240 x 240
+
+        return x, depth_items
+
+    def forward(self, data_dict):
+        # x: [4,4,3,256, 352]
+        # rots: [4,4,3,3]
+        # trans: [4,4,3]
+        # intrins: [4,4,3,3]
+        # post_rots: [4,4,3,3]
+        # post_trans: [4,4,3]
+        image_inputs_dict = data_dict['image_inputs']
+        x, rots, trans, intrins, post_rots, post_trans = \
+            image_inputs_dict['imgs'], image_inputs_dict['rots'], image_inputs_dict['trans'], image_inputs_dict['intrins'], image_inputs_dict['post_rots'], image_inputs_dict['post_trans']
+        x, depth_items = self.get_voxels(x, rots, trans, intrins, post_rots, post_trans)  # project the images into BEV, x: B x C x 240 x 240 (4 x 64 x 240 x 240)
+
+        x = self.bevencode(x)  # extract BEV features with a ResNet-18 encoder, x: 4 x C x 240 x 240
+
+        if self.shrink_flag:
+            x = self.shrink_conv(x)
+        # 4 x C x 120 x 120
+        psm = self.cls_head(x)
+        rm = self.reg_head(x)
+        output_dict = {'psm': psm,
+                       'rm': rm,
+                       'depth_items': depth_items}
+
+        if self.use_dir:
+            dm = self.dir_head(x)
+            output_dict.update({"dm": dm})
+
+        return output_dict
+
+
+def compile_model(grid_conf, data_aug_conf, outC):
+    return LiftSplatShootVoxel(grid_conf, data_aug_conf, outC)
\ No newline at end of file
diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/pixor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/pixor.py
new file mode 100644
index
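(Editor's note: apart from the `psm`/`rm`/`dm` output key names, the substantive difference between `LiftSplatShootVoxel` above and `LiftSplatShoot` earlier appears to be how the Z axis of the pooled voxel grid is collapsed. The sketch below contrasts the two strategies on an invented tensor; sizes are illustrative only.)

```python
import torch

# final: (B, C, Z, Y, X) with invented sizes; both variants end with a (B, ?, Y, X) BEV map.
final = torch.randn(2, 64, 4, 8, 8)

bev_cat = torch.cat(final.unbind(dim=2), 1)   # LiftSplatShoot: concatenate Z slices -> (2, 64*4, 8, 8)
bev_max = torch.max(final, 2)[0]              # LiftSplatShootVoxel: max over Z      -> (2, 64, 8, 8)
print(bev_cat.shape, bev_max.shape)
```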
0000000000000000000000000000000000000000..e29d7ff60b3aba46d1bc4f073587253105cfe189 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/pixor.py @@ -0,0 +1,310 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +def conv3x3(in_planes, out_planes, stride=1, bias=False): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=bias) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, in_planes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(in_planes, planes, stride, bias=True) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes, bias=True) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + # out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + # out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, in_planes, planes, stride=1, downsample=None, + use_bn=True): + super(Bottleneck, self).__init__() + bias = not use_bn + self.use_bn = use_bn + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=bias) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=bias) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, self.expansion * planes, kernel_size=1, + bias=bias) + self.bn3 = nn.BatchNorm2d(self.expansion * planes) + self.downsample = downsample + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + """ + Forward pass of residual block. + Parameters + ---------- + x : torch.Tensor + Shape (N, C, W, L). + + Returns + ------- + out : torch.Tensor + Shape (N, self.expansion*planes, W/stride, L/stride). 
+ """ + residual = x + # (N, planes, W, L) + out = self.conv1(x) + if self.use_bn: + out = self.bn1(out) + out = self.relu(out) + # (N, planes, W/stride, L/stride) + out = self.conv2(out) + if self.use_bn: + out = self.bn2(out) + out = self.relu(out) + # (N, self.expansion*planes, W/stride, L/stride) + out = self.conv3(out) + if self.use_bn: + out = self.bn3(out) + + if self.downsample is not None: + # (N, self.expansion*planes, W/2, L/2) + residual = self.downsample(x) + out = self.relu(residual + out) + return out + + +class BackBone(nn.Module): + + def __init__(self, block, num_block, geom, use_bn=True): + super(BackBone, self).__init__() + + self.use_bn = use_bn + + # Block 1 + self.conv1 = conv3x3(geom["input_shape"][-1], 32) + self.conv2 = conv3x3(32, 32) + self.bn1 = nn.BatchNorm2d(32) + self.bn2 = nn.BatchNorm2d(32) + self.relu = nn.ReLU(inplace=True) + + # Block 2-5 + self.in_planes = 32 + self.block2 = self._make_layer(block, 24, num_blocks=num_block[0]) + self.block3 = self._make_layer(block, 48, num_blocks=num_block[1]) + self.block4 = self._make_layer(block, 64, num_blocks=num_block[2]) + self.block5 = self._make_layer(block, 96, num_blocks=num_block[3]) + + # Lateral layers + self.latlayer1 = nn.Conv2d(384, 196, kernel_size=1, stride=1, + padding=0) + self.latlayer2 = nn.Conv2d(256, 128, kernel_size=1, stride=1, + padding=0) + self.latlayer3 = nn.Conv2d(192, 96, kernel_size=1, stride=1, padding=0) + + # Top-down layers + self.deconv1 = nn.ConvTranspose2d(196, 128, kernel_size=3, stride=2, + padding=1, output_padding=1) + p = 0 if geom['label_shape'][1] == 175 else 1 + self.deconv2 = nn.ConvTranspose2d(128, 96, kernel_size=3, stride=2, + padding=1, output_padding=(1, p)) + + def encode(self, x): + x = self.conv1(x) + if self.use_bn: + x = self.bn1(x) + x = self.relu(x) + + x = self.conv2(x) + if self.use_bn: + x = self.bn2(x) + c1 = self.relu(x) + + # bottom up layers + c2 = self.block2(c1) + c3 = self.block3(c2) + c4 = self.block4(c3) + c5 = self.block5(c4) + + return c3, c4, c5 + + def decode(self, c3, c4, c5): + l5 = self.latlayer1(c5) + l4 = self.latlayer2(c4) + p5 = l4 + self.deconv1(l5) + l3 = self.latlayer3(c3) + p4 = l3 + self.deconv2(p5) + + return p4 + + def forward(self, x): + c3, c4, c5 = self.encode(x) + p4 = self.decode(c3, c4, c5) + + return p4 + + def _make_layer(self, block, planes, num_blocks): + + if self.use_bn: + # downsample the H*W by 1/2 + downsample = nn.Sequential( + nn.Conv2d(self.in_planes, planes * block.expansion, + kernel_size=1, stride=2, bias=False), + nn.BatchNorm2d(planes * block.expansion) + ) + else: + downsample = nn.Conv2d(self.in_planes, planes * block.expansion, + kernel_size=1, stride=2, bias=True) + + layers = [ + block(self.in_planes, planes, stride=2, downsample=downsample)] + + self.in_planes = planes * block.expansion + for i in range(1, num_blocks): + layers.append(block(self.in_planes, planes, stride=1)) + self.in_planes = planes * block.expansion + return nn.Sequential(*layers) + + def _upsample_add(self, x, y): + """Upsample and add two feature maps. + Args: + x: (Variable) top feature map to be upsampled. + y: (Variable) lateral feature map. + Returns: + (Variable) added feature map. + Note in PyTorch, when input size is odd, the upsampled feature map + with `F.upsample(..., scale_factor=2, mode='nearest')` + maybe not equal to the lateral feature map size. + e.g. 
+ original input size: [N,_,15,15] -> + conv2d feature map size: [N,_,8,8] -> + upsampled feature map size: [N,_,16,16] + So we choose bilinear upsample which supports arbitrary output sizes. + """ + _, _, H, W = y.size() + return F.upsample(x, size=(H, W), mode='bilinear') + y + + +class Header(nn.Module): + + def __init__(self, use_bn=True): + super(Header, self).__init__() + + self.use_bn = use_bn + bias = not use_bn + self.conv1 = conv3x3(96, 96, bias=bias) + self.bn1 = nn.BatchNorm2d(96) + self.conv2 = conv3x3(96, 96, bias=bias) + self.bn2 = nn.BatchNorm2d(96) + self.conv3 = conv3x3(96, 96, bias=bias) + self.bn3 = nn.BatchNorm2d(96) + self.conv4 = conv3x3(96, 96, bias=bias) + self.bn4 = nn.BatchNorm2d(96) + + self.clshead = conv3x3(96, 1, bias=True) + self.reghead = conv3x3(96, 6, bias=True) + + def forward(self, x): + x = self.conv1(x) + if self.use_bn: + x = self.bn1(x) + x = self.conv2(x) + if self.use_bn: + x = self.bn2(x) + x = self.conv3(x) + if self.use_bn: + x = self.bn3(x) + x = self.conv4(x) + if self.use_bn: + x = self.bn4(x) + + cls = self.clshead(x) + reg = self.reghead(x) + + return cls, reg + + +class PIXOR(nn.Module): + """ + The Pixor backbone. The input of PIXOR nn module is a tensor of + [batch_size, height, weight, channel], The output of PIXOR nn module + is also a tensor of [batch_size, height/4, weight/4, channel]. Note that + we convert the dimensions to [C, H, W] for PyTorch's nn.Conv2d functions + + Parameters + ---------- + args : dict + The arguments of the model. + + Attributes + ---------- + backbone : opencood.object + The backbone used to extract features. + header : opencood.object + Header used to predict the classification and coordinates. + """ + + def __init__(self, args): + super(PIXOR, self).__init__() + geom = args["geometry_param"] + use_bn = args["use_bn"] + self.backbone = BackBone(Bottleneck, [3, 6, 6, 3], geom, use_bn) + self.header = Header(use_bn) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. 
/ n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + prior = 0.01 + self.header.clshead.weight.data.fill_(-math.log((1.0 - prior) / prior)) + self.header.clshead.bias.data.fill_(0) + self.header.reghead.weight.data.fill_(0) + self.header.reghead.bias.data.fill_(0) + + def forward(self, data_dict): + bev_input = data_dict['processed_lidar']["bev_input"] + + features = self.backbone(bev_input) + # cls -- (N, 1, W/4, L/4) + # reg -- (N, 6, W/4, L/4) + cls, reg = self.header(features) + + output_dict = { + "cls": cls, + "reg": reg + } + + return output_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/pixor_intermediate.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/pixor_intermediate.py new file mode 100644 index 0000000000000000000000000000000000000000..4207049dc25c14387483fabef739520eed5cb0dd --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/pixor_intermediate.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import math + +import torch.nn as nn + +from opencood.models.fuse_modules.self_attn import AttFusion +from opencood.models.pixor import Bottleneck, BackBone, Header + + +class BackBoneIntermediate(BackBone): + + def __init__(self, block, num_block, geom, use_bn=True): + super(BackBoneIntermediate, self).__init__(block, + num_block, + geom, use_bn) + + self.fusion_net3 = AttFusion(192) + self.fusion_net4 = AttFusion(256) + self.fusion_net5 = AttFusion(384) + + def forward(self, x, record_len): + # Here c3, c4, c5 includes all cav + c3, c4, c5 = self.encode(x) + + # Here c3, c4, c5 only include ego + c5 = self.fusion_net5(c5, record_len) + c4 = self.fusion_net4(c4, record_len) + c3 = self.fusion_net3(c3, record_len) + + p4 = self.decode(c3, c4, c5) + return p4 + + +class PIXORIntermediate(nn.Module): + """ + The Pixor backbone. The input of PIXOR nn module is a tensor of + [batch_size, height, weight, channel], The output of PIXOR nn module + is also a tensor of [batch_size, height/4, weight/4, channel]. Note that + we convert the dimensions to [C, H, W] for PyTorch's nn.Conv2d functions + + Parameters + ---------- + args : dict + The arguments of the model. + + Attributes + ---------- + backbone : opencood.object + The backbone used to extract features. + header : opencood.object + Header used to predict the classification and coordinates. + """ + + def __init__(self, args): + super(PIXORIntermediate, self).__init__() + geom = args["geometry_param"] + use_bn = args["use_bn"] + self.backbone = BackBoneIntermediate(Bottleneck, [3, 6, 6, 3], + geom, + use_bn) + self.header = Header(use_bn) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. 
/ n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + prior = 0.01 + self.header.clshead.weight.data.fill_(-math.log((1.0 - prior) / prior)) + self.header.clshead.bias.data.fill_(0) + self.header.reghead.weight.data.fill_(0) + self.header.reghead.bias.data.fill_(0) + + def forward(self, data_dict): + bev_input = data_dict['processed_lidar']["bev_input"] + record_len = data_dict['record_len'] + + features = self.backbone(bev_input, record_len) + # cls -- (N, 1, W/4, L/4) + # reg -- (N, 6, W/4, L/4) + cls, reg = self.header(features) + + output_dict = { + "cls": cls, + "reg": reg + } + + return output_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar.py new file mode 100644 index 0000000000000000000000000000000000000000..cf1711746400adfb964afa40e8a79b1c4cedc150 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , OpenPCDet +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch +import torch.nn as nn + + +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv + + +class PointPillar(nn.Module): + def __init__(self, args): + super(PointPillar, self).__init__() + + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + is_resnet = args['base_bev_backbone'].get("resnet", False) + if is_resnet: + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + else: + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], # 384 + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 7 * args['anchor_number'], # 384 + kernel_size=1) + + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(self.out_channel, args['dir_args']['num_bins'] * args['anchor_number'], + kernel_size=1) # BIN_NUM = 2, # 384 + else: + self.use_dir = False + + def forward(self, data_dict): + + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points} + + batch_dict = self.pillar_vfe(batch_dict) + batch_dict = self.scatter(batch_dict) + batch_dict = self.backbone(batch_dict) + + spatial_features_2d = batch_dict['spatial_features_2d'] + + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + + 
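# (Editor's note, not part of the diff) At this point spatial_features_2d is the BEV map that feeds
# the 1x1 detection heads below: cls_head -> (N, anchor_number, H, W),
# reg_head -> (N, 7 * anchor_number, H, W), and, when enabled, dir_head -> (N, num_bins * anchor_number, H, W).
# The 7 regression channels per anchor are assumed to be the usual anchor-based box residuals
# (center, size, yaw); this is an editorial annotation, not a statement from the original file.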
psm = self.cls_head(spatial_features_2d) + rm = self.reg_head(spatial_features_2d) + + output_dict = {'cls_preds': psm, + 'reg_preds': rm} + + if self.use_dir: + dm = self.dir_head(spatial_features_2d) + output_dict.update({'dir_preds': dm}) + + return output_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_baseline.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_baseline.py new file mode 100644 index 0000000000000000000000000000000000000000..6fbe99a8bbd43bedd6e2f7322857a0288a5d046a --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_baseline.py @@ -0,0 +1,138 @@ +# Author: Yifan Lu +# a class that integrate multiple simple fusion methods (Single Scale) +# Support F-Cooper, Self-Att, DiscoNet(wo KD), V2VNet, V2XViT, When2comm + +import torch.nn as nn +from icecream import ic +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.fuse_modules.f_cooper_fuse import SpatialFusion +from opencood.models.fuse_modules.fusion_in_one import MaxFusion, AttFusion, DiscoFusion, V2VNetFusion, V2XViTFusion, When2commFusion +from opencood.utils.transformation_utils import normalize_pairwise_tfm + +class PointPillarBaseline(nn.Module): + """ + F-Cooper implementation with point pillar backbone. + """ + def __init__(self, args): + super(PointPillarBaseline, self).__init__() + + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + is_resnet = args['base_bev_backbone'].get("resnet", False) + if is_resnet: + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + else: + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + self.voxel_size = args['voxel_size'] + + if args['fusion_method'] == "max": + self.fusion_net = MaxFusion() + if args['fusion_method'] == "att": + self.fusion_net = AttFusion(args['att']['feat_dim']) + if args['fusion_method'] == "disconet": + self.fusion_net = DiscoFusion(args['disconet']['feat_dim']) + if args['fusion_method'] == "v2vnet": + self.fusion_net = V2VNetFusion(args['v2vnet']) + if args['fusion_method'] == 'v2xvit': + self.fusion_net = V2XViTFusion(args['v2xvit']) + if args['fusion_method'] == 'when2comm': + self.fusion_net = When2commFusion(args['when2comm']) + + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.compression = False + if "compression" in args: + self.compression = True + self.naive_compressor = NaiveCompressor(self.out_channel, args['compression']) + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 7 * args['anchor_number'], 
+ kernel_size=1) + self.use_dir = False + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(self.out_channel, args['dir_args']['num_bins'] * args['anchor_number'], + kernel_size=1) # BIN_NUM = 2 + + if 'backbone_fix' in args.keys() and args['backbone_fix']: + self.backbone_fix() + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + # calculate pairwise affine transformation matrix + _, _, H0, W0 = batch_dict['spatial_features'].shape # original feature map shape H0, W0 + t_matrix = normalize_pairwise_tfm(data_dict['pairwise_t_matrix'], H0, W0, self.voxel_size[0]) + batch_dict = self.backbone(batch_dict) + + spatial_features_2d = batch_dict['spatial_features_2d'] + + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + + if self.compression: + spatial_features_2d = self.naive_compressor(spatial_features_2d) + + fused_feature = self.fusion_net(spatial_features_2d, record_len, t_matrix) + + psm = self.cls_head(fused_feature) + rm = self.reg_head(fused_feature) + + output_dict = {'cls_preds': psm, + 'reg_preds': rm} + + if self.use_dir: + output_dict.update({'dir_preds': self.dir_head(fused_feature)}) + + return output_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_baseline_multiscale.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_baseline_multiscale.py new file mode 100644 index 0000000000000000000000000000000000000000..232ad55d68f47065c85de5f34031d49eeed25c30 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_baseline_multiscale.py @@ -0,0 +1,135 @@ +# Author: Yifan Lu +# a class that integrate multiple simple fusion methods (Single Scale) +# Support F-Cooper, Self-Att, DiscoNet(wo KD), V2VNet, V2XViT, When2comm + +import torch.nn as nn +from icecream import ic +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.fuse_modules.f_cooper_fuse import SpatialFusion +from opencood.models.fuse_modules.fusion_in_one 
import MaxFusion, AttFusion, DiscoFusion, V2VNetFusion, V2XViTFusion, When2commFusion +from opencood.utils.transformation_utils import normalize_pairwise_tfm + +class PointPillarBaselineMultiscale(nn.Module): + """ + F-Cooper implementation with point pillar backbone. + """ + def __init__(self, args): + super(PointPillarBaselineMultiscale, self).__init__() + + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + is_resnet = args['base_bev_backbone'].get("resnet", True) # default true + if is_resnet: + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + else: + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + self.voxel_size = args['voxel_size'] + + self.fusion_net = nn.ModuleList() + for i in range(len(args['base_bev_backbone']['layer_nums'])): + if args['fusion_method'] == "max": + self.fusion_net.append(MaxFusion()) + if args['fusion_method'] == "att": + self.fusion_net.append(AttFusion(args['att']['feat_dim'][i])) + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.compression = False + if "compression" in args: + self.compression = True + self.naive_compressor = NaiveCompressor(64, args['compression']) + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 7 * args['anchor_number'], + kernel_size=1) + self.use_dir = False + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(self.out_channel, args['dir_args']['num_bins'] * args['anchor_number'], + kernel_size=1) # BIN_NUM = 2 + + if 'backbone_fix' in args.keys() and args['backbone_fix']: + self.backbone_fix() + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + # calculate pairwise affine transformation matrix + _, _, H0, W0 = batch_dict['spatial_features'].shape # original feature map shape H0, W0 + t_matrix = normalize_pairwise_tfm(data_dict['pairwise_t_matrix'], H0, W0, self.voxel_size[0]) + + spatial_features 
= batch_dict['spatial_features'] + + if self.compression: + spatial_features = self.naive_compressor(spatial_features) + + # multiscale fusion + feature_list = self.backbone.get_multiscale_feature(spatial_features) + fused_feature_list = [] + for i, fuse_module in enumerate(self.fusion_net): + fused_feature_list.append(fuse_module(feature_list[i], record_len, t_matrix)) + fused_feature = self.backbone.decode_multiscale_feature(fused_feature_list) + + if self.shrink_flag: + fused_feature = self.shrink_conv(fused_feature) + + psm = self.cls_head(fused_feature) + rm = self.reg_head(fused_feature) + + output_dict = {'cls_preds': psm, + 'reg_preds': rm} + + if self.use_dir: + output_dict.update({'dir_preds': self.dir_head(fused_feature)}) + + return output_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_deform_transformer.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_deform_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..b3a64a770c6c6267ce2bcb9c494bc31fbb9903d2 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_deform_transformer.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang , Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch.nn as nn +import torch + +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.deformable_transformer_backbone import DeformableTransformerBackbone +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.point_pillar import PointPillar +from opencood.utils.transformation_utils import get_pairwise_transformation_torch +from opencood.utils.model_utils import weight_init + + +class PointPillarDeformTransformer(nn.Module): + def __init__(self, args): + super(PointPillarDeformTransformer, self).__init__() + + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + self.backbone = DeformableTransformerBackbone(args['deformable_transfomer_backbone']) + # used to downsample the feature map for efficient computation + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + + self.cls_head = nn.Conv2d(128 * 2, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(128 * 2, 7 * args['anchor_number'], + kernel_size=1) + if args['backbone_fix']: + self.backbone_fix() + + self.apply(weight_init) + + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + lidar_pose = data_dict['lidar_pose'] # [sum(cav), 6] + + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len, + 'pairwise_t_matrix': pairwise_t_matrix, + 'lidar_pose': lidar_pose} + + + + # n, 4 -> n, c + 
batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + + batch_dict = self.backbone(batch_dict) + # N, C, H', W'. [N, 384, 100, 352] + spatial_features_2d = batch_dict['spatial_features_2d'] + + # downsample feature to reduce memory + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + + psm = self.cls_head(spatial_features_2d) + rm = self.reg_head(spatial_features_2d) + + + output_dict = {'psm': psm, + 'rm': rm} + + return output_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_disconet.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_disconet.py new file mode 100644 index 0000000000000000000000000000000000000000..080ba5f569f12ad237cefbbe3cc18f1fc91f707e --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_disconet.py @@ -0,0 +1,97 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch +import torch.nn as nn + + +import torch.nn.functional as F +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.data_utils.post_processor import UncertaintyVoxelPostprocessor +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.utils.transformation_utils import normalize_pairwise_tfm, regroup +from opencood.models.fuse_modules.fusion_in_one import DiscoFusion + +class PointPillarDiscoNet(nn.Module): + def __init__(self, args): + super(PointPillarDiscoNet, self).__init__() + self.discrete_ratio = args['voxel_size'][0] + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + self.voxel_size = args['voxel_size'] + + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.fusion_net = DiscoFusion(self.out_channel) + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 7 * args['anchor_number'], + kernel_size=1) + self.use_dir = False + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(self.out_channel, args['dir_args']['num_bins'] * args['anchor_number'], + kernel_size=1) # BIN_NUM = 2 + + def forward(self, data_dict): + + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + + teacher_voxel_features = data_dict['teacher_processed_lidar']['voxel_features'] + teacher_voxel_coords = data_dict['teacher_processed_lidar']['voxel_coords'] + teacher_voxel_num_points = data_dict['teacher_processed_lidar']['voxel_num_points'] + + record_len = data_dict['record_len'] + lidar_pose = data_dict['lidar_pose'] + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + + 
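# (Editor's note, not part of the diff) The teacher_voxel_* tensors unpacked above are not used again
# in this forward pass; data_dict['teacher_processed_lidar'] is instead consumed by
# PointPillarDiscoNetTeacher later in this patch, presumably so a DiscoNet-style distillation loss
# can compare this model's 'feature' output against the teacher's 'teacher_feature'.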
batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len, + 'pairwise_t_matrix': pairwise_t_matrix} + + + batch_dict = self.pillar_vfe(batch_dict) + batch_dict = self.scatter(batch_dict) + + _, _, H0, W0 = batch_dict['spatial_features'].shape + t_matrix = normalize_pairwise_tfm(data_dict['pairwise_t_matrix'], H0, W0, self.voxel_size[0]) + + batch_dict = self.backbone(batch_dict) + + + spatial_features_2d = batch_dict['spatial_features_2d'] + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + + spatial_features_2d = self.fusion_net(spatial_features_2d, record_len, t_matrix) + + psm = self.cls_head(spatial_features_2d) + rm = self.reg_head(spatial_features_2d) + + output_dict = {'feature': spatial_features_2d, + 'cls_preds': psm, + 'reg_preds': rm} + if self.use_dir: + output_dict.update({'dir_preds': self.dir_head(spatial_features_2d)}) + + return output_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_disconet_teacher.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_disconet_teacher.py new file mode 100644 index 0000000000000000000000000000000000000000..323b562a0f97e101cd0affebe2ced9aadbf91055 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_disconet_teacher.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , OpenPCDet +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch +import torch.nn as nn + + +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv + +class PointPillarDiscoNetTeacher(nn.Module): + def __init__(self, args): + super(PointPillarDiscoNetTeacher, self).__init__() + + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) + + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 7 * args['anchor_number'], + kernel_size=1) + + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(self.out_channel, args['dir_args']['num_bins'] * args['anchor_number'], + kernel_size=1) # BIN_NUM = 2 + + def forward(self, data_dict): + + voxel_features = data_dict['teacher_processed_lidar']['voxel_features'] + voxel_coords = data_dict['teacher_processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['teacher_processed_lidar']['voxel_num_points'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points} + + batch_dict = self.pillar_vfe(batch_dict) + batch_dict = self.scatter(batch_dict) + batch_dict = self.backbone(batch_dict) + + spatial_features_2d = batch_dict['spatial_features_2d'] + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + + psm = 
self.cls_head(spatial_features_2d) + rm = self.reg_head(spatial_features_2d) + + output_dict = {'teacher_feature': spatial_features_2d, + 'teacher_cls_preds': psm, + 'teacher_reg_preds': rm} + if self.use_dir: + output_dict.update({'dir_preds': self.dir_head(spatial_features_2d)}) + + + return output_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_intermediate.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_intermediate.py new file mode 100644 index 0000000000000000000000000000000000000000..8c09ad6a343145f457ab1714fc50ed2611c3cd8b --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_intermediate.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch +import torch.nn as nn + + +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.att_bev_backbone import AttBEVBackbone +from opencood.models.sub_modules.dcn_net import DCNNet +from opencood.utils.transformation_utils import get_pairwise_transformation_torch +from opencood.data_utils.post_processor import UncertaintyVoxelPostprocessor + + +class PointPillarIntermediate(nn.Module): + def __init__(self, args): + super(PointPillarIntermediate, self).__init__() + + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + self.backbone = AttBEVBackbone(args['base_bev_backbone'], 64) + + self.dcn = False + if 'dcn' in args: + self.dcn = True + self.before_backbone = args['dcn']['before_backbone'] + self.dcn_net = DCNNet(args['dcn']) + + + self.cls_head = nn.Conv2d(128 * 3, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(128 * 3, 7 * args['anchor_num'], + kernel_size=1) + + def forward(self, data_dict): + + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + lidar_pose = data_dict['lidar_pose'] + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len, + 'pairwise_t_matrix': pairwise_t_matrix} + + + + batch_dict = self.pillar_vfe(batch_dict) + batch_dict = self.scatter(batch_dict) + + if self.dcn and self.before_backbone: + batch_dict['spatial_features'] = self.dcn_net(batch_dict['spatial_features']) + + batch_dict = self.backbone(batch_dict) + spatial_features_2d = batch_dict['spatial_features_2d'] + + if self.dcn and not self.before_backbone: + spatial_features_2d = self.dcn_net(spatial_features_2d) + + psm = self.cls_head(spatial_features_2d) + rm = self.reg_head(spatial_features_2d) + + output_dict = {'psm': psm, + 'rm': rm} + + return output_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_mash.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_mash.py new file mode 100644 index 0000000000000000000000000000000000000000..e095720da60bdb992abdb76d678dd5768d8dd94a --- /dev/null +++ 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_mash.py @@ -0,0 +1,249 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang , Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +from numpy import record +import torch +import torch.nn as nn +import torch.nn.functional as F + +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.sub_modules.mash_utils import QueryEncoder, KeyEncoder, SmoothingNetwork + +from icecream import ic + + +class PointPillarMash(nn.Module): + def __init__(self, args): + super(PointPillarMash, self).__init__() + + self.max_cav = args['max_cav'] + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) + + # used to downsample the feature map for efficient computation + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.compression = False + + if args['compression'] > 0: + self.compression = True + self.naive_compressor = NaiveCompressor(256, args['compression']) + + self.cls_head = nn.Conv2d(128 * 2, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(128 * 2, 7 * args['anchor_number'], + kernel_size=1) + + mash_args = args['mash'] + self.query_encoder = QueryEncoder(mash_args["feature_dim"], mash_args['query_dim']) + self.key_encoder = KeyEncoder(mash_args["feature_dim"], mash_args['key_dim']) + self.queryKeySim = nn.Conv2d(mash_args['query_dim'], mash_args['key_dim'], 1, 1) + self.smoothing_net = SmoothingNetwork(in_ch=mash_args['H'] * mash_args['W'] + 1) + self.H = mash_args['H'] + self.W = mash_args['W'] + self.downsample_rate = mash_args['downsample_rate'] + self.discrete_ratio = args['voxel_size'][0] + + if args['backbone_fix']: + self.backbone_fix() + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + 
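+        # What follows is the MASH-style feature matching: per-pixel query/key
+        # embeddings are compared into a correspondence volume
+        # (computeCorrespondenceVolume), smoothed by a small network, converted
+        # into a sampling grid (idx2grid), and used to warp each neighbour's
+        # feature map before max-pooling it with the ego feature.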
batch_dict = self.backbone(batch_dict) + # N, C, H', W'. [N, 256, 50, 176] + spatial_features_2d = batch_dict['spatial_features_2d'] + + # downsample feature to reduce memory + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + # compressor + if self.compression: + spatial_features_2d = self.naive_compressor(spatial_features_2d) + + # spatial_features_2d is [sum(cav_num), 256, 50, 176] + # output only contains ego + # [B, 256, 50, 176] + + B = len(record_len) + + querys = self.queryKeySim(self.query_encoder(spatial_features_2d)) + keys = self.key_encoder(spatial_features_2d) + + split_query = self.regroup(querys, record_len) + split_key = self.regroup(keys, record_len) + split_feature = self.regroup(spatial_features_2d, record_len) + + fuse_features = [] + estimate_volumes = [] + for b in range(B): + # N, C, H, W + feature = split_feature[b] + key = split_key[b] + query = split_query[b] + + ego = 0 + fuse_feature = [feature[ego]] + N = record_len[b] + + for i in range(1, N): + corr_volume = self.computeCorrespondenceVolume(query[ego], key[i]) + corr_volume_decoded = self.smoothCorrespondenceVolume(corr_volume) # (Hs*Ws+1, Ht, Wt) + grid, mask = self.idx2grid(corr_volume_decoded) # (1, H, W, 2) + weight = torch.max(corr_volume_decoded, dim=0, keepdim=True)[0] + estimate_volumes.append(corr_volume_decoded) + + warp_feature = F.grid_sample(feature[i].unsqueeze(0), grid).squeeze() + warp_feature *= weight + warp_feature *= mask + fuse_feature.append(warp_feature) + + # max / sum + fuse_features.append(torch.max(torch.stack(fuse_feature), dim = 0)[0]) + + # B,C,H,W + out_feature = torch.stack(fuse_features) + if estimate_volumes: + corr_vol = torch.stack(estimate_volumes) + else: + corr_vol = None + + psm = self.cls_head(out_feature) + rm = self.reg_head(out_feature) + + output_dict = {'psm': psm, + 'rm': rm, + 'corr_vol': corr_vol} + + return output_dict + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def computeCorrespondenceVolume(self, featA, featB): + """compute the similarity volume + + Args: + featA: [C, H, W], the query vectors of target agent + featB: [C, H, W], the key vectors of support agent + + Returns: + distAB: [Hs*Ws+1, Ht, Wt] + """ + C, H, W = featA.shape + + distAB = torch.zeros((self.H * self.W +1, self.H, self.W), device=featA.device) + fA = featA.permute(1,2,0).reshape(-1,C) # (H*W, C) + fB = featB.permute(1,2,0).reshape(-1,C) # (H*W, C) + + fA2 = torch.pow(torch.norm(fA,dim=-1),2).view(-1,1).repeat(1,fA.shape[0]) # (H*W, H*W) + fB2 = torch.pow(torch.norm(fB,dim=-1),2).view(-1,1).repeat(1,fB.shape[0]) # (H*W, H*W) + + + normA = torch.pow( fA2 + fB2.t() - 2.*torch.matmul(fA,fB.t()), 0.5 ) # (H*W, H*W) + + + distAB[:-1,...] = normA.permute(1,0).reshape(-1, H, W) + distAB[-1,:,:] = torch.norm(featA,p=2,dim=0) + distAB = -distAB # two pixel is similar, then distAB[pixel1,pixel2] is low. 
We want it high + + return distAB + + def smoothCorrespondenceVolume(self, distAB): + """ smooth the correspondence Volume + + Args: + distAB: (Hs*Ws+1, Ht, Wt) + Returns: + smoothed distAB + """ + distAB = distAB.unsqueeze(0) + output = self.smoothing_net(distAB) + output.squeeze_(0) + + return output + + def idx2grid(self, matches): + """ + Args: + matches: (Hs*Ws + 1, Ht, Wt) + """ + # should rewrite because H!=W + # matches = matches.unsqueeze(0) # [1, Hs*Ws + 1, Ht, Wt] + + + H, W = matches.shape[-2:] + X = torch.arange(W).view(1,-1).repeat(H,1).type(torch.long).view(-1).to(matches.device) # (Ht * Wt) + Y = torch.arange(H).view(-1,1).repeat(1,W).type(torch.long).view(-1).to(matches.device) # (Ht * Wt) + X = torch.cat([X,torch.tensor([0],device=matches.device)],0) + Y = torch.cat([Y,torch.tensor([0],device=matches.device)],0) + + idx = torch.argmax(matches.detach(),0).view(-1) # (Ht*Wt), the value is the index in supporting map + + # idx has no gradient + # mask select those have no correspondence. + # that means, ego's feature is used. + mask = (idx == (matches.shape[0] - 1)).view(H, W).to(matches.device) + + x = torch.index_select(X,0,idx).view(1,H,W) # x_src in affine_grid + y = torch.index_select(Y,0,idx).view(1,H,W) # y_src in affine_grid + x = 2*((1.*x/W)-0.5) # (1, H, W) + y = 2*((1.*y/H)-0.5) # (1, H, W) + + grid = torch.cat([x.unsqueeze(-1),y.unsqueeze(-1)],-1) # (1, 32, 32, 2) + + return grid, mask diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_multiclass.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_multiclass.py new file mode 100644 index 0000000000000000000000000000000000000000..40fe35065b09ead67f47f48fc4c64fabbdf83eea --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_multiclass.py @@ -0,0 +1,217 @@ +# Author: Yifan Lu , Genjia Liu +# a class that integrate multiple simple fusion methods (Single Scale) +# Support F-Cooper, Self-Att, DiscoNet(wo KD), V2VNet, V2XViT, When2comm +# To deployed with centerpoint_loss_multiclass + +import torch +import torch.nn as nn +from icecream import ic +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.fuse_modules.f_cooper_fuse import SpatialFusion +from opencood.models.fuse_modules.fusion_in_one import MaxFusion, AttFusion, DiscoFusion, V2VNetFusion, V2XViTFusion, When2commFusion +from opencood.utils.transformation_utils import normalize_pairwise_tfm +import torch.nn.functional as F + +class PointPillarMulticlass(nn.Module): + """ + F-Cooper implementation with point pillar backbone. 
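+    Despite the line above, this wrapper is not tied to F-Cooper: the
+    single-scale fusion module is chosen via args['fusion_method']
+    ('max', 'att', 'disconet', 'v2vnet', 'v2xvit' or 'when2comm'), and the
+    heads emit CenterPoint-style multi-class outputs with 8 regression
+    channels per class (x/y offsets, z, h/w/l, sin/cos of yaw), decoded by
+    generate_predicted_boxes().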
+ """ + def __init__(self, args): + super(PointPillarMulticlass, self).__init__() + + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + is_resnet = args['base_bev_backbone'].get("resnet", False) + if is_resnet: + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + else: + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + self.voxel_size = args['voxel_size'] + self.out_size_factor = args['out_size_factor'] + self.cav_lidar_range = args['lidar_range'] + + if args['fusion_method'] == "max": + self.fusion_net = MaxFusion() + if args['fusion_method'] == "att": + self.fusion_net = AttFusion(args['att']['feat_dim']) + if args['fusion_method'] == "disconet": + self.fusion_net = DiscoFusion(args['disconet']['feat_dim']) + if args['fusion_method'] == "v2vnet": + self.fusion_net = V2VNetFusion(args['v2vnet']) + if args['fusion_method'] == 'v2xvit': + self.fusion_net = V2XViTFusion(args['v2xvit']) + if args['fusion_method'] == 'when2comm': + self.fusion_net = When2commFusion(args['when2comm']) + + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.compression = False + if "compression" in args: + self.compression = True + self.naive_compressor = NaiveCompressor(self.out_channel, args['compression']) + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 8 * args['anchor_number'], + kernel_size=1) + self.use_dir = False + # if 'dir_args' in args.keys(): + # self.use_dir = True + # self.dir_head = nn.Conv2d(self.out_channel, args['dir_args']['num_bins'] * args['anchor_number'], + # kernel_size=1) # BIN_NUM = 2 + + if 'backbone_fix' in args.keys() and args['backbone_fix']: + self.backbone_fix() + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + # calculate pairwise affine transformation matrix + _, _, H0, W0 = batch_dict['spatial_features'].shape # original feature map shape H0, W0 + t_matrix = 
normalize_pairwise_tfm(data_dict['pairwise_t_matrix'], H0, W0, self.voxel_size[0]) + batch_dict = self.backbone(batch_dict) + + spatial_features_2d = batch_dict['spatial_features_2d'] + + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + + if self.compression: + spatial_features_2d = self.naive_compressor(spatial_features_2d) + + fused_feature = self.fusion_net(spatial_features_2d, record_len, t_matrix) + + cls = self.cls_head(fused_feature) # [B, 256, 48, 144] -> [B, 3, 48, 144] + bbox = self.reg_head(fused_feature) # [B, 256, 48, 176] -> [B, 24, 48, 144] + + if fused_feature.size(2) == 48: + scaled_feature = F.interpolate(fused_feature, scale_factor=2, mode='nearest') # 'nearest', 'bilinear' + else: + scaled_feature = fused_feature + result_dict = {'fused_feature':scaled_feature} + + box_preds_for_infer = bbox.permute(0, 2, 3, 1).contiguous() + bbox_temp_list = [] + num_class = int(box_preds_for_infer.shape[3]/8) + box_preds_for_infer = box_preds_for_infer.view(box_preds_for_infer.shape[0], box_preds_for_infer.shape[1], box_preds_for_infer.shape[2], num_class, 8) + for i in range(num_class): # num_class + box_preds_for_infer_singleclass = box_preds_for_infer[:,:,:,i,:] + box_preds_for_infer_singleclass = box_preds_for_infer_singleclass.permute(0, 3, 1, 2) + _, bbox_temp = self.generate_predicted_boxes(cls[:, i, :, :], box_preds_for_infer_singleclass) + bbox_temp_list.append(bbox_temp) + bbox_temp_list = torch.stack(bbox_temp_list, dim=1) + + output_dict = {'cls_preds': cls, + 'bbox_preds': bbox, + 'reg_preds_multiclass': bbox_temp_list} # [1, 3, 6912, 7] + + output_dict.update(result_dict) + if self.use_dir: + output_dict.update({'dir_preds': self.dir_head(fused_feature)}) + + return output_dict + + def generate_predicted_boxes(self, cls_preds, box_preds, dir_cls_preds=None): + """ + Args: + batch_size: + cls_preds: (N, H, W, C1) + box_preds: (N, H, W, C2) + dir_cls_preds: (N, H, W, C3) + + Returns: + batch_cls_preds: (B, num_boxes, num_classes) + batch_box_preds: (B, num_boxes, 7+C) + + """ + box_preds = box_preds.permute(0, 2, 3, 1).contiguous() + + batch, H, W, code_size = box_preds.size() ## code_size 表示的是预测的尺寸 + + box_preds = box_preds.reshape(batch, H*W, code_size) + + batch_reg = box_preds[..., 0:2] + # batch_hei = box_preds[..., 2:3] + # batch_dim = torch.exp(box_preds[..., 3:6]) + + h = box_preds[..., 3:4] * self.out_size_factor * self.voxel_size[0] + w = box_preds[..., 4:5] * self.out_size_factor * self.voxel_size[1] + l = box_preds[..., 5:6] * self.out_size_factor * self.voxel_size[2] + batch_dim = torch.cat([h,w,l], dim=-1) + batch_hei = box_preds[..., 2:3] * self.out_size_factor * self.voxel_size[2] + self.cav_lidar_range[2] + + batch_rots = box_preds[..., 6:7] + batch_rotc = box_preds[..., 7:8] + + rot = torch.atan2(batch_rots, batch_rotc) + + ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)]) + ys = ys.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + xs = xs.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + + xs = xs.view(batch, -1, 1) + batch_reg[:, :, 0:1] + ys = ys.view(batch, -1, 1) + batch_reg[:, :, 1:2] + + xs = xs * self.out_size_factor * self.voxel_size[0] + self.cav_lidar_range[0] ## 基于feature_map 的size求解真实的坐标 + ys = ys * self.out_size_factor * self.voxel_size[1] + self.cav_lidar_range[1] + + + batch_box_preds = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=2) + # batch_box_preds = batch_box_preds.reshape(batch, H, W, batch_box_preds.shape[-1]) + # batch_box_preds = batch_box_preds.permute(0, 3, 
1, 2).contiguous() + + # batch_box_preds_temp = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=1) + # box_preds = box_preds.permute(0, 3, 1, 2).contiguous() + + # batch_cls_preds = cls_preds.view(batch, H*W, -1) + return cls_preds, batch_box_preds diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_single_multiclass.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_single_multiclass.py new file mode 100644 index 0000000000000000000000000000000000000000..ed57982a44f0e342069e2e1b054ba12967b7990b --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_single_multiclass.py @@ -0,0 +1,242 @@ +# Author: Yifan Lu , Genjia Liu +# a class that integrate multiple simple fusion methods (Single Scale) +# Support F-Cooper, Self-Att, DiscoNet(wo KD), V2VNet, V2XViT, When2comm +# To deployed with centerpoint_loss_multiclass + +import torch +import torch.nn as nn +from icecream import ic +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.fuse_modules.f_cooper_fuse import SpatialFusion +from opencood.models.fuse_modules.fusion_in_one import MaxFusion, AttFusion, DiscoFusion, V2VNetFusion, V2XViTFusion, When2commFusion +from opencood.utils.transformation_utils import normalize_pairwise_tfm +from opencood.utils.transformation_utils import get_relative_transformation +class PointPillarSingleMulticlass(nn.Module): + """ + F-Cooper implementation with point pillar backbone. 
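+    Single-agent (no-fusion) counterpart of PointPillarMulticlass: a fusion
+    module is still constructed in __init__, but forward() feeds the ego
+    spatial_features_2d straight into the heads (fused_feature is simply the
+    per-agent feature map) and additionally returns the *_single predictions
+    (cls_preds_single / reg_preds_single / bbox_preds_single) for
+    single-agent supervision.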
+ """ + def __init__(self, args): + super(PointPillarSingleMulticlass, self).__init__() + + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + is_resnet = args['base_bev_backbone'].get("resnet", False) + if is_resnet: + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + else: + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) # or you can use ResNetBEVBackbone, which is stronger + self.voxel_size = args['voxel_size'] + self.out_size_factor = args['out_size_factor'] + self.cav_lidar_range = args['lidar_range'] + + if args['fusion_method'] == "max": + self.fusion_net = MaxFusion() + if args['fusion_method'] == "att": + self.fusion_net = AttFusion(args['att']['feat_dim']) + if args['fusion_method'] == "disconet": + self.fusion_net = DiscoFusion(args['disconet']['feat_dim']) + if args['fusion_method'] == "v2vnet": + self.fusion_net = V2VNetFusion(args['v2vnet']) + if args['fusion_method'] == 'v2xvit': + self.fusion_net = V2XViTFusion(args['v2xvit']) + if args['fusion_method'] == 'when2comm': + self.fusion_net = When2commFusion(args['when2comm']) + + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.compression = False + if "compression" in args: + self.compression = True + self.naive_compressor = NaiveCompressor(self.out_channel, args['compression']) + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 8 * args['anchor_number'], + kernel_size=1) + self.use_dir = False + # if 'dir_args' in args.keys(): + # self.use_dir = True + # self.dir_head = nn.Conv2d(self.out_channel, args['dir_args']['num_bins'] * args['anchor_number'], + # kernel_size=1) # BIN_NUM = 2 + + if 'backbone_fix' in args.keys() and args['backbone_fix']: + self.backbone_fix() + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] if 'record_len' in data_dict.keys() else 3 + + # if key only contains "ego", like intermediate fusion + + # relative_t_matrix = get_relative_transformation(lidar_pose) # [N, 4, 4], cav_to_ego, T_ego_cav + # elif key contains "ego", "641", "649" ..., like late fusion + + ''' + if 'record_len' in data_dict: + record_len = data_dict['record_len'] + relative_t_matrix = data_dict['transformation_matrix'] + else: + relative_t_matrix = [] 
+ for cav_id, cav_data in data_dict.items(): + + relative_t_matrix.append(cav_data['transformation_matrix']) + record_len = len(relative_t_matrix) + relative_t_matrix = torch.stack(relative_t_matrix, dim=0) + ''' + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + # calculate pairwise affine transformation matrix + _, _, H0, W0 = batch_dict['spatial_features'].shape # original feature map shape H0, W0 + # t_matrix = normalize_pairwise_tfm(data_dict['pairwise_t_matrix'], H0, W0, self.voxel_size[0]) + batch_dict = self.backbone(batch_dict) + + spatial_features_2d = batch_dict['spatial_features_2d'] + + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + + if self.compression: + spatial_features_2d = self.naive_compressor(spatial_features_2d) + + psm_single = self.cls_head(spatial_features_2d) + rm_single = self.reg_head(spatial_features_2d) + + fused_feature = spatial_features_2d + + cls = self.cls_head(fused_feature) # [B, 256, 48, 144] -> [B, 3, 48, 144] + bbox = self.reg_head(fused_feature) # [B, 256, 48, 176] -> [B, 24, 48, 144] + + box_preds_for_infer = bbox.permute(0, 2, 3, 1).contiguous() + bbox_temp_list = [] + num_class = int(box_preds_for_infer.shape[3]/8) + box_preds_for_infer = box_preds_for_infer.view(box_preds_for_infer.shape[0], box_preds_for_infer.shape[1], box_preds_for_infer.shape[2], num_class, 8) + for i in range(num_class): # num_class + box_preds_for_infer_singleclass = box_preds_for_infer[:,:,:,i,:] + box_preds_for_infer_singleclass = box_preds_for_infer_singleclass.permute(0, 3, 1, 2) + _, bbox_temp = self.generate_predicted_boxes(cls[:, i, :, :], box_preds_for_infer_singleclass) + bbox_temp_list.append(bbox_temp) + bbox_temp_list = torch.stack(bbox_temp_list, dim=1) + + _, bbox_temp = self.generate_predicted_boxes(cls, bbox) + + output_dict = {'cls_preds': cls, + 'bbox_preds': bbox, + 'reg_preds_multiclass': bbox_temp_list, + 'reg_preds': bbox_temp + } # [1, 3, 6912, 7] + + _, bbox_temp_single = self.generate_predicted_boxes(psm_single, rm_single) + + output_dict.update({'cls_preds_single': psm_single, # [BN, 1, 100, 100] + 'reg_preds_single': bbox_temp_single, # [BN, 10000, 7] + 'bbox_preds_single': rm_single, # [BN, 8, 100, 100] + # 'comm_rate': communication_rates + }) + + if self.use_dir: + output_dict.update({'dir_preds': self.dir_head(fused_feature)}) + + return output_dict + + def generate_predicted_boxes(self, cls_preds, box_preds, dir_cls_preds=None): + """ + Args: + batch_size: + cls_preds: (N, H, W, C1) + box_preds: (N, H, W, C2) + dir_cls_preds: (N, H, W, C3) + + Returns: + batch_cls_preds: (B, num_boxes, num_classes) + batch_box_preds: (B, num_boxes, 7+C) + + """ + box_preds = box_preds.permute(0, 2, 3, 1).contiguous() + + batch, H, W, code_size = box_preds.size() ## code_size 表示的是预测的尺寸 + + box_preds = box_preds.reshape(batch, H*W, code_size) + + batch_reg = box_preds[..., 0:2] + # batch_hei = box_preds[..., 2:3] + # batch_dim = torch.exp(box_preds[..., 3:6]) + + h = box_preds[..., 3:4] * self.out_size_factor * self.voxel_size[0] + w = box_preds[..., 4:5] * self.out_size_factor * self.voxel_size[1] + l = box_preds[..., 5:6] * self.out_size_factor * self.voxel_size[2] + batch_dim = torch.cat([h,w,l], dim=-1) + batch_hei = box_preds[..., 2:3] * self.out_size_factor * self.voxel_size[2] + 
self.cav_lidar_range[2] + + batch_rots = box_preds[..., 6:7] + batch_rotc = box_preds[..., 7:8] + + rot = torch.atan2(batch_rots, batch_rotc) + + ys, xs = torch.meshgrid([torch.arange(0, H), torch.arange(0, W)]) + ys = ys.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + xs = xs.view(1, H, W).repeat(batch, 1, 1).to(cls_preds.device) + + xs = xs.view(batch, -1, 1) + batch_reg[:, :, 0:1] + ys = ys.view(batch, -1, 1) + batch_reg[:, :, 1:2] + + xs = xs * self.out_size_factor * self.voxel_size[0] + self.cav_lidar_range[0] ## 基于feature_map 的size求解真实的坐标 + ys = ys * self.out_size_factor * self.voxel_size[1] + self.cav_lidar_range[1] + + + batch_box_preds = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=2) + # batch_box_preds = batch_box_preds.reshape(batch, H, W, batch_box_preds.shape[-1]) + # batch_box_preds = batch_box_preds.permute(0, 3, 1, 2).contiguous() + + # batch_box_preds_temp = torch.cat([xs, ys, batch_hei, batch_dim, rot], dim=1) + # box_preds = box_preds.permute(0, 3, 1, 2).contiguous() + + # batch_cls_preds = cls_preds.view(batch, H*W, -1) + return cls_preds, batch_box_preds \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_uncertainty.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_uncertainty.py new file mode 100644 index 0000000000000000000000000000000000000000..9bd495187bdf2baa8ba4464d04860daf29b63c95 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_uncertainty.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , OpenPCDet +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch +import torch.nn as nn + + +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.utils.model_utils import weight_init + +class PointPillarUncertainty(nn.Module): + def __init__(self, args): + super(PointPillarUncertainty, self).__init__() + + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) + self.uncertainty_dim = args['uncertainty_dim'] # dim=3 means x, y, yaw, dim=2 means x, y + + self.cls_head = nn.Conv2d(128 * 3, args['anchor_num'], + kernel_size=1) + self.reg_head = nn.Conv2d(128 * 3, 7 * args['anchor_num'], + kernel_size=1) + + self.unc_head = nn.Conv2d(128 * 3, self.uncertainty_dim * args['anchor_num'], + kernel_size=1) + + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(128 * 3, args['dir_args']['num_bins'] * args['anchor_num'], + kernel_size=1) # BIN_NUM = 2 + else: + self.use_dir = False + + + self.apply(weight_init) + + + def forward(self, data_dict): + + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + + + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points} + + batch_dict = self.pillar_vfe(batch_dict) + batch_dict = self.scatter(batch_dict) + batch_dict = self.backbone(batch_dict) + + spatial_features_2d = batch_dict['spatial_features_2d'] + + cls_preds = 
self.cls_head(spatial_features_2d) + reg_preds = self.reg_head(spatial_features_2d) + unc_preds = self.unc_head(spatial_features_2d) # s is log(b) or log(sigma^2) + + output_dict = {'cls_preds': cls_preds, + 'reg_preds': reg_preds, + 'unc_preds': unc_preds} + + if self.use_dir: + dir_preds = self.dir_head(spatial_features_2d) + output_dict.update({'dir_preds': dir_preds}) + + return output_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_v2vnet_robust.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_v2vnet_robust.py new file mode 100644 index 0000000000000000000000000000000000000000..802a4c8192afeb92fd515473bab69c47dd1f06ea --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_v2vnet_robust.py @@ -0,0 +1,335 @@ +# -*- coding: utf-8 -*- +# Author: Hao Xiang , Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib +from numpy import record +import torch +import torch.nn as nn + +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.sub_modules.v2v_robust_module import AttentionWrapper, PoseRegressionWraper, WeightedEM, get_intersection, regroup +from opencood.utils.pose_utils import generate_noise_torch +from opencood.utils.transformation_utils import get_pairwise_transformation_torch +from opencood.models.fuse_modules.v2v_fuse import V2VNetFusion + +from icecream import ic +from opencood.utils.model_utils import weight_init + +class PointPillarV2VNetRobust(nn.Module): + def __init__(self, args): + super(PointPillarV2VNetRobust, self).__init__() + + self.max_cav = args['max_cav'] + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) + + # used to downsample the feature map for efficient computation + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.compression = False + + if args['compression'] > 0: + self.compression = True + self.naive_compressor = NaiveCompressor(256, args['compression']) + + self.fusion_net = V2VNetFusion(args['v2vfusion']) + + self.cls_head = nn.Conv2d(128 * 2, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(128 * 2, 7 * args['anchor_number'], + kernel_size=1) + + + self.downsample_rate = args['robust']['downsample_rate'] + self.discrete_ratio = args['robust']['discrete_ratio'] + self.H = args['robust']['H'] + self.W = args['robust']['W'] + + self.affine_parameter = {"H":self.H, "W": self.W, "downsample_rate": self.downsample_rate, "discrete_ratio": self.discrete_ratio} + learnable_alpha = True if 'learnable_alpha' not in args['robust'] else args['robust']['learnable_alpha'] + + self.pose_reg_net = PoseRegressionWraper(args['robust']['feature_dim']*2, + args['robust']['hidden_dim'], + self.affine_parameter + ) + + self.attention_net = AttentionWrapper(args['robust']['feature_dim']*2, + args['robust']['hidden_dim'], + self.affine_parameter, + 
learnable_alpha, + ) + + self.stage = args['stage'] # [0, 1, 2] + + self.apply(weight_init) + + if self.stage == 1: + self.backbone_fix() + if self.stage == 2: + self.backbone_unfix() + + def backbone_fix(self): + """ + Fix the parameters of backbone for stage 1 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.fusion_net.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + for p in self.attention_net.parameters(): + p.requires_grad = False + + def backbone_unfix(self): + """ + unfix for stage 2 + """ + + for p in self.pillar_vfe.parameters(): + p.requires_grad = True + + for p in self.scatter.parameters(): + p.requires_grad = True + + for p in self.backbone.parameters(): + p.requires_grad = True + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = True + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = True + + for p in self.fusion_net.parameters(): + p.requires_grad = True + + for p in self.cls_head.parameters(): + p.requires_grad = True + for p in self.reg_head.parameters(): + p.requires_grad = True + + for p in self.attention_net.parameters(): + p.requires_grad = True + + def pose_correction(self, features, record_len, pairwise_t_matrix): + """ use pose regression module to correct relative pose + Args: + + Returns: + pairwise_t_matrix_new: + [B, L, L, 4, 4], the relative pose after correction. + """ + return self.pose_reg_net(features, record_len, pairwise_t_matrix) + + def global_correction(self, lidar_pose, pairwise_t_matrix, record_len): + """ + Args: + lidar_pose: [N, 3] + input noisy lidar pose + pairwise_t_matrix: [B, L, L, 4, 4] + relative pose after pose regression module + record_len: list, + shape [B] + + Returns: + lidar_pose_new: [N, 3] + refined lidar pose + """ + + B = len(record_len) + lidar_pose_new = [] + + # [[N1,3], [N2, 3], ...] 
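+        # regroup() splits the stacked per-CAV tensor back into one chunk per
+        # sample, e.g. record_len = [2, 3] turns a (5, 3) pose tensor into
+        # chunks of shape (2, 3) and (3, 3).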
+ lidar_pose_split = regroup(lidar_pose, record_len) + + for b in range(B): + if record_len[b] == 1: + lidar_pose_new.append(lidar_pose_split[b]) + continue + lidar_pose = lidar_pose_split[b] + intersection_matrix = get_intersection(pairwise_t_matrix[b], self.affine_parameter) + lidar_pose_corrected = WeightedEM(lidar_pose, pairwise_t_matrix[b],intersection_matrix) + + lidar_pose_new.append(lidar_pose_corrected) + + lidar_pose_new = torch.cat(lidar_pose_new, dim=0) + + return lidar_pose_new + + + def noise_generator(self, lidar_pose, all_strong=False): + noise_s = generate_noise_torch(lidar_pose, pos_std=0.4, rot_std=4) # (N, 6) + noise_w = generate_noise_torch(lidar_pose, pos_std=0.01, rot_std=0.1) # (N, 6) + N = lidar_pose.shape[0] + + if all_strong: + choice = torch.zeros((N, 1), device=lidar_pose.device) # (N, 1) 0 choose strong, 1 choose weak + noise = noise_s + else: + choice = torch.randint(0, 2, (N, 1), device=lidar_pose.device) # (N, 1) 0 choose strong, 1 choose weak + noise = choice * noise_w + (1-choice) * noise_s + + return noise, choice + + + def train_forward(self, spatial_features_2d, record_len, lidar_pose, pairwise_t_matrix, stage): + """ + stage = 0, only training attentive_aggregation and v2vnet, strong noise and weak noise are used. + stage = 1, only training pose correction module. all strong noise + stage = 2, all component are used. all strong noise + + Args: + spatial_features_2d: (N, C, H, W) + record_len: list + lidar_pose: (N, 6), it will turn to [N, 3] quickly + """ + if stage == 0: + noise, choice = self.noise_generator(lidar_pose, all_strong=False) + + if stage == 1 or stage == 2: + noise, choice = self.noise_generator(lidar_pose, all_strong=True) + + lidar_pose += noise + lidar_pose = lidar_pose[:,[0,1,4]] # [N, 3] + pairwise_t_matrix = get_pairwise_transformation_torch(lidar_pose, self.max_cav, record_len, dof=3) + + # when training pairwise_t_matrix, pairwise_t_matrix carries given noise. 
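+        # stage 0: train the attention weighting and V2VNet fusion under mixed
+        #          strong/weak noise;
+        # stage 1: train only the pairwise pose-regression (correction) module
+        #          under strong noise;
+        # stage 2: run pose correction, global (weighted-EM) consensus and the
+        #          attention-weighted fusion end to end under strong noise.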
+ if self.stage == 0: + scores, weight = self.attention_net(spatial_features_2d, record_len, pairwise_t_matrix) + fused_feature = self.fusion_net(spatial_features_2d, record_len, pairwise_t_matrix, weight) + psm = self.cls_head(fused_feature) + rm = self.reg_head(fused_feature) + print("scores:", scores) + print("weight:", weight) + print("alpha:", self.attention_net.alpha) + + output_dict = {'stage': stage, + 'scores': scores, + 'choice': choice, + 'psm': psm, + 'rm': rm} + + if self.stage == 1: + pairwise_corr, _ = self.pose_correction(spatial_features_2d, record_len, pairwise_t_matrix) + output_dict = {'stage': stage, + 'pairwise_corr' : pairwise_corr, + 'pairwise_t_matrix': pairwise_t_matrix} + + if self.stage == 2: + pairwise_corr, pairwise_t_matrix_new = self.pose_correction(spatial_features_2d, record_len, pairwise_t_matrix) + lidar_pose_corrected = self.global_correction(lidar_pose, pairwise_t_matrix_new, record_len) # [N, 3] + + pairwise_t_matrix_corrected = get_pairwise_transformation_torch(lidar_pose_corrected, self.max_cav, record_len, dof=3) + scores, weight = self.attention_net(spatial_features_2d, record_len, pairwise_t_matrix_corrected) + fused_feature = self.fusion_net(spatial_features_2d, record_len, pairwise_t_matrix_corrected, weight) + psm = self.cls_head(fused_feature) + rm = self.reg_head(fused_feature) + + output_dict = {'stage': stage, + 'scores': scores, + 'psm': psm, + 'rm': rm, + 'pairwise_corr' : pairwise_corr, + 'pairwise_t_matrix': pairwise_t_matrix} + + return output_dict + + + + def eval_forward(self, spatial_features_2d, record_len, lidar_pose, pairwise_t_matrix, stage): + """ + same as stage=2 in training, but no noise added. + """ + lidar_pose = lidar_pose[:,[0,1,4]] # [N, 3] + pairwise_t_matrix = get_pairwise_transformation_torch(lidar_pose, self.max_cav, record_len, dof=3) + + pairwise_corr, pairwise_t_matrix = self.pose_correction(spatial_features_2d, record_len, pairwise_t_matrix) + + lidar_pose_corrected = self.global_correction(lidar_pose, pairwise_t_matrix, record_len) # [N, 3] + pairwise_t_matrix_corrected = get_pairwise_transformation_torch(lidar_pose_corrected, self.max_cav, record_len, dof=3) + + + scores, weight = self.attention_net(spatial_features_2d, record_len, pairwise_t_matrix_corrected) + fused_feature = self.fusion_net(spatial_features_2d, record_len, pairwise_t_matrix_corrected, weight) + psm = self.cls_head(fused_feature) + rm = self.reg_head(fused_feature) + + output_dict = {'stage': stage, + 'scores': scores, + 'psm': psm, + 'rm': rm, + 'pairwise_t_matrix': pairwise_t_matrix} + + return output_dict + + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + lidar_pose = data_dict['lidar_pose'] # [sum(cav), 6] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + batch_dict = self.backbone(batch_dict) + # N, C, H', W'. 
[N, 256, 50, 176] + spatial_features_2d = batch_dict['spatial_features_2d'] + + # downsample feature to reduce memory + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + # compressor + if self.compression: + spatial_features_2d = self.naive_compressor(spatial_features_2d) + + # spatial_features_2d is [sum(cav_num), 256, 50, 176] + # output only contains ego + # [B, 256, 50, 176] + + # lidar_pose -> pairwise_t_matrix , same content + + # if self.training: + # return self.train_forward(spatial_features_2d, record_len, lidar_pose, pairwise_t_matrix, self.stage) + # else: + # return self.eval_forward(spatial_features_2d, record_len, lidar_pose, pairwise_t_matrix, self.stage) + + return self.train_forward(spatial_features_2d, record_len, lidar_pose, pairwise_t_matrix, self.stage) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_where2comm.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_where2comm.py new file mode 100644 index 0000000000000000000000000000000000000000..cbc0db947cbea549aa83b677020cbf15fadee222 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/point_pillar_where2comm.py @@ -0,0 +1,156 @@ +import torch.nn as nn + +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.models.sub_modules.point_pillar_scatter import PointPillarScatter +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone +from opencood.models.sub_modules.downsample_conv import DownsampleConv +from opencood.models.sub_modules.naive_compress import NaiveCompressor +from opencood.models.sub_modules.dcn_net import DCNNet +# from opencood.models.fuse_modules.where2comm import Where2comm +from opencood.models.fuse_modules.where2comm_attn import Where2comm +import torch + +class PointPillarWhere2comm(nn.Module): + def __init__(self, args): + super(PointPillarWhere2comm, self).__init__() + + # PIllar VFE + self.pillar_vfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + self.scatter = PointPillarScatter(args['point_pillar_scatter']) + if 'resnet' in args['base_bev_backbone']: + self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) + else: + self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) + self.out_channel = sum(args['base_bev_backbone']['num_upsample_filter']) + + + # used to downsample the feature map for efficient computation + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + self.compression = False + + if 'compression' in args and args['compression'] > 0: + self.compression = True + self.naive_compressor = NaiveCompressor(256, args['compression']) + + # self.fusion_net = TransformerFusion(args['fusion_args']) + self.fusion_net = Where2comm(args['fusion_args']) + self.multi_scale = args['fusion_args']['multi_scale'] + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 7 * args['anchor_number'], + kernel_size=1) + self.use_dir = False + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(self.out_channel, args['dir_args']['num_bins'] * args['anchor_number'], + kernel_size=1) # BIN_NUM 
= 2 + if "backbone_fix" in args and args['backbone_fix']: + self.backbone_fix() + + def backbone_fix(self): + """ + Fix the parameters of backbone during finetune on timedelay。 + """ + for p in self.pillar_vfe.parameters(): + p.requires_grad = False + + for p in self.scatter.parameters(): + p.requires_grad = False + + for p in self.backbone.parameters(): + p.requires_grad = False + + if self.compression: + for p in self.naive_compressor.parameters(): + p.requires_grad = False + if self.shrink_flag: + for p in self.shrink_conv.parameters(): + p.requires_grad = False + + for p in self.cls_head.parameters(): + p.requires_grad = False + for p in self.reg_head.parameters(): + p.requires_grad = False + + def regroup(self, x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'record_len': record_len} + # n, 4 -> n, c + batch_dict = self.pillar_vfe(batch_dict) + # n, c -> N, C, H, W + batch_dict = self.scatter(batch_dict) + batch_dict = self.backbone(batch_dict) + # N, C, H', W'. [N, 384, 100, 352] + spatial_features_2d = batch_dict['spatial_features_2d'] + + # downsample feature to reduce memory + if self.shrink_flag: + spatial_features_2d = self.shrink_conv(spatial_features_2d) + # compressor + if self.compression: + spatial_features_2d = self.naive_compressor(spatial_features_2d) + # spatial_features_2d is [sum(cav_num), 256, 50, 176] + # output only contains ego + # [B, 256, 50, 176] + psm_single = self.cls_head(spatial_features_2d) + rm_single = self.reg_head(spatial_features_2d) + if self.use_dir: + dir_single = self.dir_head(spatial_features_2d) + + if self.multi_scale: + fused_feature, communication_rates, result_dict = self.fusion_net(batch_dict['spatial_features'], + psm_single, + record_len, + pairwise_t_matrix, + self.backbone) + # downsample feature to reduce memory + if self.shrink_flag: + fused_feature = self.shrink_conv(fused_feature) + else: + fused_feature, communication_rates, result_dict = self.fusion_net(spatial_features_2d, + psm_single, + record_len, + pairwise_t_matrix) + + + # print('fused_feature: ', fused_feature.shape) + psm = self.cls_head(fused_feature) + rm = self.reg_head(fused_feature) + + + output_dict = {'cls_preds': psm, + 'reg_preds': rm} + if self.use_dir: + output_dict.update({'dir_preds': self.dir_head(fused_feature), + 'dir_preds_single': dir_single}) + + output_dict.update(result_dict) + + output_dict.update({'cls_preds_single': psm_single, + 'reg_preds_single': rm_single, + 'comm_rate': communication_rates + }) + return output_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second.py new file mode 100644 index 0000000000000000000000000000000000000000..34efee7769009ecc873be262b2796f4cd32e9ad2 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , OpenPCDet +# License: 
TDG-Attribution-NonCommercial-NoDistrib + + +import torch.nn as nn + +from opencood.models.sub_modules.mean_vfe import MeanVFE +from opencood.models.sub_modules.sparse_backbone_3d import VoxelBackBone8x +from opencood.models.sub_modules.height_compression import HeightCompression +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone + + +class Second(nn.Module): + def __init__(self, args): + super(Second, self).__init__() + + # mean_vfe + self.mean_vfe = MeanVFE(args['mean_vfe'], 4) + # sparse 3d backbone + self.backbone_3d = VoxelBackBone8x(args['backbone_3d'], + 4, args['grid_size']) + # height compression + self.height_compression = HeightCompression(args['height_compression']) + # base ben backbone + self.backbone_2d = BaseBEVBackbone(args['base_bev_backbone'], 256) + + # head + self.cls_head = nn.Conv2d(256 * 2, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(256 * 2, 7 * args['anchor_num'], + kernel_size=1) + + def forward(self, data_dict): + + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + batch_size = voxel_coords[:,0].max() + 1 # batch size is padded in the first idx + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'batch_size': batch_size} + + batch_dict = self.mean_vfe(batch_dict) + batch_dict = self.backbone_3d(batch_dict) + batch_dict = self.height_compression(batch_dict) + batch_dict = self.backbone_2d(batch_dict) + + spatial_features_2d = batch_dict['spatial_features_2d'] + + psm = self.cls_head(spatial_features_2d) + rm = self.reg_head(spatial_features_2d) + + output_dict = {'psm': psm, + 'rm': rm} + + return output_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second_intermediate.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second_intermediate.py new file mode 100644 index 0000000000000000000000000000000000000000..c86f9734f8b1c74dcf4ee64483130814c1ff17ec --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second_intermediate.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch +import torch.nn as nn + +from opencood.models.sub_modules.mean_vfe import MeanVFE +from opencood.models.sub_modules.sparse_backbone_3d import VoxelBackBone8x +from opencood.models.sub_modules.height_compression import HeightCompression +from opencood.models.sub_modules.att_bev_backbone import AttBEVBackbone + + +class SecondIntermediate(nn.Module): + def __init__(self, args): + super(SecondIntermediate, self).__init__() + + self.batch_size = args['batch_size'] + # mean_vfe + self.mean_vfe = MeanVFE(args['mean_vfe'], 4) + # sparse 3d backbone + self.backbone_3d = VoxelBackBone8x(args['backbone_3d'], + 4, args['grid_size']) + # height compression + self.height_compression = HeightCompression(args['height_compression']) + # base ben backbone + self.backbone_2d = AttBEVBackbone(args['base_bev_backbone'], 256) + + # head + self.cls_head = nn.Conv2d(256 * 2, args['anchor_number'], + kernel_size=1) + self.reg_head = nn.Conv2d(256 * 2, 7 * args['anchor_num'], + kernel_size=1) + + def forward(self, data_dict): + + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + 
voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'batch_size': torch.sum(record_len).cpu().numpy(), + 'record_len': record_len} + + batch_dict = self.mean_vfe(batch_dict) + batch_dict = self.backbone_3d(batch_dict) + batch_dict = self.height_compression(batch_dict) + batch_dict = self.backbone_2d(batch_dict) + + spatial_features_2d = batch_dict['spatial_features_2d'] + + psm = self.cls_head(spatial_features_2d) + rm = self.reg_head(spatial_features_2d) + + output_dict = {'psm': psm, + 'rm': rm} + + return output_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second_ssfa.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second_ssfa.py new file mode 100644 index 0000000000000000000000000000000000000000..1a05d9f0656e6cedb786b8b8828e0073d3d65cc2 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second_ssfa.py @@ -0,0 +1,58 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , OpenPCDet +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch.nn as nn + +from opencood.models.sub_modules.mean_vfe import MeanVFE +from opencood.models.sub_modules.sparse_backbone_3d import VoxelBackBone8x +from opencood.models.sub_modules.height_compression import HeightCompression +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.cia_ssd_utils import SSFA, Head +from opencood.models.sub_modules.downsample_conv import DownsampleConv +import numpy as np + +class SecondSSFA(nn.Module): + def __init__(self, args): + super(SecondSSFA, self).__init__() + lidar_range = np.array(args['lidar_range']) + grid_size = np.round((lidar_range[3:6] - lidar_range[:3]) / + np.array(args['voxel_size'])).astype(np.int64) + self.vfe = MeanVFE(args['mean_vfe'], + args['mean_vfe']['num_point_features']) + self.spconv_block = VoxelBackBone8x(args['spconv'], + input_channels=args['spconv'][ + 'num_features_in'], + grid_size=grid_size) + self.map_to_bev = HeightCompression(args['map2bev']) + self.ssfa = SSFA(args['ssfa']) + + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.head = Head(**args['head']) + + def forward(self, data_dict): + + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + batch_size = voxel_coords[:,0].max() + 1 # batch size is padded in the first idx + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'batch_size': batch_size} + + batch_dict = self.vfe(batch_dict) + batch_dict = self.spconv_block(batch_dict) + batch_dict = self.map_to_bev(batch_dict) + out = self.ssfa(batch_dict['spatial_features']) + if self.shrink_flag: + out = self.shrink_conv(out) + + return self.head(out) diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second_ssfa_uncertainty.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second_ssfa_uncertainty.py new file mode 100644 index 0000000000000000000000000000000000000000..580e2193111534d7bbbd16e599576e96dd6f03d3 --- /dev/null +++ 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/second_ssfa_uncertainty.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , OpenPCDet +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch.nn as nn + +from opencood.models.sub_modules.mean_vfe import MeanVFE +from opencood.models.sub_modules.sparse_backbone_3d import VoxelBackBone8x +from opencood.models.sub_modules.height_compression import HeightCompression +from opencood.models.sub_modules.base_bev_backbone import BaseBEVBackbone +from opencood.models.sub_modules.cia_ssd_utils import SSFA +from opencood.models.sub_modules.downsample_conv import DownsampleConv +import numpy as np +from opencood.utils.model_utils import weight_init + +class SecondSSFAUncertainty(nn.Module): + def __init__(self, args): + super(SecondSSFAUncertainty, self).__init__() + lidar_range = np.array(args['lidar_range']) + grid_size = np.round((lidar_range[3:6] - lidar_range[:3]) / + np.array(args['voxel_size'])).astype(np.int64) + self.vfe = MeanVFE(args['mean_vfe'], + args['mean_vfe']['num_point_features']) + self.spconv_block = VoxelBackBone8x(args['spconv'], + input_channels=args['spconv'][ + 'num_features_in'], + grid_size=grid_size) + self.map_to_bev = HeightCompression(args['map2bev']) + self.ssfa = SSFA(args['ssfa']) + self.out_channel = args['ssfa']['feature_num'] + + uncertainty_dim = args['uncertainty_dim'] + + self.shrink_flag = False + if 'shrink_header' in args: + self.shrink_flag = True + self.shrink_conv = DownsampleConv(args['shrink_header']) + self.out_channel = args['shrink_header']['dim'][-1] + + self.cls_head = nn.Conv2d(self.out_channel, args['anchor_num'], + kernel_size=1) + self.reg_head = nn.Conv2d(self.out_channel, 7 * args['anchor_num'], + kernel_size=1) + self.unc_head = nn.Conv2d(self.out_channel, uncertainty_dim * args['anchor_num'], + kernel_size=1) + + self.use_dir = False + if 'dir_args' in args.keys(): + self.use_dir = True + self.dir_head = nn.Conv2d(self.out_channel, args['dir_args']['num_bins'] * args['anchor_num'], + kernel_size=1) # BIN_NUM = 2 + + self.apply(weight_init) + + def forward(self, data_dict): + + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + batch_size = voxel_coords[:, 0].max() + 1 # batch size is padded in the first idx + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'batch_size': batch_size} + + batch_dict = self.vfe(batch_dict) + batch_dict = self.spconv_block(batch_dict) + batch_dict = self.map_to_bev(batch_dict) + out = self.ssfa(batch_dict['spatial_features']) + if self.shrink_flag: + out = self.shrink_conv(out) + + cls_preds = self.cls_head(out) + reg_preds = self.reg_head(out) + unc_preds = self.unc_head(out) # s is log(b) or log(sigma^2) + + output_dict = {'cls_preds': cls_preds, + 'reg_preds': reg_preds, + 'unc_preds': unc_preds} + + if self.use_dir: + dir_preds = self.dir_head(out) + output_dict.update({'dir_preds': dir_preds}) + + return output_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/base_bev_backbone.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/base_bev_backbone.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b09d5d4aff56fd21e4b37fee1b6e7856a31e554a Binary files /dev/null and 
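Both SSFA variants above derive the voxel grid resolution directly from the point-cloud range and voxel size instead of reading it from the config. A standalone sketch of that arithmetic with hypothetical values (not the values used in this experiment):

```python
import numpy as np

# hypothetical range/voxel settings, only to illustrate the formula used above
lidar_range = np.array([-40.0, -40.0, -3.0, 40.0, 40.0, 1.0])  # x/y/z min then max
voxel_size = np.array([0.1, 0.1, 4.0])

grid_size = np.round((lidar_range[3:6] - lidar_range[:3]) / voxel_size).astype(np.int64)
# -> array([800, 800,   1]): number of voxels along x, y, z
```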
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/base_bev_backbone.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/base_bev_backbone_resnet.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/base_bev_backbone_resnet.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..27d551ca0fba7bcb30c6408507f19fa69f497428 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/base_bev_backbone_resnet.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/downsample_conv.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/downsample_conv.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3c080b27623f55b3d58d85cbd2b21e7eb39fe588 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/downsample_conv.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/naive_compress.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/naive_compress.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2f2d9706674f705bc03c734a4b529f0e488b0baf Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/naive_compress.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/pillar_vfe.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/pillar_vfe.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5c64aeb0e45a13e06401683b27a9a5480279f5d6 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/pillar_vfe.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/point_pillar_scatter.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/point_pillar_scatter.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a72f47059b971145a2c912babb702dc667c11e14 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/point_pillar_scatter.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/resblock.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/resblock.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..65d40d6621cf6163d68f647bcb37df12b06b5e54 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/resblock.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/torch_transformation_utils.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/torch_transformation_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9af88dd06f6005f1a3700bbf1d0c7674590563ad Binary files 
/dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/__pycache__/torch_transformation_utils.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/att_bev_backbone.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/att_bev_backbone.py new file mode 100644 index 0000000000000000000000000000000000000000..258475c502c7cf7602a3ebda7dbecf3e253d25c6 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/att_bev_backbone.py @@ -0,0 +1,166 @@ +import numpy as np +import torch +import torch.nn as nn + +from opencood.models.fuse_modules.self_attn import AttFusion +from opencood.models.sub_modules.auto_encoder import AutoEncoder + +DEBUG = False + +class AttBEVBackbone(nn.Module): + def __init__(self, model_cfg, input_channels): + super().__init__() + self.model_cfg = model_cfg + self.compress = False + + self.discrete_ratio = model_cfg['voxel_size'][0] + self.downsample_rate = 1 + + + + if 'compression' in model_cfg and model_cfg['compression'] > 0: + self.compress = True + self.compress_layer = model_cfg['compression'] + + if 'layer_nums' in self.model_cfg: + + assert len(self.model_cfg['layer_nums']) == \ + len(self.model_cfg['layer_strides']) == \ + len(self.model_cfg['num_filters']) + + layer_nums = self.model_cfg['layer_nums'] + layer_strides = self.model_cfg['layer_strides'] + num_filters = self.model_cfg['num_filters'] + else: + layer_nums = layer_strides = num_filters = [] + + if 'upsample_strides' in self.model_cfg: + assert len(self.model_cfg['upsample_strides']) \ + == len(self.model_cfg['num_upsample_filter']) + + num_upsample_filters = self.model_cfg['num_upsample_filter'] + upsample_strides = self.model_cfg['upsample_strides'] + + else: + upsample_strides = num_upsample_filters = [] + + num_levels = len(layer_nums) + c_in_list = [input_channels, *num_filters[:-1]] + + self.blocks = nn.ModuleList() + self.fuse_modules = nn.ModuleList() + self.deblocks = nn.ModuleList() + + if self.compress: + self.compression_modules = nn.ModuleList() + + for idx in range(num_levels): + cur_layers = [ + nn.ZeroPad2d(1), + nn.Conv2d( + c_in_list[idx], num_filters[idx], kernel_size=3, + stride=layer_strides[idx], padding=0, bias=False + ), + nn.BatchNorm2d(num_filters[idx], eps=1e-3, momentum=0.01), + nn.ReLU() + ] + + fuse_network = AttFusion(num_filters[idx]) + self.fuse_modules.append(fuse_network) + if self.compress and self.compress_layer - idx > 0: + self.compression_modules.append(AutoEncoder(num_filters[idx], + self.compress_layer-idx)) + + for k in range(layer_nums[idx]): + cur_layers.extend([ + nn.Conv2d(num_filters[idx], num_filters[idx], + kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(num_filters[idx], eps=1e-3, momentum=0.01), + nn.ReLU() + ]) + + self.blocks.append(nn.Sequential(*cur_layers)) + if len(upsample_strides) > 0: + stride = upsample_strides[idx] + if stride >= 1: + self.deblocks.append(nn.Sequential( + nn.ConvTranspose2d( + num_filters[idx], num_upsample_filters[idx], + upsample_strides[idx], + stride=upsample_strides[idx], bias=False + ), + nn.BatchNorm2d(num_upsample_filters[idx], + eps=1e-3, momentum=0.01), + nn.ReLU() + )) + else: + stride = np.round(1 / stride).astype(np.int) + self.deblocks.append(nn.Sequential( + nn.Conv2d( + num_filters[idx], num_upsample_filters[idx], + stride, + stride=stride, bias=False + ), + nn.BatchNorm2d(num_upsample_filters[idx], eps=1e-3, + momentum=0.01), + nn.ReLU() + )) + + c_in = 
sum(num_upsample_filters) + if len(upsample_strides) > num_levels: + self.deblocks.append(nn.Sequential( + nn.ConvTranspose2d(c_in, c_in, upsample_strides[-1], + stride=upsample_strides[-1], bias=False), + nn.BatchNorm2d(c_in, eps=1e-3, momentum=0.01), + nn.ReLU(), + )) + + self.num_bev_features = c_in + + def forward(self, data_dict): + spatial_features = data_dict['spatial_features'] + if DEBUG: + origin_features = torch.clone(spatial_features) + record_len = data_dict['record_len'] + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + + ups = [] + ret_dict = {} + x = spatial_features + + H, W = x.shape[2:] # 200, 704 + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + + for i in range(len(self.blocks)): + x = self.blocks[i](x) + if self.compress and i < len(self.compression_modules): + x = self.compression_modules[i](x) + if DEBUG: + self.fuse_modules[i].forward_debug(x, origin_features, record_len, pairwise_t_matrix) + else: + x_fuse = self.fuse_modules[i](x, record_len, pairwise_t_matrix) + + stride = int(spatial_features.shape[2] / x.shape[2]) + ret_dict['spatial_features_%dx' % stride] = x + + if len(self.deblocks) > 0: + ups.append(self.deblocks[i](x_fuse)) + else: + ups.append(x_fuse) + + if len(ups) > 1: + x = torch.cat(ups, dim=1) + elif len(ups) == 1: + x = ups[0] + + if len(self.deblocks) > len(self.blocks): + x = self.deblocks[-1](x) + + data_dict['spatial_features_2d'] = x + return data_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/auto_encoder.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/auto_encoder.py new file mode 100644 index 0000000000000000000000000000000000000000..a3c548e4bd60d3552abda7e7cc69ae56d2d00d46 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/auto_encoder.py @@ -0,0 +1,67 @@ +import torch +import torch.nn as nn + + +class AutoEncoder(nn.Module): + def __init__(self, feature_num, layer_num): + super().__init__() + self.feature_num = feature_num + self.feature_stride = 2 + + self.encoder = nn.ModuleList() + self.decoder = nn.ModuleList() + + for i in range(layer_num): + cur_layers = [ + nn.ZeroPad2d(1), + nn.Conv2d( + feature_num, feature_num, kernel_size=3, + stride=2, padding=0, bias=False + ), + nn.BatchNorm2d(feature_num, eps=1e-3, momentum=0.01), + nn.ReLU()] + + cur_layers.extend([ + nn.Conv2d(feature_num, feature_num // self.feature_stride, + kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(feature_num // self.feature_stride, + eps=1e-3, momentum=0.01), + nn.ReLU() + ]) + + self.encoder.append(nn.Sequential(*cur_layers)) + feature_num = feature_num // self.feature_stride + + feature_num = self.feature_num + for i in range(layer_num): + cur_layers = [nn.Sequential( + nn.ConvTranspose2d( + feature_num // 2, feature_num, + kernel_size=2, + stride=2, bias=False + ), + nn.BatchNorm2d(feature_num, + eps=1e-3, momentum=0.01), + nn.ReLU() + )] + + cur_layers.extend([nn.Sequential( + nn.Conv2d( + feature_num, feature_num, kernel_size=3, + stride=1, bias=False, padding=1 + ), + nn.BatchNorm2d(feature_num, eps=1e-3, + momentum=0.01), + 
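The AttBEVBackbone forward above rescales `pairwise_t_matrix` into a normalized 2x3 affine (rotation terms rescaled by the H/W aspect ratio, translations divided by half the BEV extent) before handing it to the fusion modules. Those modules are not shown in this diff; the usual consumer of such a normalized matrix is `F.affine_grid` plus `F.grid_sample`, so the following is only a hedged sketch of that warping step, with an identity transform standing in for a real neighbor-to-ego matrix:

```python
import torch
import torch.nn.functional as F

neighbor_feat = torch.randn(1, 64, 100, 352)   # [1, C, H, W] BEV feature of one neighbor
t_norm = torch.eye(2, 3).unsqueeze(0)          # [1, 2, 3] normalized affine (identity here)

grid = F.affine_grid(t_norm, size=list(neighbor_feat.shape), align_corners=False)
warped = F.grid_sample(neighbor_feat, grid, align_corners=False)  # neighbor feature in ego view
```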
nn.ReLU() + )]) + self.decoder.append(nn.Sequential(*cur_layers)) + feature_num //= 2 + + def forward(self, x): + for i in range(len(self.encoder)): + x = self.encoder[i](x) + + for i in range(len(self.decoder)-1, -1, -1): + x = self.decoder[i](x) + + return x \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/base_bev_backbone.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/base_bev_backbone.py new file mode 100644 index 0000000000000000000000000000000000000000..38f6960abd3c407b268df86e7d0ee7012357cc23 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/base_bev_backbone.py @@ -0,0 +1,157 @@ +import numpy as np +import torch +import torch.nn as nn + + +class BaseBEVBackbone(nn.Module): + def __init__(self, model_cfg, input_channels): + super().__init__() + self.model_cfg = model_cfg + + if 'layer_nums' in self.model_cfg: + + assert len(self.model_cfg['layer_nums']) == \ + len(self.model_cfg['layer_strides']) == \ + len(self.model_cfg['num_filters']) + + layer_nums = self.model_cfg['layer_nums'] + layer_strides = self.model_cfg['layer_strides'] + num_filters = self.model_cfg['num_filters'] + else: + layer_nums = layer_strides = num_filters = [] + + if 'upsample_strides' in self.model_cfg: + assert len(self.model_cfg['upsample_strides']) \ + == len(self.model_cfg['num_upsample_filter']) + + num_upsample_filters = self.model_cfg['num_upsample_filter'] + upsample_strides = self.model_cfg['upsample_strides'] + + else: + upsample_strides = num_upsample_filters = [] + + num_levels = len(layer_nums) + self.num_levels = num_levels + c_in_list = [input_channels, *num_filters[:-1]] + + self.blocks = nn.ModuleList() + self.deblocks = nn.ModuleList() + + for idx in range(num_levels): + cur_layers = [ + nn.ZeroPad2d(1), + nn.Conv2d( + c_in_list[idx], num_filters[idx], kernel_size=3, + stride=layer_strides[idx], padding=0, bias=False + ), + nn.BatchNorm2d(num_filters[idx], eps=1e-3, momentum=0.01), + nn.ReLU() + ] + for k in range(layer_nums[idx]): + cur_layers.extend([ + nn.Conv2d(num_filters[idx], num_filters[idx], + kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(num_filters[idx], eps=1e-3, momentum=0.01), + nn.ReLU() + ]) + + self.blocks.append(nn.Sequential(*cur_layers)) + if len(upsample_strides) > 0: + stride = upsample_strides[idx] + if stride >= 1: + self.deblocks.append(nn.Sequential( + nn.ConvTranspose2d( + num_filters[idx], num_upsample_filters[idx], + upsample_strides[idx], + stride=upsample_strides[idx], bias=False + ), + nn.BatchNorm2d(num_upsample_filters[idx], + eps=1e-3, momentum=0.01), + nn.ReLU() + )) + else: + stride = np.round(1 / stride).astype(np.int) + self.deblocks.append(nn.Sequential( + nn.Conv2d( + num_filters[idx], num_upsample_filters[idx], + stride, + stride=stride, bias=False + ), + nn.BatchNorm2d(num_upsample_filters[idx], eps=1e-3, + momentum=0.01), + nn.ReLU() + )) + + c_in = sum(num_upsample_filters) + if len(upsample_strides) > num_levels: + self.deblocks.append(nn.Sequential( + nn.ConvTranspose2d(c_in, c_in, upsample_strides[-1], + stride=upsample_strides[-1], bias=False), + nn.BatchNorm2d(c_in, eps=1e-3, momentum=0.01), + nn.ReLU(), + )) + + self.num_bev_features = c_in + + def forward(self, data_dict): + spatial_features = data_dict['spatial_features'] + + ups = [] + ret_dict = {} + x = spatial_features + + for i in range(len(self.blocks)): + x = self.blocks[i](x) + + stride = int(spatial_features.shape[2] / x.shape[2]) + 
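The AutoEncoder used for feature compression halves both the channel count and the spatial size at every encoder stage and mirrors that in the decoder, so a feature map whose height and width are divisible by 2**layer_num round-trips back to its original shape. A quick shape check with toy sizes:

```python
import torch
from opencood.models.sub_modules.auto_encoder import AutoEncoder

ae = AutoEncoder(feature_num=64, layer_num=2)
x = torch.randn(1, 64, 100, 352)   # H and W divisible by 2**2
out = ae(x)                        # 64ch @ 100x352 -> 16ch @ 25x88 -> back to 64ch @ 100x352
assert out.shape == x.shape
```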
ret_dict['spatial_features_%dx' % stride] = x + + if len(self.deblocks) > 0: + ups.append(self.deblocks[i](x)) + else: + ups.append(x) + + if len(ups) > 1: + x = torch.cat(ups, dim=1) + elif len(ups) == 1: + x = ups[0] + + if len(self.deblocks) > len(self.blocks): + x = self.deblocks[-1](x) + + data_dict['spatial_features_2d'] = x # [N,C,100,352] + + return data_dict + + + def get_multiscale_feature(self, spatial_features): + """ + before multiscale intermediate fusion + """ + feature_list = [] + x = spatial_features + for i in range(len(self.blocks)): + x = self.blocks[i](x) + feature_list.append(x) + + return feature_list + + def decode_multiscale_feature(self, x): + """ + after multiscale interemediate fusion + """ + ups = [] + for i in range(self.num_levels): + if len(self.deblocks) > 0: + ups.append(self.deblocks[i](x[i])) + else: + ups.append(x[i]) + if len(ups) > 1: + x = torch.cat(ups, dim=1) + elif len(ups) == 1: + x = ups[0] + + if len(self.deblocks) > self.num_levels: + x = self.deblocks[-1](x) + return x + \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/base_bev_backbone_resnet.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/base_bev_backbone_resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..69ada56db671c985eb53533fd2cd29c72ef7f5b3 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/base_bev_backbone_resnet.py @@ -0,0 +1,145 @@ +""" +Resblock is much strong than normal conv + +Provide api for multiscale intermeidate fuion +""" + +import numpy as np +import torch +import torch.nn as nn + +from opencood.models.sub_modules.resblock import ResNetModified, BasicBlock + +DEBUG = False + +class ResNetBEVBackbone(nn.Module): + def __init__(self, model_cfg, input_channels=64): + super().__init__() + self.model_cfg = model_cfg + + if 'layer_nums' in self.model_cfg: + + assert len(self.model_cfg['layer_nums']) == \ + len(self.model_cfg['layer_strides']) == \ + len(self.model_cfg['num_filters']) + + layer_nums = self.model_cfg['layer_nums'] + layer_strides = self.model_cfg['layer_strides'] + num_filters = self.model_cfg['num_filters'] + else: + layer_nums = layer_strides = num_filters = [] + + if 'upsample_strides' in self.model_cfg: + assert len(self.model_cfg['upsample_strides']) \ + == len(self.model_cfg['num_upsample_filter']) + + num_upsample_filters = self.model_cfg['num_upsample_filter'] + upsample_strides = self.model_cfg['upsample_strides'] + + else: + upsample_strides = num_upsample_filters = [] + + self.resnet = ResNetModified(BasicBlock, + layer_nums, + layer_strides, + num_filters, + inplanes = model_cfg.get('inplanes', 64)) + + num_levels = len(layer_nums) + self.num_levels = len(layer_nums) + self.deblocks = nn.ModuleList() + + for idx in range(num_levels): + if len(upsample_strides) > 0: + stride = upsample_strides[idx] + if stride >= 1: + self.deblocks.append(nn.Sequential( + nn.ConvTranspose2d( + num_filters[idx], num_upsample_filters[idx], + upsample_strides[idx], + stride=upsample_strides[idx], bias=False + ), + nn.BatchNorm2d(num_upsample_filters[idx], + eps=1e-3, momentum=0.01), + nn.ReLU() + )) + else: + stride = np.round(1 / stride).astype(np.int) + self.deblocks.append(nn.Sequential( + nn.Conv2d( + num_filters[idx], num_upsample_filters[idx], + stride, + stride=stride, bias=False + ), + nn.BatchNorm2d(num_upsample_filters[idx], eps=1e-3, + momentum=0.01), + nn.ReLU() + )) + + c_in = 
sum(num_upsample_filters) + if len(upsample_strides) > num_levels: + self.deblocks.append(nn.Sequential( + nn.ConvTranspose2d(c_in, c_in, upsample_strides[-1], + stride=upsample_strides[-1], bias=False), + nn.BatchNorm2d(c_in, eps=1e-3, momentum=0.01), + nn.ReLU(), + )) + + self.num_bev_features = c_in + + def forward(self, data_dict): + spatial_features = data_dict['spatial_features'] + + x = self.resnet(spatial_features) # tuple of features + ups = [] + + for i in range(self.num_levels): + if len(self.deblocks) > 0: + ups.append(self.deblocks[i](x[i])) + else: + ups.append(x[i]) + + if len(ups) > 1: + x = torch.cat(ups, dim=1) + elif len(ups) == 1: + x = ups[0] + + if len(self.deblocks) > self.num_levels: + x = self.deblocks[-1](x) + + data_dict['spatial_features_2d'] = x + return data_dict + + # these two functions are seperated for multiscale intermediate fusion + def get_multiscale_feature(self, spatial_features): + """ + before multiscale intermediate fusion + """ + x = self.resnet(spatial_features) # tuple of features + return x + + def decode_multiscale_feature(self, x): + """ + after multiscale interemediate fusion + """ + ups = [] + for i in range(self.num_levels): + if len(self.deblocks) > 0: + ups.append(self.deblocks[i](x[i])) + else: + ups.append(x[i]) + if len(ups) > 1: + x = torch.cat(ups, dim=1) + elif len(ups) == 1: + x = ups[0] + + if len(self.deblocks) > self.num_levels: + x = self.deblocks[-1](x) + return x + + def get_layer_i_feature(self, spatial_features, layer_i): + """ + before multiscale intermediate fusion + """ + return eval(f"self.resnet.layer{layer_i}")(spatial_features) # tuple of features + \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/base_transformer.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/base_transformer.py new file mode 100644 index 0000000000000000000000000000000000000000..0e05212a86deb085c000f4f674f403c8ed78e1c2 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/base_transformer.py @@ -0,0 +1,124 @@ +import torch +from torch import nn + +from einops import rearrange + + +class PreNorm(nn.Module): + def __init__(self, dim, fn): + super().__init__() + self.norm = nn.LayerNorm(dim) + self.fn = fn + + def forward(self, x, **kwargs): + return self.fn(self.norm(x), **kwargs) + + +class FeedForward(nn.Module): + def __init__(self, dim, hidden_dim, dropout=0.): + super().__init__() + self.net = nn.Sequential( + nn.Linear(dim, hidden_dim), + nn.GELU(), + nn.Dropout(dropout), + nn.Linear(hidden_dim, dim), + nn.Dropout(dropout) + ) + + def forward(self, x): + return self.net(x) + + +class CavAttention(nn.Module): + """ + Vanilla CAV attention. 
+ """ + def __init__(self, dim, heads, dim_head=64, dropout=0.1): + super().__init__() + inner_dim = heads * dim_head + + self.heads = heads + self.scale = dim_head ** -0.5 + + self.attend = nn.Softmax(dim=-1) + self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False) + + self.to_out = nn.Sequential( + nn.Linear(inner_dim, dim), + nn.Dropout(dropout) + ) + + def forward(self, x, mask, prior_encoding): + # x: (B, L, H, W, C) -> (B, H, W, L, C) + # mask: (B, L) + x = x.permute(0, 2, 3, 1, 4) + # mask: (B, 1, H, W, L, 1) + mask = mask.unsqueeze(1) + + # qkv: [(B, H, W, L, C_inner) *3] + qkv = self.to_qkv(x).chunk(3, dim=-1) + # q: (B, M, H, W, L, C) + q, k, v = map(lambda t: rearrange(t, 'b h w l (m c) -> b m h w l c', + m=self.heads), qkv) + + # attention, (B, M, H, W, L, L) + att_map = torch.einsum('b m h w i c, b m h w j c -> b m h w i j', + q, k) * self.scale + # add mask + att_map = att_map.masked_fill(mask == 0, -float('inf')) + # softmax + att_map = self.attend(att_map) + + # out:(B, M, H, W, L, C_head) + out = torch.einsum('b m h w i j, b m h w j c -> b m h w i c', att_map, + v) + out = rearrange(out, 'b m h w l c -> b h w l (m c)', + m=self.heads) + out = self.to_out(out) + # (B L H W C) + out = out.permute(0, 3, 1, 2, 4) + return out + + +class BaseEncoder(nn.Module): + def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.): + super().__init__() + self.layers = nn.ModuleList([]) + for _ in range(depth): + self.layers.append(nn.ModuleList([ + PreNorm(dim, CavAttention(dim, + heads=heads, + dim_head=dim_head, + dropout=dropout)), + PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)) + ])) + + def forward(self, x, mask): + for attn, ff in self.layers: + x = attn(x, mask=mask) + x + x = ff(x) + x + return x + + +class BaseTransformer(nn.Module): + def __init__(self, args): + super().__init__() + + dim = args['dim'] + depth = args['depth'] + heads = args['heads'] + dim_head = args['dim_head'] + mlp_dim = args['mlp_dim'] + dropout = args['dropout'] + max_cav = args['max_cav'] + + self.encoder = BaseEncoder(dim, depth, heads, dim_head, mlp_dim, + dropout) + + def forward(self, x, mask): + # B, L, H, W, C + output = self.encoder(x, mask) + # B, H, W, C + output = output[:, 0] + + return \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/bev_roi_head.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/bev_roi_head.py new file mode 100644 index 0000000000000000000000000000000000000000..713833d277b4ca4d2b731b24ca9296e695909c78 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/bev_roi_head.py @@ -0,0 +1,230 @@ +import torch +import torch.nn as nn +from mmcv.ops import RoIAlignRotated +from opencood.pcdet_utils.iou3d_nms.iou3d_nms_utils import boxes_iou3d_gpu +from opencood.utils import box_utils +from opencood.utils import common_utils +import numpy as np +from icecream import ic + +class BEVRoIHead(nn.Module): + def __init__(self, model_cfg, pc_range): + super().__init__() + self.model_cfg = model_cfg + self.pc_range = pc_range + self.roi_align_size = 3 + self.code_size = 7 + self.enlarge_ratio = model_cfg.get("enlarge_ratio", 1) + self.roialign_rotated = RoIAlignRotated(output_size=self.roi_align_size, spatial_scale=1, clockwise=True) + + c_out = self.model_cfg['in_channels'] # 128 + pre_channel = self.roi_align_size * self.roi_align_size * c_out # 3*3*128 + fc_layers = [self.model_cfg['n_fc_neurons']] * 2 + self.shared_fc_layers, pre_channel = 
self._make_fc_layers(pre_channel, + fc_layers) + + self.cls_layers, pre_channel = self._make_fc_layers(pre_channel, + fc_layers, + output_channels= + self.model_cfg[ + 'num_cls']) + self.iou_layers, _ = self._make_fc_layers(pre_channel, fc_layers, + output_channels= + self.model_cfg['num_cls']) + self.reg_layers, _ = self._make_fc_layers(pre_channel, fc_layers, + output_channels= + self.model_cfg[ + 'num_cls'] * 7) + + self._init_weights(weight_init='xavier') + + def _init_weights(self, weight_init='xavier'): + if weight_init == 'kaiming': + init_func = nn.init.kaiming_normal_ + elif weight_init == 'xavier': + init_func = nn.init.xavier_normal_ + elif weight_init == 'normal': + init_func = nn.init.normal_ + else: + raise NotImplementedError + + for m in self.modules(): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d): + if weight_init == 'normal': + init_func(m.weight, mean=0, std=0.001) + else: + init_func(m.weight) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + nn.init.normal_(self.reg_layers[-1].weight, mean=0, std=0.001) + + def _make_fc_layers(self, input_channels, fc_list, output_channels=None): + fc_layers = [] + pre_channel = input_channels + for k in range(len(fc_list)): + fc_layers.extend([ + nn.Conv1d(pre_channel, fc_list[k], kernel_size=1, bias=False), + # nn.BatchNorm1d(fc_list[k]), + nn.ReLU() + ]) + pre_channel = fc_list[k] + if self.model_cfg['dp_ratio'] > 0: + fc_layers.append(nn.Dropout(self.model_cfg['dp_ratio'])) + if output_channels is not None: + fc_layers.append( + nn.Conv1d(pre_channel, output_channels, kernel_size=1, + bias=True)) + fc_layers = nn.Sequential(*fc_layers) + return fc_layers, pre_channel + + def forward(self, batch_dict): + batch_dict = self.assign_targets(batch_dict) + + # put roi back to dense feature map for rotated roi align. + batch_size = batch_dict['batch_size_2stage'] + # [[RoI_H0*RoI_W0, C], [RoI_H1*RoI_W1, C], ...] + feature_of_proposals_ego_list = batch_dict['feature_of_proposals_ego_list'] + C = feature_of_proposals_ego_list[0].shape[1] + device = feature_of_proposals_ego_list[0].device + + H, W = batch_dict['feature_shape'] + grid_size_H = (self.pc_range[4] - self.pc_range[1]) / H + grid_size_W = (self.pc_range[3] - self.pc_range[0]) / W + + # dense feature map + feature_map = torch.zeros((batch_size, C, H, W), device=device) + roi_cnt = 0 + for batch_idx, roi_fused in enumerate(batch_dict['roi_fused']): # per scene + for roi in roi_fused: + feature_map[batch_idx, :, roi[2]:roi[3], roi[0]:roi[1]] = \ + feature_of_proposals_ego_list[roi_cnt].permute(1,0).view(C, roi[3]-roi[2], roi[1]-roi[0]) + roi_cnt += 1 + + # proposal to rotated roi input, + # (batch_index, center_x, center_y, w, h, angle). The angle is in radian. 
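The comment closing this block pins down the rotated-RoI layout that mmcv's `RoIAlignRotated` consumes: one row per proposal, `(batch_index, center_x, center_y, w, h, angle)`, with centres and sizes in feature-map cells and the angle in radians. A toy row built the same way as above (made-up box, range, and feature-map size; `enlarge_ratio` ignored):

```python
import torch

# one fused box in metres, hwl order as above: x, y, z, h, w, l, yaw
box = torch.tensor([12.0, 3.0, -1.0, 1.56, 1.6, 3.9, 0.3])
pc_range = [-48.0, -48.0, -3.0, 48.0, 48.0, 1.0]   # hypothetical x/y/z range
H, W = 192, 384                                     # hypothetical BEV feature size
grid_h = (pc_range[4] - pc_range[1]) / H            # metres per cell along y
grid_w = (pc_range[3] - pc_range[0]) / W            # metres per cell along x

roi_row = torch.tensor([
    0.0,                               # batch index of the scene
    (box[0] - pc_range[0]) / grid_w,   # center_x in cells
    (box[1] - pc_range[1]) / grid_h,   # center_y in cells
    box[5] / grid_w,                   # box length l -> roi width
    box[4] / grid_h,                   # box width  w -> roi height
    box[6],                            # yaw, already in radians
])
```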
+ roi_input = torch.zeros((len(feature_of_proposals_ego_list), 6), device=device) + + box_cnt = 0 + for batch_idx, box_fused in enumerate(batch_dict['boxes_fused']): # per scene + # box_fused is [n_boxes, 7], x, y, z, h, w, l, yaw -> (center_x, center_y, w, h) + roi_input[box_cnt:box_cnt+box_fused.shape[0], 0] = batch_idx + roi_input[box_cnt:box_cnt+box_fused.shape[0], 1] = (box_fused[:, 0] - self.pc_range[0]) / grid_size_W + roi_input[box_cnt:box_cnt+box_fused.shape[0], 2] = (box_fused[:, 1] - self.pc_range[1]) / grid_size_H + roi_input[box_cnt:box_cnt+box_fused.shape[0], 3] = box_fused[:, 5] / grid_size_W * self.enlarge_ratio # box's l -> W + roi_input[box_cnt:box_cnt+box_fused.shape[0], 4] = box_fused[:, 4] / grid_size_H * self.enlarge_ratio # box's w -> H + roi_input[box_cnt:box_cnt+box_fused.shape[0], 5] = box_fused[:, 6] + box_cnt += box_fused.shape[0] + + # roi align + N_proposals = roi_input.shape[0] + # [sum(proposal), C, self.roi_align_size, self.roi_align_size] + pooled_feature = self.roialign_rotated(feature_map, roi_input) + # [sum(proposal), self.roi_align_size * self.roi_align_size * C, 1] + pooled_feature = pooled_feature.flatten(start_dim=2).permute(0,2,1).flatten(start_dim=1).unsqueeze(-1) + shared_features = self.shared_fc_layers(pooled_feature) + + rcnn_cls = self.cls_layers(shared_features).transpose(1, 2).contiguous().squeeze(dim=1) + rcnn_iou = self.iou_layers(shared_features).transpose(1, 2).contiguous().squeeze(dim=1) + rcnn_reg = self.reg_layers(shared_features).transpose(1, 2).contiguous().squeeze(dim=1) + + batch_dict['stage2_out'] = { + 'rcnn_cls': rcnn_cls, + 'rcnn_iou': rcnn_iou, + 'rcnn_reg': rcnn_reg, + } + + return batch_dict + + + def assign_targets(self, batch_dict): + batch_dict['rcnn_label_dict'] = { + 'rois': [], + 'gt_of_rois': [], + 'gt_of_rois_src': [], + 'cls_tgt': [], + 'reg_tgt': [], + 'iou_tgt': [], + 'rois_anchor': [], + 'record_len': [], + 'rois_scores_stage1': [] + } + pred_boxes = batch_dict['boxes_fused'] + pred_scores = batch_dict['scores_fused'] + gt_boxes = [b[m][:, [0, 1, 2, 5, 4, 3, 6]].float() for b, m in + zip(batch_dict['object_bbx_center'], + batch_dict['object_bbx_mask'].bool())] # hwl -> lwh order + for rois, scores, gts in zip(pred_boxes, pred_scores, gt_boxes): # each frame + rois = rois[:, [0, 1, 2, 5, 4, 3, 6]] # hwl -> lwh + if gts.shape[0] == 0: + gts = rois.clone() + + ious = boxes_iou3d_gpu(rois, gts) + max_ious, gt_inds = ious.max(dim=1) + gt_of_rois = gts[gt_inds] + rcnn_labels = (max_ious > 0.3).float() + mask = torch.logical_not(rcnn_labels.bool()) + + # set negative samples back to rois, no correction in stage2 for them + gt_of_rois[mask] = rois[mask] + gt_of_rois_src = gt_of_rois.clone().detach() + + # canoical transformation + roi_center = rois[:, 0:3] + # TODO: roi_ry > 0 in pcdet + roi_ry = rois[:, 6] % (2 * np.pi) + gt_of_rois[:, 0:3] = gt_of_rois[:, 0:3] - roi_center + gt_of_rois[:, 6] = gt_of_rois[:, 6] - roi_ry + + # transfer LiDAR coords to local coords + gt_of_rois = common_utils.rotate_points_along_z( + points=gt_of_rois.view(-1, 1, gt_of_rois.shape[-1]), + angle=-roi_ry.view(-1) + ).view(-1, gt_of_rois.shape[-1]) + + # flip orientation if rois have opposite orientation + heading_label = (gt_of_rois[:, 6] + ( + torch.div(torch.abs(gt_of_rois[:, 6].min()), + (2 * np.pi), rounding_mode='trunc') + + 1) * 2 * np.pi) % (2 * np.pi) # 0 ~ 2pi + opposite_flag = (heading_label > np.pi * 0.5) & ( + heading_label < np.pi * 1.5) + + # (0 ~ pi/2, 3pi/2 ~ 2pi) + heading_label[opposite_flag] = (heading_label[ + 
opposite_flag] + np.pi) % ( + 2 * np.pi) + flag = heading_label > np.pi + heading_label[flag] = heading_label[ + flag] - np.pi * 2 # (-pi/2, pi/2) + heading_label = torch.clamp(heading_label, min=-np.pi / 2, + max=np.pi / 2) + gt_of_rois[:, 6] = heading_label + + # generate regression target + rois_anchor = rois.clone().detach().view(-1, self.code_size) + rois_anchor[:, 0:3] = 0 + rois_anchor[:, 6] = 0 + + reg_targets = box_utils.box_encode( + gt_of_rois.view(-1, self.code_size), rois_anchor + ) + + batch_dict['rcnn_label_dict']['rois'].append(rois) + batch_dict['rcnn_label_dict']['rois_scores_stage1'].append(scores) + batch_dict['rcnn_label_dict']['gt_of_rois'].append(gt_of_rois) + batch_dict['rcnn_label_dict']['gt_of_rois_src'].append( + gt_of_rois_src) + batch_dict['rcnn_label_dict']['cls_tgt'].append(rcnn_labels) + batch_dict['rcnn_label_dict']['reg_tgt'].append(reg_targets) + batch_dict['rcnn_label_dict']['iou_tgt'].append(max_ious) + batch_dict['rcnn_label_dict']['rois_anchor'].append(rois_anchor) + batch_dict['rcnn_label_dict']['record_len'].append(rois.shape[0]) + + + # cat list to tensor + for k, v in batch_dict['rcnn_label_dict'].items(): + if k == 'record_len': + continue + batch_dict['rcnn_label_dict'][k] = torch.cat(v, dim=0) + + return batch_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/bevformer.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/bevformer.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/box_align.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/box_align.py new file mode 100644 index 0000000000000000000000000000000000000000..d955d886c8281ba1882a36b72bde312b15f7757b --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/box_align.py @@ -0,0 +1,808 @@ +""" +This module is designed for box alignment +It should be used for 1-round communication, maybe adapt to 2-round communication latter +i.e, collaborative agent send the full feature map and noisy pose once together + +We will use g2o for pose graph optimization. +""" + + +from cv2 import threshold +from opencood.models.sub_modules.pose_graph_optim import PoseGraphOptimization2D +from opencood.utils.transformation_utils import pose_to_tfm +from opencood.utils import box_utils +from collections import OrderedDict +import numpy as np +import torch +import torch.nn.functional as F +import g2o +from icecream import ic +import os + +DEBUG = False + +def all_pair_l2(A, B): + """ All pair L2 distance for A and B + Args: + A : torch.Tensor + shape [N_A, D] + B : torch.Tensor + shape [N_B, D] + Returns: + C : torch.Tensor + shape [N_A, N_B] + """ + TwoAB = 2*A@B.T + C = torch.sqrt(torch.sum(A * A, 1, keepdim=True).expand_as(TwoAB) \ + + torch.sum(B * B, 1, keepdim=True).T.expand_as(TwoAB) \ + - TwoAB) + return C + +def box_alignment_relative_sample( + pred_corners_list, + noisy_lidar_pose, + clean_lidar_pose=None, + uncertainty_list=None, + order='hwl', + landmark_SE2=True, + adaptive_landmark=False): + """ Perform box alignment for one sample. + Correcting the relative pose. 
+ + Args: + pred_corners_list: in each ego coordinate + [[N_1, 8, 3], ..., [N_cav1, 8, 3]] + + clean_lidar_poses: + [N_cav1, 6], in degree + + noisy_lidar_poses: + [N_cav1, 6], in degree + + uncertainty_list: + [[N_1, 3], [N_2, 3], ..., [N_cav1, 3]] + + landmark_SE2: + if True, the landmark is SE(2), otherwise R^2 + + adaptive_landmark: (when landmark_SE2 = True) + if True, landmark will turn to R^2 if yaw angles differ a lot + + Returns: + refined_lidar_poses: np.ndarray + [N_cav1, 3], + """ + + ## first transform point from ego coordinate to world coordinate, using lidar_pose. + N = noisy_lidar_pose.shape[0] + device = pred_corners_list[0].device + lidar_pose_noisy_tfm = pose_to_tfm(noisy_lidar_pose, dof=6) + + pred_corners_world_list = \ + [box_utils.project_box3d(pred_corners_list[i], lidar_pose_noisy_tfm[i]) for i in range(N)] # [[N1, 8, 3], [N2, 8, 3],...] + pred_box3d_list = \ + [box_utils.corner_to_center_torch(corner, order).to(device) for corner in pred_corners_list] # [[N1, 7], [N2, 7], ...], angle in radius + pred_box3d_world_list = \ + [box_utils.corner_to_center_torch(corner, order).to(device) for corner in pred_corners_world_list] # [[N1, 7], [N2, 7], ...], angle in radius + + pred_center_list = \ + [torch.mean(corner_tensor, dim=[1]) for corner_tensor in pred_corners_list] # [[N1,3], [N2,3], ...] + + pred_center_world_list = \ + [pred_box3d_world[:,:3] for pred_box3d_world in pred_box3d_world_list] + + pred_yaw_world_list = \ + [pred_box3d[:, 6] for pred_box3d in pred_box3d_world_list] + + pred_len = \ + [pred_center.shape[0] for pred_center in pred_center_list] + + + + + box_idx_to_agent = [] + for i in range(N): + box_idx_to_agent += [i] * pred_len[i] + + + pred_center_cat = torch.cat(pred_center_list, dim=0) # [sum(pred_box), 3] + pred_center_world_cat = torch.cat(pred_center_world_list, dim=0) # [sum(pred_box), 3] + pred_box3d_cat = torch.cat(pred_box3d_list, dim=0) # [sum(pred_box), 7] + pred_yaw_world_cat = torch.cat(pred_yaw_world_list) # [sum(pred_box)] + + + w_a = 1.6 # width of anchor + l_a = 3.9 # length of anchor + d_a_square = w_a ** 2 + l_a ** 2 # anchor's diag + + + if uncertainty_list is not None: + pred_log_sigma2_cat = torch.cat(uncertainty_list) + pred_certainty_cat = torch.exp(-pred_log_sigma2_cat) + pred_certainty_cat[:,:2] /= d_a_square # sigma_delta_x -> sigma_x. + + + pred_center_world_cat_cpu = pred_center_world_cat.cpu() # if use gpu, it will get nan. + pred_center_allpair_dist = all_pair_l2(pred_center_world_cat_cpu, pred_center_world_cat_cpu) # [sum(pred_box), sum(pred_box)] + + + # let pair from one vehicle be max distance + MAX_DIST = 10000 + cum = 0 + for i in range(N): + pred_center_allpair_dist[cum: cum + pred_len[i], cum: cum +pred_len[i]] = MAX_DIST + cum += pred_len[i] + + + cluster_id = N # let the vertex id of object start from N + cluster_dict = OrderedDict() + remain_box = set(range(cum)) + thres = 0.75 # l2 distance within the threshold, can be considered as one object. 
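`all_pair_l2` above is simply the pairwise Euclidean distance between predicted box centres in the world frame; any pair of boxes from different agents whose distance falls below `thres` is grouped into one object cluster. A small sanity check of that distance against `torch.cdist` (toy centres):

```python
import torch

A = torch.tensor([[0.0, 0.0, 0.0],
                  [10.0, 0.0, 0.0]])
B = torch.tensor([[0.3, 0.4, 0.0],    # 0.5 m from A[0]
                  [50.0, 0.0, 0.0]])

TwoAB = 2 * A @ B.T
dist = torch.sqrt((A * A).sum(1, keepdim=True) + (B * B).sum(1, keepdim=True).T - TwoAB)
assert torch.allclose(dist, torch.cdist(A, B), atol=1e-5)
# with thres = 0.75, only A[0] and B[0] would be clustered as the same physical object
```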
+ for box_idx in range(cum): + + if box_idx not in remain_box: # already assigned + continue + within_thres_idx_tensor = (pred_center_allpair_dist[box_idx] < thres).nonzero().flatten() + within_thres_idx_list = within_thres_idx_tensor.cpu().numpy().tolist() + + if len(within_thres_idx_list) == 0: # if it's a single box + continue + + # start from within_thres_idx_list, find new box added to the cluster + explored = [box_idx] + unexplored = [idx for idx in within_thres_idx_list if idx in remain_box] + + while unexplored: + idx = unexplored[0] + within_thres_idx_tensor = (pred_center_allpair_dist[idx] < thres).nonzero().flatten() + within_thres_idx_list = within_thres_idx_tensor.cpu().numpy().tolist() + for newidx in within_thres_idx_list: + if (newidx not in explored) and (newidx not in unexplored) and (newidx in remain_box): + unexplored.append(newidx) + unexplored.remove(idx) + explored.append(idx) + + if len(explored) == 1: # it's a single box, neighbors have been assigned + remain_box.remove(box_idx) + continue + + cluster_box_idxs = explored + + cluster_dict[cluster_id] = OrderedDict() + cluster_dict[cluster_id]['box_idx'] = [idx for idx in cluster_box_idxs] + cluster_dict[cluster_id]['box_dist'] = [pred_center_cat[idx].norm() for idx in cluster_box_idxs] # distance to observer + cluster_dict[cluster_id]['box_center_world'] = [pred_center_world_cat[idx] for idx in cluster_box_idxs] # coordinate in world, [3,] + cluster_dict[cluster_id]['box_yaw'] = [pred_yaw_world_cat[idx] for idx in cluster_box_idxs] + + yaw_var = torch.var(torch.as_tensor(cluster_dict[cluster_id]['box_yaw']), unbiased=False) + + if landmark_SE2: + if adaptive_landmark and yaw_var > 0.2: + landmark = pred_center_world_cat[box_idx].clone()[:2] + else: + landmark = pred_center_world_cat[box_idx].clone() + landmark[2] = pred_yaw_world_cat[box_idx] + else: + landmark = pred_center_world_cat[box_idx].clone()[:2] + + + cluster_dict[cluster_id]['landmark'] = landmark.cpu().numpy() # [x, y, yaw] or [x, y] + cluster_dict[cluster_id]['landmark_SE2'] = True if landmark.shape[0] == 3 else False + + DEBUG = False + if DEBUG: + from icecream import ic + ic(cluster_dict[cluster_id]['box_idx']) + ic(cluster_dict[cluster_id]['box_center_world']) + ic(cluster_dict[cluster_id]['box_yaw']) + ic(cluster_dict[cluster_id]['landmark']) + + + cluster_id += 1 + for idx in cluster_box_idxs: + remain_box.remove(idx) + + vertex_num = cluster_id + agent_num = N + landmark_num = cluster_id - N + # ic(agent_num) + # ic(landmark_num) + + """ + Now we have clusters for objects. we can create pose graph. + First we consider center as landmark. + Maybe set corner as landmarks in the future. + """ + pgo = PoseGraphOptimization2D(verbose=False) + + # Add agent to vertexs + for agent_id in range(N): + v_id = agent_id + # notice lidar_pose use degree format, translate it to radians. 
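As the comment above notes, the stored lidar poses are (x, y, z, roll, yaw, pitch) in degrees, while g2o's SE2 vertices take (x, y, theta) with theta in radians; the optimized result is converted back to degrees at the end. A minimal sketch of that round trip (hypothetical pose values, mirroring the g2o calls used above):

```python
import numpy as np
import g2o

lidar_pose = np.array([10.0, -2.0, 1.9, 0.0, 35.0, 0.0])  # x, y, z, roll, yaw, pitch (degrees)

pose_np = lidar_pose[[0, 1, 4]].copy()    # keep x, y, yaw
pose_np[2] = np.deg2rad(pose_np[2])       # g2o.SE2 expects radians
v_pose = g2o.SE2(pose_np)

refined = np.asarray(v_pose.vector()).copy()   # [x, y, theta]; optimization would refine this
refined[2] = np.rad2deg(refined[2])            # back to degrees, matching the source pose format
```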
+ pose_np = noisy_lidar_pose[agent_id, [0,1,4]].cpu().numpy() + pose_np[2] = np.deg2rad(pose_np[2]) # radians + v_pose = g2o.SE2(pose_np) + + if agent_id == 0: + pgo.add_vertex(id=v_id, pose=v_pose, fixed=True) + else: + pgo.add_vertex(id=v_id, pose=v_pose, fixed=False) + + # Add landmark to vertexs + for landmark_id in range(N, cluster_id): + v_id = landmark_id + landmark = cluster_dict[landmark_id]['landmark'] # (3,) or (2,) + landmark_SE2 = cluster_dict[landmark_id]['landmark_SE2'] + + if landmark_SE2: + v_pose = g2o.SE2(landmark) + else: + v_pose = landmark + + pgo.add_vertex(id=v_id, pose=v_pose, fixed=False, SE2=landmark_SE2) + + # Add agent-landmark edge to edge + for landmark_id in range(N, cluster_id): + landmark_SE2 = cluster_dict[landmark_id]['landmark_SE2'] + + for box_idx in cluster_dict[landmark_id]['box_idx']: + agent_id = box_idx_to_agent[box_idx] + if landmark_SE2: + e_pose = g2o.SE2(pred_box3d_cat[box_idx][[0,1,6]].cpu().numpy().astype(np.float64)) + info = np.identity(3, dtype=np.float64) + if uncertainty_list is not None: + info[[0,1,2],[0,1,2]] = pred_certainty_cat[box_idx].cpu().numpy() + else: + e_pose = pred_box3d_cat[box_idx][[0,1]].cpu().numpy().astype(np.float64) + info = np.identity(2, dtype=np.float64) + if uncertainty_list is not None: + info[[0,1],[0,1]] = pred_certainty_cat[box_idx][:2].cpu().numpy() + + + pgo.add_edge(vertices=[agent_id, landmark_id], measurement=e_pose, information=info, SE2=landmark_SE2) + + pgo.optimize() + + pose_new_list = [] + for agent_id in range(N): + # print(pgo.get_pose(agent_id).vector()) + pose_new_list.append(pgo.get_pose(agent_id).vector()) + + refined_pose = np.array(pose_new_list) + refined_pose[:,2] = np.rad2deg(refined_pose[:,2]) # rad -> degree, same as source + + return refined_pose + +def box_alignment_sample(pred_corners_list, lidar_poses_for_tfm, noisy_lidar_poses, uncertainty_list=None, order='hwl'): + """ Perform box alignment for one sample. + Args: + pred_corners_list: in each ego coordinate + [[N_1, 8, 3], ..., [N_cav1, 8, 3]] + + lidar_poses: + [N_cav1, 6] , in degree + + scores_list: + [[N_1, 3], [N_2, 3], ..., [N_cav1, 3]] + + Returns: + refined_lidar_poses: np.ndarray + [N_cav1, 3], + """ + + ## first transform point from ego coordinate to world coordinate, using lidar_pose. + lidar_poses = lidar_poses_for_tfm + N = lidar_poses.shape[0] + device = pred_corners_list[0].device + lidar_pose_tfm = pose_to_tfm(lidar_poses, dof=6) # Tw_c + + + + pred_corners_world_list = \ + [box_utils.project_box3d(pred_corners_list[i], lidar_pose_tfm[i]) for i in range(N)] # [[N1, 8, 3], [N2, 8, 3],...] + pred_box3d_list = \ + [box_utils.corner_to_center_torch(corner, order).to(device) for corner in pred_corners_list] # [[N1, 7], [N2, 7], ...], angle in radius + pred_box3d_world_list = \ + [box_utils.corner_to_center_torch(corner, order).to(device) for corner in pred_corners_world_list] # [[N1, 7], [N2, 7], ...], angle in radius + + pred_center_list = \ + [torch.mean(corner_tensor, dim=[1]) for corner_tensor in pred_corners_list] # [[N1,3], [N2,3], ...] 
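When per-box uncertainties are supplied, the edge information matrix built above is the inverse of the predicted variance, exp(-log sigma^2), with the x/y entries additionally divided by the squared anchor diagonal to undo the normalized regression target. The same conversion in isolation (toy log-variances):

```python
import numpy as np

w_a, l_a = 1.6, 3.9
d_a_square = w_a ** 2 + l_a ** 2             # squared anchor diagonal, as above

log_sigma2 = np.array([-1.0, -1.2, -2.0])    # predicted log-variance for x, y, yaw
certainty = np.exp(-log_sigma2)              # inverse variance
certainty[:2] /= d_a_square                  # sigma_delta_x -> sigma_x rescaling

info = np.identity(3, dtype=np.float64)
info[[0, 1, 2], [0, 1, 2]] = certainty       # diagonal information matrix for one SE2 edge
```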
+ + pred_center_world_list = \ + [pred_box3d_world[:,:3] for pred_box3d_world in pred_box3d_world_list] + + pred_yaw_world_list = \ + [pred_box3d[:, 6] for pred_box3d in pred_box3d_world_list] + + pred_len = \ + [pred_center.shape[0] for pred_center in pred_center_list] + + + box_idx_to_agent = [] + for i in range(N): + box_idx_to_agent += [i] * pred_len[i] + + if DEBUG: + vis_corners_list(pred_corners_world_list,filename="/GPFS/rhome/yifanlu/OpenCOOD/box_align_items/gt_box_noisy_pose.png") + + + pred_center_cat = torch.cat(pred_center_list, dim=0) # [sum(pred_box), 3] + pred_center_world_cat = torch.cat(pred_center_world_list, dim=0) # [sum(pred_box), 3] + pred_box3d_cat = torch.cat(pred_box3d_list, dim=0) # [sum(pred_box), 7] + pred_yaw_world_cat = torch.cat(pred_yaw_world_list) # [sum(pred_box)] + + pred_center_world_cat_cpu = pred_center_world_cat.cpu() # if use gpu, it will get nan. + pred_center_allpair_dist = all_pair_l2(pred_center_world_cat_cpu, pred_center_world_cat_cpu) # [sum(pred_box), sum(pred_box)] + + + # let pair from one vehicle be max distance + MAX_DIST = 10000 + cum = 0 + for i in range(N): + pred_center_allpair_dist[cum: cum + pred_len[i], cum: cum +pred_len[i]] = MAX_DIST + cum += pred_len[i] + + + cluster_id = N # let the vertex id of object start from N + cluster_dict = OrderedDict() + remain_box = set(range(cum)) + thres = 1 # l2 distance within the threshold, can be considered as one object. + for box_idx in range(cum): + if box_idx not in remain_box: # already assigned + continue + within_thres_idx_tensor = (pred_center_allpair_dist[box_idx] < thres).nonzero().flatten() + within_thres_idx_list = within_thres_idx_tensor.cpu().numpy().tolist() + + if len(within_thres_idx_list) == 0: # if it's a single box + continue + + # start from within_thres_idx_list, find new box added to the cluster + explored = [box_idx] + unexplored = [idx for idx in within_thres_idx_list if idx in remain_box] + + while unexplored: + idx = unexplored[0] + within_thres_idx_tensor = (pred_center_allpair_dist[idx] < thres).nonzero().flatten() + within_thres_idx_list = within_thres_idx_tensor.cpu().numpy().tolist() + for newidx in within_thres_idx_list: + if (newidx not in explored) and (newidx not in unexplored) and (newidx in remain_box): + unexplored.append(newidx) + unexplored.remove(idx) + explored.append(idx) + + if len(explored) == 1: # it's a single box, neighbors have been assigned + remain_box.remove(box_idx) + continue + + cluster_box_idxs = explored + + cluster_dict[cluster_id] = OrderedDict() + cluster_dict[cluster_id]['box_idx'] = [idx for idx in cluster_box_idxs] + cluster_dict[cluster_id]['box_dist'] = [pred_center_cat[idx].norm() for idx in cluster_box_idxs] # distance to observer + cluster_dict[cluster_id]['box_center_world'] = [pred_center_world_cat[idx] for idx in cluster_box_idxs] # coordinate in world, [3,] + cluster_dict[cluster_id]['box_yaw'] = [pred_yaw_world_cat[idx] for idx in cluster_box_idxs] + + + box_dist = torch.as_tensor(cluster_dict[cluster_id]['box_dist']).to(device) + box_weight = F.normalize(1/box_dist, p=1, dim=0) # [n] + centers = torch.stack(cluster_dict[cluster_id]['box_center_world'], dim=0) # [n, 3] + yaws = torch.stack(cluster_dict[cluster_id]['box_yaw']) # [n] + + weighted_center = torch.sum(box_weight.unsqueeze(-1) * centers, dim=0) # [3,] + weighted_yaw = torch.sum(box_weight * yaws) # [1,] + + weighted_center[2] = weighted_yaw # just replace z to yaw + + cluster_dict[cluster_id]['se2'] = weighted_center # [x, y, yaw] + + # DEBUG = True + if 
DEBUG: + from icecream import ic + ic(cluster_dict[cluster_id]['box_idx']) + ic(centers) + ic(yaws) + ic(box_weight) + ic(cluster_dict[cluster_id]['se2']) + + cluster_dict[cluster_id].pop('box_dist') + cluster_dict[cluster_id].pop('box_center_world') + cluster_dict[cluster_id].pop('box_yaw') + + cluster_id += 1 + for idx in cluster_box_idxs: + remain_box.remove(idx) + + vertex_num = cluster_id + agent_num = N + landmark_num = cluster_id - N + # ic(agent_num) + # ic(landmark_num) + + """ + Now we have clusters for objects. we can create pose graph. + First we consider center as landmark. + Maybe set corner as landmarks in the future. + """ + pgo = PoseGraphOptimization2D(verbose=False) + if DEBUG: + pgo = PoseGraphOptimization2D(verbose=True) + # Add agent to vertexs + for agent_id in range(N): + v_id = agent_id + # notice lidar_pose use degree format, translate it to radius. + # pose_np = lidar_poses[agent_id, [0,1,4]].cpu().numpy() + pose_np = noisy_lidar_poses[agent_id, [0,1,4]].cpu().numpy() + pose_np[2] = np.deg2rad(pose_np[2]) # radius + v_pose = g2o.SE2(pose_np) + # if agent_id == 0 and DEBUG: + # pgo.add_vertex(id=v_id, pose=v_pose, fixed=True) + # else: + pgo.add_vertex(id=v_id, pose=v_pose, fixed=False) + + # Add landmark to vertexs + for landmark_id in range(N, cluster_id): + v_id = landmark_id + v_pose = g2o.SE2(cluster_dict[landmark_id]['se2'].cpu().numpy()) + pgo.add_vertex(id=v_id, pose=v_pose, fixed=False) + + # Add agent-landmark edge to edge + for landmark_id in range(N, cluster_id): + for box_idx in cluster_dict[landmark_id]['box_idx']: + agent_id = box_idx_to_agent[box_idx] + e_pose = g2o.SE2(pred_box3d_cat[box_idx][[0,1,6]].cpu().numpy()) + pgo.add_edge(vertices=[agent_id, landmark_id], measurement=e_pose, information=np.identity(3)) + + pgo.optimize() + + pose_new_list = [] + for agent_id in range(N): + # print(pgo.get_pose(agent_id).vector()) + pose_new_list.append(pgo.get_pose(agent_id).vector()) + + refined_pose = np.array(pose_new_list) + refined_pose[:,2] = np.rad2deg(refined_pose[:,2]) # rad -> degree, same as source + + return refined_pose + +def box_alignment(pred_corner3d_list, uncertainty_list, lidar_poses, record_len, proj_first=False): + """ + Args: + pred_corner3d_list: list of tensors, with shape [[N1_object, 8, 3], [N2_object, 8, 3], ...,[N_sumcav_object, 8, 3]] + box in each agent's coordinate. (proj_first=False) + + pred_box3d_list: not necessary + list of tensors, with shape [[N1_object, 7], [N2_object, 7], ...,[N_sumcav_object, 7]] + + scores_list: list of tensor, [[N1_object,], [N2_object,], ...,[N_sumcav_object,]] + box confidence score. 
+ + lidar_poses: torch.Tensor [sum(cav), 6] + + record_len: torch.Tensor + Returns: + refined_lidar_pose: torch.Tensor [sum(cav), 6] + """ + refined_lidar_pose = [] + start_idx = 0 + for b in record_len: + refined_lidar_pose.append( + torch.from_numpy( + box_alignment_relative_sample( + pred_corner3d_list[start_idx: start_idx + b], + lidar_poses[start_idx: start_idx + b], + clean_lidar_pose=None, + uncertainty_list= None if uncertainty_list is None else uncertainty_list[start_idx: start_idx + b] + ) + ) + ) + start_idx += b + + return torch.cat(refined_lidar_pose, dim=0) + +def vis_corners_list(corner3d_list, filename="/GPFS/rhome/yifanlu/OpenCOOD/opencood/corners.png"): + """ + Args: + corner3d: list of torch.Tensor, shape [N, 8, 3] + + """ + COLOR = ['red','springgreen','dodgerblue', 'darkviolet'] + box_idx = 0 + + for idx in range(len(corner3d_list)): + corner3d = corner3d_list[idx] + if torch.is_tensor(corner3d): + corner3d = corner3d.cpu().numpy() + + corner2d = corner3d[:,:4,:2] + import matplotlib.pyplot as plt + for i in range(corner2d.shape[0]): + plt.scatter(corner2d[i,[0,1],0], corner2d[i,[0,1], 1], s=2, c=COLOR[idx]) + plt.plot(corner2d[i,[0,1,2,3,0],0], corner2d[i,[0,1,2,3,0], 1], linewidth=1, c=COLOR[idx]) + plt.text(corner2d[i,0,0], corner2d[i,0,1], s=str(box_idx), fontsize="xx-small") + box_idx += 1 + plt.gca().invert_yaxis() + plt.axis('equal') + plt.savefig(filename, dpi=400) + plt.clf() + +def vis_corners(corner3d, filename="/GPFS/rhome/yifanlu/OpenCOOD/opencood/corners.png"): + """ + Args: + corner3d: torch.Tensor, shape [N, 8, 3] + + box3d: torch.Tensor shape [N, 7] + """ + if torch.is_tensor(corner3d): + corner3d = corner3d.cpu().numpy() + + + corner2d = corner3d[:,:4,:2] + import matplotlib.pyplot as plt + for i in range(corner2d.shape[0]): + plt.scatter(corner2d[i,[0,1],0], corner2d[i,[0,1], 1], s=2) + plt.plot(corner2d[i,[0,1,2,3,0],0], corner2d[i,[0,1,2,3,0], 1]) + # plt.text(corner2d[i,0,0], corner2d[i,0,1], s=f"{box3d[i,0]:.2f},{box3d[i,1]:.2f},{box3d[i,6]:.2f}", fontsize='xx-small') + plt.axis('equal') + plt.savefig(filename, dpi=300) + plt.clf() + +def vis_pose(lidar_poses): + """ + Args: + lidar_poses: torch.Tensor shape [N_, 6], x,y,z, roll, yaw, pitch + """ + h = 1.56 + l = 3.9 + w = 1.6 + if torch.is_tensor(lidar_poses): + lidar_poses = lidar_poses.cpu().numpy() + + box3d = np.zeros((lidar_poses.shape[0], 7)) + box3d[:,0] = lidar_poses[:,0] + box3d[:,1] = lidar_poses[:,1] + box3d[:,3] = h # hwl order + box3d[:,4] = w + box3d[:,5] = l + box3d[:,6] = np.deg2rad(lidar_poses[:,4]) # degree -> radius + + corner3d = box_utils.boxes_to_corners_3d(box3d, order='hwl') + vis_corners(corner3d, box3d, "/GPFS/rhome/yifanlu/OpenCOOD/opencood/pose_corners.png") + +def test_pred_gt_box(): + gt_corners_list = torch.load("/GPFS/rhome/yifanlu/workspace/OpenCOOD/box_align_items/previous_items/gt_box_list.pt") + data = torch.load("/GPFS/rhome/yifanlu/OpenCOOD/box_align_items/stage1_output_02/0.pt") + pred_corner3d_list, pred_box3d_list, scores_list, record_len, lidar_pose, lidar_pose_clean = data + + lidar_pose_tfm = pose_to_tfm(lidar_pose, dof=6) + lidar_pose_clean_tfm = pose_to_tfm(lidar_pose_clean, dof=6) # Tw_c + N = lidar_pose.shape[0] + + pred_corners_world_list = \ + [box_utils.project_box3d(pred_corner3d_list[i], lidar_pose_tfm[i]) for i in range(N)] # [[N1, 8, 3], [N2, 8, 3],...] 
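The `box_alignment` wrapper above walks `record_len` to slice the flat, batch-concatenated box and pose lists into one group per sample before refining each group independently. A toy illustration of that slicing (made-up counts; the per-sample call is left as a comment):

```python
import torch

record_len = torch.tensor([3, 2])                                      # 2 samples, 3 and 2 agents
pred_corner3d_list = [torch.randn(n, 8, 3) for n in (4, 2, 5, 1, 3)]   # one entry per agent
lidar_poses = torch.randn(5, 6)                                        # [sum(cav), 6]

start_idx = 0
for b in record_len:
    sample_corners = pred_corner3d_list[start_idx: start_idx + int(b)]  # this sample's agents
    sample_poses = lidar_poses[start_idx: start_idx + int(b)]
    # refined = box_alignment_relative_sample(sample_corners, sample_poses, ...)
    start_idx += int(b)
```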
+ + gt_corners_world_list = \ + [box_utils.project_box3d(gt_corners_list[i], lidar_pose_clean_tfm[i]) for i in range(N)] + + vis_corners_list([torch.cat(pred_corners_world_list, dim=0), torch.cat(gt_corners_world_list, dim=0)], filename="/GPFS/rhome/yifanlu/OpenCOOD/box_align_items/gt_box_pred_box.png") + + + + +def test_gt_boxes_world(): + data = torch.load("/GPFS/rhome/yifanlu/OpenCOOD/box_align_items/stage1_output_02/0.pt") + pred_corner3d_list, pred_box3d_list, scores_list, record_len, lidar_pose, lidar_pose_clean = data + + gt_poses_tensor = lidar_pose_clean + noisy_poses_tensor = lidar_pose + + gt_corners_list = torch.load("/GPFS/rhome/yifanlu/workspace/OpenCOOD/box_align_items/previous_items/gt_box_list.pt") + + # refined_poses = box_alignment_sample(gt_corners_list, noisy_poses_tensor, noisy_poses_tensor) + refined_poses = box_alignment_relative_sample(pred_corner3d_list, noisy_poses_tensor, gt_poses_tensor) + print("before:\n", noisy_poses_tensor.cpu().numpy()[:,[0,1,4]]) + + print("after:\n", refined_poses) + + print("gt:\n", gt_poses_tensor.cpu().numpy()[:,[0,1,4]]) + + # gt_corners_world_list = \ + # [box_utils.project_box3d(gt_corners_list[i], lidar_pose_tfm[i]) for i in range(3)] # [[N1, 8, 3], [N2, 8, 3],...] + + # vis_corners_list(gt_corners_world_list, filename="/GPFS/rhome/yifanlu/workspace/OpenCOOD/box_align_items/gt_corners.png") + + +def test_box_align_tmp(): + """ + This func input different noise_std pose (load from stored files). + And run pose graph optimization, compare the localization error w/wo uncertainty/landmark SE2, etc. + """ + noise_stds = ['02','04','06'] + items = ["16"] + torch.set_printoptions(precision=3, sci_mode=False) + np.set_printoptions(precision=3, suppress=True) + for item in items: + for noise_std in noise_stds: + file_dir = f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/box_align_items/stage1_output_{noise_std}_w_uncertainty/{item}.pt" + data = torch.load(file_dir) + pred_corner3d_list, pred_box3d_list, uncertainty_list, record_len, lidar_pose, lidar_pose_clean = data + lidar_pose[0] = lidar_pose_clean[0] + refined_pose_SE2 = box_alignment_relative_sample(pred_corner3d_list, lidar_pose_clean, lidar_pose, uncertainty_list=uncertainty_list, landmark_SE2=True) + refined_pose = box_alignment_relative_sample(pred_corner3d_list, lidar_pose_clean, lidar_pose, uncertainty_list=uncertainty_list, landmark_SE2=False) + # refined_pose = box_alignment_sample(pred_corner3d_list, lidar_pose, lidar_pose) + lidar_pose_clean = lidar_pose_clean[:,[0,1,4]].cpu().numpy() + print(f"noise std: {noise_std}: SE2") + print(np.abs(refined_pose_SE2 - lidar_pose_clean)) + # print(f"PointXY") + # print(np.abs(refined_pose - lidar_pose_clean)) + print(f"original error:") + lidar_pose = lidar_pose[:,[0,1,4]].cpu().numpy() + print(np.abs(lidar_pose - lidar_pose_clean)) + # print(refined_pose_w_u) + # print(lidar_pose_clean) + + +def test_box_align(noise_std="04", relative=True, use_uncertainty=False): + from glob import glob + data_dir = f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/box_align_items/stage1_output_{noise_std}_w_uncertainty/*" + trans_error_before_list = [] + rotate_error_before_list = [] + trans_error_after_list = [] + rotate_error_after_list = [] + + full_files = glob(data_dir) + for filename in full_files: + data = torch.load(filename) + + if relative is False: + pred_corner3d_list, pred_box3d_list, scores_list, record_len, lidar_pose, lidar_pose_clean = data + refined_pose = box_alignment_sample(pred_corner3d_list, None, lidar_pose) + elif relative is True: + 
pred_corner3d_list, pred_box3d_list, uncertainty_list, record_len, lidar_pose, lidar_pose_clean = data + lidar_pose[0] = lidar_pose_clean[0] + # if not use_uncertainty: + # uncertainty_list = None + refined_pose = box_alignment_relative_sample(pred_corner3d_list, lidar_pose_clean, lidar_pose, uncertainty_list=uncertainty_list) + uncertainty_list = None + refined_pose_wo_uncertainty = box_alignment_relative_sample(pred_corner3d_list, lidar_pose_clean, lidar_pose, uncertainty_list=uncertainty_list) + + lidar_pose = lidar_pose.cpu().numpy()[:,[0,1,4]] + lidar_pose_clean = lidar_pose_clean.cpu().numpy()[:,[0,1,4]] + np.set_printoptions(suppress=True, precision=4) + print(lidar_pose[1:]) + print(refined_pose_wo_uncertainty[1:]) + print(refined_pose[1:]) + print(lidar_pose_clean[1:]) + print() + + error_before = np.abs(lidar_pose - lidar_pose_clean) + error_after = np.abs(refined_pose - lidar_pose_clean) + + trans_error_before_list.append(np.mean(error_before[:,[0,1]])) + rotate_error_before_list.append(np.mean(error_before[:,2])) + + trans_error_after_list.append(np.mean(error_after[:,[0,1]])) + rotate_error_after_list.append(np.mean(error_after[:,2])) + + raise + + + out_quantile_dict = {0.8:None, 0.5:None, 0.3:None} + for q in out_quantile_dict.keys(): + out_quantile_dict[q] = (np.quantile(trans_error_before_list, q), + np.quantile(trans_error_after_list, q), + np.quantile(rotate_error_before_list, q), + np.quantile(rotate_error_after_list, q)) + + return out_quantile_dict + # return np.mean(trans_error_before_list), np.mean(rotate_error_before_list), np.mean(trans_error_after_list), np.mean(rotate_error_after_list) + +def main1(): + """ + This function test the box alignment performance on the subset of training set. + """ + for noise in ['02', '04', '06']: + out = test_box_align(noise, relative=True, use_uncertainty=True) + for k,v in out.items(): + with open(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/box_align_items/rel_quantile{k*100}_{noise}_w_u.txt", "w") as f: + f.write(f"trans error before: \t {v[0]}\n") + f.write(f"trans error after: \t {v[1]}\n\n") + + f.write(f"rotate error before: \t {v[2]}\n") + f.write(f"rotate error after: \t {v[3]}\n") + + + out = test_box_align(noise, relative=True, use_uncertainty=False) + for k,v in out.items(): + with open(f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/box_align_items/rel_quantile{k*100}_{noise}_wo_u.txt", "w") as f: + f.write(f"trans error before: \t {v[0]}\n") + f.write(f"trans error after: \t {v[1]}\n\n") + + f.write(f"rotate error before: \t {v[2]}\n") + f.write(f"rotate error after: \t {v[3]}\n") + + +def vis_pose_graph( + poses, + pred_corner3d, + save_dir_path="/GPFS/rhome/yifanlu/OpenCOOD/box_align_items/pose_graph_vis", + ): + """ + Args: + poses: list of np.ndarray + each item is a pose . [pose_before, ..., pose_refined] + + pred_corner3d: list + predicted box for each agent. 
+ + """ + COLOR = ['red','springgreen','dodgerblue', 'darkviolet', 'orange'] + from opencood.utils.transformation_utils import get_relative_transformation + + if not os.path.exists(save_dir_path): + os.mkdir(save_dir_path) + + for iter, pose in enumerate(poses): + box_idx = 0 + # we first transform other agents' box to ego agent's coordinate + relative_t_matrix = get_relative_transformation(pose) + N = pose.shape[0] + + pred_corners3d_in_ego = [box_utils.project_box3d(pred_corner3d[i].cpu().numpy(), relative_t_matrix[i]) for i in range(N)] + + for agent_id in range(len(pred_corners3d_in_ego)): + corner3d = pred_corners3d_in_ego[agent_id] + agent_pos = relative_t_matrix[agent_id][:2,3] # agent's position in ego's coordinate + if torch.is_tensor(corner3d): + corner3d = corner3d.cpu().numpy() + + corner2d = corner3d[:,:4,:2] + center2d = np.mean(corner2d, axis=1) + import matplotlib.pyplot as plt + for i in range(corner2d.shape[0]): + plt.scatter(corner2d[i,[0,1],0], corner2d[i,[0,1], 1], s=2, c=COLOR[agent_id]) + plt.plot(corner2d[i,[0,1,2,3,0],0], corner2d[i,[0,1,2,3,0], 1], linewidth=1, c=COLOR[agent_id]) + plt.text(corner2d[i,0,0], corner2d[i,0,1], s=str(box_idx), fontsize="xx-small") + # add a line connecting box center and agent. + box_center = center2d[i] # [2,] + connection_x = [agent_pos[0], box_center[0]] + connection_y = [agent_pos[1], box_center[1]] + # print(connection_x) + # print(connection_y) + # print() + plt.plot(connection_x, connection_y,'--', linewidth=0.5, c=COLOR[agent_id], alpha=0.3) + box_idx += 1 + + filename = os.path.join(save_dir_path, f"{iter}.png") + plt.gca().invert_yaxis() + plt.axis('equal') + plt.savefig(filename, dpi=400) + plt.clf() + +def vis_pose_graphs(): + noise_stds = ['02','04','06'] + items = ["53", "63", "73", "83"] + torch.set_printoptions(precision=3, sci_mode=False) + np.set_printoptions(precision=3, suppress=True) + for item in items: + for noise_std in noise_stds: + file_dir = f"/GPFS/rhome/yifanlu/workspace/OpenCOOD/box_align_items/stage1_output_{noise_std}_w_uncertainty/{item}.pt" + data = torch.load(file_dir) + pred_corner3d_list, pred_box3d_list, uncertainty_list, record_len, lidar_pose, lidar_pose_clean = data + lidar_pose[0] = lidar_pose_clean[0] + refined_pose_SE2 = box_alignment_relative_sample(pred_corner3d_list, lidar_pose_clean, lidar_pose, uncertainty_list=uncertainty_list, landmark_SE2=True) + ## visualize pred_corner3d with refined_pose. We can set different iteration to animate + save_dir_path = f"/GPFS/rhome/yifanlu/OpenCOOD/box_align_items/pose_graph_vis/{item}_{noise_std}" + poses = [lidar_pose.cpu().numpy(), refined_pose_SE2] + vis_pose_graph(poses, pred_corner3d_list, save_dir_path) + + + + +def main2(): + pass + +if __name__ == "__main__": + # vis_pose_graphs() + test_box_align_tmp() + # main1() + # test_gt_boxes_world() \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/box_align_v2.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/box_align_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..c54e709fe978232ecd8aa6c085d7792d02138b42 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/box_align_v2.py @@ -0,0 +1,437 @@ +""" +This module is designed for box alignment + +We will use g2o for pose graph optimization. 
+""" + + +from opencood.models.sub_modules.pose_graph_optim import PoseGraphOptimization2D +from opencood.utils.transformation_utils import pose_to_tfm +from opencood.utils.common_utils import check_torch_to_numpy +from opencood.utils import box_utils +from collections import OrderedDict +import numpy as np +import torch +import torch.nn.functional as F +import g2o +from icecream import ic +import copy +import os +import matplotlib.pyplot as plt + +DEBUG = False + +def vis_pose_graph(poses, pred_corner3d, save_dir_path, vis_agent=False): + """ + Args: + poses: list of np.ndarray + each item is a pose . [pose_before, ..., pose_refined] + + pred_corner3d: list + predicted box for each agent. + + vis_agent: bool + whether draw the agent's box + + """ + COLOR = ['red','springgreen','dodgerblue', 'darkviolet', 'orange'] + from opencood.utils.transformation_utils import get_relative_transformation + + if not os.path.exists(save_dir_path): + os.makedirs(save_dir_path) + + for iter, pose in enumerate(poses): + box_idx = 0 + # we first transform other agents' box to ego agent's coordinate + relative_t_matrix = get_relative_transformation(pose) + N = pose.shape[0] + nonempty_indices = [idx for (idx, corners) in enumerate(pred_corner3d) if len(corners)!=0] + pred_corners3d_in_ego = [box_utils.project_box3d(pred_corner3d[i], relative_t_matrix[i]) for i in nonempty_indices] + + for agent_id in range(len(pred_corners3d_in_ego)): + if agent_id not in nonempty_indices: + continue + corner3d = pred_corners3d_in_ego[agent_id] + agent_pos = relative_t_matrix[agent_id][:2,3] # agent's position in ego's coordinate + + if vis_agent: + plt.scatter(agent_pos[0], agent_pos[1], s=4, c=COLOR[agent_id]) + + corner2d = corner3d[:,:4,:2] + center2d = np.mean(corner2d, axis=1) + for i in range(corner2d.shape[0]): + plt.scatter(corner2d[i,[0,1],0], corner2d[i,[0,1], 1], s=2, c=COLOR[agent_id]) + plt.plot(corner2d[i,[0,1,2,3,0],0], corner2d[i,[0,1,2,3,0], 1], linewidth=1, c=COLOR[agent_id]) + plt.text(corner2d[i,0,0], corner2d[i,0,1], s=str(box_idx), fontsize="xx-small") + # add a line connecting box center and agent. + box_center = center2d[i] # [2,] + connection_x = [agent_pos[0], box_center[0]] + connection_y = [agent_pos[1], box_center[1]] + + plt.plot(connection_x, connection_y,'--', linewidth=0.5, c=COLOR[agent_id], alpha=0.3) + box_idx += 1 + + filename = os.path.join(save_dir_path, f"{iter}.png") + plt.gca().invert_yaxis() + plt.axis('equal') + plt.savefig(filename, dpi=400) + plt.clf() + + +def all_pair_l2(A, B): + """ All pair L2 distance for A and B + Args: + A : np.ndarray + shape [N_A, D] + B : np.ndarray + shape [N_B, D] + Returns: + C : np.ndarray + shape [N_A, N_B] + """ + TwoAB = 2*A@B.T # [N_A, N_B] + C = np.sqrt( + np.sum(A * A, 1, keepdims=True).repeat(TwoAB.shape[1], axis=1) \ + + np.sum(B * B, 1, keepdims=True).T.repeat(TwoAB.shape[0], axis=0) \ + - TwoAB + ) + return C + + + + +def box_alignment_relative_sample_np( + pred_corners_list, + noisy_lidar_pose, + uncertainty_list=None, + landmark_SE2=True, + adaptive_landmark=False, + normalize_uncertainty=False, + abandon_hard_cases = False, + drop_hard_boxes = False, + drop_unsure_edge = False, + use_uncertainty = True, + thres = 1.5, + yaw_var_thres = 0.2, + max_iterations = 1000): + """ Perform box alignment for one sample. + Correcting the relative pose. 
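A standalone numeric sanity check for all_pair_l2 above, comparing the ||a||^2 + ||b||^2 - 2ab expansion against an explicit loop; the maximum with 0 guards against tiny negative values from rounding before the square root:

import numpy as np

A = np.random.rand(4, 3)
B = np.random.rand(5, 3)

C_fast = np.sqrt(np.maximum(
    np.sum(A * A, 1, keepdims=True) + np.sum(B * B, 1, keepdims=True).T - 2 * A @ B.T,
    0.0))
C_slow = np.array([[np.linalg.norm(a - b) for b in B] for a in A])
assert np.allclose(C_fast, C_slow, atol=1e-6)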
+ + Args: + pred_corners_list: in each ego coordinate + [[N_1, 8, 3], ..., [N_cav1, 8, 3]] + + clean_lidar_poses: + [N_cav1, 6], in degree + + noisy_lidar_poses: + [N_cav1, 6], in degree + + uncertainty_list: + [[N_1, 3], [N_2, 3], ..., [N_cav1, 3]] + + landmark_SE2: + if True, the landmark is SE(2), otherwise R^2 + + adaptive_landmark: (when landmark_SE2 = True) + if True, landmark will turn to R^2 if yaw angles differ a lot + + normalize_uncertainty: bool + if True, normalize the uncertainty + + abandon_hard_cases: bool + if True, algorithm will just return original poses for hard cases + + drop_unsure_edge: bool + + Returns: + refined_lidar_poses: np.ndarray + [N_cav1, 3], + """ + if not use_uncertainty: + uncertainty_list = None + ## first transform point from ego coordinate to world coordinate, using lidar_pose. + order = 'lwh' # hwl + N = noisy_lidar_pose.shape[0] + lidar_pose_noisy_tfm = pose_to_tfm(noisy_lidar_pose) + + nonempty_indices = [idx for (idx, corners) in enumerate(pred_corners_list) if len(corners)!=0] # if one agent detects no boxes, its corners is just []. + + pred_corners_world_list = \ + [box_utils.project_box3d(pred_corners_list[i], lidar_pose_noisy_tfm[i]) for i in nonempty_indices] # [[N1, 8, 3], [N2, 8, 3],...] + pred_box3d_list = \ + [box_utils.corner_to_center(corner, order) for corner in pred_corners_list if len(corner)!=0] # [[N1, 7], [N2, 7], ...], angle in radian + pred_box3d_world_list = \ + [box_utils.corner_to_center(corner, order) for corner in pred_corners_world_list] # [[N1, 7], [N2, 7], ...], angle in radian + pred_center_list = \ + [np.mean(corners, axis=1) for corners in pred_corners_list if len(corners)!=0] # [[N1,3], [N2,3], ...] + + pred_center_world_list = \ + [pred_box3d_world[:,:3] for pred_box3d_world in pred_box3d_world_list] + pred_yaw_world_list = \ + [pred_box3d[:, 6] for pred_box3d in pred_box3d_world_list] + pred_len = \ + [len(corners) for corners in pred_corners_list] + + + box_idx_to_agent = [] + for i in range(N): + box_idx_to_agent += [i] * pred_len[i] + + pred_center_cat = np.concatenate(pred_center_list, axis=0) # [sum(pred_box), 3] + pred_center_world_cat = np.concatenate(pred_center_world_list, axis=0) # [sum(pred_box), 3] + pred_box3d_cat = np.concatenate(pred_box3d_list, axis=0) # [sum(pred_box), 7] + pred_yaw_world_cat = np.concatenate(pred_yaw_world_list, axis=0) # [sum(pred_box)] + + # hard-coded currently + w_a = 1.6 # width of anchor + l_a = 3.9 # length of anchor + d_a_square = w_a ** 2 + l_a ** 2 # anchor's diag + + + if uncertainty_list is not None: + pred_log_sigma2_cat = np.concatenate([i for i in uncertainty_list if len(i)!=0], axis=0) + # Since the regression target is x_t = (x_g - x_a)/d_a, + # var(x) = d_a^2 * var(x_t) + # so we 1/var(x) = 1/var(x_t) / d_a^2 + # sigma_{delta_x}^2 -> sigma_x^2. 
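# In other words: the head predicts log sigma_t^2 for the normalized offset target
# x_t = (x_g - x_a) / d_a, so var(x) = d_a^2 * exp(log sigma_t^2) and the precision
# used as an edge weight is 1 / var(x) = exp(-log sigma_t^2) / d_a^2 for x and y,
# while the yaw channel keeps exp(-log sigma_t^2) unscaled. For example, with
# log sigma_t^2 = 0 and d_a^2 = 1.6^2 + 3.9^2 = 17.77, the x/y weight is about 0.056.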
+ pred_certainty_cat = np.exp(-pred_log_sigma2_cat) + pred_certainty_cat[:,:2] /= d_a_square + + + if normalize_uncertainty: + pred_certainty_cat = np.sqrt(pred_certainty_cat) + + + pred_center_allpair_dist = all_pair_l2(pred_center_world_cat, pred_center_world_cat) # [sum(pred_box), sum(pred_box)] + + # let pair from one vehicle be max distance + MAX_DIST = 10000 + cum = 0 + for i in range(N): + pred_center_allpair_dist[cum: cum + pred_len[i], cum: cum +pred_len[i]] = MAX_DIST # do not include itself + cum += pred_len[i] + + + cluster_id = N # let the vertex id of object start from N + cluster_dict = OrderedDict() + remain_box = set(range(cum)) + + for box_idx in range(cum): + + if box_idx not in remain_box: # already assigned + continue + + within_thres_idx_tensor = (pred_center_allpair_dist[box_idx] < thres).nonzero()[0] + within_thres_idx_list = within_thres_idx_tensor.tolist() + + if len(within_thres_idx_list) == 0: # if it's a single box + continue + + # start from within_thres_idx_list, find new box added to the cluster + explored = [box_idx] + unexplored = [idx for idx in within_thres_idx_list if idx in remain_box] + + while unexplored: + idx = unexplored[0] + within_thres_idx_tensor = (pred_center_allpair_dist[box_idx] < thres).nonzero()[0] + within_thres_idx_list = within_thres_idx_tensor.tolist() + for newidx in within_thres_idx_list: + if (newidx not in explored) and (newidx not in unexplored) and (newidx in remain_box): + unexplored.append(newidx) + unexplored.remove(idx) + explored.append(idx) + + if len(explored) == 1: # it's a single box, neighbors have been assigned + remain_box.remove(box_idx) + continue + + cluster_box_idxs = explored + + cluster_dict[cluster_id] = OrderedDict() + cluster_dict[cluster_id]['box_idx'] = [idx for idx in cluster_box_idxs] + cluster_dict[cluster_id]['box_center_world'] = [pred_center_world_cat[idx] for idx in cluster_box_idxs] # coordinate in world, [3,] + cluster_dict[cluster_id]['box_yaw'] = [pred_yaw_world_cat[idx] for idx in cluster_box_idxs] + + yaw_var = np.var(cluster_dict[cluster_id]['box_yaw']) + cluster_dict[cluster_id]['box_yaw_varies'] = yaw_var > yaw_var_thres + cluster_dict[cluster_id]['active'] = True + + + ########### adaptive_landmark ################## + if landmark_SE2: + if adaptive_landmark and yaw_var > yaw_var_thres: + landmark = pred_center_world_cat[box_idx][:2] + for _box_idx in cluster_box_idxs: + pred_certainty_cat[_box_idx] *= 2 + else: + landmark = copy.deepcopy(pred_center_world_cat[box_idx]) + landmark[2] = pred_yaw_world_cat[box_idx] + else: + landmark = pred_center_world_cat[box_idx][:2] + ################################################## + + + cluster_dict[cluster_id]['landmark'] = landmark # [x, y, yaw] or [x, y] + cluster_dict[cluster_id]['landmark_SE2'] = True if landmark.shape[0] == 3 else False + + DEBUG = False + if DEBUG: + from icecream import ic + ic(cluster_dict[cluster_id]['box_idx']) + ic(cluster_dict[cluster_id]['box_center_world']) + ic(cluster_dict[cluster_id]['box_yaw']) + ic(cluster_dict[cluster_id]['landmark']) + + + cluster_id += 1 + for idx in cluster_box_idxs: + remain_box.remove(idx) + + + vertex_num = cluster_id + agent_num = N + landmark_num = cluster_id - N + + + ########### abandon_hard_cases ########## + """ + We should think what is hard cases for agent-object pose graph optimization + 1. Overlapping boxes are rare (landmark_num <= 3) + 2. 
Yaw angles differ a lot + """ + + if abandon_hard_cases: + # case1: object num is smaller than 3 + if landmark_num <= 3: + return noisy_lidar_pose[:,[0,1,4]] + + # case2: more than half of the landmarks yaw varies + yaw_varies_cnt = sum([cluster_dict[i]["box_yaw_varies"] for i in range(agent_num, vertex_num)]) + if yaw_varies_cnt >= 0.5 * landmark_num: + return noisy_lidar_pose[:,[0,1,4]] + + ########### drop hard boxes ############ + + if drop_hard_boxes: + for landmark_id in range(agent_num, vertex_num): + if cluster_dict[landmark_id]['box_yaw_varies']: + cluster_dict[landmark_id]['active'] = False + + + + + """ + Now we have clusters for objects. we can create pose graph. + First we consider center as landmark. + Maybe set corner as landmarks in the future. + """ + pgo = PoseGraphOptimization2D() + + # Add agent to vertexs + for agent_id in range(agent_num): + v_id = agent_id + # notice lidar_pose use degree format, translate it to radians. + pose_np = noisy_lidar_pose[agent_id, [0,1,4]] + pose_np[2] = np.deg2rad(pose_np[2]) # radians + v_pose = g2o.SE2(pose_np) + + if agent_id == 0: + pgo.add_vertex(id=v_id, pose=v_pose, fixed=True) + else: + pgo.add_vertex(id=v_id, pose=v_pose, fixed=False) + + # Add object to vertexs + for landmark_id in range(agent_num, vertex_num): + v_id = landmark_id + landmark = cluster_dict[landmark_id]['landmark'] # (3,) or (2,) + landmark_SE2 = cluster_dict[landmark_id]['landmark_SE2'] + + if landmark_SE2: + v_pose = g2o.SE2(landmark) + else: + v_pose = landmark + + pgo.add_vertex(id=v_id, pose=v_pose, fixed=False, SE2=landmark_SE2) + + # Add agent-object edge to edge set + for landmark_id in range(agent_num, vertex_num): + landmark_SE2 = cluster_dict[landmark_id]['landmark_SE2'] + + if not cluster_dict[landmark_id]['active']: + continue + + for box_idx in cluster_dict[landmark_id]['box_idx']: + agent_id = box_idx_to_agent[box_idx] + if landmark_SE2: + e_pose = g2o.SE2(pred_box3d_cat[box_idx][[0,1,6]].astype(np.float64)) + info = np.identity(3, dtype=np.float64) + if uncertainty_list is not None: + info[[0,1,2],[0,1,2]] = pred_certainty_cat[box_idx] + + ############ drop_unsure_edge ########### + if drop_unsure_edge and sum(pred_certainty_cat[box_idx]) < 100: + continue + + else: + e_pose = pred_box3d_cat[box_idx][[0,1]].astype(np.float64) + info = np.identity(2, dtype=np.float64) + if uncertainty_list is not None: + info[[0,1],[0,1]] = pred_certainty_cat[box_idx][:2] + + ############ drop_unsure_edge ############ + if drop_unsure_edge and sum(pred_certainty_cat[box_idx]) < 100: + continue + + pgo.add_edge(vertices=[agent_id, landmark_id], measurement=e_pose, information=info, SE2=landmark_SE2) + + pgo.optimize(max_iterations) + + pose_new_list = [] + for agent_id in range(agent_num): + # print(pgo.get_pose(agent_id).vector()) + pose_new_list.append(pgo.get_pose(agent_id).vector()) + + refined_pose = np.array(pose_new_list) + refined_pose[:,2] = np.rad2deg(refined_pose[:,2]) # rad -> degree, same as source + + return refined_pose + +def box_alignment_relative_np(pred_corner3d_list, + uncertainty_list, + lidar_poses, + record_len, + **kwargs): + """ + Args: + pred_corner3d_list: list of tensors, with shape [[N1_object, 8, 3], [N2_object, 8, 3], ...,[N_sumcav_object, 8, 3]] + box in each agent's coordinate. (proj_first=False) + + pred_box3d_list: not necessary + list of tensors, with shape [[N1_object, 7], [N2_object, 7], ...,[N_sumcav_object, 7]] + + scores_list: list of tensor, [[N1_object,], [N2_object,], ...,[N_sumcav_object,]] + box confidence score. 
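The edge information matrices assembled above are diagonal: an SE(2) landmark edge weights x, y and yaw, while a point landmark edge weights only x and y. A minimal sketch with a hypothetical certainty row:

import numpy as np

certainty = np.array([0.05, 0.07, 1.3])        # hypothetical [x, y, yaw] precisions for one box

info_se2 = np.identity(3)
info_se2[[0, 1, 2], [0, 1, 2]] = certainty     # SE(2) landmark: 3x3 information matrix

info_point = np.identity(2)
info_point[[0, 1], [0, 1]] = certainty[:2]     # R^2 landmark: 2x2, x/y only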
+ + lidar_poses: torch.Tensor [sum(cav), 6] + + record_len: torch.Tensor + Returns: + refined_lidar_pose: torch.Tensor [sum(cav), 6] + """ + refined_lidar_pose = [] + start_idx = 0 + for b in record_len: + refined_lidar_pose.append( + box_alignment_relative_sample_np( + pred_corner3d_list[start_idx: start_idx + b], + lidar_poses[start_idx: start_idx + b], + uncertainty_list= None if uncertainty_list is None else uncertainty_list[start_idx: start_idx + b], + **kwargs + ) + ) + start_idx += b + + return np.cat(refined_lidar_pose, axis=0) + + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/cbam.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/cbam.py new file mode 100644 index 0000000000000000000000000000000000000000..9fc5d628f81643add567f51109b9e432a262d6b0 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/cbam.py @@ -0,0 +1,279 @@ +import torch +import torch.nn as nn +import math +import torch.utils.model_zoo as model_zoo + + +__all__ = ['ResNet', 'resnet18_cbam', 'resnet34_cbam', 'resnet50_cbam', 'resnet101_cbam', + 'resnet152_cbam'] + + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1): + "3x3 convolution with padding" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + +def conv1x1(in_planes, out_planes, stride=1): + "1x1 convolution with padding" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, + padding=0, bias=False) + + +class ChannelAttention(nn.Module): + def __init__(self, in_planes, ratio=16): + super(ChannelAttention, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.max_pool = nn.AdaptiveMaxPool2d(1) + + self.fc = nn.Sequential(nn.Conv2d(in_planes, in_planes // 16, 1, bias=False), + nn.ReLU(), + nn.Conv2d(in_planes // 16, in_planes, 1, bias=False)) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + avg_out = self.fc(self.avg_pool(x)) + max_out = self.fc(self.max_pool(x)) + out = avg_out + max_out + return self.sigmoid(out) + +class SpatialAttention(nn.Module): + def __init__(self, kernel_size=7): + super(SpatialAttention, self).__init__() + + self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=kernel_size//2, bias=False) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + avg_out = torch.mean(x, dim=1, keepdim=True) + max_out, _ = torch.max(x, dim=1, keepdim=True) + x = torch.cat([avg_out, max_out], dim=1) + x = self.conv1(x) + return self.sigmoid(x) + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv1x1(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv1x1(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + + self.ca = ChannelAttention(planes) + self.sa = SpatialAttention() + + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + out = 
self.ca(out) * out + out = self.sa(out) * out + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4) + self.relu = nn.ReLU(inplace=True) + + self.ca = ChannelAttention(planes * 4) + self.sa = SpatialAttention() + + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + out = self.ca(out) * out + out = self.sa(out) * out + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + + def __init__(self, block, layers, num_classes=1000): + self.inplanes = 64 + super(ResNet, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Linear(512 * block.expansion, num_classes) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + + return x + + +def resnet18_cbam(pretrained=False, **kwargs): + """Constructs a ResNet-18 model. 
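With the ChannelAttention and SpatialAttention modules defined above in scope, a shape sketch of the per-block re-weighting (illustrative sizes only; note the ratio argument of ChannelAttention is unused, the reduction is hard-coded to 16):

import torch

x = torch.randn(2, 64, 32, 32)       # N, C, H, W
ca = ChannelAttention(64)            # outputs per-channel weights of shape [2, 64, 1, 1]
sa = SpatialAttention()              # outputs per-pixel weights of shape [2, 1, 32, 32]

x = ca(x) * x                        # channel re-weighting, broadcast over H and W
x = sa(x) * x                        # spatial re-weighting, broadcast over channels
print(x.shape)                       # torch.Size([2, 64, 32, 32])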
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) + if pretrained: + pretrained_state_dict = model_zoo.load_url(model_urls['resnet18']) + now_state_dict = model.state_dict() + now_state_dict.update(pretrained_state_dict) + model.load_state_dict(now_state_dict) + return model + + +def resnet34_cbam(pretrained=False, **kwargs): + """Constructs a ResNet-34 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) + if pretrained: + pretrained_state_dict = model_zoo.load_url(model_urls['resnet34']) + now_state_dict = model.state_dict() + now_state_dict.update(pretrained_state_dict) + model.load_state_dict(now_state_dict) + return model + + +def resnet50_cbam(pretrained=False, **kwargs): + """Constructs a ResNet-50 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) + if pretrained: + pretrained_state_dict = model_zoo.load_url(model_urls['resnet50']) + now_state_dict = model.state_dict() + now_state_dict.update(pretrained_state_dict) + model.load_state_dict(now_state_dict) + return model + + +def resnet101_cbam(pretrained=False, **kwargs): + """Constructs a ResNet-101 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) + if pretrained: + pretrained_state_dict = model_zoo.load_url(model_urls['resnet101']) + now_state_dict = model.state_dict() + now_state_dict.update(pretrained_state_dict) + model.load_state_dict(now_state_dict) + return model + + +def resnet152_cbam(pretrained=False, **kwargs): + """Constructs a ResNet-152 model. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) + if pretrained: + pretrained_state_dict = model_zoo.load_url(model_urls['resnet152']) + now_state_dict = model.state_dict() + now_state_dict.update(pretrained_state_dict) + model.load_state_dict(now_state_dict) + return model \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/cia_ssd_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/cia_ssd_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..ce6e1a77e1d971b6a1578fa833f7874194ed2a0b --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/cia_ssd_utils.py @@ -0,0 +1,101 @@ +import torch +from torch import nn + + + +class SSFA(nn.Module): + def __init__(self, args): + super(SSFA, self).__init__() + self._num_input_features = args['feature_num'] # 128 + + seq = [nn.ZeroPad2d(1)] + seq += get_conv_layers('Conv2d', 128, 128, n_layers=3, kernel_size=[3, 3, 3], + stride=[1, 1, 1], padding=[0, 1, 1], sequential=False) + self.bottom_up_block_0 = nn.Sequential(*seq) + self.bottom_up_block_1 = get_conv_layers('Conv2d', 128, 256, n_layers=3, kernel_size=[3, 3, 3], + stride=[2, 1, 1], padding=[1, 1, 1]) + + self.trans_0 = get_conv_layers('Conv2d', 128, 128, n_layers=1, kernel_size=[1], stride=[1], padding=[0]) + self.trans_1 = get_conv_layers('Conv2d', 256, 256, n_layers=1, kernel_size=[1], stride=[1], padding=[0]) + + self.deconv_block_0 = get_conv_layers('ConvTranspose2d', 256, 128, n_layers=1, kernel_size=[3], stride=[2], + padding=[1], output_padding=[1]) + self.deconv_block_1 = get_conv_layers('ConvTranspose2d', 256, 128, n_layers=1, kernel_size=[3], stride=[2], + padding=[1], output_padding=[1]) + + self.conv_0 = get_conv_layers('Conv2d', 128, 128, n_layers=1, kernel_size=[3], stride=[1], padding=[1]) + self.conv_1 = get_conv_layers('Conv2d', 128, 128, n_layers=1, kernel_size=[3], stride=[1], padding=[1]) + + self.w_0 = get_conv_layers('Conv2d', 128, 1, n_layers=1, kernel_size=[1], stride=[1], padding=[0], relu_last=False) + self.w_1 = get_conv_layers('Conv2d', 128, 1, n_layers=1, kernel_size=[1], stride=[1], padding=[0], relu_last=False) + + # default init_weights for conv(msra) and norm in ConvModule + def init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.xavier_normal_(m.weight, gain=1) + if hasattr(m, "bias") and m.bias is not None: + nn.init.constant_(m.bias, 0) + + def forward(self, x): + x_0 = self.bottom_up_block_0(x) + x_1 = self.bottom_up_block_1(x_0) + x_trans_0 = self.trans_0(x_0) + x_trans_1 = self.trans_1(x_1) + x_middle_0 = self.deconv_block_0(x_trans_1) + x_trans_0 + x_middle_1 = self.deconv_block_1(x_trans_1) + x_output_0 = self.conv_0(x_middle_0) + x_output_1 = self.conv_1(x_middle_1) + + x_weight_0 = self.w_0(x_output_0) + x_weight_1 = self.w_1(x_output_1) + x_weight = torch.softmax(torch.cat([x_weight_0, x_weight_1], dim=1), dim=1) + x_output = x_output_0 * x_weight[:, 0:1, :, :] + x_output_1 * x_weight[:, 1:, :, :] + + return x_output.contiguous() + + +def get_conv_layers(conv_name, in_channels, out_channels, n_layers, kernel_size, stride, + padding, relu_last=True, sequential=True, **kwargs): + """ + Build convolutional layers. 
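SSFA above keeps a full-resolution spatial branch and a stride-2 semantic branch, upsamples the latter, and blends the two 128-channel outputs with a per-pixel softmax. A usage sketch with the SSFA class above in scope (even H and W assumed so the deconv output matches the skip branch):

import torch

ssfa = SSFA({'feature_num': 128})
x = torch.randn(2, 128, 64, 64)       # N, 128, H, W with H, W even
out = ssfa(x)
print(out.shape)                      # torch.Size([2, 128, 64, 64]) -- resolution preserved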
kernel_size, stride and padding should be a list with the lengths that match n_layers + """ + seq = [] + for i in range(n_layers): + seq.extend([getattr(nn, conv_name)(in_channels, out_channels, kernel_size[i], stride=stride[i], + padding=padding[i], bias=False, **{k: v[i] for k, v in kwargs.items()}), + nn.BatchNorm2d(out_channels, eps=1e-3, momentum=0.01)]) + if i < n_layers - 1 or relu_last: + seq.append(nn.ReLU()) + in_channels = out_channels + if sequential: + return nn.Sequential(*seq) + else: + return seq + + +class Head(nn.Module): + def __init__(self, num_input, num_pred, num_cls, num_iou=2, use_dir=False, num_dir=1): + super(Head, self).__init__() + self.use_dir = use_dir + + self.conv_box = nn.Conv2d(num_input, num_pred, 1) # 128 -> 14 + self.conv_cls = nn.Conv2d(num_input, num_cls, 1) # 128 -> 2 + self.conv_iou = nn.Conv2d(num_input, num_iou, 1, bias=False) + + if self.use_dir: + self.conv_dir = nn.Conv2d(num_input, num_dir, 1) # 128 -> 4 + + def forward(self, x): + box_preds = self.conv_box(x) + cls_preds = self.conv_cls(x) + ret_dict = {"reg_preds": box_preds, "cls_preds": cls_preds} + if self.use_dir: + dir_preds = self.conv_dir(x) # dir_preds.shape=[8, w, h, 4] + ret_dict["dir_preds"] = dir_preds + else: + ret_dict["dir_preds"] = torch.zeros((len(box_preds), 1, 2)) + + ret_dict["iou_preds"] = self.conv_iou(x) + + return ret_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/convgru.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/convgru.py new file mode 100644 index 0000000000000000000000000000000000000000..a489157ae3aef50ad9be825a8c54d79c0dd20ca5 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/convgru.py @@ -0,0 +1,197 @@ +import os +import torch +from torch import nn +from torch.autograd import Variable + + +class ConvGRUCell(nn.Module): + def __init__(self, input_size, input_dim, hidden_dim, kernel_size, bias): + """ + Initialize the ConvLSTM cell + :param input_size: (int, int) + Height and width of input tensor as (height, width). + :param input_dim: int + Number of channels of input tensor. + :param hidden_dim: int + Number of channels of hidden state. + :param kernel_size: (int, int) + Size of the convolutional kernel. + :param bias: bool + Whether or not to add the bias. + :param dtype: torch.cuda.FloatTensor or torch.FloatTensor + Whether or not to use cuda. 
+ """ + super(ConvGRUCell, self).__init__() + self.height, self.width = input_size + self.padding = kernel_size[0] // 2, kernel_size[1] // 2 + self.hidden_dim = hidden_dim + self.bias = bias + + self.conv_gates = nn.Conv2d(in_channels=input_dim + hidden_dim, + out_channels=2 * self.hidden_dim, + # for update_gate,reset_gate respectively + kernel_size=kernel_size, + padding=self.padding, + bias=self.bias) + + self.conv_can = nn.Conv2d(in_channels=input_dim + hidden_dim, + out_channels=self.hidden_dim, + # for candidate neural memory + kernel_size=kernel_size, + padding=self.padding, + bias=self.bias) + + def init_hidden(self, batch_size): + return (Variable( + torch.zeros(batch_size, self.hidden_dim, self.height, self.width))) + + def forward(self, input_tensor, h_cur): + """ + :param self: + :param input_tensor: (b, c, h, w) + input is actually the target_model + :param h_cur: (b, c_hidden, h, w) + current hidden and cell states respectively + :return: h_next, + next hidden state + """ + combined = torch.cat([input_tensor, h_cur], dim=1) + combined_conv = self.conv_gates(combined) + + gamma, beta = torch.split(combined_conv, self.hidden_dim, dim=1) + reset_gate = torch.sigmoid(gamma) + update_gate = torch.sigmoid(beta) + + combined = torch.cat([input_tensor, reset_gate * h_cur], dim=1) + cc_cnm = self.conv_can(combined) + cnm = torch.tanh(cc_cnm) + + h_next = (1 - update_gate) * h_cur + update_gate * cnm + return h_next + + +class ConvGRU(nn.Module): + def __init__(self, input_size, input_dim, hidden_dim, kernel_size, + num_layers, + batch_first=False, bias=True, return_all_layers=False): + """ + :param input_size: (int, int) + Height and width of input tensor as (height, width). + :param input_dim: int e.g. 256 + Number of channels of input tensor. + :param hidden_dim: int e.g. 1024 + Number of channels of hidden state. + :param kernel_size: (int, int) + Size of the convolutional kernel. + :param num_layers: int + Number of ConvLSTM layers + :param dtype: torch.cuda.FloatTensor or torch.FloatTensor + Whether or not to use cuda. + :param alexnet_path: str + pretrained alexnet parameters + :param batch_first: bool + if the first position of array is batch or not + :param bias: bool + Whether or not to add the bias. 
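ConvGRUCell.forward above is the standard convolutional GRU update: with convolutions W_r, W_z, W_h and element-wise product o, r = sigmoid(W_r * [x, h]), z = sigmoid(W_z * [x, h]), h_cand = tanh(W_h * [x, r o h]), and h_next = (1 - z) o h + z o h_cand (gamma/beta in the code are the reset and update pre-activations). A usage sketch with the cell class above in scope:

import torch

cell = ConvGRUCell(input_size=(32, 32), input_dim=64, hidden_dim=64,
                   kernel_size=(3, 3), bias=True)
x = torch.randn(2, 64, 32, 32)               # (b, c, h, w)
h = cell.init_hidden(batch_size=2)           # zero state, (2, 64, 32, 32)
h_next = cell(x, h)
print(h_next.shape)                          # torch.Size([2, 64, 32, 32])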
+ :param return_all_layers: bool + if return hidden and cell states for all layers + """ + super(ConvGRU, self).__init__() + + # Make sure that both `kernel_size` and + # `hidden_dim` are lists having len == num_layers + kernel_size = self._extend_for_multilayer(kernel_size, num_layers) + hidden_dim = self._extend_for_multilayer(hidden_dim, num_layers) + if not len(kernel_size) == len(hidden_dim) == num_layers: + raise ValueError('Inconsistent list length.') + + self.height, self.width = input_size + self.input_dim = input_dim + self.hidden_dim = hidden_dim + self.kernel_size = kernel_size + self.num_layers = num_layers + self.batch_first = batch_first + self.bias = bias + self.return_all_layers = return_all_layers + + cell_list = [] + for i in range(0, self.num_layers): + cur_input_dim = input_dim if i == 0 else hidden_dim[i - 1] + cell_list.append(ConvGRUCell(input_size=(self.height, self.width), + input_dim=cur_input_dim, + hidden_dim=self.hidden_dim[i], + kernel_size=self.kernel_size[i], + bias=self.bias)) + + # convert python list to pytorch module + self.cell_list = nn.ModuleList(cell_list) + + def forward(self, input_tensor, hidden_state=None): + """ + :param input_tensor: (b, t, c, h, w) or (t,b,c,h,w) + depends on if batch first or not extracted features from alexnet + :param hidden_state: + :return: layer_output_list, last_state_list + """ + if not self.batch_first: + # (t, b, c, h, w) -> (b, t, c, h, w) + input_tensor = input_tensor.permute(1, 0, 2, 3, 4) + + # Implement stateful ConvLSTM + if hidden_state is not None: + raise NotImplementedError() + else: + hidden_state = self._init_hidden(batch_size=input_tensor.size(0), + device=input_tensor.device, + dtype=input_tensor.dtype) + + layer_output_list = [] + last_state_list = [] + + seq_len = input_tensor.size(1) + cur_layer_input = input_tensor + + for layer_idx in range(self.num_layers): + h = hidden_state[layer_idx] + output_inner = [] + for t in range(seq_len): + # input current hidden and cell state + # then compute the next hidden + # and cell state through ConvLSTMCell forward function + h = self.cell_list[layer_idx]( + input_tensor=cur_layer_input[:, t, :, :, :], # (b,t,c,h,w) + h_cur=h) + output_inner.append(h) + + layer_output = torch.stack(output_inner, dim=1) + cur_layer_input = layer_output + + layer_output_list.append(layer_output) + last_state_list.append([h]) + + if not self.return_all_layers: + layer_output_list = layer_output_list[-1:] + last_state_list = last_state_list[-1:] + + return layer_output_list, last_state_list + + def _init_hidden(self, batch_size, device=None, dtype=None): + init_states = [] + for i in range(self.num_layers): + init_states.append( + self.cell_list[i].init_hidden(batch_size).to(device).to(dtype)) + return init_states + + @staticmethod + def _check_kernel_size_consistency(kernel_size): + if not (isinstance(kernel_size, tuple) or + (isinstance(kernel_size, list) and all( + [isinstance(elem, tuple) for elem in kernel_size]))): + raise ValueError('`kernel_size` must be tuple or list of tuples') + + @staticmethod + def _extend_for_multilayer(param, num_layers): + if not isinstance(param, list): + param = [param] * num_layers + return param + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/dcn_net.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/dcn_net.py new file mode 100644 index 0000000000000000000000000000000000000000..07d095be11fc3182c4c43481934c90661627e323 --- /dev/null +++ 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/dcn_net.py @@ -0,0 +1,21 @@ +from mmcv.ops import DeformConv2dPack as DCN +import torch +import torch.nn as nn + +class DCNNet(nn.Module): + def __init__(self, args): + super(DCNNet,self).__init__() + + module_list =[] + in_channels = args['in_channels'] + out_channels = args['out_channels'] + stride = args['stride'] + kernel_size = args['kernel_size'] + padding = args['padding'] + + for i in range(args['n_blocks']): + module_list.append(DCN(in_channels[i],out_channels[i],kernel_size[i],stride=stride[i],padding=padding[i])) + self.model = nn.Sequential(*module_list) + + def forward(self, x): + return self.model(x) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/deformable_transformer_backbone.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/deformable_transformer_backbone.py new file mode 100644 index 0000000000000000000000000000000000000000..861fe508297e29ad5bd6d7d494af9ced91a665bc --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/deformable_transformer_backbone.py @@ -0,0 +1,217 @@ +import numpy as np +import torch +import math +import torch.nn as nn +from opencood.models.sub_modules.resblock import ResNetModified, BasicBlock, Bottleneck +from opencood.models.sub_modules.detr_module import PositionEmbeddingSine, \ + DeformableTransformerEncoderLayer, DeformableTransformerEncoder +from opencood.models.fuse_modules.self_attn import AttFusion +from opencood.models.fuse_modules.deform_fuse import DeformFusion +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple + + +DEBUG = True + + +""" + Different from MaxFusion in max_fuse.py + This is a simplified version. + pairwise_t_matrix is already scaled. 
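DCNNet above reads parallel per-block lists from its args dict; a hypothetical configuration (values are illustrative only, and mmcv's DeformConv2dPack needs its CUDA ops available):

args = {
    'n_blocks': 2,
    'in_channels':  [64, 64],
    'out_channels': [64, 64],
    'kernel_size':  [3, 3],
    'stride':       [1, 1],
    'padding':      [1, 1],
}
# dcn = DCNNet(args)   # stacks two deformable conv blocks in an nn.Sequential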
+""" +def regroup(x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + +class MaxFusion(nn.Module): + def __init__(self): + super().__init__() + def forward(self, x, record_len, pairwise_t_matrix): + """ + pairwise_t_matrix is already normalized [B, L, L, 2, 3] + """ + split_x = regroup(x, record_len) + batch_size = len(record_len) + C, H, W = split_x[0].shape[1:] # C, W, H before + out = [] + for b, xx in enumerate(split_x): + N = xx.shape[0] + t_matrix = pairwise_t_matrix[b][:N, :N, :, :] + i = 0 + xx = warp_affine_simple(xx, t_matrix[i, :, :, :], (H, W)) + + h = torch.max(xx, dim=0)[0] # C, W, H before + out.append(h) + return torch.stack(out, dim=0) + + + + +class DeformableTransformerBackbone(nn.Module): + def __init__(self, model_cfg): + super().__init__() + self.model_cfg = model_cfg + self.compress = False + + + self.proj_first = True + if ('proj_first' in model_cfg) and (model_cfg['proj_first'] is False): + self.proj_first = False + self.discrete_ratio = model_cfg['voxel_size'][0] + self.downsample_rate = 1 + + self.level_num = len(model_cfg['layer_nums']) # exactly 3 now + + layer_nums = model_cfg['layer_nums'] + num_filters = model_cfg['num_filters'] + layer_strides = model_cfg['layer_strides'] + hidden_dim = model_cfg['hidden_dim'] + upsample_strides = model_cfg['upsample_strides'] + num_upsample_filters = model_cfg['num_upsample_filter'] + + self.resnet = ResNetModified(BasicBlock, + layer_nums, + layer_strides, + num_filters) + + self.position_embedding = PositionEmbeddingSine(hidden_dim//2) + + self.hidden_dim = hidden_dim + + if model_cfg['fusion'] == 'max': + self.fuse_net = [MaxFusion() for _ in range(self.level_num)] + elif model_cfg['fusion'] == 'self_att': + self.fuse_net = [AttFusion(n_filter) for n_filter in num_filters] + elif model_cfg['fusion'] == 'deform': + self.fuse_net = DeformFusion(num_filters[0], model_cfg['deform_method']) + elif model_cfg['fusion'] == 'deform_w_cycle': + assert self.proj_first is False + assert model_cfg['deform_method'] == 'rigid' + self.fuse_net = DeformFusion(num_filters[0], model_cfg['deform_method'], cycle_consist_loss=True) + else: + raise + + input_proj_list = [] + for i in range(self.level_num): + proj_in_channels = num_filters[i] + input_proj_list.append(nn.Sequential( + nn.Conv2d(proj_in_channels, self.hidden_dim, kernel_size=1), + nn.GroupNorm(32, self.hidden_dim), + )) + + self.input_proj = nn.ModuleList(input_proj_list) + self.level_embed = nn.Parameter(torch.Tensor(self.level_num, self.hidden_dim)) + self.upsample_strides = model_cfg['upsample_strides'] + + encoder_layer = DeformableTransformerEncoderLayer(self.hidden_dim, model_cfg['dim_feedforward'], + model_cfg['dropout'], model_cfg['activation'], + self.level_num, model_cfg['n_head'], model_cfg['enc_n_points']) + self.encoder = DeformableTransformerEncoder(encoder_layer, model_cfg['num_encoder_layers']) + + self.deblocks = nn.ModuleList() + for idx in range(self.level_num): + self.deblocks.append(nn.Sequential( + nn.ConvTranspose2d( + self.hidden_dim, num_upsample_filters[idx], + upsample_strides[idx], + stride=upsample_strides[idx], bias=False + ), + nn.BatchNorm2d(num_upsample_filters[idx], + eps=1e-3, momentum=0.01), + nn.ReLU() + )) + + def forward(self, data_dict): + spatial_features = data_dict['spatial_features'] + if DEBUG: + origin_features = torch.clone(spatial_features) + + record_len = data_dict['record_len'] + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + 
lidar_pose = data_dict['lidar_pose'] # (sum(cav),6 ) + + ups = [] + ret_dict = {} + x = spatial_features + + B = len(record_len) + H, W = x.shape[2:] ## this is original feature map [200, 704], not downsampled + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + + if not self.proj_first: + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + + features = self.resnet(x) # feature[i] is (sum(cav), C, H, W), different i, different C, H, W + + if self.model_cfg['fusion'].startswith('deform'): + x_fuseds = self.fuse_net(features, record_len, pairwise_t_matrix, lidar_pose) + else: + x_fuseds = [self.fuse_net[i](features[i], record_len, pairwise_t_matrix) for i in range(len(features))] + + pos_embeds = [self.position_embedding(x_fused) for x_fused in x_fuseds] + srcs = [self.input_proj[i](x_fuseds[i]) for i in range(len(x_fuseds))] + + + # srcs = [] + # pos_embeds = [] + # for i, feat in enumerate(features): + # x_fused = self.fuse_net[i](feat, record_len, pairwise_t_matrix) + # x_pos = self.position_embedding(x_fused) + # x_fused = self.input_proj[i](x_fused) + # srcs.append(x_fused) # (B, hidden_dim, H1, W1) + # pos_embeds.append(x_pos) + + + src_flatten = [] + mask_flatten = [] + lvl_pos_embed_flatten = [] + spatial_shapes = [] + for lvl, (src, pos_embed) in enumerate(zip(srcs, pos_embeds)): + bs, c, h, w = src.shape + spatial_shape = (h, w) + spatial_shapes.append(spatial_shape) + src = src.flatten(2).transpose(1, 2) + pos_embed = pos_embed.flatten(2).transpose(1, 2) + + lvl_pos_embed = pos_embed + self.level_embed[lvl].view(1, 1, -1) + lvl_pos_embed_flatten.append(lvl_pos_embed) + src_flatten.append(src) + src_flatten = torch.cat(src_flatten, 1) + mask_flatten = torch.zeros(src_flatten.shape[:2], device=src_flatten.device, dtype=torch.bool) + lvl_pos_embed_flatten = torch.cat(lvl_pos_embed_flatten, 1) + spatial_shapes = torch.as_tensor(spatial_shapes, dtype=torch.long, device=src_flatten.device) + level_start_index = torch.cat((spatial_shapes.new_zeros((1, )), spatial_shapes.prod(1).cumsum(0)[:-1])) + valid_ratios = torch.stack([self.get_valid_ratio(m) for m in srcs], 1) + + + memory = self.encoder(src_flatten, spatial_shapes, level_start_index, valid_ratios, lvl_pos_embed_flatten, mask_flatten) + flatten_length = [h*w for (h,w) in spatial_shapes] + output_split = torch.split(memory, flatten_length, dim=1) + output_features = [output.reshape(bs,spatial_shapes[i][0], spatial_shapes[i][1],self.hidden_dim).permute(0,3,1,2) for i, output in enumerate(output_split)] + + ups = [] + for i, feat in enumerate(output_features): + feat = self.deblocks[i](feat) + ups.append(feat) + + ups = torch.cat(ups, dim=1) + + x = ups + + data_dict['spatial_features_2d'] = x + return data_dict + + + def get_valid_ratio(self, x): + N, _, H, W = x.shape + mask = torch.zeros((N,H,W),dtype=torch.bool,device=x.device) + valid_H = torch.sum(~mask[:, :, 0], 1) + valid_W = torch.sum(~mask[:, 0, :], 1) + valid_ratio_h = valid_H.float() / H + valid_ratio_w = valid_W.float() / W + valid_ratio = torch.stack([valid_ratio_w, valid_ratio_h], -1) + return valid_ratio \ No newline at end of file diff --git 
a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/dense_head.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/dense_head.py new file mode 100644 index 0000000000000000000000000000000000000000..c773f8037162a393e7e1191013154c88c431b7d2 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/dense_head.py @@ -0,0 +1,26 @@ +# author: Yifan Lu +# dense head for stage1, predict cls, reg, dir +import torch.nn as nn +import torch + +class Head(nn.Module): + def __init__(self, args): + super(Head, self).__init__() + + self.conv_box = nn.Conv2d(args['num_input'], args['num_pred'], 1) # 128 -> 14 + self.conv_cls = nn.Conv2d(args['num_input'], args['num_cls'], 1) # 128 -> 2 + self.conv_dir = nn.Conv2d(args['num_input'], args['num_dir'], 1) # 128 -> 4 + self.conv_iou = nn.Conv2d(args['num_input'], args['num_dir'], 1, bias=False) + + def forward(self, x): + box_preds = self.conv_box(x) + cls_preds = self.conv_cls(x) + dir_preds = self.conv_dir(x) # dir_preds.shape=[8, w, h, 4] + iou_preds = self.conv_iou(x) + + ret_dict = {"reg_preds": box_preds, \ + "cls_preds": cls_preds, \ + "dir_preds": dir_preds, \ + "iou_preds": iou_preds} + + return ret_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/detr_module.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/detr_module.py new file mode 100644 index 0000000000000000000000000000000000000000..642439391b759bc5168979f50a25d172fd0a1954 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/detr_module.py @@ -0,0 +1,137 @@ +import torch +import torch.nn as nn +import math +import copy +from opencood.models.sub_modules.ms_deform_attn import MSDeformAttn +import torch.nn.functional as F + +class PositionEmbeddingSine(nn.Module): + """ + This is a more standard version of the position embedding, very similar to the one + used by the Attention is all you need paper, generalized to work on images. 
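The stage-1 Head above is configured through a small dict; a hypothetical example consistent with the inline channel hints (128 -> 14/2/4), where num_pred/num_cls/num_dir are assumed to be anchor_num times the per-anchor box, class and direction dimensions, and the IoU branch reuses num_dir output channels:

args = {
    'num_input': 128,   # channels of the shared BEV feature
    'num_pred': 14,     # e.g. 2 anchors x 7 box parameters
    'num_cls': 2,       # e.g. 2 anchors x 1 objectness score
    'num_dir': 4,       # e.g. 2 anchors x 2 direction bins
}
# head = Head(args)
# out = head(features)   # dict with 'reg_preds', 'cls_preds', 'dir_preds', 'iou_preds'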
+ """ + def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None): + super().__init__() + self.num_pos_feats = num_pos_feats + self.temperature = temperature + self.normalize = normalize + if scale is not None and normalize is False: + raise ValueError("normalize should be True if scale is passed") + if scale is None: + scale = 2 * math.pi + self.scale = scale + + def forward(self, x): + """ + Args: + x: torch.Tensor + [N, C, H, W] + """ + mask = torch.zeros((x.shape[0], x.shape[-2],x.shape[-1]), dtype=torch.bool, device=x.device) + assert mask is not None + not_mask = ~mask + y_embed = not_mask.cumsum(1, dtype=torch.float32) + x_embed = not_mask.cumsum(2, dtype=torch.float32) + if self.normalize: + eps = 1e-6 + y_embed = (y_embed - 0.5) / (y_embed[:, -1:, :] + eps) * self.scale + x_embed = (x_embed - 0.5) / (x_embed[:, :, -1:] + eps) * self.scale + + dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) + dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) + + pos_x = x_embed[:, :, :, None] / dim_t + pos_y = y_embed[:, :, :, None] / dim_t + pos_x = torch.stack((pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4).flatten(3) + pos_y = torch.stack((pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4).flatten(3) + pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) + return pos + + +class DeformableTransformerEncoderLayer(nn.Module): + def __init__(self, + d_model=256, d_ffn=1024, + dropout=0.1, activation="relu", + n_levels=4, n_heads=8, n_points=4): + super().__init__() + + # self attention + self.self_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points) + self.dropout1 = nn.Dropout(dropout) + self.norm1 = nn.LayerNorm(d_model) + + # ffn + self.linear1 = nn.Linear(d_model, d_ffn) + self.activation = _get_activation_fn(activation) + self.dropout2 = nn.Dropout(dropout) + self.linear2 = nn.Linear(d_ffn, d_model) + self.dropout3 = nn.Dropout(dropout) + self.norm2 = nn.LayerNorm(d_model) + + @staticmethod + def with_pos_embed(tensor, pos): + return tensor if pos is None else tensor + pos + + def forward_ffn(self, src): + src2 = self.linear2(self.dropout2(self.activation(self.linear1(src)))) + src = src + self.dropout3(src2) + src = self.norm2(src) + return src + + def forward(self, src, pos, reference_points, spatial_shapes, level_start_index, padding_mask=None): + # self attention + src2 = self.self_attn(self.with_pos_embed(src, pos), reference_points, src, spatial_shapes, level_start_index, padding_mask) + src = src + self.dropout1(src2) + src = self.norm1(src) + + # ffn + src = self.forward_ffn(src) + + return src + + +class DeformableTransformerEncoder(nn.Module): + def __init__(self, encoder_layer, num_layers): + super().__init__() + self.layers = _get_clones(encoder_layer, num_layers) + self.num_layers = num_layers + + @staticmethod + def get_reference_points(spatial_shapes, valid_ratios, device): + reference_points_list = [] + for lvl, (H_, W_) in enumerate(spatial_shapes): + + ref_y, ref_x = torch.meshgrid(torch.linspace(0.5, H_ - 0.5, H_, dtype=torch.float32, device=device), + torch.linspace(0.5, W_ - 0.5, W_, dtype=torch.float32, device=device)) + ref_y = ref_y.reshape(-1)[None] / (valid_ratios[:, None, lvl, 1] * H_) + ref_x = ref_x.reshape(-1)[None] / (valid_ratios[:, None, lvl, 0] * W_) + ref = torch.stack((ref_x, ref_y), -1) + + reference_points_list.append(ref) + reference_points = torch.cat(reference_points_list, 1) + reference_points = reference_points[:, :, None] * 
valid_ratios[:, None] + return reference_points + + def forward(self, src, spatial_shapes, level_start_index, valid_ratios, pos=None, padding_mask=None): + output = src + reference_points = self.get_reference_points(spatial_shapes, valid_ratios, device=src.device) + for _, layer in enumerate(self.layers): + output = layer(output, pos, reference_points, spatial_shapes, level_start_index, padding_mask) + + return output + + + + +def _get_clones(module, N): + return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) + +def _get_activation_fn(activation): + """Return an activation function given a string""" + if activation == "relu": + return F.relu + if activation == "gelu": + return F.gelu + if activation == "glu": + return F.glu + raise RuntimeError(F"activation should be relu/gelu, not {activation}.") \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/discriminator.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/discriminator.py new file mode 100644 index 0000000000000000000000000000000000000000..87bb7d70fb538c26850ddfca7979827c4ff7dd4d --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/discriminator.py @@ -0,0 +1,44 @@ +import torch +import torch.nn as nn +from opencood.models.da_modules.gsl import GradientScalarLayer + +def weights_init(m): + classname = m.__class__.__name__ + if classname.find('Conv') != -1: + nn.init.normal_(m.weight.data, 0.0, 0.02) + elif classname.find('BatchNorm') != -1: + nn.init.normal_(m.weight.data, 1.0, 0.02) + nn.init.constant_(m.bias.data, 0) + +class Discriminator(nn.Module): + def __init__(self, args): + super().__init__() + self.indim = args['indim'] + self.roi_size = args['roi_align_size'] + self.netD = nn.Sequential( + nn.Conv2d(self.indim, self.indim//2, kernel_size=1, stride=1, padding=0), + nn.BatchNorm2d(self.indim//2), + nn.LeakyReLU(0.2, inplace=True), + nn.Conv2d(self.indim//2, self.indim//4, kernel_size=1, stride=1, padding=0), + nn.BatchNorm2d(self.indim//4), + nn.LeakyReLU(0.2, inplace=True), + nn.AvgPool2d(kernel_size=self.roi_size, stride=1, padding=0), # [N, self.indim//4, 1, 1], + nn.Flatten(start_dim=1), + nn.Linear(self.indim//4, self.indim//8), + nn.LeakyReLU(0.2, inplace=True), + nn.Linear(self.indim//8, 1), + nn.Sigmoid() + ) + self.grl = GradientScalarLayer(- args.get('scale', 1)) + + self.netD.apply(weights_init) + + def forward(self, x): + """ + Input: + x: [N, indim, RoIsize, RoIsize] + Output: + cls: [N, 1] + """ + x = self.grl(x) + return self.netD(x) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/downsample_conv.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/downsample_conv.py new file mode 100644 index 0000000000000000000000000000000000000000..ca9550a7f2f75010b80a7a8dd6639dcb469b3b1a --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/downsample_conv.py @@ -0,0 +1,50 @@ +""" +Class used to downsample features by 3*3 conv +""" +import torch.nn as nn + + +class DoubleConv(nn.Module): + """ + Double convoltuion + Args: + in_channels: input channel num + out_channels: output channel num + """ + + def __init__(self, in_channels, out_channels, kernel_size, + stride, padding): + super().__init__() + self.double_conv = nn.Sequential( + nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, + stride=stride, padding=padding), + nn.ReLU(inplace=True), + 
nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1), + nn.ReLU(inplace=True) + ) + + def forward(self, x): + return self.double_conv(x) + + +class DownsampleConv(nn.Module): + def __init__(self, config): + super(DownsampleConv, self).__init__() + self.layers = nn.ModuleList([]) + input_dim = config['input_dim'] + + for (ksize, dim, stride, padding) in zip(config['kernal_size'], + config['dim'], + config['stride'], + config['padding']): + self.layers.append(DoubleConv(input_dim, + dim, + kernel_size=ksize, + stride=stride, + padding=padding)) + input_dim = dim + + def forward(self, x): + for i in range(len(self.layers)): + x = self.layers[i](x) + return x \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/feature_alignnet.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/feature_alignnet.py new file mode 100644 index 0000000000000000000000000000000000000000..679f5ca1bb4da74a0ebbd6791f42ba813101bf4c --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/feature_alignnet.py @@ -0,0 +1,141 @@ + +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.ops import DeformConv2dPack as dconv2d +from timm.models.layers import DropPath +from opencood.models.sub_modules.cbam import BasicBlock +from opencood.models.sub_modules.feature_alignnet_modules import SCAligner, Res1x1Aligner, \ + Res3x3Aligner, Res3x3Aligner, CBAM, ConvNeXt, FANet, SDTAAgliner +import numpy as np + + + +class AlignNet(nn.Module): + def __init__(self, args): + super().__init__() + model_name = args['core_method'] + + if model_name == "scaligner": + self.channel_align = SCAligner(args['args']) + elif model_name == "resnet1x1": + self.channel_align = Res1x1Aligner(args['args']) + elif model_name == "resnet3x3": + self.channel_align = Res3x3Aligner(args['args']) + elif model_name == "sdta": + self.channel_align = SDTAAgliner(args['args']) + elif model_name == "cbam": + self.channel_align = CBAM(args['args']) + elif model_name == "convnext": + self.channel_align = ConvNeXt(args['args']) + elif model_name == "fanet": + self.channel_align = FANet(args['args']) + elif model_name == 'identity': + self.channel_align = nn.Identity() + + self.spatial_align_flag = args.get("spatial_align", False) + if self.spatial_align_flag: + warpnet_indim = args['args']['warpnet_indim'] + dim = args['args']['dim'] + self.teacher = args['args']['teacher'] + setattr(self, "warpnet", + nn.Sequential( + nn.Conv2d(warpnet_indim, warpnet_indim, kernel_size=3, stride=1, padding=1), + nn.BatchNorm2d(warpnet_indim), + nn.ReLU(), + nn.Conv2d(warpnet_indim, dim, kernel_size=3, stride=1, padding=1), + nn.BatchNorm2d(dim), + nn.ReLU(), + nn.Conv2d(dim, 2, kernel_size=3, stride=1, padding=1), + ) + ) + self.theta_identity = torch.tensor([[[1.,0.,0.],[0.,1.,0.]]]) + + self.count = 0 # debug + + def forward(self, x): + return self.channel_align(x) + + + def spatail_align(self, student_feature, teacher_feature, physical_dist): + physical_offset = self.warpnet(torch.cat([student_feature, teacher_feature], dim=1)).permute(0,2,3,1) # N, H, W, 2, unit is meter. 
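+        # The offsets above are predicted in meters: the steps below zero them where the teacher feature is empty, rescale them to normalized grid units, add them to an identity sampling grid, and warp the student feature with grid_sample.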
+ mask = torch.any(teacher_feature != 0, dim=1) + physical_offset *= mask.unsqueeze(-1) + relative_offset = physical_offset * torch.tensor([2./physical_dist[0], 2./physical_dist[1]], device=physical_offset.device) # N, H, W, 2 + warp_field = relative_offset + \ + torch.nn.functional.affine_grid(self.theta_identity.expand(student_feature.shape[0], 2, 3), student_feature.shape).to(relative_offset.device) + spataial_aligned_feature = torch.nn.functional.grid_sample(student_feature, warp_field) + + # self.visualize_offset(physical_offset, warp_field, student_feature, spataial_aligned_feature, teacher_feature) + return spataial_aligned_feature + + def visualize_offset(self, physical_offset, warp_field, feature_before, feature_after, teacher_feature): + """ + physical_offset: shape [N, H, W, 2] + warp_field: shape [N, H, W, 2] + feaure_before: [N, C, H, W] + feature_after: [N, C, H, W] + """ + import seaborn as sns + import matplotlib.pyplot as plt + import os + N = physical_offset.shape[0] + print(physical_offset.shape) + + save_path = "opencood/logs/vislog" + file_idx = self.count + self.count += 1 + + physical_offsets_save_path = os.path.join(save_path, "physical_offsets") + vmax = physical_offset.max() + print(f"physical offset max: {vmax}") + if not os.path.exists(physical_offsets_save_path): + os.mkdir(physical_offsets_save_path) + physical_offset = physical_offset.detach().cpu().numpy() + warp_field = warp_field.detach().cpu().numpy() + for i in range(N): + sns.heatmap(physical_offset[i,:,:,0], cmap="vlag", vmin=-vmax*0.8, vmax=vmax*0.8, square=True) + plt.axis('off') + plt.savefig(os.path.join(physical_offsets_save_path, "{}_{}_physical_x.png".format(file_idx, i)), dpi=500) + plt.close() + + sns.heatmap(physical_offset[i,:,:,1], cmap="vlag", vmin=-vmax*0.8, vmax=vmax*0.8, square=True) + plt.axis('off') + plt.savefig(os.path.join(physical_offsets_save_path, "{}_{}_physical_y.png".format(file_idx, i)), dpi=500) + plt.close() + + sns.heatmap(warp_field[i,:,:,0], cmap="vlag", square=True) + plt.axis('off') + plt.savefig(os.path.join(physical_offsets_save_path, "{}_{}_warpfield_x.png".format(file_idx, i)), dpi=500) + plt.close() + + sns.heatmap(warp_field[i,:,:,1], cmap="vlag", square=True) + plt.axis('off') + plt.savefig(os.path.join(physical_offsets_save_path, "{}_{}_warpfield_y.png".format(file_idx, i)), dpi=500) + plt.close() + + spatial_feature_save_path = os.path.join(save_path, "spatial_feature") + if not os.path.exists(spatial_feature_save_path): + os.mkdir(spatial_feature_save_path) + feature_before = feature_before.detach().cpu().numpy() + feature_after = feature_after.detach().cpu().numpy() + teacher_feature = teacher_feature.detach().cpu().numpy() + for i in range(N): + channel = np.random.randint(64) + plt.imshow(feature_before[i, channel]) + plt.axis("off") + plt.colorbar() + plt.savefig(os.path.join(spatial_feature_save_path, "{}_{}_before.png".format(file_idx, i)), dpi=500) + plt.close() + + plt.imshow(feature_after[i, channel]) + plt.axis("off") + plt.colorbar() + plt.savefig(os.path.join(spatial_feature_save_path, "{}_{}_spaligned.png".format(file_idx, i)), dpi=500) + plt.close() + + plt.imshow(teacher_feature[i, channel]) + plt.axis("off") + plt.colorbar() + plt.savefig(os.path.join(spatial_feature_save_path, "{}_{}_teacher.png".format(file_idx, i)), dpi=500) + plt.close() \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/feature_alignnet_modules.py 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/feature_alignnet_modules.py new file mode 100644 index 0000000000000000000000000000000000000000..9354d3704bc0721151a8043e4754c82121b147c4 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/feature_alignnet_modules.py @@ -0,0 +1,499 @@ + +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.ops import DeformConv2dPack as dconv2d +from timm.models.layers import DropPath +from opencood.models.sub_modules.cbam import BasicBlock +import math + +class LayerNorm(nn.Module): + def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"): + super().__init__() + self.weight = nn.Parameter(torch.ones(normalized_shape)) + self.bias = nn.Parameter(torch.zeros(normalized_shape)) + self.eps = eps + self.data_format = data_format + if self.data_format not in ["channels_last", "channels_first"]: + raise NotImplementedError + self.normalized_shape = (normalized_shape,) + + def forward(self, x): + if self.data_format == "channels_last": + return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps) + elif self.data_format == "channels_first": + u = x.mean(1, keepdim=True) + s = (x - u).pow(2).mean(1, keepdim=True) + x = (x - u) / torch.sqrt(s + self.eps) + x = self.weight[:, None, None] * x + self.bias[:, None, None] + return x + +class XCA(nn.Module): + def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0.): + super().__init__() + self.num_heads = num_heads + self.temperature = nn.Parameter(torch.ones(num_heads, 1, 1)) + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + def forward(self, x): + B, N, C = x.shape + qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads) + qkv = qkv.permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + q = q.transpose(-2, -1) + k = k.transpose(-2, -1) + v = v.transpose(-2, -1) + + q = torch.nn.functional.normalize(q, dim=-1) + k = torch.nn.functional.normalize(k, dim=-1) + + attn = (q @ k.transpose(-2, -1)) * self.temperature + # ------------------- + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).permute(0, 3, 1, 2).reshape(B, N, C) + # ------------------ + x = self.proj(x) + x = self.proj_drop(x) + + return x + + @torch.jit.ignore + def no_weight_decay(self): + return {'temperature'} + + +class ConvEncoder(nn.Module): + def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6, expan_ratio=4, kernel_size=1, deformable=False): + super().__init__() + if not deformable: + self.dwconv = nn.Conv2d(dim, dim, kernel_size=kernel_size, padding=kernel_size // 2, groups=dim) + else: + self.dwconv = dconv2d(dim, dim, kernel_size=kernel_size, padding=kernel_size // 2, groups=dim) + self.norm = LayerNorm(dim, eps=1e-6) + self.pwconv1 = nn.Linear(dim, expan_ratio * dim) + self.act = nn.GELU() + self.pwconv2 = nn.Linear(expan_ratio * dim, dim) + self.gamma = nn.Parameter(layer_scale_init_value * torch.ones(dim), + requires_grad=True) if layer_scale_init_value > 0 else None + self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() + + def forward(self, x): + input = x + x = self.dwconv(x) + x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C) + x = self.norm(x) + x = self.pwconv1(x) + x = self.act(x) + x = self.pwconv2(x) + if self.gamma is not None: + x = self.gamma * x + x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W) + + x = input + self.drop_path(x) + return x + +class SDTAEncoder(nn.Module): + def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6, expan_ratio=4, + use_pos_emb=False, num_heads=4, qkv_bias=True, attn_drop=0., drop=0., num_conv=2, deformable=False): + super().__init__() + width = dim + convs = [] + if not deformable: + for i in range(num_conv): + convs.append(nn.Conv2d(dim, dim, kernel_size=1, padding=0, groups=width)) + # convs.append(nn.BatchNorm2d(dim)) + convs.append(nn.ReLU()) + else: + for i in range(num_conv): + convs.append(dconv2d(dim, dim, kernel_size=1, padding=0, groups=width)) + # convs.append(nn.BatchNorm2d(dim)) + convs.append(nn.ReLU()) + self.convs = nn.Sequential(*convs) + + + self.norm_xca = LayerNorm(dim, eps=1e-6) + self.gamma_xca = nn.Parameter(layer_scale_init_value * torch.ones(dim), + requires_grad=True) if layer_scale_init_value > 0 else None + self.xca = XCA(dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop) + + self.norm = LayerNorm(dim, eps=1e-6) + self.pwconv1 = nn.Linear(dim, expan_ratio * dim) # pointwise/1x1 convs, implemented with linear layers + self.act = nn.GELU() # TODO: MobileViT is using 'swish' + self.pwconv2 = nn.Linear(expan_ratio * dim, dim) + self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)), + requires_grad=True) if layer_scale_init_value > 0 else None + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + + def forward(self, x): + input = x + + x = self.convs(x) + + # XCA + B, C, H, W = x.shape + x = x.reshape(B, C, H * W).permute(0, 2, 1) + x = x + self.drop_path(self.gamma_xca * self.xca(self.norm_xca(x))) + x = x.reshape(B, H, W, C) + + # Inverted Bottleneck + x = self.norm(x) + x = self.pwconv1(x) + x = self.act(x) + x = self.pwconv2(x) + if self.gamma is not None: + x = self.gamma * x + x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W) + + x = input + self.drop_path(x) + + return x + + + +class SDTA(nn.Module): + def __init__(self, args, deform): + super().__init__() + in_ch = args['in_ch'] + self.model = nn.ModuleList() + + for i in range(args['layer_num']): + self.model.append(ConvEncoder(dim=in_ch, deformable=deform)) + self.model.append(SDTAEncoder(dim=in_ch, deformable=deform)) + + def forward(self, x): + for m in self.model: + x = m(x) + return x + + +class Resnet3x3(nn.Module): + def __init__(self, args, deform=False): + super().__init__() + in_ch = args['in_ch'] + layernum = args['layer_num'] + model_list = nn.ModuleList() + for _ in range(layernum): + model_list.append(ResidualBlock(in_ch, in_ch, kernel_size=3, deform=deform)) + + self.model = nn.Sequential(*model_list) + + def forward(self, x): + return self.model(x) + + +class Resnet1x1(nn.Module): + def __init__(self, args, deform=False): + super().__init__() + in_ch = args['in_ch'] + layernum = args['layer_num'] + model_list = nn.ModuleList() + for _ in range(layernum): + model_list.append(ResidualBlock(in_ch, in_ch, kernel_size=1, deform=deform)) + + self.model = nn.Sequential(*model_list) + + def forward(self, x): + return self.model(x) + + +""" +Feature-Align Network with Knowledge Distillation for Efficient Denoising +""" +class ARNetBlock(nn.Module): + def 
__init__(self, indim, outdim): + super().__init__() + self.model = nn.Sequential( + nn.Conv2d(indim, indim, kernel_size=1), + nn.ReLU(), + nn.Conv2d(indim, indim, kernel_size=3, padding=1, groups=8), + nn.ReLU(), + nn.Conv2d(indim, outdim, kernel_size=1) + ) + + def forward(self, x): + return self.model(x) + +class FALayer(nn.Module): + def __init__(self, indim, outdim, imgdim): + super().__init__() + self.conv1 = nn.Conv2d(imgdim, imgdim, 1) + self.relu = nn.ReLU() + self.conv2 = nn.Conv2d(imgdim, outdim, 1) + self.conv3 = nn.Conv2d(imgdim, outdim, 1) + self.arblock = ARNetBlock(indim, outdim) + + def forward(self, feature, img): + feature = self.arblock(feature) + inter = self.relu(self.conv1(img)) + gamma = self.conv2(inter) + beta = self.conv3(inter) + + return feature * gamma + beta + +class FANet(nn.Module): + def __init__(self, args): + super().__init__() + dim = args['dim'] + self.falayer1 = FALayer(dim, dim, dim) + self.falayer2 = FALayer(dim, dim*2, dim) + self.falayer3 = FALayer(dim*2, dim*4, dim) + self.falayer4 = FALayer(dim*4, dim*2, dim) + self.falayer5 = FALayer(dim*2, dim, dim) + self.maxpool = nn.MaxPool2d(2) + self.upsample2d = nn.Upsample(scale_factor=2, mode='bilinear') + + self.skip_conv1 = nn.Conv2d(dim*2, dim*2, 1) + self.skip_conv2 = nn.Conv2d(dim, dim, 1) + + + def forward(self, x): + x_detach = x.detach() + # fake image input + img0 = x_detach + img1 = self.maxpool(img0) + img2 = self.maxpool(img1) + + feature0 = self.falayer1(x, img0) # H,W, dim + feature1 = self.falayer2(self.maxpool(feature0), img1) # H/2, W/2, dim*2 + feature2 = self.falayer3(self.maxpool(feature1), img2) # H/4, W/4, dim*4 + + feature3 = self.falayer4(self.upsample2d(feature2), img1) + self.skip_conv1(feature1) + feature4 = self.falayer5(self.upsample2d(feature3), img0) + self.skip_conv2(feature0) + + return feature4 + + + +""" +CBAM: Convolutional Block Attention Module +""" +class CBAM(nn.Module): + def __init__(self, args): + super().__init__() + dim = args['dim'] + num_of_blocks = args['num_of_blocks'] + model_list = nn.ModuleList() + for _ in range(num_of_blocks): + model_list.append(BasicBlock(dim, dim)) + + self.model = nn.Sequential(*model_list) + + def forward(self, x): + return self.model(x) + + +""" +ConvNeXt +""" +class ConvNeXtBlock(nn.Module): + r""" + https://github.com/facebookresearch/ConvNeXt/blob/main/models/convnext.py + + ConvNeXt Block. There are two equivalent implementations: + (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W) + (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back + We use (2) as we find it slightly faster in PyTorch + + Args: + dim (int): Number of input channels. + drop_path (float): Stochastic depth rate. Default: 0.0 + layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. 
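+        kernel_size (int): Kernel size of the depthwise convolution. Default: 7 +        deform (bool): If True, a 3x3 deformable convolution (followed by GELU) is applied before the depthwise convolution. Default: False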
+ """ + def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6, kernel_size=7, deform=False): + super().__init__() + self.deform = deform + if self.deform: + self.dfconv = dconv2d(dim, dim, kernel_size=3, padding=1) + self.dwconv = nn.Conv2d(dim, dim, kernel_size=kernel_size, padding=kernel_size//2, groups=dim) # depthwise conv + self.norm = LayerNorm(dim, eps=1e-6) + self.pwconv1 = nn.Linear(dim, 4 * dim) # pointwise/1x1 convs, implemented with linear layers + self.act = nn.GELU() + self.pwconv2 = nn.Linear(4 * dim, dim) + self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)), + requires_grad=True) if layer_scale_init_value > 0 else None + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + + def forward(self, x): + input = x + if self.deform: + x = self.dfconv(x) + x = self.act(x) + x = self.dwconv(x) + x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C) + x = self.norm(x) + x = self.pwconv1(x) + x = self.act(x) + x = self.pwconv2(x) + if self.gamma is not None: + x = self.gamma * x + x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W) + + x = input + self.drop_path(x) + return x + +class ConvNeXt(nn.Module): + def __init__(self, args): + super().__init__() + dim = args['dim'] + kernel_size = args.get("kernel_size", 7) + num_of_blocks = args['num_of_blocks'] + deform = args.get('deform', False) + model_list = nn.ModuleList() + for _ in range(num_of_blocks): + model_list.append(ConvNeXtBlock(dim, kernel_size=kernel_size, deform=deform)) + + self.model = nn.Sequential(*model_list) + + def forward(self, x): + return self.model(x) + + + + +""" +Resnet1x1 Aligner +""" +class ResidualBlock(nn.Module): + def __init__(self, in_channels, out_channels, use_1x1conv=False, kernel_size=3, deform=False): + super(ResidualBlock, self).__init__() + if kernel_size == 3: + padding = 1 + stride = 1 + elif kernel_size == 1: + padding = 0 + stride = 1 + else: + raise("Not Supported") + + if not deform: + self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=padding, stride=stride) + self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=kernel_size, padding=padding) + else: + self.conv1 = dconv2d(in_channels, out_channels, kernel_size=kernel_size, padding=padding, stride=stride) + self.conv2 = dconv2d(out_channels, out_channels, kernel_size=kernel_size, padding=padding) + + # 1x1conv来升维 + if use_1x1conv: + self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride) + else: + self.conv3 = None + self.bn1 = nn.BatchNorm2d(out_channels) + self.bn2 = nn.BatchNorm2d(out_channels) + + def forward(self, X): + Y = F.relu(self.bn1(self.conv1(X))) + Y = self.bn2(self.conv2(Y)) + if self.conv3: + X = self.conv3(X) + return F.relu(Y + X) + + +class Res1x1Aligner(nn.Module): + def __init__(self, args): + super().__init__() + dim = args['dim'] + num_of_blocks = args['num_of_blocks'] + deform = args.get('deform', False) + model_list = nn.ModuleList() + for _ in range(num_of_blocks): + model_list.append(ResidualBlock(dim, dim, kernel_size=1, deform=deform)) + + self.model = nn.Sequential(*model_list) + + def forward(self, x): + return self.model(x) + +class Res3x3Aligner(nn.Module): + def __init__(self, args): + super().__init__() + dim = args['dim'] + num_of_blocks = args['num_of_blocks'] + deform = args.get('deform', False) + model_list = nn.ModuleList() + for _ in range(num_of_blocks): + model_list.append(ResidualBlock(dim, dim, kernel_size=3, deform=deform)) + + self.model = nn.Sequential(*model_list) + + 
def forward(self, x): + return self.model(x) + + +class SDTAAgliner(nn.Module): + def __init__(self, args): + super().__init__() + in_ch = args['dim'] + self.model = nn.ModuleList() + + for i in range(args['num_of_blocks']): + self.model.append(ConvEncoder(dim=in_ch, deformable=False)) + self.model.append(SDTAEncoder(dim=in_ch, deformable=False)) + + def forward(self, x): + for m in self.model: + x = m(x) + return x + +""" +Laynorm + MLP +""" +class ResMLP(nn.Module): + def __init__(self, num_of_layers=2, dim=64): + super().__init__() + model_list = [nn.LayerNorm(dim)] + for i in range(num_of_layers): + model_list.append(nn.Linear(dim, dim)) + model_list.append(nn.GELU()) + self.model = nn.Sequential(*model_list) + + def forward(self, x): + return x + self.model(x) + +class SCAligner(nn.Module): + """ + Structure: + + Input: + FeatureMap (NCHW) + Model: + Permute -> (NHWC) + ------------------------ x M + LayerNorm -> (NHWC) + MLP(GELU) x n + skip_conn-> (NHWC) + ------------------------ + Permute -> (NCHW) + + if Camera, additionally + + Input: + FeatureMap (NCHW) + Coming FeatureMap Mean (NCHW) + Model: + cat -> (N 2C HW) + conv2d -> (N2HW) + warp FeatureMap (NCHW) + + """ + def __init__(self, args): + super().__init__() + num_of_blocks = args['num_of_blocks'] + num_of_layers = args['num_of_layers'] + dim = args['dim'] + model_list = [] + for _ in range(num_of_blocks): + model_list.append(ResMLP(num_of_layers, dim)) + self.backbone = nn.Sequential(*model_list) + + + def forward(self, x): + x = x.permute(0,2,3,1) + x = self.backbone(x) + x = x.permute(0,3,1,2) + return x diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/functions/__init__.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/functions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8a2197bda3199aa32cafc5b9d396479609853dd2 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/functions/__init__.py @@ -0,0 +1,10 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from .ms_deform_attn_func import MSDeformAttnFunction + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/functions/ms_deform_attn_func.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/functions/ms_deform_attn_func.py new file mode 100644 index 0000000000000000000000000000000000000000..8c5df8cf5d23aca963eec6c1133c180b37289607 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/functions/ms_deform_attn_func.py @@ -0,0 +1,61 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import torch +import torch.nn.functional as F +from torch.autograd import Function +from torch.autograd.function import once_differentiable + +import MultiScaleDeformableAttention as MSDA + + +class MSDeformAttnFunction(Function): + @staticmethod + def forward(ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step): + ctx.im2col_step = im2col_step + output = MSDA.ms_deform_attn_forward( + value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, ctx.im2col_step) + ctx.save_for_backward(value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights) + return output + + @staticmethod + @once_differentiable + def backward(ctx, grad_output): + value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors + grad_value, grad_sampling_loc, grad_attn_weight = \ + MSDA.ms_deform_attn_backward( + value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, grad_output, ctx.im2col_step) + + return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None + + +def ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights): + # for debug and test only, + # need to use cuda version instead + N_, S_, M_, D_ = value.shape + _, Lq_, M_, L_, P_, _ = sampling_locations.shape + value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1) + sampling_grids = 2 * sampling_locations - 1 + sampling_value_list = [] + for lid_, (H_, W_) in enumerate(value_spatial_shapes): + # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_ + value_l_ = value_list[lid_].flatten(2).transpose(1, 2).reshape(N_*M_, D_, H_, W_) + # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2 + sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1) + # N_*M_, D_, Lq_, P_ + sampling_value_l_ = F.grid_sample(value_l_, sampling_grid_l_, + mode='bilinear', padding_mode='zeros', align_corners=False) + sampling_value_list.append(sampling_value_l_) + # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_, M_, 1, Lq_, L_*P_) + attention_weights = attention_weights.transpose(1, 2).reshape(N_*M_, 1, Lq_, L_*P_) + output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights).sum(-1).view(N_, M_*D_, Lq_) + return output.transpose(1, 2).contiguous() diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/height_compression.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/height_compression.py new file mode 100644 index 0000000000000000000000000000000000000000..1d7f38ed23f62fab0709b4e1ac5b141aaefd1f18 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/height_compression.py @@ -0,0 +1,27 @@ +import torch.nn as nn + + +class HeightCompression(nn.Module): + def __init__(self, model_cfg, **kwargs): + super().__init__() + self.model_cfg = 
model_cfg + self.num_bev_features = self.model_cfg['feature_num'] + + def forward(self, batch_dict): + """ + Args: + batch_dict: + encoded_spconv_tensor: sparse tensor + Returns: + batch_dict: + spatial_features: + + """ + encoded_spconv_tensor = batch_dict['encoded_spconv_tensor'] + spatial_features = encoded_spconv_tensor.dense() + N, C, D, H, W = spatial_features.shape + spatial_features = spatial_features.view(N, C * D, H, W) + batch_dict['spatial_features'] = spatial_features + batch_dict['spatial_features_stride'] = \ + batch_dict['encoded_spconv_tensor_stride'] + return batch_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/hmsa.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/hmsa.py new file mode 100644 index 0000000000000000000000000000000000000000..814b298cc3de2636c2e31cf4949e64086a69abde --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/hmsa.py @@ -0,0 +1,151 @@ +import torch +from torch import nn + +from einops import rearrange + + +class HGTCavAttention(nn.Module): + def __init__(self, dim, heads, num_types=2, + num_relations=4, dim_head=64, dropout=0.1): + super().__init__() + inner_dim = heads * dim_head + + self.heads = heads + self.scale = dim_head ** -0.5 + self.num_types = num_types + + self.attend = nn.Softmax(dim=-1) + self.drop_out = nn.Dropout(dropout) + self.k_linears = nn.ModuleList() + self.q_linears = nn.ModuleList() + self.v_linears = nn.ModuleList() + self.a_linears = nn.ModuleList() + self.norms = nn.ModuleList() + for t in range(num_types): + self.k_linears.append(nn.Linear(dim, inner_dim)) + self.q_linears.append(nn.Linear(dim, inner_dim)) + self.v_linears.append(nn.Linear(dim, inner_dim)) + self.a_linears.append(nn.Linear(inner_dim, dim)) + + self.relation_att = nn.Parameter( + torch.Tensor(num_relations, heads, dim_head, dim_head)) + self.relation_msg = nn.Parameter( + torch.Tensor(num_relations, heads, dim_head, dim_head)) + + torch.nn.init.xavier_uniform(self.relation_att) + torch.nn.init.xavier_uniform(self.relation_msg) + + def to_qkv(self, x, types): + # x: (B,H,W,L,C) + # types: (B,L) + q_batch = [] + k_batch = [] + v_batch = [] + + for b in range(x.shape[0]): + q_list = [] + k_list = [] + v_list = [] + + for i in range(x.shape[-2]): + # (H,W,1,C) + q_list.append( + self.q_linears[types[b, i]](x[b, :, :, i, :].unsqueeze(2))) + k_list.append( + self.k_linears[types[b, i]](x[b, :, :, i, :].unsqueeze(2))) + v_list.append( + self.v_linears[types[b, i]](x[b, :, :, i, :].unsqueeze(2))) + # (1,H,W,L,C) + q_batch.append(torch.cat(q_list, dim=2).unsqueeze(0)) + k_batch.append(torch.cat(k_list, dim=2).unsqueeze(0)) + v_batch.append(torch.cat(v_list, dim=2).unsqueeze(0)) + # (B,H,W,L,C) + q = torch.cat(q_batch, dim=0) + k = torch.cat(k_batch, dim=0) + v = torch.cat(v_batch, dim=0) + return q, k, v + + def get_relation_type_index(self, type1, type2): + return type1 * self.num_types + type2 + + def get_hetero_edge_weights(self, x, types): + w_att_batch = [] + w_msg_batch = [] + + for b in range(x.shape[0]): + w_att_list = [] + w_msg_list = [] + + for i in range(x.shape[-2]): + w_att_i_list = [] + w_msg_i_list = [] + + for j in range(x.shape[-2]): + e_type = self.get_relation_type_index(types[b, i], + types[b, j]) + w_att_i_list.append(self.relation_att[e_type].unsqueeze(0)) + w_msg_i_list.append(self.relation_msg[e_type].unsqueeze(0)) + w_att_list.append(torch.cat(w_att_i_list, dim=0).unsqueeze(0)) + w_msg_list.append(torch.cat(w_msg_i_list, 
dim=0).unsqueeze(0)) + + w_att_batch.append(torch.cat(w_att_list, dim=0).unsqueeze(0)) + w_msg_batch.append(torch.cat(w_msg_list, dim=0).unsqueeze(0)) + + # (B,M,L,L,C_head,C_head) + w_att = torch.cat(w_att_batch, dim=0).permute(0, 3, 1, 2, 4, 5) + w_msg = torch.cat(w_msg_batch, dim=0).permute(0, 3, 1, 2, 4, 5) + return w_att, w_msg + + def to_out(self, x, types): + out_batch = [] + for b in range(x.shape[0]): + out_list = [] + for i in range(x.shape[-2]): + out_list.append( + self.a_linears[types[b, i]](x[b, :, :, i, :].unsqueeze(2))) + out_batch.append(torch.cat(out_list, dim=2).unsqueeze(0)) + out = torch.cat(out_batch, dim=0) + return out + + def forward(self, x, mask, prior_encoding): + # x: (B, L, H, W, C) -> (B, H, W, L, C) + # mask: (B, H, W, L, 1) + # prior_encoding: (B,L,H,W,3) + x = x.permute(0, 2, 3, 1, 4) + # mask: (B, 1, H, W, L, 1) + mask = mask.unsqueeze(1) + # (B,L) + velocities, dts, types = [itm.squeeze(-1) for itm in + prior_encoding[:, :, 0, 0, :].split( + [1, 1, 1], dim=-1)] + types = types.to(torch.int) + dts = dts.to(torch.int) + qkv = self.to_qkv(x, types) + # (B,M,L,L,C_head,C_head) + w_att, w_msg = self.get_hetero_edge_weights(x, types) + + # q: (B, M, H, W, L, C) + q, k, v = map(lambda t: rearrange(t, 'b h w l (m c) -> b m h w l c', + m=self.heads), (qkv)) + # attention, (B, M, H, W, L, L) + att_map = torch.einsum( + 'b m h w i p, b m i j p q, bm h w j q -> b m h w i j', + [q, w_att, k]) * self.scale + # add mask + att_map = att_map.masked_fill(mask == 0, -float('inf')) + # softmax + att_map = self.attend(att_map) + + # out:(B, M, H, W, L, C_head) + v_msg = torch.einsum('b m i j p c, b m h w j p -> b m h w i j c', + w_msg, v) + out = torch.einsum('b m h w i j, b m h w i j c -> b m h w i c', + att_map, v_msg) + + out = rearrange(out, 'b m h w l c -> b h w l (m c)', + m=self.heads) + out = self.to_out(out, types) + out = self.drop_out(out) + # (B L H W C) + out = out.permute(0, 3, 1, 2, 4) + return out \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/lss_submodule.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/lss_submodule.py new file mode 100644 index 0000000000000000000000000000000000000000..331eeb41c6a6e3aa5f877eace32e6da2f282fe74 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/lss_submodule.py @@ -0,0 +1,417 @@ +import torch +from torch import nn +from efficientnet_pytorch import EfficientNet +from torchvision.models.resnet import resnet18 +from torchvision.models.resnet import resnet101 +import torch.nn.functional as F +from opencood.utils.camera_utils import bin_depths +from opencood.models.sub_modules.torch_transformation_utils import \ + warp_affine_simple +from opencood.utils.transformation_utils import normalize_pairwise_tfm +from opencood.models.fuse_modules.fusion_in_one import \ + MaxFusion, AttFusion, V2VNetFusion, V2XViTFusion, When2commFusion, Where2commFusion, DiscoFusion + +class Up(nn.Module): + def __init__(self, in_channels, out_channels, scale_factor=2): + super().__init__() + + self.up = nn.Upsample(scale_factor=scale_factor, mode='bilinear', + align_corners=True) # upsample BxCxHxW -> BxCx2Hx2W + + self.conv = nn.Sequential( # two 3x3 convolutions + nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(out_channels), + nn.ReLU(inplace=True), # inplace=True performs the op in place to save memory + nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(out_channels), +
nn.ReLU(inplace=True) + ) + + def forward(self, x1, x2): + x1 = self.up(x1) # upsample x1 + x1 = torch.cat([x2, x1], dim=1) # concat x1 and x2 + return self.conv(x1) + + +class CamEncode(nn.Module): # extract image features for image encoding + def __init__(self, D, C, downsample, ddiscr, mode, use_gt_depth=False, depth_supervision=True): + super(CamEncode, self).__init__() + self.D = D # 42 + self.C = C # 64 + self.downsample = downsample + self.d_min = ddiscr[0] + self.d_max = ddiscr[1] + self.num_bins = ddiscr[2] + self.mode = mode + self.use_gt_depth = use_gt_depth + self.depth_supervision = depth_supervision # only relevant when gt depth is not used + + + self.trunk = EfficientNet.from_pretrained("efficientnet-b0") # use efficientnet to extract features + + self.up1 = Up(320+112, 512) # upsampling module, input/output channels are 320+112 and 512 + if downsample == 8: + self.up2 = Up(512+40, 512) + if not use_gt_depth: + self.depth_head = nn.Conv2d(512, self.D, kernel_size=1, padding=0) # 1x1 conv to change the channel dimension + + self.image_head = nn.Conv2d(512, self.C, kernel_size=1, padding=0) + + + def get_depth_dist(self, x, eps=1e-5): # softmax over the depth dimension, giving each pixel's probability over depth bins + return F.softmax(x, dim=1) + + def get_gt_depth_dist(self, x): # one-hot over the depth dimension, giving each pixel's probability over depth bins + """ + Args: + x: [B*N, H, W] + Returns: + x: [B*N, D, fH, fW] + """ + target = self.training + torch.clamp_max_(x, self.d_max) # save memory + # [B*N, H, W], indices (float), value: [0, num_bins) + depth_indices, mask = bin_depths(x, self.mode, self.d_min, self.d_max, self.num_bins, target=target) + depth_indices = depth_indices[:, self.downsample//2::self.downsample, self.downsample//2::self.downsample] + onehot_dist = F.one_hot(depth_indices.long()).permute(0,3,1,2) # [B*N, num_bins, fH, fW] + + if not target: + mask = mask[:, self.downsample//2::self.downsample, self.downsample//2::self.downsample].unsqueeze(1) + onehot_dist *= mask + + return onehot_dist, depth_indices + + def get_eff_features(self, x): # extract features with efficientnet + # adapted from https://github.com/lukemelas/EfficientNet-PyTorch/blob/master/efficientnet_pytorch/model.py#L231 + + endpoints = dict() + + # Stem + x = self.trunk._swish(self.trunk._bn0(self.trunk._conv_stem(x))) # x: 24 x 32 x 64 x 176 + prev_x = x + + # Blocks + for idx, block in enumerate(self.trunk._blocks): + drop_connect_rate = self.trunk._global_params.drop_connect_rate + if drop_connect_rate: + drop_connect_rate *= float(idx) / len(self.trunk._blocks) # scale drop connect_rate + x = block(x, drop_connect_rate=drop_connect_rate) + if prev_x.size(2) > x.size(2): + endpoints['reduction_{}'.format(len(endpoints)+1)] = prev_x + prev_x = x + + # Head + endpoints['reduction_{}'.format(len(endpoints)+1)] = x # x: 24 x 320 x 4 x 11 + x = self.up1(endpoints['reduction_5'], endpoints['reduction_4']) # upsample endpoints['reduction_5'] and concat it with endpoints['reduction_4'] + if self.downsample == 8: + x = self.up2(x, endpoints['reduction_3']) + return x # x: 24 x 512 x 8 x 22 + + def forward(self, x): + """ + Returns: + log_depth : [B*N, D, fH, fW], or None if not used later + depth_gt_indices : [B*N, fH, fW], or None if not used later + new_x : [B*N, C, D, fH, fW] + """ + x_img = x[:,:3:,:,:] + features = self.get_eff_features(x_img) # depth: B*N x D x fH x fW(24 x 41 x 8 x 22) x: B*N x C x D x fH x fW(24 x 64 x 41 x 8 x 22) + x_img = self.image_head(features) + + if self.depth_supervision or self.use_gt_depth: # depth data must exist + x_depth = x[:,3,:,:] + depth_gt, depth_gt_indices = self.get_gt_depth_dist(x_depth) + + if self.use_gt_depth: + new_x = depth_gt.unsqueeze(1) * x_img.unsqueeze(2) # new_x: 24 x 64 x 41 x 8 x
18 + return None, new_x + else: + depth_logit = self.depth_head(features) + depth = self.get_depth_dist(depth_logit) + new_x = depth.unsqueeze(1) * x_img.unsqueeze(2) # new_x: 24 x 64 x 41 x 8 x 18 + if self.depth_supervision: + return (depth_logit, depth_gt_indices), new_x + else: + return None, new_x + +class CamEncode_Resnet101(nn.Module): # extract image features for image encoding + def __init__(self, D, C, downsample, ddiscr, mode, use_gt_depth=False, depth_supervision=True): + super(CamEncode_Resnet101, self).__init__() + self.D = D # 42 + self.C = C # 64 + self.downsample = downsample + self.d_min = ddiscr[0] + self.d_max = ddiscr[1] + self.num_bins = ddiscr[2] + self.mode = mode + self.use_gt_depth = use_gt_depth + self.depth_supervision = depth_supervision # only relevant when gt depth is not used + + trunk = resnet101(pretrained=False, zero_init_residual=True) # use resnet101 to extract features + self.conv1 = trunk.conv1 + self.bn1 = trunk.bn1 + self.relu = trunk.relu + self.maxpool = trunk.maxpool + self.layer1 = trunk.layer1 + self.layer2 = trunk.layer2 + self.layer3 = nn.Identity() + + self.up1 = Up(320+112, 512) # upsampling module, input/output channels are 320+112 and 512 + if downsample == 8: + self.up2 = Up(512+40, 512) + if not use_gt_depth: + self.depth_head = nn.Conv2d(512, self.D, kernel_size=1, padding=0) # 1x1 conv to change the channel dimension + + self.image_head = nn.Conv2d(512, self.C, kernel_size=1, padding=0) + + + def get_depth_dist(self, x, eps=1e-5): # softmax over the depth dimension, giving each pixel's probability over depth bins + return F.softmax(x, dim=1) + + def get_gt_depth_dist(self, x): # one-hot over the depth dimension, giving each pixel's probability over depth bins + """ + Args: + x: [B*N, H, W] + Returns: + x: [B*N, D, fH, fW] + """ + target = self.training + torch.clamp_max_(x, self.d_max) # save memory + # [B*N, H, W], indices (float), value: [0, num_bins) + depth_indices, mask = bin_depths(x, self.mode, self.d_min, self.d_max, self.num_bins, target=target) + depth_indices = depth_indices[:, self.downsample//2::self.downsample, self.downsample//2::self.downsample] + onehot_dist = F.one_hot(depth_indices.long()).permute(0,3,1,2) # [B*N, num_bins, fH, fW] + + if not target: + mask = mask[:, self.downsample//2::self.downsample, self.downsample//2::self.downsample].unsqueeze(1) + onehot_dist *= mask + + return onehot_dist, depth_indices + + def resnet101_forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x_8 = x # channel = 512 + x = self.layer3(x) # channel = 1024 + return x_8, x + + def get_eff_features(self, x): # extract features (resnet101 backbone here, despite the method name) + # adapted from https://github.com/lukemelas/EfficientNet-PyTorch/blob/master/efficientnet_pytorch/model.py#L231 + #x: 16 x 3 x 480 x 640 + x_8, x_16 = self.resnet101_forward(x) # 16x512x60x80 , 16x1024x30x40 + if self.downsample == 8: + return x_8 + else: + res = self.final_conv(x_16) + return res + + + def forward(self, x): + """ + Returns: + log_depth : [B*N, D, fH, fW], or None if not used later + depth_gt_indices : [B*N, fH, fW], or None if not used later + new_x : [B*N, C, D, fH, fW] + """ + #x: 16 x 3 x 480 x 640 + #print(x.shape) + x_img = x[:,:3:,:,:] + features = self.get_eff_features(x_img) # depth: B*N x D x fH x fW(24 x 41 x 8 x 22) x: B*N x C x D x fH x fW(24 x 64 x 41 x 8 x 22) + x_img = self.image_head(features) + + if self.depth_supervision or self.use_gt_depth: # depth data must exist + x_depth = x[:,3,:,:] + depth_gt, depth_gt_indices = self.get_gt_depth_dist(x_depth) + + if self.use_gt_depth: + new_x = depth_gt.unsqueeze(1) * x_img.unsqueeze(2) # new_x: 24 x 64 x 41 x 8 x 18 + return None, new_x + else: + depth_logit =
self.depth_head(features) + depth = self.get_depth_dist(depth_logit) + new_x = depth.unsqueeze(1) * x_img.unsqueeze(2) # new_x: 24 x 64 x 41 x 8 x 18 + if self.depth_supervision: + return (depth_logit, depth_gt_indices), new_x + else: + return None, new_x + + +class BevEncode(nn.Module): + def __init__(self, inC, outC): # inC: 64 outC: not 1 for object detection + super(BevEncode, self).__init__() + + # use the first 3 stages of resnet as the backbone + trunk = resnet18(pretrained=False, zero_init_residual=True) + self.conv1 = nn.Conv2d(inC, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = trunk.bn1 + self.relu = trunk.relu + + self.layer1 = trunk.layer1 + self.layer2 = trunk.layer2 + self.layer3 = trunk.layer3 + + self.up1 = Up(64+256, 256, scale_factor=4) + self.up2 = nn.Sequential( # 2x upsample -> 3x3 conv -> 1x1 conv + nn.Upsample(scale_factor=2, mode='bilinear', + align_corners=True), + nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(128), + nn.ReLU(inplace=True), + nn.Conv2d(128, outC, kernel_size=1, padding=0), + ) + + def forward(self, x): # x: 4 x 64 x 240 x 240 + x = self.conv1(x) # x: 4 x 64 x 120 x 120 + x = self.bn1(x) + x = self.relu(x) + + x1 = self.layer1(x) # x1: 4 x 64 x 120 x 120 + x = self.layer2(x1) # x: 4 x 128 x 60 x 60 + x = self.layer3(x) # x: 4 x 256 x 30 x 30 + + x = self.up1(x, x1) # 4x upsample x and concat it with x1; x: 4 x 256 x 120 x 120 + x = self.up2(x) # 2x upsample -> 3x3 conv -> 1x1 conv; x: 4 x 1 x 240 x 240 + + return x + +class BevEncodeSSFusion(nn.Module): + """ + Single-scale fusion version of the ResNet BEV encoder + """ + def __init__(self, fusion_args): # inC: 64 outC: not 1 for object detection + super(BevEncodeSSFusion, self).__init__() + args = fusion_args['args'] + inC = args['in_channels'] + self.discrete_ratio = args['voxel_size'][0] + self.downsample_rate = 1 + # use the first 3 stages of resnet as the backbone + trunk = resnet18(pretrained=False, zero_init_residual=True) + self.conv1 = nn.Conv2d(inC, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = trunk.bn1 + self.relu = trunk.relu # make it 64 channels + + self.layer1 = trunk.layer1 + self.layer2 = trunk.layer2 + self.layer3 = trunk.layer3 + + self.up_layer1 = Up(64+256, 256, scale_factor=2) + self.up_layer2 = Up(128+256, 256, scale_factor=2) + self.down_layer = nn.Sequential( + nn.Conv2d(256, 256, kernel_size=3, + stride=1, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(256, 128, kernel_size=3, + stride=1,padding=1), + nn.ReLU(inplace=True) + ) + if fusion_args['core_method'] == "max": + self.fuse_module = MaxFusion() + elif fusion_args['core_method'] == "att": + self.fuse_module = AttFusion(256) + elif fusion_args['core_method'] == "disconet": + self.fuse_module = DiscoFusion(256) + elif fusion_args['core_method'] == "v2vnet": + self.fuse_module = V2VNetFusion(args['v2vnet']) + elif fusion_args['core_method'] == "v2xvit": + self.fuse_module = V2XViTFusion(args['v2xvit']) + elif fusion_args['core_method'] == "when2comm": + self.fuse_module = When2commFusion(args['when2comm']) + elif fusion_args['core_method'] == "where2comm": + self.fuse_module = Where2commFusion(args['where2comm']) + else: + raise NotImplementedError(f"unsupported fusion method: {fusion_args['core_method']}") + + def forward(self, x, record_len, pairwise_t_matrix): # x: 4 x 64 x 240 x 240 + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # (B,L,L,2,3) + pairwise_t_matrix = normalize_pairwise_tfm(pairwise_t_matrix, H, W, self.discrete_ratio, self.downsample_rate) + + x = self.conv1(x) # x: 4 x 64 x 120 x 120 + x = self.bn1(x) + x = self.relu(x) + + x1 = self.layer1(x) # x1: 4 x 64 x 120 x 120 + x2 =
self.layer2(x1) # x2: 4 x 128 x 60 x 60 + x3 = self.layer3(x2) # x3: 4 x 256 x 30 x 30 + x_single = self.down_layer(self.up_layer1(self.up_layer2(x3, x2), x1)) # 4 x 128 x 120 x 120 + + x = self.up_layer1(self.up_layer2(x3, x2), x1) # 4 x 256 x 120 x 120 + x_fuse = self.fuse_module(x, record_len, pairwise_t_matrix) + x_fuse = self.down_layer(x_fuse) + + + return x_single, x_fuse + + + +class BevEncodeMSFusion(nn.Module): + """ + Multiscale version of ResNet Encoder + """ + def __init__(self, fusion_args): # inC: 64 outC: not 1 for object detection + super(BevEncodeMSFusion, self).__init__() + args = fusion_args['args'] + inC = args['in_channels'] + self.discrete_ratio = args['voxel_size'][0] + self.downsample_rate = 1 + # use the first 3 stages of resnet as the backbone + trunk = resnet18(pretrained=False, zero_init_residual=True) + self.conv1 = nn.Conv2d(inC, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = trunk.bn1 + self.relu = trunk.relu # make it 64 channels + + self.layer1 = trunk.layer1 + self.layer2 = trunk.layer2 + self.layer3 = trunk.layer3 + + self.up_layer1 = Up(64+256, 256, scale_factor=2) + self.up_layer2 = Up(128+256, 256, scale_factor=2) + self.down_layer = nn.Sequential( + nn.Conv2d(256, 256, kernel_size=3, + stride=1, padding=1), + nn.ReLU(inplace=True), + nn.Conv2d(256, 128, kernel_size=3, + stride=1,padding=1), + nn.ReLU(inplace=True) + ) + if fusion_args['core_method'] == "max_ms": + self.fuse_module = nn.ModuleList([MaxFusion(), MaxFusion(), MaxFusion()]) + elif fusion_args['core_method'] == "att_ms": + self.fuse_module = nn.ModuleList([AttFusion(64), AttFusion(128), AttFusion(256)]) + else: + raise NotImplementedError(f"unsupported fusion method: {fusion_args['core_method']}") + + def forward(self, x, record_len, pairwise_t_matrix): # x: 4 x 64 x 240 x 240 + _, C, H, W = x.shape + B, L = pairwise_t_matrix.shape[:2] + + # (B,L,L,2,3) + pairwise_t_matrix = normalize_pairwise_tfm(pairwise_t_matrix, H, W, self.discrete_ratio, self.downsample_rate) + + x = self.conv1(x) # x: 4 x 64 x 120 x 120 + x = self.bn1(x) + x = self.relu(x) + + x1 = self.layer1(x) # x1: 4 x 64 x 120 x 120 + x2 = self.layer2(x1) # x2: 4 x 128 x 60 x 60 + x3 = self.layer3(x2) # x3: 4 x 256 x 30 x 30 + x_single = self.down_layer(self.up_layer1(self.up_layer2(x3, x2), x1)) # 4 x 64 x 120 x 120 + + x1_fuse = self.fuse_module[0](x1, record_len, pairwise_t_matrix) + x2_fuse = self.fuse_module[1](x2, record_len, pairwise_t_matrix) + x3_fuse = self.fuse_module[2](x3, record_len, pairwise_t_matrix) + + x_fuse = self.down_layer(self.up_layer1(self.up_layer2(x3_fuse, x2_fuse), x1_fuse)) # 4 x 64 x 120 x 120 + + return x_single, x_fuse + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/mash_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/mash_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..995038082841796698d7030632d4e69bd6695f09 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/mash_utils.py @@ -0,0 +1,131 @@ +import torch +import torch.nn as nn + + +class conv2DBatchNormRelu(nn.Module): + def __init__( + self, + in_channels, + n_filters, + k_size, + stride, + padding, + bias=True, + dilation=1, + is_batchnorm=True, + shouldUseReLU=True, + ): + super(conv2DBatchNormRelu, self).__init__() + + conv_mod = nn.Conv2d( + int(in_channels), + int(n_filters), + kernel_size=k_size, + padding=padding, + stride=stride, + bias=bias, + dilation=dilation, + ) + + if shouldUseReLU: + if is_batchnorm: + self.cbr_unit = nn.Sequential(conv_mod, nn.BatchNorm2d(int(n_filters)),
nn.ReLU(inplace=False)) + else: + self.cbr_unit = nn.Sequential(conv_mod, nn.ReLU(inplace=False)) + else: + if is_batchnorm: + self.cbr_unit = nn.Sequential(conv_mod, nn.BatchNorm2d(int(n_filters))) + else: + self.cbr_unit = nn.Sequential(conv_mod) + + + def forward(self, inputs): + outputs = self.cbr_unit(inputs) + return outputs + +class segnetDown3(nn.Module): + def __init__(self, in_size, out_size, indices=False): + super(segnetDown3, self).__init__() + self.conv1 = conv2DBatchNormRelu(in_size, out_size, 3, 1, 1) + self.conv2 = conv2DBatchNormRelu(out_size, out_size, 3, 1, 1) + self.conv3 = conv2DBatchNormRelu(out_size, out_size, 3, 1, 1) + + self.indices = indices + if indices: + self.maxpool_with_argmax = nn.MaxPool2d(2, 2, return_indices=True) + else: + self.maxpool_without_argmax = nn.MaxPool2d(2, 2, return_indices=False) + + def forward(self, inputs): + outputs = self.conv1(inputs) + outputs = self.conv2(outputs) + outputs = self.conv3(outputs) + unpooled_shape = outputs.size() + + if self.indices: + outputs, indices = self.maxpool_with_argmax(outputs) + return outputs, indices, unpooled_shape + else: + outputs = self.maxpool_without_argmax(outputs) + return outputs + +class segnetUp3(nn.Module): + def __init__(self, in_size, out_size, shouldUseReLU=True): + super(segnetUp3, self).__init__() + self.unpool = nn.MaxUnpool2d(2, 2) + self.up = torch.nn.Upsample(scale_factor=2,mode='bilinear',align_corners=True) + self.conv1 = conv2DBatchNormRelu(in_size, in_size, 3, 1, 1) + self.conv2 = conv2DBatchNormRelu(in_size, in_size, 3, 1, 1) + self.conv3 = conv2DBatchNormRelu(in_size, out_size, 3, 1, 1, shouldUseReLU=shouldUseReLU) + + def forward(self, inputs, indices=None, output_shape=None): + if indices is not None: + outputs = self.unpool(input=inputs, indices=indices, output_size=output_shape) + else: + outputs = self.up(inputs) + + outputs = self.conv1(outputs) + outputs = self.conv2(outputs) + outputs = self.conv3(outputs) + return outputs + +class QueryEncoder(nn.Module): + def __init__(self, in_ch, out_ch): + super(QueryEncoder, self).__init__() + + self.model = nn.Sequential( + conv2DBatchNormRelu(in_ch, 512, 1, 1, 0), + conv2DBatchNormRelu(512, 512, 1, 1, 0), + conv2DBatchNormRelu(512, out_ch, 1, 1, 0), + ) + def forward(self, x): + return self.model(x) + +class KeyEncoder(nn.Module): + def __init__(self, in_ch, out_ch): + super(KeyEncoder,self).__init__() + self.model = nn.Sequential( + conv2DBatchNormRelu(in_ch, 512, 1, 1, 0), + # conv2DBatchNormRelu(512, 512, 1, 1, 0), + conv2DBatchNormRelu(512, out_ch, 1, 1, 0), + ) + def forward(self, x): + return self.model(x) + +class SmoothingNetwork(nn.Module): + def __init__(self, in_ch=32*32+1): + super(SmoothingNetwork, self).__init__() + out_ch = in_ch + self.d32to16 = segnetDown3(in_ch,256,indices=True) + self.d16to08 = segnetDown3(256,128,indices=True) + self.d08to16 = segnetUp3(128,256) + self.d16to32 = segnetUp3(256,out_ch) + + def forward(self, distAB): + Da16d,Da_i16,Da_s16 = self.d32to16(torch.nn.Softmax(1)(distAB)) + Da08d,Da_i08,Da_s08 = self.d16to08(Da16d) + Da08 = Da08d + Da16 = self.d08to16(Da08,Da_i08,Da_s08) + Da32 = self.d16to32(Da16,Da_i16,Da_s16) + return Da32 + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/matcher.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/matcher.py new file mode 100644 index 0000000000000000000000000000000000000000..132d529c38d39c05839976dfd68129825a30655f --- /dev/null +++ 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/matcher.py @@ -0,0 +1,181 @@ +import torch +from torch import nn + +from opencood.pcdet_utils.iou3d_nms.iou3d_nms_utils import boxes_iou3d_gpu +from opencood.utils.box_utils import corner_to_center_torch, boxes_to_corners_3d, project_box3d, project_points_by_matrix_torch, get_mask_for_boxes_within_range_torch +from opencood.utils.transformation_utils import x1_to_x2 +from icecream import ic +import copy +pi = 3.141592653 + + +def limit_period(val, offset=0.5, period=2 * pi): + return val - torch.floor(val / period + offset) * period + + +class Matcher(nn.Module): + """Correct localization error and use Algorithm 1: + BBox matching with scores to fuse the proposal BBoxes""" + + def __init__(self, cfg, pc_range): + super(Matcher, self).__init__() + self.pc_range = pc_range + + @torch.no_grad() + def forward(self, data_dict): + clusters, scores = self.clustering(data_dict) + data_dict['boxes_fused'], data_dict[ + 'scores_fused'] = self.cluster_fusion(clusters, scores) + self.merge_keypoints(data_dict) + return data_dict + + + def clustering(self, data_dict): + """ + Assign predicted boxes to clusters according to their ious with each other + """ + clusters_batch = [] + scores_batch = [] + record_len = [int(l) for l in data_dict['record_len']] + lidar_poses = data_dict['lidar_pose'].cpu().numpy() + for i, l in enumerate(record_len): + cur_boxes_list = data_dict['det_boxes'][sum(record_len[:i]):sum(record_len[:i])+l] + + # Added by Yifan Lu + if data_dict['proj_first'] is False: + cur_boxes_list_ego = [] + # project bounding box to ego coordinate. [x,y,z,l,w,h,yaw] + cur_boxes_list_ego.append(cur_boxes_list[0]) + for agent_id in range(1, l): + tfm = x1_to_x2(lidar_poses[sum(record_len[:i])+agent_id], + lidar_poses[sum(record_len[:i])]) + tfm = torch.from_numpy(tfm).to(cur_boxes_list[0].device).float() + cur_boxes = cur_boxes_list[agent_id] + cur_corners = boxes_to_corners_3d(cur_boxes, order='hwl') + cur_corners_ego = project_box3d(cur_corners, tfm) + cur_boxes_ego = corner_to_center_torch(cur_corners_ego, order='hwl') + cur_boxes_list_ego.append(cur_boxes_ego) + cur_boxes_list = cur_boxes_list_ego + + + cur_scores_list = data_dict['det_scores'][sum(record_len[:i]):sum(record_len[:i])+l] + cur_boxes_list = [b for b in cur_boxes_list if len(b) > 0] + cur_scores_list = [s for s in cur_scores_list if len(s) > 0] + if len(cur_scores_list) == 0: + clusters_batch.append([torch.Tensor([0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.57]). 
+ to(torch.device('cuda')).view(1, 7)]) + scores_batch.append([torch.Tensor([0.01]).to(torch.device('cuda')).view(-1)]) + continue + + pred_boxes_cat = torch.cat(cur_boxes_list, dim=0) + pred_boxes_cat[:, -1] = limit_period(pred_boxes_cat[:, -1]) + pred_scores_cat = torch.cat(cur_scores_list, dim=0) + + ious = boxes_iou3d_gpu(pred_boxes_cat, pred_boxes_cat) + cluster_indices = torch.zeros(len(ious)).int() # gt assignments of preds + cur_cluster_id = 1 + while torch.any(cluster_indices == 0): + cur_idx = torch.where(cluster_indices == 0)[0][0] # find the idx of the first pred which is not assigned yet + cluster_indices[torch.where(ious[cur_idx] > 0.1)[0]] = cur_cluster_id + cur_cluster_id += 1 + clusters = [] + scores = [] + for j in range(1, cur_cluster_id): + clusters.append(pred_boxes_cat[cluster_indices==j]) + scores.append(pred_scores_cat[cluster_indices==j]) + clusters_batch.append(clusters) + scores_batch.append(scores) + + return clusters_batch, scores_batch + + def cluster_fusion(self, clusters, scores): + """ + Merge boxes in each cluster with scores as weights for merging + """ + boxes_fused = [] + scores_fused = [] + for cl, sl in zip(clusters, scores): # each frame + for c, s in zip(cl, sl): # frame's cluster + # reverse direction for non-dominant direction of boxes + dirs = c[:, -1] + max_score_idx = torch.argmax(s) + dirs_diff = torch.abs(dirs - dirs[max_score_idx].item()) + lt_pi = (dirs_diff > pi).int() + dirs_diff = dirs_diff * (1 - lt_pi) + ( + 2 * pi - dirs_diff) * lt_pi + score_lt_half_pi = s[dirs_diff > pi / 2].sum() # larger than + score_set_half_pi = s[ + dirs_diff <= pi / 2].sum() # small equal than + # select larger scored direction as final direction + if score_lt_half_pi <= score_set_half_pi: + dirs[dirs_diff > pi / 2] += pi + else: + dirs[dirs_diff <= pi / 2] += pi + dirs = limit_period(dirs) + s_normalized = s / s.sum() + sint = torch.sin(dirs) * s_normalized + cost = torch.cos(dirs) * s_normalized + theta = torch.atan2(sint.sum(), cost.sum()).view(1, ) + center_dim = c[:, :-1] * s_normalized[:, None] + + boxes_fused.append(torch.cat([center_dim.sum(dim=0), theta])) + s_sorted = torch.sort(s, descending=True).values + s_fused = 0 + for i, ss in enumerate(s_sorted): + s_fused += ss ** (i + 1) + s_fused = torch.tensor([min(s_fused, 1.0)], device=s.device) + scores_fused.append(s_fused) + + assert len(boxes_fused) > 0 + boxes_fused = torch.stack(boxes_fused, dim=0) + len_records = [len(c) for c in clusters] # each frame + boxes_fused = [ + boxes_fused[sum(len_records[:i]):sum(len_records[:i]) + l] for i, l + in enumerate(len_records)] + scores_fused = torch.stack(scores_fused, dim=0) + scores_fused = [ + scores_fused[sum(len_records[:i]):sum(len_records[:i]) + l] for + i, l in enumerate(len_records)] + + for i in range(len(boxes_fused)): + corners3d = boxes_to_corners_3d(boxes_fused[i], order='hwl') + mask = get_mask_for_boxes_within_range_torch(corners3d, self.pc_range) + boxes_fused[i] = boxes_fused[i][mask] + scores_fused[i] = scores_fused[i][mask] + + return boxes_fused, scores_fused + + def merge_keypoints(self, data_dict): + # merge keypoints + kpts_feat_out = [] + kpts_coor_out = [] + kpts_coor_out_ego = [] + keypoints_features = data_dict['point_features'] # sum(record_len) + keypoints_coords = data_dict['point_coords'] # [[N,3],...] 
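# A minimal, self-contained sketch (not the checkpointed module itself) of the
# score-weighted fusion that cluster_fusion above applies to one cluster of
# proposals: center and dims are averaged with normalized scores, yaw is fused
# as a weighted circular mean via atan2, and the fused confidence is the sum of
# the descendingly sorted scores raised to increasing powers, clamped to 1.
# The direction-flip handling for opposite-facing boxes is omitted, and the toy
# boxes below are illustrative values only.
import torch

def fuse_one_cluster(boxes, scores):
    # boxes: [N, 7] as [x, y, z, dim1, dim2, dim3, yaw]; scores: [N]
    w = scores / scores.sum()                        # normalized weights
    yaw = boxes[:, -1]
    theta = torch.atan2((torch.sin(yaw) * w).sum(),
                        (torch.cos(yaw) * w).sum())  # weighted circular mean
    center_dim = (boxes[:, :-1] * w[:, None]).sum(dim=0)
    s_sorted = torch.sort(scores, descending=True).values
    s_fused = sum(s ** (i + 1) for i, s in enumerate(s_sorted))
    return torch.cat([center_dim, theta.view(1)]), torch.clamp(s_fused, max=1.0)

boxes = torch.tensor([[10.0, 2.0, 0.0, 1.5, 1.9, 4.2, 0.05],
                      [10.3, 2.1, 0.0, 1.6, 2.0, 4.0, -0.02]])
scores = torch.tensor([0.9, 0.6])
box_fused, score_fused = fuse_one_cluster(boxes, scores)
# score_fused = min(0.9**1 + 0.6**2, 1.0) = 1.0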
+ idx = 0 + record_len = data_dict['record_len'] + lidar_poses = data_dict['lidar_pose'].cpu().numpy() + for l in data_dict['record_len']: + # Added by Yifan Lu + # if not project first, first transform the keypoints coords + if data_dict['proj_first'] is False: + kpts_coor_cur = [] + for agent_id in range(0, l): + tfm = x1_to_x2(lidar_poses[idx+agent_id], lidar_poses[idx]) + tfm = torch.from_numpy(tfm).to(keypoints_coords[0].device).float() + keypoints_coords[idx+agent_id][:, :3] = project_points_by_matrix_torch(keypoints_coords[idx+agent_id][:,:3], tfm) + + kpts_coor_out_ego.append( + torch.cat(keypoints_coords[idx:l + idx], dim=0) + ) + + kpts_coor_out.append( + torch.cat(keypoints_coords[idx:l + idx], dim=0)) + kpts_feat_out.append( + torch.cat(keypoints_features[idx:l + idx], dim=0)) + idx += l + data_dict['point_features'] = kpts_feat_out + data_dict['point_coords'] = kpts_coor_out + + if data_dict['proj_first'] is False: + data_dict['point_coords'] = kpts_coor_out_ego diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/matcher_v2.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/matcher_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..c24c2cc9fa86f8b07eca9bc3b058791c7071b454 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/matcher_v2.py @@ -0,0 +1,245 @@ +""" + A new version of proposal matcher. + It will collect voxel features, instead of keypoint features. + TODO: Add agent-object pose graph optimization +""" + +import torch +from torch import nn +import numpy as np +import spconv +from collections import OrderedDict +import opencood.utils.spconv_utils as spconv_utils +from opencood.pcdet_utils.iou3d_nms.iou3d_nms_utils import boxes_iou3d_gpu +from opencood.utils.box_utils import corner_to_center_torch, boxes_to_corners_3d, project_box3d, get_mask_for_boxes_within_range_torch +from opencood.utils.transformation_utils import x1_to_x2 +from opencood.utils.common_utils import limit_period +from icecream import ic + + +class MatcherV2(nn.Module): + + def __init__(self, cfg, pc_range): + super(MatcherV2, self).__init__() + self.order = cfg['order'] + self.voxel_size = cfg['voxel_size'] + self.feature_source = cfg['feature_source'] # ['x_conv2', 'x_conv3', 'x_conv4'] + self.pc_range = pc_range + self.sp_wraper = spconv_utils.warpSparseTensor() + self.sp_merger = spconv_utils.MergeDuplicate("max") + + @torch.no_grad() + def forward(self, data_dict): + clusters, scores = self.clustering(data_dict) + data_dict['boxes_fused'], data_dict[ + 'scores_fused'] = self.cluster_fusion(clusters, scores) + self.collect_voxel_feature(data_dict) + return data_dict + + def clustering(self, data_dict): + """ + Assign predicted boxes to clusters according to their ious with each other + """ + clusters_batch = [] + scores_batch = [] + record_len = [int(cavnum) for cavnum in data_dict['record_len']] + lidar_poses = data_dict['lidar_pose'].cpu().numpy() + + # iterate each frame + for i, cavnum in enumerate(record_len): + cur_boxes_list = data_dict['det_boxes'][sum(record_len[:i]):sum(record_len[:i]) + cavnum] + cur_boxes_list_ego = [] + # preserve ego boxes + cur_boxes_list_ego.append(cur_boxes_list[0]) + # transform box to ego coordinate. 
[x,y,z,h,w,l,yaw] + for agent_id in range(1, cavnum): + tfm = x1_to_x2(lidar_poses[sum(record_len[:i])+agent_id], + lidar_poses[sum(record_len[:i])]) + tfm = torch.from_numpy(tfm).to(cur_boxes_list[0].device).float() + cur_boxes = cur_boxes_list[agent_id] + cur_corners = boxes_to_corners_3d(cur_boxes, order=self.order) + cur_corners_ego = project_box3d(cur_corners, tfm) + cur_boxes_ego = corner_to_center_torch(cur_corners_ego, order=self.order) + cur_boxes_list_ego.append(cur_boxes_ego) + + cur_boxes_list = cur_boxes_list_ego + + cur_scores_list = data_dict['det_scores'][sum(record_len[:i]):sum(record_len[:i]) + cavnum] + cur_boxes_list = [b for b in cur_boxes_list if len(b) > 0] + cur_scores_list = [s for s in cur_scores_list if len(s) > 0] + + if len(cur_scores_list) == 0: + clusters_batch.append([torch.Tensor([0.0, 0.0, 0.0, 1.6, 2.0, 4.0, 0]). # + to(torch.device('cuda:0')).view(1, 7)]) + scores_batch.append([torch.Tensor([0.01]).to(torch.device('cuda:0')).view(-1)]) + continue + + pred_boxes_cat = torch.cat(cur_boxes_list, dim=0) + pred_boxes_cat[:, -1] = limit_period(pred_boxes_cat[:, -1]) + pred_scores_cat = torch.cat(cur_scores_list, dim=0) + + ious = boxes_iou3d_gpu(pred_boxes_cat, pred_boxes_cat) + cluster_indices = torch.zeros(len(ious)).int() + cur_cluster_id = 1 + + # cluster proposals + while torch.any(cluster_indices == 0): + cur_idx = torch.where(cluster_indices == 0)[0][0] # find the idx of the first pred which is not assigned yet + cluster_indices[torch.where(ious[cur_idx] > 0.1)[0]] = cur_cluster_id + cur_cluster_id += 1 + + clusters = [] + scores = [] + + for j in range(1, cur_cluster_id): + clusters.append(pred_boxes_cat[cluster_indices==j]) + scores.append(pred_scores_cat[cluster_indices==j]) + + clusters_batch.append(clusters) + scores_batch.append(scores) + + return clusters_batch, scores_batch + + def cluster_fusion(self, clusters, scores): + """ + Merge boxes in each cluster with scores as weights for merging. + TODO: change to select the proposal with highest score? 
And then adjust the proposal + """ + boxes_fused = [] + scores_fused = [] + for cl, sl in zip(clusters, scores): # cl, sl are clusters and scores within one sample + for c, s in zip(cl, sl): # one sample (cl) has many clusters (c), c,s,a correspond to one cluster + # reverse direction for non-dominant direction of boxes + dirs = c[:, -1] + max_score_idx = torch.argmax(s) + dirs_diff = torch.abs(dirs - dirs[max_score_idx].item()) + lt_pi = (dirs_diff > np.pi).int() + dirs_diff = dirs_diff * (1 - lt_pi) + ( + 2 * np.pi - dirs_diff) * lt_pi + score_lt_half_pi = s[dirs_diff > np.pi / 2].sum() # larger than + score_set_half_pi = s[ + dirs_diff <= np.pi / 2].sum() # small equal than + # select larger scored direction as final direction + if score_lt_half_pi <= score_set_half_pi: + dirs[dirs_diff > np.pi / 2] += np.pi + else: + dirs[dirs_diff <= np.pi / 2] += np.pi + + dirs = limit_period(dirs) + s_normalized = s / s.sum() + sint = torch.sin(dirs) * s_normalized + cost = torch.cos(dirs) * s_normalized + theta = torch.atan2(sint.sum(), cost.sum()).view(1, ) + center_dim = c[:, :-1] * s_normalized[:, None] + + boxes_fused.append(torch.cat([center_dim.sum(dim=0), theta])) + s_sorted = torch.sort(s, descending=True).values + s_fused = 0 + for i, ss in enumerate(s_sorted): + s_fused += ss ** (i + 1) + s_fused = torch.tensor([min(s_fused, 1.0)], device=s.device) + scores_fused.append(s_fused) + + assert len(boxes_fused) > 0 + boxes_fused = torch.stack(boxes_fused, dim=0) + box_num_sample = [len(c) for c in clusters] # in a batch, each sample has how many boxes + boxes_fused = [ + boxes_fused[sum(box_num_sample[:i]):sum(box_num_sample[:i]) + l] for i, l + in enumerate(box_num_sample)] + scores_fused = torch.stack(scores_fused, dim=0) + scores_fused = [ + scores_fused[sum(box_num_sample[:i]):sum(box_num_sample[:i]) + l] for + i, l in enumerate(box_num_sample)] + + for i in range(len(boxes_fused)): + corners3d = boxes_to_corners_3d(boxes_fused[i], order=self.order) + mask = get_mask_for_boxes_within_range_torch(corners3d, self.pc_range) + boxes_fused[i] = boxes_fused[i][mask] + scores_fused[i] = scores_fused[i][mask] + + return boxes_fused, scores_fused + + def retrieve_cav_sp_feature(self, sp_feature, agent_pos): + features = sp_feature.features + indices = sp_feature.indices + mask = indices[:, 0] == agent_pos + + new_indices = indices.clone() + new_indices[:, 0] = 0 + + return spconv.SparseConvTensor(features[mask], new_indices[mask], sp_feature.spatial_shape, batch_size=1) + + def collect_voxel_feature(self, data_dict): + """ + 1. collect features by feauture_source + 2. convert sparse features to dense features + 3. warp dense feature map and merge them + 4. 
convert dense feature map to sparse + """ + + multi_scale_3d_features = data_dict['multi_scale_3d_features'] # sum(record_len), but SparseConvTensor + multi_scale_3d_stride = data_dict['multi_scale_3d_strides'] + data_dict['multi_scale_3d_features_fused'] = OrderedDict() + lidar_poses = data_dict['lidar_pose'].cpu().numpy() + device = data_dict['lidar_pose'].device + + for srcname in self.feature_source: + start_agent_pos = 0 + sp_feature = multi_scale_3d_features[srcname] + stride = multi_scale_3d_stride[srcname] + voxel_size = torch.tensor(self.voxel_size).to(device) + voxel_size *= stride + sp_tensor_fused_list = [] # each sample + # ic(srcname) + # ic(sp_feature.indices) + + for idx, cavnum in enumerate(data_dict['record_len']): + # each sample + sp_tensor_cav_list = [self.retrieve_cav_sp_feature(sp_feature, start_agent_pos)] # each cav + + for agent_id in range(1, cavnum): + sp_tensor_cav = self.retrieve_cav_sp_feature(sp_feature, start_agent_pos+agent_id) + + if data_dict['proj_first'] is False: + tfm = x1_to_x2(lidar_poses[start_agent_pos+agent_id], lidar_poses[start_agent_pos]) + tfm = torch.from_numpy(tfm).to(device) # cav_to_ego + sp_tensor_warp = self.sp_wraper(sp_tensor_cav, tfm, voxel_size, self.pc_range) + sp_tensor_cav = sp_tensor_warp + + sp_tensor_cav_list.append(sp_tensor_cav) + + sp_tensor_fused = spconv_utils.fuseSparseTensor(sp_tensor_cav_list) # only fuse + sp_tensor_fused = self.sp_merger(sp_tensor_fused) + sp_tensor_fused.indices[:, 0] = idx # batch_idx set to sample idx + + # sp_tensor_fused = self.retrieve_cav_sp_feature(sp_feature, start_agent_pos) + # sp_tensor_fused.indices[:, 0] = idx + + sp_tensor_fused_list.append(sp_tensor_fused) + + start_agent_pos += cavnum + + + + + new_features = torch.cat([x.features for x in sp_tensor_fused_list], dim=0) + new_indice = torch.cat([x.indices for x in sp_tensor_fused_list], dim=0) + features_fused = spconv.SparseConvTensor(new_features, new_indice, sp_tensor_fused_list[0].spatial_shape, + len(data_dict['record_len']), sp_tensor_fused_list[0].grid) + + data_dict['multi_scale_3d_features_fused'][srcname] = features_fused + + # ic("test dense feature") + + # # ic(features_fused.features) + # ic(features_fused.indices.shape) + # ic(features_fused.features.shape) + # ic(features_fused.spatial_shape) + # ic(features_fused.indices[:,1].min()) + # ic(features_fused.indices[:,2].min()) + # ic(features_fused.indices[:,3].min()) + # ic(features_fused.indices[:,1].max()) + # ic(features_fused.indices[:,2].max()) + # ic(features_fused.indices[:,3].max()) + # # dense_feature = features_fused.dense() + # # ic(dense_feature.shape) diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/matcher_v3.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/matcher_v3.py new file mode 100644 index 0000000000000000000000000000000000000000..077c9d25528a34a151c033323631337e9869d3a1 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/matcher_v3.py @@ -0,0 +1,244 @@ +""" + A new version of proposal matcher. + It will collect BEV features, instead of keypoint features. 
+ TODO: Add agent-object pose graph optimization + +""" + +import torch +from torch import nn +import numpy as np +import spconv +from collections import OrderedDict +from opencood.pcdet_utils.iou3d_nms.iou3d_nms_utils import boxes_iou3d_gpu +from opencood.utils.box_utils import corner_to_center_torch, boxes_to_corners_3d, project_box3d, get_mask_for_boxes_within_range_torch +from opencood.utils.transformation_utils import x1_to_x2 +from opencood.utils.common_utils import limit_period +from icecream import ic +from itertools import compress + + +class MatcherV3(nn.Module): + def __init__(self, cfg, pc_range): + super(MatcherV3, self).__init__() + self.order = cfg['order'] + self.pc_range = pc_range + self.enlarge_ratio = cfg.get("enlarge_ratio", 1) + + @torch.no_grad() + def forward(self, data_dict): + clusters, scores, agentid_batch, view_vector_batch = self.clustering(data_dict) + data_dict['boxes_fused'], data_dict['scores_fused'], data_dict['agentid_fused'], data_dict['view_vector_fused'] \ + = self.cluster_fusion(clusters, scores, agentid_batch, view_vector_batch) + + self.get_roi_from_box(data_dict) # ['roi_fused'] + return data_dict + + def clustering(self, data_dict): + """ + Assign predicted boxes to clusters according to their ious with each other. + Assign the order to boxes, belong to which agent + + Returns: + clusters_batch: [batch1, batch2, batch3, ...], + where batch1 = [[box1_in_cluster1, box2_in_cluster1, box3_in_cluster1], [box1_in_cluster2, box2_in_cluster2], ...] + """ + clusters_batch = [] + scores_batch = [] + agentid_batch = [] + view_vector_batch = [] + + record_len = [int(cavnum) for cavnum in data_dict['record_len']] + lidar_poses = data_dict['lidar_pose'].cpu().numpy() + + # iterate each frame + for i, cavnum in enumerate(record_len): + cur_boxes_list = data_dict['det_boxes'][sum(record_len[:i]):sum(record_len[:i]) + cavnum] # within one sample, different cav + cur_boxes_list_ego = [] + cur_agentid_list = [] + cur_view_vector_list = [] + # preserve ego boxes + cur_boxes_list_ego.append(cur_boxes_list[0]) + cur_agentid_list.append(torch.tensor([sum(record_len[:i]) + 0] * len(cur_boxes_list[0]))) + + ### view vector #### + cur_boxes = cur_boxes_list[0] + view_angle = torch.atan2(cur_boxes[:, 1], cur_boxes[:, 0]) - cur_boxes[:,6] # view angle + view_angle = limit_period(view_angle) # normalized view angle + distance = (cur_boxes[:, 0] ** 2 + cur_boxes[:, 1] ** 2) ** 0.5 + view_vector = torch.stack([view_angle, distance], dim=-1) # [proposalnum, 2] + cur_view_vector_list.append(view_vector) + #################### + + # transform box to ego coordinate. 
[x,y,z,h,w,l,yaw] + # especially proj first is false + for agent_id in range(1, cavnum): + tfm = x1_to_x2(lidar_poses[sum(record_len[:i])+agent_id], + lidar_poses[sum(record_len[:i])]) + tfm = torch.from_numpy(tfm).to(cur_boxes_list[0].device).float() + cur_boxes = cur_boxes_list[agent_id] + cur_corners = boxes_to_corners_3d(cur_boxes, order=self.order) + + ### view vector #### + view_angle = torch.atan2(cur_boxes[:, 1], cur_boxes[:, 0]) - cur_boxes[:,6] # view angle + view_angle = limit_period(view_angle) # normalized view angle + distance = (cur_boxes[:, 0] ** 2 + cur_boxes[:, 1] ** 2) ** 0.5 + view_vector = torch.stack([view_angle, distance], dim=-1) # [proposalnum, 2] + #################### + + cur_corners_ego = project_box3d(cur_corners, tfm) + cur_boxes_ego = corner_to_center_torch(cur_corners_ego, order=self.order) + cur_boxes_list_ego.append(cur_boxes_ego) + cur_agentid_list.append(torch.tensor([sum(record_len[:i]) + agent_id] * len(cur_boxes_ego))) + cur_view_vector_list.append(view_vector) + + + cur_boxes_list = cur_boxes_list_ego + cur_scores_list = data_dict['det_scores'][sum(record_len[:i]):sum(record_len[:i]) + cavnum] + + + cur_boxes_list = [b for b in cur_boxes_list if len(b) > 0] + cur_scores_list = [s for s in cur_scores_list if len(s) > 0] + cur_agentid_list = [a for a in cur_agentid_list if len(a) > 0] + cur_view_vector_list = [v for v in cur_view_vector_list if len(v) > 0] + + if len(cur_scores_list) == 0: + clusters_batch.append([torch.Tensor([0.0, 0.0, 0.0, 1.6, 2.0, 4.0, 0]). # hwl + to(torch.device('cuda')).view(1, 7)]) + scores_batch.append([torch.Tensor([0.01]).to(torch.device('cuda')).view(-1)]) + agentid_batch.append([torch.tensor([0]).to(torch.device('cuda')).view(-1)]) + view_vector_batch.append([torch.tensor([[0, 0]]).to(torch.device('cuda'))]) + continue + + pred_boxes_cat = torch.cat(cur_boxes_list, dim=0) + pred_boxes_cat[:, -1] = limit_period(pred_boxes_cat[:, -1]) + pred_scores_cat = torch.cat(cur_scores_list, dim=0) + agentid_cat = torch.cat(cur_agentid_list, dim=0).to(torch.long) + view_vector_cat = torch.cat(cur_view_vector_list, dim=0) + + ious = boxes_iou3d_gpu(pred_boxes_cat, pred_boxes_cat) + cluster_indices = torch.zeros(len(ious)).int() + cur_cluster_id = 1 + + # cluster proposals + while torch.any(cluster_indices == 0): + cur_idx = torch.where(cluster_indices == 0)[0][0] # find the idx of the first pred which is not assigned yet + cluster_indices[torch.where(ious[cur_idx] > 0.1)[0]] = cur_cluster_id + cur_cluster_id += 1 + + clusters = [] + scores = [] + agentid = [] + view_vector = [] + + for j in range(1, cur_cluster_id): + clusters.append(pred_boxes_cat[cluster_indices==j]) # shape: [[num_in_cluster, 7], ... ] + scores.append(pred_scores_cat[cluster_indices==j]) # shape: [[num_in_cluster,], ...] + agentid.append(agentid_cat[cluster_indices==j]) # shape: [[num_in_cluster,], ...] + view_vector.append(view_vector_cat[cluster_indices==j]) # shape [[num_in_cluster, 2],...] + + clusters_batch.append(clusters) # shape: [[[num_in_cluster, 7], ...], ... ] + scores_batch.append(scores) # shape: [[[num_in_cluster,], ...], ... ] + agentid_batch.append(agentid) # shape: [[[num_in_cluster,], ...], ...] + view_vector_batch.append(view_vector) # shape [[[num_in_cluster, 2], ...], ...] + + return clusters_batch, scores_batch, agentid_batch, view_vector_batch + + def cluster_fusion(self, clusters, scores, agentid, view_vector): + """ + Merge boxes in each cluster with scores as weights for merging. 
+ """ + boxes_fused = [] + scores_fused = [] + agentid_fused = [] + view_vector_fused = [] + for cl, sl, al, vl in zip(clusters, scores, agentid, view_vector): # cl, sl are clusters and scores within one sample + for c, s, a, v in zip(cl, sl, al, vl): # one sample (cl) has many clusters (c), c,s,a correspond to one cluster. + # reverse direction for non-dominant direction of boxes + dirs = c[:, -1] + max_score_idx = torch.argmax(s) + dirs_diff = torch.abs(dirs - dirs[max_score_idx].item()) + lt_pi = (dirs_diff > np.pi).int() + dirs_diff = dirs_diff * (1 - lt_pi) + ( + 2 * np.pi - dirs_diff) * lt_pi + score_lt_half_pi = s[dirs_diff > np.pi / 2].sum() # larger than + score_set_half_pi = s[dirs_diff <= np.pi / 2].sum() # small equal than + # select larger scored direction as final direction + if score_lt_half_pi <= score_set_half_pi: + dirs[dirs_diff > np.pi / 2] += np.pi + else: + dirs[dirs_diff <= np.pi / 2] += np.pi + + dirs = limit_period(dirs) + s_normalized = s / s.sum() + sint = torch.sin(dirs) * s_normalized + cost = torch.cos(dirs) * s_normalized + theta = torch.atan2(sint.sum(), cost.sum()).view(1, ) + center_dim = c[:, :-1] * s_normalized[:, None] + + boxes_fused.append(torch.cat([center_dim.sum(dim=0), theta])) + # s_sorted = torch.sort(s, descending=True).values + # s_fused = 0 + # for i, ss in enumerate(s_sorted): + # s_fused += ss ** (i + 1) + # s_fused = torch.tensor([min(s_fused, 1.0)], device=s.device) + s_fused = torch.max(s) + + scores_fused.append(s_fused) # content: [s_cluster0, s_cluster1, ...] + agentid_fused.append(a) # content [[id1,id2], [id1, id2, id3], ...] + view_vector_fused.append(v) # shape [[2, 2], [3, 2], ...] + + assert len(boxes_fused) > 0 + boxes_fused = torch.stack(boxes_fused, dim=0) + box_num_sample = [len(c) for c in clusters] # in a batch, each sample has how many clusters + + boxes_fused = [boxes_fused[sum(box_num_sample[:i]):sum(box_num_sample[:i]) + l] for + i, l in enumerate(box_num_sample)] # shape [[num_of_cluster_in_sample1, 7], [num_of_cluster_in_sample2, 7], ...] + + scores_fused = torch.stack(scores_fused, dim=0) + scores_fused = [scores_fused[sum(box_num_sample[:i]):sum(box_num_sample[:i]) + l] for + i, l in enumerate(box_num_sample)] # shape [[num_of_cluster_in_sample1,], [num_of_cluster_in_sample2,], ...] + + agentid_fused = [agentid_fused[sum(box_num_sample[:i]):sum(box_num_sample[:i]) + l] for + i, l in enumerate(box_num_sample)] # content [[[id1,id2], [id1, id2, id3], ... ], [sample2 content], ...] + + view_vector_fused = [view_vector_fused[sum(box_num_sample[:i]):sum(box_num_sample[:i]) + l] for + i, l in enumerate(box_num_sample)] # shape [[ [2,2], [3,2], ...], [sample2 content], ...] + + for i in range(len(boxes_fused)): + corners3d = boxes_to_corners_3d(boxes_fused[i], order=self.order) + mask = get_mask_for_boxes_within_range_torch(corners3d, self.pc_range) + boxes_fused[i] = boxes_fused[i][mask] + scores_fused[i] = scores_fused[i][mask] + agentid_fused[i] = list(compress(agentid_fused[i], mask)) + view_vector_fused[i] = list(compress(view_vector_fused[i], mask)) + + return boxes_fused, scores_fused, agentid_fused, view_vector_fused + + def get_roi_from_box(self, data_dict): + feature_shape = data_dict['feature_shape'] # [H,W] + grid_size_H = (self.pc_range[4] - self.pc_range[1]) / feature_shape[0] + grid_size_W = (self.pc_range[3] - self.pc_range[0]) / feature_shape[1] + + boxes_fused_list = data_dict['boxes_fused'] # [sample1, sample2, ...] 
+ roi_list = [] + + for boxes_fused in boxes_fused_list: + # boxes_fused shape [N, 7], hwl order + # we omit the angle in the naive version + grid_center_x = (boxes_fused[:,0] - self.pc_range[0]) / grid_size_W + grid_center_y = (boxes_fused[:,1] - self.pc_range[1]) / grid_size_H + grid_offset_x = boxes_fused[:, -2] / 2 / grid_size_W + grid_offset_y = boxes_fused[:, -3] / 2 / grid_size_H + 1 # enlarge + + + xmin = (grid_center_x - grid_offset_x * self.enlarge_ratio).clamp(min=0) + xmax = (grid_center_x + grid_offset_x * self.enlarge_ratio).clamp(max=feature_shape[1] - 1) + ymin = (grid_center_y - grid_offset_y * self.enlarge_ratio).clamp(min=0) + ymax = (grid_center_y + grid_offset_y * self.enlarge_ratio).clamp(max=feature_shape[0] - 1) + + roi = torch.stack([xmin, xmax, ymin, ymax], dim=-1).to(torch.long) # [boxnum, 4] + + roi_list.append(roi) + + data_dict['roi_fused'] = roi_list # shape [[num_of_cluster_in_sample1, 4], [num_of_cluster_in_sample2, 4], ...] \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/mean_vfe.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/mean_vfe.py new file mode 100644 index 0000000000000000000000000000000000000000..9231578bb86d11b48cfd43ec8ae9cb7bdd7fa683 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/mean_vfe.py @@ -0,0 +1,33 @@ +import torch +import torch.nn as nn + +class MeanVFE(nn.Module): + def __init__(self, model_cfg, num_point_features, **kwargs): + super().__init__() + self.model_cfg = model_cfg + self.num_point_features = num_point_features + + def get_output_feature_dim(self): + return self.num_point_features + + def forward(self, batch_dict, **kwargs): + """ + Args: + batch_dict: + voxels: (num_voxels, max_points_per_voxel, C) + voxel_num_points: optional (num_voxels) + **kwargs: + + Returns: + vfe_features: (num_voxels, C) + """ + voxel_features, voxel_num_points = batch_dict['voxel_features'], \ + batch_dict['voxel_num_points'] + points_mean = voxel_features[:, :, :].sum(dim=1, keepdim=False) + normalizer = torch.clamp_min(voxel_num_points.view(-1, 1), min=1.0).\ + type_as(voxel_features) + points_mean = points_mean / normalizer + batch_dict['voxel_features'] = points_mean.contiguous() + + return batch_dict + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/ms_deform_attn.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/ms_deform_attn.py new file mode 100644 index 0000000000000000000000000000000000000000..2f43ed689912d845206d587baee2cc2a56780622 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/ms_deform_attn.py @@ -0,0 +1,115 @@ +# ------------------------------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. 
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------------------------------ +# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 +# ------------------------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import print_function +from __future__ import division + +import warnings +import math + +import torch +from torch import nn +import torch.nn.functional as F +from torch.nn.init import xavier_uniform_, constant_ + +from .functions import MSDeformAttnFunction + + +def _is_power_of_2(n): + if (not isinstance(n, int)) or (n < 0): + raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) + return (n & (n-1) == 0) and n != 0 + + +class MSDeformAttn(nn.Module): + def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=4): + """ + Multi-Scale Deformable Attention Module + :param d_model hidden dimension + :param n_levels number of feature levels + :param n_heads number of attention heads + :param n_points number of sampling points per attention head per feature level + """ + super().__init__() + if d_model % n_heads != 0: + raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads)) + _d_per_head = d_model // n_heads + # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation + if not _is_power_of_2(_d_per_head): + warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " + "which is more efficient in our CUDA implementation.") + + self.im2col_step = 64 + + self.d_model = d_model + self.n_levels = n_levels + self.n_heads = n_heads + self.n_points = n_points + + self.sampling_offsets = nn.Linear(d_model, n_heads * n_levels * n_points * 2) + self.attention_weights = nn.Linear(d_model, n_heads * n_levels * n_points) + self.value_proj = nn.Linear(d_model, d_model) + self.output_proj = nn.Linear(d_model, d_model) + + self._reset_parameters() + + def _reset_parameters(self): + constant_(self.sampling_offsets.weight.data, 0.) + thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads) + grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) + grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2).repeat(1, self.n_levels, self.n_points, 1) + for i in range(self.n_points): + grid_init[:, :, i, :] *= i + 1 + with torch.no_grad(): + self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) + constant_(self.attention_weights.weight.data, 0.) + constant_(self.attention_weights.bias.data, 0.) + xavier_uniform_(self.value_proj.weight.data) + constant_(self.value_proj.bias.data, 0.) + xavier_uniform_(self.output_proj.weight.data) + constant_(self.output_proj.bias.data, 0.) 
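# An illustrative, standalone sketch of what the sampling_offsets bias
# initialization in _reset_parameters above produces, using the module's default
# head/level/point counts: each head gets a distinct planar direction whose
# longer component is normalized to 1, and the k-th sampling point of that head
# starts (k+1) cells away along that direction at every feature level.
import math
import torch

n_heads, n_levels, n_points = 8, 4, 4
thetas = torch.arange(n_heads, dtype=torch.float32) * (2.0 * math.pi / n_heads)
grid = torch.stack([thetas.cos(), thetas.sin()], -1)        # [n_heads, 2]
grid = grid / grid.abs().max(-1, keepdim=True)[0]           # longest axis -> 1
grid = grid.view(n_heads, 1, 1, 2).repeat(1, n_levels, n_points, 1)
for k in range(n_points):
    grid[:, :, k, :] *= k + 1                               # farther points per ring
bias = grid.view(-1)   # [n_heads * n_levels * n_points * 2], copied into the bias
# e.g. head 0 points along +x: grid[0, 0] == [[1, 0], [2, 0], [3, 0], [4, 0]]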
+ + def forward(self, query, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None): + """ + :param query (N, Length_{query}, C) + :param reference_points (N, Length_{query}, n_levels, 2), range in [0, 1], top-left (0,0), bottom-right (1, 1), including padding area + or (N, Length_{query}, n_levels, 4), add additional (w, h) to form reference boxes + :param input_flatten (N, \sum_{l=0}^{L-1} H_l \cdot W_l, C) + :param input_spatial_shapes (n_levels, 2), [(H_0, W_0), (H_1, W_1), ..., (H_{L-1}, W_{L-1})] + :param input_level_start_index (n_levels, ), [0, H_0*W_0, H_0*W_0+H_1*W_1, H_0*W_0+H_1*W_1+H_2*W_2, ..., H_0*W_0+H_1*W_1+...+H_{L-1}*W_{L-1}] + :param input_padding_mask (N, \sum_{l=0}^{L-1} H_l \cdot W_l), True for padding elements, False for non-padding elements + + :return output (N, Length_{query}, C) + """ + N, Len_q, _ = query.shape + N, Len_in, _ = input_flatten.shape + assert (input_spatial_shapes[:, 0] * input_spatial_shapes[:, 1]).sum() == Len_in + + value = self.value_proj(input_flatten) + if input_padding_mask is not None: + value = value.masked_fill(input_padding_mask[..., None], float(0)) + value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads) + sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points, 2) + attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points) + attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points) + # N, Len_q, n_heads, n_levels, n_points, 2 + if reference_points.shape[-1] == 2: + offset_normalizer = torch.stack([input_spatial_shapes[..., 1], input_spatial_shapes[..., 0]], -1) + sampling_locations = reference_points[:, :, None, :, None, :] \ + + sampling_offsets / offset_normalizer[None, None, None, :, None, :] + elif reference_points.shape[-1] == 4: + sampling_locations = reference_points[:, :, None, :, None, :2] \ + + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 2:] * 0.5 + else: + raise ValueError( + 'Last dim of reference_points must be 2 or 4, but get {} instead.'.format(reference_points.shape[-1])) + output = MSDeformAttnFunction.apply( + value, input_spatial_shapes, input_level_start_index, sampling_locations, attention_weights, self.im2col_step) + output = self.output_proj(output) + return output diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/mswin.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/mswin.py new file mode 100644 index 0000000000000000000000000000000000000000..1af51b95205e7664b9c2c78ebef64e74a6eee390 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/mswin.py @@ -0,0 +1,123 @@ +""" +Multi-scale window transformer +""" +import torch +import torch.nn as nn +import numpy as np + +from einops import rearrange +from opencood.models.sub_modules.split_attn import SplitAttn + + +def get_relative_distances(window_size): + indices = torch.tensor(np.array( + [[x, y] for x in range(window_size) for y in range(window_size)])) + distances = indices[None, :, :] - indices[:, None, :] + return distances + + +class BaseWindowAttention(nn.Module): + def __init__(self, dim, heads, dim_head, drop_out, window_size, + relative_pos_embedding): + super().__init__() + inner_dim = dim_head * heads + + self.heads = heads + self.scale = dim_head ** -0.5 + self.window_size = window_size + 
self.relative_pos_embedding = relative_pos_embedding + + self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False) + + if self.relative_pos_embedding: + self.relative_indices = get_relative_distances(window_size) + \ + window_size - 1 + self.pos_embedding = nn.Parameter(torch.randn(2 * window_size - 1, + 2 * window_size - 1)) + else: + self.pos_embedding = nn.Parameter(torch.randn(window_size ** 2, + window_size ** 2)) + + self.to_out = nn.Sequential( + nn.Linear(inner_dim, dim), + nn.Dropout(drop_out) + ) + + def forward(self, x): + b, l, h, w, c, m = *x.shape, self.heads + + qkv = self.to_qkv(x).chunk(3, dim=-1) + new_h = h // self.window_size + new_w = w // self.window_size + + # q : (b, l, m, new_h*new_w, window_size^2, c_head) + q, k, v = map( + lambda t: rearrange(t, + 'b l (new_h w_h) (new_w w_w) (m c) -> b l m (new_h new_w) (w_h w_w) c', + m=m, w_h=self.window_size, + w_w=self.window_size), qkv) + # b l m h window_size window_size + dots = torch.einsum('b l m h i c, b l m h j c -> b l m h i j', + q, k, ) * self.scale + # consider prior knowledge of the local window + if self.relative_pos_embedding: + dots += self.pos_embedding[self.relative_indices[:, :, 0], + self.relative_indices[:, :, 1]] + else: + dots += self.pos_embedding + + attn = dots.softmax(dim=-1) + + out = torch.einsum('b l m h i j, b l m h j c -> b l m h i c', attn, v) + # b l h w c + out = rearrange(out, + 'b l m (new_h new_w) (w_h w_w) c -> b l (new_h w_h) (new_w w_w) (m c)', + m=self.heads, w_h=self.window_size, + w_w=self.window_size, + new_w=new_w, new_h=new_h) + out = self.to_out(out) + + return out + + +class PyramidWindowAttention(nn.Module): + def __init__(self, dim, heads, dim_heads, drop_out, window_size, + relative_pos_embedding, fuse_method='naive'): + super().__init__() + + assert isinstance(window_size, list) + assert isinstance(heads, list) + assert isinstance(dim_heads, list) + assert len(dim_heads) == len(heads) + + self.pwmsa = nn.ModuleList([]) + + for (head, dim_head, ws) in zip(heads, dim_heads, window_size): + self.pwmsa.append(BaseWindowAttention(dim, + head, + dim_head, + drop_out, + ws, + relative_pos_embedding)) + self.fuse_mehod = fuse_method + if fuse_method == 'split_attn': + self.split_attn = SplitAttn(256) + elif fuse_method == 'split_attn128': + self.split_attn = SplitAttn(128) + elif fuse_method == 'split_attn64': + self.split_attn = SplitAttn(64) + + def forward(self, x): + output = None + # naive fusion will just sum up all window attention output and do a + # mean + if self.fuse_mehod == 'naive': + for wmsa in self.pwmsa: + output = wmsa(x) if output is None else output + wmsa(x) + return output / len(self.pwmsa) + + elif self.fuse_mehod.startswith('split_attn'): + window_list = [] + for wmsa in self.pwmsa: + window_list.append(wmsa(x)) + return self.split_attn(window_list) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/naive_compress.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/naive_compress.py new file mode 100644 index 0000000000000000000000000000000000000000..49842af9ce56bb9581ef4438d65b9ff23a6facf3 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/naive_compress.py @@ -0,0 +1,32 @@ +import torch +import torch.nn as nn + + +class NaiveCompressor(nn.Module): + """ + A very naive compression that only compress on the channel. 
+ """ + def __init__(self, input_dim, compress_raito): + super().__init__() + self.encoder = nn.Sequential( + nn.Conv2d(input_dim, input_dim//compress_raito, kernel_size=3, + stride=1, padding=1), + nn.BatchNorm2d(input_dim//compress_raito, eps=1e-3, momentum=0.01), + nn.ReLU() + ) + self.decoder = nn.Sequential( + nn.Conv2d(input_dim//compress_raito, input_dim, kernel_size=3, + stride=1, padding=1), + nn.BatchNorm2d(input_dim, eps=1e-3, momentum=0.01), + nn.ReLU(), + nn.Conv2d(input_dim, input_dim, kernel_size=3, stride=1, padding=1), + nn.BatchNorm2d(input_dim, eps=1e-3, + momentum=0.01), + nn.ReLU() + ) + + def forward(self, x): + x = self.encoder(x) + x = self.decoder(x) + + return x \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/pillar_vfe.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/pillar_vfe.py new file mode 100644 index 0000000000000000000000000000000000000000..19fabe03682edde39302dfd69c7de08ad7f9fca9 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/pillar_vfe.py @@ -0,0 +1,155 @@ +""" +Pillar VFE, credits to OpenPCDet. +""" + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class PFNLayer(nn.Module): + def __init__(self, + in_channels, + out_channels, + use_norm=True, + last_layer=False): + super().__init__() + + self.last_vfe = last_layer + self.use_norm = use_norm + if not self.last_vfe: + out_channels = out_channels // 2 + + if self.use_norm: + self.linear = nn.Linear(in_channels, out_channels, bias=False) + self.norm = nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01) + else: + self.linear = nn.Linear(in_channels, out_channels, bias=True) + + self.part = 50000 + + def forward(self, inputs): + if inputs.shape[0] > self.part: + # nn.Linear performs randomly when batch size is too large + num_parts = inputs.shape[0] // self.part + part_linear_out = [self.linear( + inputs[num_part * self.part:(num_part + 1) * self.part]) + for num_part in range(num_parts + 1)] + x = torch.cat(part_linear_out, dim=0) + else: + x = self.linear(inputs) + torch.backends.cudnn.enabled = False + x = self.norm(x.permute(0, 2, 1)).permute(0, 2, + 1) if self.use_norm else x + torch.backends.cudnn.enabled = True + x = F.relu(x) + x_max = torch.max(x, dim=1, keepdim=True)[0] + + if self.last_vfe: + return x_max + else: + x_repeat = x_max.repeat(1, inputs.shape[1], 1) + x_concatenated = torch.cat([x, x_repeat], dim=2) + return x_concatenated + + +class PillarVFE(nn.Module): + def __init__(self, model_cfg, num_point_features, voxel_size, + point_cloud_range): + super().__init__() + self.model_cfg = model_cfg + + self.use_norm = self.model_cfg['use_norm'] + self.with_distance = self.model_cfg['with_distance'] + + self.use_absolute_xyz = self.model_cfg['use_absolute_xyz'] + num_point_features += 6 if self.use_absolute_xyz else 3 + if self.with_distance: + num_point_features += 1 + + self.num_filters = self.model_cfg['num_filters'] + assert len(self.num_filters) > 0 + num_filters = [num_point_features] + list(self.num_filters) + + pfn_layers = [] + for i in range(len(num_filters) - 1): + in_filters = num_filters[i] + out_filters = num_filters[i + 1] + pfn_layers.append( + PFNLayer(in_filters, out_filters, self.use_norm, + last_layer=(i >= len(num_filters) - 2)) + ) + self.pfn_layers = nn.ModuleList(pfn_layers) + + self.voxel_x = voxel_size[0] + self.voxel_y = voxel_size[1] + self.voxel_z = voxel_size[2] + self.x_offset = self.voxel_x / 2 + 
point_cloud_range[0] + self.y_offset = self.voxel_y / 2 + point_cloud_range[1] + self.z_offset = self.voxel_z / 2 + point_cloud_range[2] + + def get_output_feature_dim(self): + return self.num_filters[-1] + + @staticmethod + def get_paddings_indicator(actual_num, max_num, axis=0): + actual_num = torch.unsqueeze(actual_num, axis + 1) + max_num_shape = [1] * len(actual_num.shape) + max_num_shape[axis + 1] = -1 + max_num = torch.arange(max_num, + dtype=torch.int, + device=actual_num.device).view(max_num_shape) + paddings_indicator = actual_num.int() > max_num + return paddings_indicator + + def forward(self, batch_dict): + """encoding voxel feature using point-pillar method + Args: + voxel_features: [M, 32, 4] + voxel_num_points: [M,] + voxel_coords: [M, 4] + Returns: + features: [M,64], after PFN + """ + voxel_features, voxel_num_points, coords = \ + batch_dict['voxel_features'], batch_dict['voxel_num_points'], \ + batch_dict['voxel_coords'] + + points_mean = \ + voxel_features[:, :, :3].sum(dim=1, keepdim=True) / \ + voxel_num_points.type_as(voxel_features).view(-1, 1, 1) + f_cluster = voxel_features[:, :, :3] - points_mean + + f_center = torch.zeros_like(voxel_features[:, :, :3]) + f_center[:, :, 0] = voxel_features[:, :, 0] - ( + coords[:, 3].to(voxel_features.dtype).unsqueeze( + 1) * self.voxel_x + self.x_offset) + f_center[:, :, 1] = voxel_features[:, :, 1] - ( + coords[:, 2].to(voxel_features.dtype).unsqueeze( + 1) * self.voxel_y + self.y_offset) + f_center[:, :, 2] = voxel_features[:, :, 2] - ( + coords[:, 1].to(voxel_features.dtype).unsqueeze( + 1) * self.voxel_z + self.z_offset) + + if self.use_absolute_xyz: + features = [voxel_features, f_cluster, f_center] + else: + features = [voxel_features[..., 3:], f_cluster, f_center] + + if self.with_distance: + points_dist = torch.norm(voxel_features[:, :, :3], 2, 2, + keepdim=True) + features.append(points_dist) + features = torch.cat(features, dim=-1) + + voxel_count = features.shape[1] + mask = self.get_paddings_indicator(voxel_num_points, voxel_count, + axis=0) + mask = torch.unsqueeze(mask, -1).type_as(voxel_features) + features *= mask + for pfn in self.pfn_layers: + features = pfn(features) + features = features.squeeze() + batch_dict['pillar_features'] = features + + return batch_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/point_pillar_scatter.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/point_pillar_scatter.py new file mode 100644 index 0000000000000000000000000000000000000000..e74d14f844f90e9814117aa640536028dd9ae2fd --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/point_pillar_scatter.py @@ -0,0 +1,73 @@ +import torch +import torch.nn as nn + + +class PointPillarScatter(nn.Module): + def __init__(self, model_cfg): + super().__init__() + + self.model_cfg = model_cfg + self.num_bev_features = self.model_cfg['num_features'] + self.nx, self.ny, self.nz = model_cfg['grid_size'] # [704, 200, 1] + + assert self.nz == 1 + + def forward(self, batch_dict): + """ 将生成的pillar按照坐标索引还原到原空间中 + Args: + pillar_features:(M, 64) + coords:(M, 4) 第一维是batch_index + + Returns: + batch_spatial_features:(4, 64, H, W) + + |-------| + | | |-------------| + | | -> | * | + | | | | + | * | |-------------| + |-------| + + Lidar Point Cloud Feature Map + x-axis up Along with W + y-axis right Along with H + + Something like clockwise rotation of 90 degree. 
+ + """ + pillar_features, coords = batch_dict['pillar_features'], batch_dict[ + 'voxel_coords'] + batch_spatial_features = [] + batch_size = coords[:, 0].max().int().item() + 1 + + for batch_idx in range(batch_size): + spatial_feature = torch.zeros( + self.num_bev_features, + self.nz * self.nx * self.ny, + dtype=pillar_features.dtype, + device=pillar_features.device) + # batch_index的mask + batch_mask = coords[:, 0] == batch_idx + # 根据mask提取坐标 + this_coords = coords[batch_mask, :] # (batch_idx_voxel,4) # zyx order, x in [0,706], y in [0,200] + # 这里的坐标是b,z,y和x的形式,且只有一层,因此计算索引的方式如下 + indices = this_coords[:, 1] + this_coords[:, 2] * self.nx + this_coords[:, 3] + # 转换数据类型 + indices = indices.type(torch.long) + # 根据mask提取pillar_features + pillars = pillar_features[batch_mask, :] # (batch_idx_voxel,64) + pillars = pillars.t() # (64,batch_idx_voxel) + # 在索引位置填充pillars + spatial_feature[:, indices] = pillars + # 将空间特征加入list,每个元素为(64, self.nz * self.nx * self.ny) + batch_spatial_features.append(spatial_feature) + + batch_spatial_features = \ + torch.stack(batch_spatial_features, 0) + batch_spatial_features = \ + batch_spatial_features.view(batch_size, self.num_bev_features * + self.nz, self.ny, self.nx) # It put y axis(in lidar frame) as image height. [..., 200, 704] + batch_dict['spatial_features'] = batch_spatial_features + + return batch_dict + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/pose_graph_optim.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/pose_graph_optim.py new file mode 100644 index 0000000000000000000000000000000000000000..054bd8d23ee365e186e58e3ec2a5e42b8bc28802 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/pose_graph_optim.py @@ -0,0 +1,148 @@ +""" +This is pose graph optimizer, using g2o (bind to C++) +""" +import g2o +import numpy as np + +class PoseGraphOptimization2D(g2o.SparseOptimizer): + def __init__(self, verbose=False): + super().__init__() + # solver = g2o.BlockSolverSE2(g2o.LinearSolverCholmodSE2()) + solver = g2o.BlockSolverSE2(g2o.LinearSolverDenseSE2()) + solver = g2o.OptimizationAlgorithmLevenberg(solver) + super().set_algorithm(solver) + super().set_verbose(verbose) + + def optimize(self, max_iterations=1000): + super().initialize_optimization() + super().optimize(max_iterations) + + + def add_vertex(self, id, pose, fixed=False, SE2=True): + if SE2: + v = g2o.VertexSE2() + else: + v = g2o.VertexPointXY() + v.set_estimate(pose) + v.set_id(id) + v.set_fixed(fixed) + super().add_vertex(v) + + + def add_edge(self, vertices, measurement, + information=np.identity(3), + robust_kernel=None, SE2 = True): + """ + Args: + measurement: g2o.SE2 + """ + if SE2: + edge = g2o.EdgeSE2() + else: + edge = g2o.EdgeSE2PointXY() + + for i, v in enumerate(vertices): + if isinstance(v, int): + v = self.vertex(v) + edge.set_vertex(i, v) + + edge.set_measurement(measurement) # relative pose shape [3, 1] / [2, 1] + edge.set_information(information) # importance of each component shape [3, 3] / [2, 2] + if robust_kernel is not None: + edge.set_robust_kernel(robust_kernel) + super().add_edge(edge) + + def get_pose(self, id): + return self.vertex(id).estimate() + + +class PoseGraphOptimization(g2o.SparseOptimizer): + def __init__(self): + super().__init__() + solver = g2o.BlockSolverSE3(g2o.LinearSolverCholmodSE3()) + solver = g2o.OptimizationAlgorithmLevenberg(solver) + super().set_algorithm(solver) + super().set_verbose(True) + + def optimize(self, max_iterations=50): + 
super().initialize_optimization() + super().optimize(max_iterations) + + def add_vertex(self, id, pose, fixed=False): + v_se3 = g2o.VertexSE3() + v_se3.set_estimate(pose) + v_se3.set_id(id) + v_se3.set_fixed(fixed) + super().add_vertex(v_se3) + + def add_edge(self, vertices, measurement, + information=np.identity(6), + robust_kernel=None): + + edge = g2o.EdgeSE3() + for i, v in enumerate(vertices): + if isinstance(v, int): + v = self.vertex(v) + edge.set_vertex(i, v) + + edge.set_measurement(measurement) # relative pose, shape [4, 4] + edge.set_information(information) # importance of each component, shape [6, 6] + if robust_kernel is not None: + edge.set_robust_kernel(robust_kernel) + super().add_edge(edge) + + def get_pose(self, id): + return self.vertex(id).estimate() + + +if __name__ == "__main__": + pgo = PoseGraphOptimization() + + with open("/GPFS/rhome/yifanlu/workspace/g2o_test/noise.g2o","r") as f: + for line in f: + if line.startswith("VERTEX_SE3:QUAT"): + vertex_content = line.split(" ",1)[1] + vertex_content_array = np.fromstring(vertex_content, dtype=float, sep=" ") + ids = int(vertex_content_array[0]) + index = [0,1,2,6,3,4,5] + pose_array = vertex_content_array[1:][index] + + pose = np.eye(4) + pose[:3,3] = pose_array[:3] + pose[:3,:3] = g2o.Quaternion(pose_array[3:]).matrix() + pose = g2o.Isometry3d(pose) + + fixed = True if ids==6 else False + # fixed = False + pgo.add_vertex(id=ids, pose=pose, fixed=fixed) + + elif line.startswith("EDGE_SE3:QUAT"): + edge_content = line.split(" ", 1)[1] + edge_content_array = np.fromstring(edge_content, dtype=float, sep=" ") + + edge = [int(v) for v in edge_content_array[:2]] + index = [0,1,2,6,3,4,5] + pose_array = edge_content_array[2:2+7][index] + information_array = edge_content_array[2+7:] + + pose = np.eye(4) + pose[:3,3] = pose_array[:3] + pose[:3,:3] = g2o.Quaternion(pose_array[3:]).matrix() + pose = g2o.Isometry3d(pose) + + information = np.eye(6) + information[0,0] = information_array[0] + information[1,1] = information_array[6] + information[2,2] = information_array[11] + information[3,3] = information_array[15] + information[4,4] = information_array[18] + information[5,5] = information_array[20] + + pgo.add_edge(edge, pose, information) + + + print('num vertices:', len(pgo.vertices())) + print('num edges:', len(pgo.edges()), end='\n\n') + pgo.optimize() + + # pgo.save("out_pose_graph2.g2o") diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/refactor.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/refactor.py new file mode 100644 index 0000000000000000000000000000000000000000..2f3d93913160fc224e980d92ecf8a4c87faeddbc --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/refactor.py @@ -0,0 +1,42 @@ +import torch +import torch.nn as nn +import numpy as np +from icecream import ic + +def flatten(l): + return [item for sublist in l for item in sublist] + +def refactor(batch_dict, lidar_agent_indicator): + agent_num = len(lidar_agent_indicator) + proposal_agentids_sample_list = batch_dict['agentid_fused'] # [sample1, sample2, ..., sample{batchnum}] + + lidar_matrix_list = [] + camera_matrix_list = [] + + # scatter agentid + for proposal_agentids_list in proposal_agentids_sample_list: # [[0,1,2],[1,2],[0,2],...] 
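# A toy, self-contained sketch (hypothetical values) of the per-sample matrices
# this loop builds: with 3 agents, lidar_agent_indicator = [1, 0, 1], and two
# fused proposals contributed by agents {0, 1} and {2}, the dense
# proposal-to-agent assignment matrix is split into a lidar part and a camera
# part by masking columns.
import numpy as np
import torch

lidar_agent_indicator = torch.tensor([1, 0, 1])            # agent 1 is camera-only
proposal_agentids_list = [torch.tensor([0, 1]), torch.tensor([2])]
agent_num = len(lidar_agent_indicator)
proposal_num = len(proposal_agentids_list)

rows = [i for i, ids in enumerate(proposal_agentids_list) for _ in ids]
cols = torch.cat(proposal_agentids_list).tolist()
indice = np.array([rows, cols], dtype=np.int64)
value = np.ones(len(rows), dtype=np.int64)
assign = torch.sparse_coo_tensor(indice, value, (proposal_num, agent_num)).to_dense()
# assign == [[1, 1, 0],
#            [0, 0, 1]]
lidar_matrix = assign * lidar_agent_indicator               # [[1, 0, 0], [0, 0, 1]]
camera_matrix = assign * (1 - lidar_agent_indicator)        # [[0, 1, 0], [0, 0, 0]]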
+ proposal_num = len(proposal_agentids_list) + + sp_row = [[i]*len(proposal_agentids_list[i]) for i in range(len(proposal_agentids_list))] + sp_row = flatten(sp_row) + sp_col = torch.cat(proposal_agentids_list).tolist() + + indice = np.array([sp_row, sp_col], dtype=np.int32) + value = np.ones_like(sp_row) + + lidar_matrix = torch.sparse_coo_tensor(indice, value, (proposal_num, agent_num), device=lidar_agent_indicator.device).to_dense() + camera_matrix = torch.sparse_coo_tensor(indice, value, (proposal_num, agent_num), device=lidar_agent_indicator.device).to_dense() + + lidar_mask = (lidar_agent_indicator) + camera_mask = (1 - lidar_agent_indicator) + + lidar_matrix *= lidar_mask + camera_matrix *= camera_mask + + lidar_matrix_list.append(lidar_matrix) + camera_matrix_list.append(camera_matrix) + + batch_dict['lidar_matrix_list'] = lidar_matrix_list + batch_dict['camera_matrix_list'] = camera_matrix_list + + return batch_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/resblock.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/resblock.py new file mode 100644 index 0000000000000000000000000000000000000000..f14305e65425cf1ac3ba7ee3250181884c283b09 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/resblock.py @@ -0,0 +1,372 @@ +import torch +from torch import Tensor +import torch.nn as nn +from typing import Type, Any, Callable, Union, List, Optional + + +__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', + 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', + 'wide_resnet50_2', 'wide_resnet101_2'] + + +def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d: + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=dilation, groups=groups, bias=False, dilation=dilation) + + +def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d: + """1x1 convolution""" + return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) + + +class BasicBlock(nn.Module): + expansion: int = 1 + + def __init__( + self, + inplanes: int, + planes: int, + stride: int = 1, + downsample: Optional[nn.Module] = None, + groups: int = 1, + base_width: int = 64, + dilation: int = 1, + norm_layer: Optional[Callable[..., nn.Module]] = None + ) -> None: + super(BasicBlock, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + if groups != 1 or base_width != 64: + raise ValueError('BasicBlock only supports groups=1 and base_width=64') + if dilation > 1: + raise NotImplementedError("Dilation > 1 not supported in BasicBlock") + # Both self.conv1 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = norm_layer(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = norm_layer(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x: Tensor) -> Tensor: + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) + # while original implementation 
places the stride at the first 1x1 convolution(self.conv1) + # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. + # This variant is also known as ResNet V1.5 and improves accuracy according to + # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. + + expansion: int = 4 # original 4 + + def __init__( + self, + inplanes: int, + planes: int, + stride: int = 1, + downsample: Optional[nn.Module] = None, + groups: int = 1, + base_width: int = 64, + dilation: int = 1, + norm_layer: Optional[Callable[..., nn.Module]] = None + ) -> None: + super(Bottleneck, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + width = int(planes * (base_width / 64.)) * groups + # Both self.conv2 and self.downsample layers downsample the input when stride != 1 + self.conv1 = conv1x1(inplanes, width) + self.bn1 = norm_layer(width) + self.conv2 = conv3x3(width, width, stride, groups, dilation) + self.bn2 = norm_layer(width) + self.conv3 = conv1x1(width, planes * self.expansion) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x: Tensor) -> Tensor: + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + + +class ResNetModified(nn.Module): + + def __init__( + self, + block: Type[Union[BasicBlock, Bottleneck]], + layers: List[int], # number of block in one layer + layer_strides: List[int], # stride after one layer + num_filters: List[int], # feature dim + zero_init_residual: bool = False, + groups: int = 1, + width_per_group: int = 64, + replace_stride_with_dilation: Optional[List[bool]] = None, + norm_layer: Optional[Callable[..., nn.Module]] = None, + inplanes = 64 + ) -> None: + super(ResNetModified, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d + self._norm_layer = norm_layer + + self.inplanes = inplanes + self.dilation = 1 + if replace_stride_with_dilation is None: + # each element in the tuple indicates if we should replace + # the 2x2 stride with a dilated convolution instead + replace_stride_with_dilation = [False, False, False] + if len(replace_stride_with_dilation) != 3: + raise ValueError("replace_stride_with_dilation should be None " + "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) + self.groups = groups + self.base_width = width_per_group + + self.layernum = len(num_filters) + for i in range(self.layernum): + self.__setattr__(f"layer{i}", self._make_layer(block, num_filters[i], layers[i], stride=layer_strides[i])) + + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + # Zero-initialize the last BN in each residual branch, + # so that the residual branch starts with zeros, and each residual block behaves like an identity. 
+ # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 + if zero_init_residual: + for m in self.modules(): + if isinstance(m, Bottleneck): + nn.init.constant_(m.bn3.weight, 0) # type: ignore[arg-type] + elif isinstance(m, BasicBlock): + nn.init.constant_(m.bn2.weight, 0) # type: ignore[arg-type] + + def _make_layer(self, block: Type[Union[BasicBlock, Bottleneck]], planes: int, blocks: int, + stride: int = 1, dilate: bool = False) -> nn.Sequential: + norm_layer = self._norm_layer + downsample = None + previous_dilation = self.dilation + if dilate: + self.dilation *= stride + stride = 1 + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + conv1x1(self.inplanes, planes * block.expansion, stride), + norm_layer(planes * block.expansion), + ) + + layers = [] + # if stride != 1, the first block will downsample the feature map + # plane is the feature dim + # if Bottleneck, then the output dim is planes * block.expansion(4) + layers.append(block(self.inplanes, planes, stride, downsample, self.groups, + self.base_width, previous_dilation, norm_layer)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes, groups=self.groups, + base_width=self.base_width, dilation=self.dilation, + norm_layer=norm_layer)) + + return nn.Sequential(*layers) + + def _forward_impl(self, x: Tensor, return_interm: bool = True): + # See note [TorchScript super()] + interm_features = [] + for i in range(self.layernum): + x = eval(f"self.layer{i}")(x) + interm_features.append(x) + + if return_interm: + return interm_features + return x + + def forward(self, x: Tensor): + return self._forward_impl(x) + + +def _resnet( + arch: str, + block: Type[Union[BasicBlock, Bottleneck]], + layers: List[int], + pretrained: bool, + progress: bool, + **kwargs: Any +) -> ResNetModified: + model = ResNetModified(block, layers, **kwargs) + + return model + + +def resnet18(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNetModified: + r"""ResNet-18 model from + `"Deep Residual Learning for Image Recognition" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress, + **kwargs) + + +def resnet34(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNetModified: + r"""ResNet-34 model from + `"Deep Residual Learning for Image Recognition" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress, + **kwargs) + + +def resnet50(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNetModified: + r"""ResNet-50 model from + `"Deep Residual Learning for Image Recognition" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress, + **kwargs) + + +def resnet101(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNetModified: + r"""ResNet-101 model from + `"Deep Residual Learning for Image Recognition" `_. 
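# --- Illustrative sketch (not part of the patch): building the modified ResNet ---
# ResNetModified above takes per-stage block counts, strides and widths instead of the
# fixed torchvision layout and returns the intermediate feature map of every stage.
# A minimal usage sketch, assuming BasicBlock/ResNetModified from this file are in
# scope; the input size (4, 64, 200, 704) is only an example.
import torch

backbone = ResNetModified(
    BasicBlock,
    layers=[3, 4, 5],            # blocks per stage
    layer_strides=[2, 2, 2],     # stride applied by the first block of each stage
    num_filters=[64, 128, 256],  # output channels of each stage
)
x = torch.randn(4, 64, 200, 704)   # (B, C_in = inplanes, H, W)
feats = backbone(x)                # list of per-stage features
# Expected shapes: (4, 64, 100, 352), (4, 128, 50, 176), (4, 256, 25, 88)
for f in feats:
    print(f.shape)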
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress, + **kwargs) + + +def resnet152(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNetModified: + r"""ResNet-152 model from + `"Deep Residual Learning for Image Recognition" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress, + **kwargs) + + +def resnext50_32x4d(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNetModified: + r"""ResNeXt-50 32x4d model from + `"Aggregated Residual Transformation for Deep Neural Networks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['groups'] = 32 + kwargs['width_per_group'] = 4 + return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3], + pretrained, progress, **kwargs) + + +def resnext101_32x8d(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNetModified: + r"""ResNeXt-101 32x8d model from + `"Aggregated Residual Transformation for Deep Neural Networks" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['groups'] = 32 + kwargs['width_per_group'] = 8 + return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3], + pretrained, progress, **kwargs) + + +def wide_resnet50_2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNetModified: + r"""Wide ResNet-50-2 model from + `"Wide Residual Networks" `_. + + The model is the same as ResNet except for the bottleneck number of channels + which is twice larger in every block. The number of channels in outer 1x1 + convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 + channels, and in Wide ResNet-50-2 has 2048-1024-2048. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['width_per_group'] = 64 * 2 + return _resnet('wide_resnet50_2', Bottleneck, [3, 4, 6, 3], + pretrained, progress, **kwargs) + + +def wide_resnet101_2(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNetModified: + r"""Wide ResNet-101-2 model from + `"Wide Residual Networks" `_. + + The model is the same as ResNet except for the bottleneck number of channels + which is twice larger in every block. The number of channels in outer 1x1 + convolutions is the same, e.g. last block in ResNet-50 has 2048-512-2048 + channels, and in Wide ResNet-50-2 has 2048-1024-2048. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + kwargs['width_per_group'] = 64 * 2 + return _resnet('wide_resnet101_2', Bottleneck, [3, 4, 23, 3], + pretrained, progress, **kwargs) + + + + +if __name__=="__main__": + model = ResNetModified(BasicBlock, [3,4,5]) + input = torch.randn(4,64,200,704) + output = model(input) + from icecream import ic + for out in output: + ic(out.shape) diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/roi_head.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/roi_head.py new file mode 100644 index 0000000000000000000000000000000000000000..d8606c19de8b4c4e1e4c8bffe7bc8c1e50b926a9 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/roi_head.py @@ -0,0 +1,286 @@ +import copy +from icecream import ic +import torch.nn as nn +import torch +import numpy as np +from opencood.pcdet_utils.pointnet2.pointnet2_stack import \ + pointnet2_modules as pointnet2_stack_modules +from opencood.utils import common_utils +from opencood.pcdet_utils.iou3d_nms.iou3d_nms_utils import boxes_iou3d_gpu +from opencood.utils import box_utils + + +class RoIHead(nn.Module): + def __init__(self, model_cfg): + super().__init__() + self.model_cfg = model_cfg + input_channels = model_cfg['in_channels'] + self.code_size = 7 + + mlps = copy.copy(self.model_cfg['roi_grid_pool']['mlps']) + for k in range(len(mlps)): + mlps[k] = [input_channels] + mlps[k] + + self.roi_grid_pool_layer = pointnet2_stack_modules.StackSAModuleMSG( + radii=self.model_cfg['roi_grid_pool']['pool_radius'], + nsamples=self.model_cfg['roi_grid_pool']['n_sample'], + mlps=mlps, + use_xyz=True, + pool_method=self.model_cfg['roi_grid_pool']['pool_method'], + ) + + grid_size = self.model_cfg['roi_grid_pool']['grid_size'] + self.grid_size = grid_size + c_out = sum([x[-1] for x in mlps]) + pre_channel = grid_size * grid_size * grid_size * c_out + fc_layers = [self.model_cfg['n_fc_neurons']] * 2 + self.shared_fc_layers, pre_channel = self._make_fc_layers(pre_channel, + fc_layers) + + self.cls_layers, pre_channel = self._make_fc_layers(pre_channel, + fc_layers, + output_channels= + self.model_cfg[ + 'num_cls']) + self.iou_layers, _ = self._make_fc_layers(pre_channel, fc_layers, + output_channels= + self.model_cfg['num_cls']) + self.reg_layers, _ = self._make_fc_layers(pre_channel, fc_layers, + output_channels= + self.model_cfg[ + 'num_cls'] * 7) + + self._init_weights(weight_init='xavier') + + def _init_weights(self, weight_init='xavier'): + if weight_init == 'kaiming': + init_func = nn.init.kaiming_normal_ + elif weight_init == 'xavier': + init_func = nn.init.xavier_normal_ + elif weight_init == 'normal': + init_func = nn.init.normal_ + else: + raise NotImplementedError + + for m in self.modules(): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d): + if weight_init == 'normal': + init_func(m.weight, mean=0, std=0.001) + else: + init_func(m.weight) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + nn.init.normal_(self.reg_layers[-1].weight, mean=0, std=0.001) + + def _make_fc_layers(self, input_channels, fc_list, output_channels=None): + fc_layers = [] + pre_channel = input_channels + for k in range(len(fc_list)): + fc_layers.extend([ + nn.Conv1d(pre_channel, fc_list[k], kernel_size=1, bias=False), + # nn.BatchNorm1d(fc_list[k]), + nn.ReLU() + ]) + pre_channel = fc_list[k] + if self.model_cfg['dp_ratio'] > 
0: + fc_layers.append(nn.Dropout(self.model_cfg['dp_ratio'])) + if output_channels is not None: + fc_layers.append( + nn.Conv1d(pre_channel, output_channels, kernel_size=1, + bias=True)) + fc_layers = nn.Sequential(*fc_layers) + return fc_layers, pre_channel + + def get_global_grid_points_of_roi(self, rois): + rois = rois.view(-1, rois.shape[-1]) + batch_size_rcnn = rois.shape[0] + + # (B, 6x6x6, 3) + local_roi_grid_points = self.get_dense_grid_points(rois, + batch_size_rcnn, + self.grid_size) + global_roi_grid_points = common_utils.rotate_points_along_z( + local_roi_grid_points.clone(), rois[:, 6] + ).squeeze(dim=1) + global_center = rois[:, 0:3].clone() + global_roi_grid_points += global_center.unsqueeze(dim=1) + return global_roi_grid_points, local_roi_grid_points + + @staticmethod + def get_dense_grid_points(rois, batch_size_rcnn, grid_size): + """ + Get the local coordinates of each grid point of a roi in the coordinate + system of the roi(origin lies in the center of this roi. + """ + faked_features = rois.new_ones((grid_size, grid_size, grid_size)) + dense_idx = torch.stack(torch.where(faked_features), + dim=1) # (N, 3) [x_idx, y_idx, z_idx] + dense_idx = dense_idx.repeat(batch_size_rcnn, 1, + 1).float() # (B, 6x6x6, 3) + + local_roi_size = rois.view(batch_size_rcnn, -1)[:, 3:6] + roi_grid_points = ( + dense_idx + 0.5) / grid_size * local_roi_size.unsqueeze( + dim=1) \ + - (local_roi_size.unsqueeze( + dim=1) / 2) # (B, 6x6x6, 3) + return roi_grid_points + + def assign_targets(self, batch_dict): + batch_dict['rcnn_label_dict'] = { + 'rois': [], + 'gt_of_rois': [], + 'gt_of_rois_src': [], + 'cls_tgt': [], + 'reg_tgt': [], + 'iou_tgt': [], + 'rois_anchor': [], + 'record_len': [], + 'rois_scores_stage1': [] + } + pred_boxes = batch_dict['boxes_fused'] + pred_scores = batch_dict['scores_fused'] + gt_boxes = [b[m][:, [0, 1, 2, 5, 4, 3, 6]].float() for b, m in + zip(batch_dict['object_bbx_center'], + batch_dict['object_bbx_mask'].bool())] # hwl -> lwh order + for rois, scores, gts in zip(pred_boxes, pred_scores, gt_boxes): # each frame + rois = rois[:, [0, 1, 2, 5, 4, 3, 6]] # hwl -> lwh + if gts.shape[0] == 0: + gts = rois.clone() + + ious = boxes_iou3d_gpu(rois, gts) + max_ious, gt_inds = ious.max(dim=1) + gt_of_rois = gts[gt_inds] + rcnn_labels = (max_ious > 0.3).float() + mask = torch.logical_not(rcnn_labels.bool()) + + # set negative samples back to rois, no correction in stage2 for them + gt_of_rois[mask] = rois[mask] + gt_of_rois_src = gt_of_rois.clone().detach() + + # canoical transformation + roi_center = rois[:, 0:3] + # TODO: roi_ry > 0 in pcdet + roi_ry = rois[:, 6] % (2 * np.pi) + gt_of_rois[:, 0:3] = gt_of_rois[:, 0:3] - roi_center + gt_of_rois[:, 6] = gt_of_rois[:, 6] - roi_ry + + # transfer LiDAR coords to local coords + gt_of_rois = common_utils.rotate_points_along_z( + points=gt_of_rois.view(-1, 1, gt_of_rois.shape[-1]), + angle=-roi_ry.view(-1) + ).view(-1, gt_of_rois.shape[-1]) + + # flip orientation if rois have opposite orientation + heading_label = (gt_of_rois[:, 6] + ( + torch.div(torch.abs(gt_of_rois[:, 6].min()), + (2 * np.pi), rounding_mode='trunc') + + 1) * 2 * np.pi) % (2 * np.pi) # 0 ~ 2pi + opposite_flag = (heading_label > np.pi * 0.5) & ( + heading_label < np.pi * 1.5) + + # (0 ~ pi/2, 3pi/2 ~ 2pi) + heading_label[opposite_flag] = (heading_label[ + opposite_flag] + np.pi) % ( + 2 * np.pi) + flag = heading_label > np.pi + heading_label[flag] = heading_label[ + flag] - np.pi * 2 # (-pi/2, pi/2) + heading_label = torch.clamp(heading_label, min=-np.pi / 2, + 
max=np.pi / 2) + gt_of_rois[:, 6] = heading_label + + # generate regression target + rois_anchor = rois.clone().detach().view(-1, self.code_size) + rois_anchor[:, 0:3] = 0 + rois_anchor[:, 6] = 0 + + reg_targets = box_utils.box_encode( + gt_of_rois.view(-1, self.code_size), rois_anchor + ) + + batch_dict['rcnn_label_dict']['rois'].append(rois) + batch_dict['rcnn_label_dict']['rois_scores_stage1'].append(scores) + batch_dict['rcnn_label_dict']['gt_of_rois'].append(gt_of_rois) + batch_dict['rcnn_label_dict']['gt_of_rois_src'].append( + gt_of_rois_src) + batch_dict['rcnn_label_dict']['cls_tgt'].append(rcnn_labels) + batch_dict['rcnn_label_dict']['reg_tgt'].append(reg_targets) + batch_dict['rcnn_label_dict']['iou_tgt'].append(max_ious) + batch_dict['rcnn_label_dict']['rois_anchor'].append(rois_anchor) + batch_dict['rcnn_label_dict']['record_len'].append(rois.shape[0]) + + + # cat list to tensor + for k, v in batch_dict['rcnn_label_dict'].items(): + if k == 'record_len': + continue + batch_dict['rcnn_label_dict'][k] = torch.cat(v, dim=0) + + return batch_dict + + def roi_grid_pool(self, batch_dict): + batch_size = len(batch_dict['record_len']) + rois = batch_dict['rcnn_label_dict']['rois'] + point_coords = batch_dict['point_coords'] + point_features = batch_dict['point_features'] + label_record_len = batch_dict['rcnn_label_dict']['record_len'] + + point_features = torch.cat(point_features, dim=0) + # (BxN, 6x6x6, 3) + global_roi_grid_points, local_roi_grid_points = \ + self.get_global_grid_points_of_roi(rois) + # (B, Nx6x6x6, 3) + global_roi_grid_points = global_roi_grid_points.view(batch_size, -1, 3) + + xyz = torch.cat(point_coords, dim=0) + xyz_batch_cnt = xyz.new_zeros(batch_size).int() + for bs_idx in range(batch_size): + xyz_batch_cnt[bs_idx] = len(point_coords[bs_idx]) + new_xyz = global_roi_grid_points.view(-1, 3) + new_xyz_batch_cnt = xyz.new_zeros(batch_size).int() + for bs_idx in range(batch_size): + new_xyz_batch_cnt[bs_idx] = label_record_len[ + bs_idx] * self.grid_size ** 3 + + pooled_points, pooled_features = self.roi_grid_pool_layer( + xyz=xyz[:, :3].contiguous(), + xyz_batch_cnt=xyz_batch_cnt, + new_xyz=new_xyz[:, :3].contiguous(), + new_xyz_batch_cnt=new_xyz_batch_cnt, + features=point_features.contiguous(), # weighted point features + ) # (M1 + M2 ..., C) + # (BxN, 6x6x6, C) + pooled_features = pooled_features.view(-1, self.grid_size ** 3, + pooled_features.shape[-1]) + + return pooled_features + + def forward(self, batch_dict): + batch_dict = self.assign_targets(batch_dict) + # RoI aware pooling + pooled_features = self.roi_grid_pool(batch_dict) # (BxN, 6x6x6, C) + + batch_size_rcnn = pooled_features.shape[0] + pooled_features = pooled_features.permute(0, 2, 1). 
\ + contiguous().view(batch_size_rcnn, -1, self.grid_size, + self.grid_size, + self.grid_size) # (BxN, C, 6, 6, 6) + shared_features = self.shared_fc_layers( + pooled_features.view(batch_size_rcnn, -1, 1)) + rcnn_cls = self.cls_layers(shared_features).transpose(1, + 2).contiguous().squeeze( + dim=1) # (B, 1 or 2) + rcnn_iou = self.iou_layers(shared_features).transpose(1, + 2).contiguous().squeeze( + dim=1) # (B, 1) + rcnn_reg = self.reg_layers(shared_features).transpose(1, + 2).contiguous().squeeze( + dim=1) # (B, C) + + batch_dict['stage2_out'] = { + 'rcnn_cls': rcnn_cls, + 'rcnn_iou': rcnn_iou, + 'rcnn_reg': rcnn_reg, + } + return batch_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/sparse_backbone_3d.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/sparse_backbone_3d.py new file mode 100644 index 0000000000000000000000000000000000000000..874b4d9523ad897097d3628fb3888454f8337d13 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/sparse_backbone_3d.py @@ -0,0 +1,146 @@ +from functools import partial + +import spconv +import torch.nn as nn + +try: # spconv1 + from spconv import SparseSequential, SubMConv3d, SparseConv3d, SparseInverseConv3d, SparseConvTensor +except: # spconv2 + from spconv.pytorch import SparseSequential, SubMConv3d, SparseConv3d, SparseInverseConv3d, SparseConvTensor + +def post_act_block(in_channels, out_channels, kernel_size, indice_key=None, stride=1, padding=0, + conv_type='subm', norm_fn=None): + + if conv_type == 'subm': + conv = SubMConv3d(in_channels, out_channels, kernel_size, bias=False, indice_key=indice_key) + elif conv_type == 'spconv': + conv = SparseConv3d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, + bias=False, indice_key=indice_key) + elif conv_type == 'inverseconv': + conv = SparseInverseConv3d(in_channels, out_channels, kernel_size, indice_key=indice_key, bias=False) + else: + raise NotImplementedError + + m = SparseSequential( + conv, + norm_fn(out_channels), + nn.ReLU(), + ) + + return m + + +class VoxelBackBone8x(nn.Module): + def __init__(self, model_cfg, input_channels, grid_size, **kwargs): + super().__init__() + self.model_cfg = model_cfg + norm_fn = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01) + + self.sparse_shape = grid_size[::-1] + [1, 0, 0] + + self.conv_input = SparseSequential( + SubMConv3d(input_channels, 16, 3, padding=1, bias=False, indice_key='subm1'), + norm_fn(16), + nn.ReLU(), + ) + block = post_act_block + + self.conv1 = SparseSequential( + block(16, 16, 3, norm_fn=norm_fn, padding=1, indice_key='subm1'), + ) + + self.conv2 = SparseSequential( + # [1600, 1408, 41] <- [800, 704, 21] + block(16, 32, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv2', conv_type='spconv'), + block(32, 32, 3, norm_fn=norm_fn, padding=1, indice_key='subm2'), + block(32, 32, 3, norm_fn=norm_fn, padding=1, indice_key='subm2'), + ) + + self.conv3 = SparseSequential( + # [800, 704, 21] <- [400, 352, 11] + block(32, 64, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv3', conv_type='spconv'), + block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3'), + block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm3'), + ) + + self.conv4 = SparseSequential( + # [400, 352, 11] <- [200, 176, 5] + block(64, 64, 3, norm_fn=norm_fn, stride=2, padding=(0, 1, 1), indice_key='spconv4', conv_type='spconv'), + block(64, 64, 3, norm_fn=norm_fn, padding=1, 
indice_key='subm4'), + block(64, 64, 3, norm_fn=norm_fn, padding=1, indice_key='subm4'), + ) + + last_pad = 0 + if 'num_features_out' in self.model_cfg: + self.num_point_features = self.model_cfg['num_features_out'] + else: + self.num_point_features = 128 + self.conv_out = SparseSequential( + # [200, 150, 5] -> [200, 150, 2] + SparseConv3d(64, self.num_point_features, (3, 1, 1), stride=(2, 1, 1), padding=last_pad, + bias=False, indice_key='spconv_down2'), + norm_fn(self.num_point_features), + nn.ReLU(), + ) + + self.backbone_channels = { + 'x_conv1': 16, + 'x_conv2': 32, + 'x_conv3': 64, + 'x_conv4': 64 + } + + def forward(self, batch_dict): + """ + Args: + batch_dict: + batch_size: int + vfe_features: (num_voxels, C) + voxel_coords: (num_voxels, 4), [batch_idx, z_idx, y_idx, x_idx] + Returns: + batch_dict: + encoded_spconv_tensor: sparse tensor + """ + voxel_features, voxel_coords = batch_dict['voxel_features'], \ + batch_dict['voxel_coords'] + batch_size = batch_dict['batch_size'] + input_sp_tensor = SparseConvTensor( + features=voxel_features, + indices=voxel_coords.int(), + spatial_shape=self.sparse_shape, + batch_size=batch_size + ) + + x = self.conv_input(input_sp_tensor) + + x_conv1 = self.conv1(x) + x_conv2 = self.conv2(x_conv1) + x_conv3 = self.conv3(x_conv2) + x_conv4 = self.conv4(x_conv3) + + # for detection head + # [200, 176, 5] -> [200, 176, 2] + out = self.conv_out(x_conv4) + + batch_dict.update({ + 'encoded_spconv_tensor': out, + 'encoded_spconv_tensor_stride': 8 + }) + batch_dict.update({ + 'multi_scale_3d_features': { + 'x_conv1': x_conv1, + 'x_conv2': x_conv2, + 'x_conv3': x_conv3, + 'x_conv4': x_conv4, + } + }) + batch_dict.update({ + 'multi_scale_3d_strides': { + 'x_conv1': 1, + 'x_conv2': 2, + 'x_conv3': 4, + 'x_conv4': 8, + } + }) + + return batch_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/split_attn.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/split_attn.py new file mode 100644 index 0000000000000000000000000000000000000000..5390b16c494e07bf1e05e06db0279ad8122b6bad --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/split_attn.py @@ -0,0 +1,63 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class RadixSoftmax(nn.Module): + def __init__(self, radix, cardinality): + super(RadixSoftmax, self).__init__() + self.radix = radix + self.cardinality = cardinality + + def forward(self, x): + # x: (B, L, 1, 1, 3C) + batch = x.size(0) + cav_num = x.size(1) + + if self.radix > 1: + # x: (B, L, 1, 3, C) + x = x.view(batch, + cav_num, + self.cardinality, self.radix, -1) + x = F.softmax(x, dim=3) + # B, 3LC + x = x.reshape(batch, -1) + else: + x = torch.sigmoid(x) + return x + + +class SplitAttn(nn.Module): + def __init__(self, input_dim): + super(SplitAttn, self).__init__() + self.input_dim = input_dim + + self.fc1 = nn.Linear(input_dim, input_dim, bias=False) + self.bn1 = nn.LayerNorm(input_dim) + self.act1 = nn.ReLU() + self.fc2 = nn.Linear(input_dim, input_dim * 3, bias=False) + + self.rsoftmax = RadixSoftmax(3, 1) + + def forward(self, window_list): + # window list: [(B, L, H, W, C) * 3] + assert len(window_list) == 3, 'only 3 windows are supported' + + sw, mw, bw = window_list[0], window_list[1], window_list[2] + B, L = sw.shape[0], sw.shape[1] + + # global average pooling, B, L, H, W, C + x_gap = sw + mw + bw + # B, L, 1, 1, C + x_gap = x_gap.mean((2, 3), keepdim=True) + x_gap = 
self.act1(self.bn1(self.fc1(x_gap))) + # B, L, 1, 1, 3C + x_attn = self.fc2(x_gap) + # B L 1 1 3C + x_attn = self.rsoftmax(x_attn).view(B, L, 1, 1, -1) + + out = sw * x_attn[:, :, :, :, 0:self.input_dim] + \ + mw * x_attn[:, :, :, :, self.input_dim:2*self.input_dim] +\ + bw * x_attn[:, :, :, :, self.input_dim*2:] + + return out \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/torch_transformation_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/torch_transformation_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f96e301a760af673d915fec7a5d709ef34c3c3db --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/torch_transformation_utils.py @@ -0,0 +1,443 @@ +""" +torch_transformation_utils.py +""" +import os + +import torch +import torch.nn.functional as F +import numpy as np +import matplotlib.pyplot as plt +from icecream import ic + +def get_roi_and_cav_mask(shape, cav_mask, spatial_correction_matrix, + discrete_ratio, downsample_rate): + """ + Get mask for the combination of cav_mask and rorated ROI mask. + Parameters + ---------- + shape : tuple + Shape of (B, L, H, W, C). + cav_mask : torch.Tensor + Shape of (B, L). + spatial_correction_matrix : torch.Tensor + Shape of (B, L, 4, 4) + discrete_ratio : float + Discrete ratio. + downsample_rate : float + Downsample rate. + + Returns + ------- + com_mask : torch.Tensor + Combined mask with shape (B, H, W, L, 1). + + """ + B, L, H, W, C = shape + C = 1 + # (B,L,4,4) + dist_correction_matrix = get_discretized_transformation_matrix( + spatial_correction_matrix, discrete_ratio, + downsample_rate) + # (B*L,2,3) + T = get_transformation_matrix( + dist_correction_matrix.reshape(-1, 2, 3), (H, W)) + # (B,L,1,H,W) + roi_mask = get_rotated_roi((B, L, C, H, W), T) + # (B,L,1,H,W) + com_mask = combine_roi_and_cav_mask(roi_mask, cav_mask) + # (B,H,W,1,L) + com_mask = com_mask.permute(0,3,4,2,1) + return com_mask + + +def combine_roi_and_cav_mask(roi_mask, cav_mask): + """ + Combine ROI mask and CAV mask + + Parameters + ---------- + roi_mask : torch.Tensor + Mask for ROI region after considering the spatial transformation/correction. + cav_mask : torch.Tensor + Mask for CAV to remove padded 0. + + Returns + ------- + com_mask : torch.Tensor + Combined mask. + """ + # (B, L, 1, 1, 1) + cav_mask = cav_mask.unsqueeze(2).unsqueeze(3).unsqueeze(4) + # (B, L, C, H, W) + cav_mask = cav_mask.expand(roi_mask.shape) + # (B, L, C, H, W) + com_mask = roi_mask * cav_mask + return com_mask + + +def get_rotated_roi(shape, correction_matrix): + """ + Get rorated ROI mask. + + Parameters + ---------- + shape : tuple + Shape of (B,L,C,H,W). + correction_matrix : torch.Tensor + Correction matrix with shape (N,2,3). + + Returns + ------- + roi_mask : torch.Tensor + Roated ROI mask with shape (N,2,3). + + """ + B, L, C, H, W = shape + # To reduce the computation, we only need to calculate the + # mask for the first channel. + # (B,L,1,H,W) + x = torch.ones((B, L, 1, H, W)).to(correction_matrix.dtype).to( + correction_matrix.device) + # (B*L,1,H,W) + roi_mask = warp_affine(x.reshape(-1, 1, H, W), correction_matrix, + dsize=(H, W), mode="nearest") + # (B,L,C,H,W) + roi_mask = torch.repeat_interleave(roi_mask, C, dim=1).reshape(B, L, C, H, + W) + return roi_mask + + +def get_discretized_transformation_matrix(matrix, discrete_ratio, + downsample_rate): + """ + Get disretized transformation matrix. 
+ Parameters + ---------- + matrix : torch.Tensor + Shape -- (B, L, 4, 4) where B is the batch size, L is the max cav + number. + discrete_ratio : float + Discrete ratio. + downsample_rate : float or int + downsample_rate + + discrete_ratio * downsample_rate = ___ meter one pixel, in the current feature map. + + Returns + ------- + matrix : torch.Tensor + Output transformation matrix in 2D with shape (B, L, 2, 3), + including 2D transformation and 2D rotation. + transformation is pixel level + + """ + matrix = matrix[:, :, [0, 1], :][:, :, :, [0, 1, 3]] + # normalize the x,y transformation + matrix[:, :, :, -1] = matrix[:, :, :, -1] \ + / (discrete_ratio * downsample_rate) + + return matrix.type(dtype=torch.float) + + +def _torch_inverse_cast(input): + r""" + Helper function to make torch.inverse work with other than fp32/64. + The function torch.inverse is only implemented for fp32/64 which makes + impossible to be used by fp16 or others. What this function does, + is cast input data type to fp32, apply torch.inverse, + and cast back to the input dtype. + Args: + input : torch.Tensor + Tensor to be inversed. + + Returns: + out : torch.Tensor + Inversed Tensor. + + """ + dtype = input.dtype + if dtype not in (torch.float32, torch.float64): + dtype = torch.float32 + out = torch.inverse(input.to(dtype)).to(input.dtype) + return out + + +def normal_transform_pixel( + height, width, device, dtype, eps=1e-14): + r""" + Compute the normalization matrix from image size in pixels to [-1, 1]. + Args: + height : int + Image height. + width : int + Image width. + device : torch.device + Output tensor devices. + dtype : torch.dtype + Output tensor data type. + eps : float + Epsilon to prevent divide-by-zero errors. + + Returns: + tr_mat : torch.Tensor + Normalized transform with shape :math:`(1, 3, 3)`. + """ + tr_mat = torch.tensor( + [[1.0, 0.0, -1.0], [0.0, 1.0, -1.0], [0.0, 0.0, 1.0]], device=device, + dtype=dtype) # 3x3 + + # prevent divide by zero bugs + width_denom = eps if width == 1 else width - 1.0 + height_denom = eps if height == 1 else height - 1.0 + + tr_mat[0, 0] = tr_mat[0, 0] * 2.0 / width_denom + tr_mat[1, 1] = tr_mat[1, 1] * 2.0 / height_denom + + return tr_mat.unsqueeze(0) # 1x3x3 + + +def eye_like(n, B, device, dtype): + r""" + Return a 2-D tensor with ones on the diagonal and + zeros elsewhere with the same batch size as the input. + Args: + n : int + The number of rows :math:`(n)`. + B : int + Btach size. + device : torch.device + Devices of the output tensor. + dtype : torch.dtype + Data type of the output tensor. + + Returns: + The identity matrix with the shape :math:`(B, n, n)`. + """ + + identity = torch.eye(n, device=device, dtype=dtype) + return identity[None].repeat(B, 1, 1) + + +def normalize_homography(dst_pix_trans_src_pix, dsize_src, dsize_dst=None): + r""" + Normalize a given homography in pixels to [-1, 1]. + Args: + dst_pix_trans_src_pix : torch.Tensor + Homography/ies from source to destination to be normalized with + shape :math:`(B, 3, 3)`. + dsize_src : Tuple[int, int] + Size of the source image (height, width). + dsize_dst : Tuple[int, int] + Size of the destination image (height, width). + + Returns: + dst_norm_trans_src_norm : torch.Tensor + The normalized homography of shape :math:`(B, 3, 3)`. 
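# --- Illustrative sketch (not part of the patch): pixel -> [-1, 1] normalization ---
# normal_transform_pixel above builds the 3x3 matrix that maps pixel coordinates to the
# [-1, 1] range expected by F.grid_sample; normalize_homography conjugates a pixel-space
# homography with these matrices on both sides. A quick check with an illustrative
# 100 x 200 image, assuming the functions in this file are in scope:
import torch

H, W = 100, 200
T = normal_transform_pixel(H, W, device='cpu', dtype=torch.float32)  # (1, 3, 3)
corners_pix = torch.tensor([[0.0, 0.0, 1.0],          # top-left pixel
                            [W - 1.0, H - 1.0, 1.0]])  # bottom-right pixel
corners_norm = (T[0] @ corners_pix.T).T
# corners_norm[:, :2] is approximately [[-1, -1], [1, 1]]
print(corners_norm[:, :2])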
+ """ + if dsize_dst is None: + dsize_dst = dsize_src + # source and destination sizes + src_h, src_w = dsize_src + dst_h, dst_w = dsize_dst + device = dst_pix_trans_src_pix.device + dtype = dst_pix_trans_src_pix.dtype + # compute the transformation pixel/norm for src/dst + src_norm_trans_src_pix = normal_transform_pixel(src_h, src_w, device, + dtype).to( + dst_pix_trans_src_pix) + + src_pix_trans_src_norm = _torch_inverse_cast(src_norm_trans_src_pix) + dst_norm_trans_dst_pix = normal_transform_pixel(dst_h, dst_w, device, + dtype).to( + dst_pix_trans_src_pix) + # compute chain transformations + dst_norm_trans_src_norm: torch.Tensor = dst_norm_trans_dst_pix @ ( + dst_pix_trans_src_pix @ src_pix_trans_src_norm) + return dst_norm_trans_src_norm + + +def get_rotation_matrix2d(M, dsize): + r""" + Return rotation matrix for torch.affine_grid based on transformation matrix. + Args: + M : torch.Tensor + Transformation matrix with shape :math:`(B, 2, 3)`. + dsize : Tuple[int, int] + Size of the source image (height, width). + + Returns: + R : torch.Tensor + Rotation matrix with shape :math:`(B, 2, 3)`. + """ + H, W = dsize + B = M.shape[0] + center = torch.Tensor([W / 2, H / 2]).to(M.dtype).to(M.device).unsqueeze(0) + shift_m = eye_like(3, B, M.device, M.dtype) + shift_m[:, :2, 2] = center + + shift_m_inv = eye_like(3, B, M.device, M.dtype) + shift_m_inv[:, :2, 2] = -center + + rotat_m = eye_like(3, B, M.device, M.dtype) + rotat_m[:, :2, :2] = M[:, :2, :2] + affine_m = shift_m @ rotat_m @ shift_m_inv + return affine_m[:, :2, :] # Bx2x3 + + +def get_transformation_matrix(M, dsize): + r""" + Return transformation matrix for torch.affine_grid. + Args: + M : torch.Tensor + Transformation matrix with shape :math:`(N, 2, 3)`. + dsize : Tuple[int, int] + Size of the source image (height, width). + + Returns: + T : torch.Tensor + Transformation matrix with shape :math:`(N, 2, 3)`. + """ + T = get_rotation_matrix2d(M, dsize) + T[..., 2] += M[..., 2] + return T + + +def convert_affinematrix_to_homography(A): + r""" + Convert to homography coordinates + Args: + A : torch.Tensor + The affine matrix with shape :math:`(B,2,3)`. + + Returns: + H : torch.Tensor + The homography matrix with shape of :math:`(B,3,3)`. + """ + H: torch.Tensor = torch.nn.functional.pad(A, [0, 0, 0, 1], "constant", + value=0.0) + H[..., -1, -1] += 1.0 + return H + + +def warp_affine_simple(src, M, dsize, + mode='bilinear', + padding_mode='zeros', + align_corners=False): + + B, C, H, W = src.size() + grid = F.affine_grid(M, + [B, C, dsize[0], dsize[1]], + align_corners=align_corners).to(src) + return F.grid_sample(src, grid, align_corners=align_corners) + +def warp_affine( + src, M, dsize, + mode='bilinear', + padding_mode='zeros', + align_corners=True): + r""" + Transform the src based on transformation matrix M. + Args: + src : torch.Tensor + Input feature map with shape :math:`(B,C,H,W)`. + M : torch.Tensor + Transformation matrix with shape :math:`(B,2,3)`. + dsize : tuple + Tuple of output image H_out and W_out. + mode : str + Interpolation methods for F.grid_sample. + padding_mode : str + Padding methods for F.grid_sample. + align_corners : boolean + Parameter of F.affine_grid. + + Returns: + Transformed features with shape :math:`(B,C,H,W)`. 
+ """ + + B, C, H, W = src.size() + + # we generate a 3x3 transformation matrix from 2x3 affine + M_3x3 = convert_affinematrix_to_homography(M) + dst_norm_trans_src_norm = normalize_homography(M_3x3, (H, W), dsize) + + # src_norm_trans_dst_norm = torch.inverse(dst_norm_trans_src_norm) + src_norm_trans_dst_norm = _torch_inverse_cast(dst_norm_trans_src_norm) + + grid = F.affine_grid(src_norm_trans_dst_norm[:, :2, :], + [B, C, dsize[0], dsize[1]], + align_corners=align_corners) + + return F.grid_sample(src.half() if grid.dtype==torch.half else src, + grid, align_corners=align_corners, mode=mode, + padding_mode=padding_mode) + + +class Test: + """ + Test the transformation in this file. + The methods in this class are not supposed to be used outside of this file. + """ + + def __init__(self): + pass + + @staticmethod + def load_img(): + torch.manual_seed(0) + x = torch.randn(1, 5, 16, 400, 200) * 100 + # x = torch.ones(1, 5, 16, 400, 200) + return x + + @staticmethod + def load_raw_transformation_matrix(N): + a = 90 / 180 * np.pi + matrix = torch.Tensor([[np.cos(a), -np.sin(a), 10], + [np.sin(a), np.cos(a), 10]]) + matrix = torch.repeat_interleave(matrix.unsqueeze(0).unsqueeze(0), N, + dim=1) + return matrix + + @staticmethod + def load_raw_transformation_matrix2(N, alpha): + a = alpha / 180 * np.pi + matrix = torch.Tensor([[np.cos(a), -np.sin(a), 0, 0], + [np.sin(a), np.cos(a), 0, 0]]) + matrix = torch.repeat_interleave(matrix.unsqueeze(0).unsqueeze(0), N, + dim=1) + return matrix + + @staticmethod + def test(): + img = Test.load_img() + B, L, C, H, W = img.shape + raw_T = Test.load_raw_transformation_matrix(5) + T = get_transformation_matrix(raw_T.reshape(-1, 2, 3), (H, W)) + img_rot = warp_affine(img.reshape(-1, C, H, W), T, (H, W)) + print(img_rot[0, 0, :, :]) + plt.matshow(img_rot[0, 0, :, :]) + plt.show() + + @staticmethod + def test_combine_roi_and_cav_mask(): + B = 2 + L = 5 + C = 16 + H = 300 + W = 400 + # 2, 5 + cav_mask = torch.Tensor([[1, 1, 1, 0, 0], [1, 0, 0, 0, 0]]) + x = torch.zeros(B, L, C, H, W) + correction_matrix = Test.load_raw_transformation_matrix2(5, 10) + correction_matrix = torch.cat([correction_matrix, correction_matrix], + dim=0) + mask = get_roi_and_cav_mask((B, L, H, W, C), cav_mask, + correction_matrix, 0.4, 4) + plt.matshow(mask[0, :, :, 0, 0]) + plt.show() + + + +if __name__ == "__main__": + os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' + Test.test_combine_roi_and_cav_mask() diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/v2v_robust_module.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/v2v_robust_module.py new file mode 100644 index 0000000000000000000000000000000000000000..56ab92444117cc2ac3f8efb9bd11bd8bd37e443f --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/v2v_robust_module.py @@ -0,0 +1,403 @@ +from icecream import ic +import torch +import math +import torch.nn as nn +from opencood.models.sub_modules.torch_transformation_utils import warp_affine_simple +from opencood.utils.transformation_utils import pose_to_tfm, tfm_to_pose_torch, tfm_to_xycs_torch, xycs_to_tfm_torch + +def regroup(x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + +# Part1, Pose Regression Module + +class PoseRegression(nn.Module): + """ + Args: + in_ch: 2*C + + forward: + x: [N,2C,H,W] concatenated feature + + Returns: + [N, 3]: x, y, yaw + + """ + def __init__(self, in_ch=512, 
hidden_ch=256): + super(PoseRegression, self).__init__() + self.model = nn.Sequential( + nn.Conv2d(in_ch, hidden_ch, kernel_size=(3, 3), padding=1), + nn.LeakyReLU(negative_slope=0.01), + nn.MaxPool2d(kernel_size=2, stride=2, padding=0), + nn.Conv2d(hidden_ch, hidden_ch, kernel_size=(3, 3), padding=1), + nn.LeakyReLU(negative_slope=0.01), + nn.MaxPool2d(kernel_size=2, stride=2, padding=0), + nn.Conv2d(hidden_ch, hidden_ch, kernel_size=(3, 3), padding=1), + nn.LeakyReLU(negative_slope=0.01), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.Conv2d( + hidden_ch, hidden_ch, kernel_size=(3, 3), stride=(2, 2), padding=1 + ), + nn.LeakyReLU(negative_slope=0.01), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.AdaptiveAvgPool2d(output_size=1), + nn.Flatten(), + nn.Linear(in_features=hidden_ch, out_features=hidden_ch, bias=True), + nn.LeakyReLU(negative_slope=0.01), + nn.Linear(in_features=hidden_ch, out_features=hidden_ch, bias=True), + nn.LeakyReLU(negative_slope=0.01), + nn.Linear(in_features=hidden_ch, out_features=3, bias=True), + ) + + def forward(self, x): + + pose_reg = self.model(x) + return pose_reg + + + +class PoseRegressionWraper(nn.Module): + """ + Args: + features: [sum(cav), C, H, W], + record_len: list + pairwise_t_matrix: [B, L, L, 4, 4], original pairwise_t_matrix, noise contains + Retuens: + pairwise_t_matrix_new: [B, L, L, 4, 4], the relative pose after correction. + """ + def __init__(self, in_ch, hidden_ch, affine_parameter): + super(PoseRegressionWraper, self).__init__() + self.pose_regression = PoseRegression( + in_ch=in_ch, hidden_ch=hidden_ch + ) + self.H = affine_parameter['H'] + self.W = affine_parameter['W'] + self.downsample_rate = affine_parameter['downsample_rate'] + self.discrete_ratio = affine_parameter['discrete_ratio'] + + def forward(self, features, record_len, pairwise_t_matrix): + _, C, H, W = features.shape + B, L = pairwise_t_matrix.shape[:2] + split_x = regroup(features, record_len) + pairwise_t_matrix_new = torch.eye(4, device=pairwise_t_matrix.device).view(1,1,1,4,4).repeat(B,L,L,1,1) + pose_corr_matrix = torch.zeros((B,L,L,3),device=pairwise_t_matrix.device) + for b in range(B): + N = record_len[b] + agent_features = split_x[b] + for i in range(N): + t_matrix = pairwise_t_matrix[b] + t_matrix = t_matrix[:,:,[0, 1],:][:,:,:,[0, 1, 3]] # [L, L, 2, 3] + t_matrix[...,0,1] = t_matrix[...,0,1] * H / W + t_matrix[...,1,0] = t_matrix[...,1,0] * W / H + t_matrix[...,0,2] = t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + t_matrix[...,1,2] = t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + # (N,C,H,W) + neighbors = warp_affine_simple(agent_features, t_matrix[i, :N, :, :], (self.H, self.W)) + # (N,C,H,W) + ego_agent_feature = agent_features[i].unsqueeze(0).repeat(N, 1, 1, 1) + # (N,2C,H,W) + neighbor_feature = torch.cat( + [neighbors, ego_agent_feature], dim=1) + # (N,3) + pose_corr = self.pose_regression(neighbor_feature) + pose_corr_matrix[b,i,:N] = pose_corr + + # (N, 4, 4) + pose_corr_tfm = pose_to_tfm(pose_corr) + pairwise_t_matrix_new[b,i,:N] = pose_corr_tfm @ pairwise_t_matrix[b,i,:N] + + return pose_corr_matrix, pairwise_t_matrix_new + + + +# Part 2, Global Consistent Module +def get_intersection(pairwise_t_matrix, affine_parameter): + """ get intersection from pairwise_t_matrix + + Args: + pairwise_t_matrix: torch.Tensor, shape [L, L, 4, 4] + pairwise transformation matrix for one frame. + pairwise_t_matrix[i,j] = Tji, i is ego + affine_parameter: dict + H, W, etc. 
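# --- Illustrative sketch (not part of the patch): pose-error regression head ---
# PoseRegression above consumes the ego feature concatenated channel-wise with a
# neighbor's warped feature and regresses an (x, y, yaw) correction; the wrapper then
# turns that correction into an updated pairwise transform via pose_to_tfm. A minimal
# call with illustrative sizes (C = 256, so in_ch = 2C = 512):
import torch

net = PoseRegression(in_ch=512, hidden_ch=256)
ego_and_neighbor = torch.randn(3, 512, 100, 252)  # (N, 2C, H, W), sizes illustrative
pose_corr = net(ego_and_neighbor)                 # (N, 3): x, y, yaw correction
print(pose_corr.shape)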
+ + + Returns: + intersection: torch.Tensor, shape [L, L] + """ + H = affine_parameter['H'] + W = affine_parameter['W'] + downsample_rate = affine_parameter['downsample_rate'] + discrete_ratio = affine_parameter['discrete_ratio'] + intersections = [] + + L = pairwise_t_matrix.shape[0] + one_tensor = torch.zeros((L,1,H,W), device=pairwise_t_matrix.device) + for i in range(L): + t_matrix = pairwise_t_matrix[:,:,[0, 1],:][:,:,:,[0, 1, 3]] # [L, L, 2, 3] + t_matrix[...,0,1] = t_matrix[...,0,1] * H / W + t_matrix[...,1,0] = t_matrix[...,1,0] * W / H + t_matrix[...,0,2] = t_matrix[...,0,2] / (downsample_rate * discrete_ratio * W) * 2 + t_matrix[...,1,2] = t_matrix[...,1,2] / (downsample_rate * discrete_ratio * H) * 2 + + # [L,1,H,W] + neighbors = warp_affine_simple(one_tensor, t_matrix[i, :L, :, :], (H, W)) + intersection = torch.sum(neighbors, dim=[1,2,3]) / (H * W) # [L,] + intersections.append(intersection) + + # [L, L], intersections[i,:], ego is i + intersections = torch.stack(intersections) + + # if intersection is zero, may meet nan later. + eps = 0.01 + intersections += eps + + return intersections + + + + +def WeightedMLE(pose, pairwise_t_matrix, weight): + """ Weighted MLE for estimate mu and sigma of multivariate student t distribution. + simutanously for all nodes + Args: + pose: [N,3] + pairwise_t_matrix: [L, L, 4, 4] + weight: [L, L] + + Returns: + pose_mu: [N, 3] , but [N, 4] now + pose_sigma: [N, 3, 3], but [N, 4] now + """ + + N = pose.shape[0] + mu_list = [] + sigma_list = [] + + for i in range(N): + + neighbor_ids = list(range(N)) + neighbor_ids.remove(i) + + weights = weight[i,neighbor_ids].repeat(2) # [2(N-1)] + relative_pose1 = pairwise_t_matrix[i,neighbor_ids] # [N-1, 4, 4] Tji + relative_pose2 = pairwise_t_matrix[neighbor_ids,i] # [N-1, 4, 4] Tij + relative_pose2 = torch.inverse(relative_pose2) + relative_pose = torch.cat([relative_pose1,relative_pose2], dim=0) # [2(N-1), 4, 4] + + tfm = pose_to_tfm(pose[neighbor_ids]).repeat(2,1,1) # [2(N-1), 4, 4] + samples = tfm @ relative_pose # [2(N-1), 4, 4] + # here is one problem, -179 and +179 degree. They are close actually. + # so we use cos and sin to replace angle + samples = tfm_to_xycs_torch(samples).to(torch.float64) # [N, 4] + + + mu = samples.median(0).values + Sigma = torch.eye(4, device=pose.device, dtype=torch.float64) + small_identity = torch.eye(4, device=pose.device, dtype=torch.float64) * 0.05 + + diff = mu[None] - samples + + v = 2 + for _ in range(15): + eta = (v + mu.size(0)) / ( + v + torch.einsum("ni,ij,nj->n", diff, Sigma.inverse(), diff) + ) + mu = torch.einsum("n,n,ni->i", weights, eta, samples) / (weights * eta).sum() + diff = mu[None] - samples + # Sigma = torch.einsum('n,n,ni,nj->ij', weights, w, diff, diff) / weights.sum() + Sigma = ( + torch.einsum("n,ni,nj->ij", eta, diff, diff) / diff.size(0) + small_identity + ) + + mu_list.append(mu.to(torch.float32)) + sigma_list.append(Sigma.to(torch.float32)) + + pose_mu = torch.stack(mu_list) + pose_sigma = torch.stack(sigma_list) + + return pose_mu, pose_sigma + + +def WeightedEM(lidar_pose, pairwise_t_matrix, intersection): + """Weighted EM algorithm, for a single frame, not batch data + Args: + lidar_pose : torch.Tenosr + shape [N, 3] + pairwise_t_matrix: torch.Tensor + shape [L, L, 4, 4] + intersection: torch.Tensor + shape [L, L] + + Returns: + pose_mu : torch.Tensor + new lidar pose after correction. 
shape [N, 3] + """ + num_iters = 10 + pose = lidar_pose + weight = torch.ones_like(intersection, device=intersection.device) + + for k in range(num_iters): + pose_mu, pose_sigma = WeightedMLE(pose, pairwise_t_matrix, weight) # [N, 4], [N, 4, 4] + weight = update_weight(pose_mu, pose_sigma, pairwise_t_matrix, intersection) + + N = lidar_pose.shape[0] + pose_new = torch.zeros((N,3), device=lidar_pose.device, dtype=lidar_pose.dtype) + pose_new[:,:2] = pose_mu[:,:2] + pose_new[:,2] = torch.rad2deg(torch.atan2(pose_mu[:,3], pose_mu[:,2])) # sin, cos + + return pose_new + +def update_weight(pose_mu, pose_sigma, pairwise_t_matrix, intersection): + """ using the close form to update weight w. + Args: + pose_mu: [N,3], but [N, 4] now + pose_sigma: [N, 3, 3], but [N, 4, 4] now + pairwise_t_matrix: [L,L,4,4] + interesection: [L, L] + """ + k = 120 + df = 2 # degree of freedom + L = intersection.shape[0] + N = pose_mu.shape[0] + weight = torch.zeros_like(intersection, device=intersection.device) + for i in range(N): + for j in range(N): + if i!=j: + pose_estimate1 = xycs_to_tfm_torch(pose_mu[[j]])[0] @ pairwise_t_matrix[i,j] # [4,4] + pose_estimate2 = xycs_to_tfm_torch(pose_mu[[i]])[0] @ torch.inverse(pairwise_t_matrix[i,j]) # [4,4] + pose_estimate = torch.stack([pose_estimate1, pose_estimate2]) # [2, 4, 4] + pose_estimate = tfm_to_xycs_torch(pose_estimate) # [2, 4] + weight[i,j] = k * intersection[i,j] / (k - log_t(pose_estimate, pose_mu[i], pose_sigma[i], df).sum()) + + return weight + + + +def log_t(x, mu, Sigma, df): + """ log pdf of t distribution + Args: + x: [N, 3] + mu: [3,] + Sigma: [3,3] + df: int, degree of freedom + + Returns: + log_pdf: log of the pdf + """ + + assert len(x.shape) == 2 + n, p = x.shape + # assert mu.shape[0] == p # for now, allow multiple mu + assert Sigma.shape == (p, p) + + v = torch.as_tensor(df, dtype=x.dtype, device=x.device) + p = torch.as_tensor(p, dtype=x.dtype, device=x.device) + pi = torch.tensor(math.pi, dtype=x.dtype, device=x.device) + half_v = v / 2.0 + half_p = p / 2.0 + + log_num = (half_v + half_p).lgamma() + log_denom = half_v.lgamma() + half_p * (v.log() + pi.log()) + 0.5 * Sigma.logdet() + + d = x - mu + log_val = -(half_p + half_v) * torch.log( + 1 + torch.einsum("ni,ij,nj->n", d, Sigma.inverse(), d) / v + ) + + log_pdf = log_num - log_denom + log_val + + return log_pdf + + +# Part 3, Attention Module + +class Attention(nn.Module): + """ + Args: + in_ch: 2*C + + forward: + x: [N,2C,H,W] concatenated feature + + """ + def __init__(self, in_ch, hidden_ch=160): + super(Attention, self).__init__() + self.model = nn.Sequential( + nn.Conv2d(in_ch, hidden_ch, 3, 1, 1), + nn.LeakyReLU(negative_slope=0.01), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.Conv2d(hidden_ch, hidden_ch, 3, 1, 1), + nn.LeakyReLU(negative_slope=0.01), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.AdaptiveMaxPool2d(output_size=1), + nn.Flatten(), + nn.Linear(in_features=hidden_ch, out_features=1, bias=True), + nn.Sigmoid(), + ) + + def forward(self, x): + out = self.model(x) + return out + +class AttentionWrapper(nn.Module): + """ wrapper of attention scoring + Args: + features: [sum(cav), C, H, W], + record_len: list + pairwise_t_matrix: [B, L, L, 4, 4], original pairwise_t_matrix, noise contains + Retuens: + pairwise_score: [B, L, L] + pairwise_score[i,j], ego is i. 
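# --- Illustrative sketch (not part of the patch): multivariate Student-t log-pdf ---
# log_t above scores pose samples under a multivariate t distribution with `df` degrees
# of freedom; update_weight uses it to down-weight inconsistent relative poses. A
# minimal call on toy data (float64, matching what WeightedMLE produces):
import torch

x = torch.randn(5, 4, dtype=torch.float64)  # 5 samples in (x, y, cos, sin) form
mu = torch.zeros(4, dtype=torch.float64)
Sigma = torch.eye(4, dtype=torch.float64)
log_pdf = log_t(x, mu, Sigma, df=2)         # shape (5,), one log-density per sample
print(log_pdf)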
+ """ + def __init__(self, in_ch, hidden_ch, affine_parameter, learnable_alpha=True): + super(AttentionWrapper, self).__init__() + self.attention_net = Attention(in_ch, hidden_ch) + self.H = affine_parameter['H'] + self.W = affine_parameter['W'] + self.downsample_rate = affine_parameter['downsample_rate'] + self.discrete_ratio = affine_parameter['discrete_ratio'] + if learnable_alpha: + self.alpha = nn.Parameter(torch.Tensor([0.15])) + else: + self.alpha = 0.35 + + def forward(self, features, record_len, pairwise_t_matrix): + _, C, H, W = features.shape + B, L = pairwise_t_matrix.shape[:2] + split_x = regroup(features, record_len) + pairwise_score = torch.zeros((B, L, L), device=features.device) + # mask = torch.eye(L, device=features.device).expand(B,L,L) + + + for b in range(B): + N = record_len[b] + agent_features = split_x[b] + for i in range(N): + t_matrix = pairwise_t_matrix[b] + t_matrix = t_matrix[:,:,[0, 1],:][:,:,:,[0, 1, 3]] # [L, L, 2, 3] + t_matrix[...,0,1] = t_matrix[...,0,1] * self.H / self.W + t_matrix[...,1,0] = t_matrix[...,1,0] * self.W / self.H + t_matrix[...,0,2] = t_matrix[...,0,2] / (self.downsample_rate * self.discrete_ratio * W) * 2 + t_matrix[...,1,2] = t_matrix[...,1,2] / (self.downsample_rate * self.discrete_ratio * H) * 2 + + # (N,C,H,W) + neighbors = warp_affine_simple(agent_features, t_matrix[i, :N, :, :], (self.H, self.W)) + # (N,C,H,W) + ego_agent_feature = agent_features[i].unsqueeze(0).repeat(N, 1, 1, 1) + # (N,2C,H,W) + neighbor_feature = torch.cat( + [neighbors, ego_agent_feature], dim=1) + # (N,1) + pairwise_score[b,i,:N] = self.attention_net(neighbor_feature).flatten() + + # pairwise_score *= mask + + scores = pairwise_score + eps = 1e-4 + # pairwise_score (B, L, L). pairwise_score[b,i,j] is agent j' feature warping to agent i's coordinate + # weight (B, L, L), normalized at dim=2 + weight = pairwise_score / (torch.sum(pairwise_score, dim=2, keepdim=True) + self.alpha + eps) + + return scores, weight diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/v2xvit_basic.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/v2xvit_basic.py new file mode 100644 index 0000000000000000000000000000000000000000..6f138232c39ec62537a12b55bba9a57704248c03 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/v2xvit_basic.py @@ -0,0 +1,193 @@ +import math +import torch +import torch.nn as nn + +from opencood.models.sub_modules.base_transformer import * +from opencood.models.sub_modules.hmsa import * +from opencood.models.sub_modules.mswin import * +from opencood.models.sub_modules.torch_transformation_utils import \ + get_transformation_matrix, warp_affine, get_roi_and_cav_mask, \ + get_discretized_transformation_matrix + + +class STTF(nn.Module): + def __init__(self, args): + super(STTF, self).__init__() + self.discrete_ratio = args['voxel_size'][0] + self.downsample_rate = args['downsample_rate'] + + def forward(self, x, mask, spatial_correction_matrix): + x = x.permute(0, 1, 4, 2, 3) + dist_correction_matrix = get_discretized_transformation_matrix( + spatial_correction_matrix, self.discrete_ratio, + self.downsample_rate) + # Only compensate non-ego vehicles + B, L, C, H, W = x.shape + + T = get_transformation_matrix( + dist_correction_matrix[:, 1:, :, :].reshape(-1, 2, 3), (H, W)) + cav_features = warp_affine(x[:, 1:, :, :, :].reshape(-1, C, H, W), T, + (H, W)) + cav_features = cav_features.reshape(B, -1, C, H, W) + x = torch.cat([x[:, 0, :, :, :].unsqueeze(1), 
cav_features], dim=1) + x = x.permute(0, 1, 3, 4, 2) + return x + + +class RelTemporalEncoding(nn.Module): + """ + Implement the Temporal Encoding (Sinusoid) function. + """ + + def __init__(self, n_hid, RTE_ratio, max_len=100, dropout=0.2): + super(RelTemporalEncoding, self).__init__() + position = torch.arange(0., max_len).unsqueeze(1) + div_term = torch.exp(torch.arange(0, n_hid, 2) * + -(math.log(10000.0) / n_hid)) + emb = nn.Embedding(max_len, n_hid) + emb.weight.data[:, 0::2] = torch.sin(position * div_term) / math.sqrt( + n_hid) + emb.weight.data[:, 1::2] = torch.cos(position * div_term) / math.sqrt( + n_hid) + emb.requires_grad = False + self.RTE_ratio = RTE_ratio + self.emb = emb + self.lin = nn.Linear(n_hid, n_hid) + + def forward(self, x, t): + # When t has unit of 50ms, rte_ratio=1. + # So we can train on 100ms but test on 50ms + return x + self.lin(self.emb(t * self.RTE_ratio)).unsqueeze( + 0).unsqueeze(1) + + +class RTE(nn.Module): + def __init__(self, dim, RTE_ratio=2): + super(RTE, self).__init__() + self.RTE_ratio = RTE_ratio + + self.emb = RelTemporalEncoding(dim, RTE_ratio=self.RTE_ratio) + + def forward(self, x, dts): + # x: (B,L,H,W,C) + # dts: (B,L) + rte_batch = [] + for b in range(x.shape[0]): + rte_list = [] + for i in range(x.shape[1]): + rte_list.append( + self.emb(x[b, i, :, :, :], dts[b, i]).unsqueeze(0)) + rte_batch.append(torch.cat(rte_list, dim=0).unsqueeze(0)) + return torch.cat(rte_batch, dim=0) + + +class V2XFusionBlock(nn.Module): + def __init__(self, num_blocks, cav_att_config, pwindow_config): + super().__init__() + # first multi-agent attention and then multi-window attention + self.layers = nn.ModuleList([]) + self.num_blocks = num_blocks + + for _ in range(num_blocks): + att = HGTCavAttention(cav_att_config['dim'], + heads=cav_att_config['heads'], + dim_head=cav_att_config['dim_head'], + dropout=cav_att_config['dropout']) if \ + cav_att_config['use_hetero'] else \ + CavAttention(cav_att_config['dim'], + heads=cav_att_config['heads'], + dim_head=cav_att_config['dim_head'], + dropout=cav_att_config['dropout']) + self.layers.append(nn.ModuleList([ + PreNorm(cav_att_config['dim'], att), + PreNorm(cav_att_config['dim'], + PyramidWindowAttention(pwindow_config['dim'], + heads=pwindow_config['heads'], + dim_heads=pwindow_config[ + 'dim_head'], + drop_out=pwindow_config[ + 'dropout'], + window_size=pwindow_config[ + 'window_size'], + relative_pos_embedding= + pwindow_config[ + 'relative_pos_embedding'], + fuse_method=pwindow_config[ + 'fusion_method']))])) + + def forward(self, x, mask, prior_encoding): + for cav_attn, pwindow_attn in self.layers: + x = cav_attn(x, mask=mask, prior_encoding=prior_encoding) + x + x = pwindow_attn(x) + x + return x + + +class V2XTEncoder(nn.Module): + def __init__(self, args): + super().__init__() + + cav_att_config = args['cav_att_config'] + pwindow_att_config = args['pwindow_att_config'] + feed_config = args['feed_forward'] + + num_blocks = args['num_blocks'] + depth = args['depth'] + mlp_dim = feed_config['mlp_dim'] + dropout = feed_config['dropout'] + + self.downsample_rate = args['sttf']['downsample_rate'] + self.discrete_ratio = args['sttf']['voxel_size'][0] + self.use_roi_mask = args['use_roi_mask'] + self.use_RTE = cav_att_config['use_RTE'] + self.RTE_ratio = cav_att_config['RTE_ratio'] + self.sttf = STTF(args['sttf']) + # adjust the channel numbers from 256+3 -> 256 + self.prior_feed = nn.Linear(cav_att_config['dim'] + 3, + cav_att_config['dim']) + self.layers = nn.ModuleList([]) + if self.use_RTE: + self.rte = 
RTE(cav_att_config['dim'], self.RTE_ratio) + for _ in range(depth): + self.layers.append(nn.ModuleList([ + V2XFusionBlock(num_blocks, cav_att_config, pwindow_att_config), + PreNorm(cav_att_config['dim'], + FeedForward(cav_att_config['dim'], mlp_dim, + dropout=dropout)) + ])) + + def forward(self, x, mask, spatial_correction_matrix): + + # transform the features to the current timestamp + # velocity, time_delay, infra + # (B,L,H,W,3) + prior_encoding = x[..., -3:] + # (B,L,H,W,C) + x = x[..., :-3] + if self.use_RTE: + # dt: (B,L) + dt = prior_encoding[:, :, 0, 0, 1].to(torch.int) + x = self.rte(x, dt) + x = self.sttf(x, mask, spatial_correction_matrix) + com_mask = mask.unsqueeze(1).unsqueeze(2).unsqueeze( + 3) if not self.use_roi_mask else get_roi_and_cav_mask(x.shape, + mask, + spatial_correction_matrix, + self.discrete_ratio, + self.downsample_rate) + for attn, ff in self.layers: + x = attn(x, mask=com_mask, prior_encoding=prior_encoding) + x = ff(x) + x + return x + + +class V2XTransformer(nn.Module): + def __init__(self, args): + super(V2XTransformer, self).__init__() + + encoder_args = args['encoder'] + self.encoder = V2XTEncoder(encoder_args) + + def forward(self, x, mask, spatial_correction_matrix): + output = self.encoder(x, mask, spatial_correction_matrix) + output = output[:, 0] + return output \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/view_embedding.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/view_embedding.py new file mode 100644 index 0000000000000000000000000000000000000000..3826feec5de3de077ec7a40d1c67f63799067ad8 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/view_embedding.py @@ -0,0 +1,328 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from icecream import ic +import numpy as np +from opencood.pcdet_utils.roiaware_pool3d.roiaware_pool3d_utils import points_in_boxes_gpu + +def all_pair_l2(A, B): + """ All pair L2 distance for A and B + Args: + A : np.ndarray + shape [N_A, D] + B : np.ndarray + shape [N_B, D] + Returns: + C : np.ndarray + shape [N_A, N_B] + """ + TwoAB = 2*A@B.T # [N_A, N_B] + C = torch.sqrt( + torch.sum(A * A, 1, keepdim=True).repeat_interleave(TwoAB.shape[1], dim=1) \ + + torch.sum(B * B, 1, keepdim=True).T.repeat_interleave(TwoAB.shape[0], dim=0) \ + - TwoAB + ) + return C + +def bilinear_interpolate_torch(im, x, y): + """ + .--------> x + | + | + | + v y + + x0y0 ------ x1 + | | + | | + | | + | | + y1 ------- x1y1 + + Args: + im: (H, W, C) [y, x] + x: (N) + y: (N) + Returns: + """ + x0 = torch.floor(x).long() + x1 = x0 + 1 + + y0 = torch.floor(y).long() + y1 = y0 + 1 + + x0 = torch.clamp(x0, 0, im.shape[1] - 1) + x1 = torch.clamp(x1, 0, im.shape[1] - 1) + y0 = torch.clamp(y0, 0, im.shape[0] - 1) + y1 = torch.clamp(y1, 0, im.shape[0] - 1) + + Ia = im[y0, x0] + Ib = im[y1, x0] + Ic = im[y0, x1] + Id = im[y1, x1] + + wa = (x1.type_as(x) - x) * (y1.type_as(y) - y) + wb = (x1.type_as(x) - x) * (y - y0.type_as(y)) + wc = (x - x0.type_as(x)) * (y1.type_as(y) - y) + wd = (x - x0.type_as(x)) * (y - y0.type_as(y)) + ans = torch.t((torch.t(Ia) * wa)) + torch.t(torch.t(Ib) * wb) + torch.t(torch.t(Ic) * wc) + torch.t(torch.t(Id) * wd) + return ans + +def boxes_to_tfm(box3d): + with torch.no_grad(): + cos_theta = torch.cos(box3d[:, -1]) + sin_theta = torch.sin(box3d[:, -1]) + pos_x = box3d[:, 0] + pos_y = box3d[:, 1] + T_ego_obj_row1 = torch.stack([cos_theta, -sin_theta, pos_x], dim=-1) # [N, 
3] + T_ego_obj_row2 = torch.stack([sin_theta, cos_theta, pos_y], dim=-1) + T_ego_obj_row3 = torch.tensor([0,0,1.], device=T_ego_obj_row1.device).expand(T_ego_obj_row1.shape) + T_ego_obj = torch.stack([T_ego_obj_row1, T_ego_obj_row2, T_ego_obj_row3], dim=1) + return T_ego_obj + +def get_poi(pred_box3d_list, order, num_of_sample): + """ + get point of interest + + Frist, Divide the area of ego agent + .--------> x + | + | + | + v y + + 0 | 1 | 2 + -------+---------+------- + 3 | (obj) 4 | 5 + -------+---------+------- + 6 | 7 | 8 + + + Inputs: + pred_box3d_list: [[shape: N1, 7], [shape: N2, 7], ...], angle in rad + Returns + ego_partition_list: [[shape: N1], [shape: N2], ...] + """ + poi_list = [] + poi_norm_in_obj = [] + poi_valid_mask_list = [] + for box3d in pred_box3d_list: + T_ego_obj = boxes_to_tfm(box3d) # [N_box, 3, 3] + + T_obj_ego = torch.linalg.inv(T_ego_obj) + x_obj_ego = T_obj_ego[:, 0, 2] + y_obj_ego = T_obj_ego[:, 1, 2] + + hwl = box3d[:, 3:6] if order == "hwl" else box3d[:, [5,4,3]] + ego_in_left = (x_obj_ego < - hwl[:, 2]/2).int().view(-1,1) # [N_box, 1] + ego_in_right = (x_obj_ego > - hwl[:, 2]/2).int().view(-1,1) + ego_in_up = (y_obj_ego < - hwl[:, 1]/2).int().view(-1,1) + ego_in_down = (y_obj_ego > hwl[:, 1]/2).int().view(-1,1) + + poi_norm = torch.rand((box3d.shape[0], num_of_sample, 2), device=box3d.device) * 2 - 1 # range [-1, 1] + + ego_in_left_poi_deprecated_mask = (poi_norm[..., 0] > 0.6).int() # [N_box, num_of_sample] + ego_in_right_poi_deprecated_mask = (poi_norm[..., 0] < -0.6).int() + ego_in_up_poi_deprecated_mask = (poi_norm[..., 1] > 0.6).int() + ego_in_down_poi_deprecated_mask = (poi_norm[..., 1] < -0.6).int() + + # filter poi + ego_in_left = (x_obj_ego < - hwl[:, 2]/2).int().view(-1,1) # [N_box, 1] + # [N_box, num_of_sample] + poi_deprecated_mask = ego_in_left * ego_in_left_poi_deprecated_mask + \ + ego_in_right * ego_in_right_poi_deprecated_mask + \ + ego_in_up * ego_in_up_poi_deprecated_mask + \ + ego_in_down * ego_in_down_poi_deprecated_mask + poi_deprecated_mask = poi_deprecated_mask > 1 + poi_valid_mask = poi_deprecated_mask == 0 + + poi_exact_pos_in_obj_coor = poi_norm * hwl[:, [2,1]].view(box3d.shape[0], 1, 2) # [N_box, num_of_sample ,2] + poi_exact_pos_in_obj_coor_homo = F.pad(poi_exact_pos_in_obj_coor, (0,1), 'constant', 1) # [N_box, num_of_sample, 3] + poi_exact_pos_in_ego_coor = torch.bmm(T_ego_obj, poi_exact_pos_in_obj_coor_homo.permute(0, 2, 1)) # [N_box, 3, num_of_sample] + poi_exact_pos_in_ego_coor = poi_exact_pos_in_ego_coor.permute(0, 2, 1) # [N_box, num_of_sample, 3] + poi_exact_pos_in_ego_coor = poi_exact_pos_in_ego_coor[..., :2] # [N_box, num_of_sample, 2] + + poi_list.append(poi_exact_pos_in_ego_coor) + poi_valid_mask_list.append(poi_valid_mask) + poi_norm_in_obj.append(poi_norm) + + return poi_list, poi_norm_in_obj, poi_valid_mask_list + +class PoiExtractor(nn.Module): + def __init__(self, args): + super().__init__() + self.pc_range = args['pc_range'] + self.bev_stride = args['stride'] + self.voxel_size= args['voxel_size'][0] + self.grid_size = self.voxel_size * self.bev_stride + self.order = args['order'] + self.sample_num = args['sample_num'] # 20 may be ok + self.feat_dim = args['feat_dim'] # 64 + + # learn from relative position (poi_norm) to feature + self.emb = Embedding(2, self.feat_dim, args['N_freqs']) + self.alpha = nn.Parameter(torch.tensor([0.5])) + + # preset grids + grid_x = torch.linspace(self.pc_range[0] + self.grid_size/2, self.pc_range[3] - self.grid_size/2, steps = 
int((self.pc_range[3]-self.pc_range[0])//self.grid_size), device='cuda') + grid_y = torch.linspace(self.pc_range[1] + self.grid_size/2, self.pc_range[4] - self.grid_size/2, steps = int((self.pc_range[4]-self.pc_range[1])//self.grid_size), device='cuda') + + self.grid_x_idx = torch.arange(int((self.pc_range[3]-self.pc_range[0])//self.grid_size), device='cuda') + self.grid_y_idx = torch.arange(int((self.pc_range[4]-self.pc_range[1])//self.grid_size), device='cuda') + self.bev_grid_idx = torch.cartesian_prod(self.grid_x_idx, self.grid_y_idx) # [num_of_grid, 2] + + self.bev_grid_points = torch.cartesian_prod(grid_x, grid_y) # [num_of_grid, 2] + self.bev_grid_points_xyz = F.pad(self.bev_grid_points, (0,1), mode='constant', value=1) # x,y,z, [num_of_grid, 3] + + + def forward(self, heter_feature_2d, pred_box3d_list, lidar_agent_indicator, inferring=False): + bs = heter_feature_2d.shape[0] + # poi_list [[N_box1, num_of_sample, 2], ...] + # poi_norm_in_obj [[N_box1, num_of_sample, 2], ...] + # poi_valid_mask_list [[N_box1, num_of_sample]] + + lidar_pred_box3d_list = [x for i, x in enumerate(pred_box3d_list) if lidar_agent_indicator[i]] + poi_list, poi_norm_in_obj, poi_valid_mask_list = get_poi(lidar_pred_box3d_list, self.order, self.sample_num) + + # learning. only within lidar agent + poi_feature_pred, poi_feature_gt = \ + self.learning(heter_feature_2d[lidar_agent_indicator==1], poi_list, poi_norm_in_obj, poi_valid_mask_list) + + if inferring: + heter_feature_2d_pred, heter_feature_2d_pred_mask = self.inferring(heter_feature_2d, pred_box3d_list) + heter_feature_2d = heter_feature_2d * (1 - heter_feature_2d_pred_mask) + \ + heter_feature_2d * (heter_feature_2d_pred_mask) * self.alpha + \ + heter_feature_2d_pred * (heter_feature_2d_pred_mask) * (1 - self.alpha) + + return heter_feature_2d, poi_feature_pred, poi_feature_gt + + + def learning(self, lidar_feature_2d, poi_list, poi_norm_in_obj, poi_valid_mask_list): + poi_feature_list = [] + poi_norm_valid_list = [] + + # learning + for i, (poi, poi_norm, mask) in enumerate(zip(poi_list, poi_norm_in_obj, poi_valid_mask_list)): + x_idxs = (poi[..., 0] - self.pc_range[0]) / self.grid_size + 0.5 # [N_box1, num_of_sample] + y_idxs = (poi[..., 1] - self.pc_range[1]) / self.grid_size + 0.5 # [N_box1, num_of_sample] + cur_x_idxs = x_idxs[mask == 1] # [N_poi, ] + cur_y_idxs = y_idxs[mask == 1] # [N_poi, ] + + cur_bev_feature = lidar_feature_2d[i].permute(1, 2, 0) # [H, W, C] + poi_feature = bilinear_interpolate_torch(cur_bev_feature, cur_x_idxs, cur_y_idxs) # [N_poi, C] + poi_norm_valid = poi_norm[mask == 1] # [N_poi, 2] + + poi_feature_list.append(poi_feature) + poi_norm_valid_list.append(poi_norm_valid) + + poi_feature_gt = torch.cat(poi_feature_list) # [sum(N_poi), C] + poi_norm = torch.cat(poi_norm_valid_list) # [sum(N_poi), 2] + poi_feature_pred = self.emb(poi_norm) + + return poi_feature_pred, poi_feature_gt + + + def inferring(self, heter_feature_2d, pred_box3d_list): + max_len = max([len(pred_box3d) for pred_box3d in pred_box3d_list]) + pred_box3d_tensor = torch.zeros((heter_feature_2d.shape[0], max_len, 7), device=heter_feature_2d.device) # [B, max_box_num, 7] + heter_feature_pred = torch.zeros_like(heter_feature_2d, device=heter_feature_2d.device) + heter_feature_pred_mask = torch.zeros((heter_feature_2d.shape[0], 1, heter_feature_2d.shape[2], heter_feature_2d.shape[3]), \ + device=heter_feature_2d.device) + + for i, pred_box3d in enumerate(pred_box3d_list): + pred_box3d_copy = pred_box3d.clone() + pred_box3d_copy[:, 2] = 1 # move the z center to 1 
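+            # (editor's sketch) points_in_boxes_gpu below expects boxes as
+            # [x, y, z, dx, dy, dz, heading], so "hwl"-ordered boxes are re-indexed
+            # to [l, w, h]. Illustration with a hypothetical box h=1.5, w=2.0, l=4.0:
+            #   box = torch.tensor([[0., 0., 1., 1.5, 2.0, 4.0, 0.]])  # hwl order
+            #   box[:, [3, 4, 5]] = box[:, [5, 4, 3]]  # -> dx=4.0, dy=2.0, dz=1.5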
+ if self.order == "hwl": + pred_box3d_copy[:, [3,4,5]] = pred_box3d_copy[:, [5,4,3]] # -> dx dy dz + + pred_box3d_tensor[i,:len(pred_box3d)] = pred_box3d_copy + + bev_grid_points = self.bev_grid_points_xyz.expand(heter_feature_2d.shape[0], -1, -1) # [B, num_of_grid, 3] + masks = points_in_boxes_gpu(bev_grid_points, pred_box3d_tensor) # [B, num_of_grid] + + for i, mask in enumerate(masks): + pred_box3d = pred_box3d_list[i] + if pred_box3d.shape[0] == 0 or sum(mask > 0) == 0: + continue + T_ego_objs = boxes_to_tfm(pred_box3d) + T_objs_ego = torch.linalg.inv(T_ego_objs) # [N_box, 3, 3] + object_xy_coor = pred_box3d[:, :2] # [N_box, 2] + bev_grid_xy_coor = bev_grid_points[i][..., :2][mask > 0] # [num_of_valid_grid, 2] + + # assign grid to object + grid_object_l2dis = all_pair_l2(bev_grid_xy_coor, object_xy_coor) + grid_in_which_object = torch.argmin(grid_object_l2dis, dim=1) # shape [num_of_valid_grid,], value within [0, N_box) + T_objs_ego_for_the_grid = T_objs_ego[grid_in_which_object] # [num_of_valid_grid, 3, 3] + + object_size_for_the_grid = pred_box3d[grid_in_which_object][:,[5,4]] if self.order=='hwl' \ + else pred_box3d[grid_in_which_object][:,[3,4]] # [num_of_valid_grid, 2] + + # get pos in object coord. + bev_grid_xy_homo = F.pad(bev_grid_xy_coor, (0,1), 'constant', 1).unsqueeze(-1) # [num_of_valid_grid, 3, 1] + grid_in_obj_coor = torch.bmm(T_objs_ego_for_the_grid, bev_grid_xy_homo) # [num_of_valid_grid, 3, 1] + grid_in_obj_xy_coor = grid_in_obj_coor[:,:2,0] # [num_of_valid_grid, 2] + grid_in_obj_xy_norm = grid_in_obj_xy_coor / object_size_for_the_grid # [num_of_valid_grid, 2] + + feature_idx = self.bev_grid_idx[mask > 0] # [num_of_valid_grid, 2] + features = self.emb(grid_in_obj_xy_norm) # [num_of_valid_grid, 64] + + heter_feature_pred[i, :, feature_idx[:, 1], feature_idx[:, 0]] = features.T + heter_feature_pred_mask[i, 0, feature_idx[:, 1], feature_idx[:, 0]] = 1 + + return heter_feature_pred, heter_feature_pred_mask + + + + + + +class Embedding(nn.Module): + def __init__(self, in_channels, out_channels=64, N_freqs=8, logscale=True): + """ + Defines a function that embeds x to (x, sin(2^k x), cos(2^k x), ...) + """ + super(Embedding, self).__init__() + self.N_freqs = N_freqs + self.in_channels = in_channels + self.funcs = [torch.sin, torch.cos] + self.mlp_in_channels = in_channels*(len(self.funcs)*N_freqs + 1) # 2 * 8 * 2 + 2 = 34 + self.mlp_inter_channels = out_channels * 2 + self.mlp_out_channels = out_channels + + self.mlp_layers = [nn.Linear(self.mlp_in_channels, self.mlp_inter_channels)] + for i in range(4): + self.mlp_layers.append(nn.ReLU(inplace=True)) + self.mlp_layers.append(nn.Linear(self.mlp_inter_channels, self.mlp_inter_channels)) + self.mlp_layers.append(nn.ReLU(inplace=True)) + self.mlp_layers.append(nn.Linear(self.mlp_inter_channels, self.mlp_out_channels)) + + self.mlp_layers = nn.Sequential(*self.mlp_layers) + + + if logscale: + self.freq_bands = 2**torch.linspace(0, N_freqs-1, N_freqs) + else: + self.freq_bands = torch.linspace(1, 2**(N_freqs-1), N_freqs) + + def forward(self, x): + """ + Embeds x to (x, sin(2^k x), cos(2^k x), ...) 
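+        (e.g. with in_channels=2 and N_freqs=8 this yields 2 * (2*8 + 1) = 34
+        encoded values per point, i.e. self.mlp_in_channels, which the small MLP
+        then projects to out_channels)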
+ Different from the paper, "x" is also in the output + See https://github.com/bmild/nerf/issues/12 + + Inputs: + x: (B, self.in_channels) + + Outputs: + out: (B, self.out_channels) + """ + out = [x] + for freq in self.freq_bands: + for func in self.funcs: + out += [func(freq*x)] + + out = torch.cat(out, -1) + out = self.mlp_layers(out) + + return out + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/voxel_rcnn_head.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/voxel_rcnn_head.py new file mode 100644 index 0000000000000000000000000000000000000000..5ce9a51c05cece04d18a143c77082ce9bd6767d2 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/voxel_rcnn_head.py @@ -0,0 +1,379 @@ +import torch +import numpy as np +import torch.nn as nn +from opencood.pcdet_utils.pointnet2.pointnet2_stack import voxel_pool_modules as voxelpool_stack_modules +from opencood.utils import common_utils +from opencood.pcdet_utils.iou3d_nms.iou3d_nms_utils import boxes_iou3d_gpu +from opencood.utils import box_utils +from icecream import ic +from copy import deepcopy + +class VoxelRCNNHead(nn.Module): + def __init__(self, model_cfg, backbone_channels): + super().__init__() + self.model_cfg = model_cfg # 模型配置 + self.voxel_size = model_cfg['voxel_size'] # voxel大小 + self.pool_cfg = model_cfg['pool_cfg'] + self.point_cloud_range = model_cfg['pc_range'] + self.grid_size = self.pool_cfg['grid_size'] # 6 + self.feature_source = self.pool_cfg['feature_source'] + self.code_size = 7 + + c_out = 0 + self.roi_grid_pool_layers = nn.ModuleList() # 初始化ROI网格池化层MuduleList + + for src_name in self.feature_source: # FEATURES_SOURCE: ['x_conv2', 'x_conv3', 'x_conv4'] + layer_cfg = self.pool_cfg['pool_layers'][src_name] + mlps = deepcopy(layer_cfg['mlps']) # 根据特征层获取MLP参数 + + for k in range(len(mlps)): # MLPS: [[32, 32]] 长度为1 + # backbone_channels: {'x_conv1':16, 'x_conv2':32, 'x_conv3':64, 'x_conv4':64} + mlps[k] = [backbone_channels[src_name]] + mlps[k] # 计算MLP层输入输出维度,在最前面增加一个值eg:[[32,32,32]] + + pool_layer = voxelpool_stack_modules.NeighborVoxelSAModuleMSG( + query_ranges=layer_cfg['query_ranges'], # 查询范围 + nsamples=layer_cfg['nsample'], # 采样数量 + radii=layer_cfg['pool_radius'], # 池化半径 0.4->0.8->1.6 + mlps=mlps, # mlp层 + pool_method=layer_cfg['pool_method'], # 池化方法 + ) + # 将池化层添加到ROI网格池化层MuduleList + self.roi_grid_pool_layers.append(pool_layer) + + c_out += sum([x[-1] for x in mlps]) # 取mlps最后的输出维度 32->64->96 + + # c_out = sum([x[-1] for x in mlps]) + pre_channel = self.grid_size * self.grid_size * self.grid_size * c_out # 20736=6*6*6*96 + + + fc_layers = [self.model_cfg['n_fc_neurons']] * 2 + self.shared_fc_layers, pre_channel = self._make_fc_layers(pre_channel, + fc_layers) + + self.cls_layers, pre_channel = self._make_fc_layers(pre_channel, + fc_layers, + output_channels=1) + self.iou_layers, _ = self._make_fc_layers(pre_channel, fc_layers, + output_channels=1) + self.reg_layers, _ = self._make_fc_layers(pre_channel, fc_layers, + output_channels=7) + self._init_weights(weight_init='xavier') + + + def _init_weights(self, weight_init='xavier'): + if weight_init == 'kaiming': + init_func = nn.init.kaiming_normal_ + elif weight_init == 'xavier': + init_func = nn.init.xavier_normal_ + elif weight_init == 'normal': + init_func = nn.init.normal_ + else: + raise NotImplementedError + + for m in self.modules(): + if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d): + if weight_init == 'normal': + init_func(m.weight, mean=0, std=0.001) 
+ else: + init_func(m.weight) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + nn.init.normal_(self.reg_layers[-1].weight, mean=0, std=0.001) + + + def _make_fc_layers(self, input_channels, fc_list, output_channels=None): + fc_layers = [] + pre_channel = input_channels + for k in range(len(fc_list)): + fc_layers.extend([ + nn.Conv1d(pre_channel, fc_list[k], kernel_size=1, bias=False), + # nn.BatchNorm1d(fc_list[k]), + nn.ReLU() + ]) + pre_channel = fc_list[k] + if self.model_cfg['dp_ratio'] > 0: + fc_layers.append(nn.Dropout(self.model_cfg['dp_ratio'])) + if output_channels is not None: + fc_layers.append( + nn.Conv1d(pre_channel, output_channels, kernel_size=1, + bias=True)) + fc_layers = nn.Sequential(*fc_layers) + return fc_layers, pre_channel + + def roi_grid_pool(self, batch_dict): + """ + roi_grid_pooling happens after box fusion and voxel feature merges + + Args: + batch_dict: + batch_size: + rois: (sum(rois), 7 + C) + point_coords: (num_points, 4) [bs_idx, x, y, z] + point_features: (num_points, C) + point_cls_scores: (N1 + N2 + N3 + ..., 1) + point_part_offset: (N1 + N2 + N3 + ..., 3) + Returns: + + """ + + batch_size = len(batch_dict['record_len']) + rois = batch_dict['rcnn_label_dict']['rois'] # already lwh order + label_record_len = batch_dict['rcnn_label_dict']['record_len'] + with_vf_transform = batch_dict.get('with_voxel_feature_transform', False) # False + + + # 1.计算roi网格点全局点云坐标(旋转+roi中心点平移) + roi_grid_xyz, _ = self.get_global_grid_points_of_roi( + rois, grid_size=self.grid_size + ) # (BxN, 6x6x6, 3) --> (1024, 216, 3) + # roi_grid_xyz: (B, Nx6x6x6, 3) + roi_grid_xyz = roi_grid_xyz.view(-1, 3) # (sum(proposal)*6*6*6, 3) + + # 2.compute the voxel coordinates of grid points + roi_grid_coords_x = torch.div((roi_grid_xyz[:, 0:1] - self.point_cloud_range[0]), self.voxel_size[0], rounding_mode='floor') + roi_grid_coords_y = torch.div((roi_grid_xyz[:, 1:2] - self.point_cloud_range[1]), self.voxel_size[1], rounding_mode='floor') + roi_grid_coords_z = torch.div((roi_grid_xyz[:, 2:3] - self.point_cloud_range[2]), self.voxel_size[2], rounding_mode='floor') + + roi_grid_coords = torch.cat([roi_grid_coords_x, roi_grid_coords_y, roi_grid_coords_z], dim=-1) # 整数坐标 --> (sum(proposal)*6*6*6, 3) + + # 3.逐帧赋值batch index + batch_idx = rois.new_zeros(roi_grid_coords.shape[0], 1) + idx_start = 0 + for bs_idx in range(batch_size): + batch_idx[idx_start:idx_start+label_record_len[bs_idx] * self.grid_size ** 3] = bs_idx + idx_start += label_record_len[bs_idx] * self.grid_size ** 3 + + # 4.计算每帧roi grid的有效坐标点数(虚拟特征点数) + roi_grid_batch_cnt = rois.new_zeros(batch_size).int() + for bs_idx in range(batch_size): + roi_grid_batch_cnt[bs_idx] = label_record_len[bs_idx] * self.grid_size ** 3 + + pooled_features_list = [] + for k, src_name in enumerate(self.feature_source): + pool_layer = self.roi_grid_pool_layers[k] # 获取第k个池化层 + cur_stride = batch_dict['multi_scale_3d_strides'][src_name] # 获取该层下采样步长 + cur_sp_tensors = batch_dict['multi_scale_3d_features_fused'][src_name] # 获取该层稀疏特征 + + # 1.compute voxel center xyz and batch_cnt + cur_coords = cur_sp_tensors.indices # 提取有效voxel的坐标 --> (204916, 4) + cur_voxel_xyz = common_utils.get_voxel_centers( + cur_coords[:, 1:4], # 第0维是batch index + downsample_times=cur_stride, # 下采样倍数 + voxel_size=self.voxel_size, # voxel大小 + point_cloud_range=self.point_cloud_range # 点云范围 + ) # 有效voxle中心点云坐标 --> (204916, 3) + + # 2.统计每帧点云的有效坐标数 + cur_voxel_xyz_batch_cnt = cur_voxel_xyz.new_zeros(batch_size).int() + for bs_idx in range(batch_size): + 
cur_voxel_xyz_batch_cnt[bs_idx] = (cur_coords[:, 0] == bs_idx).sum() + + # 3.get voxel2point tensor 计算空间voxel坐标与voxel特征之间的索引 + v2p_ind_tensor = common_utils.generate_voxel2pinds(cur_sp_tensors) # (8, 21, 800, 704) + + # 4.compute the grid coordinates in this scale, in [batch_idx, x y z] order + cur_roi_grid_coords = torch.div(roi_grid_coords, cur_stride, rounding_mode='floor') # 计算下采样后的网格坐标 (sum(proposal)*6*6*6,3) + cur_roi_grid_coords = torch.cat([batch_idx, cur_roi_grid_coords], dim=-1) # 将batch index与roi grid coord拼接 --> (sum(proposal)*6*6*6,4) + cur_roi_grid_coords = cur_roi_grid_coords.int() # 转化为整数 + + + # ic(cur_voxel_xyz.contiguous()) + # ic(cur_voxel_xyz.contiguous().shape) + # ic(cur_voxel_xyz_batch_cnt) + + # ic(roi_grid_xyz.contiguous().view(-1, 3)) + # ic(roi_grid_xyz.contiguous().view(-1, 3).shape) + # ic(roi_grid_batch_cnt) + + # ic(cur_roi_grid_coords.contiguous().view(-1, 4)) + # ic(cur_roi_grid_coords.contiguous().view(-1, 4).shape) + # ic(cur_sp_tensors.features.contiguous()) + # ic(v2p_ind_tensor) + # ic("___________") + + + # 5.voxel neighbor aggregation + pooled_features = pool_layer( + xyz=cur_voxel_xyz.contiguous(), # voxle中心点云坐标 + xyz_batch_cnt=cur_voxel_xyz_batch_cnt, # 每帧点云有效坐标的个数 + new_xyz=roi_grid_xyz.contiguous().view(-1, 3), # roi grid点云坐标 + new_xyz_batch_cnt=roi_grid_batch_cnt, # 每个roi grid中有效坐标个数 + new_coords=cur_roi_grid_coords.contiguous().view(-1, 4), # 在该特征层上的roi voxle坐标 + features=cur_sp_tensors.features.contiguous(), # 稀疏特征 + voxel2point_indices=v2p_ind_tensor # 空间voxle坐标与voxle特征之间的索引(对应关系) + ) + + + # 6.改变特征维度,并加入池化特征list + pooled_features = pooled_features.view( + -1, self.grid_size ** 3, + pooled_features.shape[-1] + ) # (sum(rcnn_proposal), 6x6x6, C) --> (1024, 216, 32) + pooled_features_list.append(pooled_features) + + ms_pooled_features = torch.cat(pooled_features_list, dim=-1) + + return ms_pooled_features # (sum(rcnn_proposal), 6x6x6, C) --> (1024, 216, 32) + + + def get_global_grid_points_of_roi(self, rois, grid_size): + """ + 计算roi网格点全局点云坐标(旋转+roi中心点平移) + Args: + rois:(1024, 7) + grid_size:6 + Returns: + global_roi_grid_points, local_roi_grid_points: (1024, 216, 3) + """ + rois = rois.view(-1, rois.shape[-1]) + batch_size_rcnn = rois.shape[0] + + local_roi_grid_points = self.get_dense_grid_points(rois, batch_size_rcnn, grid_size) # (B, 6x6x6, 3) --> (1024, 216, 3) + global_roi_grid_points = common_utils.rotate_points_along_z( + local_roi_grid_points.clone(), rois[:, 6] + ).squeeze(dim=1) # (1024, 216, 3) 前3维沿着z轴旋转 + global_center = rois[:, 0:3].clone() # 提取roi的中心坐标 (1024,3) + global_roi_grid_points += global_center.unsqueeze(dim=1) # 将box平移到roi的中心 (1024, 216, 3) + return global_roi_grid_points, local_roi_grid_points + + @staticmethod + def get_dense_grid_points(rois, batch_size_rcnn, grid_size): + """ + 根据roi的长宽高计算稠密的虚拟点云坐标(roi box划分为6x6x6的网格坐标) + Args: + rois:(1024, 7) + batch_size_rcnn:1024 + grid_size:6 + Returns: + roi_grid_points: (1024, 216, 3) + """ + faked_features = rois.new_ones((grid_size, grid_size, grid_size)) # 初始化一个全1的6x6x6的伪特征 + dense_idx = faked_features.nonzero() # (N, 3) [x_idx, y_idx, z_idx] --> (216,3) + dense_idx = dense_idx.repeat(batch_size_rcnn, 1, 1).float() # (B, 6x6x6, 3) --> (1024, 216, 3) + + local_roi_size = rois.view(batch_size_rcnn, -1)[:, 3:6] # 取出roi的长宽高(1024,3) + # ROI网格点坐标:先平移0.5个单位,然后归一化,再乘roi的大小,最后将原点移动中心 + # (1024,216,3) / (1024,1,3) - (1024,1,3) + roi_grid_points = (dense_idx + 0.5) / grid_size * local_roi_size.unsqueeze(dim=1) \ + - (local_roi_size.unsqueeze(dim=1) / 2) # (B, 6x6x6, 3) + return 
roi_grid_points # (1024, 216, 3) + + + def forward(self, batch_dict): + batch_dict = self.assign_targets(batch_dict) + # RoI aware pooling + pooled_features = self.roi_grid_pool(batch_dict) # (BxN, 6x6x6, C) + + + batch_size_rcnn = pooled_features.shape[0] + pooled_features = pooled_features.permute(0, 2, 1). \ + contiguous().view(batch_size_rcnn, -1, self.grid_size, + self.grid_size, + self.grid_size) # (BxN, C, 6, 6, 6) + + shared_features = self.shared_fc_layers( + pooled_features.view(batch_size_rcnn, -1, 1)) + rcnn_cls = self.cls_layers(shared_features).transpose(1,2).contiguous().squeeze(dim=1) # (B, 1 or 2) + rcnn_iou = self.iou_layers(shared_features).transpose(1,2).contiguous().squeeze(dim=1) # (B, 1) + rcnn_reg = self.reg_layers(shared_features).transpose(1,2).contiguous().squeeze(dim=1) # (B, C) + + batch_dict['stage2_out'] = { + 'rcnn_cls': rcnn_cls, + 'rcnn_iou': rcnn_iou, + 'rcnn_reg': rcnn_reg, + } + + return batch_dict + + def assign_targets(self, batch_dict): + batch_dict['rcnn_label_dict'] = { + 'rois': [], + 'gt_of_rois': [], + 'gt_of_rois_src': [], + 'cls_tgt': [], + 'reg_tgt': [], + 'iou_tgt': [], + 'rois_anchor': [], + 'record_len': [], + 'rois_scores_stage1': [] + } + pred_boxes = batch_dict['boxes_fused'] + pred_scores = batch_dict['scores_fused'] + gt_boxes = [b[m][:, [0, 1, 2, 5, 4, 3, 6]].float() for b, m in + zip(batch_dict['object_bbx_center'], + batch_dict['object_bbx_mask'].bool())] # hwl -> lwh order + for rois, scores, gts in zip(pred_boxes, pred_scores, gt_boxes): # each frame + rois = rois[:, [0, 1, 2, 5, 4, 3, 6]] # hwl -> lwh + if gts.shape[0] == 0: + gts = rois.clone() + + ious = boxes_iou3d_gpu(rois, gts) + max_ious, gt_inds = ious.max(dim=1) + gt_of_rois = gts[gt_inds] + rcnn_labels = (max_ious > 0.3).float() + mask = torch.logical_not(rcnn_labels.bool()) + + # set negative samples back to rois, no correction in stage2 for them + gt_of_rois[mask] = rois[mask] + gt_of_rois_src = gt_of_rois.clone().detach() + + # canoical transformation + roi_center = rois[:, 0:3] + # TODO: roi_ry > 0 in pcdet + roi_ry = rois[:, 6] % (2 * np.pi) + gt_of_rois[:, 0:3] = gt_of_rois[:, 0:3] - roi_center + gt_of_rois[:, 6] = gt_of_rois[:, 6] - roi_ry + + # transfer LiDAR coords to local coords + gt_of_rois = common_utils.rotate_points_along_z( + points=gt_of_rois.view(-1, 1, gt_of_rois.shape[-1]), + angle=-roi_ry.view(-1) + ).view(-1, gt_of_rois.shape[-1]) + + # flip orientation if rois have opposite orientation + heading_label = (gt_of_rois[:, 6] + ( + torch.div(torch.abs(gt_of_rois[:, 6].min()), + (2 * np.pi), rounding_mode='trunc') + + 1) * 2 * np.pi) % (2 * np.pi) # 0 ~ 2pi + opposite_flag = (heading_label > np.pi * 0.5) & ( + heading_label < np.pi * 1.5) + + # (0 ~ pi/2, 3pi/2 ~ 2pi) + heading_label[opposite_flag] = (heading_label[ + opposite_flag] + np.pi) % ( + 2 * np.pi) + flag = heading_label > np.pi + heading_label[flag] = heading_label[ + flag] - np.pi * 2 # (-pi/2, pi/2) + heading_label = torch.clamp(heading_label, min=-np.pi / 2, + max=np.pi / 2) + gt_of_rois[:, 6] = heading_label + + # generate regression target + rois_anchor = rois.clone().detach().view(-1, self.code_size) + rois_anchor[:, 0:3] = 0 + rois_anchor[:, 6] = 0 + + reg_targets = box_utils.box_encode( + gt_of_rois.view(-1, self.code_size), rois_anchor + ) + + batch_dict['rcnn_label_dict']['rois'].append(rois) + batch_dict['rcnn_label_dict']['rois_scores_stage1'].append(scores.flatten()) + batch_dict['rcnn_label_dict']['gt_of_rois'].append(gt_of_rois) + 
batch_dict['rcnn_label_dict']['gt_of_rois_src'].append( + gt_of_rois_src) + batch_dict['rcnn_label_dict']['cls_tgt'].append(rcnn_labels) + batch_dict['rcnn_label_dict']['reg_tgt'].append(reg_targets) + batch_dict['rcnn_label_dict']['iou_tgt'].append(max_ious) + batch_dict['rcnn_label_dict']['rois_anchor'].append(rois_anchor) + batch_dict['rcnn_label_dict']['record_len'].append(rois.shape[0]) + + + # cat list to tensor + for k, v in batch_dict['rcnn_label_dict'].items(): + if k == 'record_len': + continue + batch_dict['rcnn_label_dict'][k] = torch.cat(v, dim=0) + + return batch_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/voxel_roi_pooling.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/voxel_roi_pooling.py new file mode 100644 index 0000000000000000000000000000000000000000..9d7e995c5df52407c96f12c911f9f13b4eb4790b --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/voxel_roi_pooling.py @@ -0,0 +1,182 @@ +import torch +import torch.nn as nn +from opencood.pcdet_utils.pointnet2.pointnet2_stack import voxel_pool_modules as voxelpool_stack_modules +from opencood.utils import common_utils + + +class VoxelRoIPooling(nn.Module): + def __init__(self, backbone_channels, model_cfg, voxel_size, point_cloud_range, **kwargs): + super().__init__() + self.model_cfg = model_cfg + layer_cfg = self.model_cfg['pool_layers'] + self.point_cloud_range = point_cloud_range + self.voxel_size = voxel_size + self.grid_size = model_cfg['grid_size'] + + c_out = 0 + self.roi_grid_pool_layers = nn.ModuleList() + for src_name in layer_cfg['features_source']: + mlps = layer_cfg[src_name]['mlps'] + for k in range(len(mlps)): + mlps[k] = [backbone_channels[src_name]] + mlps[k] + pool_layer = voxelpool_stack_modules.NeighborVoxelSAModuleMSG( + query_ranges=layer_cfg[src_name]['query_ranges'], + nsamples=layer_cfg[src_name]['nsample'], + radii=layer_cfg[src_name]['pool_radius'], + mlps=mlps, + pool_method=layer_cfg[src_name]['pool_method'], + ) + + self.roi_grid_pool_layers.append(pool_layer) + + c_out += sum([x[-1] for x in mlps]) + + self.init_weights() + + def init_weights(self): + init_func = nn.init.xavier_normal_ + for module_list in [self.shared_fc_layer, self.cls_fc_layers, self.reg_fc_layers]: + for m in module_list.modules(): + if isinstance(m, nn.Linear): + init_func(m.weight) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + nn.init.normal_(self.cls_pred_layer.weight, 0, 0.01) + nn.init.constant_(self.cls_pred_layer.bias, 0) + nn.init.normal_(self.reg_pred_layer.weight, mean=0, std=0.001) + nn.init.constant_(self.reg_pred_layer.bias, 0) + + + def roi_grid_pool(self, batch_dict): + """ + Args: + batch_dict: + batch_size: + rois: (B, num_rois, 7 + C) + point_coords: (num_points, 4) [bs_idx, x, y, z] + point_features: (num_points, C) + point_cls_scores: (N1 + N2 + N3 + ..., 1) + point_part_offset: (N1 + N2 + N3 + ..., 3) + Returns: + """ + rois = batch_dict['rois'] + batch_size = batch_dict['batch_size'] + with_vf_transform = batch_dict.get('with_voxel_feature_transform', False) + + roi_grid_xyz, _ = self.get_global_grid_points_of_roi( + rois, grid_size=self.grid_size + ) # (BxN, 6x6x6, 3) + # roi_grid_xyz: (B, Nx6x6x6, 3) + roi_grid_xyz = roi_grid_xyz.view(batch_size, -1, 3) + + # compute the voxel coordinates of grid points + roi_grid_coords_x = (roi_grid_xyz[:, :, 0:1] - self.point_cloud_range[0]) // self.voxel_size[0] + roi_grid_coords_y = (roi_grid_xyz[:, :, 1:2] 
- self.point_cloud_range[1]) // self.voxel_size[1] + roi_grid_coords_z = (roi_grid_xyz[:, :, 2:3] - self.point_cloud_range[2]) // self.voxel_size[2] + # roi_grid_coords: (B, Nx6x6x6, 3) + roi_grid_coords = torch.cat([roi_grid_coords_x, roi_grid_coords_y, roi_grid_coords_z], dim=-1) + + batch_idx = rois.new_zeros(batch_size, roi_grid_coords.shape[1], 1) + for bs_idx in range(batch_size): + batch_idx[bs_idx, :, 0] = bs_idx + + roi_grid_batch_cnt = rois.new_zeros(batch_size).int().fill_(roi_grid_coords.shape[1]) + + pooled_features_list = [] + for k, src_name in enumerate(self.model_cfg['features_source']): + pool_layer = self.roi_grid_pool_layers[k] + cur_stride = batch_dict['multi_scale_3d_strides'][src_name] + cur_sp_tensors = batch_dict['multi_scale_3d_features'][src_name] + + if with_vf_transform: + cur_sp_tensors = batch_dict['multi_scale_3d_features_post'][src_name] + else: + cur_sp_tensors = batch_dict['multi_scale_3d_features'][src_name] + + # compute voxel center xyz and batch_cnt + cur_coords = cur_sp_tensors.indices + cur_voxel_xyz = common_utils.get_voxel_centers( + cur_coords[:, 1:4], + downsample_times=cur_stride, + voxel_size=self.voxel_size, + point_cloud_range=self.point_cloud_range + ) + cur_voxel_xyz_batch_cnt = cur_voxel_xyz.new_zeros(batch_size).int() + for bs_idx in range(batch_size): + cur_voxel_xyz_batch_cnt[bs_idx] = (cur_coords[:, 0] == bs_idx).sum() + # get voxel2point tensor + v2p_ind_tensor = common_utils.generate_voxel2pinds(cur_sp_tensors) + # compute the grid coordinates in this scale, in [batch_idx, x y z] order + cur_roi_grid_coords = roi_grid_coords // cur_stride + cur_roi_grid_coords = torch.cat([batch_idx, cur_roi_grid_coords], dim=-1) + cur_roi_grid_coords = cur_roi_grid_coords.int() + # voxel neighbor aggregation + pooled_features = pool_layer( + xyz=cur_voxel_xyz.contiguous(), + xyz_batch_cnt=cur_voxel_xyz_batch_cnt, + new_xyz=roi_grid_xyz.contiguous().view(-1, 3), + new_xyz_batch_cnt=roi_grid_batch_cnt, + new_coords=cur_roi_grid_coords.contiguous().view(-1, 4), + features=cur_sp_tensors.features.contiguous(), + voxel2point_indices=v2p_ind_tensor + ) + + pooled_features = pooled_features.view( + -1, self.grid_size ** 3, + pooled_features.shape[-1] + ) # (BxN, 6x6x6, C) + pooled_features_list.append(pooled_features) + + ms_pooled_features = torch.cat(pooled_features_list, dim=-1) + + return ms_pooled_features + + + def get_global_grid_points_of_roi(self, rois, grid_size): + rois = rois.view(-1, rois.shape[-1]) + batch_size_rcnn = rois.shape[0] + + local_roi_grid_points = self.get_dense_grid_points(rois, batch_size_rcnn, grid_size) # (B, 6x6x6, 3) + global_roi_grid_points = common_utils.rotate_points_along_z( + local_roi_grid_points.clone(), rois[:, 6] + ).squeeze(dim=1) + global_center = rois[:, 0:3].clone() + global_roi_grid_points += global_center.unsqueeze(dim=1) + return global_roi_grid_points, local_roi_grid_points + + @staticmethod + def get_dense_grid_points(rois, batch_size_rcnn, grid_size): + faked_features = rois.new_ones((grid_size, grid_size, grid_size)) + dense_idx = faked_features.nonzero() # (N, 3) [x_idx, y_idx, z_idx] + dense_idx = dense_idx.repeat(batch_size_rcnn, 1, 1).float() # (B, 6x6x6, 3) + + local_roi_size = rois.view(batch_size_rcnn, -1)[:, 3:6] + roi_grid_points = (dense_idx + 0.5) / grid_size * local_roi_size.unsqueeze(dim=1) \ + - (local_roi_size.unsqueeze(dim=1) / 2) # (B, 6x6x6, 3) + return roi_grid_points + + def forward(self, batch_dict): + """ + :param input_data: input dict + :return: + """ + + """ + Move those part to 
roi heads + + # targets_dict = self.proposal_layer( + # batch_dict, nms_config=self.model_cfg.NMS_CONFIG['TRAIN' if self.training else 'TEST'] + # ) + # if self.training: + # targets_dict = self.assign_targets(batch_dict) + # batch_dict['rois'] = targets_dict['rois'] + # batch_dict['roi_labels'] = targets_dict['roi_labels'] + """ + + + # RoI aware pooling + pooled_features = self.roi_grid_pool(batch_dict) # (BxN, 6x6x6, C) + batch_dict['pooled_features'] = pooled_features + + return batch_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/vsa.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/vsa.py new file mode 100644 index 0000000000000000000000000000000000000000..42293b252f037b77fc2fa6ab0a49fc642fd070c2 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/sub_modules/vsa.py @@ -0,0 +1,274 @@ +import copy +import random + +import torch +import torch.nn as nn + +from opencood.pcdet_utils.pointnet2.pointnet2_stack import pointnet2_modules as pointnet2_stack_modules +from opencood.pcdet_utils.pointnet2.pointnet2_stack import pointnet2_utils as pointnet2_stack_utils +from opencood.pcdet_utils.roiaware_pool3d.roiaware_pool3d_utils import points_in_boxes_gpu +from opencood.utils import common_utils + + +def bilinear_interpolate_torch(im, x, y): + """ + Args: + im: (H, W, C) [y, x] + x: (N) + y: (N) + Returns: + """ + x0 = torch.floor(x).long() + x1 = x0 + 1 + + y0 = torch.floor(y).long() + y1 = y0 + 1 + + x0 = torch.clamp(x0, 0, im.shape[1] - 1) + x1 = torch.clamp(x1, 0, im.shape[1] - 1) + y0 = torch.clamp(y0, 0, im.shape[0] - 1) + y1 = torch.clamp(y1, 0, im.shape[0] - 1) + + Ia = im[y0, x0] + Ib = im[y1, x0] + Ic = im[y0, x1] + Id = im[y1, x1] + + wa = (x1.type_as(x) - x) * (y1.type_as(y) - y) + wb = (x1.type_as(x) - x) * (y - y0.type_as(y)) + wc = (x - x0.type_as(x)) * (y1.type_as(y) - y) + wd = (x - x0.type_as(x)) * (y - y0.type_as(y)) + ans = torch.t((torch.t(Ia) * wa)) + torch.t(torch.t(Ib) * wb) + torch.t(torch.t(Ic) * wc) + torch.t(torch.t(Id) * wd) + return ans + + +class VoxelSetAbstraction(nn.Module): + def __init__(self, model_cfg, voxel_size, point_cloud_range, num_bev_features=None, + num_rawpoint_features=None, **kwargs): + super().__init__() + self.model_cfg = model_cfg + self.voxel_size = voxel_size + self.point_cloud_range = point_cloud_range + + SA_cfg = self.model_cfg['sa_layer'] + + self.SA_layers = nn.ModuleList() + self.SA_layer_names = [] + self.downsample_times_map = {} + c_in = 0 + for src_name in self.model_cfg['features_source']: + if src_name in ['bev', 'raw_points']: + continue + self.downsample_times_map[src_name] = SA_cfg[src_name]['downsample_factor'] + mlps = copy.copy(SA_cfg[src_name]['mlps']) + for k in range(len(mlps)): + mlps[k] = [mlps[k][0]] + mlps[k] + cur_layer = pointnet2_stack_modules.StackSAModuleMSG( + radii=SA_cfg[src_name]['pool_radius'], + nsamples=SA_cfg[src_name]['n_sample'], + mlps=mlps, + use_xyz=True, + pool_method='max_pool', + ) + self.SA_layers.append(cur_layer) + self.SA_layer_names.append(src_name) + + c_in += sum([x[-1] for x in mlps]) + + if 'bev' in self.model_cfg['features_source']: + c_bev = num_bev_features + c_in += c_bev + + if 'raw_points' in self.model_cfg['features_source']: + mlps = copy.copy(SA_cfg['raw_points']['mlps']) + for k in range(len(mlps)): + mlps[k] = [num_rawpoint_features - 3] + mlps[k] + + self.SA_rawpoints = pointnet2_stack_modules.StackSAModuleMSG( + radii=SA_cfg['raw_points']['pool_radius'], + 
nsamples=SA_cfg['raw_points']['n_sample'], + mlps=mlps, + use_xyz=True, + pool_method='max_pool' + ) + c_in += sum([x[-1] for x in mlps]) + + self.vsa_point_feature_fusion = nn.Sequential( + nn.Linear(c_in, self.model_cfg['num_out_features'], bias=False), + nn.BatchNorm1d(self.model_cfg['num_out_features']), + nn.ReLU(), + ) + self.num_point_features = self.model_cfg['num_out_features'] + self.num_point_features_before_fusion = c_in + + def interpolate_from_bev_features(self, keypoints, bev_features, batch_size, bev_stride): + x_idxs = (keypoints[:, :, 0] - self.point_cloud_range[0]) / self.voxel_size[0] + y_idxs = (keypoints[:, :, 1] - self.point_cloud_range[1]) / self.voxel_size[1] + x_idxs = x_idxs / bev_stride + y_idxs = y_idxs / bev_stride + + point_bev_features_list = [] + for k in range(batch_size): + cur_x_idxs = x_idxs[k] + cur_y_idxs = y_idxs[k] + cur_bev_features = bev_features[k].permute(1, 2, 0) # (H, W, C) + point_bev_features = bilinear_interpolate_torch(cur_bev_features, cur_x_idxs, cur_y_idxs) + point_bev_features_list.append(point_bev_features.unsqueeze(dim=0)) + + point_bev_features = torch.cat(point_bev_features_list, dim=0) # (B, N, C0) + return point_bev_features + + def get_sampled_points(self, batch_dict): + batch_size = batch_dict['batch_size'] + if self.model_cfg['point_source'] == 'raw_points': + src_points = batch_dict['origin_lidar_for_vsa'][:, 1:] + batch_indices = batch_dict['origin_lidar_for_vsa'][:, 0].long() + elif self.model_cfg['point_source'] == 'voxel_centers': + src_points = common_utils.get_voxel_centers( + batch_dict['voxel_coords'][:, 1:4], + downsample_times=1, + voxel_size=self.voxel_size, + point_cloud_range=self.point_cloud_range + ) + batch_indices = batch_dict['voxel_coords'][:, 0].long() + else: + raise NotImplementedError + + keypoints_batch = torch.randn((batch_size, self.model_cfg['num_keypoints'], 4), device=src_points.device) + keypoints_batch[..., 0] = keypoints_batch[..., 0] * 140 + keypoints_batch[..., 1] = keypoints_batch[..., 0] * 40 + # points with height flag 10 are padding/invalid, for later filtering + keypoints_batch[..., 2] = 10.0 + for bs_idx in range(batch_size): + bs_mask = (batch_indices == bs_idx) + sampled_points = src_points[bs_mask].unsqueeze(dim=0) # (1, N, 3) + # sample points with FPS + # some cropped pcd may have very few points, select various number + # of points to ensure similar sample density + # 50000 is approximately the number of points in one full pcd + num_kpts = int(self.model_cfg['num_keypoints'] * sampled_points.shape[1] / 50000) + 1 + num_kpts = min(num_kpts, self.model_cfg['num_keypoints']) + cur_pt_idxs = pointnet2_stack_utils.furthest_point_sample( + sampled_points[:, :, 0:3].contiguous(), num_kpts + ).long() + + if sampled_points.shape[1] < num_kpts: + empty_num = num_kpts - sampled_points.shape[1] + cur_pt_idxs[0, -empty_num:] = cur_pt_idxs[0, :empty_num] + + keypoints = sampled_points[0][cur_pt_idxs[0]].unsqueeze(dim=0) + + keypoints_batch[bs_idx, :len(keypoints[0]), :] = keypoints + + # keypoints = torch.cat(keypoints_list, dim=0) # (B, M, 3) + return keypoints_batch + + def forward(self, batch_dict): + """ + Args: + batch_dict: + batch_size: + keypoints: (B, num_keypoints, 3) + multi_scale_3d_features: { + 'x_conv4': ... + } + points: optional (N, 1 + 3 + C) [bs_idx, x, y, z, ...] 
+ spatial_features: optional + spatial_features_stride: optional + Returns: + point_features: (N, C) + point_coords: (N, 4) + """ + keypoints = self.get_sampled_points(batch_dict) # BxNx4 + kpt_mask1 = torch.logical_and(keypoints[..., 2] > -2.8, keypoints[..., 2] < 1.0) + kpt_mask2 = None + # Only select the points that are in the predicted bounding boxes + if 'det_boxes' in batch_dict: + dets_list = batch_dict['det_boxes'] + max_len = max([len(dets) for dets in dets_list]) + boxes = torch.zeros((len(dets_list), max_len, 7), dtype=dets_list[0].dtype, + device=dets_list[0].device) + for i, dets in enumerate(dets_list): + dets = dets[:, [0,1,2,5,4,3,6]] # hwl -> lwh + if len(dets)==0: + continue + cur_dets = dets.clone() + if self.model_cfg['enlarge_selection_boxes']: + cur_dets[:, 3:6] += 0.5 + boxes[i, :len(dets)] = cur_dets + # mask out some keypoints to spare the GPU storage + kpt_mask2 = points_in_boxes_gpu(keypoints[..., :3], boxes) >= 0 + + kpt_mask = torch.logical_and(kpt_mask1, kpt_mask2) if kpt_mask2 is not None else kpt_mask1 + # Ensure there are more than 2 points are selected to satisfy the + # condition of batch norm in the FC layers of feature fusion module + if (kpt_mask).sum() < 2: + kpt_mask[0, torch.randint(0, 1024, (2,))] = True + + + point_features_list = [] + if 'bev' in self.model_cfg['features_source']: + point_bev_features = self.interpolate_from_bev_features( + keypoints[..., :3], batch_dict['spatial_features'], batch_dict['batch_size'], + bev_stride=batch_dict['spatial_features_stride'] + ) + point_features_list.append(point_bev_features[kpt_mask]) + + batch_size, num_keypoints, _ = keypoints.shape + + new_xyz = keypoints[kpt_mask] + new_xyz_batch_cnt = torch.tensor([(mask).sum() for mask in kpt_mask], device=new_xyz.device).int() + + if 'raw_points' in self.model_cfg['features_source']: + raw_points = batch_dict['origin_lidar_for_vsa'] + xyz = raw_points[:, 1:4] + xyz_batch_cnt = xyz.new_zeros(batch_size).int() + indices = raw_points[:, 0].long() + for bs_idx in range(batch_size): + xyz_batch_cnt[bs_idx] = (indices == bs_idx).sum() + point_features = None + + pooled_points, pooled_features = self.SA_rawpoints( + xyz=xyz.contiguous(), + xyz_batch_cnt=xyz_batch_cnt, + new_xyz=new_xyz[:, :3].contiguous(), + new_xyz_batch_cnt=new_xyz_batch_cnt, + features=point_features, + ) + point_features_list.append(pooled_features) + + for k, src_name in enumerate(self.SA_layer_names): + cur_coords = batch_dict['multi_scale_3d_features'][src_name].indices + xyz = common_utils.get_voxel_centers( + cur_coords[:, 1:4], + downsample_times=self.downsample_times_map[src_name], + voxel_size=self.voxel_size, + point_cloud_range=self.point_cloud_range + ) + xyz_batch_cnt = xyz.new_zeros(batch_size).int() + for bs_idx in range(batch_size): + xyz_batch_cnt[bs_idx] = (cur_coords[:, 0] == bs_idx).sum() + + pooled_points, pooled_features = self.SA_layers[k]( + xyz=xyz.contiguous(), + xyz_batch_cnt=xyz_batch_cnt, + new_xyz=new_xyz[:, :3].contiguous(), + new_xyz_batch_cnt=new_xyz_batch_cnt, + features=batch_dict['multi_scale_3d_features'][src_name].features.contiguous(), + ) + + point_features_list.append(pooled_features) + + point_features = torch.cat(point_features_list, dim=1) + batch_dict['point_features_before_fusion'] = point_features.view(-1, point_features.shape[-1]) # torch.Size([373, 640]) + point_features = self.vsa_point_feature_fusion(point_features.view(-1, point_features.shape[-1])) # (0): Linear(in_features=512, out_features=32, bias=False) + + cur_idx = 0 + 
batch_dict['point_features'] = [] + batch_dict['point_coords'] = [] + for num in new_xyz_batch_cnt: + batch_dict['point_features'].append(point_features[cur_idx:cur_idx + num]) + batch_dict['point_coords'].append(new_xyz[cur_idx:cur_idx + num]) + cur_idx += num + + return batch_dict \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/voxel_net.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/voxel_net.py new file mode 100644 index 0000000000000000000000000000000000000000..78116a2c5f16ffad61b738701a543b8e5bd84433 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/voxel_net.py @@ -0,0 +1,234 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , OpenPCDet +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import torch.nn as nn +import torch.nn.functional as F +import torch +from torch.autograd import Variable + +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.utils.common_utils import torch_tensor_to_numpy + + +# conv2d + bn + relu +class Conv2d(nn.Module): + + def __init__(self, in_channels, out_channels, k, s, p, activation=True, + batch_norm=True): + super(Conv2d, self).__init__() + self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=k, + stride=s, padding=p) + if batch_norm: + self.bn = nn.BatchNorm2d(out_channels) + else: + self.bn = None + self.activation = activation + + def forward(self, x): + x = self.conv(x) + if self.bn is not None: + x = self.bn(x) + if self.activation: + return F.relu(x, inplace=True) + else: + return x + + +# conv3d + bn + relu +class Conv3d(nn.Module): + + def __init__(self, in_channels, out_channels, k, s, p, batch_norm=True): + super(Conv3d, self).__init__() + self.conv = nn.Conv3d(in_channels, out_channels, kernel_size=k, + stride=s, padding=p) + if batch_norm: + self.bn = nn.BatchNorm3d(out_channels) + else: + self.bn = None + + def forward(self, x): + x = self.conv(x) + if self.bn is not None: + x = self.bn(x) + + return F.relu(x, inplace=True) + + +# Fully Connected Network +class FCN(nn.Module): + + def __init__(self, cin, cout): + super(FCN, self).__init__() + self.cout = cout + self.linear = nn.Linear(cin, cout) + self.bn = nn.BatchNorm1d(cout) + + def forward(self, x): + # KK is the stacked k across batch + kk, t, _ = x.shape + x = self.linear(x.view(kk * t, -1)) + x = F.relu(self.bn(x)) + return x.view(kk, t, -1) + + +# Voxel Feature Encoding layer +class VFE(nn.Module): + + def __init__(self, cin, cout, T): + super(VFE, self).__init__() + assert cout % 2 == 0 + self.units = cout // 2 + self.fcn = FCN(cin, self.units) + self.T = T + + def forward(self, x, mask): + # point-wise feature + pwf = self.fcn(x) + # locally aggregated feature + laf = torch.max(pwf, 1)[0].unsqueeze(1).repeat(1, self.T, 1) + # point-wise concat feature + pwcf = torch.cat((pwf, laf), dim=2) + # apply mask + mask = mask.unsqueeze(2).repeat(1, 1, self.units * 2) + pwcf = pwcf * mask.float() + + return pwcf + + +# Stacked Voxel Feature Encoding +class SVFE(nn.Module): + + def __init__(self, T): + super(SVFE, self).__init__() + self.vfe_1 = VFE(7, 32, T) + self.vfe_2 = VFE(32, 128, T) + self.fcn = FCN(128, 128) + + def forward(self, x): + mask = torch.ne(torch.max(x, 2)[0], 0) + x = self.vfe_1(x, mask) + x = self.vfe_2(x, mask) + x = self.fcn(x) + # element-wise max pooling + x = torch.max(x, 1)[0] + return x + + +# Convolutional Middle Layer +class CML(nn.Module): + def __init__(self): + super(CML, self).__init__() + self.conv3d_1 = Conv3d(64, 64, 
3, s=(2, 1, 1), p=(1, 1, 1)) + self.conv3d_2 = Conv3d(64, 64, 3, s=(1, 1, 1), p=(0, 1, 1)) + self.conv3d_3 = Conv3d(64, 64, 3, s=(2, 1, 1), p=(1, 1, 1)) + + def forward(self, x): + x = self.conv3d_1(x) + x = self.conv3d_2(x) + x = self.conv3d_3(x) + return x + + +# Region Proposal Network +class RPN(nn.Module): + def __init__(self, anchor_num=2): + super(RPN, self).__init__() + self.anchor_num = anchor_num + + self.block_1 = [Conv2d(128, 128, 3, 2, 1)] + self.block_1 += [Conv2d(128, 128, 3, 1, 1) for _ in range(3)] + self.block_1 = nn.Sequential(*self.block_1) + + self.block_2 = [Conv2d(128, 128, 3, 2, 1)] + self.block_2 += [Conv2d(128, 128, 3, 1, 1) for _ in range(5)] + self.block_2 = nn.Sequential(*self.block_2) + + self.block_3 = [Conv2d(128, 256, 3, 2, 1)] + self.block_3 += [nn.Conv2d(256, 256, 3, 1, 1) for _ in range(5)] + self.block_3 = nn.Sequential(*self.block_3) + + self.deconv_1 = nn.Sequential(nn.ConvTranspose2d(256, 256, 4, 4, 0), + nn.BatchNorm2d(256)) + self.deconv_2 = nn.Sequential(nn.ConvTranspose2d(128, 256, 2, 2, 0), + nn.BatchNorm2d(256)) + self.deconv_3 = nn.Sequential(nn.ConvTranspose2d(128, 256, 1, 1, 0), + nn.BatchNorm2d(256)) + + self.score_head = Conv2d(768, self.anchor_num, 1, 1, 0, + activation=False, batch_norm=False) + self.reg_head = Conv2d(768, 7 * self.anchor_num, 1, 1, 0, + activation=False, batch_norm=False) + + def forward(self, x): + x = self.block_1(x) + x_skip_1 = x + x = self.block_2(x) + x_skip_2 = x + x = self.block_3(x) + x_0 = self.deconv_1(x) + x_1 = self.deconv_2(x_skip_2) + x_2 = self.deconv_3(x_skip_1) + x = torch.cat((x_0, x_1, x_2), 1) + return self.score_head(x), self.reg_head(x) + + +class VoxelNet(nn.Module): + def __init__(self, args): + super(VoxelNet, self).__init__() + self.svfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + + # self.svfe = SVFE(args['T']) + self.cml = CML() + self.rpn = RPN(args['anchor_num']) + + self.N = args['N'] + self.D = args['D'] + self.H = args['H'] + self.W = args['W'] + self.T = args['T'] + self.anchor_num = args['anchor_num'] + + def voxel_indexing(self, sparse_features, coords): + dim = sparse_features.shape[-1] + + dense_feature = Variable( + torch.zeros(dim, self.N, self.D, self.H, self.W).cuda()) + + dense_feature[:, coords[:, 0], coords[:, 1], coords[:, 2], + coords[:, 3]] = sparse_features.transpose(0, 1) + + return dense_feature.transpose(0, 1) + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points} + + # feature learning network + vwfs = self.svfe(batch_dict)['pillar_features'] + + voxel_coords = torch_tensor_to_numpy(voxel_coords) + vwfs = self.voxel_indexing(vwfs, voxel_coords) + + # convolutional middle network + vwfs = self.cml(vwfs) + + # region proposal network + + # merge the depth and feature dim into one, output probability score + # map and regression map + psm, rm = self.rpn(vwfs.view(self.N, -1, self.H, self.W)) + + output_dict = {'psm': psm, + 'rm': rm} + + return output_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/voxel_net_intermediate.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/voxel_net_intermediate.py new file mode 100644 index 
0000000000000000000000000000000000000000..fec3aa3157554322033f9cc5331fe9f8f7485221 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/models/voxel_net_intermediate.py @@ -0,0 +1,203 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +VoxelNet for intermediate fusion +""" +import torch.nn as nn +import torch.nn.functional as F +import torch +import numpy as np +from torch.autograd import Variable + +from opencood.models.voxel_net import RPN, CML +from opencood.models.sub_modules.pillar_vfe import PillarVFE +from opencood.utils.common_utils import torch_tensor_to_numpy +from opencood.models.fuse_modules.self_attn import AttFusion +from opencood.models.sub_modules.auto_encoder import AutoEncoder + + +# conv2d + bn + relu +class Conv2d(nn.Module): + + def __init__(self, in_channels, out_channels, k, s, p, activation=True, + batch_norm=True, bias=True): + super(Conv2d, self).__init__() + self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=k, + stride=s, padding=p, bias=bias) + if batch_norm: + self.bn = nn.BatchNorm2d(out_channels) + else: + self.bn = None + self.activation = activation + + def forward(self, x): + x = self.conv(x) + if self.bn is not None: + x = self.bn(x) + if self.activation: + return F.relu(x, inplace=True) + else: + return x + + +class NaiveFusion(nn.Module): + + def __init__(self): + super(NaiveFusion, self).__init__() + self.conv1 = Conv2d(128 * 5, 256, 3, 1, 1, + batch_norm=False, bias=False) + self.conv2 = Conv2d(256, 128, 3, 1, 1) + + def forward(self, x): + x = self.conv1(x) + x = self.conv2(x) + + return x + + +class VoxelNetIntermediate(nn.Module): + def __init__(self, args): + super(VoxelNetIntermediate, self).__init__() + self.svfe = PillarVFE(args['pillar_vfe'], + num_point_features=4, + voxel_size=args['voxel_size'], + point_cloud_range=args['lidar_range']) + + self.proj_first = True + if ('proj_first' in args) and (args['proj_first'] is False): + self.proj_first = False + + self.cml = CML() + self.fusion_net = AttFusion(128) + self.rpn = RPN(args['anchor_num']) + + self.N = args['N'] + self.D = args['D'] + self.H = args['H'] + self.W = args['W'] + self.T = args['T'] + self.anchor_num = args['anchor_num'] + + self.compression = False + if 'compression' in args and args['compression'] > 0: + self.compression = True + self.compression_layer = AutoEncoder(128, args['compression']) + + def voxel_indexing(self, sparse_features, coords): + dim = sparse_features.shape[-1] + + dense_feature = Variable( + torch.zeros(dim, self.N, self.D, self.H, self.W).cuda()) + + dense_feature[:, coords[:, 0], coords[:, 1], coords[:, 2], + coords[:, 3]] = sparse_features.transpose(0, 1) + + return dense_feature.transpose(0, 1) + + def regroup(self, dense_feature, record_len): + """ + Regroup the data based on the record_len. + + Parameters + ---------- + dense_feature : torch.Tensor + N, C, H, W + record_len : list + [sample1_len, sample2_len, ...] 
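+            e.g. record_len = [2, 3] splits a (5, C, H, W) input into groups of
+            2 and 3 CAV features; each group is zero-padded to the maximum of
+            5 CAVs and flattened to (1, 5C, H, W) before concatenation.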
+ + Returns + ------- + regroup_feature : torch.Tensor + B, 5C, H, W + """ + cum_sum_len = list(np.cumsum(record_len)) + split_features = torch.tensor_split(dense_feature, + cum_sum_len[:-1]) + regroup_features = [] + + for split_feature in split_features: + # M, C, H, W + feature_shape = split_feature.shape + + # the maximum M is 5 as most 5 cavs + padding_len = 5 - feature_shape[0] + padding_tensor = torch.zeros(padding_len, feature_shape[1], + feature_shape[2], feature_shape[3]) + padding_tensor = padding_tensor.to(split_feature.device) + + split_feature = torch.cat([split_feature, padding_tensor], + dim=0) + + # 1, 5C, H, W + split_feature = split_feature.view(-1, + feature_shape[2], + feature_shape[3]).unsqueeze(0) + regroup_features.append(split_feature) + + # B, 5C, H, W + regroup_features = torch.cat(regroup_features, dim=0) + + return regroup_features + + def forward(self, data_dict): + voxel_features = data_dict['processed_lidar']['voxel_features'] + voxel_coords = data_dict['processed_lidar']['voxel_coords'] + voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] + record_len = data_dict['record_len'] + pairwise_t_matrix = data_dict['pairwise_t_matrix'] + + + batch_dict = {'voxel_features': voxel_features, + 'voxel_coords': voxel_coords, + 'voxel_num_points': voxel_num_points, + 'pairwise_t_matrix': pairwise_t_matrix} + + if voxel_coords.is_cuda: + record_len_tmp = record_len.cpu() + + record_len_tmp = list(record_len_tmp.numpy()) + + self.N = sum(record_len_tmp) + + # feature learning network + vwfs = self.svfe(batch_dict)['pillar_features'] + + voxel_coords = torch_tensor_to_numpy(voxel_coords) + vwfs = self.voxel_indexing(vwfs, voxel_coords) + + # convolutional middle network + vwfs = self.cml(vwfs) + # convert from 3d to 2d N C H W + vmfs = vwfs.view(self.N, -1, self.H, self.W) + + # compression layer + if self.compression: + vmfs = self.compression_layer(vmfs) + + # pairwise_t_matrix + # project_first must be right + # have not check project_first=False + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + + if not self.proj_first: + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * self.H / self.W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * self.W / self.H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / self.W * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / self.H * 2 + + + # information naive fusion + vmfs_fusion = self.fusion_net(vmfs, record_len, pairwise_t_matrix) + + # region proposal network + # merge the depth and feature dim into one, output probability score + # map and regression map + psm, rm = self.rpn(vmfs_fusion) + + output_dict = {'psm': psm, + 'rm': rm} + + return output_dict diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__init__.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/__init__.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7e7ef00135af613c65097efb7d1715ace89e4e83 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/__init__.cpython-37.pyc differ diff --git 
a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/box_utils.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/box_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3794450e0618ae83c48f5c51fd31fc166842074e Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/box_utils.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/camera_utils.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/camera_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4963ec68151a79563b87312eaa75434b5efd22a6 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/camera_utils.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/common_utils.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/common_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0340bf3a78476f191af36b10c02acb262a934625 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/common_utils.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/eval_utils.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/eval_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4ab71a410cdb71dbf8db703395987c7551e90a66 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/eval_utils.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/heter_utils.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/heter_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a56d98944bbad90a634fd510128843b299fde59f Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/heter_utils.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/pcd_utils.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/pcd_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e22e2505e142e8dbcdf442dc28a593dbc19118a5 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/pcd_utils.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/pose_utils.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/pose_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b3d52d369d81a134570672cfd4293f4c561cb775 Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/pose_utils.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/transformation_utils.cpython-37.pyc b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/transformation_utils.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7c9765efe9edb70249e07a5682722cbdca1438b6 Binary files /dev/null and 
b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/__pycache__/transformation_utils.cpython-37.pyc differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/bessel_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/bessel_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..12140e1143b0c1f7234c4576b32a305f5cbf8689 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/bessel_utils.py @@ -0,0 +1,288 @@ +""" +Pytorch implementation of the logarithm of the modified Bessel function +of the 1st kind I(nu, z). +Based mainly on scipy and extends the definition domain compared to +scipy methods (which can easily provide infinite values). The extension +is done using numerical approximations involving the ratio of Bessel +functions: +- https://arxiv.org/pdf/1606.02008.pdf (Theorems 5,6) +- https://arxiv.org/pdf/1902.02603.pdf (Appendix) +Jean-Remy Conti +2021 +""" + +import torch +from scipy import special, pi + + +def logbessel_I_scipy(nu, z, check = True): + ''' + Pytorch version of scipy computation of modified Bessel functions + of the 1st kind I(nu,z). + Parameters + ---------- + nu: positive int, float + Order of modified Bessel function of 1st kind. + z: int/float or tensor, shape (N,) + Argument of Bessel function. + check: bool + If True, check if argument of log is non zero. + + Return + ------ + result: tensor, shape (N,) + ''' + if not isinstance(z, torch.Tensor): + z = torch.tensor(z) + z = z.reshape(-1) + + result = special.ive(nu, z) + if check: + assert len(result[ result == 0]) == 0, ('Bessel functions take ' + + 'value 0 for z = {}'.format( + z[ result == 0])) + result = torch.log(result) + z + return result + + + +def logbessel_I_asymptotic(nu, z): + ''' + Asymptotic branches of the modified Bessel function of the 1st kind. + https://arxiv.org/pdf/1902.02603.pdf + Parameters + ---------- + nu: positive int, float + Order of modified Bessel function of 1st kind. + z: tensor, shape (N,) + Argument of Bessel function. + + Return + ------ + result: tensor, shape (N,) + ''' + z = z.double() + eta = (nu + 0.5)/(2* (nu+1) ) + result = torch.zeros(z.shape[0]).double() + + result[ z <= nu ] = ( + nu*torch.log(z[ z <= nu ]) + eta*z[ z <= nu ] + - (nu+eta)*torch.log(torch.tensor(2.)) + - torch.log( torch.tensor(special.gamma(nu+1)) ) + ) + + result[ z > nu ] = ( + z[ z > nu ] - 0.5*torch.log(z[ z > nu ]) + - 0.5*torch.log(torch.tensor(2*pi)) + ) + return result + +def B(alpha, nu, z): + ''' + https://arxiv.org/pdf/1606.02008.pdf + ''' + nu = nu.reshape(1,-1) + z = z.reshape(-1,1) + lamda = nu + float(alpha-1)/2. + delta = nu-0.5 + lamda / (2*torch.sqrt(lamda**2 + z**2)) + return z / (delta + torch.sqrt(delta**2 + z**2) ) +def B_tilde(alpha, nu, z): + ''' + https://arxiv.org/pdf/1606.02008.pdf + ''' + nu = nu.reshape(1,-1) + z = z.reshape(-1,1) + sigma = nu + float(alpha+1)/2. + delta_p = nu + 0.5 + sigma/(2*torch.sqrt(sigma**2 + z**2)) + delta_m = nu - 0.5 - sigma/(2*torch.sqrt(sigma**2 + z**2)) + return z/( delta_m + torch.sqrt(delta_p**2 + z**2)) +def lb_Ak(nu, z): + ''' + Lower-bound for the ratio of modified Bessel functions of 1st kind. + https://arxiv.org/pdf/1606.02008.pdf (Theorems 5 and 6). + ''' + assert torch.all(nu >= 0) + nu = nu.reshape(1,-1) + z = z.reshape(-1,1) + return B_tilde(0, nu, z) +def ub_Ak(nu, z): + ''' + Upper-bound for the ratio of modified Bessel functions of 1st kind. + https://arxiv.org/pdf/1606.02008.pdf (Theorems 5 and 6). 
+ Return + ------ + ub: tensor, shape (z.shape[0], nu.shape[0]) + Upper-bound for Ak(nu, z). + ''' + assert torch.all(nu >= 0) + nu = nu.reshape(1,-1) + z = z.reshape(-1,1) + + ub = torch.zeros(z.shape[0], nu.shape[1]) + ub[:, nu.reshape(-1) >= 0.5] = torch.min(B(0, nu[ nu >= 0.5 ], z), + B_tilde(2, nu[ nu >= 0.5 ], z)) + ub[:, nu.reshape(-1) < 0.5] = B_tilde(2, nu[ nu < 0.5 ], z) + return ub + +def Ak_approx(nu, z): + ''' + Approximation of ratio of modified Bessel functions of 1st kind. + https://arxiv.org/pdf/1902.02603.pdf + Parameters + ---------- + nu: tensor, shape (N0,) + Order of modified Bessel functions of 1st kind. + z: tensor, shape (N1,) + Argument of Bessel function. Positive values only. + + Return + ------ + tensor, shape (N1, N0) + ''' + return 0.5*(lb_Ak(nu, z) + ub_Ak(nu, z)) + + +def logbessel_I_approx(nu, z): + ''' + Approximation of the logarithm of the modified Bessel function of + 1st kind I(nu, z) using their ratio. + https://arxiv.org/pdf/1902.02603.pdf + Parameters + ---------- + nu: positive int, float + Order of modified Bessel function of 1st kind. + z: tensor, shape (N,) + Argument of Bessel function. Positive values only. + Return + ------ + approx: tensor, shape (N,) + ''' + assert nu >= 0 + approx = logbessel_I_scipy(nu-int(nu), z, check= True) + nu_v = nu - torch.arange(0, int(nu)) + A = Ak_approx(nu_v, z) + approx += torch.log(A).sum(axis= 1) + return approx + + +def logbessel_I(nu, z, fast = False, check = True): + ''' + Pytorch implementation of the logarithm of the modified Bessel + function of the 1st kind I(nu, z). + Based mainly on scipy and extends the definition domain compared to + scipy methods (which can easily provide infinite values). The + extension is done using numerical approximations involving the ratio + of Bessel functions: + - https://arxiv.org/pdf/1606.02008.pdf (Theorems 5,6) + - https://arxiv.org/pdf/1902.02603.pdf (Appendix) + Parameters + ---------- + nu: positive int, float + Order of modified Bessel function of 1st kind. + z: int/float or tensor, shape (N,) + Argument of Bessel function. + fast: bool + If True, use asymptotic behavior as approximation when main + scipy method is not tractable. If False, use tight bounds for + the ratio of Bessel functions: + https://arxiv.org/pdf/1902.02603.pdf + check: bool + If True, check if argument of log is non zero and not NaN. 
+ + Return + ------ + result: tensor, shape (N,) + ''' + if not isinstance(z, torch.Tensor): + z = torch.tensor(z) + z = z.reshape(-1) + + result = special.ive(nu, z) + # Indices for which scipy.special.ive is wrong + bad_idx = torch.arange(result.shape[0])[ result == 0] + result = torch.log(result) + z + if fast: + result[ bad_idx ] = logbessel_I_asymptotic(nu, z[ bad_idx ]) + else: + result[ bad_idx ] = logbessel_I_approx(nu, z[ bad_idx ]) + + if check: + # If problem with assertion, use a better defined init in + # logbessel_approx + assert len(result[ torch.isnan(result) ]) == 0, ('Bessel functions take ' + + 'NaN value for z = {}'.format( + z[ torch.isnan(result) ])) + assert len(result[ torch.isinf(result) ]) == 0, ('Bessel functions take ' + + 'inf value for z = {}'.format( + z[ torch.isinf(result) ])) + return result + + + +if __name__ == "__main__": + + import time + import matplotlib.pyplot as plt + + # ------- Single test ------- # + print(logbessel_I(nu= 10000, z= 10000)) + + + # ------- Vectorized tests ------- # + nu = 1000 + z = torch.arange(1, 200001) + + # Scipy + print('------\nScipy adaptation:') + start = time.time() + scipy_adapt = logbessel_I_scipy(nu, z, check= False) + end = time.time() + print('Computation time: ', "%.4f"%(end-start), 's') + + # Asymptotic + print('------\nAsymptotic computation:') + start = time.time() + asympt = logbessel_I_asymptotic(nu, z) + end = time.time() + print('Computation time: ', "%.4f"%(end-start), 's') + + # Approximation using ratios of Bessel functions + print('------\nApproximation via ratios of Bessel functions:') + start = time.time() + approx = logbessel_I_approx(nu, z) + end = time.time() + print('Computation time: ', "%.4f"%(end-start), 's') + + # Fast extension of scipy + print('------\nFast method (proposed):') + start = time.time() + logbessels_fast = logbessel_I(nu, z, fast= True, check= False) + end = time.time() + print('Computation time: ', "%.4f"%(end-start), 's') + + # Precise extension of scipy + print('------\nPrecise method (proposed):') + start = time.time() + logbessels = logbessel_I(nu, z, fast= False, check= False) + end = time.time() + print('Computation time: ', "%.4f"%(end-start), 's') + + + # ------- Plots ------- # + linewidth = 4 + + # Plot different methods + plt.plot(z, scipy_adapt, '-', label='scipy', linewidth = linewidth) + plt.plot(z, asympt, '--', label='asymptotic', linewidth = linewidth) + plt.plot(z, logbessels, '--', label='ours', linewidth = linewidth) + plt.xlabel(r'$z$') + plt.title(r'$\log[I(\nu = {}, z)]$'.format(nu)) + plt.legend() + + # Plot relative error of approximation + plt.figure() + plt.plot(z, torch.abs((approx - scipy_adapt)/scipy_adapt), '-', label=None, linewidth = linewidth) + plt.xlabel(r'$z$') + plt.title(r'Relative error of approximation for $\log[I(\nu = {}, z)]$'.format(nu)) + + plt.show() \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_overlaps.c b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_overlaps.c new file mode 100644 index 0000000000000000000000000000000000000000..e626057643ea66b5248e6e6b10d38d69ba553a45 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_overlaps.c @@ -0,0 +1,13565 @@ +/* Generated by Cython 3.0.11 */ + +/* BEGIN: Cython Metadata +{ + "distutils": { + "depends": [], + "name": "opencood.utils.box_overlaps", + "sources": [ + "opencood/utils/box_overlaps.pyx" + ] + }, + "module_name": "opencood.utils.box_overlaps" +} +END: Cython Metadata */ + 
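Note: box_overlaps.c above this point is auto-generated by Cython 3.0.11 from opencood/utils/box_overlaps.pyx, as recorded in the metadata block at the top of the file. For orientation only, the sketch below shows how such an extension is typically compiled; the setup file name and build options are assumptions, not the project's actual build configuration.

# setup_box_overlaps.py -- hypothetical build sketch, not part of this repository.
# Assumes Cython and NumPy are installed; names and paths mirror the Cython metadata above.
from setuptools import Extension, setup
from Cython.Build import cythonize
import numpy as np

extensions = [
    Extension(
        name="opencood.utils.box_overlaps",           # module_name from the metadata
        sources=["opencood/utils/box_overlaps.pyx"],  # source listed in the metadata
        include_dirs=[np.get_include()],              # the generated C includes numpy headers
    ),
]

setup(ext_modules=cythonize(extensions, language_level=3))

Running python setup_box_overlaps.py build_ext --inplace would compile the extension in place as an importable opencood.utils.box_overlaps module; the .pyx source itself is not part of this diff.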
+#ifndef PY_SSIZE_T_CLEAN +#define PY_SSIZE_T_CLEAN +#endif /* PY_SSIZE_T_CLEAN */ +#if defined(CYTHON_LIMITED_API) && 0 + #ifndef Py_LIMITED_API + #if CYTHON_LIMITED_API+0 > 0x03030000 + #define Py_LIMITED_API CYTHON_LIMITED_API + #else + #define Py_LIMITED_API 0x03030000 + #endif + #endif +#endif + +#include "Python.h" +#ifndef Py_PYTHON_H + #error Python headers needed to compile C extensions, please install development version of Python. +#elif PY_VERSION_HEX < 0x02070000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000) + #error Cython requires Python 2.7+ or Python 3.3+. +#else +#if defined(CYTHON_LIMITED_API) && CYTHON_LIMITED_API +#define __PYX_EXTRA_ABI_MODULE_NAME "limited" +#else +#define __PYX_EXTRA_ABI_MODULE_NAME "" +#endif +#define CYTHON_ABI "3_0_11" __PYX_EXTRA_ABI_MODULE_NAME +#define __PYX_ABI_MODULE_NAME "_cython_" CYTHON_ABI +#define __PYX_TYPE_MODULE_PREFIX __PYX_ABI_MODULE_NAME "." +#define CYTHON_HEX_VERSION 0x03000BF0 +#define CYTHON_FUTURE_DIVISION 1 +#include +#ifndef offsetof + #define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) +#endif +#if !defined(_WIN32) && !defined(WIN32) && !defined(MS_WINDOWS) + #ifndef __stdcall + #define __stdcall + #endif + #ifndef __cdecl + #define __cdecl + #endif + #ifndef __fastcall + #define __fastcall + #endif +#endif +#ifndef DL_IMPORT + #define DL_IMPORT(t) t +#endif +#ifndef DL_EXPORT + #define DL_EXPORT(t) t +#endif +#define __PYX_COMMA , +#ifndef HAVE_LONG_LONG + #define HAVE_LONG_LONG +#endif +#ifndef PY_LONG_LONG + #define PY_LONG_LONG LONG_LONG +#endif +#ifndef Py_HUGE_VAL + #define Py_HUGE_VAL HUGE_VAL +#endif +#define __PYX_LIMITED_VERSION_HEX PY_VERSION_HEX +#if defined(GRAALVM_PYTHON) + /* For very preliminary testing purposes. Most variables are set the same as PyPy. 
+ The existence of this section does not imply that anything works or is even tested */ + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 1 + #define CYTHON_COMPILING_IN_NOGIL 0 + #undef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 0 + #undef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #if PY_VERSION_HEX < 0x03050000 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #undef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 0 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #undef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 1 + #undef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 0 + #undef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 0 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #undef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS (PY_MAJOR_VERSION >= 3) + #endif + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #undef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 0 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 + #endif + #undef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 +#elif defined(PYPY_VERSION) + #define CYTHON_COMPILING_IN_PYPY 1 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 0 + #undef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 0 + #ifndef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #endif + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #if PY_VERSION_HEX < 0x03050000 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #undef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 0 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #undef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 1 + #undef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 0 + #undef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 0 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #undef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS (PY_MAJOR_VERSION >= 3) + 
#endif + #if PY_VERSION_HEX < 0x03090000 + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT) + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #endif + #undef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 0 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE (PY_VERSION_HEX >= 0x030400a1 && PYPY_VERSION_NUM >= 0x07030C00) + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 + #endif + #undef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 +#elif defined(CYTHON_LIMITED_API) + #ifdef Py_LIMITED_API + #undef __PYX_LIMITED_VERSION_HEX + #define __PYX_LIMITED_VERSION_HEX Py_LIMITED_API + #endif + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 1 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 0 + #undef CYTHON_CLINE_IN_TRACEBACK + #define CYTHON_CLINE_IN_TRACEBACK 0 + #undef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 0 + #undef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 1 + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #undef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 0 + #ifndef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #endif + #undef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #undef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 0 + #undef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 0 + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #undef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 0 + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS 1 + #endif + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #undef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 1 + #ifndef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 + #endif + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 0 + #endif + #undef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 +#elif defined(Py_GIL_DISABLED) || defined(Py_NOGIL) + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 0 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 1 + #ifndef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 1 + #endif + #ifndef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #endif + #undef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 0 + #ifndef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #ifndef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 0 + #endif + #undef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 0 + #ifndef 
CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 1 + #endif + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #ifndef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 1 + #endif + #ifndef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 1 + #endif + #undef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 0 + #undef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL 0 + #ifndef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL 1 + #endif + #undef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 0 + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS 1 + #endif + #ifndef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #endif + #ifndef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 0 + #endif + #ifndef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 1 + #endif + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 1 + #endif + #ifndef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 0 + #endif +#else + #define CYTHON_COMPILING_IN_PYPY 0 + #define CYTHON_COMPILING_IN_CPYTHON 1 + #define CYTHON_COMPILING_IN_LIMITED_API 0 + #define CYTHON_COMPILING_IN_GRAAL 0 + #define CYTHON_COMPILING_IN_NOGIL 0 + #ifndef CYTHON_USE_TYPE_SLOTS + #define CYTHON_USE_TYPE_SLOTS 1 + #endif + #ifndef CYTHON_USE_TYPE_SPECS + #define CYTHON_USE_TYPE_SPECS 0 + #endif + #ifndef CYTHON_USE_PYTYPE_LOOKUP + #define CYTHON_USE_PYTYPE_LOOKUP 1 + #endif + #if PY_MAJOR_VERSION < 3 + #undef CYTHON_USE_ASYNC_SLOTS + #define CYTHON_USE_ASYNC_SLOTS 0 + #elif !defined(CYTHON_USE_ASYNC_SLOTS) + #define CYTHON_USE_ASYNC_SLOTS 1 + #endif + #ifndef CYTHON_USE_PYLONG_INTERNALS + #define CYTHON_USE_PYLONG_INTERNALS 1 + #endif + #ifndef CYTHON_USE_PYLIST_INTERNALS + #define CYTHON_USE_PYLIST_INTERNALS 1 + #endif + #ifndef CYTHON_USE_UNICODE_INTERNALS + #define CYTHON_USE_UNICODE_INTERNALS 1 + #endif + #if PY_VERSION_HEX < 0x030300F0 || PY_VERSION_HEX >= 0x030B00A2 + #undef CYTHON_USE_UNICODE_WRITER + #define CYTHON_USE_UNICODE_WRITER 0 + #elif !defined(CYTHON_USE_UNICODE_WRITER) + #define CYTHON_USE_UNICODE_WRITER 1 + #endif + #ifndef CYTHON_AVOID_BORROWED_REFS + #define CYTHON_AVOID_BORROWED_REFS 0 + #endif + #ifndef CYTHON_ASSUME_SAFE_MACROS + #define CYTHON_ASSUME_SAFE_MACROS 1 + #endif + #ifndef CYTHON_UNPACK_METHODS + #define CYTHON_UNPACK_METHODS 1 + #endif + #ifndef CYTHON_FAST_THREAD_STATE + #define CYTHON_FAST_THREAD_STATE 1 + #endif + #ifndef CYTHON_FAST_GIL + #define CYTHON_FAST_GIL (PY_MAJOR_VERSION < 3 || PY_VERSION_HEX >= 0x03060000 && PY_VERSION_HEX < 0x030C00A6) + #endif + #ifndef CYTHON_METH_FASTCALL + #define CYTHON_METH_FASTCALL (PY_VERSION_HEX >= 0x030700A1) + #endif + #ifndef CYTHON_FAST_PYCALL + #define CYTHON_FAST_PYCALL 1 + #endif + #ifndef CYTHON_PEP487_INIT_SUBCLASS + #define CYTHON_PEP487_INIT_SUBCLASS 1 + #endif + #if PY_VERSION_HEX < 0x03050000 + #undef CYTHON_PEP489_MULTI_PHASE_INIT + #define CYTHON_PEP489_MULTI_PHASE_INIT 0 + #elif !defined(CYTHON_PEP489_MULTI_PHASE_INIT) + #define CYTHON_PEP489_MULTI_PHASE_INIT 1 + #endif + #ifndef CYTHON_USE_MODULE_STATE + #define CYTHON_USE_MODULE_STATE 0 + #endif + #if PY_VERSION_HEX < 0x030400a1 + #undef CYTHON_USE_TP_FINALIZE + #define CYTHON_USE_TP_FINALIZE 0 + #elif 
!defined(CYTHON_USE_TP_FINALIZE) + #define CYTHON_USE_TP_FINALIZE 1 + #endif + #if PY_VERSION_HEX < 0x030600B1 + #undef CYTHON_USE_DICT_VERSIONS + #define CYTHON_USE_DICT_VERSIONS 0 + #elif !defined(CYTHON_USE_DICT_VERSIONS) + #define CYTHON_USE_DICT_VERSIONS (PY_VERSION_HEX < 0x030C00A5) + #endif + #if PY_VERSION_HEX < 0x030700A3 + #undef CYTHON_USE_EXC_INFO_STACK + #define CYTHON_USE_EXC_INFO_STACK 0 + #elif !defined(CYTHON_USE_EXC_INFO_STACK) + #define CYTHON_USE_EXC_INFO_STACK 1 + #endif + #ifndef CYTHON_UPDATE_DESCRIPTOR_DOC + #define CYTHON_UPDATE_DESCRIPTOR_DOC 1 + #endif + #ifndef CYTHON_USE_FREELISTS + #define CYTHON_USE_FREELISTS 1 + #endif +#endif +#if !defined(CYTHON_FAST_PYCCALL) +#define CYTHON_FAST_PYCCALL (CYTHON_FAST_PYCALL && PY_VERSION_HEX >= 0x030600B1) +#endif +#if !defined(CYTHON_VECTORCALL) +#define CYTHON_VECTORCALL (CYTHON_FAST_PYCCALL && PY_VERSION_HEX >= 0x030800B1) +#endif +#define CYTHON_BACKPORT_VECTORCALL (CYTHON_METH_FASTCALL && PY_VERSION_HEX < 0x030800B1) +#if CYTHON_USE_PYLONG_INTERNALS + #if PY_MAJOR_VERSION < 3 + #include "longintrepr.h" + #endif + #undef SHIFT + #undef BASE + #undef MASK + #ifdef SIZEOF_VOID_P + enum { __pyx_check_sizeof_voidp = 1 / (int)(SIZEOF_VOID_P == sizeof(void*)) }; + #endif +#endif +#ifndef __has_attribute + #define __has_attribute(x) 0 +#endif +#ifndef __has_cpp_attribute + #define __has_cpp_attribute(x) 0 +#endif +#ifndef CYTHON_RESTRICT + #if defined(__GNUC__) + #define CYTHON_RESTRICT __restrict__ + #elif defined(_MSC_VER) && _MSC_VER >= 1400 + #define CYTHON_RESTRICT __restrict + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_RESTRICT restrict + #else + #define CYTHON_RESTRICT + #endif +#endif +#ifndef CYTHON_UNUSED + #if defined(__cplusplus) + /* for clang __has_cpp_attribute(maybe_unused) is true even before C++17 + * but leads to warnings with -pedantic, since it is a C++17 feature */ + #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) + #if __has_cpp_attribute(maybe_unused) + #define CYTHON_UNUSED [[maybe_unused]] + #endif + #endif + #endif +#endif +#ifndef CYTHON_UNUSED +# if defined(__GNUC__) +# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER)) +# define CYTHON_UNUSED __attribute__ ((__unused__)) +# else +# define CYTHON_UNUSED +# endif +#endif +#ifndef CYTHON_UNUSED_VAR +# if defined(__cplusplus) + template void CYTHON_UNUSED_VAR( const T& ) { } +# else +# define CYTHON_UNUSED_VAR(x) (void)(x) +# endif +#endif +#ifndef CYTHON_MAYBE_UNUSED_VAR + #define CYTHON_MAYBE_UNUSED_VAR(x) CYTHON_UNUSED_VAR(x) +#endif +#ifndef CYTHON_NCP_UNUSED +# if CYTHON_COMPILING_IN_CPYTHON +# define CYTHON_NCP_UNUSED +# else +# define CYTHON_NCP_UNUSED CYTHON_UNUSED +# endif +#endif +#ifndef CYTHON_USE_CPP_STD_MOVE + #if defined(__cplusplus) && (\ + __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1600)) + #define CYTHON_USE_CPP_STD_MOVE 1 + #else + #define CYTHON_USE_CPP_STD_MOVE 0 + #endif +#endif +#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None) +#ifdef _MSC_VER + #ifndef _MSC_STDINT_H_ + #if _MSC_VER < 1300 + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; + #else + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 
uint32_t; + #endif + #endif + #if _MSC_VER < 1300 + #ifdef _WIN64 + typedef unsigned long long __pyx_uintptr_t; + #else + typedef unsigned int __pyx_uintptr_t; + #endif + #else + #ifdef _WIN64 + typedef unsigned __int64 __pyx_uintptr_t; + #else + typedef unsigned __int32 __pyx_uintptr_t; + #endif + #endif +#else + #include + typedef uintptr_t __pyx_uintptr_t; +#endif +#ifndef CYTHON_FALLTHROUGH + #if defined(__cplusplus) + /* for clang __has_cpp_attribute(fallthrough) is true even before C++17 + * but leads to warnings with -pedantic, since it is a C++17 feature */ + #if ((defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) || __cplusplus >= 201703L) + #if __has_cpp_attribute(fallthrough) + #define CYTHON_FALLTHROUGH [[fallthrough]] + #endif + #endif + #ifndef CYTHON_FALLTHROUGH + #if __has_cpp_attribute(clang::fallthrough) + #define CYTHON_FALLTHROUGH [[clang::fallthrough]] + #elif __has_cpp_attribute(gnu::fallthrough) + #define CYTHON_FALLTHROUGH [[gnu::fallthrough]] + #endif + #endif + #endif + #ifndef CYTHON_FALLTHROUGH + #if __has_attribute(fallthrough) + #define CYTHON_FALLTHROUGH __attribute__((fallthrough)) + #else + #define CYTHON_FALLTHROUGH + #endif + #endif + #if defined(__clang__) && defined(__apple_build_version__) + #if __apple_build_version__ < 7000000 + #undef CYTHON_FALLTHROUGH + #define CYTHON_FALLTHROUGH + #endif + #endif +#endif +#ifdef __cplusplus + template + struct __PYX_IS_UNSIGNED_IMPL {static const bool value = T(0) < T(-1);}; + #define __PYX_IS_UNSIGNED(type) (__PYX_IS_UNSIGNED_IMPL::value) +#else + #define __PYX_IS_UNSIGNED(type) (((type)-1) > 0) +#endif +#if CYTHON_COMPILING_IN_PYPY == 1 + #define __PYX_NEED_TP_PRINT_SLOT (PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x030A0000) +#else + #define __PYX_NEED_TP_PRINT_SLOT (PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000) +#endif +#define __PYX_REINTERPRET_FUNCION(func_pointer, other_pointer) ((func_pointer)(void(*)(void))(other_pointer)) + +#ifndef CYTHON_INLINE + #if defined(__clang__) + #define CYTHON_INLINE __inline__ __attribute__ ((__unused__)) + #elif defined(__GNUC__) + #define CYTHON_INLINE __inline__ + #elif defined(_MSC_VER) + #define CYTHON_INLINE __inline + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define CYTHON_INLINE inline + #else + #define CYTHON_INLINE + #endif +#endif + +#define __PYX_BUILD_PY_SSIZE_T "n" +#define CYTHON_FORMAT_SSIZE_T "z" +#if PY_MAJOR_VERSION < 3 + #define __Pyx_BUILTIN_MODULE_NAME "__builtin__" + #define __Pyx_DefaultClassType PyClass_Type + #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) +#else + #define __Pyx_BUILTIN_MODULE_NAME "builtins" + #define __Pyx_DefaultClassType PyType_Type +#if CYTHON_COMPILING_IN_LIMITED_API + static CYTHON_INLINE PyObject* __Pyx_PyCode_New(int a, int p, int k, int l, int s, int f, + PyObject *code, PyObject *c, PyObject* n, PyObject *v, + PyObject *fv, PyObject *cell, PyObject* fn, + PyObject *name, int fline, PyObject *lnos) { + PyObject *exception_table = NULL; + PyObject *types_module=NULL, *code_type=NULL, *result=NULL; + #if __PYX_LIMITED_VERSION_HEX < 0x030B0000 + PyObject *version_info; + PyObject *py_minor_version = NULL; + #endif + long minor_version = 0; + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + #if __PYX_LIMITED_VERSION_HEX >= 0x030B0000 + minor_version = 11; + #else + if (!(version_info = PySys_GetObject("version_info"))) goto end; + if 
(!(py_minor_version = PySequence_GetItem(version_info, 1))) goto end; + minor_version = PyLong_AsLong(py_minor_version); + Py_DECREF(py_minor_version); + if (minor_version == -1 && PyErr_Occurred()) goto end; + #endif + if (!(types_module = PyImport_ImportModule("types"))) goto end; + if (!(code_type = PyObject_GetAttrString(types_module, "CodeType"))) goto end; + if (minor_version <= 7) { + (void)p; + result = PyObject_CallFunction(code_type, "iiiiiOOOOOOiOO", a, k, l, s, f, code, + c, n, v, fn, name, fline, lnos, fv, cell); + } else if (minor_version <= 10) { + result = PyObject_CallFunction(code_type, "iiiiiiOOOOOOiOO", a,p, k, l, s, f, code, + c, n, v, fn, name, fline, lnos, fv, cell); + } else { + if (!(exception_table = PyBytes_FromStringAndSize(NULL, 0))) goto end; + result = PyObject_CallFunction(code_type, "iiiiiiOOOOOOOiOO", a,p, k, l, s, f, code, + c, n, v, fn, name, name, fline, lnos, exception_table, fv, cell); + } + end: + Py_XDECREF(code_type); + Py_XDECREF(exception_table); + Py_XDECREF(types_module); + if (type) { + PyErr_Restore(type, value, traceback); + } + return result; + } + #ifndef CO_OPTIMIZED + #define CO_OPTIMIZED 0x0001 + #endif + #ifndef CO_NEWLOCALS + #define CO_NEWLOCALS 0x0002 + #endif + #ifndef CO_VARARGS + #define CO_VARARGS 0x0004 + #endif + #ifndef CO_VARKEYWORDS + #define CO_VARKEYWORDS 0x0008 + #endif + #ifndef CO_ASYNC_GENERATOR + #define CO_ASYNC_GENERATOR 0x0200 + #endif + #ifndef CO_GENERATOR + #define CO_GENERATOR 0x0020 + #endif + #ifndef CO_COROUTINE + #define CO_COROUTINE 0x0080 + #endif +#elif PY_VERSION_HEX >= 0x030B0000 + static CYTHON_INLINE PyCodeObject* __Pyx_PyCode_New(int a, int p, int k, int l, int s, int f, + PyObject *code, PyObject *c, PyObject* n, PyObject *v, + PyObject *fv, PyObject *cell, PyObject* fn, + PyObject *name, int fline, PyObject *lnos) { + PyCodeObject *result; + PyObject *empty_bytes = PyBytes_FromStringAndSize("", 0); + if (!empty_bytes) return NULL; + result = + #if PY_VERSION_HEX >= 0x030C0000 + PyUnstable_Code_NewWithPosOnlyArgs + #else + PyCode_NewWithPosOnlyArgs + #endif + (a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, name, fline, lnos, empty_bytes); + Py_DECREF(empty_bytes); + return result; + } +#elif PY_VERSION_HEX >= 0x030800B2 && !CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_NewWithPosOnlyArgs(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) +#else + #define __Pyx_PyCode_New(a, p, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ + PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) +#endif +#endif +#if PY_VERSION_HEX >= 0x030900A4 || defined(Py_IS_TYPE) + #define __Pyx_IS_TYPE(ob, type) Py_IS_TYPE(ob, type) +#else + #define __Pyx_IS_TYPE(ob, type) (((const PyObject*)ob)->ob_type == (type)) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_Is) + #define __Pyx_Py_Is(x, y) Py_Is(x, y) +#else + #define __Pyx_Py_Is(x, y) ((x) == (y)) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsNone) + #define __Pyx_Py_IsNone(ob) Py_IsNone(ob) +#else + #define __Pyx_Py_IsNone(ob) __Pyx_Py_Is((ob), Py_None) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsTrue) + #define __Pyx_Py_IsTrue(ob) Py_IsTrue(ob) +#else + #define __Pyx_Py_IsTrue(ob) __Pyx_Py_Is((ob), Py_True) +#endif +#if PY_VERSION_HEX >= 0x030A00B1 || defined(Py_IsFalse) + #define __Pyx_Py_IsFalse(ob) Py_IsFalse(ob) +#else + #define __Pyx_Py_IsFalse(ob) __Pyx_Py_Is((ob), Py_False) +#endif +#define 
__Pyx_NoneAsNull(obj) (__Pyx_Py_IsNone(obj) ? NULL : (obj)) +#if PY_VERSION_HEX >= 0x030900F0 && !CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyObject_GC_IsFinalized(o) PyObject_GC_IsFinalized(o) +#else + #define __Pyx_PyObject_GC_IsFinalized(o) _PyGC_FINALIZED(o) +#endif +#ifndef CO_COROUTINE + #define CO_COROUTINE 0x80 +#endif +#ifndef CO_ASYNC_GENERATOR + #define CO_ASYNC_GENERATOR 0x200 +#endif +#ifndef Py_TPFLAGS_CHECKTYPES + #define Py_TPFLAGS_CHECKTYPES 0 +#endif +#ifndef Py_TPFLAGS_HAVE_INDEX + #define Py_TPFLAGS_HAVE_INDEX 0 +#endif +#ifndef Py_TPFLAGS_HAVE_NEWBUFFER + #define Py_TPFLAGS_HAVE_NEWBUFFER 0 +#endif +#ifndef Py_TPFLAGS_HAVE_FINALIZE + #define Py_TPFLAGS_HAVE_FINALIZE 0 +#endif +#ifndef Py_TPFLAGS_SEQUENCE + #define Py_TPFLAGS_SEQUENCE 0 +#endif +#ifndef Py_TPFLAGS_MAPPING + #define Py_TPFLAGS_MAPPING 0 +#endif +#ifndef METH_STACKLESS + #define METH_STACKLESS 0 +#endif +#if PY_VERSION_HEX <= 0x030700A3 || !defined(METH_FASTCALL) + #ifndef METH_FASTCALL + #define METH_FASTCALL 0x80 + #endif + typedef PyObject *(*__Pyx_PyCFunctionFast) (PyObject *self, PyObject *const *args, Py_ssize_t nargs); + typedef PyObject *(*__Pyx_PyCFunctionFastWithKeywords) (PyObject *self, PyObject *const *args, + Py_ssize_t nargs, PyObject *kwnames); +#else + #if PY_VERSION_HEX >= 0x030d00A4 + # define __Pyx_PyCFunctionFast PyCFunctionFast + # define __Pyx_PyCFunctionFastWithKeywords PyCFunctionFastWithKeywords + #else + # define __Pyx_PyCFunctionFast _PyCFunctionFast + # define __Pyx_PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords + #endif +#endif +#if CYTHON_METH_FASTCALL + #define __Pyx_METH_FASTCALL METH_FASTCALL + #define __Pyx_PyCFunction_FastCall __Pyx_PyCFunctionFast + #define __Pyx_PyCFunction_FastCallWithKeywords __Pyx_PyCFunctionFastWithKeywords +#else + #define __Pyx_METH_FASTCALL METH_VARARGS + #define __Pyx_PyCFunction_FastCall PyCFunction + #define __Pyx_PyCFunction_FastCallWithKeywords PyCFunctionWithKeywords +#endif +#if CYTHON_VECTORCALL + #define __pyx_vectorcallfunc vectorcallfunc + #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET PY_VECTORCALL_ARGUMENTS_OFFSET + #define __Pyx_PyVectorcall_NARGS(n) PyVectorcall_NARGS((size_t)(n)) +#elif CYTHON_BACKPORT_VECTORCALL + typedef PyObject *(*__pyx_vectorcallfunc)(PyObject *callable, PyObject *const *args, + size_t nargsf, PyObject *kwnames); + #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET ((size_t)1 << (8 * sizeof(size_t) - 1)) + #define __Pyx_PyVectorcall_NARGS(n) ((Py_ssize_t)(((size_t)(n)) & ~__Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET)) +#else + #define __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET 0 + #define __Pyx_PyVectorcall_NARGS(n) ((Py_ssize_t)(n)) +#endif +#if PY_MAJOR_VERSION >= 0x030900B1 +#define __Pyx_PyCFunction_CheckExact(func) PyCFunction_CheckExact(func) +#else +#define __Pyx_PyCFunction_CheckExact(func) PyCFunction_Check(func) +#endif +#define __Pyx_CyOrPyCFunction_Check(func) PyCFunction_Check(func) +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_CyOrPyCFunction_GET_FUNCTION(func) (((PyCFunctionObject*)(func))->m_ml->ml_meth) +#elif !CYTHON_COMPILING_IN_LIMITED_API +#define __Pyx_CyOrPyCFunction_GET_FUNCTION(func) PyCFunction_GET_FUNCTION(func) +#endif +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_CyOrPyCFunction_GET_FLAGS(func) (((PyCFunctionObject*)(func))->m_ml->ml_flags) +static CYTHON_INLINE PyObject* __Pyx_CyOrPyCFunction_GET_SELF(PyObject *func) { + return (__Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_STATIC) ? 
NULL : ((PyCFunctionObject*)func)->m_self; +} +#endif +static CYTHON_INLINE int __Pyx__IsSameCFunction(PyObject *func, void *cfunc) { +#if CYTHON_COMPILING_IN_LIMITED_API + return PyCFunction_Check(func) && PyCFunction_GetFunction(func) == (PyCFunction) cfunc; +#else + return PyCFunction_Check(func) && PyCFunction_GET_FUNCTION(func) == (PyCFunction) cfunc; +#endif +} +#define __Pyx_IsSameCFunction(func, cfunc) __Pyx__IsSameCFunction(func, cfunc) +#if __PYX_LIMITED_VERSION_HEX < 0x030900B1 + #define __Pyx_PyType_FromModuleAndSpec(m, s, b) ((void)m, PyType_FromSpecWithBases(s, b)) + typedef PyObject *(*__Pyx_PyCMethod)(PyObject *, PyTypeObject *, PyObject *const *, size_t, PyObject *); +#else + #define __Pyx_PyType_FromModuleAndSpec(m, s, b) PyType_FromModuleAndSpec(m, s, b) + #define __Pyx_PyCMethod PyCMethod +#endif +#ifndef METH_METHOD + #define METH_METHOD 0x200 +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc) + #define PyObject_Malloc(s) PyMem_Malloc(s) + #define PyObject_Free(p) PyMem_Free(p) + #define PyObject_Realloc(p) PyMem_Realloc(p) +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) + #define __Pyx_PyFrame_SetLineNumber(frame, lineno) +#else + #define __Pyx_PyCode_HasFreeVars(co) (PyCode_GetNumFree(co) > 0) + #define __Pyx_PyFrame_SetLineNumber(frame, lineno) (frame)->f_lineno = (lineno) +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_PyThreadState_Current PyThreadState_Get() +#elif !CYTHON_FAST_THREAD_STATE + #define __Pyx_PyThreadState_Current PyThreadState_GET() +#elif PY_VERSION_HEX >= 0x030d00A1 + #define __Pyx_PyThreadState_Current PyThreadState_GetUnchecked() +#elif PY_VERSION_HEX >= 0x03060000 + #define __Pyx_PyThreadState_Current _PyThreadState_UncheckedGet() +#elif PY_VERSION_HEX >= 0x03000000 + #define __Pyx_PyThreadState_Current PyThreadState_GET() +#else + #define __Pyx_PyThreadState_Current _PyThreadState_Current +#endif +#if CYTHON_COMPILING_IN_LIMITED_API +static CYTHON_INLINE void *__Pyx_PyModule_GetState(PyObject *op) +{ + void *result; + result = PyModule_GetState(op); + if (!result) + Py_FatalError("Couldn't find the module state"); + return result; +} +#endif +#define __Pyx_PyObject_GetSlot(obj, name, func_ctype) __Pyx_PyType_GetSlot(Py_TYPE(obj), name, func_ctype) +#if CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_PyType_GetSlot(type, name, func_ctype) ((func_ctype) PyType_GetSlot((type), Py_##name)) +#else + #define __Pyx_PyType_GetSlot(type, name, func_ctype) ((type)->name) +#endif +#if PY_VERSION_HEX < 0x030700A2 && !defined(PyThread_tss_create) && !defined(Py_tss_NEEDS_INIT) +#include "pythread.h" +#define Py_tss_NEEDS_INIT 0 +typedef int Py_tss_t; +static CYTHON_INLINE int PyThread_tss_create(Py_tss_t *key) { + *key = PyThread_create_key(); + return 0; +} +static CYTHON_INLINE Py_tss_t * PyThread_tss_alloc(void) { + Py_tss_t *key = (Py_tss_t *)PyObject_Malloc(sizeof(Py_tss_t)); + *key = Py_tss_NEEDS_INIT; + return key; +} +static CYTHON_INLINE void PyThread_tss_free(Py_tss_t *key) { + PyObject_Free(key); +} +static CYTHON_INLINE int PyThread_tss_is_created(Py_tss_t *key) { + return *key != Py_tss_NEEDS_INIT; +} +static CYTHON_INLINE void PyThread_tss_delete(Py_tss_t *key) { + PyThread_delete_key(*key); + *key = Py_tss_NEEDS_INIT; +} +static CYTHON_INLINE int PyThread_tss_set(Py_tss_t *key, void *value) { + return PyThread_set_key_value(*key, value); +} +static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) { + return PyThread_get_key_value(*key); +} 
+#endif +#if PY_MAJOR_VERSION < 3 + #if CYTHON_COMPILING_IN_PYPY + #if PYPY_VERSION_NUM < 0x07030600 + #if defined(__cplusplus) && __cplusplus >= 201402L + [[deprecated("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6")]] + #elif defined(__GNUC__) || defined(__clang__) + __attribute__ ((__deprecated__("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6"))) + #elif defined(_MSC_VER) + __declspec(deprecated("`with nogil:` inside a nogil function will not release the GIL in PyPy2 < 7.3.6")) + #endif + static CYTHON_INLINE int PyGILState_Check(void) { + return 0; + } + #else // PYPY_VERSION_NUM < 0x07030600 + #endif // PYPY_VERSION_NUM < 0x07030600 + #else + static CYTHON_INLINE int PyGILState_Check(void) { + PyThreadState * tstate = _PyThreadState_Current; + return tstate && (tstate == PyGILState_GetThisThreadState()); + } + #endif +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030d0000 || defined(_PyDict_NewPresized) +#define __Pyx_PyDict_NewPresized(n) ((n <= 8) ? PyDict_New() : _PyDict_NewPresized(n)) +#else +#define __Pyx_PyDict_NewPresized(n) PyDict_New() +#endif +#if PY_MAJOR_VERSION >= 3 || CYTHON_FUTURE_DIVISION + #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y) +#else + #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y) + #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y) +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX > 0x030600B4 && PY_VERSION_HEX < 0x030d0000 && CYTHON_USE_UNICODE_INTERNALS +#define __Pyx_PyDict_GetItemStrWithError(dict, name) _PyDict_GetItem_KnownHash(dict, name, ((PyASCIIObject *) name)->hash) +static CYTHON_INLINE PyObject * __Pyx_PyDict_GetItemStr(PyObject *dict, PyObject *name) { + PyObject *res = __Pyx_PyDict_GetItemStrWithError(dict, name); + if (res == NULL) PyErr_Clear(); + return res; +} +#elif PY_MAJOR_VERSION >= 3 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07020000) +#define __Pyx_PyDict_GetItemStrWithError PyDict_GetItemWithError +#define __Pyx_PyDict_GetItemStr PyDict_GetItem +#else +static CYTHON_INLINE PyObject * __Pyx_PyDict_GetItemStrWithError(PyObject *dict, PyObject *name) { +#if CYTHON_COMPILING_IN_PYPY + return PyDict_GetItem(dict, name); +#else + PyDictEntry *ep; + PyDictObject *mp = (PyDictObject*) dict; + long hash = ((PyStringObject *) name)->ob_shash; + assert(hash != -1); + ep = (mp->ma_lookup)(mp, name, hash); + if (ep == NULL) { + return NULL; + } + return ep->me_value; +#endif +} +#define __Pyx_PyDict_GetItemStr PyDict_GetItem +#endif +#if CYTHON_USE_TYPE_SLOTS + #define __Pyx_PyType_GetFlags(tp) (((PyTypeObject *)tp)->tp_flags) + #define __Pyx_PyType_HasFeature(type, feature) ((__Pyx_PyType_GetFlags(type) & (feature)) != 0) + #define __Pyx_PyObject_GetIterNextFunc(obj) (Py_TYPE(obj)->tp_iternext) +#else + #define __Pyx_PyType_GetFlags(tp) (PyType_GetFlags((PyTypeObject *)tp)) + #define __Pyx_PyType_HasFeature(type, feature) PyType_HasFeature(type, feature) + #define __Pyx_PyObject_GetIterNextFunc(obj) PyIter_Next +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_SetItemOnTypeDict(tp, k, v) PyObject_GenericSetAttr((PyObject*)tp, k, v) +#else + #define __Pyx_SetItemOnTypeDict(tp, k, v) PyDict_SetItem(tp->tp_dict, k, v) +#endif +#if CYTHON_USE_TYPE_SPECS && PY_VERSION_HEX >= 0x03080000 +#define __Pyx_PyHeapTypeObject_GC_Del(obj) {\ + PyTypeObject *type = Py_TYPE((PyObject*)obj);\ + assert(__Pyx_PyType_HasFeature(type, 
Py_TPFLAGS_HEAPTYPE));\ + PyObject_GC_Del(obj);\ + Py_DECREF(type);\ +} +#else +#define __Pyx_PyHeapTypeObject_GC_Del(obj) PyObject_GC_Del(obj) +#endif +#if CYTHON_COMPILING_IN_LIMITED_API + #define CYTHON_PEP393_ENABLED 1 + #define __Pyx_PyUnicode_READY(op) (0) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GetLength(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_ReadChar(u, i) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((void)u, 1114111U) + #define __Pyx_PyUnicode_KIND(u) ((void)u, (0)) + #define __Pyx_PyUnicode_DATA(u) ((void*)u) + #define __Pyx_PyUnicode_READ(k, d, i) ((void)k, PyUnicode_ReadChar((PyObject*)(d), i)) + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GetLength(u)) +#elif PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) + #define CYTHON_PEP393_ENABLED 1 + #if PY_VERSION_HEX >= 0x030C0000 + #define __Pyx_PyUnicode_READY(op) (0) + #else + #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\ + 0 : _PyUnicode_Ready((PyObject *)(op))) + #endif + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) PyUnicode_MAX_CHAR_VALUE(u) + #define __Pyx_PyUnicode_KIND(u) ((int)PyUnicode_KIND(u)) + #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u) + #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) + #define __Pyx_PyUnicode_WRITE(k, d, i, ch) PyUnicode_WRITE(k, d, i, (Py_UCS4) ch) + #if PY_VERSION_HEX >= 0x030C0000 + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_LENGTH(u)) + #else + #if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x03090000 + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : ((PyCompactUnicodeObject *)(u))->wstr_length)) + #else + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ? PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u))) + #endif + #endif +#else + #define CYTHON_PEP393_ENABLED 0 + #define PyUnicode_1BYTE_KIND 1 + #define PyUnicode_2BYTE_KIND 2 + #define PyUnicode_4BYTE_KIND 4 + #define __Pyx_PyUnicode_READY(op) (0) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i])) + #define __Pyx_PyUnicode_MAX_CHAR_VALUE(u) ((sizeof(Py_UNICODE) == 2) ? 
65535U : 1114111U) + #define __Pyx_PyUnicode_KIND(u) ((int)sizeof(Py_UNICODE)) + #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u)) + #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i])) + #define __Pyx_PyUnicode_WRITE(k, d, i, ch) (((void)(k)), ((Py_UNICODE*)d)[i] = (Py_UNICODE) ch) + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_SIZE(u)) +#endif +#if CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) +#else + #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\ + PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) +#endif +#if CYTHON_COMPILING_IN_PYPY + #if !defined(PyUnicode_DecodeUnicodeEscape) + #define PyUnicode_DecodeUnicodeEscape(s, size, errors) PyUnicode_Decode(s, size, "unicode_escape", errors) + #endif + #if !defined(PyUnicode_Contains) || (PY_MAJOR_VERSION == 2 && PYPY_VERSION_NUM < 0x07030500) + #undef PyUnicode_Contains + #define PyUnicode_Contains(u, s) PySequence_Contains(u, s) + #endif + #if !defined(PyByteArray_Check) + #define PyByteArray_Check(obj) PyObject_TypeCheck(obj, &PyByteArray_Type) + #endif + #if !defined(PyObject_Format) + #define PyObject_Format(obj, fmt) PyObject_CallMethod(obj, "__format__", "O", fmt) + #endif +#endif +#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyString_Check(b) && !PyString_CheckExact(b)))) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b)) +#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None || (PyUnicode_Check(b) && !PyUnicode_CheckExact(b)))) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b)) +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b) +#else + #define __Pyx_PyString_Format(a, b) PyString_Format(a, b) +#endif +#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII) + #define PyObject_ASCII(o) PyObject_Repr(o) +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBaseString_Type PyUnicode_Type + #define PyStringObject PyUnicodeObject + #define PyString_Type PyUnicode_Type + #define PyString_Check PyUnicode_Check + #define PyString_CheckExact PyUnicode_CheckExact +#ifndef PyObject_Unicode + #define PyObject_Unicode PyObject_Str +#endif +#endif +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj) + #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj) +#else + #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj)) + #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj)) +#endif +#if CYTHON_COMPILING_IN_CPYTHON + #define __Pyx_PySequence_ListKeepNew(obj)\ + (likely(PyList_CheckExact(obj) && Py_REFCNT(obj) == 1) ? 
__Pyx_NewRef(obj) : PySequence_List(obj)) +#else + #define __Pyx_PySequence_ListKeepNew(obj) PySequence_List(obj) +#endif +#ifndef PySet_CheckExact + #define PySet_CheckExact(obj) __Pyx_IS_TYPE(obj, &PySet_Type) +#endif +#if PY_VERSION_HEX >= 0x030900A4 + #define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt) + #define __Pyx_SET_SIZE(obj, size) Py_SET_SIZE(obj, size) +#else + #define __Pyx_SET_REFCNT(obj, refcnt) Py_REFCNT(obj) = (refcnt) + #define __Pyx_SET_SIZE(obj, size) Py_SIZE(obj) = (size) +#endif +#if CYTHON_ASSUME_SAFE_MACROS + #define __Pyx_PySequence_ITEM(o, i) PySequence_ITEM(o, i) + #define __Pyx_PySequence_SIZE(seq) Py_SIZE(seq) + #define __Pyx_PyTuple_SET_ITEM(o, i, v) (PyTuple_SET_ITEM(o, i, v), (0)) + #define __Pyx_PyList_SET_ITEM(o, i, v) (PyList_SET_ITEM(o, i, v), (0)) + #define __Pyx_PyTuple_GET_SIZE(o) PyTuple_GET_SIZE(o) + #define __Pyx_PyList_GET_SIZE(o) PyList_GET_SIZE(o) + #define __Pyx_PySet_GET_SIZE(o) PySet_GET_SIZE(o) + #define __Pyx_PyBytes_GET_SIZE(o) PyBytes_GET_SIZE(o) + #define __Pyx_PyByteArray_GET_SIZE(o) PyByteArray_GET_SIZE(o) +#else + #define __Pyx_PySequence_ITEM(o, i) PySequence_GetItem(o, i) + #define __Pyx_PySequence_SIZE(seq) PySequence_Size(seq) + #define __Pyx_PyTuple_SET_ITEM(o, i, v) PyTuple_SetItem(o, i, v) + #define __Pyx_PyList_SET_ITEM(o, i, v) PyList_SetItem(o, i, v) + #define __Pyx_PyTuple_GET_SIZE(o) PyTuple_Size(o) + #define __Pyx_PyList_GET_SIZE(o) PyList_Size(o) + #define __Pyx_PySet_GET_SIZE(o) PySet_Size(o) + #define __Pyx_PyBytes_GET_SIZE(o) PyBytes_Size(o) + #define __Pyx_PyByteArray_GET_SIZE(o) PyByteArray_Size(o) +#endif +#if __PYX_LIMITED_VERSION_HEX >= 0x030d00A1 + #define __Pyx_PyImport_AddModuleRef(name) PyImport_AddModuleRef(name) +#else + static CYTHON_INLINE PyObject *__Pyx_PyImport_AddModuleRef(const char *name) { + PyObject *module = PyImport_AddModule(name); + Py_XINCREF(module); + return module; + } +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyIntObject PyLongObject + #define PyInt_Type PyLong_Type + #define PyInt_Check(op) PyLong_Check(op) + #define PyInt_CheckExact(op) PyLong_CheckExact(op) + #define __Pyx_Py3Int_Check(op) PyLong_Check(op) + #define __Pyx_Py3Int_CheckExact(op) PyLong_CheckExact(op) + #define PyInt_FromString PyLong_FromString + #define PyInt_FromUnicode PyLong_FromUnicode + #define PyInt_FromLong PyLong_FromLong + #define PyInt_FromSize_t PyLong_FromSize_t + #define PyInt_FromSsize_t PyLong_FromSsize_t + #define PyInt_AsLong PyLong_AsLong + #define PyInt_AS_LONG PyLong_AS_LONG + #define PyInt_AsSsize_t PyLong_AsSsize_t + #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask + #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask + #define PyNumber_Int PyNumber_Long +#else + #define __Pyx_Py3Int_Check(op) (PyLong_Check(op) || PyInt_Check(op)) + #define __Pyx_Py3Int_CheckExact(op) (PyLong_CheckExact(op) || PyInt_CheckExact(op)) +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBoolObject PyLongObject +#endif +#if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY + #ifndef PyUnicode_InternFromString + #define PyUnicode_InternFromString(s) PyUnicode_FromString(s) + #endif +#endif +#if PY_VERSION_HEX < 0x030200A4 + typedef long Py_hash_t; + #define __Pyx_PyInt_FromHash_t PyInt_FromLong + #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsHash_t +#else + #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t + #define __Pyx_PyInt_AsHash_t __Pyx_PyIndex_AsSsize_t +#endif +#if CYTHON_USE_ASYNC_SLOTS + #if PY_VERSION_HEX >= 0x030500B1 + #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods + 
#define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async) + #else + #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved)) + #endif +#else + #define __Pyx_PyType_AsAsync(obj) NULL +#endif +#ifndef __Pyx_PyAsyncMethodsStruct + typedef struct { + unaryfunc am_await; + unaryfunc am_aiter; + unaryfunc am_anext; + } __Pyx_PyAsyncMethodsStruct; +#endif + +#if defined(_WIN32) || defined(WIN32) || defined(MS_WINDOWS) + #if !defined(_USE_MATH_DEFINES) + #define _USE_MATH_DEFINES + #endif +#endif +#include +#ifdef NAN +#define __PYX_NAN() ((float) NAN) +#else +static CYTHON_INLINE float __PYX_NAN() { + float value; + memset(&value, 0xFF, sizeof(value)); + return value; +} +#endif +#if defined(__CYGWIN__) && defined(_LDBL_EQ_DBL) +#define __Pyx_truncl trunc +#else +#define __Pyx_truncl truncl +#endif + +#define __PYX_MARK_ERR_POS(f_index, lineno) \ + { __pyx_filename = __pyx_f[f_index]; (void)__pyx_filename; __pyx_lineno = lineno; (void)__pyx_lineno; __pyx_clineno = __LINE__; (void)__pyx_clineno; } +#define __PYX_ERR(f_index, lineno, Ln_error) \ + { __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; } + +#ifdef CYTHON_EXTERN_C + #undef __PYX_EXTERN_C + #define __PYX_EXTERN_C CYTHON_EXTERN_C +#elif defined(__PYX_EXTERN_C) + #ifdef _MSC_VER + #pragma message ("Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead.") + #else + #warning Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead. + #endif +#else + #ifdef __cplusplus + #define __PYX_EXTERN_C extern "C" + #else + #define __PYX_EXTERN_C extern + #endif +#endif + +#define __PYX_HAVE__opencood__utils__box_overlaps +#define __PYX_HAVE_API__opencood__utils__box_overlaps +/* Early includes */ +#include +#include + + /* Using NumPy API declarations from "Cython/Includes/numpy/" */ + +#include "numpy/arrayobject.h" +#include "numpy/ndarrayobject.h" +#include "numpy/ndarraytypes.h" +#include "numpy/arrayscalars.h" +#include "numpy/ufuncobject.h" +#ifdef _OPENMP +#include +#endif /* _OPENMP */ + +#if defined(PYREX_WITHOUT_ASSERTIONS) && !defined(CYTHON_WITHOUT_ASSERTIONS) +#define CYTHON_WITHOUT_ASSERTIONS +#endif + +typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding; + const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; + +#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0 +#define __PYX_DEFAULT_STRING_ENCODING_IS_UTF8 0 +#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT (PY_MAJOR_VERSION >= 3 && __PYX_DEFAULT_STRING_ENCODING_IS_UTF8) +#define __PYX_DEFAULT_STRING_ENCODING "" +#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString +#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#define __Pyx_uchar_cast(c) ((unsigned char)c) +#define __Pyx_long_cast(x) ((long)x) +#define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\ + (sizeof(type) < sizeof(Py_ssize_t)) ||\ + (sizeof(type) > sizeof(Py_ssize_t) &&\ + likely(v < (type)PY_SSIZE_T_MAX ||\ + v == (type)PY_SSIZE_T_MAX) &&\ + (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\ + v == (type)PY_SSIZE_T_MIN))) ||\ + (sizeof(type) == sizeof(Py_ssize_t) &&\ + (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\ + v == (type)PY_SSIZE_T_MAX))) ) +static CYTHON_INLINE int __Pyx_is_valid_index(Py_ssize_t i, Py_ssize_t limit) { + return (size_t) i < (size_t) limit; +} +#if defined (__cplusplus) && __cplusplus >= 201103L + #include + #define __Pyx_sst_abs(value) std::abs(value) +#elif SIZEOF_INT >= SIZEOF_SIZE_T + 
#define __Pyx_sst_abs(value) abs(value) +#elif SIZEOF_LONG >= SIZEOF_SIZE_T + #define __Pyx_sst_abs(value) labs(value) +#elif defined (_MSC_VER) + #define __Pyx_sst_abs(value) ((Py_ssize_t)_abs64(value)) +#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + #define __Pyx_sst_abs(value) llabs(value) +#elif defined (__GNUC__) + #define __Pyx_sst_abs(value) __builtin_llabs(value) +#else + #define __Pyx_sst_abs(value) ((value<0) ? -value : value) +#endif +static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s); +static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject*); +static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); +static CYTHON_INLINE PyObject* __Pyx_PyByteArray_FromString(const char*); +#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l) +#define __Pyx_PyBytes_FromString PyBytes_FromString +#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*); +#if PY_MAJOR_VERSION < 3 + #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#else + #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize +#endif +#define __Pyx_PyBytes_AsWritableString(s) ((char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsWritableSString(s) ((signed char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsWritableUString(s) ((unsigned char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsString(s) ((const char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsSString(s) ((const signed char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyBytes_AsUString(s) ((const unsigned char*) PyBytes_AS_STRING(s)) +#define __Pyx_PyObject_AsWritableString(s) ((char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsWritableSString(s) ((signed char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsWritableUString(s) ((unsigned char*)(__pyx_uintptr_t) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsSString(s) ((const signed char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsUString(s) ((const unsigned char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s) +#define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s) +#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s) +#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s) +#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s) +#define __Pyx_PyUnicode_FromOrdinal(o) PyUnicode_FromOrdinal((int)o) +#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode +#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj) +#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None) +static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b); +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); +static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject*); +static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x); +#define __Pyx_PySequence_Tuple(obj)\ + (likely(PyTuple_CheckExact(obj)) ? 
__Pyx_NewRef(obj) : PySequence_Tuple(obj)) +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); +static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject*); +#if CYTHON_ASSUME_SAFE_MACROS +#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x)) +#else +#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x) +#endif +#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x)) +#if PY_MAJOR_VERSION >= 3 +#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x)) +#else +#define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Int(x)) +#endif +#if CYTHON_USE_PYLONG_INTERNALS + #if PY_VERSION_HEX >= 0x030C00A7 + #ifndef _PyLong_SIGN_MASK + #define _PyLong_SIGN_MASK 3 + #endif + #ifndef _PyLong_NON_SIZE_BITS + #define _PyLong_NON_SIZE_BITS 3 + #endif + #define __Pyx_PyLong_Sign(x) (((PyLongObject*)x)->long_value.lv_tag & _PyLong_SIGN_MASK) + #define __Pyx_PyLong_IsNeg(x) ((__Pyx_PyLong_Sign(x) & 2) != 0) + #define __Pyx_PyLong_IsNonNeg(x) (!__Pyx_PyLong_IsNeg(x)) + #define __Pyx_PyLong_IsZero(x) (__Pyx_PyLong_Sign(x) & 1) + #define __Pyx_PyLong_IsPos(x) (__Pyx_PyLong_Sign(x) == 0) + #define __Pyx_PyLong_CompactValueUnsigned(x) (__Pyx_PyLong_Digits(x)[0]) + #define __Pyx_PyLong_DigitCount(x) ((Py_ssize_t) (((PyLongObject*)x)->long_value.lv_tag >> _PyLong_NON_SIZE_BITS)) + #define __Pyx_PyLong_SignedDigitCount(x)\ + ((1 - (Py_ssize_t) __Pyx_PyLong_Sign(x)) * __Pyx_PyLong_DigitCount(x)) + #if defined(PyUnstable_Long_IsCompact) && defined(PyUnstable_Long_CompactValue) + #define __Pyx_PyLong_IsCompact(x) PyUnstable_Long_IsCompact((PyLongObject*) x) + #define __Pyx_PyLong_CompactValue(x) PyUnstable_Long_CompactValue((PyLongObject*) x) + #else + #define __Pyx_PyLong_IsCompact(x) (((PyLongObject*)x)->long_value.lv_tag < (2 << _PyLong_NON_SIZE_BITS)) + #define __Pyx_PyLong_CompactValue(x) ((1 - (Py_ssize_t) __Pyx_PyLong_Sign(x)) * (Py_ssize_t) __Pyx_PyLong_Digits(x)[0]) + #endif + typedef Py_ssize_t __Pyx_compact_pylong; + typedef size_t __Pyx_compact_upylong; + #else + #define __Pyx_PyLong_IsNeg(x) (Py_SIZE(x) < 0) + #define __Pyx_PyLong_IsNonNeg(x) (Py_SIZE(x) >= 0) + #define __Pyx_PyLong_IsZero(x) (Py_SIZE(x) == 0) + #define __Pyx_PyLong_IsPos(x) (Py_SIZE(x) > 0) + #define __Pyx_PyLong_CompactValueUnsigned(x) ((Py_SIZE(x) == 0) ? 0 : __Pyx_PyLong_Digits(x)[0]) + #define __Pyx_PyLong_DigitCount(x) __Pyx_sst_abs(Py_SIZE(x)) + #define __Pyx_PyLong_SignedDigitCount(x) Py_SIZE(x) + #define __Pyx_PyLong_IsCompact(x) (Py_SIZE(x) == 0 || Py_SIZE(x) == 1 || Py_SIZE(x) == -1) + #define __Pyx_PyLong_CompactValue(x)\ + ((Py_SIZE(x) == 0) ? (sdigit) 0 : ((Py_SIZE(x) < 0) ? 
-(sdigit)__Pyx_PyLong_Digits(x)[0] : (sdigit)__Pyx_PyLong_Digits(x)[0])) + typedef sdigit __Pyx_compact_pylong; + typedef digit __Pyx_compact_upylong; + #endif + #if PY_VERSION_HEX >= 0x030C00A5 + #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->long_value.ob_digit) + #else + #define __Pyx_PyLong_Digits(x) (((PyLongObject*)x)->ob_digit) + #endif +#endif +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII +#include <string.h> +static int __Pyx_sys_getdefaultencoding_not_ascii; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + PyObject* ascii_chars_u = NULL; + PyObject* ascii_chars_b = NULL; + const char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + if (strcmp(default_encoding_c, "ascii") == 0) { + __Pyx_sys_getdefaultencoding_not_ascii = 0; + } else { + char ascii_chars[128]; + int c; + for (c = 0; c < 128; c++) { + ascii_chars[c] = (char) c; + } + __Pyx_sys_getdefaultencoding_not_ascii = 1; + ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL); + if (!ascii_chars_u) goto bad; + ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL); + if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) { + PyErr_Format( + PyExc_ValueError, + "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.", + default_encoding_c); + goto bad; + } + Py_DECREF(ascii_chars_u); + Py_DECREF(ascii_chars_b); + } + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + Py_XDECREF(ascii_chars_u); + Py_XDECREF(ascii_chars_b); + return -1; +} +#endif +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3 +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL) +#else +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL) +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT +#include <string.h> +static char* __PYX_DEFAULT_STRING_ENCODING; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1); + if (!__PYX_DEFAULT_STRING_ENCODING) goto bad; + strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c); + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + return -1; +} +#endif +#endif + + +/* Test for GCC > 2.95 */ +#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))) + #define likely(x) __builtin_expect(!!(x), 1) + #define unlikely(x) __builtin_expect(!!(x), 0) +#else /* !__GNUC__ or GCC < 2.95 */ + #define likely(x) (x) + #define unlikely(x) (x) +#endif /* __GNUC__ */ +static CYTHON_INLINE void __Pyx_pretend_to_initialize(void* ptr) { (void)ptr; } + +#if !CYTHON_USE_MODULE_STATE 
+static PyObject *__pyx_m = NULL; +#endif +static int __pyx_lineno; +static int __pyx_clineno = 0; +static const char * __pyx_cfilenm = __FILE__; +static const char *__pyx_filename; + +/* Header.proto */ +#if !defined(CYTHON_CCOMPLEX) + #if defined(__cplusplus) + #define CYTHON_CCOMPLEX 1 + #elif (defined(_Complex_I) && !defined(_MSC_VER)) || ((defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_COMPLEX__) && !defined(_MSC_VER)) + #define CYTHON_CCOMPLEX 1 + #else + #define CYTHON_CCOMPLEX 0 + #endif +#endif +#if CYTHON_CCOMPLEX + #ifdef __cplusplus + #include <complex> + #else + #include <complex.h> + #endif +#endif +#if CYTHON_CCOMPLEX && !defined(__cplusplus) && defined(__sun__) && defined(__GNUC__) + #undef _Complex_I + #define _Complex_I 1.0fj +#endif + +/* #### Code section: filename_table ### */ + +static const char *__pyx_f[] = { + "opencood/utils/box_overlaps.pyx", + "__init__.pxd", + "type.pxd", +}; +/* #### Code section: utility_code_proto_before_types ### */ +/* ForceInitThreads.proto */ +#ifndef __PYX_FORCE_INIT_THREADS + #define __PYX_FORCE_INIT_THREADS 0 +#endif + +/* BufferFormatStructs.proto */ +struct __Pyx_StructField_; +#define __PYX_BUF_FLAGS_PACKED_STRUCT (1 << 0) +typedef struct { + const char* name; + struct __Pyx_StructField_* fields; + size_t size; + size_t arraysize[8]; + int ndim; + char typegroup; + char is_unsigned; + int flags; +} __Pyx_TypeInfo; +typedef struct __Pyx_StructField_ { + __Pyx_TypeInfo* type; + const char* name; + size_t offset; +} __Pyx_StructField; +typedef struct { + __Pyx_StructField* field; + size_t parent_offset; +} __Pyx_BufFmt_StackElem; +typedef struct { + __Pyx_StructField root; + __Pyx_BufFmt_StackElem* head; + size_t fmt_offset; + size_t new_count, enc_count; + size_t struct_alignment; + int is_complex; + char enc_type; + char new_packmode; + char enc_packmode; + char is_valid_array; +} __Pyx_BufFmt_Context; + +/* #### Code section: numeric_typedefs ### */ + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":736 + * # in Cython to enable them only on the right systems. 
+ * + * ctypedef npy_int8 int8_t # <<<<<<<<<<<<<< + * ctypedef npy_int16 int16_t + * ctypedef npy_int32 int32_t + */ +typedef npy_int8 __pyx_t_5numpy_int8_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":737 + * + * ctypedef npy_int8 int8_t + * ctypedef npy_int16 int16_t # <<<<<<<<<<<<<< + * ctypedef npy_int32 int32_t + * ctypedef npy_int64 int64_t + */ +typedef npy_int16 __pyx_t_5numpy_int16_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":738 + * ctypedef npy_int8 int8_t + * ctypedef npy_int16 int16_t + * ctypedef npy_int32 int32_t # <<<<<<<<<<<<<< + * ctypedef npy_int64 int64_t + * #ctypedef npy_int96 int96_t + */ +typedef npy_int32 __pyx_t_5numpy_int32_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":739 + * ctypedef npy_int16 int16_t + * ctypedef npy_int32 int32_t + * ctypedef npy_int64 int64_t # <<<<<<<<<<<<<< + * #ctypedef npy_int96 int96_t + * #ctypedef npy_int128 int128_t + */ +typedef npy_int64 __pyx_t_5numpy_int64_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":743 + * #ctypedef npy_int128 int128_t + * + * ctypedef npy_uint8 uint8_t # <<<<<<<<<<<<<< + * ctypedef npy_uint16 uint16_t + * ctypedef npy_uint32 uint32_t + */ +typedef npy_uint8 __pyx_t_5numpy_uint8_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":744 + * + * ctypedef npy_uint8 uint8_t + * ctypedef npy_uint16 uint16_t # <<<<<<<<<<<<<< + * ctypedef npy_uint32 uint32_t + * ctypedef npy_uint64 uint64_t + */ +typedef npy_uint16 __pyx_t_5numpy_uint16_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":745 + * ctypedef npy_uint8 uint8_t + * ctypedef npy_uint16 uint16_t + * ctypedef npy_uint32 uint32_t # <<<<<<<<<<<<<< + * ctypedef npy_uint64 uint64_t + * #ctypedef npy_uint96 uint96_t + */ +typedef npy_uint32 __pyx_t_5numpy_uint32_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":746 + * ctypedef npy_uint16 uint16_t + * ctypedef npy_uint32 uint32_t + * ctypedef npy_uint64 uint64_t # <<<<<<<<<<<<<< + * #ctypedef npy_uint96 uint96_t + * #ctypedef npy_uint128 uint128_t + */ +typedef npy_uint64 __pyx_t_5numpy_uint64_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":750 + * #ctypedef npy_uint128 uint128_t + * + * ctypedef npy_float32 float32_t # <<<<<<<<<<<<<< + * ctypedef npy_float64 float64_t + * #ctypedef npy_float80 float80_t + */ +typedef npy_float32 __pyx_t_5numpy_float32_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":751 + * + * ctypedef npy_float32 float32_t + * ctypedef npy_float64 float64_t # <<<<<<<<<<<<<< + * #ctypedef npy_float80 float80_t + * #ctypedef npy_float128 float128_t + */ +typedef npy_float64 __pyx_t_5numpy_float64_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":760 + * # The int types are mapped a bit surprising -- + * # numpy.int corresponds to 'l' and numpy.long to 'q' + * ctypedef npy_long int_t # <<<<<<<<<<<<<< + * ctypedef npy_longlong longlong_t + * + */ +typedef npy_long __pyx_t_5numpy_int_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":761 + * # numpy.int 
corresponds to 'l' and numpy.long to 'q' + * ctypedef npy_long int_t + * ctypedef npy_longlong longlong_t # <<<<<<<<<<<<<< + * + * ctypedef npy_ulong uint_t + */ +typedef npy_longlong __pyx_t_5numpy_longlong_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":763 + * ctypedef npy_longlong longlong_t + * + * ctypedef npy_ulong uint_t # <<<<<<<<<<<<<< + * ctypedef npy_ulonglong ulonglong_t + * + */ +typedef npy_ulong __pyx_t_5numpy_uint_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":764 + * + * ctypedef npy_ulong uint_t + * ctypedef npy_ulonglong ulonglong_t # <<<<<<<<<<<<<< + * + * ctypedef npy_intp intp_t + */ +typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":766 + * ctypedef npy_ulonglong ulonglong_t + * + * ctypedef npy_intp intp_t # <<<<<<<<<<<<<< + * ctypedef npy_uintp uintp_t + * + */ +typedef npy_intp __pyx_t_5numpy_intp_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":767 + * + * ctypedef npy_intp intp_t + * ctypedef npy_uintp uintp_t # <<<<<<<<<<<<<< + * + * ctypedef npy_double float_t + */ +typedef npy_uintp __pyx_t_5numpy_uintp_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":769 + * ctypedef npy_uintp uintp_t + * + * ctypedef npy_double float_t # <<<<<<<<<<<<<< + * ctypedef npy_double double_t + * ctypedef npy_longdouble longdouble_t + */ +typedef npy_double __pyx_t_5numpy_float_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":770 + * + * ctypedef npy_double float_t + * ctypedef npy_double double_t # <<<<<<<<<<<<<< + * ctypedef npy_longdouble longdouble_t + * + */ +typedef npy_double __pyx_t_5numpy_double_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":771 + * ctypedef npy_double float_t + * ctypedef npy_double double_t + * ctypedef npy_longdouble longdouble_t # <<<<<<<<<<<<<< + * + * ctypedef npy_cfloat cfloat_t + */ +typedef npy_longdouble __pyx_t_5numpy_longdouble_t; + +/* "opencood/utils/box_overlaps.pyx":14 + * + * DTYPE = np.float32 + * ctypedef float DTYPE_t # <<<<<<<<<<<<<< + * + * + */ +typedef float __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t; +/* #### Code section: complex_type_declarations ### */ +/* Declarations.proto */ +#if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) + #ifdef __cplusplus + typedef ::std::complex< float > __pyx_t_float_complex; + #else + typedef float _Complex __pyx_t_float_complex; + #endif +#else + typedef struct { float real, imag; } __pyx_t_float_complex; +#endif +static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float, float); + +/* Declarations.proto */ +#if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) + #ifdef __cplusplus + typedef ::std::complex< double > __pyx_t_double_complex; + #else + typedef double _Complex __pyx_t_double_complex; + #endif +#else + typedef struct { double real, imag; } __pyx_t_double_complex; +#endif +static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double, double); + +/* #### Code section: type_declarations ### */ + +/*--- Type declarations ---*/ + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":773 + * ctypedef npy_longdouble longdouble_t + * + * 
ctypedef npy_cfloat cfloat_t # <<<<<<<<<<<<<< + * ctypedef npy_cdouble cdouble_t + * ctypedef npy_clongdouble clongdouble_t + */ +typedef npy_cfloat __pyx_t_5numpy_cfloat_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":774 + * + * ctypedef npy_cfloat cfloat_t + * ctypedef npy_cdouble cdouble_t # <<<<<<<<<<<<<< + * ctypedef npy_clongdouble clongdouble_t + * + */ +typedef npy_cdouble __pyx_t_5numpy_cdouble_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":775 + * ctypedef npy_cfloat cfloat_t + * ctypedef npy_cdouble cdouble_t + * ctypedef npy_clongdouble clongdouble_t # <<<<<<<<<<<<<< + * + * ctypedef npy_cdouble complex_t + */ +typedef npy_clongdouble __pyx_t_5numpy_clongdouble_t; + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":777 + * ctypedef npy_clongdouble clongdouble_t + * + * ctypedef npy_cdouble complex_t # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew1(a): + */ +typedef npy_cdouble __pyx_t_5numpy_complex_t; +/* #### Code section: utility_code_proto ### */ + +/* --- Runtime support code (head) --- */ +/* Refnanny.proto */ +#ifndef CYTHON_REFNANNY + #define CYTHON_REFNANNY 0 +#endif +#if CYTHON_REFNANNY + typedef struct { + void (*INCREF)(void*, PyObject*, Py_ssize_t); + void (*DECREF)(void*, PyObject*, Py_ssize_t); + void (*GOTREF)(void*, PyObject*, Py_ssize_t); + void (*GIVEREF)(void*, PyObject*, Py_ssize_t); + void* (*SetupContext)(const char*, Py_ssize_t, const char*); + void (*FinishContext)(void**); + } __Pyx_RefNannyAPIStruct; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); + #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL; +#ifdef WITH_THREAD + #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + if (acquire_gil) {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__));\ + PyGILState_Release(__pyx_gilstate_save);\ + } else {\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__));\ + } + #define __Pyx_RefNannyFinishContextNogil() {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __Pyx_RefNannyFinishContext();\ + PyGILState_Release(__pyx_gilstate_save);\ + } +#else + #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), (__LINE__), (__FILE__)) + #define __Pyx_RefNannyFinishContextNogil() __Pyx_RefNannyFinishContext() +#endif + #define __Pyx_RefNannyFinishContextNogil() {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __Pyx_RefNannyFinishContext();\ + PyGILState_Release(__pyx_gilstate_save);\ + } + #define __Pyx_RefNannyFinishContext()\ + __Pyx_RefNanny->FinishContext(&__pyx_refnanny) + #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), (__LINE__)) + #define __Pyx_XINCREF(r) do { if((r) == NULL); else {__Pyx_INCREF(r); }} while(0) + #define __Pyx_XDECREF(r) do { if((r) == NULL); else {__Pyx_DECREF(r); }} while(0) + #define __Pyx_XGOTREF(r) do { if((r) == NULL); else 
{__Pyx_GOTREF(r); }} while(0) + #define __Pyx_XGIVEREF(r) do { if((r) == NULL); else {__Pyx_GIVEREF(r);}} while(0) +#else + #define __Pyx_RefNannyDeclarations + #define __Pyx_RefNannySetupContext(name, acquire_gil) + #define __Pyx_RefNannyFinishContextNogil() + #define __Pyx_RefNannyFinishContext() + #define __Pyx_INCREF(r) Py_INCREF(r) + #define __Pyx_DECREF(r) Py_DECREF(r) + #define __Pyx_GOTREF(r) + #define __Pyx_GIVEREF(r) + #define __Pyx_XINCREF(r) Py_XINCREF(r) + #define __Pyx_XDECREF(r) Py_XDECREF(r) + #define __Pyx_XGOTREF(r) + #define __Pyx_XGIVEREF(r) +#endif +#define __Pyx_Py_XDECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; Py_XDECREF(tmp);\ + } while (0) +#define __Pyx_XDECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_XDECREF(tmp);\ + } while (0) +#define __Pyx_DECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_DECREF(tmp);\ + } while (0) +#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) +#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) + +/* PyErrExceptionMatches.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyErr_ExceptionMatches(err) __Pyx_PyErr_ExceptionMatchesInState(__pyx_tstate, err) +static CYTHON_INLINE int __Pyx_PyErr_ExceptionMatchesInState(PyThreadState* tstate, PyObject* err); +#else +#define __Pyx_PyErr_ExceptionMatches(err) PyErr_ExceptionMatches(err) +#endif + +/* PyThreadStateGet.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate; +#define __Pyx_PyThreadState_assign __pyx_tstate = __Pyx_PyThreadState_Current; +#if PY_VERSION_HEX >= 0x030C00A6 +#define __Pyx_PyErr_Occurred() (__pyx_tstate->current_exception != NULL) +#define __Pyx_PyErr_CurrentExceptionType() (__pyx_tstate->current_exception ? 
(PyObject*) Py_TYPE(__pyx_tstate->current_exception) : (PyObject*) NULL) +#else +#define __Pyx_PyErr_Occurred() (__pyx_tstate->curexc_type != NULL) +#define __Pyx_PyErr_CurrentExceptionType() (__pyx_tstate->curexc_type) +#endif +#else +#define __Pyx_PyThreadState_declare +#define __Pyx_PyThreadState_assign +#define __Pyx_PyErr_Occurred() (PyErr_Occurred() != NULL) +#define __Pyx_PyErr_CurrentExceptionType() PyErr_Occurred() +#endif + +/* PyErrFetchRestore.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_PyErr_Clear() __Pyx_ErrRestore(NULL, NULL, NULL) +#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A6 +#define __Pyx_PyErr_SetNone(exc) (Py_INCREF(exc), __Pyx_ErrRestore((exc), NULL, NULL)) +#else +#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) +#endif +#else +#define __Pyx_PyErr_Clear() PyErr_Clear() +#define __Pyx_PyErr_SetNone(exc) PyErr_SetNone(exc) +#define __Pyx_ErrRestoreWithState(type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestoreInState(tstate, type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchInState(tstate, type, value, tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) PyErr_Fetch(type, value, tb) +#endif + +/* PyObjectGetAttrStr.proto */ +#if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name); +#else +#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n) +#endif + +/* PyObjectGetAttrStrNoError.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name); + +/* GetBuiltinName.proto */ +static PyObject *__Pyx_GetBuiltinName(PyObject *name); + +/* GetTopmostException.proto */ +#if CYTHON_USE_EXC_INFO_STACK && CYTHON_FAST_THREAD_STATE +static _PyErr_StackItem * __Pyx_PyErr_GetTopmostException(PyThreadState *tstate); +#endif + +/* SaveResetException.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_ExceptionSave(type, value, tb) __Pyx__ExceptionSave(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#define __Pyx_ExceptionReset(type, value, tb) __Pyx__ExceptionReset(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); +#else +#define __Pyx_ExceptionSave(type, value, tb) PyErr_GetExcInfo(type, value, tb) +#define __Pyx_ExceptionReset(type, value, tb) PyErr_SetExcInfo(type, value, tb) +#endif + +/* GetException.proto */ +#if CYTHON_FAST_THREAD_STATE +#define __Pyx_GetException(type, value, tb) __Pyx__GetException(__pyx_tstate, type, value, tb) +static int 
__Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#else +static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb); +#endif + +/* PyObjectCall.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); +#else +#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw) +#endif + +/* RaiseException.proto */ +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); + +/* TupleAndListFromArray.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyList_FromArray(PyObject *const *src, Py_ssize_t n); +static CYTHON_INLINE PyObject* __Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n); +#endif + +/* IncludeStringH.proto */ +#include <string.h> + +/* BytesEquals.proto */ +static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals); + +/* UnicodeEquals.proto */ +static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals); + +/* fastcall.proto */ +#if CYTHON_AVOID_BORROWED_REFS + #define __Pyx_Arg_VARARGS(args, i) PySequence_GetItem(args, i) +#elif CYTHON_ASSUME_SAFE_MACROS + #define __Pyx_Arg_VARARGS(args, i) PyTuple_GET_ITEM(args, i) +#else + #define __Pyx_Arg_VARARGS(args, i) PyTuple_GetItem(args, i) +#endif +#if CYTHON_AVOID_BORROWED_REFS + #define __Pyx_Arg_NewRef_VARARGS(arg) __Pyx_NewRef(arg) + #define __Pyx_Arg_XDECREF_VARARGS(arg) Py_XDECREF(arg) +#else + #define __Pyx_Arg_NewRef_VARARGS(arg) arg + #define __Pyx_Arg_XDECREF_VARARGS(arg) +#endif +#define __Pyx_NumKwargs_VARARGS(kwds) PyDict_Size(kwds) +#define __Pyx_KwValues_VARARGS(args, nargs) NULL +#define __Pyx_GetKwValue_VARARGS(kw, kwvalues, s) __Pyx_PyDict_GetItemStrWithError(kw, s) +#define __Pyx_KwargsAsDict_VARARGS(kw, kwvalues) PyDict_Copy(kw) +#if CYTHON_METH_FASTCALL + #define __Pyx_Arg_FASTCALL(args, i) args[i] + #define __Pyx_NumKwargs_FASTCALL(kwds) PyTuple_GET_SIZE(kwds) + #define __Pyx_KwValues_FASTCALL(args, nargs) ((args) + (nargs)) + static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues, PyObject *s); +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 + CYTHON_UNUSED static PyObject *__Pyx_KwargsAsDict_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues); + #else + #define __Pyx_KwargsAsDict_FASTCALL(kw, kwvalues) _PyStack_AsDict(kwvalues, kw) + #endif + #define __Pyx_Arg_NewRef_FASTCALL(arg) arg /* no-op, __Pyx_Arg_FASTCALL is direct and this needs + to have the same reference counting */ + #define __Pyx_Arg_XDECREF_FASTCALL(arg) +#else + #define __Pyx_Arg_FASTCALL __Pyx_Arg_VARARGS + #define __Pyx_NumKwargs_FASTCALL __Pyx_NumKwargs_VARARGS + #define __Pyx_KwValues_FASTCALL __Pyx_KwValues_VARARGS + #define __Pyx_GetKwValue_FASTCALL __Pyx_GetKwValue_VARARGS + #define __Pyx_KwargsAsDict_FASTCALL __Pyx_KwargsAsDict_VARARGS + #define __Pyx_Arg_NewRef_FASTCALL(arg) __Pyx_Arg_NewRef_VARARGS(arg) + #define __Pyx_Arg_XDECREF_FASTCALL(arg) __Pyx_Arg_XDECREF_VARARGS(arg) +#endif +#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS +#define __Pyx_ArgsSlice_VARARGS(args, start, stop) __Pyx_PyTuple_FromArray(&__Pyx_Arg_VARARGS(args, start), stop - start) +#define __Pyx_ArgsSlice_FASTCALL(args, start, stop) __Pyx_PyTuple_FromArray(&__Pyx_Arg_FASTCALL(args, start), stop - start) +#else +#define __Pyx_ArgsSlice_VARARGS(args, start, stop) PyTuple_GetSlice(args, start, 
stop) +#define __Pyx_ArgsSlice_FASTCALL(args, start, stop) PyTuple_GetSlice(args, start, stop) +#endif + +/* RaiseArgTupleInvalid.proto */ +static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, + Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); + +/* RaiseDoubleKeywords.proto */ +static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); + +/* ParseKeywords.proto */ +static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject *const *kwvalues, + PyObject **argnames[], + PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args, + const char* function_name); + +/* ArgTypeTest.proto */ +#define __Pyx_ArgTypeTest(obj, type, none_allowed, name, exact)\ + ((likely(__Pyx_IS_TYPE(obj, type) | (none_allowed && (obj == Py_None)))) ? 1 :\ + __Pyx__ArgTypeTest(obj, type, name, exact)) +static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact); + +/* IsLittleEndian.proto */ +static CYTHON_INLINE int __Pyx_Is_Little_Endian(void); + +/* BufferFormatCheck.proto */ +static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts); +static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx, + __Pyx_BufFmt_StackElem* stack, + __Pyx_TypeInfo* type); + +/* BufferGetAndValidate.proto */ +#define __Pyx_GetBufferAndValidate(buf, obj, dtype, flags, nd, cast, stack)\ + ((obj == Py_None || obj == NULL) ?\ + (__Pyx_ZeroBuffer(buf), 0) :\ + __Pyx__GetBufferAndValidate(buf, obj, dtype, flags, nd, cast, stack)) +static int __Pyx__GetBufferAndValidate(Py_buffer* buf, PyObject* obj, + __Pyx_TypeInfo* dtype, int flags, int nd, int cast, __Pyx_BufFmt_StackElem* stack); +static void __Pyx_ZeroBuffer(Py_buffer* buf); +static CYTHON_INLINE void __Pyx_SafeReleaseBuffer(Py_buffer* info); +static Py_ssize_t __Pyx_minusones[] = { -1, -1, -1, -1, -1, -1, -1, -1 }; +static Py_ssize_t __Pyx_zeros[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + +/* PyDictVersioning.proto */ +#if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS +#define __PYX_DICT_VERSION_INIT ((PY_UINT64_T) -1) +#define __PYX_GET_DICT_VERSION(dict) (((PyDictObject*)(dict))->ma_version_tag) +#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var)\ + (version_var) = __PYX_GET_DICT_VERSION(dict);\ + (cache_var) = (value); +#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) {\ + static PY_UINT64_T __pyx_dict_version = 0;\ + static PyObject *__pyx_dict_cached_value = NULL;\ + if (likely(__PYX_GET_DICT_VERSION(DICT) == __pyx_dict_version)) {\ + (VAR) = __pyx_dict_cached_value;\ + } else {\ + (VAR) = __pyx_dict_cached_value = (LOOKUP);\ + __pyx_dict_version = __PYX_GET_DICT_VERSION(DICT);\ + }\ +} +static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj); +static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj); +static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version); +#else +#define __PYX_GET_DICT_VERSION(dict) (0) +#define __PYX_UPDATE_DICT_CACHE(dict, value, cache_var, version_var) +#define __PYX_PY_DICT_LOOKUP_IF_MODIFIED(VAR, DICT, LOOKUP) (VAR) = (LOOKUP); +#endif + +/* GetModuleGlobalName.proto */ +#if CYTHON_USE_DICT_VERSIONS +#define __Pyx_GetModuleGlobalName(var, name) do {\ + static PY_UINT64_T __pyx_dict_version = 0;\ + static PyObject *__pyx_dict_cached_value = NULL;\ + (var) = (likely(__pyx_dict_version == __PYX_GET_DICT_VERSION(__pyx_d))) ?\ + (likely(__pyx_dict_cached_value) ? 
__Pyx_NewRef(__pyx_dict_cached_value) : __Pyx_GetBuiltinName(name)) :\ + __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\ +} while(0) +#define __Pyx_GetModuleGlobalNameUncached(var, name) do {\ + PY_UINT64_T __pyx_dict_version;\ + PyObject *__pyx_dict_cached_value;\ + (var) = __Pyx__GetModuleGlobalName(name, &__pyx_dict_version, &__pyx_dict_cached_value);\ +} while(0) +static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value); +#else +#define __Pyx_GetModuleGlobalName(var, name) (var) = __Pyx__GetModuleGlobalName(name) +#define __Pyx_GetModuleGlobalNameUncached(var, name) (var) = __Pyx__GetModuleGlobalName(name) +static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name); +#endif + +/* ExtTypeTest.proto */ +static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type); + +/* BufferIndexError.proto */ +static void __Pyx_RaiseBufferIndexError(int axis); + +#define __Pyx_BufPtrStrided2d(type, buf, i0, s0, i1, s1) (type)((char*)buf + i0 * s0 + i1 * s1) +/* GetItemInt.proto */ +#define __Pyx_GetItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound, boundscheck) :\ + (is_list ? (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL) :\ + __Pyx_GetItemInt_Generic(o, to_py_func(i)))) +#define __Pyx_GetItemInt_List(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_List_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ + (PyErr_SetString(PyExc_IndexError, "list index out of range"), (PyObject*)NULL)) +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, + int wraparound, int boundscheck); +#define __Pyx_GetItemInt_Tuple(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_GetItemInt_Tuple_Fast(o, (Py_ssize_t)i, wraparound, boundscheck) :\ + (PyErr_SetString(PyExc_IndexError, "tuple index out of range"), (PyObject*)NULL)) +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, + int wraparound, int boundscheck); +static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j); +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, + int is_list, int wraparound, int boundscheck); + +/* PyFunctionFastCall.proto */ +#if CYTHON_FAST_PYCALL +#if !CYTHON_VECTORCALL +#define __Pyx_PyFunction_FastCall(func, args, nargs)\ + __Pyx_PyFunction_FastCallDict((func), (args), (nargs), NULL) +static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs); +#endif +#define __Pyx_BUILD_ASSERT_EXPR(cond)\ + (sizeof(char [1 - 2*!(cond)]) - 1) +#ifndef Py_MEMBER_SIZE +#define Py_MEMBER_SIZE(type, member) sizeof(((type *)0)->member) +#endif +#if !CYTHON_VECTORCALL +#if PY_VERSION_HEX >= 0x03080000 + #include "frameobject.h" +#if PY_VERSION_HEX >= 0x030b00a6 && !CYTHON_COMPILING_IN_LIMITED_API + #ifndef Py_BUILD_CORE + #define Py_BUILD_CORE 1 + #endif + #include "internal/pycore_frame.h" +#endif + #define __Pxy_PyFrame_Initialize_Offsets() + #define __Pyx_PyFrame_GetLocalsplus(frame) ((frame)->f_localsplus) +#else + static size_t __pyx_pyframe_localsplus_offset = 0; + #include "frameobject.h" + #define __Pxy_PyFrame_Initialize_Offsets()\ + 
((void)__Pyx_BUILD_ASSERT_EXPR(sizeof(PyFrameObject) == offsetof(PyFrameObject, f_localsplus) + Py_MEMBER_SIZE(PyFrameObject, f_localsplus)),\ + (void)(__pyx_pyframe_localsplus_offset = ((size_t)PyFrame_Type.tp_basicsize) - Py_MEMBER_SIZE(PyFrameObject, f_localsplus))) + #define __Pyx_PyFrame_GetLocalsplus(frame)\ + (assert(__pyx_pyframe_localsplus_offset), (PyObject **)(((char *)(frame)) + __pyx_pyframe_localsplus_offset)) +#endif +#endif +#endif + +/* PyObjectCallMethO.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); +#endif + +/* PyObjectFastCall.proto */ +#define __Pyx_PyObject_FastCall(func, args, nargs) __Pyx_PyObject_FastCallDict(func, args, (size_t)(nargs), NULL) +static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCallDict(PyObject *func, PyObject **args, size_t nargs, PyObject *kwargs); + +/* PyObjectCallOneArg.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); + +/* ObjectGetItem.proto */ +#if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PyObject *__Pyx_PyObject_GetItem(PyObject *obj, PyObject *key); +#else +#define __Pyx_PyObject_GetItem(obj, key) PyObject_GetItem(obj, key) +#endif + +/* BufferFallbackError.proto */ +static void __Pyx_RaiseBufferFallbackError(void); + +#define __Pyx_BufPtrStrided1d(type, buf, i0, s0) (type)((char*)buf + i0 * s0) +/* PyIntBinop.proto */ +#if !CYTHON_COMPILING_IN_PYPY +static PyObject* __Pyx_PyInt_AddObjC(PyObject *op1, PyObject *op2, long intval, int inplace, int zerodivision_check); +#else +#define __Pyx_PyInt_AddObjC(op1, op2, intval, inplace, zerodivision_check)\ + (inplace ? PyNumber_InPlaceAdd(op1, op2) : PyNumber_Add(op1, op2)) +#endif + +/* SliceObject.proto */ +#define __Pyx_PyObject_DelSlice(obj, cstart, cstop, py_start, py_stop, py_slice, has_cstart, has_cstop, wraparound)\ + __Pyx_PyObject_SetSlice(obj, (PyObject*)NULL, cstart, cstop, py_start, py_stop, py_slice, has_cstart, has_cstop, wraparound) +static CYTHON_INLINE int __Pyx_PyObject_SetSlice( + PyObject* obj, PyObject* value, Py_ssize_t cstart, Py_ssize_t cstop, + PyObject** py_start, PyObject** py_stop, PyObject** py_slice, + int has_cstart, int has_cstop, int wraparound); + +/* SetItemInt.proto */ +#define __Pyx_SetItemInt(o, i, v, type, is_signed, to_py_func, is_list, wraparound, boundscheck)\ + (__Pyx_fits_Py_ssize_t(i, type, is_signed) ?\ + __Pyx_SetItemInt_Fast(o, (Py_ssize_t)i, v, is_list, wraparound, boundscheck) :\ + (is_list ? 
(PyErr_SetString(PyExc_IndexError, "list assignment index out of range"), -1) :\ + __Pyx_SetItemInt_Generic(o, to_py_func(i), v))) +static int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v); +static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v, + int is_list, int wraparound, int boundscheck); + +/* TypeImport.proto */ +#ifndef __PYX_HAVE_RT_ImportType_proto_3_0_11 +#define __PYX_HAVE_RT_ImportType_proto_3_0_11 +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +#include <stdalign.h> +#endif +#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) || __cplusplus >= 201103L +#define __PYX_GET_STRUCT_ALIGNMENT_3_0_11(s) alignof(s) +#else +#define __PYX_GET_STRUCT_ALIGNMENT_3_0_11(s) sizeof(void*) +#endif +enum __Pyx_ImportType_CheckSize_3_0_11 { + __Pyx_ImportType_CheckSize_Error_3_0_11 = 0, + __Pyx_ImportType_CheckSize_Warn_3_0_11 = 1, + __Pyx_ImportType_CheckSize_Ignore_3_0_11 = 2 +}; +static PyTypeObject *__Pyx_ImportType_3_0_11(PyObject* module, const char *module_name, const char *class_name, size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_0_11 check_size); +#endif + +/* Import.proto */ +static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); + +/* ImportDottedModule.proto */ +static PyObject *__Pyx_ImportDottedModule(PyObject *name, PyObject *parts_tuple); +#if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx_ImportDottedModule_WalkParts(PyObject *module, PyObject *name, PyObject *parts_tuple); +#endif + +/* IncludeStructmemberH.proto */ +#include <structmember.h> + +/* FixUpExtensionType.proto */ +#if CYTHON_USE_TYPE_SPECS +static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject *type); +#endif + +/* FetchSharedCythonModule.proto */ +static PyObject *__Pyx_FetchSharedCythonABIModule(void); + +/* FetchCommonType.proto */ +#if !CYTHON_USE_TYPE_SPECS +static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type); +#else +static PyTypeObject* __Pyx_FetchCommonTypeFromSpec(PyObject *module, PyType_Spec *spec, PyObject *bases); +#endif + +/* PyMethodNew.proto */ +#if CYTHON_COMPILING_IN_LIMITED_API +static PyObject *__Pyx_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) { + PyObject *typesModule=NULL, *methodType=NULL, *result=NULL; + CYTHON_UNUSED_VAR(typ); + if (!self) + return __Pyx_NewRef(func); + typesModule = PyImport_ImportModule("types"); + if (!typesModule) return NULL; + methodType = PyObject_GetAttrString(typesModule, "MethodType"); + Py_DECREF(typesModule); + if (!methodType) return NULL; + result = PyObject_CallFunctionObjArgs(methodType, func, self, NULL); + Py_DECREF(methodType); + return result; +} +#elif PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx_PyMethod_New(PyObject *func, PyObject *self, PyObject *typ) { + CYTHON_UNUSED_VAR(typ); + if (!self) + return __Pyx_NewRef(func); + return PyMethod_New(func, self); +} +#else + #define __Pyx_PyMethod_New PyMethod_New +#endif + +/* PyVectorcallFastCallDict.proto */ +#if CYTHON_METH_FASTCALL +static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw); +#endif + +/* CythonFunctionShared.proto */ +#define __Pyx_CyFunction_USED +#define __Pyx_CYFUNCTION_STATICMETHOD 0x01 +#define __Pyx_CYFUNCTION_CLASSMETHOD 0x02 +#define __Pyx_CYFUNCTION_CCLASS 0x04 +#define __Pyx_CYFUNCTION_COROUTINE 0x08 +#define __Pyx_CyFunction_GetClosure(f)\ + (((__pyx_CyFunctionObject *) (f))->func_closure) +#if PY_VERSION_HEX < 0x030900B1 || 
CYTHON_COMPILING_IN_LIMITED_API + #define __Pyx_CyFunction_GetClassObj(f)\ + (((__pyx_CyFunctionObject *) (f))->func_classobj) +#else + #define __Pyx_CyFunction_GetClassObj(f)\ + ((PyObject*) ((PyCMethodObject *) (f))->mm_class) +#endif +#define __Pyx_CyFunction_SetClassObj(f, classobj)\ + __Pyx__CyFunction_SetClassObj((__pyx_CyFunctionObject *) (f), (classobj)) +#define __Pyx_CyFunction_Defaults(type, f)\ + ((type *)(((__pyx_CyFunctionObject *) (f))->defaults)) +#define __Pyx_CyFunction_SetDefaultsGetter(f, g)\ + ((__pyx_CyFunctionObject *) (f))->defaults_getter = (g) +typedef struct { +#if CYTHON_COMPILING_IN_LIMITED_API + PyObject_HEAD + PyObject *func; +#elif PY_VERSION_HEX < 0x030900B1 + PyCFunctionObject func; +#else + PyCMethodObject func; +#endif +#if CYTHON_BACKPORT_VECTORCALL + __pyx_vectorcallfunc func_vectorcall; +#endif +#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API + PyObject *func_weakreflist; +#endif + PyObject *func_dict; + PyObject *func_name; + PyObject *func_qualname; + PyObject *func_doc; + PyObject *func_globals; + PyObject *func_code; + PyObject *func_closure; +#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + PyObject *func_classobj; +#endif + void *defaults; + int defaults_pyobjects; + size_t defaults_size; + int flags; + PyObject *defaults_tuple; + PyObject *defaults_kwdict; + PyObject *(*defaults_getter)(PyObject *); + PyObject *func_annotations; + PyObject *func_is_coroutine; +} __pyx_CyFunctionObject; +#undef __Pyx_CyOrPyCFunction_Check +#define __Pyx_CyFunction_Check(obj) __Pyx_TypeCheck(obj, __pyx_CyFunctionType) +#define __Pyx_CyOrPyCFunction_Check(obj) __Pyx_TypeCheck2(obj, __pyx_CyFunctionType, &PyCFunction_Type) +#define __Pyx_CyFunction_CheckExact(obj) __Pyx_IS_TYPE(obj, __pyx_CyFunctionType) +static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc); +#undef __Pyx_IsSameCFunction +#define __Pyx_IsSameCFunction(func, cfunc) __Pyx__IsSameCyOrCFunction(func, cfunc) +static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject* op, PyMethodDef *ml, + int flags, PyObject* qualname, + PyObject *closure, + PyObject *module, PyObject *globals, + PyObject* code); +static CYTHON_INLINE void __Pyx__CyFunction_SetClassObj(__pyx_CyFunctionObject* f, PyObject* classobj); +static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *m, + size_t size, + int pyobjects); +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *m, + PyObject *tuple); +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *m, + PyObject *dict); +static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *m, + PyObject *dict); +static int __pyx_CyFunction_init(PyObject *module); +#if CYTHON_METH_FASTCALL +static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames); +#if CYTHON_BACKPORT_VECTORCALL +#define __Pyx_CyFunction_func_vectorcall(f) (((__pyx_CyFunctionObject*)f)->func_vectorcall) +#else +#define __Pyx_CyFunction_func_vectorcall(f) (((PyCFunctionObject*)f)->vectorcall) +#endif +#endif + +/* 
CythonFunction.proto */ +static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, + int flags, PyObject* qualname, + PyObject *closure, + PyObject *module, PyObject *globals, + PyObject* code); + +/* CLineInTraceback.proto */ +#ifdef CYTHON_CLINE_IN_TRACEBACK +#define __Pyx_CLineForTraceback(tstate, c_line) (((CYTHON_CLINE_IN_TRACEBACK)) ? c_line : 0) +#else +static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line); +#endif + +/* CodeObjectCache.proto */ +#if !CYTHON_COMPILING_IN_LIMITED_API +typedef struct { + PyCodeObject* code_object; + int code_line; +} __Pyx_CodeObjectCacheEntry; +struct __Pyx_CodeObjectCache { + int count; + int max_count; + __Pyx_CodeObjectCacheEntry* entries; +}; +static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL}; +static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line); +static PyCodeObject *__pyx_find_code_object(int code_line); +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object); +#endif + +/* AddTraceback.proto */ +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename); + +/* BufferStructDeclare.proto */ +typedef struct { + Py_ssize_t shape, strides, suboffsets; +} __Pyx_Buf_DimInfo; +typedef struct { + size_t refcount; + Py_buffer pybuffer; +} __Pyx_Buffer; +typedef struct { + __Pyx_Buffer *rcbuffer; + char *data; + __Pyx_Buf_DimInfo diminfo[8]; +} __Pyx_LocalBuf_ND; + +#if PY_MAJOR_VERSION < 3 + static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags); + static void __Pyx_ReleaseBuffer(Py_buffer *view); +#else + #define __Pyx_GetBuffer PyObject_GetBuffer + #define __Pyx_ReleaseBuffer PyBuffer_Release +#endif + + +/* GCCDiagnostics.proto */ +#if !defined(__INTEL_COMPILER) && defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) +#define __Pyx_HAS_GCC_DIAGNOSTIC +#endif + +/* RealImag.proto */ +#if CYTHON_CCOMPLEX + #ifdef __cplusplus + #define __Pyx_CREAL(z) ((z).real()) + #define __Pyx_CIMAG(z) ((z).imag()) + #else + #define __Pyx_CREAL(z) (__real__(z)) + #define __Pyx_CIMAG(z) (__imag__(z)) + #endif +#else + #define __Pyx_CREAL(z) ((z).real) + #define __Pyx_CIMAG(z) ((z).imag) +#endif +#if defined(__cplusplus) && CYTHON_CCOMPLEX\ + && (defined(_WIN32) || defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 5 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4 )) || __cplusplus >= 201103) + #define __Pyx_SET_CREAL(z,x) ((z).real(x)) + #define __Pyx_SET_CIMAG(z,y) ((z).imag(y)) +#else + #define __Pyx_SET_CREAL(z,x) __Pyx_CREAL(z) = (x) + #define __Pyx_SET_CIMAG(z,y) __Pyx_CIMAG(z) = (y) +#endif + +/* Arithmetic.proto */ +#if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) + #define __Pyx_c_eq_float(a, b) ((a)==(b)) + #define __Pyx_c_sum_float(a, b) ((a)+(b)) + #define __Pyx_c_diff_float(a, b) ((a)-(b)) + #define __Pyx_c_prod_float(a, b) ((a)*(b)) + #define __Pyx_c_quot_float(a, b) ((a)/(b)) + #define __Pyx_c_neg_float(a) (-(a)) + #ifdef __cplusplus + #define __Pyx_c_is_zero_float(z) ((z)==(float)0) + #define __Pyx_c_conj_float(z) (::std::conj(z)) + #if 1 + #define __Pyx_c_abs_float(z) (::std::abs(z)) + #define __Pyx_c_pow_float(a, b) (::std::pow(a, b)) + #endif + #else + #define __Pyx_c_is_zero_float(z) ((z)==0) + #define __Pyx_c_conj_float(z) (conjf(z)) + #if 1 + #define __Pyx_c_abs_float(z) (cabsf(z)) + #define __Pyx_c_pow_float(a, b) (cpowf(a, b)) + #endif + #endif +#else + static CYTHON_INLINE int __Pyx_c_eq_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE 
__pyx_t_float_complex __Pyx_c_sum_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_diff_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prod_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_neg_float(__pyx_t_float_complex); + static CYTHON_INLINE int __Pyx_c_is_zero_float(__pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conj_float(__pyx_t_float_complex); + #if 1 + static CYTHON_INLINE float __Pyx_c_abs_float(__pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_pow_float(__pyx_t_float_complex, __pyx_t_float_complex); + #endif +#endif + +/* Arithmetic.proto */ +#if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) + #define __Pyx_c_eq_double(a, b) ((a)==(b)) + #define __Pyx_c_sum_double(a, b) ((a)+(b)) + #define __Pyx_c_diff_double(a, b) ((a)-(b)) + #define __Pyx_c_prod_double(a, b) ((a)*(b)) + #define __Pyx_c_quot_double(a, b) ((a)/(b)) + #define __Pyx_c_neg_double(a) (-(a)) + #ifdef __cplusplus + #define __Pyx_c_is_zero_double(z) ((z)==(double)0) + #define __Pyx_c_conj_double(z) (::std::conj(z)) + #if 1 + #define __Pyx_c_abs_double(z) (::std::abs(z)) + #define __Pyx_c_pow_double(a, b) (::std::pow(a, b)) + #endif + #else + #define __Pyx_c_is_zero_double(z) ((z)==0) + #define __Pyx_c_conj_double(z) (conj(z)) + #if 1 + #define __Pyx_c_abs_double(z) (cabs(z)) + #define __Pyx_c_pow_double(a, b) (cpow(a, b)) + #endif + #endif +#else + static CYTHON_INLINE int __Pyx_c_eq_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg_double(__pyx_t_double_complex); + static CYTHON_INLINE int __Pyx_c_is_zero_double(__pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj_double(__pyx_t_double_complex); + #if 1 + static CYTHON_INLINE double __Pyx_c_abs_double(__pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow_double(__pyx_t_double_complex, __pyx_t_double_complex); + #endif +#endif + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_int(unsigned int value); + +/* CIntFromPy.proto */ +static CYTHON_INLINE unsigned int __Pyx_PyInt_As_unsigned_int(PyObject *); + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); + +/* FormatTypeName.proto */ +#if CYTHON_COMPILING_IN_LIMITED_API +typedef PyObject *__Pyx_TypeName; +#define __Pyx_FMT_TYPENAME "%U" +static __Pyx_TypeName __Pyx_PyType_GetName(PyTypeObject* tp); +#define __Pyx_DECREF_TypeName(obj) Py_XDECREF(obj) +#else +typedef const char *__Pyx_TypeName; +#define __Pyx_FMT_TYPENAME "%.200s" +#define __Pyx_PyType_GetName(tp) ((tp)->tp_name) +#define __Pyx_DECREF_TypeName(obj) +#endif + +/* CIntFromPy.proto */ +static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *); + +/* CIntFromPy.proto */ +static 
CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); + +/* FastTypeChecks.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_TypeCheck(obj, type) __Pyx_IsSubtype(Py_TYPE(obj), (PyTypeObject *)type) +#define __Pyx_TypeCheck2(obj, type1, type2) __Pyx_IsAnySubtype2(Py_TYPE(obj), (PyTypeObject *)type1, (PyTypeObject *)type2) +static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b); +static CYTHON_INLINE int __Pyx_IsAnySubtype2(PyTypeObject *cls, PyTypeObject *a, PyTypeObject *b); +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches(PyObject *err, PyObject *type); +static CYTHON_INLINE int __Pyx_PyErr_GivenExceptionMatches2(PyObject *err, PyObject *type1, PyObject *type2); +#else +#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type) +#define __Pyx_TypeCheck2(obj, type1, type2) (PyObject_TypeCheck(obj, (PyTypeObject *)type1) || PyObject_TypeCheck(obj, (PyTypeObject *)type2)) +#define __Pyx_PyErr_GivenExceptionMatches(err, type) PyErr_GivenExceptionMatches(err, type) +#define __Pyx_PyErr_GivenExceptionMatches2(err, type1, type2) (PyErr_GivenExceptionMatches(err, type1) || PyErr_GivenExceptionMatches(err, type2)) +#endif +#define __Pyx_PyErr_ExceptionMatches2(err1, err2) __Pyx_PyErr_GivenExceptionMatches2(__Pyx_PyErr_CurrentExceptionType(), err1, err2) +#define __Pyx_PyException_Check(obj) __Pyx_TypeCheck(obj, PyExc_Exception) + +/* CheckBinaryVersion.proto */ +static unsigned long __Pyx_get_runtime_version(void); +static int __Pyx_check_binary_version(unsigned long ct_version, unsigned long rt_version, int allow_newer); + +/* InitStrings.proto */ +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); + +/* #### Code section: module_declarations ### */ +static CYTHON_INLINE PyObject *__pyx_f_5numpy_7ndarray_4base_base(PyArrayObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE PyArray_Descr *__pyx_f_5numpy_7ndarray_5descr_descr(PyArrayObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE int __pyx_f_5numpy_7ndarray_4ndim_ndim(PyArrayObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_5shape_shape(PyArrayObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_7strides_strides(PyArrayObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE npy_intp __pyx_f_5numpy_7ndarray_4size_size(PyArrayObject *__pyx_v_self); /* proto*/ +static CYTHON_INLINE char *__pyx_f_5numpy_7ndarray_4data_data(PyArrayObject *__pyx_v_self); /* proto*/ + +/* Module declarations from "libc.string" */ + +/* Module declarations from "libc.stdio" */ + +/* Module declarations from "__builtin__" */ + +/* Module declarations from "cpython.type" */ + +/* Module declarations from "cpython" */ + +/* Module declarations from "cpython.object" */ + +/* Module declarations from "cpython.ref" */ + +/* Module declarations from "numpy" */ + +/* Module declarations from "numpy" */ + +/* Module declarations from "opencood.utils.box_overlaps" */ +/* #### Code section: typeinfo ### */ +static __Pyx_TypeInfo __Pyx_TypeInfo_nn___pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t = { "DTYPE_t", NULL, sizeof(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t), { 0 }, 0, 'R', 0, 0 }; +static __Pyx_TypeInfo __Pyx_TypeInfo_float = { "float", NULL, sizeof(float), { 0 }, 0, 'R', 0, 0 }; +/* #### Code section: before_global_var ### */ +#define __Pyx_MODULE_NAME "opencood.utils.box_overlaps" +extern int __pyx_module_is_main_opencood__utils__box_overlaps; +int __pyx_module_is_main_opencood__utils__box_overlaps = 0; + +/* Implementation 
of "opencood.utils.box_overlaps" */ +/* #### Code section: global_var ### */ +static PyObject *__pyx_builtin_range; +static PyObject *__pyx_builtin_ImportError; +/* #### Code section: string_decls ### */ +static const char __pyx_k_K[] = "K"; +static const char __pyx_k_M[] = "M"; +static const char __pyx_k_N[] = "N"; +static const char __pyx_k_i[] = "i"; +static const char __pyx_k_k[] = "k"; +static const char __pyx_k_m[] = "m"; +static const char __pyx_k_n[] = "n"; +static const char __pyx_k__6[] = "*"; +static const char __pyx_k_ih[] = "ih"; +static const char __pyx_k_iw[] = "iw"; +static const char __pyx_k_np[] = "np"; +static const char __pyx_k_ov[] = "ov"; +static const char __pyx_k_ua[] = "ua"; +static const char __pyx_k__13[] = "?"; +static const char __pyx_k_bi0[] = "bi0"; +static const char __pyx_k_bi1[] = "bi1"; +static const char __pyx_k_bi2[] = "bi2"; +static const char __pyx_k_bi3[] = "bi3"; +static const char __pyx_k_det[] = "det"; +static const char __pyx_k_bit2[] = "bit2"; +static const char __pyx_k_det2[] = "det2"; +static const char __pyx_k_main[] = "__main__"; +static const char __pyx_k_name[] = "__name__"; +static const char __pyx_k_spec[] = "__spec__"; +static const char __pyx_k_test[] = "__test__"; +static const char __pyx_k_DTYPE[] = "DTYPE"; +static const char __pyx_k_boxes[] = "boxes"; +static const char __pyx_k_dtype[] = "dtype"; +static const char __pyx_k_numpy[] = "numpy"; +static const char __pyx_k_range[] = "range"; +static const char __pyx_k_zeros[] = "zeros"; +static const char __pyx_k_import[] = "__import__"; +static const char __pyx_k_thresh[] = "thresh"; +static const char __pyx_k_acc_box[] = "acc_box"; +static const char __pyx_k_float32[] = "float32"; +static const char __pyx_k_box_area[] = "box_area"; +static const char __pyx_k_box_vote[] = "box_vote"; +static const char __pyx_k_dets_NMS[] = "dets_NMS"; +static const char __pyx_k_dets_all[] = "dets_all"; +static const char __pyx_k_intersec[] = "intersec"; +static const char __pyx_k_overlaps[] = "overlaps"; +static const char __pyx_k_acc_score[] = "acc_score"; +static const char __pyx_k_dets_voted[] = "dets_voted"; +static const char __pyx_k_ImportError[] = "ImportError"; +static const char __pyx_k_query_boxes[] = "query_boxes"; +static const char __pyx_k_initializing[] = "_initializing"; +static const char __pyx_k_is_coroutine[] = "_is_coroutine"; +static const char __pyx_k_bbox_overlaps[] = "bbox_overlaps"; +static const char __pyx_k_class_getitem[] = "__class_getitem__"; +static const char __pyx_k_asyncio_coroutines[] = "asyncio.coroutines"; +static const char __pyx_k_bbox_intersections[] = "bbox_intersections"; +static const char __pyx_k_cline_in_traceback[] = "cline_in_traceback"; +static const char __pyx_k_opencood_utils_box_overlaps[] = "opencood.utils.box_overlaps"; +static const char __pyx_k_numpy_core_multiarray_failed_to[] = "numpy.core.multiarray failed to import"; +static const char __pyx_k_opencood_utils_box_overlaps_pyx[] = "opencood/utils/box_overlaps.pyx"; +static const char __pyx_k_numpy_core_umath_failed_to_impor[] = "numpy.core.umath failed to import"; +/* #### Code section: decls ### */ +static PyObject *__pyx_pf_8opencood_5utils_12box_overlaps_bbox_overlaps(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_boxes, PyArrayObject *__pyx_v_query_boxes); /* proto */ +static PyObject *__pyx_pf_8opencood_5utils_12box_overlaps_2bbox_intersections(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_boxes, PyArrayObject *__pyx_v_query_boxes); /* proto */ +static PyObject 
*__pyx_pf_8opencood_5utils_12box_overlaps_4box_vote(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_dets_NMS, PyArrayObject *__pyx_v_dets_all); /* proto */ +/* #### Code section: late_includes ### */ +/* #### Code section: module_state ### */ +typedef struct { + PyObject *__pyx_d; + PyObject *__pyx_b; + PyObject *__pyx_cython_runtime; + PyObject *__pyx_empty_tuple; + PyObject *__pyx_empty_bytes; + PyObject *__pyx_empty_unicode; + #ifdef __Pyx_CyFunction_USED + PyTypeObject *__pyx_CyFunctionType; + #endif + #ifdef __Pyx_FusedFunction_USED + PyTypeObject *__pyx_FusedFunctionType; + #endif + #ifdef __Pyx_Generator_USED + PyTypeObject *__pyx_GeneratorType; + #endif + #ifdef __Pyx_IterableCoroutine_USED + PyTypeObject *__pyx_IterableCoroutineType; + #endif + #ifdef __Pyx_Coroutine_USED + PyTypeObject *__pyx_CoroutineAwaitType; + #endif + #ifdef __Pyx_Coroutine_USED + PyTypeObject *__pyx_CoroutineType; + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + PyTypeObject *__pyx_ptype_7cpython_4type_type; + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + #if CYTHON_USE_MODULE_STATE + #endif + PyTypeObject *__pyx_ptype_5numpy_dtype; + PyTypeObject *__pyx_ptype_5numpy_flatiter; + PyTypeObject *__pyx_ptype_5numpy_broadcast; + PyTypeObject *__pyx_ptype_5numpy_ndarray; + PyTypeObject *__pyx_ptype_5numpy_generic; + PyTypeObject *__pyx_ptype_5numpy_number; + PyTypeObject *__pyx_ptype_5numpy_integer; + PyTypeObject *__pyx_ptype_5numpy_signedinteger; + PyTypeObject *__pyx_ptype_5numpy_unsignedinteger; + PyTypeObject *__pyx_ptype_5numpy_inexact; + PyTypeObject *__pyx_ptype_5numpy_floating; + PyTypeObject *__pyx_ptype_5numpy_complexfloating; + PyTypeObject *__pyx_ptype_5numpy_flexible; + PyTypeObject *__pyx_ptype_5numpy_character; + PyTypeObject *__pyx_ptype_5numpy_ufunc; + #if CYTHON_USE_MODULE_STATE + #endif + PyObject *__pyx_n_s_DTYPE; + PyObject *__pyx_n_s_ImportError; + PyObject *__pyx_n_s_K; + PyObject *__pyx_n_s_M; + PyObject *__pyx_n_s_N; + PyObject *__pyx_n_s__13; + PyObject *__pyx_n_s__6; + PyObject *__pyx_n_s_acc_box; + PyObject *__pyx_n_s_acc_score; + PyObject *__pyx_n_s_asyncio_coroutines; + PyObject *__pyx_n_s_bbox_intersections; + PyObject *__pyx_n_s_bbox_overlaps; + PyObject *__pyx_n_s_bi0; + PyObject *__pyx_n_s_bi1; + PyObject *__pyx_n_s_bi2; + PyObject *__pyx_n_s_bi3; + PyObject *__pyx_n_s_bit2; + PyObject *__pyx_n_s_box_area; + PyObject *__pyx_n_s_box_vote; + PyObject *__pyx_n_s_boxes; + PyObject *__pyx_n_s_class_getitem; + PyObject *__pyx_n_s_cline_in_traceback; + PyObject *__pyx_n_s_det; + PyObject *__pyx_n_s_det2; + PyObject *__pyx_n_s_dets_NMS; + PyObject *__pyx_n_s_dets_all; + PyObject *__pyx_n_s_dets_voted; + PyObject *__pyx_n_s_dtype; + PyObject *__pyx_n_s_float32; + PyObject *__pyx_n_s_i; + PyObject *__pyx_n_s_ih; + PyObject *__pyx_n_s_import; + PyObject *__pyx_n_s_initializing; + PyObject *__pyx_n_s_intersec; + PyObject *__pyx_n_s_is_coroutine; + PyObject *__pyx_n_s_iw; + PyObject *__pyx_n_s_k; + PyObject *__pyx_n_s_m; + PyObject *__pyx_n_s_main; + PyObject *__pyx_n_s_n; + PyObject *__pyx_n_s_name; + PyObject *__pyx_n_s_np; + PyObject *__pyx_n_s_numpy; + PyObject *__pyx_kp_s_numpy_core_multiarray_failed_to; + PyObject *__pyx_kp_s_numpy_core_umath_failed_to_impor; + PyObject *__pyx_n_s_opencood_utils_box_overlaps; + PyObject 
*__pyx_kp_s_opencood_utils_box_overlaps_pyx; + PyObject *__pyx_n_s_ov; + PyObject *__pyx_n_s_overlaps; + PyObject *__pyx_n_s_query_boxes; + PyObject *__pyx_n_s_range; + PyObject *__pyx_n_s_spec; + PyObject *__pyx_n_s_test; + PyObject *__pyx_n_s_thresh; + PyObject *__pyx_n_s_ua; + PyObject *__pyx_n_s_zeros; + PyObject *__pyx_int_0; + PyObject *__pyx_int_1; + PyObject *__pyx_int_4; + PyObject *__pyx_tuple_; + PyObject *__pyx_slice__3; + PyObject *__pyx_slice__5; + PyObject *__pyx_tuple__2; + PyObject *__pyx_tuple__4; + PyObject *__pyx_tuple__7; + PyObject *__pyx_tuple__9; + PyObject *__pyx_tuple__11; + PyObject *__pyx_codeobj__8; + PyObject *__pyx_codeobj__10; + PyObject *__pyx_codeobj__12; +} __pyx_mstate; + +#if CYTHON_USE_MODULE_STATE +#ifdef __cplusplus +namespace { + extern struct PyModuleDef __pyx_moduledef; +} /* anonymous namespace */ +#else +static struct PyModuleDef __pyx_moduledef; +#endif + +#define __pyx_mstate(o) ((__pyx_mstate *)__Pyx_PyModule_GetState(o)) + +#define __pyx_mstate_global (__pyx_mstate(PyState_FindModule(&__pyx_moduledef))) + +#define __pyx_m (PyState_FindModule(&__pyx_moduledef)) +#else +static __pyx_mstate __pyx_mstate_global_static = +#ifdef __cplusplus + {}; +#else + {0}; +#endif +static __pyx_mstate *__pyx_mstate_global = &__pyx_mstate_global_static; +#endif +/* #### Code section: module_state_clear ### */ +#if CYTHON_USE_MODULE_STATE +static int __pyx_m_clear(PyObject *m) { + __pyx_mstate *clear_module_state = __pyx_mstate(m); + if (!clear_module_state) return 0; + Py_CLEAR(clear_module_state->__pyx_d); + Py_CLEAR(clear_module_state->__pyx_b); + Py_CLEAR(clear_module_state->__pyx_cython_runtime); + Py_CLEAR(clear_module_state->__pyx_empty_tuple); + Py_CLEAR(clear_module_state->__pyx_empty_bytes); + Py_CLEAR(clear_module_state->__pyx_empty_unicode); + #ifdef __Pyx_CyFunction_USED + Py_CLEAR(clear_module_state->__pyx_CyFunctionType); + #endif + #ifdef __Pyx_FusedFunction_USED + Py_CLEAR(clear_module_state->__pyx_FusedFunctionType); + #endif + Py_CLEAR(clear_module_state->__pyx_ptype_7cpython_4type_type); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_dtype); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_flatiter); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_broadcast); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_ndarray); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_generic); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_number); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_integer); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_signedinteger); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_unsignedinteger); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_inexact); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_floating); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_complexfloating); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_flexible); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_character); + Py_CLEAR(clear_module_state->__pyx_ptype_5numpy_ufunc); + Py_CLEAR(clear_module_state->__pyx_n_s_DTYPE); + Py_CLEAR(clear_module_state->__pyx_n_s_ImportError); + Py_CLEAR(clear_module_state->__pyx_n_s_K); + Py_CLEAR(clear_module_state->__pyx_n_s_M); + Py_CLEAR(clear_module_state->__pyx_n_s_N); + Py_CLEAR(clear_module_state->__pyx_n_s__13); + Py_CLEAR(clear_module_state->__pyx_n_s__6); + Py_CLEAR(clear_module_state->__pyx_n_s_acc_box); + Py_CLEAR(clear_module_state->__pyx_n_s_acc_score); + Py_CLEAR(clear_module_state->__pyx_n_s_asyncio_coroutines); + 
Py_CLEAR(clear_module_state->__pyx_n_s_bbox_intersections); + Py_CLEAR(clear_module_state->__pyx_n_s_bbox_overlaps); + Py_CLEAR(clear_module_state->__pyx_n_s_bi0); + Py_CLEAR(clear_module_state->__pyx_n_s_bi1); + Py_CLEAR(clear_module_state->__pyx_n_s_bi2); + Py_CLEAR(clear_module_state->__pyx_n_s_bi3); + Py_CLEAR(clear_module_state->__pyx_n_s_bit2); + Py_CLEAR(clear_module_state->__pyx_n_s_box_area); + Py_CLEAR(clear_module_state->__pyx_n_s_box_vote); + Py_CLEAR(clear_module_state->__pyx_n_s_boxes); + Py_CLEAR(clear_module_state->__pyx_n_s_class_getitem); + Py_CLEAR(clear_module_state->__pyx_n_s_cline_in_traceback); + Py_CLEAR(clear_module_state->__pyx_n_s_det); + Py_CLEAR(clear_module_state->__pyx_n_s_det2); + Py_CLEAR(clear_module_state->__pyx_n_s_dets_NMS); + Py_CLEAR(clear_module_state->__pyx_n_s_dets_all); + Py_CLEAR(clear_module_state->__pyx_n_s_dets_voted); + Py_CLEAR(clear_module_state->__pyx_n_s_dtype); + Py_CLEAR(clear_module_state->__pyx_n_s_float32); + Py_CLEAR(clear_module_state->__pyx_n_s_i); + Py_CLEAR(clear_module_state->__pyx_n_s_ih); + Py_CLEAR(clear_module_state->__pyx_n_s_import); + Py_CLEAR(clear_module_state->__pyx_n_s_initializing); + Py_CLEAR(clear_module_state->__pyx_n_s_intersec); + Py_CLEAR(clear_module_state->__pyx_n_s_is_coroutine); + Py_CLEAR(clear_module_state->__pyx_n_s_iw); + Py_CLEAR(clear_module_state->__pyx_n_s_k); + Py_CLEAR(clear_module_state->__pyx_n_s_m); + Py_CLEAR(clear_module_state->__pyx_n_s_main); + Py_CLEAR(clear_module_state->__pyx_n_s_n); + Py_CLEAR(clear_module_state->__pyx_n_s_name); + Py_CLEAR(clear_module_state->__pyx_n_s_np); + Py_CLEAR(clear_module_state->__pyx_n_s_numpy); + Py_CLEAR(clear_module_state->__pyx_kp_s_numpy_core_multiarray_failed_to); + Py_CLEAR(clear_module_state->__pyx_kp_s_numpy_core_umath_failed_to_impor); + Py_CLEAR(clear_module_state->__pyx_n_s_opencood_utils_box_overlaps); + Py_CLEAR(clear_module_state->__pyx_kp_s_opencood_utils_box_overlaps_pyx); + Py_CLEAR(clear_module_state->__pyx_n_s_ov); + Py_CLEAR(clear_module_state->__pyx_n_s_overlaps); + Py_CLEAR(clear_module_state->__pyx_n_s_query_boxes); + Py_CLEAR(clear_module_state->__pyx_n_s_range); + Py_CLEAR(clear_module_state->__pyx_n_s_spec); + Py_CLEAR(clear_module_state->__pyx_n_s_test); + Py_CLEAR(clear_module_state->__pyx_n_s_thresh); + Py_CLEAR(clear_module_state->__pyx_n_s_ua); + Py_CLEAR(clear_module_state->__pyx_n_s_zeros); + Py_CLEAR(clear_module_state->__pyx_int_0); + Py_CLEAR(clear_module_state->__pyx_int_1); + Py_CLEAR(clear_module_state->__pyx_int_4); + Py_CLEAR(clear_module_state->__pyx_tuple_); + Py_CLEAR(clear_module_state->__pyx_slice__3); + Py_CLEAR(clear_module_state->__pyx_slice__5); + Py_CLEAR(clear_module_state->__pyx_tuple__2); + Py_CLEAR(clear_module_state->__pyx_tuple__4); + Py_CLEAR(clear_module_state->__pyx_tuple__7); + Py_CLEAR(clear_module_state->__pyx_tuple__9); + Py_CLEAR(clear_module_state->__pyx_tuple__11); + Py_CLEAR(clear_module_state->__pyx_codeobj__8); + Py_CLEAR(clear_module_state->__pyx_codeobj__10); + Py_CLEAR(clear_module_state->__pyx_codeobj__12); + return 0; +} +#endif +/* #### Code section: module_state_traverse ### */ +#if CYTHON_USE_MODULE_STATE +static int __pyx_m_traverse(PyObject *m, visitproc visit, void *arg) { + __pyx_mstate *traverse_module_state = __pyx_mstate(m); + if (!traverse_module_state) return 0; + Py_VISIT(traverse_module_state->__pyx_d); + Py_VISIT(traverse_module_state->__pyx_b); + Py_VISIT(traverse_module_state->__pyx_cython_runtime); + Py_VISIT(traverse_module_state->__pyx_empty_tuple); + 
Py_VISIT(traverse_module_state->__pyx_empty_bytes); + Py_VISIT(traverse_module_state->__pyx_empty_unicode); + #ifdef __Pyx_CyFunction_USED + Py_VISIT(traverse_module_state->__pyx_CyFunctionType); + #endif + #ifdef __Pyx_FusedFunction_USED + Py_VISIT(traverse_module_state->__pyx_FusedFunctionType); + #endif + Py_VISIT(traverse_module_state->__pyx_ptype_7cpython_4type_type); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_dtype); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_flatiter); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_broadcast); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_ndarray); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_generic); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_number); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_integer); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_signedinteger); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_unsignedinteger); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_inexact); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_floating); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_complexfloating); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_flexible); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_character); + Py_VISIT(traverse_module_state->__pyx_ptype_5numpy_ufunc); + Py_VISIT(traverse_module_state->__pyx_n_s_DTYPE); + Py_VISIT(traverse_module_state->__pyx_n_s_ImportError); + Py_VISIT(traverse_module_state->__pyx_n_s_K); + Py_VISIT(traverse_module_state->__pyx_n_s_M); + Py_VISIT(traverse_module_state->__pyx_n_s_N); + Py_VISIT(traverse_module_state->__pyx_n_s__13); + Py_VISIT(traverse_module_state->__pyx_n_s__6); + Py_VISIT(traverse_module_state->__pyx_n_s_acc_box); + Py_VISIT(traverse_module_state->__pyx_n_s_acc_score); + Py_VISIT(traverse_module_state->__pyx_n_s_asyncio_coroutines); + Py_VISIT(traverse_module_state->__pyx_n_s_bbox_intersections); + Py_VISIT(traverse_module_state->__pyx_n_s_bbox_overlaps); + Py_VISIT(traverse_module_state->__pyx_n_s_bi0); + Py_VISIT(traverse_module_state->__pyx_n_s_bi1); + Py_VISIT(traverse_module_state->__pyx_n_s_bi2); + Py_VISIT(traverse_module_state->__pyx_n_s_bi3); + Py_VISIT(traverse_module_state->__pyx_n_s_bit2); + Py_VISIT(traverse_module_state->__pyx_n_s_box_area); + Py_VISIT(traverse_module_state->__pyx_n_s_box_vote); + Py_VISIT(traverse_module_state->__pyx_n_s_boxes); + Py_VISIT(traverse_module_state->__pyx_n_s_class_getitem); + Py_VISIT(traverse_module_state->__pyx_n_s_cline_in_traceback); + Py_VISIT(traverse_module_state->__pyx_n_s_det); + Py_VISIT(traverse_module_state->__pyx_n_s_det2); + Py_VISIT(traverse_module_state->__pyx_n_s_dets_NMS); + Py_VISIT(traverse_module_state->__pyx_n_s_dets_all); + Py_VISIT(traverse_module_state->__pyx_n_s_dets_voted); + Py_VISIT(traverse_module_state->__pyx_n_s_dtype); + Py_VISIT(traverse_module_state->__pyx_n_s_float32); + Py_VISIT(traverse_module_state->__pyx_n_s_i); + Py_VISIT(traverse_module_state->__pyx_n_s_ih); + Py_VISIT(traverse_module_state->__pyx_n_s_import); + Py_VISIT(traverse_module_state->__pyx_n_s_initializing); + Py_VISIT(traverse_module_state->__pyx_n_s_intersec); + Py_VISIT(traverse_module_state->__pyx_n_s_is_coroutine); + Py_VISIT(traverse_module_state->__pyx_n_s_iw); + Py_VISIT(traverse_module_state->__pyx_n_s_k); + Py_VISIT(traverse_module_state->__pyx_n_s_m); + Py_VISIT(traverse_module_state->__pyx_n_s_main); + Py_VISIT(traverse_module_state->__pyx_n_s_n); + Py_VISIT(traverse_module_state->__pyx_n_s_name); + 
Py_VISIT(traverse_module_state->__pyx_n_s_np); + Py_VISIT(traverse_module_state->__pyx_n_s_numpy); + Py_VISIT(traverse_module_state->__pyx_kp_s_numpy_core_multiarray_failed_to); + Py_VISIT(traverse_module_state->__pyx_kp_s_numpy_core_umath_failed_to_impor); + Py_VISIT(traverse_module_state->__pyx_n_s_opencood_utils_box_overlaps); + Py_VISIT(traverse_module_state->__pyx_kp_s_opencood_utils_box_overlaps_pyx); + Py_VISIT(traverse_module_state->__pyx_n_s_ov); + Py_VISIT(traverse_module_state->__pyx_n_s_overlaps); + Py_VISIT(traverse_module_state->__pyx_n_s_query_boxes); + Py_VISIT(traverse_module_state->__pyx_n_s_range); + Py_VISIT(traverse_module_state->__pyx_n_s_spec); + Py_VISIT(traverse_module_state->__pyx_n_s_test); + Py_VISIT(traverse_module_state->__pyx_n_s_thresh); + Py_VISIT(traverse_module_state->__pyx_n_s_ua); + Py_VISIT(traverse_module_state->__pyx_n_s_zeros); + Py_VISIT(traverse_module_state->__pyx_int_0); + Py_VISIT(traverse_module_state->__pyx_int_1); + Py_VISIT(traverse_module_state->__pyx_int_4); + Py_VISIT(traverse_module_state->__pyx_tuple_); + Py_VISIT(traverse_module_state->__pyx_slice__3); + Py_VISIT(traverse_module_state->__pyx_slice__5); + Py_VISIT(traverse_module_state->__pyx_tuple__2); + Py_VISIT(traverse_module_state->__pyx_tuple__4); + Py_VISIT(traverse_module_state->__pyx_tuple__7); + Py_VISIT(traverse_module_state->__pyx_tuple__9); + Py_VISIT(traverse_module_state->__pyx_tuple__11); + Py_VISIT(traverse_module_state->__pyx_codeobj__8); + Py_VISIT(traverse_module_state->__pyx_codeobj__10); + Py_VISIT(traverse_module_state->__pyx_codeobj__12); + return 0; +} +#endif +/* #### Code section: module_state_defines ### */ +#define __pyx_d __pyx_mstate_global->__pyx_d +#define __pyx_b __pyx_mstate_global->__pyx_b +#define __pyx_cython_runtime __pyx_mstate_global->__pyx_cython_runtime +#define __pyx_empty_tuple __pyx_mstate_global->__pyx_empty_tuple +#define __pyx_empty_bytes __pyx_mstate_global->__pyx_empty_bytes +#define __pyx_empty_unicode __pyx_mstate_global->__pyx_empty_unicode +#ifdef __Pyx_CyFunction_USED +#define __pyx_CyFunctionType __pyx_mstate_global->__pyx_CyFunctionType +#endif +#ifdef __Pyx_FusedFunction_USED +#define __pyx_FusedFunctionType __pyx_mstate_global->__pyx_FusedFunctionType +#endif +#ifdef __Pyx_Generator_USED +#define __pyx_GeneratorType __pyx_mstate_global->__pyx_GeneratorType +#endif +#ifdef __Pyx_IterableCoroutine_USED +#define __pyx_IterableCoroutineType __pyx_mstate_global->__pyx_IterableCoroutineType +#endif +#ifdef __Pyx_Coroutine_USED +#define __pyx_CoroutineAwaitType __pyx_mstate_global->__pyx_CoroutineAwaitType +#endif +#ifdef __Pyx_Coroutine_USED +#define __pyx_CoroutineType __pyx_mstate_global->__pyx_CoroutineType +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#define __pyx_ptype_7cpython_4type_type __pyx_mstate_global->__pyx_ptype_7cpython_4type_type +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#if CYTHON_USE_MODULE_STATE +#endif +#define __pyx_ptype_5numpy_dtype __pyx_mstate_global->__pyx_ptype_5numpy_dtype +#define __pyx_ptype_5numpy_flatiter __pyx_mstate_global->__pyx_ptype_5numpy_flatiter +#define __pyx_ptype_5numpy_broadcast __pyx_mstate_global->__pyx_ptype_5numpy_broadcast +#define __pyx_ptype_5numpy_ndarray __pyx_mstate_global->__pyx_ptype_5numpy_ndarray +#define __pyx_ptype_5numpy_generic 
__pyx_mstate_global->__pyx_ptype_5numpy_generic +#define __pyx_ptype_5numpy_number __pyx_mstate_global->__pyx_ptype_5numpy_number +#define __pyx_ptype_5numpy_integer __pyx_mstate_global->__pyx_ptype_5numpy_integer +#define __pyx_ptype_5numpy_signedinteger __pyx_mstate_global->__pyx_ptype_5numpy_signedinteger +#define __pyx_ptype_5numpy_unsignedinteger __pyx_mstate_global->__pyx_ptype_5numpy_unsignedinteger +#define __pyx_ptype_5numpy_inexact __pyx_mstate_global->__pyx_ptype_5numpy_inexact +#define __pyx_ptype_5numpy_floating __pyx_mstate_global->__pyx_ptype_5numpy_floating +#define __pyx_ptype_5numpy_complexfloating __pyx_mstate_global->__pyx_ptype_5numpy_complexfloating +#define __pyx_ptype_5numpy_flexible __pyx_mstate_global->__pyx_ptype_5numpy_flexible +#define __pyx_ptype_5numpy_character __pyx_mstate_global->__pyx_ptype_5numpy_character +#define __pyx_ptype_5numpy_ufunc __pyx_mstate_global->__pyx_ptype_5numpy_ufunc +#if CYTHON_USE_MODULE_STATE +#endif +#define __pyx_n_s_DTYPE __pyx_mstate_global->__pyx_n_s_DTYPE +#define __pyx_n_s_ImportError __pyx_mstate_global->__pyx_n_s_ImportError +#define __pyx_n_s_K __pyx_mstate_global->__pyx_n_s_K +#define __pyx_n_s_M __pyx_mstate_global->__pyx_n_s_M +#define __pyx_n_s_N __pyx_mstate_global->__pyx_n_s_N +#define __pyx_n_s__13 __pyx_mstate_global->__pyx_n_s__13 +#define __pyx_n_s__6 __pyx_mstate_global->__pyx_n_s__6 +#define __pyx_n_s_acc_box __pyx_mstate_global->__pyx_n_s_acc_box +#define __pyx_n_s_acc_score __pyx_mstate_global->__pyx_n_s_acc_score +#define __pyx_n_s_asyncio_coroutines __pyx_mstate_global->__pyx_n_s_asyncio_coroutines +#define __pyx_n_s_bbox_intersections __pyx_mstate_global->__pyx_n_s_bbox_intersections +#define __pyx_n_s_bbox_overlaps __pyx_mstate_global->__pyx_n_s_bbox_overlaps +#define __pyx_n_s_bi0 __pyx_mstate_global->__pyx_n_s_bi0 +#define __pyx_n_s_bi1 __pyx_mstate_global->__pyx_n_s_bi1 +#define __pyx_n_s_bi2 __pyx_mstate_global->__pyx_n_s_bi2 +#define __pyx_n_s_bi3 __pyx_mstate_global->__pyx_n_s_bi3 +#define __pyx_n_s_bit2 __pyx_mstate_global->__pyx_n_s_bit2 +#define __pyx_n_s_box_area __pyx_mstate_global->__pyx_n_s_box_area +#define __pyx_n_s_box_vote __pyx_mstate_global->__pyx_n_s_box_vote +#define __pyx_n_s_boxes __pyx_mstate_global->__pyx_n_s_boxes +#define __pyx_n_s_class_getitem __pyx_mstate_global->__pyx_n_s_class_getitem +#define __pyx_n_s_cline_in_traceback __pyx_mstate_global->__pyx_n_s_cline_in_traceback +#define __pyx_n_s_det __pyx_mstate_global->__pyx_n_s_det +#define __pyx_n_s_det2 __pyx_mstate_global->__pyx_n_s_det2 +#define __pyx_n_s_dets_NMS __pyx_mstate_global->__pyx_n_s_dets_NMS +#define __pyx_n_s_dets_all __pyx_mstate_global->__pyx_n_s_dets_all +#define __pyx_n_s_dets_voted __pyx_mstate_global->__pyx_n_s_dets_voted +#define __pyx_n_s_dtype __pyx_mstate_global->__pyx_n_s_dtype +#define __pyx_n_s_float32 __pyx_mstate_global->__pyx_n_s_float32 +#define __pyx_n_s_i __pyx_mstate_global->__pyx_n_s_i +#define __pyx_n_s_ih __pyx_mstate_global->__pyx_n_s_ih +#define __pyx_n_s_import __pyx_mstate_global->__pyx_n_s_import +#define __pyx_n_s_initializing __pyx_mstate_global->__pyx_n_s_initializing +#define __pyx_n_s_intersec __pyx_mstate_global->__pyx_n_s_intersec +#define __pyx_n_s_is_coroutine __pyx_mstate_global->__pyx_n_s_is_coroutine +#define __pyx_n_s_iw __pyx_mstate_global->__pyx_n_s_iw +#define __pyx_n_s_k __pyx_mstate_global->__pyx_n_s_k +#define __pyx_n_s_m __pyx_mstate_global->__pyx_n_s_m +#define __pyx_n_s_main __pyx_mstate_global->__pyx_n_s_main +#define __pyx_n_s_n __pyx_mstate_global->__pyx_n_s_n 
+#define __pyx_n_s_name __pyx_mstate_global->__pyx_n_s_name +#define __pyx_n_s_np __pyx_mstate_global->__pyx_n_s_np +#define __pyx_n_s_numpy __pyx_mstate_global->__pyx_n_s_numpy +#define __pyx_kp_s_numpy_core_multiarray_failed_to __pyx_mstate_global->__pyx_kp_s_numpy_core_multiarray_failed_to +#define __pyx_kp_s_numpy_core_umath_failed_to_impor __pyx_mstate_global->__pyx_kp_s_numpy_core_umath_failed_to_impor +#define __pyx_n_s_opencood_utils_box_overlaps __pyx_mstate_global->__pyx_n_s_opencood_utils_box_overlaps +#define __pyx_kp_s_opencood_utils_box_overlaps_pyx __pyx_mstate_global->__pyx_kp_s_opencood_utils_box_overlaps_pyx +#define __pyx_n_s_ov __pyx_mstate_global->__pyx_n_s_ov +#define __pyx_n_s_overlaps __pyx_mstate_global->__pyx_n_s_overlaps +#define __pyx_n_s_query_boxes __pyx_mstate_global->__pyx_n_s_query_boxes +#define __pyx_n_s_range __pyx_mstate_global->__pyx_n_s_range +#define __pyx_n_s_spec __pyx_mstate_global->__pyx_n_s_spec +#define __pyx_n_s_test __pyx_mstate_global->__pyx_n_s_test +#define __pyx_n_s_thresh __pyx_mstate_global->__pyx_n_s_thresh +#define __pyx_n_s_ua __pyx_mstate_global->__pyx_n_s_ua +#define __pyx_n_s_zeros __pyx_mstate_global->__pyx_n_s_zeros +#define __pyx_int_0 __pyx_mstate_global->__pyx_int_0 +#define __pyx_int_1 __pyx_mstate_global->__pyx_int_1 +#define __pyx_int_4 __pyx_mstate_global->__pyx_int_4 +#define __pyx_tuple_ __pyx_mstate_global->__pyx_tuple_ +#define __pyx_slice__3 __pyx_mstate_global->__pyx_slice__3 +#define __pyx_slice__5 __pyx_mstate_global->__pyx_slice__5 +#define __pyx_tuple__2 __pyx_mstate_global->__pyx_tuple__2 +#define __pyx_tuple__4 __pyx_mstate_global->__pyx_tuple__4 +#define __pyx_tuple__7 __pyx_mstate_global->__pyx_tuple__7 +#define __pyx_tuple__9 __pyx_mstate_global->__pyx_tuple__9 +#define __pyx_tuple__11 __pyx_mstate_global->__pyx_tuple__11 +#define __pyx_codeobj__8 __pyx_mstate_global->__pyx_codeobj__8 +#define __pyx_codeobj__10 __pyx_mstate_global->__pyx_codeobj__10 +#define __pyx_codeobj__12 __pyx_mstate_global->__pyx_codeobj__12 +/* #### Code section: module_code ### */ + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":250 + * + * @property + * cdef inline PyObject* base(self) nogil: # <<<<<<<<<<<<<< + * """Returns a borrowed reference to the object owning the data/memory. + * """ + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_7ndarray_4base_base(PyArrayObject *__pyx_v_self) { + PyObject *__pyx_r; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":253 + * """Returns a borrowed reference to the object owning the data/memory. + * """ + * return PyArray_BASE(self) # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = PyArray_BASE(__pyx_v_self); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":250 + * + * @property + * cdef inline PyObject* base(self) nogil: # <<<<<<<<<<<<<< + * """Returns a borrowed reference to the object owning the data/memory. + * """ + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":256 + * + * @property + * cdef inline dtype descr(self): # <<<<<<<<<<<<<< + * """Returns an owned reference to the dtype of the array. 
+ * """ + */ + +static CYTHON_INLINE PyArray_Descr *__pyx_f_5numpy_7ndarray_5descr_descr(PyArrayObject *__pyx_v_self) { + PyArray_Descr *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyArray_Descr *__pyx_t_1; + __Pyx_RefNannySetupContext("descr", 1); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":259 + * """Returns an owned reference to the dtype of the array. + * """ + * return PyArray_DESCR(self) # <<<<<<<<<<<<<< + * + * @property + */ + __Pyx_XDECREF((PyObject *)__pyx_r); + __pyx_t_1 = PyArray_DESCR(__pyx_v_self); + __Pyx_INCREF((PyObject *)((PyArray_Descr *)__pyx_t_1)); + __pyx_r = ((PyArray_Descr *)__pyx_t_1); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":256 + * + * @property + * cdef inline dtype descr(self): # <<<<<<<<<<<<<< + * """Returns an owned reference to the dtype of the array. + * """ + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF((PyObject *)__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":262 + * + * @property + * cdef inline int ndim(self) nogil: # <<<<<<<<<<<<<< + * """Returns the number of dimensions in the array. + * """ + */ + +static CYTHON_INLINE int __pyx_f_5numpy_7ndarray_4ndim_ndim(PyArrayObject *__pyx_v_self) { + int __pyx_r; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":265 + * """Returns the number of dimensions in the array. + * """ + * return PyArray_NDIM(self) # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = PyArray_NDIM(__pyx_v_self); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":262 + * + * @property + * cdef inline int ndim(self) nogil: # <<<<<<<<<<<<<< + * """Returns the number of dimensions in the array. + * """ + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":268 + * + * @property + * cdef inline npy_intp *shape(self) nogil: # <<<<<<<<<<<<<< + * """Returns a pointer to the dimensions/shape of the array. + * The number of elements matches the number of dimensions of the array (ndim). + */ + +static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_5shape_shape(PyArrayObject *__pyx_v_self) { + npy_intp *__pyx_r; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":273 + * Can return NULL for 0-dimensional arrays. + * """ + * return PyArray_DIMS(self) # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = PyArray_DIMS(__pyx_v_self); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":268 + * + * @property + * cdef inline npy_intp *shape(self) nogil: # <<<<<<<<<<<<<< + * """Returns a pointer to the dimensions/shape of the array. + * The number of elements matches the number of dimensions of the array (ndim). + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":276 + * + * @property + * cdef inline npy_intp *strides(self) nogil: # <<<<<<<<<<<<<< + * """Returns a pointer to the strides of the array. + * The number of elements matches the number of dimensions of the array (ndim). 
+ */ + +static CYTHON_INLINE npy_intp *__pyx_f_5numpy_7ndarray_7strides_strides(PyArrayObject *__pyx_v_self) { + npy_intp *__pyx_r; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":280 + * The number of elements matches the number of dimensions of the array (ndim). + * """ + * return PyArray_STRIDES(self) # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = PyArray_STRIDES(__pyx_v_self); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":276 + * + * @property + * cdef inline npy_intp *strides(self) nogil: # <<<<<<<<<<<<<< + * """Returns a pointer to the strides of the array. + * The number of elements matches the number of dimensions of the array (ndim). + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":283 + * + * @property + * cdef inline npy_intp size(self) nogil: # <<<<<<<<<<<<<< + * """Returns the total size (in number of elements) of the array. + * """ + */ + +static CYTHON_INLINE npy_intp __pyx_f_5numpy_7ndarray_4size_size(PyArrayObject *__pyx_v_self) { + npy_intp __pyx_r; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":286 + * """Returns the total size (in number of elements) of the array. + * """ + * return PyArray_SIZE(self) # <<<<<<<<<<<<<< + * + * @property + */ + __pyx_r = PyArray_SIZE(__pyx_v_self); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":283 + * + * @property + * cdef inline npy_intp size(self) nogil: # <<<<<<<<<<<<<< + * """Returns the total size (in number of elements) of the array. + * """ + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":289 + * + * @property + * cdef inline char* data(self) nogil: # <<<<<<<<<<<<<< + * """The pointer to the data buffer as a char*. + * This is provided for legacy reasons to avoid direct struct field access. + */ + +static CYTHON_INLINE char *__pyx_f_5numpy_7ndarray_4data_data(PyArrayObject *__pyx_v_self) { + char *__pyx_r; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":295 + * of `PyArray_DATA()` instead, which returns a 'void*'. + * """ + * return PyArray_BYTES(self) # <<<<<<<<<<<<<< + * + * ctypedef unsigned char npy_bool + */ + __pyx_r = PyArray_BYTES(__pyx_v_self); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":289 + * + * @property + * cdef inline char* data(self) nogil: # <<<<<<<<<<<<<< + * """The pointer to the data buffer as a char*. + * This is provided for legacy reasons to avoid direct struct field access. 
+ */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":779 + * ctypedef npy_cdouble complex_t + * + * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(1, a) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__pyx_v_a) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 1); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":780 + * + * cdef inline object PyArray_MultiIterNew1(a): + * return PyArray_MultiIterNew(1, a) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew2(a, b): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(1, ((void *)__pyx_v_a)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 780, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":779 + * ctypedef npy_cdouble complex_t + * + * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(1, a) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew1", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":782 + * return PyArray_MultiIterNew(1, a) + * + * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(2, a, b) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__pyx_v_a, PyObject *__pyx_v_b) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 1); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":783 + * + * cdef inline object PyArray_MultiIterNew2(a, b): + * return PyArray_MultiIterNew(2, a, b) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(2, ((void *)__pyx_v_a), ((void *)__pyx_v_b)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 783, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":782 + * return PyArray_MultiIterNew(1, a) + * + * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(2, a, b) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew2", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":785 + * return 
PyArray_MultiIterNew(2, a, b) + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(3, a, b, c) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 1); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":786 + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): + * return PyArray_MultiIterNew(3, a, b, c) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(3, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 786, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":785 + * return PyArray_MultiIterNew(2, a, b) + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(3, a, b, c) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew3", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":788 + * return PyArray_MultiIterNew(3, a, b, c) + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(4, a, b, c, d) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew4", 1); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":789 + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): + * return PyArray_MultiIterNew(4, a, b, c, d) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(4, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 789, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":788 + * return PyArray_MultiIterNew(3, a, b, c) + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(4, a, b, c, d) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew4", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* 
"../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":791 + * return PyArray_MultiIterNew(4, a, b, c, d) + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d, PyObject *__pyx_v_e) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew5", 1); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":792 + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): + * return PyArray_MultiIterNew(5, a, b, c, d, e) # <<<<<<<<<<<<<< + * + * cdef inline tuple PyDataType_SHAPE(dtype d): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(5, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d), ((void *)__pyx_v_e)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 792, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":791 + * return PyArray_MultiIterNew(4, a, b, c, d) + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew5", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":794 + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<< + * if PyDataType_HASSUBARRAY(d): + * return d.subarray.shape + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyDataType_SHAPE(PyArray_Descr *__pyx_v_d) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + __Pyx_RefNannySetupContext("PyDataType_SHAPE", 1); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":795 + * + * cdef inline tuple PyDataType_SHAPE(dtype d): + * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<< + * return d.subarray.shape + * else: + */ + __pyx_t_1 = PyDataType_HASSUBARRAY(__pyx_v_d); + if (__pyx_t_1) { + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":796 + * cdef inline tuple PyDataType_SHAPE(dtype d): + * if PyDataType_HASSUBARRAY(d): + * return d.subarray.shape # <<<<<<<<<<<<<< + * else: + * return () + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(((PyObject*)__pyx_v_d->subarray->shape)); + __pyx_r = ((PyObject*)__pyx_v_d->subarray->shape); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":795 + * + * cdef inline tuple PyDataType_SHAPE(dtype d): + * if PyDataType_HASSUBARRAY(d): # <<<<<<<<<<<<<< + * return d.subarray.shape + * else: + */ + } + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":798 + * return 
d.subarray.shape + * else: + * return () # <<<<<<<<<<<<<< + * + * + */ + /*else*/ { + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_empty_tuple); + __pyx_r = __pyx_empty_tuple; + goto __pyx_L0; + } + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":794 + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + * cdef inline tuple PyDataType_SHAPE(dtype d): # <<<<<<<<<<<<<< + * if PyDataType_HASSUBARRAY(d): + * return d.subarray.shape + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":975 + * int _import_umath() except -1 + * + * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< + * Py_INCREF(base) # important to do this before stealing the reference below! + * PyArray_SetBaseObject(arr, base) + */ + +static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_arr, PyObject *__pyx_v_base) { + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":976 + * + * cdef inline void set_array_base(ndarray arr, object base): + * Py_INCREF(base) # important to do this before stealing the reference below! # <<<<<<<<<<<<<< + * PyArray_SetBaseObject(arr, base) + * + */ + Py_INCREF(__pyx_v_base); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":977 + * cdef inline void set_array_base(ndarray arr, object base): + * Py_INCREF(base) # important to do this before stealing the reference below! + * PyArray_SetBaseObject(arr, base) # <<<<<<<<<<<<<< + * + * cdef inline object get_array_base(ndarray arr): + */ + (void)(PyArray_SetBaseObject(__pyx_v_arr, __pyx_v_base)); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":975 + * int _import_umath() except -1 + * + * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< + * Py_INCREF(base) # important to do this before stealing the reference below! 
+ * PyArray_SetBaseObject(arr, base) + */ + + /* function exit code */ +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":979 + * PyArray_SetBaseObject(arr, base) + * + * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< + * base = PyArray_BASE(arr) + * if base is NULL: + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__pyx_v_arr) { + PyObject *__pyx_v_base; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + __Pyx_RefNannySetupContext("get_array_base", 1); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":980 + * + * cdef inline object get_array_base(ndarray arr): + * base = PyArray_BASE(arr) # <<<<<<<<<<<<<< + * if base is NULL: + * return None + */ + __pyx_v_base = PyArray_BASE(__pyx_v_arr); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":981 + * cdef inline object get_array_base(ndarray arr): + * base = PyArray_BASE(arr) + * if base is NULL: # <<<<<<<<<<<<<< + * return None + * return base + */ + __pyx_t_1 = (__pyx_v_base == NULL); + if (__pyx_t_1) { + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":982 + * base = PyArray_BASE(arr) + * if base is NULL: + * return None # <<<<<<<<<<<<<< + * return base + * + */ + __Pyx_XDECREF(__pyx_r); + __pyx_r = Py_None; __Pyx_INCREF(Py_None); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":981 + * cdef inline object get_array_base(ndarray arr): + * base = PyArray_BASE(arr) + * if base is NULL: # <<<<<<<<<<<<<< + * return None + * return base + */ + } + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":983 + * if base is NULL: + * return None + * return base # <<<<<<<<<<<<<< + * + * # Versions of the import_* functions which are more suitable for + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(((PyObject *)__pyx_v_base)); + __pyx_r = ((PyObject *)__pyx_v_base); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":979 + * PyArray_SetBaseObject(arr, base) + * + * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< + * base = PyArray_BASE(arr) + * if base is NULL: + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":987 + * # Versions of the import_* functions which are more suitable for + * # Cython code. + * cdef inline int import_array() except -1: # <<<<<<<<<<<<<< + * try: + * __pyx_import_array() + */ + +static CYTHON_INLINE int __pyx_f_5numpy_import_array(void) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_t_4; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_8 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("import_array", 1); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":988 + * # Cython code. 
+ * cdef inline int import_array() except -1: + * try: # <<<<<<<<<<<<<< + * __pyx_import_array() + * except Exception: + */ + { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); + __Pyx_XGOTREF(__pyx_t_1); + __Pyx_XGOTREF(__pyx_t_2); + __Pyx_XGOTREF(__pyx_t_3); + /*try:*/ { + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":989 + * cdef inline int import_array() except -1: + * try: + * __pyx_import_array() # <<<<<<<<<<<<<< + * except Exception: + * raise ImportError("numpy.core.multiarray failed to import") + */ + __pyx_t_4 = _import_array(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 989, __pyx_L3_error) + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":988 + * # Cython code. + * cdef inline int import_array() except -1: + * try: # <<<<<<<<<<<<<< + * __pyx_import_array() + * except Exception: + */ + } + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + goto __pyx_L8_try_end; + __pyx_L3_error:; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":990 + * try: + * __pyx_import_array() + * except Exception: # <<<<<<<<<<<<<< + * raise ImportError("numpy.core.multiarray failed to import") + * + */ + __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); + if (__pyx_t_4) { + __Pyx_AddTraceback("numpy.import_array", __pyx_clineno, __pyx_lineno, __pyx_filename); + if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 990, __pyx_L5_except_error) + __Pyx_XGOTREF(__pyx_t_5); + __Pyx_XGOTREF(__pyx_t_6); + __Pyx_XGOTREF(__pyx_t_7); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":991 + * __pyx_import_array() + * except Exception: + * raise ImportError("numpy.core.multiarray failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int import_umath() except -1: + */ + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 991, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_8); + __Pyx_Raise(__pyx_t_8, 0, 0, 0); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __PYX_ERR(1, 991, __pyx_L5_except_error) + } + goto __pyx_L5_except_error; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":988 + * # Cython code. + * cdef inline int import_array() except -1: + * try: # <<<<<<<<<<<<<< + * __pyx_import_array() + * except Exception: + */ + __pyx_L5_except_error:; + __Pyx_XGIVEREF(__pyx_t_1); + __Pyx_XGIVEREF(__pyx_t_2); + __Pyx_XGIVEREF(__pyx_t_3); + __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); + goto __pyx_L1_error; + __pyx_L8_try_end:; + } + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":987 + * # Versions of the import_* functions which are more suitable for + * # Cython code. 
+ * cdef inline int import_array() except -1: # <<<<<<<<<<<<<< + * try: + * __pyx_import_array() + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_AddTraceback("numpy.import_array", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":993 + * raise ImportError("numpy.core.multiarray failed to import") + * + * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + +static CYTHON_INLINE int __pyx_f_5numpy_import_umath(void) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_t_4; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_8 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("import_umath", 1); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":994 + * + * cdef inline int import_umath() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); + __Pyx_XGOTREF(__pyx_t_1); + __Pyx_XGOTREF(__pyx_t_2); + __Pyx_XGOTREF(__pyx_t_3); + /*try:*/ { + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":995 + * cdef inline int import_umath() except -1: + * try: + * _import_umath() # <<<<<<<<<<<<<< + * except Exception: + * raise ImportError("numpy.core.umath failed to import") + */ + __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 995, __pyx_L3_error) + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":994 + * + * cdef inline int import_umath() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + } + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + goto __pyx_L8_try_end; + __pyx_L3_error:; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":996 + * try: + * _import_umath() + * except Exception: # <<<<<<<<<<<<<< + * raise ImportError("numpy.core.umath failed to import") + * + */ + __pyx_t_4 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); + if (__pyx_t_4) { + __Pyx_AddTraceback("numpy.import_umath", __pyx_clineno, __pyx_lineno, __pyx_filename); + if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 996, __pyx_L5_except_error) + __Pyx_XGOTREF(__pyx_t_5); + __Pyx_XGOTREF(__pyx_t_6); + __Pyx_XGOTREF(__pyx_t_7); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":997 + * _import_umath() + * except Exception: + * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int import_ufunc() except -1: + */ + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 997, __pyx_L5_except_error) + 
__Pyx_GOTREF(__pyx_t_8); + __Pyx_Raise(__pyx_t_8, 0, 0, 0); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __PYX_ERR(1, 997, __pyx_L5_except_error) + } + goto __pyx_L5_except_error; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":994 + * + * cdef inline int import_umath() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + __pyx_L5_except_error:; + __Pyx_XGIVEREF(__pyx_t_1); + __Pyx_XGIVEREF(__pyx_t_2); + __Pyx_XGIVEREF(__pyx_t_3); + __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); + goto __pyx_L1_error; + __pyx_L8_try_end:; + } + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":993 + * raise ImportError("numpy.core.multiarray failed to import") + * + * cdef inline int import_umath() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_AddTraceback("numpy.import_umath", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":999 + * raise ImportError("numpy.core.umath failed to import") + * + * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + +static CYTHON_INLINE int __pyx_f_5numpy_import_ufunc(void) { + int __pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_t_4; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_8 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("import_ufunc", 1); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1000 + * + * cdef inline int import_ufunc() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ExceptionSave(&__pyx_t_1, &__pyx_t_2, &__pyx_t_3); + __Pyx_XGOTREF(__pyx_t_1); + __Pyx_XGOTREF(__pyx_t_2); + __Pyx_XGOTREF(__pyx_t_3); + /*try:*/ { + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1001 + * cdef inline int import_ufunc() except -1: + * try: + * _import_umath() # <<<<<<<<<<<<<< + * except Exception: + * raise ImportError("numpy.core.umath failed to import") + */ + __pyx_t_4 = _import_umath(); if (unlikely(__pyx_t_4 == ((int)-1))) __PYX_ERR(1, 1001, __pyx_L3_error) + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1000 + * + * cdef inline int import_ufunc() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + } + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + goto __pyx_L8_try_end; + __pyx_L3_error:; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1002 + * try: + * _import_umath() + * except Exception: # <<<<<<<<<<<<<< + * raise ImportError("numpy.core.umath failed to import") + * + */ + __pyx_t_4 = 
__Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0]))); + if (__pyx_t_4) { + __Pyx_AddTraceback("numpy.import_ufunc", __pyx_clineno, __pyx_lineno, __pyx_filename); + if (__Pyx_GetException(&__pyx_t_5, &__pyx_t_6, &__pyx_t_7) < 0) __PYX_ERR(1, 1002, __pyx_L5_except_error) + __Pyx_XGOTREF(__pyx_t_5); + __Pyx_XGOTREF(__pyx_t_6); + __Pyx_XGOTREF(__pyx_t_7); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1003 + * _import_umath() + * except Exception: + * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< + * + * + */ + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_builtin_ImportError, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_8)) __PYX_ERR(1, 1003, __pyx_L5_except_error) + __Pyx_GOTREF(__pyx_t_8); + __Pyx_Raise(__pyx_t_8, 0, 0, 0); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __PYX_ERR(1, 1003, __pyx_L5_except_error) + } + goto __pyx_L5_except_error; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1000 + * + * cdef inline int import_ufunc() except -1: + * try: # <<<<<<<<<<<<<< + * _import_umath() + * except Exception: + */ + __pyx_L5_except_error:; + __Pyx_XGIVEREF(__pyx_t_1); + __Pyx_XGIVEREF(__pyx_t_2); + __Pyx_XGIVEREF(__pyx_t_3); + __Pyx_ExceptionReset(__pyx_t_1, __pyx_t_2, __pyx_t_3); + goto __pyx_L1_error; + __pyx_L8_try_end:; + } + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":999 + * raise ImportError("numpy.core.umath failed to import") + * + * cdef inline int import_ufunc() except -1: # <<<<<<<<<<<<<< + * try: + * _import_umath() + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_AddTraceback("numpy.import_ufunc", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1006 + * + * + * cdef inline bint is_timedelta64_object(object obj): # <<<<<<<<<<<<<< + * """ + * Cython equivalent of `isinstance(obj, np.timedelta64)` + */ + +static CYTHON_INLINE int __pyx_f_5numpy_is_timedelta64_object(PyObject *__pyx_v_obj) { + int __pyx_r; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1018 + * bool + * """ + * return PyObject_TypeCheck(obj, &PyTimedeltaArrType_Type) # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = PyObject_TypeCheck(__pyx_v_obj, (&PyTimedeltaArrType_Type)); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1006 + * + * + * cdef inline bint is_timedelta64_object(object obj): # <<<<<<<<<<<<<< + * """ + * Cython equivalent of `isinstance(obj, np.timedelta64)` + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1021 + * + * + * cdef inline bint is_datetime64_object(object obj): # <<<<<<<<<<<<<< + * """ + * Cython equivalent of `isinstance(obj, np.datetime64)` + */ + +static CYTHON_INLINE int __pyx_f_5numpy_is_datetime64_object(PyObject *__pyx_v_obj) { + int __pyx_r; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1033 + * bool + * """ + * return 
PyObject_TypeCheck(obj, &PyDatetimeArrType_Type) # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = PyObject_TypeCheck(__pyx_v_obj, (&PyDatetimeArrType_Type)); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1021 + * + * + * cdef inline bint is_datetime64_object(object obj): # <<<<<<<<<<<<<< + * """ + * Cython equivalent of `isinstance(obj, np.datetime64)` + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1036 + * + * + * cdef inline npy_datetime get_datetime64_value(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the int64 value underlying scalar numpy datetime64 object + */ + +static CYTHON_INLINE npy_datetime __pyx_f_5numpy_get_datetime64_value(PyObject *__pyx_v_obj) { + npy_datetime __pyx_r; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1043 + * also needed. That can be found using `get_datetime64_unit`. + * """ + * return (obj).obval # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = ((PyDatetimeScalarObject *)__pyx_v_obj)->obval; + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1036 + * + * + * cdef inline npy_datetime get_datetime64_value(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the int64 value underlying scalar numpy datetime64 object + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1046 + * + * + * cdef inline npy_timedelta get_timedelta64_value(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the int64 value underlying scalar numpy timedelta64 object + */ + +static CYTHON_INLINE npy_timedelta __pyx_f_5numpy_get_timedelta64_value(PyObject *__pyx_v_obj) { + npy_timedelta __pyx_r; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1050 + * returns the int64 value underlying scalar numpy timedelta64 object + * """ + * return (obj).obval # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = ((PyTimedeltaScalarObject *)__pyx_v_obj)->obval; + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1046 + * + * + * cdef inline npy_timedelta get_timedelta64_value(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the int64 value underlying scalar numpy timedelta64 object + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1053 + * + * + * cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the unit part of the dtype for a numpy datetime64 object. + */ + +static CYTHON_INLINE NPY_DATETIMEUNIT __pyx_f_5numpy_get_datetime64_unit(PyObject *__pyx_v_obj) { + NPY_DATETIMEUNIT __pyx_r; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1057 + * returns the unit part of the dtype for a numpy datetime64 object. 
+ * """ + * return (obj).obmeta.base # <<<<<<<<<<<<<< + */ + __pyx_r = ((NPY_DATETIMEUNIT)((PyDatetimeScalarObject *)__pyx_v_obj)->obmeta.base); + goto __pyx_L0; + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":1053 + * + * + * cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil: # <<<<<<<<<<<<<< + * """ + * returns the unit part of the dtype for a numpy datetime64 object. + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "opencood/utils/box_overlaps.pyx":17 + * + * + * def bbox_overlaps( # <<<<<<<<<<<<<< + * np.ndarray[DTYPE_t, ndim=2] boxes, + * np.ndarray[DTYPE_t, ndim=2] query_boxes): + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_8opencood_5utils_12box_overlaps_1bbox_overlaps(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +PyDoc_STRVAR(__pyx_doc_8opencood_5utils_12box_overlaps_bbox_overlaps, "\n Parameters\n ----------\n boxes: (N, 4) ndarray of float\n query_boxes: (K, 4) ndarray of float\n Returns\n -------\n overlaps: (N, K) ndarray of overlap between boxes and query_boxes\n "); +static PyMethodDef __pyx_mdef_8opencood_5utils_12box_overlaps_1bbox_overlaps = {"bbox_overlaps", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8opencood_5utils_12box_overlaps_1bbox_overlaps, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8opencood_5utils_12box_overlaps_bbox_overlaps}; +static PyObject *__pyx_pw_8opencood_5utils_12box_overlaps_1bbox_overlaps(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +) { + PyArrayObject *__pyx_v_boxes = 0; + PyArrayObject *__pyx_v_query_boxes = 0; + #if !CYTHON_METH_FASTCALL + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + #endif + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject* values[2] = {0,0}; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("bbox_overlaps (wrapper)", 0); + #if !CYTHON_METH_FASTCALL + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; + #endif + #endif + __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); + { + PyObject **__pyx_pyargnames[] = {&__pyx_n_s_boxes,&__pyx_n_s_query_boxes,0}; + if (__pyx_kwds) { + Py_ssize_t kw_args; + switch (__pyx_nargs) { + case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + CYTHON_FALLTHROUGH; + case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); + switch (__pyx_nargs) { + case 0: + if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_boxes)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 17, __pyx_L3_error) + else goto __pyx_L5_argtuple_error; + CYTHON_FALLTHROUGH; + case 1: + if (likely((values[1] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_query_boxes)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[1]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 17, 
__pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("bbox_overlaps", 1, 2, 2, 1); __PYX_ERR(0, 17, __pyx_L3_error) + } + } + if (unlikely(kw_args > 0)) { + const Py_ssize_t kwd_pos_args = __pyx_nargs; + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "bbox_overlaps") < 0)) __PYX_ERR(0, 17, __pyx_L3_error) + } + } else if (unlikely(__pyx_nargs != 2)) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + } + __pyx_v_boxes = ((PyArrayObject *)values[0]); + __pyx_v_query_boxes = ((PyArrayObject *)values[1]); + } + goto __pyx_L6_skip; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("bbox_overlaps", 1, 2, 2, __pyx_nargs); __PYX_ERR(0, 17, __pyx_L3_error) + __pyx_L6_skip:; + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_AddTraceback("opencood.utils.box_overlaps.bbox_overlaps", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_boxes), __pyx_ptype_5numpy_ndarray, 1, "boxes", 0))) __PYX_ERR(0, 18, __pyx_L1_error) + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_query_boxes), __pyx_ptype_5numpy_ndarray, 1, "query_boxes", 0))) __PYX_ERR(0, 19, __pyx_L1_error) + __pyx_r = __pyx_pf_8opencood_5utils_12box_overlaps_bbox_overlaps(__pyx_self, __pyx_v_boxes, __pyx_v_query_boxes); + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_8opencood_5utils_12box_overlaps_bbox_overlaps(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_boxes, PyArrayObject *__pyx_v_query_boxes) { + unsigned int __pyx_v_N; + unsigned int __pyx_v_K; + PyArrayObject *__pyx_v_overlaps = 0; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_v_iw; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_v_ih; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_v_box_area; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_v_ua; + unsigned int __pyx_v_k; + unsigned int __pyx_v_n; + __Pyx_LocalBuf_ND __pyx_pybuffernd_boxes; + __Pyx_Buffer __pyx_pybuffer_boxes; + __Pyx_LocalBuf_ND __pyx_pybuffernd_overlaps; + __Pyx_Buffer __pyx_pybuffer_overlaps; + __Pyx_LocalBuf_ND __pyx_pybuffernd_query_boxes; + __Pyx_Buffer __pyx_pybuffer_query_boxes; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + npy_intp *__pyx_t_1; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_5 = NULL; + PyArrayObject *__pyx_t_6 = NULL; + unsigned int __pyx_t_7; + unsigned int __pyx_t_8; + unsigned int __pyx_t_9; + size_t __pyx_t_10; + Py_ssize_t __pyx_t_11; + int __pyx_t_12; + size_t __pyx_t_13; + Py_ssize_t __pyx_t_14; + size_t __pyx_t_15; + Py_ssize_t __pyx_t_16; + size_t __pyx_t_17; + Py_ssize_t __pyx_t_18; + unsigned int __pyx_t_19; + unsigned int __pyx_t_20; + unsigned int __pyx_t_21; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_t_22; + 
__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_t_23; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_t_24; + int __pyx_t_25; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_t_26; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("bbox_overlaps", 1); + __pyx_pybuffer_overlaps.pybuffer.buf = NULL; + __pyx_pybuffer_overlaps.refcount = 0; + __pyx_pybuffernd_overlaps.data = NULL; + __pyx_pybuffernd_overlaps.rcbuffer = &__pyx_pybuffer_overlaps; + __pyx_pybuffer_boxes.pybuffer.buf = NULL; + __pyx_pybuffer_boxes.refcount = 0; + __pyx_pybuffernd_boxes.data = NULL; + __pyx_pybuffernd_boxes.rcbuffer = &__pyx_pybuffer_boxes; + __pyx_pybuffer_query_boxes.pybuffer.buf = NULL; + __pyx_pybuffer_query_boxes.refcount = 0; + __pyx_pybuffernd_query_boxes.data = NULL; + __pyx_pybuffernd_query_boxes.rcbuffer = &__pyx_pybuffer_query_boxes; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_boxes.rcbuffer->pybuffer, (PyObject*)__pyx_v_boxes, &__Pyx_TypeInfo_nn___pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) __PYX_ERR(0, 17, __pyx_L1_error) + } + __pyx_pybuffernd_boxes.diminfo[0].strides = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_boxes.diminfo[0].shape = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_boxes.diminfo[1].strides = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_boxes.diminfo[1].shape = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.shape[1]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_query_boxes.rcbuffer->pybuffer, (PyObject*)__pyx_v_query_boxes, &__Pyx_TypeInfo_nn___pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) __PYX_ERR(0, 17, __pyx_L1_error) + } + __pyx_pybuffernd_query_boxes.diminfo[0].strides = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_query_boxes.diminfo[0].shape = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_query_boxes.diminfo[1].strides = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_query_boxes.diminfo[1].shape = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.shape[1]; + + /* "opencood/utils/box_overlaps.pyx":29 + * overlaps: (N, K) ndarray of overlap between boxes and query_boxes + * """ + * cdef unsigned int N = boxes.shape[0] # <<<<<<<<<<<<<< + * cdef unsigned int K = query_boxes.shape[0] + * cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) + */ + __pyx_t_1 = __pyx_f_5numpy_7ndarray_5shape_shape(((PyArrayObject *)__pyx_v_boxes)); if (unlikely(__pyx_t_1 == ((npy_intp *)NULL) && PyErr_Occurred())) __PYX_ERR(0, 29, __pyx_L1_error) + __pyx_v_N = (__pyx_t_1[0]); + + /* "opencood/utils/box_overlaps.pyx":30 + * """ + * cdef unsigned int N = boxes.shape[0] + * cdef unsigned int K = query_boxes.shape[0] # <<<<<<<<<<<<<< + * cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) + * cdef DTYPE_t iw, ih, box_area + */ + __pyx_t_1 = __pyx_f_5numpy_7ndarray_5shape_shape(((PyArrayObject *)__pyx_v_query_boxes)); if (unlikely(__pyx_t_1 == ((npy_intp *)NULL) && PyErr_Occurred())) __PYX_ERR(0, 30, __pyx_L1_error) + __pyx_v_K = (__pyx_t_1[0]); + + /* "opencood/utils/box_overlaps.pyx":31 + * cdef unsigned int N = boxes.shape[0] + * cdef unsigned int K = query_boxes.shape[0] + * cdef 
np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) # <<<<<<<<<<<<<< + * cdef DTYPE_t iw, ih, box_area + * cdef DTYPE_t ua + */ + __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_np); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 31, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_zeros); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 31, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_PyInt_From_unsigned_int(__pyx_v_N); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 31, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_4 = __Pyx_PyInt_From_unsigned_int(__pyx_v_K); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 31, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 31, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_2); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_2)) __PYX_ERR(0, 31, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_4); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_4)) __PYX_ERR(0, 31, __pyx_L1_error); + __pyx_t_2 = 0; + __pyx_t_4 = 0; + __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 31, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_GIVEREF(__pyx_t_5); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_5)) __PYX_ERR(0, 31, __pyx_L1_error); + __pyx_t_5 = 0; + __pyx_t_5 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 31, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_DTYPE); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 31, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_t_5, __pyx_n_s_dtype, __pyx_t_2) < 0) __PYX_ERR(0, 31, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_4, __pyx_t_5); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 31, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 31, __pyx_L1_error) + __pyx_t_6 = ((PyArrayObject *)__pyx_t_2); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_overlaps.rcbuffer->pybuffer, (PyObject*)__pyx_t_6, &__Pyx_TypeInfo_nn___pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t, PyBUF_FORMAT| PyBUF_STRIDES| PyBUF_WRITABLE, 2, 0, __pyx_stack) == -1)) { + __pyx_v_overlaps = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.buf = NULL; + __PYX_ERR(0, 31, __pyx_L1_error) + } else {__pyx_pybuffernd_overlaps.diminfo[0].strides = __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_overlaps.diminfo[0].shape = __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_overlaps.diminfo[1].strides = __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_overlaps.diminfo[1].shape = __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.shape[1]; + } + } + __pyx_t_6 = 0; + __pyx_v_overlaps = ((PyArrayObject *)__pyx_t_2); + __pyx_t_2 = 0; + + /* "opencood/utils/box_overlaps.pyx":35 + * cdef DTYPE_t ua + * cdef unsigned int k, n + * for k in range(K): # <<<<<<<<<<<<<< + * box_area = ( + * (query_boxes[k, 2] - query_boxes[k, 0] + 1) * + */ + __pyx_t_7 = __pyx_v_K; + __pyx_t_8 = __pyx_t_7; + for (__pyx_t_9 = 0; __pyx_t_9 < __pyx_t_8; 
__pyx_t_9+=1) { + __pyx_v_k = __pyx_t_9; + + /* "opencood/utils/box_overlaps.pyx":37 + * for k in range(K): + * box_area = ( + * (query_boxes[k, 2] - query_boxes[k, 0] + 1) * # <<<<<<<<<<<<<< + * (query_boxes[k, 3] - query_boxes[k, 1] + 1) + * ) + */ + __pyx_t_10 = __pyx_v_k; + __pyx_t_11 = 2; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_10 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_11 < 0) { + __pyx_t_11 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_11 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_11 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 37, __pyx_L1_error) + } + __pyx_t_13 = __pyx_v_k; + __pyx_t_14 = 0; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_13 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_14 < 0) { + __pyx_t_14 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_14 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_14 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 37, __pyx_L1_error) + } + + /* "opencood/utils/box_overlaps.pyx":38 + * box_area = ( + * (query_boxes[k, 2] - query_boxes[k, 0] + 1) * + * (query_boxes[k, 3] - query_boxes[k, 1] + 1) # <<<<<<<<<<<<<< + * ) + * for n in range(N): + */ + __pyx_t_15 = __pyx_v_k; + __pyx_t_16 = 3; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_15 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_16 < 0) { + __pyx_t_16 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_16 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_16 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 38, __pyx_L1_error) + } + __pyx_t_17 = __pyx_v_k; + __pyx_t_18 = 1; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 38, __pyx_L1_error) + } + + /* "opencood/utils/box_overlaps.pyx":37 + * for k in range(K): + * box_area = ( + * (query_boxes[k, 2] - query_boxes[k, 0] + 1) * # <<<<<<<<<<<<<< + * (query_boxes[k, 3] - query_boxes[k, 1] + 1) + * ) + */ + __pyx_v_box_area = ((((*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_10, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_11, __pyx_pybuffernd_query_boxes.diminfo[1].strides)) - (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_13, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_14, __pyx_pybuffernd_query_boxes.diminfo[1].strides))) + 1.0) * (((*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_15, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_16, __pyx_pybuffernd_query_boxes.diminfo[1].strides)) - 
(*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_query_boxes.diminfo[1].strides))) + 1.0)); + + /* "opencood/utils/box_overlaps.pyx":40 + * (query_boxes[k, 3] - query_boxes[k, 1] + 1) + * ) + * for n in range(N): # <<<<<<<<<<<<<< + * iw = ( + * min(boxes[n, 2], query_boxes[k, 2]) - + */ + __pyx_t_19 = __pyx_v_N; + __pyx_t_20 = __pyx_t_19; + for (__pyx_t_21 = 0; __pyx_t_21 < __pyx_t_20; __pyx_t_21+=1) { + __pyx_v_n = __pyx_t_21; + + /* "opencood/utils/box_overlaps.pyx":42 + * for n in range(N): + * iw = ( + * min(boxes[n, 2], query_boxes[k, 2]) - # <<<<<<<<<<<<<< + * max(boxes[n, 0], query_boxes[k, 0]) + 1 + * ) + */ + __pyx_t_17 = __pyx_v_k; + __pyx_t_18 = 2; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 42, __pyx_L1_error) + } + __pyx_t_22 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_query_boxes.diminfo[1].strides)); + __pyx_t_17 = __pyx_v_n; + __pyx_t_18 = 2; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 42, __pyx_L1_error) + } + __pyx_t_23 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_boxes.diminfo[1].strides)); + __pyx_t_25 = (__pyx_t_22 < __pyx_t_23); + if (__pyx_t_25) { + __pyx_t_24 = __pyx_t_22; + } else { + __pyx_t_24 = __pyx_t_23; + } + + /* "opencood/utils/box_overlaps.pyx":43 + * iw = ( + * min(boxes[n, 2], query_boxes[k, 2]) - + * max(boxes[n, 0], query_boxes[k, 0]) + 1 # <<<<<<<<<<<<<< + * ) + * if iw > 0: + */ + __pyx_t_17 = __pyx_v_k; + __pyx_t_18 = 0; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 43, __pyx_L1_error) + } + __pyx_t_22 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_query_boxes.diminfo[1].strides)); + __pyx_t_17 = __pyx_v_n; + __pyx_t_18 = 0; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= 
(size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 43, __pyx_L1_error) + } + __pyx_t_23 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_boxes.diminfo[1].strides)); + __pyx_t_25 = (__pyx_t_22 > __pyx_t_23); + if (__pyx_t_25) { + __pyx_t_26 = __pyx_t_22; + } else { + __pyx_t_26 = __pyx_t_23; + } + + /* "opencood/utils/box_overlaps.pyx":42 + * for n in range(N): + * iw = ( + * min(boxes[n, 2], query_boxes[k, 2]) - # <<<<<<<<<<<<<< + * max(boxes[n, 0], query_boxes[k, 0]) + 1 + * ) + */ + __pyx_v_iw = ((__pyx_t_24 - __pyx_t_26) + 1.0); + + /* "opencood/utils/box_overlaps.pyx":45 + * max(boxes[n, 0], query_boxes[k, 0]) + 1 + * ) + * if iw > 0: # <<<<<<<<<<<<<< + * ih = ( + * min(boxes[n, 3], query_boxes[k, 3]) - + */ + __pyx_t_25 = (__pyx_v_iw > 0.0); + if (__pyx_t_25) { + + /* "opencood/utils/box_overlaps.pyx":47 + * if iw > 0: + * ih = ( + * min(boxes[n, 3], query_boxes[k, 3]) - # <<<<<<<<<<<<<< + * max(boxes[n, 1], query_boxes[k, 1]) + 1 + * ) + */ + __pyx_t_17 = __pyx_v_k; + __pyx_t_18 = 3; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 47, __pyx_L1_error) + } + __pyx_t_26 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_query_boxes.diminfo[1].strides)); + __pyx_t_17 = __pyx_v_n; + __pyx_t_18 = 3; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 47, __pyx_L1_error) + } + __pyx_t_24 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_boxes.diminfo[1].strides)); + __pyx_t_25 = (__pyx_t_26 < __pyx_t_24); + if (__pyx_t_25) { + __pyx_t_22 = __pyx_t_26; + } else { + __pyx_t_22 = __pyx_t_24; + } + + /* "opencood/utils/box_overlaps.pyx":48 + * ih = ( + * min(boxes[n, 3], query_boxes[k, 3]) - + * max(boxes[n, 1], query_boxes[k, 1]) + 1 # <<<<<<<<<<<<<< + * ) + * if ih > 0: + */ + __pyx_t_17 = __pyx_v_k; + __pyx_t_18 = 1; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if 
(unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 48, __pyx_L1_error) + } + __pyx_t_26 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_query_boxes.diminfo[1].strides)); + __pyx_t_17 = __pyx_v_n; + __pyx_t_18 = 1; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 48, __pyx_L1_error) + } + __pyx_t_24 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_boxes.diminfo[1].strides)); + __pyx_t_25 = (__pyx_t_26 > __pyx_t_24); + if (__pyx_t_25) { + __pyx_t_23 = __pyx_t_26; + } else { + __pyx_t_23 = __pyx_t_24; + } + + /* "opencood/utils/box_overlaps.pyx":47 + * if iw > 0: + * ih = ( + * min(boxes[n, 3], query_boxes[k, 3]) - # <<<<<<<<<<<<<< + * max(boxes[n, 1], query_boxes[k, 1]) + 1 + * ) + */ + __pyx_v_ih = ((__pyx_t_22 - __pyx_t_23) + 1.0); + + /* "opencood/utils/box_overlaps.pyx":50 + * max(boxes[n, 1], query_boxes[k, 1]) + 1 + * ) + * if ih > 0: # <<<<<<<<<<<<<< + * ua = float( + * (boxes[n, 2] - boxes[n, 0] + 1) * + */ + __pyx_t_25 = (__pyx_v_ih > 0.0); + if (__pyx_t_25) { + + /* "opencood/utils/box_overlaps.pyx":52 + * if ih > 0: + * ua = float( + * (boxes[n, 2] - boxes[n, 0] + 1) * # <<<<<<<<<<<<<< + * (boxes[n, 3] - boxes[n, 1] + 1) + + * box_area - iw * ih + */ + __pyx_t_17 = __pyx_v_n; + __pyx_t_18 = 2; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 52, __pyx_L1_error) + } + __pyx_t_15 = __pyx_v_n; + __pyx_t_16 = 0; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_15 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_16 < 0) { + __pyx_t_16 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_16 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_16 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 52, __pyx_L1_error) + } + + /* "opencood/utils/box_overlaps.pyx":53 + * ua = float( + * (boxes[n, 2] - boxes[n, 0] + 1) * + * (boxes[n, 3] - boxes[n, 1] + 1) + # <<<<<<<<<<<<<< + * box_area - iw * ih + * ) + */ + __pyx_t_13 = __pyx_v_n; + __pyx_t_14 = 3; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_13 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_14 < 0) { + __pyx_t_14 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_14 < 0)) __pyx_t_12 = 1; + } 
else if (unlikely(__pyx_t_14 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 53, __pyx_L1_error) + } + __pyx_t_10 = __pyx_v_n; + __pyx_t_11 = 1; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_10 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_11 < 0) { + __pyx_t_11 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_11 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_11 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 53, __pyx_L1_error) + } + + /* "opencood/utils/box_overlaps.pyx":51 + * ) + * if ih > 0: + * ua = float( # <<<<<<<<<<<<<< + * (boxes[n, 2] - boxes[n, 0] + 1) * + * (boxes[n, 3] - boxes[n, 1] + 1) + + */ + __pyx_v_ua = ((double)((((((*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_boxes.diminfo[1].strides)) - (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_15, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_16, __pyx_pybuffernd_boxes.diminfo[1].strides))) + 1.0) * (((*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_13, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_14, __pyx_pybuffernd_boxes.diminfo[1].strides)) - (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_10, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_11, __pyx_pybuffernd_boxes.diminfo[1].strides))) + 1.0)) + __pyx_v_box_area) - (__pyx_v_iw * __pyx_v_ih))); + + /* "opencood/utils/box_overlaps.pyx":56 + * box_area - iw * ih + * ) + * overlaps[n, k] = iw * ih / ua # <<<<<<<<<<<<<< + * return overlaps + * + */ + __pyx_t_23 = (__pyx_v_iw * __pyx_v_ih); + if (unlikely(__pyx_v_ua == 0)) { + PyErr_SetString(PyExc_ZeroDivisionError, "float division"); + __PYX_ERR(0, 56, __pyx_L1_error) + } + __pyx_t_10 = __pyx_v_n; + __pyx_t_13 = __pyx_v_k; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_10 >= (size_t)__pyx_pybuffernd_overlaps.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_13 >= (size_t)__pyx_pybuffernd_overlaps.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 56, __pyx_L1_error) + } + *__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_overlaps.rcbuffer->pybuffer.buf, __pyx_t_10, __pyx_pybuffernd_overlaps.diminfo[0].strides, __pyx_t_13, __pyx_pybuffernd_overlaps.diminfo[1].strides) = (__pyx_t_23 / __pyx_v_ua); + + /* "opencood/utils/box_overlaps.pyx":50 + * max(boxes[n, 1], query_boxes[k, 1]) + 1 + * ) + * if ih > 0: # <<<<<<<<<<<<<< + * ua = float( + * (boxes[n, 2] - boxes[n, 0] + 1) * + */ + } + + /* "opencood/utils/box_overlaps.pyx":45 + * max(boxes[n, 0], query_boxes[k, 0]) + 1 + * ) + * if iw > 0: # <<<<<<<<<<<<<< + * ih = ( + * min(boxes[n, 3], query_boxes[k, 3]) - + */ + } + } + } + + /* "opencood/utils/box_overlaps.pyx":57 + * ) + * overlaps[n, k] = iw * ih / ua + * return overlaps # <<<<<<<<<<<<<< + * + * def bbox_intersections( + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF((PyObject *)__pyx_v_overlaps); + __pyx_r = ((PyObject 
*)__pyx_v_overlaps); + goto __pyx_L0; + + /* "opencood/utils/box_overlaps.pyx":17 + * + * + * def bbox_overlaps( # <<<<<<<<<<<<<< + * np.ndarray[DTYPE_t, ndim=2] boxes, + * np.ndarray[DTYPE_t, ndim=2] query_boxes): + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_5); + { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_boxes.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_overlaps.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_query_boxes.rcbuffer->pybuffer); + __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);} + __Pyx_AddTraceback("opencood.utils.box_overlaps.bbox_overlaps", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + goto __pyx_L2; + __pyx_L0:; + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_boxes.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_overlaps.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_query_boxes.rcbuffer->pybuffer); + __pyx_L2:; + __Pyx_XDECREF((PyObject *)__pyx_v_overlaps); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "opencood/utils/box_overlaps.pyx":59 + * return overlaps + * + * def bbox_intersections( # <<<<<<<<<<<<<< + * np.ndarray[DTYPE_t, ndim=2] boxes, + * np.ndarray[DTYPE_t, ndim=2] query_boxes): + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_8opencood_5utils_12box_overlaps_3bbox_intersections(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +PyDoc_STRVAR(__pyx_doc_8opencood_5utils_12box_overlaps_2bbox_intersections, "\n For each query box compute the intersection ratio covered by boxes\n ----------\n Parameters\n ----------\n boxes: (N, 4) ndarray of float\n query_boxes: (K, 4) ndarray of float\n Returns\n -------\n overlaps: (N, K) ndarray of intersec between boxes and query_boxes\n "); +static PyMethodDef __pyx_mdef_8opencood_5utils_12box_overlaps_3bbox_intersections = {"bbox_intersections", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8opencood_5utils_12box_overlaps_3bbox_intersections, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8opencood_5utils_12box_overlaps_2bbox_intersections}; +static PyObject *__pyx_pw_8opencood_5utils_12box_overlaps_3bbox_intersections(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +) { + PyArrayObject *__pyx_v_boxes = 0; + PyArrayObject *__pyx_v_query_boxes = 0; + #if !CYTHON_METH_FASTCALL + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + #endif + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject* values[2] = {0,0}; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("bbox_intersections (wrapper)", 0); + #if !CYTHON_METH_FASTCALL + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; + #endif + #endif + __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); + { + PyObject 
**__pyx_pyargnames[] = {&__pyx_n_s_boxes,&__pyx_n_s_query_boxes,0}; + if (__pyx_kwds) { + Py_ssize_t kw_args; + switch (__pyx_nargs) { + case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + CYTHON_FALLTHROUGH; + case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); + switch (__pyx_nargs) { + case 0: + if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_boxes)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 59, __pyx_L3_error) + else goto __pyx_L5_argtuple_error; + CYTHON_FALLTHROUGH; + case 1: + if (likely((values[1] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_query_boxes)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[1]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 59, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("bbox_intersections", 1, 2, 2, 1); __PYX_ERR(0, 59, __pyx_L3_error) + } + } + if (unlikely(kw_args > 0)) { + const Py_ssize_t kwd_pos_args = __pyx_nargs; + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "bbox_intersections") < 0)) __PYX_ERR(0, 59, __pyx_L3_error) + } + } else if (unlikely(__pyx_nargs != 2)) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + } + __pyx_v_boxes = ((PyArrayObject *)values[0]); + __pyx_v_query_boxes = ((PyArrayObject *)values[1]); + } + goto __pyx_L6_skip; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("bbox_intersections", 1, 2, 2, __pyx_nargs); __PYX_ERR(0, 59, __pyx_L3_error) + __pyx_L6_skip:; + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_AddTraceback("opencood.utils.box_overlaps.bbox_intersections", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_boxes), __pyx_ptype_5numpy_ndarray, 1, "boxes", 0))) __PYX_ERR(0, 60, __pyx_L1_error) + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_query_boxes), __pyx_ptype_5numpy_ndarray, 1, "query_boxes", 0))) __PYX_ERR(0, 61, __pyx_L1_error) + __pyx_r = __pyx_pf_8opencood_5utils_12box_overlaps_2bbox_intersections(__pyx_self, __pyx_v_boxes, __pyx_v_query_boxes); + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_8opencood_5utils_12box_overlaps_2bbox_intersections(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_boxes, PyArrayObject *__pyx_v_query_boxes) { + unsigned int __pyx_v_N; + unsigned int __pyx_v_K; + PyArrayObject *__pyx_v_intersec = 0; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_v_iw; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_v_ih; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_v_box_area; + unsigned int __pyx_v_k; + unsigned int 
__pyx_v_n; + __Pyx_LocalBuf_ND __pyx_pybuffernd_boxes; + __Pyx_Buffer __pyx_pybuffer_boxes; + __Pyx_LocalBuf_ND __pyx_pybuffernd_intersec; + __Pyx_Buffer __pyx_pybuffer_intersec; + __Pyx_LocalBuf_ND __pyx_pybuffernd_query_boxes; + __Pyx_Buffer __pyx_pybuffer_query_boxes; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + npy_intp *__pyx_t_1; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_5 = NULL; + PyArrayObject *__pyx_t_6 = NULL; + unsigned int __pyx_t_7; + unsigned int __pyx_t_8; + unsigned int __pyx_t_9; + size_t __pyx_t_10; + Py_ssize_t __pyx_t_11; + int __pyx_t_12; + size_t __pyx_t_13; + Py_ssize_t __pyx_t_14; + size_t __pyx_t_15; + Py_ssize_t __pyx_t_16; + size_t __pyx_t_17; + Py_ssize_t __pyx_t_18; + unsigned int __pyx_t_19; + unsigned int __pyx_t_20; + unsigned int __pyx_t_21; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_t_22; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_t_23; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_t_24; + int __pyx_t_25; + __pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t __pyx_t_26; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("bbox_intersections", 1); + __pyx_pybuffer_intersec.pybuffer.buf = NULL; + __pyx_pybuffer_intersec.refcount = 0; + __pyx_pybuffernd_intersec.data = NULL; + __pyx_pybuffernd_intersec.rcbuffer = &__pyx_pybuffer_intersec; + __pyx_pybuffer_boxes.pybuffer.buf = NULL; + __pyx_pybuffer_boxes.refcount = 0; + __pyx_pybuffernd_boxes.data = NULL; + __pyx_pybuffernd_boxes.rcbuffer = &__pyx_pybuffer_boxes; + __pyx_pybuffer_query_boxes.pybuffer.buf = NULL; + __pyx_pybuffer_query_boxes.refcount = 0; + __pyx_pybuffernd_query_boxes.data = NULL; + __pyx_pybuffernd_query_boxes.rcbuffer = &__pyx_pybuffer_query_boxes; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_boxes.rcbuffer->pybuffer, (PyObject*)__pyx_v_boxes, &__Pyx_TypeInfo_nn___pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) __PYX_ERR(0, 59, __pyx_L1_error) + } + __pyx_pybuffernd_boxes.diminfo[0].strides = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_boxes.diminfo[0].shape = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_boxes.diminfo[1].strides = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_boxes.diminfo[1].shape = __pyx_pybuffernd_boxes.rcbuffer->pybuffer.shape[1]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_query_boxes.rcbuffer->pybuffer, (PyObject*)__pyx_v_query_boxes, &__Pyx_TypeInfo_nn___pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) __PYX_ERR(0, 59, __pyx_L1_error) + } + __pyx_pybuffernd_query_boxes.diminfo[0].strides = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_query_boxes.diminfo[0].shape = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_query_boxes.diminfo[1].strides = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_query_boxes.diminfo[1].shape = __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.shape[1]; + + /* "opencood/utils/box_overlaps.pyx":73 + * overlaps: (N, K) ndarray of intersec between boxes and query_boxes + * """ + * cdef unsigned int N = boxes.shape[0] # <<<<<<<<<<<<<< + * cdef unsigned int K = query_boxes.shape[0] + * 
cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) + */ + __pyx_t_1 = __pyx_f_5numpy_7ndarray_5shape_shape(((PyArrayObject *)__pyx_v_boxes)); if (unlikely(__pyx_t_1 == ((npy_intp *)NULL) && PyErr_Occurred())) __PYX_ERR(0, 73, __pyx_L1_error) + __pyx_v_N = (__pyx_t_1[0]); + + /* "opencood/utils/box_overlaps.pyx":74 + * """ + * cdef unsigned int N = boxes.shape[0] + * cdef unsigned int K = query_boxes.shape[0] # <<<<<<<<<<<<<< + * cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) + * cdef DTYPE_t iw, ih, box_area + */ + __pyx_t_1 = __pyx_f_5numpy_7ndarray_5shape_shape(((PyArrayObject *)__pyx_v_query_boxes)); if (unlikely(__pyx_t_1 == ((npy_intp *)NULL) && PyErr_Occurred())) __PYX_ERR(0, 74, __pyx_L1_error) + __pyx_v_K = (__pyx_t_1[0]); + + /* "opencood/utils/box_overlaps.pyx":75 + * cdef unsigned int N = boxes.shape[0] + * cdef unsigned int K = query_boxes.shape[0] + * cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) # <<<<<<<<<<<<<< + * cdef DTYPE_t iw, ih, box_area + * cdef DTYPE_t ua + */ + __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_np); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 75, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_zeros); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 75, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_PyInt_From_unsigned_int(__pyx_v_N); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 75, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_4 = __Pyx_PyInt_From_unsigned_int(__pyx_v_K); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 75, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 75, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_2); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_2)) __PYX_ERR(0, 75, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_4); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_4)) __PYX_ERR(0, 75, __pyx_L1_error); + __pyx_t_2 = 0; + __pyx_t_4 = 0; + __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 75, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_GIVEREF(__pyx_t_5); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_5)) __PYX_ERR(0, 75, __pyx_L1_error); + __pyx_t_5 = 0; + __pyx_t_5 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 75, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_DTYPE); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 75, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_t_5, __pyx_n_s_dtype, __pyx_t_2) < 0) __PYX_ERR(0, 75, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_PyObject_Call(__pyx_t_3, __pyx_t_4, __pyx_t_5); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 75, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 75, __pyx_L1_error) + __pyx_t_6 = ((PyArrayObject *)__pyx_t_2); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_intersec.rcbuffer->pybuffer, (PyObject*)__pyx_t_6, &__Pyx_TypeInfo_nn___pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t, PyBUF_FORMAT| PyBUF_STRIDES| PyBUF_WRITABLE, 2, 0, __pyx_stack) == -1)) { + __pyx_v_intersec = ((PyArrayObject 
*)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_intersec.rcbuffer->pybuffer.buf = NULL; + __PYX_ERR(0, 75, __pyx_L1_error) + } else {__pyx_pybuffernd_intersec.diminfo[0].strides = __pyx_pybuffernd_intersec.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_intersec.diminfo[0].shape = __pyx_pybuffernd_intersec.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_intersec.diminfo[1].strides = __pyx_pybuffernd_intersec.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_intersec.diminfo[1].shape = __pyx_pybuffernd_intersec.rcbuffer->pybuffer.shape[1]; + } + } + __pyx_t_6 = 0; + __pyx_v_intersec = ((PyArrayObject *)__pyx_t_2); + __pyx_t_2 = 0; + + /* "opencood/utils/box_overlaps.pyx":79 + * cdef DTYPE_t ua + * cdef unsigned int k, n + * for k in range(K): # <<<<<<<<<<<<<< + * box_area = ( + * (query_boxes[k, 2] - query_boxes[k, 0] + 1) * + */ + __pyx_t_7 = __pyx_v_K; + __pyx_t_8 = __pyx_t_7; + for (__pyx_t_9 = 0; __pyx_t_9 < __pyx_t_8; __pyx_t_9+=1) { + __pyx_v_k = __pyx_t_9; + + /* "opencood/utils/box_overlaps.pyx":81 + * for k in range(K): + * box_area = ( + * (query_boxes[k, 2] - query_boxes[k, 0] + 1) * # <<<<<<<<<<<<<< + * (query_boxes[k, 3] - query_boxes[k, 1] + 1) + * ) + */ + __pyx_t_10 = __pyx_v_k; + __pyx_t_11 = 2; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_10 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_11 < 0) { + __pyx_t_11 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_11 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_11 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 81, __pyx_L1_error) + } + __pyx_t_13 = __pyx_v_k; + __pyx_t_14 = 0; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_13 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_14 < 0) { + __pyx_t_14 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_14 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_14 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 81, __pyx_L1_error) + } + + /* "opencood/utils/box_overlaps.pyx":82 + * box_area = ( + * (query_boxes[k, 2] - query_boxes[k, 0] + 1) * + * (query_boxes[k, 3] - query_boxes[k, 1] + 1) # <<<<<<<<<<<<<< + * ) + * for n in range(N): + */ + __pyx_t_15 = __pyx_v_k; + __pyx_t_16 = 3; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_15 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_16 < 0) { + __pyx_t_16 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_16 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_16 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 82, __pyx_L1_error) + } + __pyx_t_17 = __pyx_v_k; + __pyx_t_18 = 1; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 82, __pyx_L1_error) + } + + /* "opencood/utils/box_overlaps.pyx":81 + * for k in range(K): + * box_area = ( + * 
(query_boxes[k, 2] - query_boxes[k, 0] + 1) * # <<<<<<<<<<<<<< + * (query_boxes[k, 3] - query_boxes[k, 1] + 1) + * ) + */ + __pyx_v_box_area = ((((*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_10, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_11, __pyx_pybuffernd_query_boxes.diminfo[1].strides)) - (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_13, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_14, __pyx_pybuffernd_query_boxes.diminfo[1].strides))) + 1.0) * (((*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_15, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_16, __pyx_pybuffernd_query_boxes.diminfo[1].strides)) - (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_query_boxes.diminfo[1].strides))) + 1.0)); + + /* "opencood/utils/box_overlaps.pyx":84 + * (query_boxes[k, 3] - query_boxes[k, 1] + 1) + * ) + * for n in range(N): # <<<<<<<<<<<<<< + * iw = ( + * min(boxes[n, 2], query_boxes[k, 2]) - + */ + __pyx_t_19 = __pyx_v_N; + __pyx_t_20 = __pyx_t_19; + for (__pyx_t_21 = 0; __pyx_t_21 < __pyx_t_20; __pyx_t_21+=1) { + __pyx_v_n = __pyx_t_21; + + /* "opencood/utils/box_overlaps.pyx":86 + * for n in range(N): + * iw = ( + * min(boxes[n, 2], query_boxes[k, 2]) - # <<<<<<<<<<<<<< + * max(boxes[n, 0], query_boxes[k, 0]) + 1 + * ) + */ + __pyx_t_17 = __pyx_v_k; + __pyx_t_18 = 2; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 86, __pyx_L1_error) + } + __pyx_t_22 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_query_boxes.diminfo[1].strides)); + __pyx_t_17 = __pyx_v_n; + __pyx_t_18 = 2; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 86, __pyx_L1_error) + } + __pyx_t_23 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_boxes.diminfo[1].strides)); + __pyx_t_25 = (__pyx_t_22 < __pyx_t_23); + if (__pyx_t_25) { + __pyx_t_24 = __pyx_t_22; + } else { + __pyx_t_24 = __pyx_t_23; + } + + /* "opencood/utils/box_overlaps.pyx":87 + * iw = ( + * min(boxes[n, 2], query_boxes[k, 2]) - + * max(boxes[n, 0], query_boxes[k, 0]) + 1 # <<<<<<<<<<<<<< + * ) + * if iw > 0: + */ 
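+ /* Hand-written illustrative sketch (not part of the Cython-generated output):
+  * the buffer-index checks and strided loads that follow expand the .pyx line
+  * quoted in the comment above. The quantity being assembled is, in the
+  * original Python/Cython form,
+  *
+  *     iw = min(boxes[n, 2], query_boxes[k, 2]) - max(boxes[n, 0], query_boxes[k, 0]) + 1
+  *
+  * i.e. the horizontal overlap width under the inclusive (+1) pixel convention
+  * used by bbox_overlaps and bbox_intersections; when iw <= 0 the box pair has
+  * no horizontal overlap and the height/area computation is skipped.
+  */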
+ __pyx_t_17 = __pyx_v_k; + __pyx_t_18 = 0; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 87, __pyx_L1_error) + } + __pyx_t_22 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_query_boxes.diminfo[1].strides)); + __pyx_t_17 = __pyx_v_n; + __pyx_t_18 = 0; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 87, __pyx_L1_error) + } + __pyx_t_23 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_boxes.diminfo[1].strides)); + __pyx_t_25 = (__pyx_t_22 > __pyx_t_23); + if (__pyx_t_25) { + __pyx_t_26 = __pyx_t_22; + } else { + __pyx_t_26 = __pyx_t_23; + } + + /* "opencood/utils/box_overlaps.pyx":86 + * for n in range(N): + * iw = ( + * min(boxes[n, 2], query_boxes[k, 2]) - # <<<<<<<<<<<<<< + * max(boxes[n, 0], query_boxes[k, 0]) + 1 + * ) + */ + __pyx_v_iw = ((__pyx_t_24 - __pyx_t_26) + 1.0); + + /* "opencood/utils/box_overlaps.pyx":89 + * max(boxes[n, 0], query_boxes[k, 0]) + 1 + * ) + * if iw > 0: # <<<<<<<<<<<<<< + * ih = ( + * min(boxes[n, 3], query_boxes[k, 3]) - + */ + __pyx_t_25 = (__pyx_v_iw > 0.0); + if (__pyx_t_25) { + + /* "opencood/utils/box_overlaps.pyx":91 + * if iw > 0: + * ih = ( + * min(boxes[n, 3], query_boxes[k, 3]) - # <<<<<<<<<<<<<< + * max(boxes[n, 1], query_boxes[k, 1]) + 1 + * ) + */ + __pyx_t_17 = __pyx_v_k; + __pyx_t_18 = 3; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 91, __pyx_L1_error) + } + __pyx_t_26 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_query_boxes.diminfo[1].strides)); + __pyx_t_17 = __pyx_v_n; + __pyx_t_18 = 3; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + 
__Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 91, __pyx_L1_error) + } + __pyx_t_24 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_boxes.diminfo[1].strides)); + __pyx_t_25 = (__pyx_t_26 < __pyx_t_24); + if (__pyx_t_25) { + __pyx_t_22 = __pyx_t_26; + } else { + __pyx_t_22 = __pyx_t_24; + } + + /* "opencood/utils/box_overlaps.pyx":92 + * ih = ( + * min(boxes[n, 3], query_boxes[k, 3]) - + * max(boxes[n, 1], query_boxes[k, 1]) + 1 # <<<<<<<<<<<<<< + * ) + * if ih > 0: + */ + __pyx_t_17 = __pyx_v_k; + __pyx_t_18 = 1; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_query_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_query_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_query_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 92, __pyx_L1_error) + } + __pyx_t_26 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_query_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_query_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_query_boxes.diminfo[1].strides)); + __pyx_t_17 = __pyx_v_n; + __pyx_t_18 = 1; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_boxes.diminfo[0].shape)) __pyx_t_12 = 0; + if (__pyx_t_18 < 0) { + __pyx_t_18 += __pyx_pybuffernd_boxes.diminfo[1].shape; + if (unlikely(__pyx_t_18 < 0)) __pyx_t_12 = 1; + } else if (unlikely(__pyx_t_18 >= __pyx_pybuffernd_boxes.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 92, __pyx_L1_error) + } + __pyx_t_24 = (*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_boxes.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_boxes.diminfo[0].strides, __pyx_t_18, __pyx_pybuffernd_boxes.diminfo[1].strides)); + __pyx_t_25 = (__pyx_t_26 > __pyx_t_24); + if (__pyx_t_25) { + __pyx_t_23 = __pyx_t_26; + } else { + __pyx_t_23 = __pyx_t_24; + } + + /* "opencood/utils/box_overlaps.pyx":91 + * if iw > 0: + * ih = ( + * min(boxes[n, 3], query_boxes[k, 3]) - # <<<<<<<<<<<<<< + * max(boxes[n, 1], query_boxes[k, 1]) + 1 + * ) + */ + __pyx_v_ih = ((__pyx_t_22 - __pyx_t_23) + 1.0); + + /* "opencood/utils/box_overlaps.pyx":94 + * max(boxes[n, 1], query_boxes[k, 1]) + 1 + * ) + * if ih > 0: # <<<<<<<<<<<<<< + * intersec[n, k] = iw * ih / box_area + * return intersec + */ + __pyx_t_25 = (__pyx_v_ih > 0.0); + if (__pyx_t_25) { + + /* "opencood/utils/box_overlaps.pyx":95 + * ) + * if ih > 0: + * intersec[n, k] = iw * ih / box_area # <<<<<<<<<<<<<< + * return intersec + * + */ + __pyx_t_23 = (__pyx_v_iw * __pyx_v_ih); + if (unlikely(__pyx_v_box_area == 0)) { + PyErr_SetString(PyExc_ZeroDivisionError, "float division"); + __PYX_ERR(0, 95, __pyx_L1_error) + } + __pyx_t_17 = __pyx_v_n; + __pyx_t_15 = __pyx_v_k; + __pyx_t_12 = -1; + if (unlikely(__pyx_t_17 >= (size_t)__pyx_pybuffernd_intersec.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_15 >= (size_t)__pyx_pybuffernd_intersec.diminfo[1].shape)) __pyx_t_12 = 1; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 95, __pyx_L1_error) + } + 
*__Pyx_BufPtrStrided2d(__pyx_t_8opencood_5utils_12box_overlaps_DTYPE_t *, __pyx_pybuffernd_intersec.rcbuffer->pybuffer.buf, __pyx_t_17, __pyx_pybuffernd_intersec.diminfo[0].strides, __pyx_t_15, __pyx_pybuffernd_intersec.diminfo[1].strides) = (__pyx_t_23 / __pyx_v_box_area); + + /* "opencood/utils/box_overlaps.pyx":94 + * max(boxes[n, 1], query_boxes[k, 1]) + 1 + * ) + * if ih > 0: # <<<<<<<<<<<<<< + * intersec[n, k] = iw * ih / box_area + * return intersec + */ + } + + /* "opencood/utils/box_overlaps.pyx":89 + * max(boxes[n, 0], query_boxes[k, 0]) + 1 + * ) + * if iw > 0: # <<<<<<<<<<<<<< + * ih = ( + * min(boxes[n, 3], query_boxes[k, 3]) - + */ + } + } + } + + /* "opencood/utils/box_overlaps.pyx":96 + * if ih > 0: + * intersec[n, k] = iw * ih / box_area + * return intersec # <<<<<<<<<<<<<< + * + * # Compute bounding box voting + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF((PyObject *)__pyx_v_intersec); + __pyx_r = ((PyObject *)__pyx_v_intersec); + goto __pyx_L0; + + /* "opencood/utils/box_overlaps.pyx":59 + * return overlaps + * + * def bbox_intersections( # <<<<<<<<<<<<<< + * np.ndarray[DTYPE_t, ndim=2] boxes, + * np.ndarray[DTYPE_t, ndim=2] query_boxes): + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_5); + { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_boxes.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_intersec.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_query_boxes.rcbuffer->pybuffer); + __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);} + __Pyx_AddTraceback("opencood.utils.box_overlaps.bbox_intersections", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + goto __pyx_L2; + __pyx_L0:; + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_boxes.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_intersec.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_query_boxes.rcbuffer->pybuffer); + __pyx_L2:; + __Pyx_XDECREF((PyObject *)__pyx_v_intersec); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "opencood/utils/box_overlaps.pyx":99 + * + * # Compute bounding box voting + * def box_vote( # <<<<<<<<<<<<<< + * np.ndarray[float, ndim=2] dets_NMS, + * np.ndarray[float, ndim=2] dets_all): + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_8opencood_5utils_12box_overlaps_5box_vote(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +); /*proto*/ +static PyMethodDef __pyx_mdef_8opencood_5utils_12box_overlaps_5box_vote = {"box_vote", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8opencood_5utils_12box_overlaps_5box_vote, __Pyx_METH_FASTCALL|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_8opencood_5utils_12box_overlaps_5box_vote(PyObject *__pyx_self, +#if CYTHON_METH_FASTCALL +PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds +#else +PyObject *__pyx_args, PyObject *__pyx_kwds +#endif +) { + PyArrayObject *__pyx_v_dets_NMS = 0; + PyArrayObject *__pyx_v_dets_all = 0; + #if !CYTHON_METH_FASTCALL + CYTHON_UNUSED Py_ssize_t __pyx_nargs; + #endif + CYTHON_UNUSED PyObject *const *__pyx_kwvalues; + PyObject* values[2] = {0,0}; + int __pyx_lineno = 
0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("box_vote (wrapper)", 0); + #if !CYTHON_METH_FASTCALL + #if CYTHON_ASSUME_SAFE_MACROS + __pyx_nargs = PyTuple_GET_SIZE(__pyx_args); + #else + __pyx_nargs = PyTuple_Size(__pyx_args); if (unlikely(__pyx_nargs < 0)) return NULL; + #endif + #endif + __pyx_kwvalues = __Pyx_KwValues_FASTCALL(__pyx_args, __pyx_nargs); + { + PyObject **__pyx_pyargnames[] = {&__pyx_n_s_dets_NMS,&__pyx_n_s_dets_all,0}; + if (__pyx_kwds) { + Py_ssize_t kw_args; + switch (__pyx_nargs) { + case 2: values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + CYTHON_FALLTHROUGH; + case 1: values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + CYTHON_FALLTHROUGH; + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = __Pyx_NumKwargs_FASTCALL(__pyx_kwds); + switch (__pyx_nargs) { + case 0: + if (likely((values[0] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_dets_NMS)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[0]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 99, __pyx_L3_error) + else goto __pyx_L5_argtuple_error; + CYTHON_FALLTHROUGH; + case 1: + if (likely((values[1] = __Pyx_GetKwValue_FASTCALL(__pyx_kwds, __pyx_kwvalues, __pyx_n_s_dets_all)) != 0)) { + (void)__Pyx_Arg_NewRef_FASTCALL(values[1]); + kw_args--; + } + else if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 99, __pyx_L3_error) + else { + __Pyx_RaiseArgtupleInvalid("box_vote", 1, 2, 2, 1); __PYX_ERR(0, 99, __pyx_L3_error) + } + } + if (unlikely(kw_args > 0)) { + const Py_ssize_t kwd_pos_args = __pyx_nargs; + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_kwvalues, __pyx_pyargnames, 0, values + 0, kwd_pos_args, "box_vote") < 0)) __PYX_ERR(0, 99, __pyx_L3_error) + } + } else if (unlikely(__pyx_nargs != 2)) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = __Pyx_Arg_FASTCALL(__pyx_args, 0); + values[1] = __Pyx_Arg_FASTCALL(__pyx_args, 1); + } + __pyx_v_dets_NMS = ((PyArrayObject *)values[0]); + __pyx_v_dets_all = ((PyArrayObject *)values[1]); + } + goto __pyx_L6_skip; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("box_vote", 1, 2, 2, __pyx_nargs); __PYX_ERR(0, 99, __pyx_L3_error) + __pyx_L6_skip:; + goto __pyx_L4_argument_unpacking_done; + __pyx_L3_error:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_AddTraceback("opencood.utils.box_overlaps.box_vote", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_dets_NMS), __pyx_ptype_5numpy_ndarray, 1, "dets_NMS", 0))) __PYX_ERR(0, 100, __pyx_L1_error) + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_dets_all), __pyx_ptype_5numpy_ndarray, 1, "dets_all", 0))) __PYX_ERR(0, 101, __pyx_L1_error) + __pyx_r = __pyx_pf_8opencood_5utils_12box_overlaps_4box_vote(__pyx_self, __pyx_v_dets_NMS, __pyx_v_dets_all); + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + { + Py_ssize_t __pyx_temp; + for (__pyx_temp=0; __pyx_temp < (Py_ssize_t)(sizeof(values)/sizeof(values[0])); ++__pyx_temp) { + __Pyx_Arg_XDECREF_FASTCALL(values[__pyx_temp]); + } + } + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_8opencood_5utils_12box_overlaps_4box_vote(CYTHON_UNUSED 
PyObject *__pyx_self, PyArrayObject *__pyx_v_dets_NMS, PyArrayObject *__pyx_v_dets_all) { + PyArrayObject *__pyx_v_dets_voted = 0; + unsigned int __pyx_v_N; + unsigned int __pyx_v_M; + PyArrayObject *__pyx_v_det = 0; + PyArrayObject *__pyx_v_acc_box = 0; + float __pyx_v_acc_score; + PyArrayObject *__pyx_v_det2 = 0; + float __pyx_v_bi0; + float __pyx_v_bi1; + float __pyx_v_bi3; + float __pyx_v_iw; + float __pyx_v_ih; + float __pyx_v_ua; + float __pyx_v_thresh; + unsigned int __pyx_v_i; + unsigned int __pyx_v_m; + PyObject *__pyx_v_bi2 = NULL; + float __pyx_v_ov; + __Pyx_LocalBuf_ND __pyx_pybuffernd_acc_box; + __Pyx_Buffer __pyx_pybuffer_acc_box; + __Pyx_LocalBuf_ND __pyx_pybuffernd_det; + __Pyx_Buffer __pyx_pybuffer_det; + __Pyx_LocalBuf_ND __pyx_pybuffernd_det2; + __Pyx_Buffer __pyx_pybuffer_det2; + __Pyx_LocalBuf_ND __pyx_pybuffernd_dets_NMS; + __Pyx_Buffer __pyx_pybuffer_dets_NMS; + __Pyx_LocalBuf_ND __pyx_pybuffernd_dets_all; + __Pyx_Buffer __pyx_pybuffer_dets_all; + __Pyx_LocalBuf_ND __pyx_pybuffernd_dets_voted; + __Pyx_Buffer __pyx_pybuffer_dets_voted; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + npy_intp *__pyx_t_3; + PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_5 = NULL; + PyObject *__pyx_t_6 = NULL; + PyArrayObject *__pyx_t_7 = NULL; + unsigned int __pyx_t_8; + unsigned int __pyx_t_9; + unsigned int __pyx_t_10; + PyArrayObject *__pyx_t_11 = NULL; + int __pyx_t_12; + PyObject *__pyx_t_13 = NULL; + PyObject *__pyx_t_14 = NULL; + PyObject *__pyx_t_15 = NULL; + PyArrayObject *__pyx_t_16 = NULL; + unsigned int __pyx_t_17; + unsigned int __pyx_t_18; + unsigned int __pyx_t_19; + PyArrayObject *__pyx_t_20 = NULL; + Py_ssize_t __pyx_t_21; + float __pyx_t_22; + float __pyx_t_23; + float __pyx_t_24; + int __pyx_t_25; + int __pyx_t_26; + Py_ssize_t __pyx_t_27; + Py_ssize_t __pyx_t_28; + Py_ssize_t __pyx_t_29; + Py_ssize_t __pyx_t_30; + Py_ssize_t __pyx_t_31; + Py_ssize_t __pyx_t_32; + Py_ssize_t __pyx_t_33; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("box_vote", 1); + __pyx_pybuffer_dets_voted.pybuffer.buf = NULL; + __pyx_pybuffer_dets_voted.refcount = 0; + __pyx_pybuffernd_dets_voted.data = NULL; + __pyx_pybuffernd_dets_voted.rcbuffer = &__pyx_pybuffer_dets_voted; + __pyx_pybuffer_det.pybuffer.buf = NULL; + __pyx_pybuffer_det.refcount = 0; + __pyx_pybuffernd_det.data = NULL; + __pyx_pybuffernd_det.rcbuffer = &__pyx_pybuffer_det; + __pyx_pybuffer_acc_box.pybuffer.buf = NULL; + __pyx_pybuffer_acc_box.refcount = 0; + __pyx_pybuffernd_acc_box.data = NULL; + __pyx_pybuffernd_acc_box.rcbuffer = &__pyx_pybuffer_acc_box; + __pyx_pybuffer_det2.pybuffer.buf = NULL; + __pyx_pybuffer_det2.refcount = 0; + __pyx_pybuffernd_det2.data = NULL; + __pyx_pybuffernd_det2.rcbuffer = &__pyx_pybuffer_det2; + __pyx_pybuffer_dets_NMS.pybuffer.buf = NULL; + __pyx_pybuffer_dets_NMS.refcount = 0; + __pyx_pybuffernd_dets_NMS.data = NULL; + __pyx_pybuffernd_dets_NMS.rcbuffer = &__pyx_pybuffer_dets_NMS; + __pyx_pybuffer_dets_all.pybuffer.buf = NULL; + __pyx_pybuffer_dets_all.refcount = 0; + __pyx_pybuffernd_dets_all.data = NULL; + __pyx_pybuffernd_dets_all.rcbuffer = &__pyx_pybuffer_dets_all; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_dets_NMS.rcbuffer->pybuffer, (PyObject*)__pyx_v_dets_NMS, &__Pyx_TypeInfo_float, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) __PYX_ERR(0, 99, __pyx_L1_error) + } + 
__pyx_pybuffernd_dets_NMS.diminfo[0].strides = __pyx_pybuffernd_dets_NMS.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_dets_NMS.diminfo[0].shape = __pyx_pybuffernd_dets_NMS.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_dets_NMS.diminfo[1].strides = __pyx_pybuffernd_dets_NMS.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_dets_NMS.diminfo[1].shape = __pyx_pybuffernd_dets_NMS.rcbuffer->pybuffer.shape[1]; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_dets_all.rcbuffer->pybuffer, (PyObject*)__pyx_v_dets_all, &__Pyx_TypeInfo_float, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) __PYX_ERR(0, 99, __pyx_L1_error) + } + __pyx_pybuffernd_dets_all.diminfo[0].strides = __pyx_pybuffernd_dets_all.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_dets_all.diminfo[0].shape = __pyx_pybuffernd_dets_all.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_dets_all.diminfo[1].strides = __pyx_pybuffernd_dets_all.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_dets_all.diminfo[1].shape = __pyx_pybuffernd_dets_all.rcbuffer->pybuffer.shape[1]; + + /* "opencood/utils/box_overlaps.pyx":102 + * np.ndarray[float, ndim=2] dets_NMS, + * np.ndarray[float, ndim=2] dets_all): + * cdef np.ndarray[float, ndim=2] dets_voted = np.zeros((dets_NMS.shape[0], dets_NMS.shape[1]), dtype=np.float32) # <<<<<<<<<<<<<< + * cdef unsigned int N = dets_NMS.shape[0] + * cdef unsigned int M = dets_all.shape[0] + */ + __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_zeros); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __pyx_f_5numpy_7ndarray_5shape_shape(((PyArrayObject *)__pyx_v_dets_NMS)); if (unlikely(__pyx_t_3 == ((npy_intp *)NULL) && PyErr_Occurred())) __PYX_ERR(0, 102, __pyx_L1_error) + __pyx_t_1 = PyInt_FromSsize_t((__pyx_t_3[0])); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = __pyx_f_5numpy_7ndarray_5shape_shape(((PyArrayObject *)__pyx_v_dets_NMS)); if (unlikely(__pyx_t_3 == ((npy_intp *)NULL) && PyErr_Occurred())) __PYX_ERR(0, 102, __pyx_L1_error) + __pyx_t_4 = PyInt_FromSsize_t((__pyx_t_3[1])); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_1); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_1)) __PYX_ERR(0, 102, __pyx_L1_error); + __Pyx_GIVEREF(__pyx_t_4); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_t_4)) __PYX_ERR(0, 102, __pyx_L1_error); + __pyx_t_1 = 0; + __pyx_t_4 = 0; + __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_GIVEREF(__pyx_t_5); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_5)) __PYX_ERR(0, 102, __pyx_L1_error); + __pyx_t_5 = 0; + __pyx_t_5 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_float32); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if 
(PyDict_SetItem(__pyx_t_5, __pyx_n_s_dtype, __pyx_t_6) < 0) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_4, __pyx_t_5); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 102, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 102, __pyx_L1_error) + __pyx_t_7 = ((PyArrayObject *)__pyx_t_6); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_dets_voted.rcbuffer->pybuffer, (PyObject*)__pyx_t_7, &__Pyx_TypeInfo_float, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) { + __pyx_v_dets_voted = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_dets_voted.rcbuffer->pybuffer.buf = NULL; + __PYX_ERR(0, 102, __pyx_L1_error) + } else {__pyx_pybuffernd_dets_voted.diminfo[0].strides = __pyx_pybuffernd_dets_voted.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_dets_voted.diminfo[0].shape = __pyx_pybuffernd_dets_voted.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_dets_voted.diminfo[1].strides = __pyx_pybuffernd_dets_voted.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_dets_voted.diminfo[1].shape = __pyx_pybuffernd_dets_voted.rcbuffer->pybuffer.shape[1]; + } + } + __pyx_t_7 = 0; + __pyx_v_dets_voted = ((PyArrayObject *)__pyx_t_6); + __pyx_t_6 = 0; + + /* "opencood/utils/box_overlaps.pyx":103 + * np.ndarray[float, ndim=2] dets_all): + * cdef np.ndarray[float, ndim=2] dets_voted = np.zeros((dets_NMS.shape[0], dets_NMS.shape[1]), dtype=np.float32) + * cdef unsigned int N = dets_NMS.shape[0] # <<<<<<<<<<<<<< + * cdef unsigned int M = dets_all.shape[0] + * + */ + __pyx_t_3 = __pyx_f_5numpy_7ndarray_5shape_shape(((PyArrayObject *)__pyx_v_dets_NMS)); if (unlikely(__pyx_t_3 == ((npy_intp *)NULL) && PyErr_Occurred())) __PYX_ERR(0, 103, __pyx_L1_error) + __pyx_v_N = (__pyx_t_3[0]); + + /* "opencood/utils/box_overlaps.pyx":104 + * cdef np.ndarray[float, ndim=2] dets_voted = np.zeros((dets_NMS.shape[0], dets_NMS.shape[1]), dtype=np.float32) + * cdef unsigned int N = dets_NMS.shape[0] + * cdef unsigned int M = dets_all.shape[0] # <<<<<<<<<<<<<< + * + * cdef np.ndarray[float, ndim=1] det + */ + __pyx_t_3 = __pyx_f_5numpy_7ndarray_5shape_shape(((PyArrayObject *)__pyx_v_dets_all)); if (unlikely(__pyx_t_3 == ((npy_intp *)NULL) && PyErr_Occurred())) __PYX_ERR(0, 104, __pyx_L1_error) + __pyx_v_M = (__pyx_t_3[0]); + + /* "opencood/utils/box_overlaps.pyx":114 + * cdef float iw, ih, ua + * + * cdef float thresh=0.5 # <<<<<<<<<<<<<< + * + * for i in range(N): + */ + __pyx_v_thresh = 0.5; + + /* "opencood/utils/box_overlaps.pyx":116 + * cdef float thresh=0.5 + * + * for i in range(N): # <<<<<<<<<<<<<< + * det = dets_NMS[i, :] + * acc_box = np.zeros((4), dtype=np.float32) + */ + __pyx_t_8 = __pyx_v_N; + __pyx_t_9 = __pyx_t_8; + for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) { + __pyx_v_i = __pyx_t_10; + + /* "opencood/utils/box_overlaps.pyx":117 + * + * for i in range(N): + * det = dets_NMS[i, :] # <<<<<<<<<<<<<< + * acc_box = np.zeros((4), dtype=np.float32) + * acc_score = 0.0 + */ + __pyx_t_6 = __Pyx_PyInt_From_unsigned_int(__pyx_v_i); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 117, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_5 = PyTuple_New(2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 117, __pyx_L1_error) + 
__Pyx_GOTREF(__pyx_t_5); + __Pyx_GIVEREF(__pyx_t_6); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_6)) __PYX_ERR(0, 117, __pyx_L1_error); + __Pyx_INCREF(__pyx_slice__3); + __Pyx_GIVEREF(__pyx_slice__3); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_5, 1, __pyx_slice__3)) __PYX_ERR(0, 117, __pyx_L1_error); + __pyx_t_6 = 0; + __pyx_t_6 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_dets_NMS), __pyx_t_5); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 117, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 117, __pyx_L1_error) + __pyx_t_11 = ((PyArrayObject *)__pyx_t_6); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_det.rcbuffer->pybuffer); + __pyx_t_12 = __Pyx_GetBufferAndValidate(&__pyx_pybuffernd_det.rcbuffer->pybuffer, (PyObject*)__pyx_t_11, &__Pyx_TypeInfo_float, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack); + if (unlikely(__pyx_t_12 < 0)) { + PyErr_Fetch(&__pyx_t_13, &__pyx_t_14, &__pyx_t_15); + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_det.rcbuffer->pybuffer, (PyObject*)__pyx_v_det, &__Pyx_TypeInfo_float, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { + Py_XDECREF(__pyx_t_13); Py_XDECREF(__pyx_t_14); Py_XDECREF(__pyx_t_15); + __Pyx_RaiseBufferFallbackError(); + } else { + PyErr_Restore(__pyx_t_13, __pyx_t_14, __pyx_t_15); + } + __pyx_t_13 = __pyx_t_14 = __pyx_t_15 = 0; + } + __pyx_pybuffernd_det.diminfo[0].strides = __pyx_pybuffernd_det.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_det.diminfo[0].shape = __pyx_pybuffernd_det.rcbuffer->pybuffer.shape[0]; + if (unlikely((__pyx_t_12 < 0))) __PYX_ERR(0, 117, __pyx_L1_error) + } + __pyx_t_11 = 0; + __Pyx_XDECREF_SET(__pyx_v_det, ((PyArrayObject *)__pyx_t_6)); + __pyx_t_6 = 0; + + /* "opencood/utils/box_overlaps.pyx":118 + * for i in range(N): + * det = dets_NMS[i, :] + * acc_box = np.zeros((4), dtype=np.float32) # <<<<<<<<<<<<<< + * acc_score = 0.0 + * + */ + __Pyx_GetModuleGlobalName(__pyx_t_6, __pyx_n_s_np); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 118, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_zeros); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 118, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 118, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_np); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 118, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_float32); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 118, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (PyDict_SetItem(__pyx_t_6, __pyx_n_s_dtype, __pyx_t_2) < 0) __PYX_ERR(0, 118, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_PyObject_Call(__pyx_t_5, __pyx_tuple__4, __pyx_t_6); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 118, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 118, __pyx_L1_error) + __pyx_t_16 = ((PyArrayObject *)__pyx_t_2); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_acc_box.rcbuffer->pybuffer); 
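+ /* Rebind the acc_box buffer view to the freshly allocated np.zeros((4), dtype=np.float32) array; on failure, fall back to re-acquiring the previous acc_box buffer before propagating the error. */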
+ __pyx_t_12 = __Pyx_GetBufferAndValidate(&__pyx_pybuffernd_acc_box.rcbuffer->pybuffer, (PyObject*)__pyx_t_16, &__Pyx_TypeInfo_float, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack); + if (unlikely(__pyx_t_12 < 0)) { + PyErr_Fetch(&__pyx_t_15, &__pyx_t_14, &__pyx_t_13); + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_acc_box.rcbuffer->pybuffer, (PyObject*)__pyx_v_acc_box, &__Pyx_TypeInfo_float, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { + Py_XDECREF(__pyx_t_15); Py_XDECREF(__pyx_t_14); Py_XDECREF(__pyx_t_13); + __Pyx_RaiseBufferFallbackError(); + } else { + PyErr_Restore(__pyx_t_15, __pyx_t_14, __pyx_t_13); + } + __pyx_t_15 = __pyx_t_14 = __pyx_t_13 = 0; + } + __pyx_pybuffernd_acc_box.diminfo[0].strides = __pyx_pybuffernd_acc_box.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_acc_box.diminfo[0].shape = __pyx_pybuffernd_acc_box.rcbuffer->pybuffer.shape[0]; + if (unlikely((__pyx_t_12 < 0))) __PYX_ERR(0, 118, __pyx_L1_error) + } + __pyx_t_16 = 0; + __Pyx_XDECREF_SET(__pyx_v_acc_box, ((PyArrayObject *)__pyx_t_2)); + __pyx_t_2 = 0; + + /* "opencood/utils/box_overlaps.pyx":119 + * det = dets_NMS[i, :] + * acc_box = np.zeros((4), dtype=np.float32) + * acc_score = 0.0 # <<<<<<<<<<<<<< + * + * for m in range(M): + */ + __pyx_v_acc_score = 0.0; + + /* "opencood/utils/box_overlaps.pyx":121 + * acc_score = 0.0 + * + * for m in range(M): # <<<<<<<<<<<<<< + * det2 = dets_all[m, :] + * + */ + __pyx_t_17 = __pyx_v_M; + __pyx_t_18 = __pyx_t_17; + for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { + __pyx_v_m = __pyx_t_19; + + /* "opencood/utils/box_overlaps.pyx":122 + * + * for m in range(M): + * det2 = dets_all[m, :] # <<<<<<<<<<<<<< + * + * bi0 = max(det[0], det2[0]) + */ + __pyx_t_2 = __Pyx_PyInt_From_unsigned_int(__pyx_v_m); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 122, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_6 = PyTuple_New(2); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 122, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_GIVEREF(__pyx_t_2); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_2)) __PYX_ERR(0, 122, __pyx_L1_error); + __Pyx_INCREF(__pyx_slice__3); + __Pyx_GIVEREF(__pyx_slice__3); + if (__Pyx_PyTuple_SET_ITEM(__pyx_t_6, 1, __pyx_slice__3)) __PYX_ERR(0, 122, __pyx_L1_error); + __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_dets_all), __pyx_t_6); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 122, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + if (!(likely(((__pyx_t_2) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_2, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 122, __pyx_L1_error) + __pyx_t_20 = ((PyArrayObject *)__pyx_t_2); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_det2.rcbuffer->pybuffer); + __pyx_t_12 = __Pyx_GetBufferAndValidate(&__pyx_pybuffernd_det2.rcbuffer->pybuffer, (PyObject*)__pyx_t_20, &__Pyx_TypeInfo_float, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack); + if (unlikely(__pyx_t_12 < 0)) { + PyErr_Fetch(&__pyx_t_13, &__pyx_t_14, &__pyx_t_15); + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_det2.rcbuffer->pybuffer, (PyObject*)__pyx_v_det2, &__Pyx_TypeInfo_float, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { + Py_XDECREF(__pyx_t_13); Py_XDECREF(__pyx_t_14); Py_XDECREF(__pyx_t_15); + __Pyx_RaiseBufferFallbackError(); + } else { + PyErr_Restore(__pyx_t_13, __pyx_t_14, __pyx_t_15); + } + __pyx_t_13 = __pyx_t_14 = __pyx_t_15 = 0; + } + __pyx_pybuffernd_det2.diminfo[0].strides = 
__pyx_pybuffernd_det2.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_det2.diminfo[0].shape = __pyx_pybuffernd_det2.rcbuffer->pybuffer.shape[0]; + if (unlikely((__pyx_t_12 < 0))) __PYX_ERR(0, 122, __pyx_L1_error) + } + __pyx_t_20 = 0; + __Pyx_XDECREF_SET(__pyx_v_det2, ((PyArrayObject *)__pyx_t_2)); + __pyx_t_2 = 0; + + /* "opencood/utils/box_overlaps.pyx":124 + * det2 = dets_all[m, :] + * + * bi0 = max(det[0], det2[0]) # <<<<<<<<<<<<<< + * bi1 = max(det[1], det2[1]) + * bi2 = min(det[2], det2[2]) + */ + __pyx_t_21 = 0; + __pyx_t_12 = -1; + if (__pyx_t_21 < 0) { + __pyx_t_21 += __pyx_pybuffernd_det2.diminfo[0].shape; + if (unlikely(__pyx_t_21 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_21 >= __pyx_pybuffernd_det2.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 124, __pyx_L1_error) + } + __pyx_t_22 = (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det2.rcbuffer->pybuffer.buf, __pyx_t_21, __pyx_pybuffernd_det2.diminfo[0].strides)); + __pyx_t_21 = 0; + __pyx_t_12 = -1; + if (__pyx_t_21 < 0) { + __pyx_t_21 += __pyx_pybuffernd_det.diminfo[0].shape; + if (unlikely(__pyx_t_21 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_21 >= __pyx_pybuffernd_det.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 124, __pyx_L1_error) + } + __pyx_t_23 = (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det.rcbuffer->pybuffer.buf, __pyx_t_21, __pyx_pybuffernd_det.diminfo[0].strides)); + __pyx_t_25 = (__pyx_t_22 > __pyx_t_23); + if (__pyx_t_25) { + __pyx_t_24 = __pyx_t_22; + } else { + __pyx_t_24 = __pyx_t_23; + } + __pyx_v_bi0 = __pyx_t_24; + + /* "opencood/utils/box_overlaps.pyx":125 + * + * bi0 = max(det[0], det2[0]) + * bi1 = max(det[1], det2[1]) # <<<<<<<<<<<<<< + * bi2 = min(det[2], det2[2]) + * bi3 = min(det[3], det2[3]) + */ + __pyx_t_21 = 1; + __pyx_t_12 = -1; + if (__pyx_t_21 < 0) { + __pyx_t_21 += __pyx_pybuffernd_det2.diminfo[0].shape; + if (unlikely(__pyx_t_21 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_21 >= __pyx_pybuffernd_det2.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 125, __pyx_L1_error) + } + __pyx_t_24 = (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det2.rcbuffer->pybuffer.buf, __pyx_t_21, __pyx_pybuffernd_det2.diminfo[0].strides)); + __pyx_t_21 = 1; + __pyx_t_12 = -1; + if (__pyx_t_21 < 0) { + __pyx_t_21 += __pyx_pybuffernd_det.diminfo[0].shape; + if (unlikely(__pyx_t_21 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_21 >= __pyx_pybuffernd_det.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 125, __pyx_L1_error) + } + __pyx_t_22 = (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det.rcbuffer->pybuffer.buf, __pyx_t_21, __pyx_pybuffernd_det.diminfo[0].strides)); + __pyx_t_25 = (__pyx_t_24 > __pyx_t_22); + if (__pyx_t_25) { + __pyx_t_23 = __pyx_t_24; + } else { + __pyx_t_23 = __pyx_t_22; + } + __pyx_v_bi1 = __pyx_t_23; + + /* "opencood/utils/box_overlaps.pyx":126 + * bi0 = max(det[0], det2[0]) + * bi1 = max(det[1], det2[1]) + * bi2 = min(det[2], det2[2]) # <<<<<<<<<<<<<< + * bi3 = min(det[3], det2[3]) + * + */ + __pyx_t_21 = 2; + __pyx_t_12 = -1; + if (__pyx_t_21 < 0) { + __pyx_t_21 += __pyx_pybuffernd_det2.diminfo[0].shape; + if (unlikely(__pyx_t_21 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_21 >= 
__pyx_pybuffernd_det2.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 126, __pyx_L1_error) + } + __pyx_t_23 = (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det2.rcbuffer->pybuffer.buf, __pyx_t_21, __pyx_pybuffernd_det2.diminfo[0].strides)); + __pyx_t_21 = 2; + __pyx_t_12 = -1; + if (__pyx_t_21 < 0) { + __pyx_t_21 += __pyx_pybuffernd_det.diminfo[0].shape; + if (unlikely(__pyx_t_21 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_21 >= __pyx_pybuffernd_det.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 126, __pyx_L1_error) + } + __pyx_t_24 = (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det.rcbuffer->pybuffer.buf, __pyx_t_21, __pyx_pybuffernd_det.diminfo[0].strides)); + __pyx_t_25 = (__pyx_t_23 < __pyx_t_24); + if (__pyx_t_25) { + __pyx_t_22 = __pyx_t_23; + } else { + __pyx_t_22 = __pyx_t_24; + } + __pyx_t_2 = PyFloat_FromDouble(__pyx_t_22); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 126, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_XDECREF_SET(__pyx_v_bi2, __pyx_t_2); + __pyx_t_2 = 0; + + /* "opencood/utils/box_overlaps.pyx":127 + * bi1 = max(det[1], det2[1]) + * bi2 = min(det[2], det2[2]) + * bi3 = min(det[3], det2[3]) # <<<<<<<<<<<<<< + * + * iw = bi2 - bi0 + 1 + */ + __pyx_t_21 = 3; + __pyx_t_12 = -1; + if (__pyx_t_21 < 0) { + __pyx_t_21 += __pyx_pybuffernd_det2.diminfo[0].shape; + if (unlikely(__pyx_t_21 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_21 >= __pyx_pybuffernd_det2.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 127, __pyx_L1_error) + } + __pyx_t_22 = (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det2.rcbuffer->pybuffer.buf, __pyx_t_21, __pyx_pybuffernd_det2.diminfo[0].strides)); + __pyx_t_21 = 3; + __pyx_t_12 = -1; + if (__pyx_t_21 < 0) { + __pyx_t_21 += __pyx_pybuffernd_det.diminfo[0].shape; + if (unlikely(__pyx_t_21 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_21 >= __pyx_pybuffernd_det.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 127, __pyx_L1_error) + } + __pyx_t_23 = (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det.rcbuffer->pybuffer.buf, __pyx_t_21, __pyx_pybuffernd_det.diminfo[0].strides)); + __pyx_t_25 = (__pyx_t_22 < __pyx_t_23); + if (__pyx_t_25) { + __pyx_t_24 = __pyx_t_22; + } else { + __pyx_t_24 = __pyx_t_23; + } + __pyx_v_bi3 = __pyx_t_24; + + /* "opencood/utils/box_overlaps.pyx":129 + * bi3 = min(det[3], det2[3]) + * + * iw = bi2 - bi0 + 1 # <<<<<<<<<<<<<< + * ih = bi3 - bi1 + 1 + * + */ + __pyx_t_2 = PyFloat_FromDouble(__pyx_v_bi0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 129, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_6 = PyNumber_Subtract(__pyx_v_bi2, __pyx_t_2); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 129, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_PyInt_AddObjC(__pyx_t_6, __pyx_int_1, 1, 0, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 129, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_24 = __pyx_PyFloat_AsFloat(__pyx_t_2); if (unlikely((__pyx_t_24 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 129, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_v_iw = __pyx_t_24; + + /* "opencood/utils/box_overlaps.pyx":130 + * + * iw = bi2 - bi0 + 1 + * ih = bi3 - bi1 + 1 # 
<<<<<<<<<<<<<< + * + * if not (iw > 0 and ih > 0): + */ + __pyx_v_ih = ((__pyx_v_bi3 - __pyx_v_bi1) + 1.0); + + /* "opencood/utils/box_overlaps.pyx":132 + * ih = bi3 - bi1 + 1 + * + * if not (iw > 0 and ih > 0): # <<<<<<<<<<<<<< + * continue + * + */ + __pyx_t_26 = (__pyx_v_iw > 0.0); + if (__pyx_t_26) { + } else { + __pyx_t_25 = __pyx_t_26; + goto __pyx_L8_bool_binop_done; + } + __pyx_t_26 = (__pyx_v_ih > 0.0); + __pyx_t_25 = __pyx_t_26; + __pyx_L8_bool_binop_done:; + __pyx_t_26 = (!__pyx_t_25); + if (__pyx_t_26) { + + /* "opencood/utils/box_overlaps.pyx":133 + * + * if not (iw > 0 and ih > 0): + * continue # <<<<<<<<<<<<<< + * + * ua = (det[2] - det[0] + 1) * (det[3] - det[1] + 1) + (det2[2] - det2[0] + 1) * (det2[3] - det2[1] + 1) - iw * ih + */ + goto __pyx_L5_continue; + + /* "opencood/utils/box_overlaps.pyx":132 + * ih = bi3 - bi1 + 1 + * + * if not (iw > 0 and ih > 0): # <<<<<<<<<<<<<< + * continue + * + */ + } + + /* "opencood/utils/box_overlaps.pyx":135 + * continue + * + * ua = (det[2] - det[0] + 1) * (det[3] - det[1] + 1) + (det2[2] - det2[0] + 1) * (det2[3] - det2[1] + 1) - iw * ih # <<<<<<<<<<<<<< + * ov = iw * ih / ua + * + */ + __pyx_t_21 = 2; + __pyx_t_12 = -1; + if (__pyx_t_21 < 0) { + __pyx_t_21 += __pyx_pybuffernd_det.diminfo[0].shape; + if (unlikely(__pyx_t_21 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_21 >= __pyx_pybuffernd_det.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 135, __pyx_L1_error) + } + __pyx_t_27 = 0; + __pyx_t_12 = -1; + if (__pyx_t_27 < 0) { + __pyx_t_27 += __pyx_pybuffernd_det.diminfo[0].shape; + if (unlikely(__pyx_t_27 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_27 >= __pyx_pybuffernd_det.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 135, __pyx_L1_error) + } + __pyx_t_28 = 3; + __pyx_t_12 = -1; + if (__pyx_t_28 < 0) { + __pyx_t_28 += __pyx_pybuffernd_det.diminfo[0].shape; + if (unlikely(__pyx_t_28 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_28 >= __pyx_pybuffernd_det.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 135, __pyx_L1_error) + } + __pyx_t_29 = 1; + __pyx_t_12 = -1; + if (__pyx_t_29 < 0) { + __pyx_t_29 += __pyx_pybuffernd_det.diminfo[0].shape; + if (unlikely(__pyx_t_29 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_29 >= __pyx_pybuffernd_det.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 135, __pyx_L1_error) + } + __pyx_t_30 = 2; + __pyx_t_12 = -1; + if (__pyx_t_30 < 0) { + __pyx_t_30 += __pyx_pybuffernd_det2.diminfo[0].shape; + if (unlikely(__pyx_t_30 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_30 >= __pyx_pybuffernd_det2.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 135, __pyx_L1_error) + } + __pyx_t_31 = 0; + __pyx_t_12 = -1; + if (__pyx_t_31 < 0) { + __pyx_t_31 += __pyx_pybuffernd_det2.diminfo[0].shape; + if (unlikely(__pyx_t_31 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_31 >= __pyx_pybuffernd_det2.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 135, __pyx_L1_error) + } + __pyx_t_32 = 3; + __pyx_t_12 = -1; + if (__pyx_t_32 < 0) { + __pyx_t_32 += __pyx_pybuffernd_det2.diminfo[0].shape; + if 
(unlikely(__pyx_t_32 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_32 >= __pyx_pybuffernd_det2.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 135, __pyx_L1_error) + } + __pyx_t_33 = 1; + __pyx_t_12 = -1; + if (__pyx_t_33 < 0) { + __pyx_t_33 += __pyx_pybuffernd_det2.diminfo[0].shape; + if (unlikely(__pyx_t_33 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_33 >= __pyx_pybuffernd_det2.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 135, __pyx_L1_error) + } + __pyx_v_ua = ((((((*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det.rcbuffer->pybuffer.buf, __pyx_t_21, __pyx_pybuffernd_det.diminfo[0].strides)) - (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det.rcbuffer->pybuffer.buf, __pyx_t_27, __pyx_pybuffernd_det.diminfo[0].strides))) + 1.0) * (((*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det.rcbuffer->pybuffer.buf, __pyx_t_28, __pyx_pybuffernd_det.diminfo[0].strides)) - (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det.rcbuffer->pybuffer.buf, __pyx_t_29, __pyx_pybuffernd_det.diminfo[0].strides))) + 1.0)) + ((((*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det2.rcbuffer->pybuffer.buf, __pyx_t_30, __pyx_pybuffernd_det2.diminfo[0].strides)) - (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det2.rcbuffer->pybuffer.buf, __pyx_t_31, __pyx_pybuffernd_det2.diminfo[0].strides))) + 1.0) * (((*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det2.rcbuffer->pybuffer.buf, __pyx_t_32, __pyx_pybuffernd_det2.diminfo[0].strides)) - (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det2.rcbuffer->pybuffer.buf, __pyx_t_33, __pyx_pybuffernd_det2.diminfo[0].strides))) + 1.0))) - (__pyx_v_iw * __pyx_v_ih)); + + /* "opencood/utils/box_overlaps.pyx":136 + * + * ua = (det[2] - det[0] + 1) * (det[3] - det[1] + 1) + (det2[2] - det2[0] + 1) * (det2[3] - det2[1] + 1) - iw * ih + * ov = iw * ih / ua # <<<<<<<<<<<<<< + * + * if (ov < thresh): + */ + __pyx_t_24 = (__pyx_v_iw * __pyx_v_ih); + if (unlikely(__pyx_v_ua == 0)) { + PyErr_SetString(PyExc_ZeroDivisionError, "float division"); + __PYX_ERR(0, 136, __pyx_L1_error) + } + __pyx_v_ov = (__pyx_t_24 / __pyx_v_ua); + + /* "opencood/utils/box_overlaps.pyx":138 + * ov = iw * ih / ua + * + * if (ov < thresh): # <<<<<<<<<<<<<< + * continue + * + */ + __pyx_t_26 = (__pyx_v_ov < __pyx_v_thresh); + if (__pyx_t_26) { + + /* "opencood/utils/box_overlaps.pyx":139 + * + * if (ov < thresh): + * continue # <<<<<<<<<<<<<< + * + * acc_box += det2[4] * det2[0:4] + */ + goto __pyx_L5_continue; + + /* "opencood/utils/box_overlaps.pyx":138 + * ov = iw * ih / ua + * + * if (ov < thresh): # <<<<<<<<<<<<<< + * continue + * + */ + } + + /* "opencood/utils/box_overlaps.pyx":141 + * continue + * + * acc_box += det2[4] * det2[0:4] # <<<<<<<<<<<<<< + * acc_score += det2[4] + * + */ + __pyx_t_33 = 4; + __pyx_t_12 = -1; + if (__pyx_t_33 < 0) { + __pyx_t_33 += __pyx_pybuffernd_det2.diminfo[0].shape; + if (unlikely(__pyx_t_33 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_33 >= __pyx_pybuffernd_det2.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 141, __pyx_L1_error) + } + __pyx_t_2 = PyFloat_FromDouble((*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det2.rcbuffer->pybuffer.buf, __pyx_t_33, __pyx_pybuffernd_det2.diminfo[0].strides))); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 141, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + 
__pyx_t_6 = __Pyx_PyObject_GetItem(((PyObject *)__pyx_v_det2), __pyx_slice__5); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 141, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_5 = PyNumber_Multiply(__pyx_t_2, __pyx_t_6); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 141, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = PyNumber_InPlaceAdd(((PyObject *)__pyx_v_acc_box), __pyx_t_5); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 141, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 141, __pyx_L1_error) + __pyx_t_16 = ((PyArrayObject *)__pyx_t_6); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_acc_box.rcbuffer->pybuffer); + __pyx_t_12 = __Pyx_GetBufferAndValidate(&__pyx_pybuffernd_acc_box.rcbuffer->pybuffer, (PyObject*)__pyx_t_16, &__Pyx_TypeInfo_float, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack); + if (unlikely(__pyx_t_12 < 0)) { + PyErr_Fetch(&__pyx_t_15, &__pyx_t_14, &__pyx_t_13); + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_acc_box.rcbuffer->pybuffer, (PyObject*)__pyx_v_acc_box, &__Pyx_TypeInfo_float, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { + Py_XDECREF(__pyx_t_15); Py_XDECREF(__pyx_t_14); Py_XDECREF(__pyx_t_13); + __Pyx_RaiseBufferFallbackError(); + } else { + PyErr_Restore(__pyx_t_15, __pyx_t_14, __pyx_t_13); + } + __pyx_t_15 = __pyx_t_14 = __pyx_t_13 = 0; + } + __pyx_pybuffernd_acc_box.diminfo[0].strides = __pyx_pybuffernd_acc_box.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_acc_box.diminfo[0].shape = __pyx_pybuffernd_acc_box.rcbuffer->pybuffer.shape[0]; + if (unlikely((__pyx_t_12 < 0))) __PYX_ERR(0, 141, __pyx_L1_error) + } + __pyx_t_16 = 0; + __Pyx_DECREF_SET(__pyx_v_acc_box, ((PyArrayObject *)__pyx_t_6)); + __pyx_t_6 = 0; + + /* "opencood/utils/box_overlaps.pyx":142 + * + * acc_box += det2[4] * det2[0:4] + * acc_score += det2[4] # <<<<<<<<<<<<<< + * + * dets_voted[i][0:4] = acc_box / acc_score + */ + __pyx_t_33 = 4; + __pyx_t_12 = -1; + if (__pyx_t_33 < 0) { + __pyx_t_33 += __pyx_pybuffernd_det2.diminfo[0].shape; + if (unlikely(__pyx_t_33 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_33 >= __pyx_pybuffernd_det2.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 142, __pyx_L1_error) + } + __pyx_v_acc_score = (__pyx_v_acc_score + (*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det2.rcbuffer->pybuffer.buf, __pyx_t_33, __pyx_pybuffernd_det2.diminfo[0].strides))); + __pyx_L5_continue:; + } + + /* "opencood/utils/box_overlaps.pyx":144 + * acc_score += det2[4] + * + * dets_voted[i][0:4] = acc_box / acc_score # <<<<<<<<<<<<<< + * dets_voted[i][4] = det[4] # Keep the original score + * + */ + __pyx_t_6 = PyFloat_FromDouble(__pyx_v_acc_score); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 144, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_5 = __Pyx_PyNumber_Divide(((PyObject *)__pyx_v_acc_box), __pyx_t_6); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 144, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = __Pyx_GetItemInt(((PyObject *)__pyx_v_dets_voted), __pyx_v_i, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 144, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + if (__Pyx_PyObject_SetSlice(__pyx_t_6, 
__pyx_t_5, 0, 4, NULL, NULL, &__pyx_slice__5, 1, 1, 1) < 0) __PYX_ERR(0, 144, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + + /* "opencood/utils/box_overlaps.pyx":145 + * + * dets_voted[i][0:4] = acc_box / acc_score + * dets_voted[i][4] = det[4] # Keep the original score # <<<<<<<<<<<<<< + * + * return dets_voted + */ + __pyx_t_33 = 4; + __pyx_t_12 = -1; + if (__pyx_t_33 < 0) { + __pyx_t_33 += __pyx_pybuffernd_det.diminfo[0].shape; + if (unlikely(__pyx_t_33 < 0)) __pyx_t_12 = 0; + } else if (unlikely(__pyx_t_33 >= __pyx_pybuffernd_det.diminfo[0].shape)) __pyx_t_12 = 0; + if (unlikely(__pyx_t_12 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_12); + __PYX_ERR(0, 145, __pyx_L1_error) + } + __pyx_t_5 = PyFloat_FromDouble((*__Pyx_BufPtrStrided1d(float *, __pyx_pybuffernd_det.rcbuffer->pybuffer.buf, __pyx_t_33, __pyx_pybuffernd_det.diminfo[0].strides))); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 145, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_6 = __Pyx_GetItemInt(((PyObject *)__pyx_v_dets_voted), __pyx_v_i, unsigned int, 0, __Pyx_PyInt_From_unsigned_int, 0, 0, 1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 145, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + if (unlikely((__Pyx_SetItemInt(__pyx_t_6, 4, __pyx_t_5, long, 1, __Pyx_PyInt_From_long, 0, 0, 1) < 0))) __PYX_ERR(0, 145, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + } + + /* "opencood/utils/box_overlaps.pyx":147 + * dets_voted[i][4] = det[4] # Keep the original score + * + * return dets_voted # <<<<<<<<<<<<<< + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF((PyObject *)__pyx_v_dets_voted); + __pyx_r = ((PyObject *)__pyx_v_dets_voted); + goto __pyx_L0; + + /* "opencood/utils/box_overlaps.pyx":99 + * + * # Compute bounding box voting + * def box_vote( # <<<<<<<<<<<<<< + * np.ndarray[float, ndim=2] dets_NMS, + * np.ndarray[float, ndim=2] dets_all): + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_6); + { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_acc_box.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_det.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_det2.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_dets_NMS.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_dets_all.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_dets_voted.rcbuffer->pybuffer); + __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);} + __Pyx_AddTraceback("opencood.utils.box_overlaps.box_vote", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + goto __pyx_L2; + __pyx_L0:; + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_acc_box.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_det.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_det2.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_dets_NMS.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_dets_all.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_dets_voted.rcbuffer->pybuffer); + __pyx_L2:; + __Pyx_XDECREF((PyObject *)__pyx_v_dets_voted); + __Pyx_XDECREF((PyObject *)__pyx_v_det); + __Pyx_XDECREF((PyObject *)__pyx_v_acc_box); + 
__Pyx_XDECREF((PyObject *)__pyx_v_det2); + __Pyx_XDECREF(__pyx_v_bi2); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyMethodDef __pyx_methods[] = { + {0, 0, 0, 0} +}; +#ifndef CYTHON_SMALL_CODE +#if defined(__clang__) + #define CYTHON_SMALL_CODE +#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + #define CYTHON_SMALL_CODE __attribute__((cold)) +#else + #define CYTHON_SMALL_CODE +#endif +#endif +/* #### Code section: pystring_table ### */ + +static int __Pyx_CreateStringTabAndInitStrings(void) { + __Pyx_StringTabEntry __pyx_string_tab[] = { + {&__pyx_n_s_DTYPE, __pyx_k_DTYPE, sizeof(__pyx_k_DTYPE), 0, 0, 1, 1}, + {&__pyx_n_s_ImportError, __pyx_k_ImportError, sizeof(__pyx_k_ImportError), 0, 0, 1, 1}, + {&__pyx_n_s_K, __pyx_k_K, sizeof(__pyx_k_K), 0, 0, 1, 1}, + {&__pyx_n_s_M, __pyx_k_M, sizeof(__pyx_k_M), 0, 0, 1, 1}, + {&__pyx_n_s_N, __pyx_k_N, sizeof(__pyx_k_N), 0, 0, 1, 1}, + {&__pyx_n_s__13, __pyx_k__13, sizeof(__pyx_k__13), 0, 0, 1, 1}, + {&__pyx_n_s__6, __pyx_k__6, sizeof(__pyx_k__6), 0, 0, 1, 1}, + {&__pyx_n_s_acc_box, __pyx_k_acc_box, sizeof(__pyx_k_acc_box), 0, 0, 1, 1}, + {&__pyx_n_s_acc_score, __pyx_k_acc_score, sizeof(__pyx_k_acc_score), 0, 0, 1, 1}, + {&__pyx_n_s_asyncio_coroutines, __pyx_k_asyncio_coroutines, sizeof(__pyx_k_asyncio_coroutines), 0, 0, 1, 1}, + {&__pyx_n_s_bbox_intersections, __pyx_k_bbox_intersections, sizeof(__pyx_k_bbox_intersections), 0, 0, 1, 1}, + {&__pyx_n_s_bbox_overlaps, __pyx_k_bbox_overlaps, sizeof(__pyx_k_bbox_overlaps), 0, 0, 1, 1}, + {&__pyx_n_s_bi0, __pyx_k_bi0, sizeof(__pyx_k_bi0), 0, 0, 1, 1}, + {&__pyx_n_s_bi1, __pyx_k_bi1, sizeof(__pyx_k_bi1), 0, 0, 1, 1}, + {&__pyx_n_s_bi2, __pyx_k_bi2, sizeof(__pyx_k_bi2), 0, 0, 1, 1}, + {&__pyx_n_s_bi3, __pyx_k_bi3, sizeof(__pyx_k_bi3), 0, 0, 1, 1}, + {&__pyx_n_s_bit2, __pyx_k_bit2, sizeof(__pyx_k_bit2), 0, 0, 1, 1}, + {&__pyx_n_s_box_area, __pyx_k_box_area, sizeof(__pyx_k_box_area), 0, 0, 1, 1}, + {&__pyx_n_s_box_vote, __pyx_k_box_vote, sizeof(__pyx_k_box_vote), 0, 0, 1, 1}, + {&__pyx_n_s_boxes, __pyx_k_boxes, sizeof(__pyx_k_boxes), 0, 0, 1, 1}, + {&__pyx_n_s_class_getitem, __pyx_k_class_getitem, sizeof(__pyx_k_class_getitem), 0, 0, 1, 1}, + {&__pyx_n_s_cline_in_traceback, __pyx_k_cline_in_traceback, sizeof(__pyx_k_cline_in_traceback), 0, 0, 1, 1}, + {&__pyx_n_s_det, __pyx_k_det, sizeof(__pyx_k_det), 0, 0, 1, 1}, + {&__pyx_n_s_det2, __pyx_k_det2, sizeof(__pyx_k_det2), 0, 0, 1, 1}, + {&__pyx_n_s_dets_NMS, __pyx_k_dets_NMS, sizeof(__pyx_k_dets_NMS), 0, 0, 1, 1}, + {&__pyx_n_s_dets_all, __pyx_k_dets_all, sizeof(__pyx_k_dets_all), 0, 0, 1, 1}, + {&__pyx_n_s_dets_voted, __pyx_k_dets_voted, sizeof(__pyx_k_dets_voted), 0, 0, 1, 1}, + {&__pyx_n_s_dtype, __pyx_k_dtype, sizeof(__pyx_k_dtype), 0, 0, 1, 1}, + {&__pyx_n_s_float32, __pyx_k_float32, sizeof(__pyx_k_float32), 0, 0, 1, 1}, + {&__pyx_n_s_i, __pyx_k_i, sizeof(__pyx_k_i), 0, 0, 1, 1}, + {&__pyx_n_s_ih, __pyx_k_ih, sizeof(__pyx_k_ih), 0, 0, 1, 1}, + {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1}, + {&__pyx_n_s_initializing, __pyx_k_initializing, sizeof(__pyx_k_initializing), 0, 0, 1, 1}, + {&__pyx_n_s_intersec, __pyx_k_intersec, sizeof(__pyx_k_intersec), 0, 0, 1, 1}, + {&__pyx_n_s_is_coroutine, __pyx_k_is_coroutine, sizeof(__pyx_k_is_coroutine), 0, 0, 1, 1}, + {&__pyx_n_s_iw, __pyx_k_iw, sizeof(__pyx_k_iw), 0, 0, 1, 1}, + {&__pyx_n_s_k, __pyx_k_k, sizeof(__pyx_k_k), 0, 0, 1, 1}, + {&__pyx_n_s_m, __pyx_k_m, sizeof(__pyx_k_m), 0, 0, 1, 1}, + {&__pyx_n_s_main, 
__pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, + {&__pyx_n_s_n, __pyx_k_n, sizeof(__pyx_k_n), 0, 0, 1, 1}, + {&__pyx_n_s_name, __pyx_k_name, sizeof(__pyx_k_name), 0, 0, 1, 1}, + {&__pyx_n_s_np, __pyx_k_np, sizeof(__pyx_k_np), 0, 0, 1, 1}, + {&__pyx_n_s_numpy, __pyx_k_numpy, sizeof(__pyx_k_numpy), 0, 0, 1, 1}, + {&__pyx_kp_s_numpy_core_multiarray_failed_to, __pyx_k_numpy_core_multiarray_failed_to, sizeof(__pyx_k_numpy_core_multiarray_failed_to), 0, 0, 1, 0}, + {&__pyx_kp_s_numpy_core_umath_failed_to_impor, __pyx_k_numpy_core_umath_failed_to_impor, sizeof(__pyx_k_numpy_core_umath_failed_to_impor), 0, 0, 1, 0}, + {&__pyx_n_s_opencood_utils_box_overlaps, __pyx_k_opencood_utils_box_overlaps, sizeof(__pyx_k_opencood_utils_box_overlaps), 0, 0, 1, 1}, + {&__pyx_kp_s_opencood_utils_box_overlaps_pyx, __pyx_k_opencood_utils_box_overlaps_pyx, sizeof(__pyx_k_opencood_utils_box_overlaps_pyx), 0, 0, 1, 0}, + {&__pyx_n_s_ov, __pyx_k_ov, sizeof(__pyx_k_ov), 0, 0, 1, 1}, + {&__pyx_n_s_overlaps, __pyx_k_overlaps, sizeof(__pyx_k_overlaps), 0, 0, 1, 1}, + {&__pyx_n_s_query_boxes, __pyx_k_query_boxes, sizeof(__pyx_k_query_boxes), 0, 0, 1, 1}, + {&__pyx_n_s_range, __pyx_k_range, sizeof(__pyx_k_range), 0, 0, 1, 1}, + {&__pyx_n_s_spec, __pyx_k_spec, sizeof(__pyx_k_spec), 0, 0, 1, 1}, + {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, + {&__pyx_n_s_thresh, __pyx_k_thresh, sizeof(__pyx_k_thresh), 0, 0, 1, 1}, + {&__pyx_n_s_ua, __pyx_k_ua, sizeof(__pyx_k_ua), 0, 0, 1, 1}, + {&__pyx_n_s_zeros, __pyx_k_zeros, sizeof(__pyx_k_zeros), 0, 0, 1, 1}, + {0, 0, 0, 0, 0, 0, 0} + }; + return __Pyx_InitStrings(__pyx_string_tab); +} +/* #### Code section: cached_builtins ### */ +static CYTHON_SMALL_CODE int __Pyx_InitCachedBuiltins(void) { + __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(0, 35, __pyx_L1_error) + __pyx_builtin_ImportError = __Pyx_GetBuiltinName(__pyx_n_s_ImportError); if (!__pyx_builtin_ImportError) __PYX_ERR(1, 991, __pyx_L1_error) + return 0; + __pyx_L1_error:; + return -1; +} +/* #### Code section: cached_constants ### */ + +static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":991 + * __pyx_import_array() + * except Exception: + * raise ImportError("numpy.core.multiarray failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int import_umath() except -1: + */ + __pyx_tuple_ = PyTuple_Pack(1, __pyx_kp_s_numpy_core_multiarray_failed_to); if (unlikely(!__pyx_tuple_)) __PYX_ERR(1, 991, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple_); + __Pyx_GIVEREF(__pyx_tuple_); + + /* "../../../miniconda3/envs/v2xverse/lib/python3.7/site-packages/Cython/Includes/numpy/__init__.pxd":997 + * _import_umath() + * except Exception: + * raise ImportError("numpy.core.umath failed to import") # <<<<<<<<<<<<<< + * + * cdef inline int import_ufunc() except -1: + */ + __pyx_tuple__2 = PyTuple_Pack(1, __pyx_kp_s_numpy_core_umath_failed_to_impor); if (unlikely(!__pyx_tuple__2)) __PYX_ERR(1, 997, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__2); + __Pyx_GIVEREF(__pyx_tuple__2); + + /* "opencood/utils/box_overlaps.pyx":117 + * + * for i in range(N): + * det = dets_NMS[i, :] # <<<<<<<<<<<<<< + * acc_box = np.zeros((4), dtype=np.float32) + * acc_score = 0.0 + */ + __pyx_slice__3 = PySlice_New(Py_None, Py_None, Py_None); if (unlikely(!__pyx_slice__3)) __PYX_ERR(0, 117, 
__pyx_L1_error) + __Pyx_GOTREF(__pyx_slice__3); + __Pyx_GIVEREF(__pyx_slice__3); + + /* "opencood/utils/box_overlaps.pyx":118 + * for i in range(N): + * det = dets_NMS[i, :] + * acc_box = np.zeros((4), dtype=np.float32) # <<<<<<<<<<<<<< + * acc_score = 0.0 + * + */ + __pyx_tuple__4 = PyTuple_Pack(1, __pyx_int_4); if (unlikely(!__pyx_tuple__4)) __PYX_ERR(0, 118, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__4); + __Pyx_GIVEREF(__pyx_tuple__4); + + /* "opencood/utils/box_overlaps.pyx":141 + * continue + * + * acc_box += det2[4] * det2[0:4] # <<<<<<<<<<<<<< + * acc_score += det2[4] + * + */ + __pyx_slice__5 = PySlice_New(__pyx_int_0, __pyx_int_4, Py_None); if (unlikely(!__pyx_slice__5)) __PYX_ERR(0, 141, __pyx_L1_error) + __Pyx_GOTREF(__pyx_slice__5); + __Pyx_GIVEREF(__pyx_slice__5); + + /* "opencood/utils/box_overlaps.pyx":17 + * + * + * def bbox_overlaps( # <<<<<<<<<<<<<< + * np.ndarray[DTYPE_t, ndim=2] boxes, + * np.ndarray[DTYPE_t, ndim=2] query_boxes): + */ + __pyx_tuple__7 = PyTuple_Pack(11, __pyx_n_s_boxes, __pyx_n_s_query_boxes, __pyx_n_s_N, __pyx_n_s_K, __pyx_n_s_overlaps, __pyx_n_s_iw, __pyx_n_s_ih, __pyx_n_s_box_area, __pyx_n_s_ua, __pyx_n_s_k, __pyx_n_s_n); if (unlikely(!__pyx_tuple__7)) __PYX_ERR(0, 17, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__7); + __Pyx_GIVEREF(__pyx_tuple__7); + __pyx_codeobj__8 = (PyObject*)__Pyx_PyCode_New(2, 0, 0, 11, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__7, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_opencood_utils_box_overlaps_pyx, __pyx_n_s_bbox_overlaps, 17, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__8)) __PYX_ERR(0, 17, __pyx_L1_error) + + /* "opencood/utils/box_overlaps.pyx":59 + * return overlaps + * + * def bbox_intersections( # <<<<<<<<<<<<<< + * np.ndarray[DTYPE_t, ndim=2] boxes, + * np.ndarray[DTYPE_t, ndim=2] query_boxes): + */ + __pyx_tuple__9 = PyTuple_Pack(11, __pyx_n_s_boxes, __pyx_n_s_query_boxes, __pyx_n_s_N, __pyx_n_s_K, __pyx_n_s_intersec, __pyx_n_s_iw, __pyx_n_s_ih, __pyx_n_s_box_area, __pyx_n_s_ua, __pyx_n_s_k, __pyx_n_s_n); if (unlikely(!__pyx_tuple__9)) __PYX_ERR(0, 59, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__9); + __Pyx_GIVEREF(__pyx_tuple__9); + __pyx_codeobj__10 = (PyObject*)__Pyx_PyCode_New(2, 0, 0, 11, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__9, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_opencood_utils_box_overlaps_pyx, __pyx_n_s_bbox_intersections, 59, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__10)) __PYX_ERR(0, 59, __pyx_L1_error) + + /* "opencood/utils/box_overlaps.pyx":99 + * + * # Compute bounding box voting + * def box_vote( # <<<<<<<<<<<<<< + * np.ndarray[float, ndim=2] dets_NMS, + * np.ndarray[float, ndim=2] dets_all): + */ + __pyx_tuple__11 = PyTuple_Pack(21, __pyx_n_s_dets_NMS, __pyx_n_s_dets_all, __pyx_n_s_dets_voted, __pyx_n_s_N, __pyx_n_s_M, __pyx_n_s_det, __pyx_n_s_acc_box, __pyx_n_s_acc_score, __pyx_n_s_det2, __pyx_n_s_bi0, __pyx_n_s_bi1, __pyx_n_s_bit2, __pyx_n_s_bi3, __pyx_n_s_iw, __pyx_n_s_ih, __pyx_n_s_ua, __pyx_n_s_thresh, __pyx_n_s_i, __pyx_n_s_m, __pyx_n_s_bi2, __pyx_n_s_ov); if (unlikely(!__pyx_tuple__11)) __PYX_ERR(0, 99, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__11); + __Pyx_GIVEREF(__pyx_tuple__11); + __pyx_codeobj__12 = (PyObject*)__Pyx_PyCode_New(2, 0, 0, 21, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__11, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_opencood_utils_box_overlaps_pyx, 
__pyx_n_s_box_vote, 99, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__12)) __PYX_ERR(0, 99, __pyx_L1_error) + __Pyx_RefNannyFinishContext(); + return 0; + __pyx_L1_error:; + __Pyx_RefNannyFinishContext(); + return -1; +} +/* #### Code section: init_constants ### */ + +static CYTHON_SMALL_CODE int __Pyx_InitConstants(void) { + if (__Pyx_CreateStringTabAndInitStrings() < 0) __PYX_ERR(0, 1, __pyx_L1_error); + __pyx_int_0 = PyInt_FromLong(0); if (unlikely(!__pyx_int_0)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_int_1 = PyInt_FromLong(1); if (unlikely(!__pyx_int_1)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_int_4 = PyInt_FromLong(4); if (unlikely(!__pyx_int_4)) __PYX_ERR(0, 1, __pyx_L1_error) + return 0; + __pyx_L1_error:; + return -1; +} +/* #### Code section: init_globals ### */ + +static CYTHON_SMALL_CODE int __Pyx_InitGlobals(void) { + /* NumpyImportArray.init */ + /* + * Cython has automatically inserted a call to _import_array since + * you didn't include one when you cimported numpy. To disable this + * add the line + * numpy._import_array + */ +#ifdef NPY_FEATURE_VERSION +#ifndef NO_IMPORT_ARRAY +if (unlikely(_import_array() == -1)) { + PyErr_SetString(PyExc_ImportError, "numpy.core.multiarray failed to import " + "(auto-generated because you didn't call 'numpy.import_array()' after cimporting numpy; " + "use 'numpy._import_array' to disable if you are certain you don't need it)."); +} +#endif +#endif + +if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 1, __pyx_L1_error) + + /* InitThreads.init */ + #if defined(WITH_THREAD) && PY_VERSION_HEX < 0x030700F0 +PyEval_InitThreads(); +#endif + +if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 1, __pyx_L1_error) + + return 0; + __pyx_L1_error:; + return -1; +} +/* #### Code section: init_module ### */ + +static CYTHON_SMALL_CODE int __Pyx_modinit_global_init_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_variable_export_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_function_export_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_type_init_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_type_import_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_variable_import_code(void); /*proto*/ +static CYTHON_SMALL_CODE int __Pyx_modinit_function_import_code(void); /*proto*/ + +static int __Pyx_modinit_global_init_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_global_init_code", 0); + /*--- Global init code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_variable_export_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_variable_export_code", 0); + /*--- Variable export code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_function_export_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_function_export_code", 0); + /*--- Function export code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_type_init_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_type_init_code", 0); + /*--- Type init code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_type_import_code(void) { + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("__Pyx_modinit_type_import_code", 0); + /*--- Type import code 
---*/ + __pyx_t_1 = PyImport_ImportModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_t_1)) __PYX_ERR(2, 9, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_ptype_7cpython_4type_type = __Pyx_ImportType_3_0_11(__pyx_t_1, __Pyx_BUILTIN_MODULE_NAME, "type", + #if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x050B0000 + sizeof(PyTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyTypeObject), + #elif CYTHON_COMPILING_IN_LIMITED_API + sizeof(PyTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyTypeObject), + #else + sizeof(PyHeapTypeObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyHeapTypeObject), + #endif + __Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_7cpython_4type_type) __PYX_ERR(2, 9, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = PyImport_ImportModule("numpy"); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 207, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_ptype_5numpy_dtype = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "dtype", sizeof(PyArray_Descr), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyArray_Descr),__Pyx_ImportType_CheckSize_Ignore_3_0_11); if (!__pyx_ptype_5numpy_dtype) __PYX_ERR(1, 207, __pyx_L1_error) + __pyx_ptype_5numpy_flatiter = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "flatiter", sizeof(PyArrayIterObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyArrayIterObject),__Pyx_ImportType_CheckSize_Ignore_3_0_11); if (!__pyx_ptype_5numpy_flatiter) __PYX_ERR(1, 230, __pyx_L1_error) + __pyx_ptype_5numpy_broadcast = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "broadcast", sizeof(PyArrayMultiIterObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyArrayMultiIterObject),__Pyx_ImportType_CheckSize_Ignore_3_0_11); if (!__pyx_ptype_5numpy_broadcast) __PYX_ERR(1, 234, __pyx_L1_error) + __pyx_ptype_5numpy_ndarray = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "ndarray", sizeof(PyArrayObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyArrayObject),__Pyx_ImportType_CheckSize_Ignore_3_0_11); if (!__pyx_ptype_5numpy_ndarray) __PYX_ERR(1, 243, __pyx_L1_error) + __pyx_ptype_5numpy_generic = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "generic", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_generic) __PYX_ERR(1, 815, __pyx_L1_error) + __pyx_ptype_5numpy_number = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "number", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_number) __PYX_ERR(1, 817, __pyx_L1_error) + __pyx_ptype_5numpy_integer = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "integer", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_integer) __PYX_ERR(1, 819, __pyx_L1_error) + __pyx_ptype_5numpy_signedinteger = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "signedinteger", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_signedinteger) __PYX_ERR(1, 821, __pyx_L1_error) + __pyx_ptype_5numpy_unsignedinteger = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "unsignedinteger", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_unsignedinteger) __PYX_ERR(1, 823, __pyx_L1_error) + __pyx_ptype_5numpy_inexact = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "inexact", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_inexact) __PYX_ERR(1, 
825, __pyx_L1_error) + __pyx_ptype_5numpy_floating = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "floating", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_floating) __PYX_ERR(1, 827, __pyx_L1_error) + __pyx_ptype_5numpy_complexfloating = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "complexfloating", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_complexfloating) __PYX_ERR(1, 829, __pyx_L1_error) + __pyx_ptype_5numpy_flexible = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "flexible", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_flexible) __PYX_ERR(1, 831, __pyx_L1_error) + __pyx_ptype_5numpy_character = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "character", sizeof(PyObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyObject),__Pyx_ImportType_CheckSize_Warn_3_0_11); if (!__pyx_ptype_5numpy_character) __PYX_ERR(1, 833, __pyx_L1_error) + __pyx_ptype_5numpy_ufunc = __Pyx_ImportType_3_0_11(__pyx_t_1, "numpy", "ufunc", sizeof(PyUFuncObject), __PYX_GET_STRUCT_ALIGNMENT_3_0_11(PyUFuncObject),__Pyx_ImportType_CheckSize_Ignore_3_0_11); if (!__pyx_ptype_5numpy_ufunc) __PYX_ERR(1, 871, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_RefNannyFinishContext(); + return 0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_RefNannyFinishContext(); + return -1; +} + +static int __Pyx_modinit_variable_import_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_variable_import_code", 0); + /*--- Variable import code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + +static int __Pyx_modinit_function_import_code(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_modinit_function_import_code", 0); + /*--- Function import code ---*/ + __Pyx_RefNannyFinishContext(); + return 0; +} + + +#if PY_MAJOR_VERSION >= 3 +#if CYTHON_PEP489_MULTI_PHASE_INIT +static PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def); /*proto*/ +static int __pyx_pymod_exec_box_overlaps(PyObject* module); /*proto*/ +static PyModuleDef_Slot __pyx_moduledef_slots[] = { + {Py_mod_create, (void*)__pyx_pymod_create}, + {Py_mod_exec, (void*)__pyx_pymod_exec_box_overlaps}, + {0, NULL} +}; +#endif + +#ifdef __cplusplus +namespace { + struct PyModuleDef __pyx_moduledef = + #else + static struct PyModuleDef __pyx_moduledef = + #endif + { + PyModuleDef_HEAD_INIT, + "box_overlaps", + 0, /* m_doc */ + #if CYTHON_PEP489_MULTI_PHASE_INIT + 0, /* m_size */ + #elif CYTHON_USE_MODULE_STATE + sizeof(__pyx_mstate), /* m_size */ + #else + -1, /* m_size */ + #endif + __pyx_methods /* m_methods */, + #if CYTHON_PEP489_MULTI_PHASE_INIT + __pyx_moduledef_slots, /* m_slots */ + #else + NULL, /* m_reload */ + #endif + #if CYTHON_USE_MODULE_STATE + __pyx_m_traverse, /* m_traverse */ + __pyx_m_clear, /* m_clear */ + NULL /* m_free */ + #else + NULL, /* m_traverse */ + NULL, /* m_clear */ + NULL /* m_free */ + #endif + }; + #ifdef __cplusplus +} /* anonymous namespace */ +#endif +#endif + +#ifndef CYTHON_NO_PYINIT_EXPORT +#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC +#elif PY_MAJOR_VERSION < 3 +#ifdef __cplusplus +#define __Pyx_PyMODINIT_FUNC extern "C" void +#else +#define __Pyx_PyMODINIT_FUNC void +#endif +#else +#ifdef __cplusplus +#define __Pyx_PyMODINIT_FUNC extern "C" PyObject * +#else +#define __Pyx_PyMODINIT_FUNC PyObject * +#endif +#endif 
+ + +#if PY_MAJOR_VERSION < 3 +__Pyx_PyMODINIT_FUNC initbox_overlaps(void) CYTHON_SMALL_CODE; /*proto*/ +__Pyx_PyMODINIT_FUNC initbox_overlaps(void) +#else +__Pyx_PyMODINIT_FUNC PyInit_box_overlaps(void) CYTHON_SMALL_CODE; /*proto*/ +__Pyx_PyMODINIT_FUNC PyInit_box_overlaps(void) +#if CYTHON_PEP489_MULTI_PHASE_INIT +{ + return PyModuleDef_Init(&__pyx_moduledef); +} +static CYTHON_SMALL_CODE int __Pyx_check_single_interpreter(void) { + #if PY_VERSION_HEX >= 0x030700A1 + static PY_INT64_T main_interpreter_id = -1; + PY_INT64_T current_id = PyInterpreterState_GetID(PyThreadState_Get()->interp); + if (main_interpreter_id == -1) { + main_interpreter_id = current_id; + return (unlikely(current_id == -1)) ? -1 : 0; + } else if (unlikely(main_interpreter_id != current_id)) + #else + static PyInterpreterState *main_interpreter = NULL; + PyInterpreterState *current_interpreter = PyThreadState_Get()->interp; + if (!main_interpreter) { + main_interpreter = current_interpreter; + } else if (unlikely(main_interpreter != current_interpreter)) + #endif + { + PyErr_SetString( + PyExc_ImportError, + "Interpreter change detected - this module can only be loaded into one interpreter per process."); + return -1; + } + return 0; +} +#if CYTHON_COMPILING_IN_LIMITED_API +static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *module, const char* from_name, const char* to_name, int allow_none) +#else +static CYTHON_SMALL_CODE int __Pyx_copy_spec_to_module(PyObject *spec, PyObject *moddict, const char* from_name, const char* to_name, int allow_none) +#endif +{ + PyObject *value = PyObject_GetAttrString(spec, from_name); + int result = 0; + if (likely(value)) { + if (allow_none || value != Py_None) { +#if CYTHON_COMPILING_IN_LIMITED_API + result = PyModule_AddObject(module, to_name, value); +#else + result = PyDict_SetItemString(moddict, to_name, value); +#endif + } + Py_DECREF(value); + } else if (PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + } else { + result = -1; + } + return result; +} +static CYTHON_SMALL_CODE PyObject* __pyx_pymod_create(PyObject *spec, PyModuleDef *def) { + PyObject *module = NULL, *moddict, *modname; + CYTHON_UNUSED_VAR(def); + if (__Pyx_check_single_interpreter()) + return NULL; + if (__pyx_m) + return __Pyx_NewRef(__pyx_m); + modname = PyObject_GetAttrString(spec, "name"); + if (unlikely(!modname)) goto bad; + module = PyModule_NewObject(modname); + Py_DECREF(modname); + if (unlikely(!module)) goto bad; +#if CYTHON_COMPILING_IN_LIMITED_API + moddict = module; +#else + moddict = PyModule_GetDict(module); + if (unlikely(!moddict)) goto bad; +#endif + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "loader", "__loader__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "origin", "__file__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "parent", "__package__", 1) < 0)) goto bad; + if (unlikely(__Pyx_copy_spec_to_module(spec, moddict, "submodule_search_locations", "__path__", 0) < 0)) goto bad; + return module; +bad: + Py_XDECREF(module); + return NULL; +} + + +static CYTHON_SMALL_CODE int __pyx_pymod_exec_box_overlaps(PyObject *__pyx_pyinit_module) +#endif +#endif +{ + int stringtab_initialized = 0; + #if CYTHON_USE_MODULE_STATE + int pystate_addmodule_run = 0; + #endif + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannyDeclarations + #if 
CYTHON_PEP489_MULTI_PHASE_INIT + if (__pyx_m) { + if (__pyx_m == __pyx_pyinit_module) return 0; + PyErr_SetString(PyExc_RuntimeError, "Module 'box_overlaps' has already been imported. Re-initialisation is not supported."); + return -1; + } + #elif PY_MAJOR_VERSION >= 3 + if (__pyx_m) return __Pyx_NewRef(__pyx_m); + #endif + /*--- Module creation code ---*/ + #if CYTHON_PEP489_MULTI_PHASE_INIT + __pyx_m = __pyx_pyinit_module; + Py_INCREF(__pyx_m); + #else + #if PY_MAJOR_VERSION < 3 + __pyx_m = Py_InitModule4("box_overlaps", __pyx_methods, 0, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); + if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) + #elif CYTHON_USE_MODULE_STATE + __pyx_t_1 = PyModule_Create(&__pyx_moduledef); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error) + { + int add_module_result = PyState_AddModule(__pyx_t_1, &__pyx_moduledef); + __pyx_t_1 = 0; /* transfer ownership from __pyx_t_1 to "box_overlaps" pseudovariable */ + if (unlikely((add_module_result < 0))) __PYX_ERR(0, 1, __pyx_L1_error) + pystate_addmodule_run = 1; + } + #else + __pyx_m = PyModule_Create(&__pyx_moduledef); + if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #endif + CYTHON_UNUSED_VAR(__pyx_t_1); + __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) __PYX_ERR(0, 1, __pyx_L1_error) + Py_INCREF(__pyx_d); + __pyx_b = __Pyx_PyImport_AddModuleRef(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_cython_runtime = __Pyx_PyImport_AddModuleRef((const char *) "cython_runtime"); if (unlikely(!__pyx_cython_runtime)) __PYX_ERR(0, 1, __pyx_L1_error) + if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #if CYTHON_REFNANNY +__Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny"); +if (!__Pyx_RefNanny) { + PyErr_Clear(); + __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny"); + if (!__Pyx_RefNanny) + Py_FatalError("failed to import 'refnanny' module"); +} +#endif + __Pyx_RefNannySetupContext("__Pyx_PyMODINIT_FUNC PyInit_box_overlaps(void)", 0); + if (__Pyx_check_binary_version(__PYX_LIMITED_VERSION_HEX, __Pyx_get_runtime_version(), CYTHON_COMPILING_IN_LIMITED_API) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #ifdef __Pxy_PyFrame_Initialize_Offsets + __Pxy_PyFrame_Initialize_Offsets(); + #endif + __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_unicode = PyUnicode_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_unicode)) __PYX_ERR(0, 1, __pyx_L1_error) + #ifdef __Pyx_CyFunction_USED + if (__pyx_CyFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_FusedFunction_USED + if (__pyx_FusedFunction_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_Coroutine_USED + if (__pyx_Coroutine_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_Generator_USED + if (__pyx_Generator_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_AsyncGen_USED + if (__pyx_AsyncGen_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_StopAsyncIteration_USED + if (__pyx_StopAsyncIteration_init(__pyx_m) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + /*--- Library function declarations ---*/ + /*--- Threads initialization code ---*/ + #if defined(WITH_THREAD) && PY_VERSION_HEX < 0x030700F0 && 
defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS + PyEval_InitThreads(); + #endif + /*--- Initialize various global constants etc. ---*/ + if (__Pyx_InitConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + stringtab_initialized = 1; + if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) + if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + if (__pyx_module_is_main_opencood__utils__box_overlaps) { + if (PyObject_SetAttr(__pyx_m, __pyx_n_s_name, __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + } + #if PY_MAJOR_VERSION >= 3 + { + PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error) + if (!PyDict_GetItemString(modules, "opencood.utils.box_overlaps")) { + if (unlikely((PyDict_SetItemString(modules, "opencood.utils.box_overlaps", __pyx_m) < 0))) __PYX_ERR(0, 1, __pyx_L1_error) + } + } + #endif + /*--- Builtin init code ---*/ + if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Constants init code ---*/ + if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Global type/function init code ---*/ + (void)__Pyx_modinit_global_init_code(); + (void)__Pyx_modinit_variable_export_code(); + (void)__Pyx_modinit_function_export_code(); + (void)__Pyx_modinit_type_init_code(); + if (unlikely((__Pyx_modinit_type_import_code() < 0))) __PYX_ERR(0, 1, __pyx_L1_error) + (void)__Pyx_modinit_variable_import_code(); + (void)__Pyx_modinit_function_import_code(); + /*--- Execution code ---*/ + #if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) + if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + + /* "opencood/utils/box_overlaps.pyx":8 + * # -------------------------------------------------------- + * + * import numpy as np # <<<<<<<<<<<<<< + * cimport numpy as np + * from cython.parallel import prange, parallel + */ + __pyx_t_2 = __Pyx_ImportDottedModule(__pyx_n_s_numpy, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 8, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, __pyx_t_2) < 0) __PYX_ERR(0, 8, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "opencood/utils/box_overlaps.pyx":13 + * + * + * DTYPE = np.float32 # <<<<<<<<<<<<<< + * ctypedef float DTYPE_t + * + */ + __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_np); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 13, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_float32); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 13, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + if (PyDict_SetItem(__pyx_d, __pyx_n_s_DTYPE, __pyx_t_3) < 0) __PYX_ERR(0, 13, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "opencood/utils/box_overlaps.pyx":17 + * + * + * def bbox_overlaps( # <<<<<<<<<<<<<< + * np.ndarray[DTYPE_t, ndim=2] boxes, + * np.ndarray[DTYPE_t, ndim=2] query_boxes): + */ + __pyx_t_3 = __Pyx_CyFunction_New(&__pyx_mdef_8opencood_5utils_12box_overlaps_1bbox_overlaps, 0, __pyx_n_s_bbox_overlaps, NULL, __pyx_n_s_opencood_utils_box_overlaps, __pyx_d, ((PyObject *)__pyx_codeobj__8)); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 17, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_bbox_overlaps, __pyx_t_3) < 0) __PYX_ERR(0, 17, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* 
"opencood/utils/box_overlaps.pyx":59 + * return overlaps + * + * def bbox_intersections( # <<<<<<<<<<<<<< + * np.ndarray[DTYPE_t, ndim=2] boxes, + * np.ndarray[DTYPE_t, ndim=2] query_boxes): + */ + __pyx_t_3 = __Pyx_CyFunction_New(&__pyx_mdef_8opencood_5utils_12box_overlaps_3bbox_intersections, 0, __pyx_n_s_bbox_intersections, NULL, __pyx_n_s_opencood_utils_box_overlaps, __pyx_d, ((PyObject *)__pyx_codeobj__10)); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 59, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_bbox_intersections, __pyx_t_3) < 0) __PYX_ERR(0, 59, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "opencood/utils/box_overlaps.pyx":99 + * + * # Compute bounding box voting + * def box_vote( # <<<<<<<<<<<<<< + * np.ndarray[float, ndim=2] dets_NMS, + * np.ndarray[float, ndim=2] dets_all): + */ + __pyx_t_3 = __Pyx_CyFunction_New(&__pyx_mdef_8opencood_5utils_12box_overlaps_5box_vote, 0, __pyx_n_s_box_vote, NULL, __pyx_n_s_opencood_utils_box_overlaps, __pyx_d, ((PyObject *)__pyx_codeobj__12)); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 99, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_box_vote, __pyx_t_3) < 0) __PYX_ERR(0, 99, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /* "opencood/utils/box_overlaps.pyx":1 + * # -------------------------------------------------------- # <<<<<<<<<<<<<< + * # Fast R-CNN + * # Copyright (c) 2015 Microsoft + */ + __pyx_t_3 = __Pyx_PyDict_NewPresized(0); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_3) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + + /*--- Wrapped vars code ---*/ + + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + if (__pyx_m) { + if (__pyx_d && stringtab_initialized) { + __Pyx_AddTraceback("init opencood.utils.box_overlaps", __pyx_clineno, __pyx_lineno, __pyx_filename); + } + #if !CYTHON_USE_MODULE_STATE + Py_CLEAR(__pyx_m); + #else + Py_DECREF(__pyx_m); + if (pystate_addmodule_run) { + PyObject *tp, *value, *tb; + PyErr_Fetch(&tp, &value, &tb); + PyState_RemoveModule(&__pyx_moduledef); + PyErr_Restore(tp, value, tb); + } + #endif + } else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ImportError, "init opencood.utils.box_overlaps"); + } + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + #if CYTHON_PEP489_MULTI_PHASE_INIT + return (__pyx_m != NULL) ? 
0 : -1; + #elif PY_MAJOR_VERSION >= 3 + return __pyx_m; + #else + return; + #endif +} +/* #### Code section: cleanup_globals ### */ +/* #### Code section: cleanup_module ### */ +/* #### Code section: main_method ### */ +/* #### Code section: utility_code_pragmas ### */ +#ifdef _MSC_VER +#pragma warning( push ) +/* Warning 4127: conditional expression is constant + * Cython uses constant conditional expressions to allow in inline functions to be optimized at + * compile-time, so this warning is not useful + */ +#pragma warning( disable : 4127 ) +#endif + + + +/* #### Code section: utility_code_def ### */ + +/* --- Runtime support code --- */ +/* Refnanny */ +#if CYTHON_REFNANNY +static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) { + PyObject *m = NULL, *p = NULL; + void *r = NULL; + m = PyImport_ImportModule(modname); + if (!m) goto end; + p = PyObject_GetAttrString(m, "RefNannyAPI"); + if (!p) goto end; + r = PyLong_AsVoidPtr(p); +end: + Py_XDECREF(p); + Py_XDECREF(m); + return (__Pyx_RefNannyAPIStruct *)r; +} +#endif + +/* PyErrExceptionMatches */ +#if CYTHON_FAST_THREAD_STATE +static int __Pyx_PyErr_ExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(tuple); +#if PY_MAJOR_VERSION >= 3 + for (i=0; i= 0x030C00A6 + PyObject *current_exception = tstate->current_exception; + if (unlikely(!current_exception)) return 0; + exc_type = (PyObject*) Py_TYPE(current_exception); + if (exc_type == err) return 1; +#else + exc_type = tstate->curexc_type; + if (exc_type == err) return 1; + if (unlikely(!exc_type)) return 0; +#endif + #if CYTHON_AVOID_BORROWED_REFS + Py_INCREF(exc_type); + #endif + if (unlikely(PyTuple_Check(err))) { + result = __Pyx_PyErr_ExceptionMatchesTuple(exc_type, err); + } else { + result = __Pyx_PyErr_GivenExceptionMatches(exc_type, err); + } + #if CYTHON_AVOID_BORROWED_REFS + Py_DECREF(exc_type); + #endif + return result; +} +#endif + +/* PyErrFetchRestore */ +#if CYTHON_FAST_THREAD_STATE +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { +#if PY_VERSION_HEX >= 0x030C00A6 + PyObject *tmp_value; + assert(type == NULL || (value != NULL && type == (PyObject*) Py_TYPE(value))); + if (value) { + #if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(((PyBaseExceptionObject*) value)->traceback != tb)) + #endif + PyException_SetTraceback(value, tb); + } + tmp_value = tstate->current_exception; + tstate->current_exception = value; + Py_XDECREF(tmp_value); + Py_XDECREF(type); + Py_XDECREF(tb); +#else + PyObject *tmp_type, *tmp_value, *tmp_tb; + tmp_type = tstate->curexc_type; + tmp_value = tstate->curexc_value; + tmp_tb = tstate->curexc_traceback; + tstate->curexc_type = type; + tstate->curexc_value = value; + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); +#endif +} +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { +#if PY_VERSION_HEX >= 0x030C00A6 + PyObject* exc_value; + exc_value = tstate->current_exception; + tstate->current_exception = 0; + *value = exc_value; + *type = NULL; + *tb = NULL; + if (exc_value) { + *type = (PyObject*) Py_TYPE(exc_value); + Py_INCREF(*type); + #if CYTHON_COMPILING_IN_CPYTHON + *tb = ((PyBaseExceptionObject*) exc_value)->traceback; + Py_XINCREF(*tb); + #else + *tb = PyException_GetTraceback(exc_value); + #endif + } +#else + *type = tstate->curexc_type; + *value = tstate->curexc_value; + *tb 
= tstate->curexc_traceback; + tstate->curexc_type = 0; + tstate->curexc_value = 0; + tstate->curexc_traceback = 0; +#endif +} +#endif + +/* PyObjectGetAttrStr */ +#if CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) { + PyTypeObject* tp = Py_TYPE(obj); + if (likely(tp->tp_getattro)) + return tp->tp_getattro(obj, attr_name); +#if PY_MAJOR_VERSION < 3 + if (likely(tp->tp_getattr)) + return tp->tp_getattr(obj, PyString_AS_STRING(attr_name)); +#endif + return PyObject_GetAttr(obj, attr_name); +} +#endif + +/* PyObjectGetAttrStrNoError */ +#if __PYX_LIMITED_VERSION_HEX < 0x030d00A1 +static void __Pyx_PyObject_GetAttrStr_ClearAttributeError(void) { + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + if (likely(__Pyx_PyErr_ExceptionMatches(PyExc_AttributeError))) + __Pyx_PyErr_Clear(); +} +#endif +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStrNoError(PyObject* obj, PyObject* attr_name) { + PyObject *result; +#if __PYX_LIMITED_VERSION_HEX >= 0x030d00A1 + (void) PyObject_GetOptionalAttr(obj, attr_name, &result); + return result; +#else +#if CYTHON_COMPILING_IN_CPYTHON && CYTHON_USE_TYPE_SLOTS && PY_VERSION_HEX >= 0x030700B1 + PyTypeObject* tp = Py_TYPE(obj); + if (likely(tp->tp_getattro == PyObject_GenericGetAttr)) { + return _PyObject_GenericGetAttrWithDict(obj, attr_name, NULL, 1); + } +#endif + result = __Pyx_PyObject_GetAttrStr(obj, attr_name); + if (unlikely(!result)) { + __Pyx_PyObject_GetAttrStr_ClearAttributeError(); + } + return result; +#endif +} + +/* GetBuiltinName */ +static PyObject *__Pyx_GetBuiltinName(PyObject *name) { + PyObject* result = __Pyx_PyObject_GetAttrStrNoError(__pyx_b, name); + if (unlikely(!result) && !PyErr_Occurred()) { + PyErr_Format(PyExc_NameError, +#if PY_MAJOR_VERSION >= 3 + "name '%U' is not defined", name); +#else + "name '%.200s' is not defined", PyString_AS_STRING(name)); +#endif + } + return result; +} + +/* GetTopmostException */ +#if CYTHON_USE_EXC_INFO_STACK && CYTHON_FAST_THREAD_STATE +static _PyErr_StackItem * +__Pyx_PyErr_GetTopmostException(PyThreadState *tstate) +{ + _PyErr_StackItem *exc_info = tstate->exc_info; + while ((exc_info->exc_value == NULL || exc_info->exc_value == Py_None) && + exc_info->previous_item != NULL) + { + exc_info = exc_info->previous_item; + } + return exc_info; +} +#endif + +/* SaveResetException */ +#if CYTHON_FAST_THREAD_STATE +static CYTHON_INLINE void __Pyx__ExceptionSave(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { + #if CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 + _PyErr_StackItem *exc_info = __Pyx_PyErr_GetTopmostException(tstate); + PyObject *exc_value = exc_info->exc_value; + if (exc_value == NULL || exc_value == Py_None) { + *value = NULL; + *type = NULL; + *tb = NULL; + } else { + *value = exc_value; + Py_INCREF(*value); + *type = (PyObject*) Py_TYPE(exc_value); + Py_INCREF(*type); + *tb = PyException_GetTraceback(exc_value); + } + #elif CYTHON_USE_EXC_INFO_STACK + _PyErr_StackItem *exc_info = __Pyx_PyErr_GetTopmostException(tstate); + *type = exc_info->exc_type; + *value = exc_info->exc_value; + *tb = exc_info->exc_traceback; + Py_XINCREF(*type); + Py_XINCREF(*value); + Py_XINCREF(*tb); + #else + *type = tstate->exc_type; + *value = tstate->exc_value; + *tb = tstate->exc_traceback; + Py_XINCREF(*type); + Py_XINCREF(*value); + Py_XINCREF(*tb); + #endif +} +static CYTHON_INLINE void __Pyx__ExceptionReset(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { + #if 
CYTHON_USE_EXC_INFO_STACK && PY_VERSION_HEX >= 0x030B00a4 + _PyErr_StackItem *exc_info = tstate->exc_info; + PyObject *tmp_value = exc_info->exc_value; + exc_info->exc_value = value; + Py_XDECREF(tmp_value); + Py_XDECREF(type); + Py_XDECREF(tb); + #else + PyObject *tmp_type, *tmp_value, *tmp_tb; + #if CYTHON_USE_EXC_INFO_STACK + _PyErr_StackItem *exc_info = tstate->exc_info; + tmp_type = exc_info->exc_type; + tmp_value = exc_info->exc_value; + tmp_tb = exc_info->exc_traceback; + exc_info->exc_type = type; + exc_info->exc_value = value; + exc_info->exc_traceback = tb; + #else + tmp_type = tstate->exc_type; + tmp_value = tstate->exc_value; + tmp_tb = tstate->exc_traceback; + tstate->exc_type = type; + tstate->exc_value = value; + tstate->exc_traceback = tb; + #endif + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); + #endif +} +#endif + +/* GetException */ +#if CYTHON_FAST_THREAD_STATE +static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) +#else +static int __Pyx_GetException(PyObject **type, PyObject **value, PyObject **tb) +#endif +{ + PyObject *local_type = NULL, *local_value, *local_tb = NULL; +#if CYTHON_FAST_THREAD_STATE + PyObject *tmp_type, *tmp_value, *tmp_tb; + #if PY_VERSION_HEX >= 0x030C00A6 + local_value = tstate->current_exception; + tstate->current_exception = 0; + if (likely(local_value)) { + local_type = (PyObject*) Py_TYPE(local_value); + Py_INCREF(local_type); + local_tb = PyException_GetTraceback(local_value); + } + #else + local_type = tstate->curexc_type; + local_value = tstate->curexc_value; + local_tb = tstate->curexc_traceback; + tstate->curexc_type = 0; + tstate->curexc_value = 0; + tstate->curexc_traceback = 0; + #endif +#else + PyErr_Fetch(&local_type, &local_value, &local_tb); +#endif + PyErr_NormalizeException(&local_type, &local_value, &local_tb); +#if CYTHON_FAST_THREAD_STATE && PY_VERSION_HEX >= 0x030C00A6 + if (unlikely(tstate->current_exception)) +#elif CYTHON_FAST_THREAD_STATE + if (unlikely(tstate->curexc_type)) +#else + if (unlikely(PyErr_Occurred())) +#endif + goto bad; + #if PY_MAJOR_VERSION >= 3 + if (local_tb) { + if (unlikely(PyException_SetTraceback(local_value, local_tb) < 0)) + goto bad; + } + #endif + Py_XINCREF(local_tb); + Py_XINCREF(local_type); + Py_XINCREF(local_value); + *type = local_type; + *value = local_value; + *tb = local_tb; +#if CYTHON_FAST_THREAD_STATE + #if CYTHON_USE_EXC_INFO_STACK + { + _PyErr_StackItem *exc_info = tstate->exc_info; + #if PY_VERSION_HEX >= 0x030B00a4 + tmp_value = exc_info->exc_value; + exc_info->exc_value = local_value; + tmp_type = NULL; + tmp_tb = NULL; + Py_XDECREF(local_type); + Py_XDECREF(local_tb); + #else + tmp_type = exc_info->exc_type; + tmp_value = exc_info->exc_value; + tmp_tb = exc_info->exc_traceback; + exc_info->exc_type = local_type; + exc_info->exc_value = local_value; + exc_info->exc_traceback = local_tb; + #endif + } + #else + tmp_type = tstate->exc_type; + tmp_value = tstate->exc_value; + tmp_tb = tstate->exc_traceback; + tstate->exc_type = local_type; + tstate->exc_value = local_value; + tstate->exc_traceback = local_tb; + #endif + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); +#else + PyErr_SetExcInfo(local_type, local_value, local_tb); +#endif + return 0; +bad: + *type = 0; + *value = 0; + *tb = 0; + Py_XDECREF(local_type); + Py_XDECREF(local_value); + Py_XDECREF(local_tb); + return -1; +} + +/* PyObjectCall */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* 
__Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) { + PyObject *result; + ternaryfunc call = Py_TYPE(func)->tp_call; + if (unlikely(!call)) + return PyObject_Call(func, arg, kw); + #if PY_MAJOR_VERSION < 3 + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + #else + if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) + return NULL; + #endif + result = (*call)(func, arg, kw); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +/* RaiseException */ +#if PY_MAJOR_VERSION < 3 +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) { + __Pyx_PyThreadState_declare + CYTHON_UNUSED_VAR(cause); + Py_XINCREF(type); + if (!value || value == Py_None) + value = NULL; + else + Py_INCREF(value); + if (!tb || tb == Py_None) + tb = NULL; + else { + Py_INCREF(tb); + if (!PyTraceBack_Check(tb)) { + PyErr_SetString(PyExc_TypeError, + "raise: arg 3 must be a traceback or None"); + goto raise_error; + } + } + if (PyType_Check(type)) { +#if CYTHON_COMPILING_IN_PYPY + if (!value) { + Py_INCREF(Py_None); + value = Py_None; + } +#endif + PyErr_NormalizeException(&type, &value, &tb); + } else { + if (value) { + PyErr_SetString(PyExc_TypeError, + "instance exception may not have a separate value"); + goto raise_error; + } + value = type; + type = (PyObject*) Py_TYPE(type); + Py_INCREF(type); + if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) { + PyErr_SetString(PyExc_TypeError, + "raise: exception class must be a subclass of BaseException"); + goto raise_error; + } + } + __Pyx_PyThreadState_assign + __Pyx_ErrRestore(type, value, tb); + return; +raise_error: + Py_XDECREF(value); + Py_XDECREF(type); + Py_XDECREF(tb); + return; +} +#else +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) { + PyObject* owned_instance = NULL; + if (tb == Py_None) { + tb = 0; + } else if (tb && !PyTraceBack_Check(tb)) { + PyErr_SetString(PyExc_TypeError, + "raise: arg 3 must be a traceback or None"); + goto bad; + } + if (value == Py_None) + value = 0; + if (PyExceptionInstance_Check(type)) { + if (value) { + PyErr_SetString(PyExc_TypeError, + "instance exception may not have a separate value"); + goto bad; + } + value = type; + type = (PyObject*) Py_TYPE(value); + } else if (PyExceptionClass_Check(type)) { + PyObject *instance_class = NULL; + if (value && PyExceptionInstance_Check(value)) { + instance_class = (PyObject*) Py_TYPE(value); + if (instance_class != type) { + int is_subclass = PyObject_IsSubclass(instance_class, type); + if (!is_subclass) { + instance_class = NULL; + } else if (unlikely(is_subclass == -1)) { + goto bad; + } else { + type = instance_class; + } + } + } + if (!instance_class) { + PyObject *args; + if (!value) + args = PyTuple_New(0); + else if (PyTuple_Check(value)) { + Py_INCREF(value); + args = value; + } else + args = PyTuple_Pack(1, value); + if (!args) + goto bad; + owned_instance = PyObject_Call(type, args, NULL); + Py_DECREF(args); + if (!owned_instance) + goto bad; + value = owned_instance; + if (!PyExceptionInstance_Check(value)) { + PyErr_Format(PyExc_TypeError, + "calling %R should have returned an instance of " + "BaseException, not %R", + type, Py_TYPE(value)); + goto bad; + } + } + } else { + PyErr_SetString(PyExc_TypeError, + "raise: exception class must be a 
subclass of BaseException"); + goto bad; + } + if (cause) { + PyObject *fixed_cause; + if (cause == Py_None) { + fixed_cause = NULL; + } else if (PyExceptionClass_Check(cause)) { + fixed_cause = PyObject_CallObject(cause, NULL); + if (fixed_cause == NULL) + goto bad; + } else if (PyExceptionInstance_Check(cause)) { + fixed_cause = cause; + Py_INCREF(fixed_cause); + } else { + PyErr_SetString(PyExc_TypeError, + "exception causes must derive from " + "BaseException"); + goto bad; + } + PyException_SetCause(value, fixed_cause); + } + PyErr_SetObject(type, value); + if (tb) { + #if PY_VERSION_HEX >= 0x030C00A6 + PyException_SetTraceback(value, tb); + #elif CYTHON_FAST_THREAD_STATE + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject* tmp_tb = tstate->curexc_traceback; + if (tb != tmp_tb) { + Py_INCREF(tb); + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_tb); + } +#else + PyObject *tmp_type, *tmp_value, *tmp_tb; + PyErr_Fetch(&tmp_type, &tmp_value, &tmp_tb); + Py_INCREF(tb); + PyErr_Restore(tmp_type, tmp_value, tb); + Py_XDECREF(tmp_tb); +#endif + } +bad: + Py_XDECREF(owned_instance); + return; +} +#endif + +/* TupleAndListFromArray */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE void __Pyx_copy_object_array(PyObject *const *CYTHON_RESTRICT src, PyObject** CYTHON_RESTRICT dest, Py_ssize_t length) { + PyObject *v; + Py_ssize_t i; + for (i = 0; i < length; i++) { + v = dest[i] = src[i]; + Py_INCREF(v); + } +} +static CYTHON_INLINE PyObject * +__Pyx_PyTuple_FromArray(PyObject *const *src, Py_ssize_t n) +{ + PyObject *res; + if (n <= 0) { + Py_INCREF(__pyx_empty_tuple); + return __pyx_empty_tuple; + } + res = PyTuple_New(n); + if (unlikely(res == NULL)) return NULL; + __Pyx_copy_object_array(src, ((PyTupleObject*)res)->ob_item, n); + return res; +} +static CYTHON_INLINE PyObject * +__Pyx_PyList_FromArray(PyObject *const *src, Py_ssize_t n) +{ + PyObject *res; + if (n <= 0) { + return PyList_New(0); + } + res = PyList_New(n); + if (unlikely(res == NULL)) return NULL; + __Pyx_copy_object_array(src, ((PyListObject*)res)->ob_item, n); + return res; +} +#endif + +/* BytesEquals */ +static CYTHON_INLINE int __Pyx_PyBytes_Equals(PyObject* s1, PyObject* s2, int equals) { +#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API + return PyObject_RichCompareBool(s1, s2, equals); +#else + if (s1 == s2) { + return (equals == Py_EQ); + } else if (PyBytes_CheckExact(s1) & PyBytes_CheckExact(s2)) { + const char *ps1, *ps2; + Py_ssize_t length = PyBytes_GET_SIZE(s1); + if (length != PyBytes_GET_SIZE(s2)) + return (equals == Py_NE); + ps1 = PyBytes_AS_STRING(s1); + ps2 = PyBytes_AS_STRING(s2); + if (ps1[0] != ps2[0]) { + return (equals == Py_NE); + } else if (length == 1) { + return (equals == Py_EQ); + } else { + int result; +#if CYTHON_USE_UNICODE_INTERNALS && (PY_VERSION_HEX < 0x030B0000) + Py_hash_t hash1, hash2; + hash1 = ((PyBytesObject*)s1)->ob_shash; + hash2 = ((PyBytesObject*)s2)->ob_shash; + if (hash1 != hash2 && hash1 != -1 && hash2 != -1) { + return (equals == Py_NE); + } +#endif + result = memcmp(ps1, ps2, (size_t)length); + return (equals == Py_EQ) ? 
(result == 0) : (result != 0); + } + } else if ((s1 == Py_None) & PyBytes_CheckExact(s2)) { + return (equals == Py_NE); + } else if ((s2 == Py_None) & PyBytes_CheckExact(s1)) { + return (equals == Py_NE); + } else { + int result; + PyObject* py_result = PyObject_RichCompare(s1, s2, equals); + if (!py_result) + return -1; + result = __Pyx_PyObject_IsTrue(py_result); + Py_DECREF(py_result); + return result; + } +#endif +} + +/* UnicodeEquals */ +static CYTHON_INLINE int __Pyx_PyUnicode_Equals(PyObject* s1, PyObject* s2, int equals) { +#if CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API + return PyObject_RichCompareBool(s1, s2, equals); +#else +#if PY_MAJOR_VERSION < 3 + PyObject* owned_ref = NULL; +#endif + int s1_is_unicode, s2_is_unicode; + if (s1 == s2) { + goto return_eq; + } + s1_is_unicode = PyUnicode_CheckExact(s1); + s2_is_unicode = PyUnicode_CheckExact(s2); +#if PY_MAJOR_VERSION < 3 + if ((s1_is_unicode & (!s2_is_unicode)) && PyString_CheckExact(s2)) { + owned_ref = PyUnicode_FromObject(s2); + if (unlikely(!owned_ref)) + return -1; + s2 = owned_ref; + s2_is_unicode = 1; + } else if ((s2_is_unicode & (!s1_is_unicode)) && PyString_CheckExact(s1)) { + owned_ref = PyUnicode_FromObject(s1); + if (unlikely(!owned_ref)) + return -1; + s1 = owned_ref; + s1_is_unicode = 1; + } else if (((!s2_is_unicode) & (!s1_is_unicode))) { + return __Pyx_PyBytes_Equals(s1, s2, equals); + } +#endif + if (s1_is_unicode & s2_is_unicode) { + Py_ssize_t length; + int kind; + void *data1, *data2; + if (unlikely(__Pyx_PyUnicode_READY(s1) < 0) || unlikely(__Pyx_PyUnicode_READY(s2) < 0)) + return -1; + length = __Pyx_PyUnicode_GET_LENGTH(s1); + if (length != __Pyx_PyUnicode_GET_LENGTH(s2)) { + goto return_ne; + } +#if CYTHON_USE_UNICODE_INTERNALS + { + Py_hash_t hash1, hash2; + #if CYTHON_PEP393_ENABLED + hash1 = ((PyASCIIObject*)s1)->hash; + hash2 = ((PyASCIIObject*)s2)->hash; + #else + hash1 = ((PyUnicodeObject*)s1)->hash; + hash2 = ((PyUnicodeObject*)s2)->hash; + #endif + if (hash1 != hash2 && hash1 != -1 && hash2 != -1) { + goto return_ne; + } + } +#endif + kind = __Pyx_PyUnicode_KIND(s1); + if (kind != __Pyx_PyUnicode_KIND(s2)) { + goto return_ne; + } + data1 = __Pyx_PyUnicode_DATA(s1); + data2 = __Pyx_PyUnicode_DATA(s2); + if (__Pyx_PyUnicode_READ(kind, data1, 0) != __Pyx_PyUnicode_READ(kind, data2, 0)) { + goto return_ne; + } else if (length == 1) { + goto return_eq; + } else { + int result = memcmp(data1, data2, (size_t)(length * kind)); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + return (equals == Py_EQ) ? 
(result == 0) : (result != 0); + } + } else if ((s1 == Py_None) & s2_is_unicode) { + goto return_ne; + } else if ((s2 == Py_None) & s1_is_unicode) { + goto return_ne; + } else { + int result; + PyObject* py_result = PyObject_RichCompare(s1, s2, equals); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + if (!py_result) + return -1; + result = __Pyx_PyObject_IsTrue(py_result); + Py_DECREF(py_result); + return result; + } +return_eq: + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + return (equals == Py_EQ); +return_ne: + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(owned_ref); + #endif + return (equals == Py_NE); +#endif +} + +/* fastcall */ +#if CYTHON_METH_FASTCALL +static CYTHON_INLINE PyObject * __Pyx_GetKwValue_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues, PyObject *s) +{ + Py_ssize_t i, n = PyTuple_GET_SIZE(kwnames); + for (i = 0; i < n; i++) + { + if (s == PyTuple_GET_ITEM(kwnames, i)) return kwvalues[i]; + } + for (i = 0; i < n; i++) + { + int eq = __Pyx_PyUnicode_Equals(s, PyTuple_GET_ITEM(kwnames, i), Py_EQ); + if (unlikely(eq != 0)) { + if (unlikely(eq < 0)) return NULL; + return kwvalues[i]; + } + } + return NULL; +} +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030d0000 +CYTHON_UNUSED static PyObject *__Pyx_KwargsAsDict_FASTCALL(PyObject *kwnames, PyObject *const *kwvalues) { + Py_ssize_t i, nkwargs = PyTuple_GET_SIZE(kwnames); + PyObject *dict; + dict = PyDict_New(); + if (unlikely(!dict)) + return NULL; + for (i=0; i= 3 + "%s() got multiple values for keyword argument '%U'", func_name, kw_name); + #else + "%s() got multiple values for keyword argument '%s'", func_name, + PyString_AsString(kw_name)); + #endif +} + +/* ParseKeywords */ +static int __Pyx_ParseOptionalKeywords( + PyObject *kwds, + PyObject *const *kwvalues, + PyObject **argnames[], + PyObject *kwds2, + PyObject *values[], + Py_ssize_t num_pos_args, + const char* function_name) +{ + PyObject *key = 0, *value = 0; + Py_ssize_t pos = 0; + PyObject*** name; + PyObject*** first_kw_arg = argnames + num_pos_args; + int kwds_is_tuple = CYTHON_METH_FASTCALL && likely(PyTuple_Check(kwds)); + while (1) { + Py_XDECREF(key); key = NULL; + Py_XDECREF(value); value = NULL; + if (kwds_is_tuple) { + Py_ssize_t size; +#if CYTHON_ASSUME_SAFE_MACROS + size = PyTuple_GET_SIZE(kwds); +#else + size = PyTuple_Size(kwds); + if (size < 0) goto bad; +#endif + if (pos >= size) break; +#if CYTHON_AVOID_BORROWED_REFS + key = __Pyx_PySequence_ITEM(kwds, pos); + if (!key) goto bad; +#elif CYTHON_ASSUME_SAFE_MACROS + key = PyTuple_GET_ITEM(kwds, pos); +#else + key = PyTuple_GetItem(kwds, pos); + if (!key) goto bad; +#endif + value = kwvalues[pos]; + pos++; + } + else + { + if (!PyDict_Next(kwds, &pos, &key, &value)) break; +#if CYTHON_AVOID_BORROWED_REFS + Py_INCREF(key); +#endif + } + name = first_kw_arg; + while (*name && (**name != key)) name++; + if (*name) { + values[name-argnames] = value; +#if CYTHON_AVOID_BORROWED_REFS + Py_INCREF(value); + Py_DECREF(key); +#endif + key = NULL; + value = NULL; + continue; + } +#if !CYTHON_AVOID_BORROWED_REFS + Py_INCREF(key); +#endif + Py_INCREF(value); + name = first_kw_arg; + #if PY_MAJOR_VERSION < 3 + if (likely(PyString_Check(key))) { + while (*name) { + if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key)) + && _PyString_Eq(**name, key)) { + values[name-argnames] = value; +#if CYTHON_AVOID_BORROWED_REFS + value = NULL; +#endif + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while 
(argname != first_kw_arg) { + if ((**argname == key) || ( + (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key)) + && _PyString_Eq(**argname, key))) { + goto arg_passed_twice; + } + argname++; + } + } + } else + #endif + if (likely(PyUnicode_Check(key))) { + while (*name) { + int cmp = ( + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (__Pyx_PyUnicode_GET_LENGTH(**name) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 : + #endif + PyUnicode_Compare(**name, key) + ); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) { + values[name-argnames] = value; +#if CYTHON_AVOID_BORROWED_REFS + value = NULL; +#endif + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + int cmp = (**argname == key) ? 0 : + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (__Pyx_PyUnicode_GET_LENGTH(**argname) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 : + #endif + PyUnicode_Compare(**argname, key); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) goto arg_passed_twice; + argname++; + } + } + } else + goto invalid_keyword_type; + if (kwds2) { + if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad; + } else { + goto invalid_keyword; + } + } + Py_XDECREF(key); + Py_XDECREF(value); + return 0; +arg_passed_twice: + __Pyx_RaiseDoubleKeywordsError(function_name, key); + goto bad; +invalid_keyword_type: + PyErr_Format(PyExc_TypeError, + "%.200s() keywords must be strings", function_name); + goto bad; +invalid_keyword: + #if PY_MAJOR_VERSION < 3 + PyErr_Format(PyExc_TypeError, + "%.200s() got an unexpected keyword argument '%.200s'", + function_name, PyString_AsString(key)); + #else + PyErr_Format(PyExc_TypeError, + "%s() got an unexpected keyword argument '%U'", + function_name, key); + #endif +bad: + Py_XDECREF(key); + Py_XDECREF(value); + return -1; +} + +/* ArgTypeTest */ +static int __Pyx__ArgTypeTest(PyObject *obj, PyTypeObject *type, const char *name, int exact) +{ + __Pyx_TypeName type_name; + __Pyx_TypeName obj_type_name; + if (unlikely(!type)) { + PyErr_SetString(PyExc_SystemError, "Missing type object"); + return 0; + } + else if (exact) { + #if PY_MAJOR_VERSION == 2 + if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1; + #endif + } + else { + if (likely(__Pyx_TypeCheck(obj, type))) return 1; + } + type_name = __Pyx_PyType_GetName(type); + obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + PyErr_Format(PyExc_TypeError, + "Argument '%.200s' has incorrect type (expected " __Pyx_FMT_TYPENAME + ", got " __Pyx_FMT_TYPENAME ")", name, type_name, obj_type_name); + __Pyx_DECREF_TypeName(type_name); + __Pyx_DECREF_TypeName(obj_type_name); + return 0; +} + +/* IsLittleEndian */ +static CYTHON_INLINE int __Pyx_Is_Little_Endian(void) +{ + union { + uint32_t u32; + uint8_t u8[4]; + } S; + S.u32 = 0x01020304; + return S.u8[0] == 4; +} + +/* BufferFormatCheck */ +static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx, + __Pyx_BufFmt_StackElem* stack, + __Pyx_TypeInfo* type) { + stack[0].field = &ctx->root; + stack[0].parent_offset = 0; + ctx->root.type = type; + ctx->root.name = "buffer dtype"; + ctx->root.offset = 0; + ctx->head = stack; + ctx->head->field = &ctx->root; + ctx->fmt_offset = 0; + ctx->head->parent_offset = 0; + ctx->new_packmode = '@'; + ctx->enc_packmode = '@'; + ctx->new_count = 1; + ctx->enc_count = 0; + ctx->enc_type = 0; + ctx->is_complex = 0; + ctx->is_valid_array = 0; + ctx->struct_alignment = 0; + while 
(type->typegroup == 'S') { + ++ctx->head; + ctx->head->field = type->fields; + ctx->head->parent_offset = 0; + type = type->fields->type; + } +} +static int __Pyx_BufFmt_ParseNumber(const char** ts) { + int count; + const char* t = *ts; + if (*t < '0' || *t > '9') { + return -1; + } else { + count = *t++ - '0'; + while (*t >= '0' && *t <= '9') { + count *= 10; + count += *t++ - '0'; + } + } + *ts = t; + return count; +} +static int __Pyx_BufFmt_ExpectNumber(const char **ts) { + int number = __Pyx_BufFmt_ParseNumber(ts); + if (number == -1) + PyErr_Format(PyExc_ValueError,\ + "Does not understand character buffer dtype format string ('%c')", **ts); + return number; +} +static void __Pyx_BufFmt_RaiseUnexpectedChar(char ch) { + PyErr_Format(PyExc_ValueError, + "Unexpected format string character: '%c'", ch); +} +static const char* __Pyx_BufFmt_DescribeTypeChar(char ch, int is_complex) { + switch (ch) { + case '?': return "'bool'"; + case 'c': return "'char'"; + case 'b': return "'signed char'"; + case 'B': return "'unsigned char'"; + case 'h': return "'short'"; + case 'H': return "'unsigned short'"; + case 'i': return "'int'"; + case 'I': return "'unsigned int'"; + case 'l': return "'long'"; + case 'L': return "'unsigned long'"; + case 'q': return "'long long'"; + case 'Q': return "'unsigned long long'"; + case 'f': return (is_complex ? "'complex float'" : "'float'"); + case 'd': return (is_complex ? "'complex double'" : "'double'"); + case 'g': return (is_complex ? "'complex long double'" : "'long double'"); + case 'T': return "a struct"; + case 'O': return "Python object"; + case 'P': return "a pointer"; + case 's': case 'p': return "a string"; + case 0: return "end"; + default: return "unparsable format string"; + } +} +static size_t __Pyx_BufFmt_TypeCharToStandardSize(char ch, int is_complex) { + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return 2; + case 'i': case 'I': case 'l': case 'L': return 4; + case 'q': case 'Q': return 8; + case 'f': return (is_complex ? 8 : 4); + case 'd': return (is_complex ? 16 : 8); + case 'g': { + PyErr_SetString(PyExc_ValueError, "Python does not define a standard format string size for long double ('g').."); + return 0; + } + case 'O': case 'P': return sizeof(void*); + default: + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } +} +static size_t __Pyx_BufFmt_TypeCharToNativeSize(char ch, int is_complex) { + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return sizeof(short); + case 'i': case 'I': return sizeof(int); + case 'l': case 'L': return sizeof(long); + #ifdef HAVE_LONG_LONG + case 'q': case 'Q': return sizeof(PY_LONG_LONG); + #endif + case 'f': return sizeof(float) * (is_complex ? 2 : 1); + case 'd': return sizeof(double) * (is_complex ? 2 : 1); + case 'g': return sizeof(long double) * (is_complex ? 
2 : 1); + case 'O': case 'P': return sizeof(void*); + default: { + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } + } +} +typedef struct { char c; short x; } __Pyx_st_short; +typedef struct { char c; int x; } __Pyx_st_int; +typedef struct { char c; long x; } __Pyx_st_long; +typedef struct { char c; float x; } __Pyx_st_float; +typedef struct { char c; double x; } __Pyx_st_double; +typedef struct { char c; long double x; } __Pyx_st_longdouble; +typedef struct { char c; void *x; } __Pyx_st_void_p; +#ifdef HAVE_LONG_LONG +typedef struct { char c; PY_LONG_LONG x; } __Pyx_st_longlong; +#endif +static size_t __Pyx_BufFmt_TypeCharToAlignment(char ch, int is_complex) { + CYTHON_UNUSED_VAR(is_complex); + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return sizeof(__Pyx_st_short) - sizeof(short); + case 'i': case 'I': return sizeof(__Pyx_st_int) - sizeof(int); + case 'l': case 'L': return sizeof(__Pyx_st_long) - sizeof(long); +#ifdef HAVE_LONG_LONG + case 'q': case 'Q': return sizeof(__Pyx_st_longlong) - sizeof(PY_LONG_LONG); +#endif + case 'f': return sizeof(__Pyx_st_float) - sizeof(float); + case 'd': return sizeof(__Pyx_st_double) - sizeof(double); + case 'g': return sizeof(__Pyx_st_longdouble) - sizeof(long double); + case 'P': case 'O': return sizeof(__Pyx_st_void_p) - sizeof(void*); + default: + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } +} +/* These are for computing the padding at the end of the struct to align + on the first member of the struct. This will probably the same as above, + but we don't have any guarantees. + */ +typedef struct { short x; char c; } __Pyx_pad_short; +typedef struct { int x; char c; } __Pyx_pad_int; +typedef struct { long x; char c; } __Pyx_pad_long; +typedef struct { float x; char c; } __Pyx_pad_float; +typedef struct { double x; char c; } __Pyx_pad_double; +typedef struct { long double x; char c; } __Pyx_pad_longdouble; +typedef struct { void *x; char c; } __Pyx_pad_void_p; +#ifdef HAVE_LONG_LONG +typedef struct { PY_LONG_LONG x; char c; } __Pyx_pad_longlong; +#endif +static size_t __Pyx_BufFmt_TypeCharToPadding(char ch, int is_complex) { + CYTHON_UNUSED_VAR(is_complex); + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return sizeof(__Pyx_pad_short) - sizeof(short); + case 'i': case 'I': return sizeof(__Pyx_pad_int) - sizeof(int); + case 'l': case 'L': return sizeof(__Pyx_pad_long) - sizeof(long); +#ifdef HAVE_LONG_LONG + case 'q': case 'Q': return sizeof(__Pyx_pad_longlong) - sizeof(PY_LONG_LONG); +#endif + case 'f': return sizeof(__Pyx_pad_float) - sizeof(float); + case 'd': return sizeof(__Pyx_pad_double) - sizeof(double); + case 'g': return sizeof(__Pyx_pad_longdouble) - sizeof(long double); + case 'P': case 'O': return sizeof(__Pyx_pad_void_p) - sizeof(void*); + default: + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } +} +static char __Pyx_BufFmt_TypeCharToGroup(char ch, int is_complex) { + switch (ch) { + case 'c': + return 'H'; + case 'b': case 'h': case 'i': + case 'l': case 'q': case 's': case 'p': + return 'I'; + case '?': case 'B': case 'H': case 'I': case 'L': case 'Q': + return 'U'; + case 'f': case 'd': case 'g': + return (is_complex ? 
'C' : 'R'); + case 'O': + return 'O'; + case 'P': + return 'P'; + default: { + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } + } +} +static void __Pyx_BufFmt_RaiseExpected(__Pyx_BufFmt_Context* ctx) { + if (ctx->head == NULL || ctx->head->field == &ctx->root) { + const char* expected; + const char* quote; + if (ctx->head == NULL) { + expected = "end"; + quote = ""; + } else { + expected = ctx->head->field->type->name; + quote = "'"; + } + PyErr_Format(PyExc_ValueError, + "Buffer dtype mismatch, expected %s%s%s but got %s", + quote, expected, quote, + __Pyx_BufFmt_DescribeTypeChar(ctx->enc_type, ctx->is_complex)); + } else { + __Pyx_StructField* field = ctx->head->field; + __Pyx_StructField* parent = (ctx->head - 1)->field; + PyErr_Format(PyExc_ValueError, + "Buffer dtype mismatch, expected '%s' but got %s in '%s.%s'", + field->type->name, __Pyx_BufFmt_DescribeTypeChar(ctx->enc_type, ctx->is_complex), + parent->type->name, field->name); + } +} +static int __Pyx_BufFmt_ProcessTypeChunk(__Pyx_BufFmt_Context* ctx) { + char group; + size_t size, offset, arraysize = 1; + if (ctx->enc_type == 0) return 0; + if (ctx->head->field->type->arraysize[0]) { + int i, ndim = 0; + if (ctx->enc_type == 's' || ctx->enc_type == 'p') { + ctx->is_valid_array = ctx->head->field->type->ndim == 1; + ndim = 1; + if (ctx->enc_count != ctx->head->field->type->arraysize[0]) { + PyErr_Format(PyExc_ValueError, + "Expected a dimension of size %zu, got %zu", + ctx->head->field->type->arraysize[0], ctx->enc_count); + return -1; + } + } + if (!ctx->is_valid_array) { + PyErr_Format(PyExc_ValueError, "Expected %d dimensions, got %d", + ctx->head->field->type->ndim, ndim); + return -1; + } + for (i = 0; i < ctx->head->field->type->ndim; i++) { + arraysize *= ctx->head->field->type->arraysize[i]; + } + ctx->is_valid_array = 0; + ctx->enc_count = 1; + } + group = __Pyx_BufFmt_TypeCharToGroup(ctx->enc_type, ctx->is_complex); + do { + __Pyx_StructField* field = ctx->head->field; + __Pyx_TypeInfo* type = field->type; + if (ctx->enc_packmode == '@' || ctx->enc_packmode == '^') { + size = __Pyx_BufFmt_TypeCharToNativeSize(ctx->enc_type, ctx->is_complex); + } else { + size = __Pyx_BufFmt_TypeCharToStandardSize(ctx->enc_type, ctx->is_complex); + } + if (ctx->enc_packmode == '@') { + size_t align_at = __Pyx_BufFmt_TypeCharToAlignment(ctx->enc_type, ctx->is_complex); + size_t align_mod_offset; + if (align_at == 0) return -1; + align_mod_offset = ctx->fmt_offset % align_at; + if (align_mod_offset > 0) ctx->fmt_offset += align_at - align_mod_offset; + if (ctx->struct_alignment == 0) + ctx->struct_alignment = __Pyx_BufFmt_TypeCharToPadding(ctx->enc_type, + ctx->is_complex); + } + if (type->size != size || type->typegroup != group) { + if (type->typegroup == 'C' && type->fields != NULL) { + size_t parent_offset = ctx->head->parent_offset + field->offset; + ++ctx->head; + ctx->head->field = type->fields; + ctx->head->parent_offset = parent_offset; + continue; + } + if ((type->typegroup == 'H' || group == 'H') && type->size == size) { + } else { + __Pyx_BufFmt_RaiseExpected(ctx); + return -1; + } + } + offset = ctx->head->parent_offset + field->offset; + if (ctx->fmt_offset != offset) { + PyErr_Format(PyExc_ValueError, + "Buffer dtype mismatch; next field is at offset %" CYTHON_FORMAT_SSIZE_T "d but %" CYTHON_FORMAT_SSIZE_T "d expected", + (Py_ssize_t)ctx->fmt_offset, (Py_ssize_t)offset); + return -1; + } + ctx->fmt_offset += size; + if (arraysize) + ctx->fmt_offset += (arraysize - 1) * size; + --ctx->enc_count; + while (1) { + if 
(field == &ctx->root) { + ctx->head = NULL; + if (ctx->enc_count != 0) { + __Pyx_BufFmt_RaiseExpected(ctx); + return -1; + } + break; + } + ctx->head->field = ++field; + if (field->type == NULL) { + --ctx->head; + field = ctx->head->field; + continue; + } else if (field->type->typegroup == 'S') { + size_t parent_offset = ctx->head->parent_offset + field->offset; + if (field->type->fields->type == NULL) continue; + field = field->type->fields; + ++ctx->head; + ctx->head->field = field; + ctx->head->parent_offset = parent_offset; + break; + } else { + break; + } + } + } while (ctx->enc_count); + ctx->enc_type = 0; + ctx->is_complex = 0; + return 0; +} +static int +__pyx_buffmt_parse_array(__Pyx_BufFmt_Context* ctx, const char** tsp) +{ + const char *ts = *tsp; + int i = 0, number, ndim; + ++ts; + if (ctx->new_count != 1) { + PyErr_SetString(PyExc_ValueError, + "Cannot handle repeated arrays in format string"); + return -1; + } + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return -1; + ndim = ctx->head->field->type->ndim; + while (*ts && *ts != ')') { + switch (*ts) { + case ' ': case '\f': case '\r': case '\n': case '\t': case '\v': continue; + default: break; + } + number = __Pyx_BufFmt_ExpectNumber(&ts); + if (number == -1) return -1; + if (i < ndim && (size_t) number != ctx->head->field->type->arraysize[i]) { + PyErr_Format(PyExc_ValueError, + "Expected a dimension of size %zu, got %d", + ctx->head->field->type->arraysize[i], number); + return -1; + } + if (*ts != ',' && *ts != ')') { + PyErr_Format(PyExc_ValueError, + "Expected a comma in format string, got '%c'", *ts); + return -1; + } + if (*ts == ',') ts++; + i++; + } + if (i != ndim) { + PyErr_Format(PyExc_ValueError, "Expected %d dimension(s), got %d", + ctx->head->field->type->ndim, i); + return -1; + } + if (!*ts) { + PyErr_SetString(PyExc_ValueError, + "Unexpected end of format string, expected ')'"); + return -1; + } + ctx->is_valid_array = 1; + ctx->new_count = 1; + *tsp = ++ts; + return 0; +} +static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts) { + int got_Z = 0; + while (1) { + switch(*ts) { + case 0: + if (ctx->enc_type != 0 && ctx->head == NULL) { + __Pyx_BufFmt_RaiseExpected(ctx); + return NULL; + } + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + if (ctx->head != NULL) { + __Pyx_BufFmt_RaiseExpected(ctx); + return NULL; + } + return ts; + case ' ': + case '\r': + case '\n': + ++ts; + break; + case '<': + if (!__Pyx_Is_Little_Endian()) { + PyErr_SetString(PyExc_ValueError, "Little-endian buffer not supported on big-endian compiler"); + return NULL; + } + ctx->new_packmode = '='; + ++ts; + break; + case '>': + case '!': + if (__Pyx_Is_Little_Endian()) { + PyErr_SetString(PyExc_ValueError, "Big-endian buffer not supported on little-endian compiler"); + return NULL; + } + ctx->new_packmode = '='; + ++ts; + break; + case '=': + case '@': + case '^': + ctx->new_packmode = *ts++; + break; + case 'T': + { + const char* ts_after_sub; + size_t i, struct_count = ctx->new_count; + size_t struct_alignment = ctx->struct_alignment; + ctx->new_count = 1; + ++ts; + if (*ts != '{') { + PyErr_SetString(PyExc_ValueError, "Buffer acquisition: Expected '{' after 'T'"); + return NULL; + } + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->enc_type = 0; + ctx->enc_count = 0; + ctx->struct_alignment = 0; + ++ts; + ts_after_sub = ts; + for (i = 0; i != struct_count; ++i) { + ts_after_sub = __Pyx_BufFmt_CheckString(ctx, ts); + if (!ts_after_sub) return NULL; + } + ts = 
ts_after_sub; + if (struct_alignment) ctx->struct_alignment = struct_alignment; + } + break; + case '}': + { + size_t alignment = ctx->struct_alignment; + ++ts; + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->enc_type = 0; + if (alignment && ctx->fmt_offset % alignment) { + ctx->fmt_offset += alignment - (ctx->fmt_offset % alignment); + } + } + return ts; + case 'x': + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->fmt_offset += ctx->new_count; + ctx->new_count = 1; + ctx->enc_count = 0; + ctx->enc_type = 0; + ctx->enc_packmode = ctx->new_packmode; + ++ts; + break; + case 'Z': + got_Z = 1; + ++ts; + if (*ts != 'f' && *ts != 'd' && *ts != 'g') { + __Pyx_BufFmt_RaiseUnexpectedChar('Z'); + return NULL; + } + CYTHON_FALLTHROUGH; + case '?': case 'c': case 'b': case 'B': case 'h': case 'H': case 'i': case 'I': + case 'l': case 'L': case 'q': case 'Q': + case 'f': case 'd': case 'g': + case 'O': case 'p': + if ((ctx->enc_type == *ts) && (got_Z == ctx->is_complex) && + (ctx->enc_packmode == ctx->new_packmode) && (!ctx->is_valid_array)) { + ctx->enc_count += ctx->new_count; + ctx->new_count = 1; + got_Z = 0; + ++ts; + break; + } + CYTHON_FALLTHROUGH; + case 's': + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->enc_count = ctx->new_count; + ctx->enc_packmode = ctx->new_packmode; + ctx->enc_type = *ts; + ctx->is_complex = got_Z; + ++ts; + ctx->new_count = 1; + got_Z = 0; + break; + case ':': + ++ts; + while(*ts != ':') ++ts; + ++ts; + break; + case '(': + if (__pyx_buffmt_parse_array(ctx, &ts) < 0) return NULL; + break; + default: + { + int number = __Pyx_BufFmt_ExpectNumber(&ts); + if (number == -1) return NULL; + ctx->new_count = (size_t)number; + } + } + } +} + +/* BufferGetAndValidate */ + static CYTHON_INLINE void __Pyx_SafeReleaseBuffer(Py_buffer* info) { + if (unlikely(info->buf == NULL)) return; + if (info->suboffsets == __Pyx_minusones) info->suboffsets = NULL; + __Pyx_ReleaseBuffer(info); +} +static void __Pyx_ZeroBuffer(Py_buffer* buf) { + buf->buf = NULL; + buf->obj = NULL; + buf->strides = __Pyx_zeros; + buf->shape = __Pyx_zeros; + buf->suboffsets = __Pyx_minusones; +} +static int __Pyx__GetBufferAndValidate( + Py_buffer* buf, PyObject* obj, __Pyx_TypeInfo* dtype, int flags, + int nd, int cast, __Pyx_BufFmt_StackElem* stack) +{ + buf->buf = NULL; + if (unlikely(__Pyx_GetBuffer(obj, buf, flags) == -1)) { + __Pyx_ZeroBuffer(buf); + return -1; + } + if (unlikely(buf->ndim != nd)) { + PyErr_Format(PyExc_ValueError, + "Buffer has wrong number of dimensions (expected %d, got %d)", + nd, buf->ndim); + goto fail; + } + if (!cast) { + __Pyx_BufFmt_Context ctx; + __Pyx_BufFmt_Init(&ctx, stack, dtype); + if (!__Pyx_BufFmt_CheckString(&ctx, buf->format)) goto fail; + } + if (unlikely((size_t)buf->itemsize != dtype->size)) { + PyErr_Format(PyExc_ValueError, + "Item size of buffer (%" CYTHON_FORMAT_SSIZE_T "d byte%s) does not match size of '%s' (%" CYTHON_FORMAT_SSIZE_T "d byte%s)", + buf->itemsize, (buf->itemsize > 1) ? "s" : "", + dtype->name, (Py_ssize_t)dtype->size, (dtype->size > 1) ? "s" : ""); + goto fail; + } + if (buf->suboffsets == NULL) buf->suboffsets = __Pyx_minusones; + return 0; +fail:; + __Pyx_SafeReleaseBuffer(buf); + return -1; +} + +/* PyDictVersioning */ + #if CYTHON_USE_DICT_VERSIONS && CYTHON_USE_TYPE_SLOTS +static CYTHON_INLINE PY_UINT64_T __Pyx_get_tp_dict_version(PyObject *obj) { + PyObject *dict = Py_TYPE(obj)->tp_dict; + return likely(dict) ? 
__PYX_GET_DICT_VERSION(dict) : 0; +} +static CYTHON_INLINE PY_UINT64_T __Pyx_get_object_dict_version(PyObject *obj) { + PyObject **dictptr = NULL; + Py_ssize_t offset = Py_TYPE(obj)->tp_dictoffset; + if (offset) { +#if CYTHON_COMPILING_IN_CPYTHON + dictptr = (likely(offset > 0)) ? (PyObject **) ((char *)obj + offset) : _PyObject_GetDictPtr(obj); +#else + dictptr = _PyObject_GetDictPtr(obj); +#endif + } + return (dictptr && *dictptr) ? __PYX_GET_DICT_VERSION(*dictptr) : 0; +} +static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UINT64_T tp_dict_version, PY_UINT64_T obj_dict_version) { + PyObject *dict = Py_TYPE(obj)->tp_dict; + if (unlikely(!dict) || unlikely(tp_dict_version != __PYX_GET_DICT_VERSION(dict))) + return 0; + return obj_dict_version == __Pyx_get_object_dict_version(obj); +} +#endif + +/* GetModuleGlobalName */ + #if CYTHON_USE_DICT_VERSIONS +static PyObject *__Pyx__GetModuleGlobalName(PyObject *name, PY_UINT64_T *dict_version, PyObject **dict_cached_value) +#else +static CYTHON_INLINE PyObject *__Pyx__GetModuleGlobalName(PyObject *name) +#endif +{ + PyObject *result; +#if !CYTHON_AVOID_BORROWED_REFS +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030500A1 && PY_VERSION_HEX < 0x030d0000 + result = _PyDict_GetItem_KnownHash(__pyx_d, name, ((PyASCIIObject *) name)->hash); + __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) + if (likely(result)) { + return __Pyx_NewRef(result); + } else if (unlikely(PyErr_Occurred())) { + return NULL; + } +#elif CYTHON_COMPILING_IN_LIMITED_API + if (unlikely(!__pyx_m)) { + return NULL; + } + result = PyObject_GetAttr(__pyx_m, name); + if (likely(result)) { + return result; + } +#else + result = PyDict_GetItem(__pyx_d, name); + __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) + if (likely(result)) { + return __Pyx_NewRef(result); + } +#endif +#else + result = PyObject_GetItem(__pyx_d, name); + __PYX_UPDATE_DICT_CACHE(__pyx_d, result, *dict_cached_value, *dict_version) + if (likely(result)) { + return __Pyx_NewRef(result); + } + PyErr_Clear(); +#endif + return __Pyx_GetBuiltinName(name); +} + +/* ExtTypeTest */ + static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type) { + __Pyx_TypeName obj_type_name; + __Pyx_TypeName type_name; + if (unlikely(!type)) { + PyErr_SetString(PyExc_SystemError, "Missing type object"); + return 0; + } + if (likely(__Pyx_TypeCheck(obj, type))) + return 1; + obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + type_name = __Pyx_PyType_GetName(type); + PyErr_Format(PyExc_TypeError, + "Cannot convert " __Pyx_FMT_TYPENAME " to " __Pyx_FMT_TYPENAME, + obj_type_name, type_name); + __Pyx_DECREF_TypeName(obj_type_name); + __Pyx_DECREF_TypeName(type_name); + return 0; +} + +/* BufferIndexError */ + static void __Pyx_RaiseBufferIndexError(int axis) { + PyErr_Format(PyExc_IndexError, + "Out of bounds on buffer access (axis %d)", axis); +} + +/* GetItemInt */ + static PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j) { + PyObject *r; + if (unlikely(!j)) return NULL; + r = PyObject_GetItem(o, j); + Py_DECREF(j); + return r; +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + Py_ssize_t wrapped_i = i; + if (wraparound & unlikely(i < 0)) { + wrapped_i += PyList_GET_SIZE(o); + } + if ((!boundscheck) || likely(__Pyx_is_valid_index(wrapped_i, 
PyList_GET_SIZE(o)))) { + PyObject *r = PyList_GET_ITEM(o, wrapped_i); + Py_INCREF(r); + return r; + } + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +#else + return PySequence_GetItem(o, i); +#endif +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + Py_ssize_t wrapped_i = i; + if (wraparound & unlikely(i < 0)) { + wrapped_i += PyTuple_GET_SIZE(o); + } + if ((!boundscheck) || likely(__Pyx_is_valid_index(wrapped_i, PyTuple_GET_SIZE(o)))) { + PyObject *r = PyTuple_GET_ITEM(o, wrapped_i); + Py_INCREF(r); + return r; + } + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +#else + return PySequence_GetItem(o, i); +#endif +} +static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, int is_list, + CYTHON_NCP_UNUSED int wraparound, + CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS + if (is_list || PyList_CheckExact(o)) { + Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyList_GET_SIZE(o); + if ((!boundscheck) || (likely(__Pyx_is_valid_index(n, PyList_GET_SIZE(o))))) { + PyObject *r = PyList_GET_ITEM(o, n); + Py_INCREF(r); + return r; + } + } + else if (PyTuple_CheckExact(o)) { + Py_ssize_t n = ((!wraparound) | likely(i >= 0)) ? i : i + PyTuple_GET_SIZE(o); + if ((!boundscheck) || likely(__Pyx_is_valid_index(n, PyTuple_GET_SIZE(o)))) { + PyObject *r = PyTuple_GET_ITEM(o, n); + Py_INCREF(r); + return r; + } + } else { + PyMappingMethods *mm = Py_TYPE(o)->tp_as_mapping; + PySequenceMethods *sm = Py_TYPE(o)->tp_as_sequence; + if (mm && mm->mp_subscript) { + PyObject *r, *key = PyInt_FromSsize_t(i); + if (unlikely(!key)) return NULL; + r = mm->mp_subscript(o, key); + Py_DECREF(key); + return r; + } + if (likely(sm && sm->sq_item)) { + if (wraparound && unlikely(i < 0) && likely(sm->sq_length)) { + Py_ssize_t l = sm->sq_length(o); + if (likely(l >= 0)) { + i += l; + } else { + if (!PyErr_ExceptionMatches(PyExc_OverflowError)) + return NULL; + PyErr_Clear(); + } + } + return sm->sq_item(o, i); + } + } +#else + if (is_list || !PyMapping_Check(o)) { + return PySequence_GetItem(o, i); + } +#endif + return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); +} + +/* PyFunctionFastCall */ + #if CYTHON_FAST_PYCALL && !CYTHON_VECTORCALL +static PyObject* __Pyx_PyFunction_FastCallNoKw(PyCodeObject *co, PyObject **args, Py_ssize_t na, + PyObject *globals) { + PyFrameObject *f; + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject **fastlocals; + Py_ssize_t i; + PyObject *result; + assert(globals != NULL); + /* XXX Perhaps we should create a specialized + PyFrame_New() that doesn't take locals, but does + take builtins without sanity checking them. 
+ */ + assert(tstate != NULL); + f = PyFrame_New(tstate, co, globals, NULL); + if (f == NULL) { + return NULL; + } + fastlocals = __Pyx_PyFrame_GetLocalsplus(f); + for (i = 0; i < na; i++) { + Py_INCREF(*args); + fastlocals[i] = *args++; + } + result = PyEval_EvalFrameEx(f,0); + ++tstate->recursion_depth; + Py_DECREF(f); + --tstate->recursion_depth; + return result; +} +static PyObject *__Pyx_PyFunction_FastCallDict(PyObject *func, PyObject **args, Py_ssize_t nargs, PyObject *kwargs) { + PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func); + PyObject *globals = PyFunction_GET_GLOBALS(func); + PyObject *argdefs = PyFunction_GET_DEFAULTS(func); + PyObject *closure; +#if PY_MAJOR_VERSION >= 3 + PyObject *kwdefs; +#endif + PyObject *kwtuple, **k; + PyObject **d; + Py_ssize_t nd; + Py_ssize_t nk; + PyObject *result; + assert(kwargs == NULL || PyDict_Check(kwargs)); + nk = kwargs ? PyDict_Size(kwargs) : 0; + #if PY_MAJOR_VERSION < 3 + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) { + return NULL; + } + #else + if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) { + return NULL; + } + #endif + if ( +#if PY_MAJOR_VERSION >= 3 + co->co_kwonlyargcount == 0 && +#endif + likely(kwargs == NULL || nk == 0) && + co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) { + if (argdefs == NULL && co->co_argcount == nargs) { + result = __Pyx_PyFunction_FastCallNoKw(co, args, nargs, globals); + goto done; + } + else if (nargs == 0 && argdefs != NULL + && co->co_argcount == Py_SIZE(argdefs)) { + /* function called with no arguments, but all parameters have + a default value: use default values as arguments .*/ + args = &PyTuple_GET_ITEM(argdefs, 0); + result =__Pyx_PyFunction_FastCallNoKw(co, args, Py_SIZE(argdefs), globals); + goto done; + } + } + if (kwargs != NULL) { + Py_ssize_t pos, i; + kwtuple = PyTuple_New(2 * nk); + if (kwtuple == NULL) { + result = NULL; + goto done; + } + k = &PyTuple_GET_ITEM(kwtuple, 0); + pos = i = 0; + while (PyDict_Next(kwargs, &pos, &k[i], &k[i+1])) { + Py_INCREF(k[i]); + Py_INCREF(k[i+1]); + i += 2; + } + nk = i / 2; + } + else { + kwtuple = NULL; + k = NULL; + } + closure = PyFunction_GET_CLOSURE(func); +#if PY_MAJOR_VERSION >= 3 + kwdefs = PyFunction_GET_KW_DEFAULTS(func); +#endif + if (argdefs != NULL) { + d = &PyTuple_GET_ITEM(argdefs, 0); + nd = Py_SIZE(argdefs); + } + else { + d = NULL; + nd = 0; + } +#if PY_MAJOR_VERSION >= 3 + result = PyEval_EvalCodeEx((PyObject*)co, globals, (PyObject *)NULL, + args, (int)nargs, + k, (int)nk, + d, (int)nd, kwdefs, closure); +#else + result = PyEval_EvalCodeEx(co, globals, (PyObject *)NULL, + args, (int)nargs, + k, (int)nk, + d, (int)nd, closure); +#endif + Py_XDECREF(kwtuple); +done: + Py_LeaveRecursiveCall(); + return result; +} +#endif + +/* PyObjectCallMethO */ + #if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { + PyObject *self, *result; + PyCFunction cfunc; + cfunc = __Pyx_CyOrPyCFunction_GET_FUNCTION(func); + self = __Pyx_CyOrPyCFunction_GET_SELF(func); + #if PY_MAJOR_VERSION < 3 + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + #else + if (unlikely(Py_EnterRecursiveCall(" while calling a Python object"))) + return NULL; + #endif + result = cfunc(self, arg); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return 
result; +} +#endif + +/* PyObjectFastCall */ + #if PY_VERSION_HEX < 0x03090000 || CYTHON_COMPILING_IN_LIMITED_API +static PyObject* __Pyx_PyObject_FastCall_fallback(PyObject *func, PyObject **args, size_t nargs, PyObject *kwargs) { + PyObject *argstuple; + PyObject *result = 0; + size_t i; + argstuple = PyTuple_New((Py_ssize_t)nargs); + if (unlikely(!argstuple)) return NULL; + for (i = 0; i < nargs; i++) { + Py_INCREF(args[i]); + if (__Pyx_PyTuple_SET_ITEM(argstuple, (Py_ssize_t)i, args[i]) < 0) goto bad; + } + result = __Pyx_PyObject_Call(func, argstuple, kwargs); + bad: + Py_DECREF(argstuple); + return result; +} +#endif +static CYTHON_INLINE PyObject* __Pyx_PyObject_FastCallDict(PyObject *func, PyObject **args, size_t _nargs, PyObject *kwargs) { + Py_ssize_t nargs = __Pyx_PyVectorcall_NARGS(_nargs); +#if CYTHON_COMPILING_IN_CPYTHON + if (nargs == 0 && kwargs == NULL) { + if (__Pyx_CyOrPyCFunction_Check(func) && likely( __Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_NOARGS)) + return __Pyx_PyObject_CallMethO(func, NULL); + } + else if (nargs == 1 && kwargs == NULL) { + if (__Pyx_CyOrPyCFunction_Check(func) && likely( __Pyx_CyOrPyCFunction_GET_FLAGS(func) & METH_O)) + return __Pyx_PyObject_CallMethO(func, args[0]); + } +#endif + #if PY_VERSION_HEX < 0x030800B1 + #if CYTHON_FAST_PYCCALL + if (PyCFunction_Check(func)) { + if (kwargs) { + return _PyCFunction_FastCallDict(func, args, nargs, kwargs); + } else { + return _PyCFunction_FastCallKeywords(func, args, nargs, NULL); + } + } + #if PY_VERSION_HEX >= 0x030700A1 + if (!kwargs && __Pyx_IS_TYPE(func, &PyMethodDescr_Type)) { + return _PyMethodDescr_FastCallKeywords(func, args, nargs, NULL); + } + #endif + #endif + #if CYTHON_FAST_PYCALL + if (PyFunction_Check(func)) { + return __Pyx_PyFunction_FastCallDict(func, args, nargs, kwargs); + } + #endif + #endif + if (kwargs == NULL) { + #if CYTHON_VECTORCALL + #if PY_VERSION_HEX < 0x03090000 + vectorcallfunc f = _PyVectorcall_Function(func); + #else + vectorcallfunc f = PyVectorcall_Function(func); + #endif + if (f) { + return f(func, args, (size_t)nargs, NULL); + } + #elif defined(__Pyx_CyFunction_USED) && CYTHON_BACKPORT_VECTORCALL + if (__Pyx_CyFunction_CheckExact(func)) { + __pyx_vectorcallfunc f = __Pyx_CyFunction_func_vectorcall(func); + if (f) return f(func, args, (size_t)nargs, NULL); + } + #endif + } + if (nargs == 0) { + return __Pyx_PyObject_Call(func, __pyx_empty_tuple, kwargs); + } + #if PY_VERSION_HEX >= 0x03090000 && !CYTHON_COMPILING_IN_LIMITED_API + return PyObject_VectorcallDict(func, args, (size_t)nargs, kwargs); + #else + return __Pyx_PyObject_FastCall_fallback(func, args, (size_t)nargs, kwargs); + #endif +} + +/* PyObjectCallOneArg */ + static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject *args[2] = {NULL, arg}; + return __Pyx_PyObject_FastCall(func, args+1, 1 | __Pyx_PY_VECTORCALL_ARGUMENTS_OFFSET); +} + +/* ObjectGetItem */ + #if CYTHON_USE_TYPE_SLOTS +static PyObject *__Pyx_PyObject_GetIndex(PyObject *obj, PyObject *index) { + PyObject *runerr = NULL; + Py_ssize_t key_value; + key_value = __Pyx_PyIndex_AsSsize_t(index); + if (likely(key_value != -1 || !(runerr = PyErr_Occurred()))) { + return __Pyx_GetItemInt_Fast(obj, key_value, 0, 1, 1); + } + if (PyErr_GivenExceptionMatches(runerr, PyExc_OverflowError)) { + __Pyx_TypeName index_type_name = __Pyx_PyType_GetName(Py_TYPE(index)); + PyErr_Clear(); + PyErr_Format(PyExc_IndexError, + "cannot fit '" __Pyx_FMT_TYPENAME "' into an index-sized integer", index_type_name); + 
__Pyx_DECREF_TypeName(index_type_name); + } + return NULL; +} +static PyObject *__Pyx_PyObject_GetItem_Slow(PyObject *obj, PyObject *key) { + __Pyx_TypeName obj_type_name; + if (likely(PyType_Check(obj))) { + PyObject *meth = __Pyx_PyObject_GetAttrStrNoError(obj, __pyx_n_s_class_getitem); + if (!meth) { + PyErr_Clear(); + } else { + PyObject *result = __Pyx_PyObject_CallOneArg(meth, key); + Py_DECREF(meth); + return result; + } + } + obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + PyErr_Format(PyExc_TypeError, + "'" __Pyx_FMT_TYPENAME "' object is not subscriptable", obj_type_name); + __Pyx_DECREF_TypeName(obj_type_name); + return NULL; +} +static PyObject *__Pyx_PyObject_GetItem(PyObject *obj, PyObject *key) { + PyTypeObject *tp = Py_TYPE(obj); + PyMappingMethods *mm = tp->tp_as_mapping; + PySequenceMethods *sm = tp->tp_as_sequence; + if (likely(mm && mm->mp_subscript)) { + return mm->mp_subscript(obj, key); + } + if (likely(sm && sm->sq_item)) { + return __Pyx_PyObject_GetIndex(obj, key); + } + return __Pyx_PyObject_GetItem_Slow(obj, key); +} +#endif + +/* BufferFallbackError */ + static void __Pyx_RaiseBufferFallbackError(void) { + PyErr_SetString(PyExc_ValueError, + "Buffer acquisition failed on assignment; and then reacquiring the old buffer failed too!"); +} + +/* PyIntBinop */ + #if !CYTHON_COMPILING_IN_PYPY +static PyObject* __Pyx_PyInt_AddObjC(PyObject *op1, PyObject *op2, long intval, int inplace, int zerodivision_check) { + CYTHON_MAYBE_UNUSED_VAR(intval); + CYTHON_MAYBE_UNUSED_VAR(inplace); + CYTHON_UNUSED_VAR(zerodivision_check); + #if PY_MAJOR_VERSION < 3 + if (likely(PyInt_CheckExact(op1))) { + const long b = intval; + long x; + long a = PyInt_AS_LONG(op1); + + x = (long)((unsigned long)a + (unsigned long)b); + if (likely((x^a) >= 0 || (x^b) >= 0)) + return PyInt_FromLong(x); + return PyLong_Type.tp_as_number->nb_add(op1, op2); + } + #endif + #if CYTHON_USE_PYLONG_INTERNALS + if (likely(PyLong_CheckExact(op1))) { + const long b = intval; + long a, x; +#ifdef HAVE_LONG_LONG + const PY_LONG_LONG llb = intval; + PY_LONG_LONG lla, llx; +#endif + if (unlikely(__Pyx_PyLong_IsZero(op1))) { + return __Pyx_NewRef(op2); + } + if (likely(__Pyx_PyLong_IsCompact(op1))) { + a = __Pyx_PyLong_CompactValue(op1); + } else { + const digit* digits = __Pyx_PyLong_Digits(op1); + const Py_ssize_t size = __Pyx_PyLong_SignedDigitCount(op1); + switch (size) { + case -2: + if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + a = -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + #ifdef HAVE_LONG_LONG + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) { + lla = -(PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; + #endif + } + CYTHON_FALLTHROUGH; + case 2: + if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + a = (long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + #ifdef HAVE_LONG_LONG + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 2 * PyLong_SHIFT) { + lla = (PY_LONG_LONG) (((((unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; + #endif + } + CYTHON_FALLTHROUGH; + case -3: + if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + a = -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + #ifdef HAVE_LONG_LONG + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) { + lla = -(PY_LONG_LONG) 
(((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; + #endif + } + CYTHON_FALLTHROUGH; + case 3: + if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + a = (long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + #ifdef HAVE_LONG_LONG + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 3 * PyLong_SHIFT) { + lla = (PY_LONG_LONG) (((((((unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; + #endif + } + CYTHON_FALLTHROUGH; + case -4: + if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { + a = -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + #ifdef HAVE_LONG_LONG + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) { + lla = -(PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; + #endif + } + CYTHON_FALLTHROUGH; + case 4: + if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { + a = (long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])); + break; + #ifdef HAVE_LONG_LONG + } else if (8 * sizeof(PY_LONG_LONG) - 1 > 4 * PyLong_SHIFT) { + lla = (PY_LONG_LONG) (((((((((unsigned PY_LONG_LONG)digits[3]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[2]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[1]) << PyLong_SHIFT) | (unsigned PY_LONG_LONG)digits[0])); + goto long_long; + #endif + } + CYTHON_FALLTHROUGH; + default: return PyLong_Type.tp_as_number->nb_add(op1, op2); + } + } + x = a + b; + return PyLong_FromLong(x); +#ifdef HAVE_LONG_LONG + long_long: + llx = lla + llb; + return PyLong_FromLongLong(llx); +#endif + + + } + #endif + if (PyFloat_CheckExact(op1)) { + const long b = intval; +#if CYTHON_COMPILING_IN_LIMITED_API + double a = __pyx_PyFloat_AsDouble(op1); +#else + double a = PyFloat_AS_DOUBLE(op1); +#endif + double result; + + PyFPE_START_PROTECT("add", return NULL) + result = ((double)a) + (double)b; + PyFPE_END_PROTECT(result) + return PyFloat_FromDouble(result); + } + return (inplace ? 
PyNumber_InPlaceAdd : PyNumber_Add)(op1, op2); +} +#endif + +/* SliceObject */ + static CYTHON_INLINE int __Pyx_PyObject_SetSlice(PyObject* obj, PyObject* value, + Py_ssize_t cstart, Py_ssize_t cstop, + PyObject** _py_start, PyObject** _py_stop, PyObject** _py_slice, + int has_cstart, int has_cstop, int wraparound) { + __Pyx_TypeName obj_type_name; +#if CYTHON_USE_TYPE_SLOTS + PyMappingMethods* mp; +#if PY_MAJOR_VERSION < 3 + PySequenceMethods* ms = Py_TYPE(obj)->tp_as_sequence; + if (likely(ms && ms->sq_ass_slice)) { + if (!has_cstart) { + if (_py_start && (*_py_start != Py_None)) { + cstart = __Pyx_PyIndex_AsSsize_t(*_py_start); + if ((cstart == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; + } else + cstart = 0; + } + if (!has_cstop) { + if (_py_stop && (*_py_stop != Py_None)) { + cstop = __Pyx_PyIndex_AsSsize_t(*_py_stop); + if ((cstop == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; + } else + cstop = PY_SSIZE_T_MAX; + } + if (wraparound && unlikely((cstart < 0) | (cstop < 0)) && likely(ms->sq_length)) { + Py_ssize_t l = ms->sq_length(obj); + if (likely(l >= 0)) { + if (cstop < 0) { + cstop += l; + if (cstop < 0) cstop = 0; + } + if (cstart < 0) { + cstart += l; + if (cstart < 0) cstart = 0; + } + } else { + if (!PyErr_ExceptionMatches(PyExc_OverflowError)) + goto bad; + PyErr_Clear(); + } + } + return ms->sq_ass_slice(obj, cstart, cstop, value); + } +#else + CYTHON_UNUSED_VAR(wraparound); +#endif + mp = Py_TYPE(obj)->tp_as_mapping; + if (likely(mp && mp->mp_ass_subscript)) +#else + CYTHON_UNUSED_VAR(wraparound); +#endif + { + int result; + PyObject *py_slice, *py_start, *py_stop; + if (_py_slice) { + py_slice = *_py_slice; + } else { + PyObject* owned_start = NULL; + PyObject* owned_stop = NULL; + if (_py_start) { + py_start = *_py_start; + } else { + if (has_cstart) { + owned_start = py_start = PyInt_FromSsize_t(cstart); + if (unlikely(!py_start)) goto bad; + } else + py_start = Py_None; + } + if (_py_stop) { + py_stop = *_py_stop; + } else { + if (has_cstop) { + owned_stop = py_stop = PyInt_FromSsize_t(cstop); + if (unlikely(!py_stop)) { + Py_XDECREF(owned_start); + goto bad; + } + } else + py_stop = Py_None; + } + py_slice = PySlice_New(py_start, py_stop, Py_None); + Py_XDECREF(owned_start); + Py_XDECREF(owned_stop); + if (unlikely(!py_slice)) goto bad; + } +#if CYTHON_USE_TYPE_SLOTS + result = mp->mp_ass_subscript(obj, py_slice, value); +#else + result = value ? PyObject_SetItem(obj, py_slice, value) : PyObject_DelItem(obj, py_slice); +#endif + if (!_py_slice) { + Py_DECREF(py_slice); + } + return result; + } + obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + PyErr_Format(PyExc_TypeError, + "'" __Pyx_FMT_TYPENAME "' object does not support slice %.10s", + obj_type_name, value ? "assignment" : "deletion"); + __Pyx_DECREF_TypeName(obj_type_name); +bad: + return -1; +} + +/* SetItemInt */ + static int __Pyx_SetItemInt_Generic(PyObject *o, PyObject *j, PyObject *v) { + int r; + if (unlikely(!j)) return -1; + r = PyObject_SetItem(o, j, v); + Py_DECREF(j); + return r; +} +static CYTHON_INLINE int __Pyx_SetItemInt_Fast(PyObject *o, Py_ssize_t i, PyObject *v, int is_list, + CYTHON_NCP_UNUSED int wraparound, CYTHON_NCP_UNUSED int boundscheck) { +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS && CYTHON_USE_TYPE_SLOTS + if (is_list || PyList_CheckExact(o)) { + Py_ssize_t n = (!wraparound) ? i : ((likely(i >= 0)) ? 
i : i + PyList_GET_SIZE(o)); + if ((!boundscheck) || likely(__Pyx_is_valid_index(n, PyList_GET_SIZE(o)))) { + PyObject* old = PyList_GET_ITEM(o, n); + Py_INCREF(v); + PyList_SET_ITEM(o, n, v); + Py_DECREF(old); + return 1; + } + } else { + PyMappingMethods *mm = Py_TYPE(o)->tp_as_mapping; + PySequenceMethods *sm = Py_TYPE(o)->tp_as_sequence; + if (mm && mm->mp_ass_subscript) { + int r; + PyObject *key = PyInt_FromSsize_t(i); + if (unlikely(!key)) return -1; + r = mm->mp_ass_subscript(o, key, v); + Py_DECREF(key); + return r; + } + if (likely(sm && sm->sq_ass_item)) { + if (wraparound && unlikely(i < 0) && likely(sm->sq_length)) { + Py_ssize_t l = sm->sq_length(o); + if (likely(l >= 0)) { + i += l; + } else { + if (!PyErr_ExceptionMatches(PyExc_OverflowError)) + return -1; + PyErr_Clear(); + } + } + return sm->sq_ass_item(o, i, v); + } + } +#else + if (is_list || !PyMapping_Check(o)) + { + return PySequence_SetItem(o, i, v); + } +#endif + return __Pyx_SetItemInt_Generic(o, PyInt_FromSsize_t(i), v); +} + +/* TypeImport */ + #ifndef __PYX_HAVE_RT_ImportType_3_0_11 +#define __PYX_HAVE_RT_ImportType_3_0_11 +static PyTypeObject *__Pyx_ImportType_3_0_11(PyObject *module, const char *module_name, const char *class_name, + size_t size, size_t alignment, enum __Pyx_ImportType_CheckSize_3_0_11 check_size) +{ + PyObject *result = 0; + char warning[200]; + Py_ssize_t basicsize; + Py_ssize_t itemsize; +#if CYTHON_COMPILING_IN_LIMITED_API + PyObject *py_basicsize; + PyObject *py_itemsize; +#endif + result = PyObject_GetAttrString(module, class_name); + if (!result) + goto bad; + if (!PyType_Check(result)) { + PyErr_Format(PyExc_TypeError, + "%.200s.%.200s is not a type object", + module_name, class_name); + goto bad; + } +#if !CYTHON_COMPILING_IN_LIMITED_API + basicsize = ((PyTypeObject *)result)->tp_basicsize; + itemsize = ((PyTypeObject *)result)->tp_itemsize; +#else + py_basicsize = PyObject_GetAttrString(result, "__basicsize__"); + if (!py_basicsize) + goto bad; + basicsize = PyLong_AsSsize_t(py_basicsize); + Py_DECREF(py_basicsize); + py_basicsize = 0; + if (basicsize == (Py_ssize_t)-1 && PyErr_Occurred()) + goto bad; + py_itemsize = PyObject_GetAttrString(result, "__itemsize__"); + if (!py_itemsize) + goto bad; + itemsize = PyLong_AsSsize_t(py_itemsize); + Py_DECREF(py_itemsize); + py_itemsize = 0; + if (itemsize == (Py_ssize_t)-1 && PyErr_Occurred()) + goto bad; +#endif + if (itemsize) { + if (size % alignment) { + alignment = size % alignment; + } + if (itemsize < (Py_ssize_t)alignment) + itemsize = (Py_ssize_t)alignment; + } + if ((size_t)(basicsize + itemsize) < size) { + PyErr_Format(PyExc_ValueError, + "%.200s.%.200s size changed, may indicate binary incompatibility. " + "Expected %zd from C header, got %zd from PyObject", + module_name, class_name, size, basicsize+itemsize); + goto bad; + } + if (check_size == __Pyx_ImportType_CheckSize_Error_3_0_11 && + ((size_t)basicsize > size || (size_t)(basicsize + itemsize) < size)) { + PyErr_Format(PyExc_ValueError, + "%.200s.%.200s size changed, may indicate binary incompatibility. " + "Expected %zd from C header, got %zd-%zd from PyObject", + module_name, class_name, size, basicsize, basicsize+itemsize); + goto bad; + } + else if (check_size == __Pyx_ImportType_CheckSize_Warn_3_0_11 && (size_t)basicsize > size) { + PyOS_snprintf(warning, sizeof(warning), + "%s.%s size changed, may indicate binary incompatibility. 
" + "Expected %zd from C header, got %zd from PyObject", + module_name, class_name, size, basicsize); + if (PyErr_WarnEx(NULL, warning, 0) < 0) goto bad; + } + return (PyTypeObject *)result; +bad: + Py_XDECREF(result); + return NULL; +} +#endif + +/* Import */ + static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { + PyObject *module = 0; + PyObject *empty_dict = 0; + PyObject *empty_list = 0; + #if PY_MAJOR_VERSION < 3 + PyObject *py_import; + py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import); + if (unlikely(!py_import)) + goto bad; + if (!from_list) { + empty_list = PyList_New(0); + if (unlikely(!empty_list)) + goto bad; + from_list = empty_list; + } + #endif + empty_dict = PyDict_New(); + if (unlikely(!empty_dict)) + goto bad; + { + #if PY_MAJOR_VERSION >= 3 + if (level == -1) { + if (strchr(__Pyx_MODULE_NAME, '.') != NULL) { + module = PyImport_ImportModuleLevelObject( + name, __pyx_d, empty_dict, from_list, 1); + if (unlikely(!module)) { + if (unlikely(!PyErr_ExceptionMatches(PyExc_ImportError))) + goto bad; + PyErr_Clear(); + } + } + level = 0; + } + #endif + if (!module) { + #if PY_MAJOR_VERSION < 3 + PyObject *py_level = PyInt_FromLong(level); + if (unlikely(!py_level)) + goto bad; + module = PyObject_CallFunctionObjArgs(py_import, + name, __pyx_d, empty_dict, from_list, py_level, (PyObject *)NULL); + Py_DECREF(py_level); + #else + module = PyImport_ImportModuleLevelObject( + name, __pyx_d, empty_dict, from_list, level); + #endif + } + } +bad: + Py_XDECREF(empty_dict); + Py_XDECREF(empty_list); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(py_import); + #endif + return module; +} + +/* ImportDottedModule */ + #if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx__ImportDottedModule_Error(PyObject *name, PyObject *parts_tuple, Py_ssize_t count) { + PyObject *partial_name = NULL, *slice = NULL, *sep = NULL; + if (unlikely(PyErr_Occurred())) { + PyErr_Clear(); + } + if (likely(PyTuple_GET_SIZE(parts_tuple) == count)) { + partial_name = name; + } else { + slice = PySequence_GetSlice(parts_tuple, 0, count); + if (unlikely(!slice)) + goto bad; + sep = PyUnicode_FromStringAndSize(".", 1); + if (unlikely(!sep)) + goto bad; + partial_name = PyUnicode_Join(sep, slice); + } + PyErr_Format( +#if PY_MAJOR_VERSION < 3 + PyExc_ImportError, + "No module named '%s'", PyString_AS_STRING(partial_name)); +#else +#if PY_VERSION_HEX >= 0x030600B1 + PyExc_ModuleNotFoundError, +#else + PyExc_ImportError, +#endif + "No module named '%U'", partial_name); +#endif +bad: + Py_XDECREF(sep); + Py_XDECREF(slice); + Py_XDECREF(partial_name); + return NULL; +} +#endif +#if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx__ImportDottedModule_Lookup(PyObject *name) { + PyObject *imported_module; +#if PY_VERSION_HEX < 0x030700A1 || (CYTHON_COMPILING_IN_PYPY && PYPY_VERSION_NUM < 0x07030400) + PyObject *modules = PyImport_GetModuleDict(); + if (unlikely(!modules)) + return NULL; + imported_module = __Pyx_PyDict_GetItemStr(modules, name); + Py_XINCREF(imported_module); +#else + imported_module = PyImport_GetModule(name); +#endif + return imported_module; +} +#endif +#if PY_MAJOR_VERSION >= 3 +static PyObject *__Pyx_ImportDottedModule_WalkParts(PyObject *module, PyObject *name, PyObject *parts_tuple) { + Py_ssize_t i, nparts; + nparts = PyTuple_GET_SIZE(parts_tuple); + for (i=1; i < nparts && module; i++) { + PyObject *part, *submodule; +#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + part = PyTuple_GET_ITEM(parts_tuple, i); +#else + part = PySequence_ITEM(parts_tuple, i); 
+#endif + submodule = __Pyx_PyObject_GetAttrStrNoError(module, part); +#if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS) + Py_DECREF(part); +#endif + Py_DECREF(module); + module = submodule; + } + if (unlikely(!module)) { + return __Pyx__ImportDottedModule_Error(name, parts_tuple, i); + } + return module; +} +#endif +static PyObject *__Pyx__ImportDottedModule(PyObject *name, PyObject *parts_tuple) { +#if PY_MAJOR_VERSION < 3 + PyObject *module, *from_list, *star = __pyx_n_s__6; + CYTHON_UNUSED_VAR(parts_tuple); + from_list = PyList_New(1); + if (unlikely(!from_list)) + return NULL; + Py_INCREF(star); + PyList_SET_ITEM(from_list, 0, star); + module = __Pyx_Import(name, from_list, 0); + Py_DECREF(from_list); + return module; +#else + PyObject *imported_module; + PyObject *module = __Pyx_Import(name, NULL, 0); + if (!parts_tuple || unlikely(!module)) + return module; + imported_module = __Pyx__ImportDottedModule_Lookup(name); + if (likely(imported_module)) { + Py_DECREF(module); + return imported_module; + } + PyErr_Clear(); + return __Pyx_ImportDottedModule_WalkParts(module, name, parts_tuple); +#endif +} +static PyObject *__Pyx_ImportDottedModule(PyObject *name, PyObject *parts_tuple) { +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x030400B1 + PyObject *module = __Pyx__ImportDottedModule_Lookup(name); + if (likely(module)) { + PyObject *spec = __Pyx_PyObject_GetAttrStrNoError(module, __pyx_n_s_spec); + if (likely(spec)) { + PyObject *unsafe = __Pyx_PyObject_GetAttrStrNoError(spec, __pyx_n_s_initializing); + if (likely(!unsafe || !__Pyx_PyObject_IsTrue(unsafe))) { + Py_DECREF(spec); + spec = NULL; + } + Py_XDECREF(unsafe); + } + if (likely(!spec)) { + PyErr_Clear(); + return module; + } + Py_DECREF(spec); + Py_DECREF(module); + } else if (PyErr_Occurred()) { + PyErr_Clear(); + } +#endif + return __Pyx__ImportDottedModule(name, parts_tuple); +} + +/* FixUpExtensionType */ + #if CYTHON_USE_TYPE_SPECS +static int __Pyx_fix_up_extension_type_from_spec(PyType_Spec *spec, PyTypeObject *type) { +#if PY_VERSION_HEX > 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + CYTHON_UNUSED_VAR(spec); + CYTHON_UNUSED_VAR(type); +#else + const PyType_Slot *slot = spec->slots; + while (slot && slot->slot && slot->slot != Py_tp_members) + slot++; + if (slot && slot->slot == Py_tp_members) { + int changed = 0; +#if !(PY_VERSION_HEX <= 0x030900b1 && CYTHON_COMPILING_IN_CPYTHON) + const +#endif + PyMemberDef *memb = (PyMemberDef*) slot->pfunc; + while (memb && memb->name) { + if (memb->name[0] == '_' && memb->name[1] == '_') { +#if PY_VERSION_HEX < 0x030900b1 + if (strcmp(memb->name, "__weaklistoffset__") == 0) { + assert(memb->type == T_PYSSIZET); + assert(memb->flags == READONLY); + type->tp_weaklistoffset = memb->offset; + changed = 1; + } + else if (strcmp(memb->name, "__dictoffset__") == 0) { + assert(memb->type == T_PYSSIZET); + assert(memb->flags == READONLY); + type->tp_dictoffset = memb->offset; + changed = 1; + } +#if CYTHON_METH_FASTCALL + else if (strcmp(memb->name, "__vectorcalloffset__") == 0) { + assert(memb->type == T_PYSSIZET); + assert(memb->flags == READONLY); +#if PY_VERSION_HEX >= 0x030800b4 + type->tp_vectorcall_offset = memb->offset; +#else + type->tp_print = (printfunc) memb->offset; +#endif + changed = 1; + } +#endif +#else + if ((0)); +#endif +#if PY_VERSION_HEX <= 0x030900b1 && CYTHON_COMPILING_IN_CPYTHON + else if (strcmp(memb->name, "__module__") == 0) { + PyObject *descr; + assert(memb->type == T_OBJECT); + assert(memb->flags == 0 || memb->flags == READONLY); + 
descr = PyDescr_NewMember(type, memb); + if (unlikely(!descr)) + return -1; + if (unlikely(PyDict_SetItem(type->tp_dict, PyDescr_NAME(descr), descr) < 0)) { + Py_DECREF(descr); + return -1; + } + Py_DECREF(descr); + changed = 1; + } +#endif + } + memb++; + } + if (changed) + PyType_Modified(type); + } +#endif + return 0; +} +#endif + +/* FetchSharedCythonModule */ + static PyObject *__Pyx_FetchSharedCythonABIModule(void) { + return __Pyx_PyImport_AddModuleRef((char*) __PYX_ABI_MODULE_NAME); +} + +/* FetchCommonType */ + static int __Pyx_VerifyCachedType(PyObject *cached_type, + const char *name, + Py_ssize_t basicsize, + Py_ssize_t expected_basicsize) { + if (!PyType_Check(cached_type)) { + PyErr_Format(PyExc_TypeError, + "Shared Cython type %.200s is not a type object", name); + return -1; + } + if (basicsize != expected_basicsize) { + PyErr_Format(PyExc_TypeError, + "Shared Cython type %.200s has the wrong size, try recompiling", + name); + return -1; + } + return 0; +} +#if !CYTHON_USE_TYPE_SPECS +static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type) { + PyObject* abi_module; + const char* object_name; + PyTypeObject *cached_type = NULL; + abi_module = __Pyx_FetchSharedCythonABIModule(); + if (!abi_module) return NULL; + object_name = strrchr(type->tp_name, '.'); + object_name = object_name ? object_name+1 : type->tp_name; + cached_type = (PyTypeObject*) PyObject_GetAttrString(abi_module, object_name); + if (cached_type) { + if (__Pyx_VerifyCachedType( + (PyObject *)cached_type, + object_name, + cached_type->tp_basicsize, + type->tp_basicsize) < 0) { + goto bad; + } + goto done; + } + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad; + PyErr_Clear(); + if (PyType_Ready(type) < 0) goto bad; + if (PyObject_SetAttrString(abi_module, object_name, (PyObject *)type) < 0) + goto bad; + Py_INCREF(type); + cached_type = type; +done: + Py_DECREF(abi_module); + return cached_type; +bad: + Py_XDECREF(cached_type); + cached_type = NULL; + goto done; +} +#else +static PyTypeObject *__Pyx_FetchCommonTypeFromSpec(PyObject *module, PyType_Spec *spec, PyObject *bases) { + PyObject *abi_module, *cached_type = NULL; + const char* object_name = strrchr(spec->name, '.'); + object_name = object_name ? object_name+1 : spec->name; + abi_module = __Pyx_FetchSharedCythonABIModule(); + if (!abi_module) return NULL; + cached_type = PyObject_GetAttrString(abi_module, object_name); + if (cached_type) { + Py_ssize_t basicsize; +#if CYTHON_COMPILING_IN_LIMITED_API + PyObject *py_basicsize; + py_basicsize = PyObject_GetAttrString(cached_type, "__basicsize__"); + if (unlikely(!py_basicsize)) goto bad; + basicsize = PyLong_AsSsize_t(py_basicsize); + Py_DECREF(py_basicsize); + py_basicsize = 0; + if (unlikely(basicsize == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; +#else + basicsize = likely(PyType_Check(cached_type)) ? 
((PyTypeObject*) cached_type)->tp_basicsize : -1; +#endif + if (__Pyx_VerifyCachedType( + cached_type, + object_name, + basicsize, + spec->basicsize) < 0) { + goto bad; + } + goto done; + } + if (!PyErr_ExceptionMatches(PyExc_AttributeError)) goto bad; + PyErr_Clear(); + CYTHON_UNUSED_VAR(module); + cached_type = __Pyx_PyType_FromModuleAndSpec(abi_module, spec, bases); + if (unlikely(!cached_type)) goto bad; + if (unlikely(__Pyx_fix_up_extension_type_from_spec(spec, (PyTypeObject *) cached_type) < 0)) goto bad; + if (PyObject_SetAttrString(abi_module, object_name, cached_type) < 0) goto bad; +done: + Py_DECREF(abi_module); + assert(cached_type == NULL || PyType_Check(cached_type)); + return (PyTypeObject *) cached_type; +bad: + Py_XDECREF(cached_type); + cached_type = NULL; + goto done; +} +#endif + +/* PyVectorcallFastCallDict */ + #if CYTHON_METH_FASTCALL +static PyObject *__Pyx_PyVectorcall_FastCallDict_kw(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw) +{ + PyObject *res = NULL; + PyObject *kwnames; + PyObject **newargs; + PyObject **kwvalues; + Py_ssize_t i, pos; + size_t j; + PyObject *key, *value; + unsigned long keys_are_strings; + Py_ssize_t nkw = PyDict_GET_SIZE(kw); + newargs = (PyObject **)PyMem_Malloc((nargs + (size_t)nkw) * sizeof(args[0])); + if (unlikely(newargs == NULL)) { + PyErr_NoMemory(); + return NULL; + } + for (j = 0; j < nargs; j++) newargs[j] = args[j]; + kwnames = PyTuple_New(nkw); + if (unlikely(kwnames == NULL)) { + PyMem_Free(newargs); + return NULL; + } + kwvalues = newargs + nargs; + pos = i = 0; + keys_are_strings = Py_TPFLAGS_UNICODE_SUBCLASS; + while (PyDict_Next(kw, &pos, &key, &value)) { + keys_are_strings &= Py_TYPE(key)->tp_flags; + Py_INCREF(key); + Py_INCREF(value); + PyTuple_SET_ITEM(kwnames, i, key); + kwvalues[i] = value; + i++; + } + if (unlikely(!keys_are_strings)) { + PyErr_SetString(PyExc_TypeError, "keywords must be strings"); + goto cleanup; + } + res = vc(func, newargs, nargs, kwnames); +cleanup: + Py_DECREF(kwnames); + for (i = 0; i < nkw; i++) + Py_DECREF(kwvalues[i]); + PyMem_Free(newargs); + return res; +} +static CYTHON_INLINE PyObject *__Pyx_PyVectorcall_FastCallDict(PyObject *func, __pyx_vectorcallfunc vc, PyObject *const *args, size_t nargs, PyObject *kw) +{ + if (likely(kw == NULL) || PyDict_GET_SIZE(kw) == 0) { + return vc(func, args, nargs, NULL); + } + return __Pyx_PyVectorcall_FastCallDict_kw(func, vc, args, nargs, kw); +} +#endif + +/* CythonFunctionShared */ + #if CYTHON_COMPILING_IN_LIMITED_API +static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc) { + if (__Pyx_CyFunction_Check(func)) { + return PyCFunction_GetFunction(((__pyx_CyFunctionObject*)func)->func) == (PyCFunction) cfunc; + } else if (PyCFunction_Check(func)) { + return PyCFunction_GetFunction(func) == (PyCFunction) cfunc; + } + return 0; +} +#else +static CYTHON_INLINE int __Pyx__IsSameCyOrCFunction(PyObject *func, void *cfunc) { + return __Pyx_CyOrPyCFunction_Check(func) && __Pyx_CyOrPyCFunction_GET_FUNCTION(func) == (PyCFunction) cfunc; +} +#endif +static CYTHON_INLINE void __Pyx__CyFunction_SetClassObj(__pyx_CyFunctionObject* f, PyObject* classobj) { +#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + __Pyx_Py_XDECREF_SET( + __Pyx_CyFunction_GetClassObj(f), + ((classobj) ? __Pyx_NewRef(classobj) : NULL)); +#else + __Pyx_Py_XDECREF_SET( + ((PyCMethodObject *) (f))->mm_class, + (PyTypeObject*)((classobj) ? 
__Pyx_NewRef(classobj) : NULL)); +#endif +} +static PyObject * +__Pyx_CyFunction_get_doc(__pyx_CyFunctionObject *op, void *closure) +{ + CYTHON_UNUSED_VAR(closure); + if (unlikely(op->func_doc == NULL)) { +#if CYTHON_COMPILING_IN_LIMITED_API + op->func_doc = PyObject_GetAttrString(op->func, "__doc__"); + if (unlikely(!op->func_doc)) return NULL; +#else + if (((PyCFunctionObject*)op)->m_ml->ml_doc) { +#if PY_MAJOR_VERSION >= 3 + op->func_doc = PyUnicode_FromString(((PyCFunctionObject*)op)->m_ml->ml_doc); +#else + op->func_doc = PyString_FromString(((PyCFunctionObject*)op)->m_ml->ml_doc); +#endif + if (unlikely(op->func_doc == NULL)) + return NULL; + } else { + Py_INCREF(Py_None); + return Py_None; + } +#endif + } + Py_INCREF(op->func_doc); + return op->func_doc; +} +static int +__Pyx_CyFunction_set_doc(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (value == NULL) { + value = Py_None; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_doc, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_name(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (unlikely(op->func_name == NULL)) { +#if CYTHON_COMPILING_IN_LIMITED_API + op->func_name = PyObject_GetAttrString(op->func, "__name__"); +#elif PY_MAJOR_VERSION >= 3 + op->func_name = PyUnicode_InternFromString(((PyCFunctionObject*)op)->m_ml->ml_name); +#else + op->func_name = PyString_InternFromString(((PyCFunctionObject*)op)->m_ml->ml_name); +#endif + if (unlikely(op->func_name == NULL)) + return NULL; + } + Py_INCREF(op->func_name); + return op->func_name; +} +static int +__Pyx_CyFunction_set_name(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); +#if PY_MAJOR_VERSION >= 3 + if (unlikely(value == NULL || !PyUnicode_Check(value))) +#else + if (unlikely(value == NULL || !PyString_Check(value))) +#endif + { + PyErr_SetString(PyExc_TypeError, + "__name__ must be set to a string object"); + return -1; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_name, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_qualname(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + Py_INCREF(op->func_qualname); + return op->func_qualname; +} +static int +__Pyx_CyFunction_set_qualname(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); +#if PY_MAJOR_VERSION >= 3 + if (unlikely(value == NULL || !PyUnicode_Check(value))) +#else + if (unlikely(value == NULL || !PyString_Check(value))) +#endif + { + PyErr_SetString(PyExc_TypeError, + "__qualname__ must be set to a string object"); + return -1; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_qualname, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_dict(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (unlikely(op->func_dict == NULL)) { + op->func_dict = PyDict_New(); + if (unlikely(op->func_dict == NULL)) + return NULL; + } + Py_INCREF(op->func_dict); + return op->func_dict; +} +static int +__Pyx_CyFunction_set_dict(__pyx_CyFunctionObject *op, PyObject *value, void *context) +{ + CYTHON_UNUSED_VAR(context); + if (unlikely(value == NULL)) { + PyErr_SetString(PyExc_TypeError, + "function's dictionary may not be deleted"); + return -1; + } + if (unlikely(!PyDict_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "setting function's dictionary to a non-dict"); + return -1; + } + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->func_dict, value); + 
return 0; +} +static PyObject * +__Pyx_CyFunction_get_globals(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(context); + Py_INCREF(op->func_globals); + return op->func_globals; +} +static PyObject * +__Pyx_CyFunction_get_closure(__pyx_CyFunctionObject *op, void *context) +{ + CYTHON_UNUSED_VAR(op); + CYTHON_UNUSED_VAR(context); + Py_INCREF(Py_None); + return Py_None; +} +static PyObject * +__Pyx_CyFunction_get_code(__pyx_CyFunctionObject *op, void *context) +{ + PyObject* result = (op->func_code) ? op->func_code : Py_None; + CYTHON_UNUSED_VAR(context); + Py_INCREF(result); + return result; +} +static int +__Pyx_CyFunction_init_defaults(__pyx_CyFunctionObject *op) { + int result = 0; + PyObject *res = op->defaults_getter((PyObject *) op); + if (unlikely(!res)) + return -1; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + op->defaults_tuple = PyTuple_GET_ITEM(res, 0); + Py_INCREF(op->defaults_tuple); + op->defaults_kwdict = PyTuple_GET_ITEM(res, 1); + Py_INCREF(op->defaults_kwdict); + #else + op->defaults_tuple = __Pyx_PySequence_ITEM(res, 0); + if (unlikely(!op->defaults_tuple)) result = -1; + else { + op->defaults_kwdict = __Pyx_PySequence_ITEM(res, 1); + if (unlikely(!op->defaults_kwdict)) result = -1; + } + #endif + Py_DECREF(res); + return result; +} +static int +__Pyx_CyFunction_set_defaults(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + CYTHON_UNUSED_VAR(context); + if (!value) { + value = Py_None; + } else if (unlikely(value != Py_None && !PyTuple_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "__defaults__ must be set to a tuple object"); + return -1; + } + PyErr_WarnEx(PyExc_RuntimeWarning, "changes to cyfunction.__defaults__ will not " + "currently affect the values used in function calls", 1); + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->defaults_tuple, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_defaults(__pyx_CyFunctionObject *op, void *context) { + PyObject* result = op->defaults_tuple; + CYTHON_UNUSED_VAR(context); + if (unlikely(!result)) { + if (op->defaults_getter) { + if (unlikely(__Pyx_CyFunction_init_defaults(op) < 0)) return NULL; + result = op->defaults_tuple; + } else { + result = Py_None; + } + } + Py_INCREF(result); + return result; +} +static int +__Pyx_CyFunction_set_kwdefaults(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + CYTHON_UNUSED_VAR(context); + if (!value) { + value = Py_None; + } else if (unlikely(value != Py_None && !PyDict_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "__kwdefaults__ must be set to a dict object"); + return -1; + } + PyErr_WarnEx(PyExc_RuntimeWarning, "changes to cyfunction.__kwdefaults__ will not " + "currently affect the values used in function calls", 1); + Py_INCREF(value); + __Pyx_Py_XDECREF_SET(op->defaults_kwdict, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_kwdefaults(__pyx_CyFunctionObject *op, void *context) { + PyObject* result = op->defaults_kwdict; + CYTHON_UNUSED_VAR(context); + if (unlikely(!result)) { + if (op->defaults_getter) { + if (unlikely(__Pyx_CyFunction_init_defaults(op) < 0)) return NULL; + result = op->defaults_kwdict; + } else { + result = Py_None; + } + } + Py_INCREF(result); + return result; +} +static int +__Pyx_CyFunction_set_annotations(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + CYTHON_UNUSED_VAR(context); + if (!value || value == Py_None) { + value = NULL; + } else if (unlikely(!PyDict_Check(value))) { + PyErr_SetString(PyExc_TypeError, + "__annotations__ 
must be set to a dict object"); + return -1; + } + Py_XINCREF(value); + __Pyx_Py_XDECREF_SET(op->func_annotations, value); + return 0; +} +static PyObject * +__Pyx_CyFunction_get_annotations(__pyx_CyFunctionObject *op, void *context) { + PyObject* result = op->func_annotations; + CYTHON_UNUSED_VAR(context); + if (unlikely(!result)) { + result = PyDict_New(); + if (unlikely(!result)) return NULL; + op->func_annotations = result; + } + Py_INCREF(result); + return result; +} +static PyObject * +__Pyx_CyFunction_get_is_coroutine(__pyx_CyFunctionObject *op, void *context) { + int is_coroutine; + CYTHON_UNUSED_VAR(context); + if (op->func_is_coroutine) { + return __Pyx_NewRef(op->func_is_coroutine); + } + is_coroutine = op->flags & __Pyx_CYFUNCTION_COROUTINE; +#if PY_VERSION_HEX >= 0x03050000 + if (is_coroutine) { + PyObject *module, *fromlist, *marker = __pyx_n_s_is_coroutine; + fromlist = PyList_New(1); + if (unlikely(!fromlist)) return NULL; + Py_INCREF(marker); +#if CYTHON_ASSUME_SAFE_MACROS + PyList_SET_ITEM(fromlist, 0, marker); +#else + if (unlikely(PyList_SetItem(fromlist, 0, marker) < 0)) { + Py_DECREF(marker); + Py_DECREF(fromlist); + return NULL; + } +#endif + module = PyImport_ImportModuleLevelObject(__pyx_n_s_asyncio_coroutines, NULL, NULL, fromlist, 0); + Py_DECREF(fromlist); + if (unlikely(!module)) goto ignore; + op->func_is_coroutine = __Pyx_PyObject_GetAttrStr(module, marker); + Py_DECREF(module); + if (likely(op->func_is_coroutine)) { + return __Pyx_NewRef(op->func_is_coroutine); + } +ignore: + PyErr_Clear(); + } +#endif + op->func_is_coroutine = __Pyx_PyBool_FromLong(is_coroutine); + return __Pyx_NewRef(op->func_is_coroutine); +} +#if CYTHON_COMPILING_IN_LIMITED_API +static PyObject * +__Pyx_CyFunction_get_module(__pyx_CyFunctionObject *op, void *context) { + CYTHON_UNUSED_VAR(context); + return PyObject_GetAttrString(op->func, "__module__"); +} +static int +__Pyx_CyFunction_set_module(__pyx_CyFunctionObject *op, PyObject* value, void *context) { + CYTHON_UNUSED_VAR(context); + return PyObject_SetAttrString(op->func, "__module__", value); +} +#endif +static PyGetSetDef __pyx_CyFunction_getsets[] = { + {(char *) "func_doc", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0}, + {(char *) "__doc__", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0}, + {(char *) "func_name", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0}, + {(char *) "__name__", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0}, + {(char *) "__qualname__", (getter)__Pyx_CyFunction_get_qualname, (setter)__Pyx_CyFunction_set_qualname, 0, 0}, + {(char *) "func_dict", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0}, + {(char *) "__dict__", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0}, + {(char *) "func_globals", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0}, + {(char *) "__globals__", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0}, + {(char *) "func_closure", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0}, + {(char *) "__closure__", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0}, + {(char *) "func_code", (getter)__Pyx_CyFunction_get_code, 0, 0, 0}, + {(char *) "__code__", (getter)__Pyx_CyFunction_get_code, 0, 0, 0}, + {(char *) "func_defaults", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0}, + {(char *) "__defaults__", (getter)__Pyx_CyFunction_get_defaults, (setter)__Pyx_CyFunction_set_defaults, 0, 0}, + {(char *) 
"__kwdefaults__", (getter)__Pyx_CyFunction_get_kwdefaults, (setter)__Pyx_CyFunction_set_kwdefaults, 0, 0}, + {(char *) "__annotations__", (getter)__Pyx_CyFunction_get_annotations, (setter)__Pyx_CyFunction_set_annotations, 0, 0}, + {(char *) "_is_coroutine", (getter)__Pyx_CyFunction_get_is_coroutine, 0, 0, 0}, +#if CYTHON_COMPILING_IN_LIMITED_API + {"__module__", (getter)__Pyx_CyFunction_get_module, (setter)__Pyx_CyFunction_set_module, 0, 0}, +#endif + {0, 0, 0, 0, 0} +}; +static PyMemberDef __pyx_CyFunction_members[] = { +#if !CYTHON_COMPILING_IN_LIMITED_API + {(char *) "__module__", T_OBJECT, offsetof(PyCFunctionObject, m_module), 0, 0}, +#endif +#if CYTHON_USE_TYPE_SPECS + {(char *) "__dictoffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_dict), READONLY, 0}, +#if CYTHON_METH_FASTCALL +#if CYTHON_BACKPORT_VECTORCALL + {(char *) "__vectorcalloffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_vectorcall), READONLY, 0}, +#else +#if !CYTHON_COMPILING_IN_LIMITED_API + {(char *) "__vectorcalloffset__", T_PYSSIZET, offsetof(PyCFunctionObject, vectorcall), READONLY, 0}, +#endif +#endif +#endif +#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API + {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(__pyx_CyFunctionObject, func_weakreflist), READONLY, 0}, +#else + {(char *) "__weaklistoffset__", T_PYSSIZET, offsetof(PyCFunctionObject, m_weakreflist), READONLY, 0}, +#endif +#endif + {0, 0, 0, 0, 0} +}; +static PyObject * +__Pyx_CyFunction_reduce(__pyx_CyFunctionObject *m, PyObject *args) +{ + CYTHON_UNUSED_VAR(args); +#if PY_MAJOR_VERSION >= 3 + Py_INCREF(m->func_qualname); + return m->func_qualname; +#else + return PyString_FromString(((PyCFunctionObject*)m)->m_ml->ml_name); +#endif +} +static PyMethodDef __pyx_CyFunction_methods[] = { + {"__reduce__", (PyCFunction)__Pyx_CyFunction_reduce, METH_VARARGS, 0}, + {0, 0, 0, 0} +}; +#if PY_VERSION_HEX < 0x030500A0 || CYTHON_COMPILING_IN_LIMITED_API +#define __Pyx_CyFunction_weakreflist(cyfunc) ((cyfunc)->func_weakreflist) +#else +#define __Pyx_CyFunction_weakreflist(cyfunc) (((PyCFunctionObject*)cyfunc)->m_weakreflist) +#endif +static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject *op, PyMethodDef *ml, int flags, PyObject* qualname, + PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) { +#if !CYTHON_COMPILING_IN_LIMITED_API + PyCFunctionObject *cf = (PyCFunctionObject*) op; +#endif + if (unlikely(op == NULL)) + return NULL; +#if CYTHON_COMPILING_IN_LIMITED_API + op->func = PyCFunction_NewEx(ml, (PyObject*)op, module); + if (unlikely(!op->func)) return NULL; +#endif + op->flags = flags; + __Pyx_CyFunction_weakreflist(op) = NULL; +#if !CYTHON_COMPILING_IN_LIMITED_API + cf->m_ml = ml; + cf->m_self = (PyObject *) op; +#endif + Py_XINCREF(closure); + op->func_closure = closure; +#if !CYTHON_COMPILING_IN_LIMITED_API + Py_XINCREF(module); + cf->m_module = module; +#endif + op->func_dict = NULL; + op->func_name = NULL; + Py_INCREF(qualname); + op->func_qualname = qualname; + op->func_doc = NULL; +#if PY_VERSION_HEX < 0x030900B1 || CYTHON_COMPILING_IN_LIMITED_API + op->func_classobj = NULL; +#else + ((PyCMethodObject*)op)->mm_class = NULL; +#endif + op->func_globals = globals; + Py_INCREF(op->func_globals); + Py_XINCREF(code); + op->func_code = code; + op->defaults_pyobjects = 0; + op->defaults_size = 0; + op->defaults = NULL; + op->defaults_tuple = NULL; + op->defaults_kwdict = NULL; + op->defaults_getter = NULL; + op->func_annotations = NULL; + op->func_is_coroutine = NULL; +#if 
CYTHON_METH_FASTCALL + switch (ml->ml_flags & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) { + case METH_NOARGS: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_NOARGS; + break; + case METH_O: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_O; + break; + case METH_METHOD | METH_FASTCALL | METH_KEYWORDS: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD; + break; + case METH_FASTCALL | METH_KEYWORDS: + __Pyx_CyFunction_func_vectorcall(op) = __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS; + break; + case METH_VARARGS | METH_KEYWORDS: + __Pyx_CyFunction_func_vectorcall(op) = NULL; + break; + default: + PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction"); + Py_DECREF(op); + return NULL; + } +#endif + return (PyObject *) op; +} +static int +__Pyx_CyFunction_clear(__pyx_CyFunctionObject *m) +{ + Py_CLEAR(m->func_closure); +#if CYTHON_COMPILING_IN_LIMITED_API + Py_CLEAR(m->func); +#else + Py_CLEAR(((PyCFunctionObject*)m)->m_module); +#endif + Py_CLEAR(m->func_dict); + Py_CLEAR(m->func_name); + Py_CLEAR(m->func_qualname); + Py_CLEAR(m->func_doc); + Py_CLEAR(m->func_globals); + Py_CLEAR(m->func_code); +#if !CYTHON_COMPILING_IN_LIMITED_API +#if PY_VERSION_HEX < 0x030900B1 + Py_CLEAR(__Pyx_CyFunction_GetClassObj(m)); +#else + { + PyObject *cls = (PyObject*) ((PyCMethodObject *) (m))->mm_class; + ((PyCMethodObject *) (m))->mm_class = NULL; + Py_XDECREF(cls); + } +#endif +#endif + Py_CLEAR(m->defaults_tuple); + Py_CLEAR(m->defaults_kwdict); + Py_CLEAR(m->func_annotations); + Py_CLEAR(m->func_is_coroutine); + if (m->defaults) { + PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m); + int i; + for (i = 0; i < m->defaults_pyobjects; i++) + Py_XDECREF(pydefaults[i]); + PyObject_Free(m->defaults); + m->defaults = NULL; + } + return 0; +} +static void __Pyx__CyFunction_dealloc(__pyx_CyFunctionObject *m) +{ + if (__Pyx_CyFunction_weakreflist(m) != NULL) + PyObject_ClearWeakRefs((PyObject *) m); + __Pyx_CyFunction_clear(m); + __Pyx_PyHeapTypeObject_GC_Del(m); +} +static void __Pyx_CyFunction_dealloc(__pyx_CyFunctionObject *m) +{ + PyObject_GC_UnTrack(m); + __Pyx__CyFunction_dealloc(m); +} +static int __Pyx_CyFunction_traverse(__pyx_CyFunctionObject *m, visitproc visit, void *arg) +{ + Py_VISIT(m->func_closure); +#if CYTHON_COMPILING_IN_LIMITED_API + Py_VISIT(m->func); +#else + Py_VISIT(((PyCFunctionObject*)m)->m_module); +#endif + Py_VISIT(m->func_dict); + Py_VISIT(m->func_name); + Py_VISIT(m->func_qualname); + Py_VISIT(m->func_doc); + Py_VISIT(m->func_globals); + Py_VISIT(m->func_code); +#if !CYTHON_COMPILING_IN_LIMITED_API + Py_VISIT(__Pyx_CyFunction_GetClassObj(m)); +#endif + Py_VISIT(m->defaults_tuple); + Py_VISIT(m->defaults_kwdict); + Py_VISIT(m->func_is_coroutine); + if (m->defaults) { + PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m); + int i; + for (i = 0; i < m->defaults_pyobjects; i++) + Py_VISIT(pydefaults[i]); + } + return 0; +} +static PyObject* +__Pyx_CyFunction_repr(__pyx_CyFunctionObject *op) +{ +#if PY_MAJOR_VERSION >= 3 + return PyUnicode_FromFormat("<cyfunction %U at %p>", + op->func_qualname, (void *)op); +#else + return PyString_FromFormat("<cyfunction %s at %p>", + PyString_AsString(op->func_qualname), (void *)op); +#endif +} +static PyObject * __Pyx_CyFunction_CallMethod(PyObject *func, PyObject *self, PyObject *arg, PyObject *kw) { +#if CYTHON_COMPILING_IN_LIMITED_API + PyObject *f = ((__pyx_CyFunctionObject*)func)->func; + PyObject *py_name = NULL; +
PyCFunction meth; + int flags; + meth = PyCFunction_GetFunction(f); + if (unlikely(!meth)) return NULL; + flags = PyCFunction_GetFlags(f); + if (unlikely(flags < 0)) return NULL; +#else + PyCFunctionObject* f = (PyCFunctionObject*)func; + PyCFunction meth = f->m_ml->ml_meth; + int flags = f->m_ml->ml_flags; +#endif + Py_ssize_t size; + switch (flags & (METH_VARARGS | METH_KEYWORDS | METH_NOARGS | METH_O)) { + case METH_VARARGS: + if (likely(kw == NULL || PyDict_Size(kw) == 0)) + return (*meth)(self, arg); + break; + case METH_VARARGS | METH_KEYWORDS: + return (*(PyCFunctionWithKeywords)(void*)meth)(self, arg, kw); + case METH_NOARGS: + if (likely(kw == NULL || PyDict_Size(kw) == 0)) { +#if CYTHON_ASSUME_SAFE_MACROS + size = PyTuple_GET_SIZE(arg); +#else + size = PyTuple_Size(arg); + if (unlikely(size < 0)) return NULL; +#endif + if (likely(size == 0)) + return (*meth)(self, NULL); +#if CYTHON_COMPILING_IN_LIMITED_API + py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); + if (!py_name) return NULL; + PyErr_Format(PyExc_TypeError, + "%.200S() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", + py_name, size); + Py_DECREF(py_name); +#else + PyErr_Format(PyExc_TypeError, + "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", + f->m_ml->ml_name, size); +#endif + return NULL; + } + break; + case METH_O: + if (likely(kw == NULL || PyDict_Size(kw) == 0)) { +#if CYTHON_ASSUME_SAFE_MACROS + size = PyTuple_GET_SIZE(arg); +#else + size = PyTuple_Size(arg); + if (unlikely(size < 0)) return NULL; +#endif + if (likely(size == 1)) { + PyObject *result, *arg0; + #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS + arg0 = PyTuple_GET_ITEM(arg, 0); + #else + arg0 = __Pyx_PySequence_ITEM(arg, 0); if (unlikely(!arg0)) return NULL; + #endif + result = (*meth)(self, arg0); + #if !(CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS) + Py_DECREF(arg0); + #endif + return result; + } +#if CYTHON_COMPILING_IN_LIMITED_API + py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); + if (!py_name) return NULL; + PyErr_Format(PyExc_TypeError, + "%.200S() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", + py_name, size); + Py_DECREF(py_name); +#else + PyErr_Format(PyExc_TypeError, + "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", + f->m_ml->ml_name, size); +#endif + return NULL; + } + break; + default: + PyErr_SetString(PyExc_SystemError, "Bad call flags for CyFunction"); + return NULL; + } +#if CYTHON_COMPILING_IN_LIMITED_API + py_name = __Pyx_CyFunction_get_name((__pyx_CyFunctionObject*)func, NULL); + if (!py_name) return NULL; + PyErr_Format(PyExc_TypeError, "%.200S() takes no keyword arguments", + py_name); + Py_DECREF(py_name); +#else + PyErr_Format(PyExc_TypeError, "%.200s() takes no keyword arguments", + f->m_ml->ml_name); +#endif + return NULL; +} +static CYTHON_INLINE PyObject *__Pyx_CyFunction_Call(PyObject *func, PyObject *arg, PyObject *kw) { + PyObject *self, *result; +#if CYTHON_COMPILING_IN_LIMITED_API + self = PyCFunction_GetSelf(((__pyx_CyFunctionObject*)func)->func); + if (unlikely(!self) && PyErr_Occurred()) return NULL; +#else + self = ((PyCFunctionObject*)func)->m_self; +#endif + result = __Pyx_CyFunction_CallMethod(func, self, arg, kw); + return result; +} +static PyObject *__Pyx_CyFunction_CallAsMethod(PyObject *func, PyObject *args, PyObject *kw) { + PyObject *result; + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *) func; +#if CYTHON_METH_FASTCALL + 
__pyx_vectorcallfunc vc = __Pyx_CyFunction_func_vectorcall(cyfunc); + if (vc) { +#if CYTHON_ASSUME_SAFE_MACROS + return __Pyx_PyVectorcall_FastCallDict(func, vc, &PyTuple_GET_ITEM(args, 0), (size_t)PyTuple_GET_SIZE(args), kw); +#else + (void) &__Pyx_PyVectorcall_FastCallDict; + return PyVectorcall_Call(func, args, kw); +#endif + } +#endif + if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) { + Py_ssize_t argc; + PyObject *new_args; + PyObject *self; +#if CYTHON_ASSUME_SAFE_MACROS + argc = PyTuple_GET_SIZE(args); +#else + argc = PyTuple_Size(args); + if (unlikely(!argc) < 0) return NULL; +#endif + new_args = PyTuple_GetSlice(args, 1, argc); + if (unlikely(!new_args)) + return NULL; + self = PyTuple_GetItem(args, 0); + if (unlikely(!self)) { + Py_DECREF(new_args); +#if PY_MAJOR_VERSION > 2 + PyErr_Format(PyExc_TypeError, + "unbound method %.200S() needs an argument", + cyfunc->func_qualname); +#else + PyErr_SetString(PyExc_TypeError, + "unbound method needs an argument"); +#endif + return NULL; + } + result = __Pyx_CyFunction_CallMethod(func, self, new_args, kw); + Py_DECREF(new_args); + } else { + result = __Pyx_CyFunction_Call(func, args, kw); + } + return result; +} +#if CYTHON_METH_FASTCALL +static CYTHON_INLINE int __Pyx_CyFunction_Vectorcall_CheckArgs(__pyx_CyFunctionObject *cyfunc, Py_ssize_t nargs, PyObject *kwnames) +{ + int ret = 0; + if ((cyfunc->flags & __Pyx_CYFUNCTION_CCLASS) && !(cyfunc->flags & __Pyx_CYFUNCTION_STATICMETHOD)) { + if (unlikely(nargs < 1)) { + PyErr_Format(PyExc_TypeError, "%.200s() needs an argument", + ((PyCFunctionObject*)cyfunc)->m_ml->ml_name); + return -1; + } + ret = 1; + } + if (unlikely(kwnames) && unlikely(PyTuple_GET_SIZE(kwnames))) { + PyErr_Format(PyExc_TypeError, + "%.200s() takes no keyword arguments", ((PyCFunctionObject*)cyfunc)->m_ml->ml_name); + return -1; + } + return ret; +} +static PyObject * __Pyx_CyFunction_Vectorcall_NOARGS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + if (unlikely(nargs != 0)) { + PyErr_Format(PyExc_TypeError, + "%.200s() takes no arguments (%" CYTHON_FORMAT_SSIZE_T "d given)", + def->ml_name, nargs); + return NULL; + } + return def->ml_meth(self, NULL); +} +static PyObject * __Pyx_CyFunction_Vectorcall_O(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, kwnames)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + if (unlikely(nargs != 1)) { + PyErr_Format(PyExc_TypeError, + "%.200s() takes exactly one argument (%" CYTHON_FORMAT_SSIZE_T "d given)", + def->ml_name, 
nargs); + return NULL; + } + return def->ml_meth(self, args[0]); +} +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + return ((__Pyx_PyCFunctionFastWithKeywords)(void(*)(void))def->ml_meth)(self, args, nargs, kwnames); +} +static PyObject * __Pyx_CyFunction_Vectorcall_FASTCALL_KEYWORDS_METHOD(PyObject *func, PyObject *const *args, size_t nargsf, PyObject *kwnames) +{ + __pyx_CyFunctionObject *cyfunc = (__pyx_CyFunctionObject *)func; + PyMethodDef* def = ((PyCFunctionObject*)cyfunc)->m_ml; + PyTypeObject *cls = (PyTypeObject *) __Pyx_CyFunction_GetClassObj(cyfunc); +#if CYTHON_BACKPORT_VECTORCALL + Py_ssize_t nargs = (Py_ssize_t)nargsf; +#else + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); +#endif + PyObject *self; + switch (__Pyx_CyFunction_Vectorcall_CheckArgs(cyfunc, nargs, NULL)) { + case 1: + self = args[0]; + args += 1; + nargs -= 1; + break; + case 0: + self = ((PyCFunctionObject*)cyfunc)->m_self; + break; + default: + return NULL; + } + return ((__Pyx_PyCMethod)(void(*)(void))def->ml_meth)(self, cls, args, (size_t)nargs, kwnames); +} +#endif +#if CYTHON_USE_TYPE_SPECS +static PyType_Slot __pyx_CyFunctionType_slots[] = { + {Py_tp_dealloc, (void *)__Pyx_CyFunction_dealloc}, + {Py_tp_repr, (void *)__Pyx_CyFunction_repr}, + {Py_tp_call, (void *)__Pyx_CyFunction_CallAsMethod}, + {Py_tp_traverse, (void *)__Pyx_CyFunction_traverse}, + {Py_tp_clear, (void *)__Pyx_CyFunction_clear}, + {Py_tp_methods, (void *)__pyx_CyFunction_methods}, + {Py_tp_members, (void *)__pyx_CyFunction_members}, + {Py_tp_getset, (void *)__pyx_CyFunction_getsets}, + {Py_tp_descr_get, (void *)__Pyx_PyMethod_New}, + {0, 0}, +}; +static PyType_Spec __pyx_CyFunctionType_spec = { + __PYX_TYPE_MODULE_PREFIX "cython_function_or_method", + sizeof(__pyx_CyFunctionObject), + 0, +#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR + Py_TPFLAGS_METHOD_DESCRIPTOR | +#endif +#if (defined(_Py_TPFLAGS_HAVE_VECTORCALL) && CYTHON_METH_FASTCALL) + _Py_TPFLAGS_HAVE_VECTORCALL | +#endif + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, + __pyx_CyFunctionType_slots +}; +#else +static PyTypeObject __pyx_CyFunctionType_type = { + PyVarObject_HEAD_INIT(0, 0) + __PYX_TYPE_MODULE_PREFIX "cython_function_or_method", + sizeof(__pyx_CyFunctionObject), + 0, + (destructor) __Pyx_CyFunction_dealloc, +#if !CYTHON_METH_FASTCALL + 0, +#elif CYTHON_BACKPORT_VECTORCALL + (printfunc)offsetof(__pyx_CyFunctionObject, func_vectorcall), +#else + offsetof(PyCFunctionObject, vectorcall), +#endif + 0, + 0, +#if PY_MAJOR_VERSION < 3 + 0, +#else + 0, +#endif + (reprfunc) __Pyx_CyFunction_repr, + 0, + 0, + 0, + 0, + __Pyx_CyFunction_CallAsMethod, + 0, + 0, + 0, + 0, +#ifdef Py_TPFLAGS_METHOD_DESCRIPTOR + Py_TPFLAGS_METHOD_DESCRIPTOR | +#endif +#if defined(_Py_TPFLAGS_HAVE_VECTORCALL) && CYTHON_METH_FASTCALL + _Py_TPFLAGS_HAVE_VECTORCALL | +#endif + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE, + 0, + (traverseproc) __Pyx_CyFunction_traverse, + (inquiry) 
__Pyx_CyFunction_clear, + 0, +#if PY_VERSION_HEX < 0x030500A0 + offsetof(__pyx_CyFunctionObject, func_weakreflist), +#else + offsetof(PyCFunctionObject, m_weakreflist), +#endif + 0, + 0, + __pyx_CyFunction_methods, + __pyx_CyFunction_members, + __pyx_CyFunction_getsets, + 0, + 0, + __Pyx_PyMethod_New, + 0, + offsetof(__pyx_CyFunctionObject, func_dict), + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, +#if PY_VERSION_HEX >= 0x030400a1 + 0, +#endif +#if PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800) + 0, +#endif +#if __PYX_NEED_TP_PRINT_SLOT + 0, +#endif +#if PY_VERSION_HEX >= 0x030C0000 + 0, +#endif +#if PY_VERSION_HEX >= 0x030d00A4 + 0, +#endif +#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000 + 0, +#endif +}; +#endif +static int __pyx_CyFunction_init(PyObject *module) { +#if CYTHON_USE_TYPE_SPECS + __pyx_CyFunctionType = __Pyx_FetchCommonTypeFromSpec(module, &__pyx_CyFunctionType_spec, NULL); +#else + CYTHON_UNUSED_VAR(module); + __pyx_CyFunctionType = __Pyx_FetchCommonType(&__pyx_CyFunctionType_type); +#endif + if (unlikely(__pyx_CyFunctionType == NULL)) { + return -1; + } + return 0; +} +static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *func, size_t size, int pyobjects) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->defaults = PyObject_Malloc(size); + if (unlikely(!m->defaults)) + return PyErr_NoMemory(); + memset(m->defaults, 0, size); + m->defaults_pyobjects = pyobjects; + m->defaults_size = size; + return m->defaults; +} +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *func, PyObject *tuple) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->defaults_tuple = tuple; + Py_INCREF(tuple); +} +static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsKwDict(PyObject *func, PyObject *dict) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->defaults_kwdict = dict; + Py_INCREF(dict); +} +static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *func, PyObject *dict) { + __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func; + m->func_annotations = dict; + Py_INCREF(dict); +} + +/* CythonFunction */ + static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, int flags, PyObject* qualname, + PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) { + PyObject *op = __Pyx_CyFunction_Init( + PyObject_GC_New(__pyx_CyFunctionObject, __pyx_CyFunctionType), + ml, flags, qualname, closure, module, globals, code + ); + if (likely(op)) { + PyObject_GC_Track(op); + } + return op; +} + +/* CLineInTraceback */ + #ifndef CYTHON_CLINE_IN_TRACEBACK +static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line) { + PyObject *use_cline; + PyObject *ptype, *pvalue, *ptraceback; +#if CYTHON_COMPILING_IN_CPYTHON + PyObject **cython_runtime_dict; +#endif + CYTHON_MAYBE_UNUSED_VAR(tstate); + if (unlikely(!__pyx_cython_runtime)) { + return c_line; + } + __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); +#if CYTHON_COMPILING_IN_CPYTHON + cython_runtime_dict = _PyObject_GetDictPtr(__pyx_cython_runtime); + if (likely(cython_runtime_dict)) { + __PYX_PY_DICT_LOOKUP_IF_MODIFIED( + use_cline, *cython_runtime_dict, + __Pyx_PyDict_GetItemStr(*cython_runtime_dict, __pyx_n_s_cline_in_traceback)) + } else +#endif + { + PyObject *use_cline_obj = __Pyx_PyObject_GetAttrStrNoError(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback); + if (use_cline_obj) { + use_cline = 
PyObject_Not(use_cline_obj) ? Py_False : Py_True; + Py_DECREF(use_cline_obj); + } else { + PyErr_Clear(); + use_cline = NULL; + } + } + if (!use_cline) { + c_line = 0; + (void) PyObject_SetAttr(__pyx_cython_runtime, __pyx_n_s_cline_in_traceback, Py_False); + } + else if (use_cline == Py_False || (use_cline != Py_True && PyObject_Not(use_cline) != 0)) { + c_line = 0; + } + __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); + return c_line; +} +#endif + +/* CodeObjectCache */ + #if !CYTHON_COMPILING_IN_LIMITED_API +static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { + int start = 0, mid = 0, end = count - 1; + if (end >= 0 && code_line > entries[end].code_line) { + return count; + } + while (start < end) { + mid = start + (end - start) / 2; + if (code_line < entries[mid].code_line) { + end = mid; + } else if (code_line > entries[mid].code_line) { + start = mid + 1; + } else { + return mid; + } + } + if (code_line <= entries[mid].code_line) { + return mid; + } else { + return mid + 1; + } +} +static PyCodeObject *__pyx_find_code_object(int code_line) { + PyCodeObject* code_object; + int pos; + if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) { + return NULL; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) { + return NULL; + } + code_object = __pyx_code_cache.entries[pos].code_object; + Py_INCREF(code_object); + return code_object; +} +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) { + int pos, i; + __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries; + if (unlikely(!code_line)) { + return; + } + if (unlikely(!entries)) { + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry)); + if (likely(entries)) { + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = 64; + __pyx_code_cache.count = 1; + entries[0].code_line = code_line; + entries[0].code_object = code_object; + Py_INCREF(code_object); + } + return; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) { + PyCodeObject* tmp = entries[pos].code_object; + entries[pos].code_object = code_object; + Py_DECREF(tmp); + return; + } + if (__pyx_code_cache.count == __pyx_code_cache.max_count) { + int new_max = __pyx_code_cache.max_count + 64; + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc( + __pyx_code_cache.entries, ((size_t)new_max) * sizeof(__Pyx_CodeObjectCacheEntry)); + if (unlikely(!entries)) { + return; + } + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = new_max; + } + for (i=__pyx_code_cache.count; i>pos; i--) { + entries[i] = entries[i-1]; + } + entries[pos].code_line = code_line; + entries[pos].code_object = code_object; + __pyx_code_cache.count++; + Py_INCREF(code_object); +} +#endif + +/* AddTraceback */ + #include "compile.h" +#include "frameobject.h" +#include "traceback.h" +#if PY_VERSION_HEX >= 0x030b00a6 && !CYTHON_COMPILING_IN_LIMITED_API + #ifndef Py_BUILD_CORE + #define Py_BUILD_CORE 1 + #endif + #include "internal/pycore_frame.h" +#endif +#if CYTHON_COMPILING_IN_LIMITED_API +static PyObject *__Pyx_PyCode_Replace_For_AddTraceback(PyObject *code, PyObject *scratch_dict, + PyObject *firstlineno, PyObject *name) { + PyObject 
*replace = NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "co_firstlineno", firstlineno))) return NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "co_name", name))) return NULL; + replace = PyObject_GetAttrString(code, "replace"); + if (likely(replace)) { + PyObject *result; + result = PyObject_Call(replace, __pyx_empty_tuple, scratch_dict); + Py_DECREF(replace); + return result; + } + PyErr_Clear(); + #if __PYX_LIMITED_VERSION_HEX < 0x030780000 + { + PyObject *compiled = NULL, *result = NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "code", code))) return NULL; + if (unlikely(PyDict_SetItemString(scratch_dict, "type", (PyObject*)(&PyType_Type)))) return NULL; + compiled = Py_CompileString( + "out = type(code)(\n" + " code.co_argcount, code.co_kwonlyargcount, code.co_nlocals, code.co_stacksize,\n" + " code.co_flags, code.co_code, code.co_consts, code.co_names,\n" + " code.co_varnames, code.co_filename, co_name, co_firstlineno,\n" + " code.co_lnotab)\n", "<dummy>", Py_file_input); + if (!compiled) return NULL; + result = PyEval_EvalCode(compiled, scratch_dict, scratch_dict); + Py_DECREF(compiled); + if (!result) PyErr_Print(); + Py_DECREF(result); + result = PyDict_GetItemString(scratch_dict, "out"); + if (result) Py_INCREF(result); + return result; + } + #else + return NULL; + #endif +} +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename) { + PyObject *code_object = NULL, *py_py_line = NULL, *py_funcname = NULL, *dict = NULL; + PyObject *replace = NULL, *getframe = NULL, *frame = NULL; + PyObject *exc_type, *exc_value, *exc_traceback; + int success = 0; + if (c_line) { + (void) __pyx_cfilenm; + (void) __Pyx_CLineForTraceback(__Pyx_PyThreadState_Current, c_line); + } + PyErr_Fetch(&exc_type, &exc_value, &exc_traceback); + code_object = Py_CompileString("_getframe()", filename, Py_eval_input); + if (unlikely(!code_object)) goto bad; + py_py_line = PyLong_FromLong(py_line); + if (unlikely(!py_py_line)) goto bad; + py_funcname = PyUnicode_FromString(funcname); + if (unlikely(!py_funcname)) goto bad; + dict = PyDict_New(); + if (unlikely(!dict)) goto bad; + { + PyObject *old_code_object = code_object; + code_object = __Pyx_PyCode_Replace_For_AddTraceback(code_object, dict, py_py_line, py_funcname); + Py_DECREF(old_code_object); + } + if (unlikely(!code_object)) goto bad; + getframe = PySys_GetObject("_getframe"); + if (unlikely(!getframe)) goto bad; + if (unlikely(PyDict_SetItemString(dict, "_getframe", getframe))) goto bad; + frame = PyEval_EvalCode(code_object, dict, dict); + if (unlikely(!frame) || frame == Py_None) goto bad; + success = 1; + bad: + PyErr_Restore(exc_type, exc_value, exc_traceback); + Py_XDECREF(code_object); + Py_XDECREF(py_py_line); + Py_XDECREF(py_funcname); + Py_XDECREF(dict); + Py_XDECREF(replace); + if (success) { + PyTraceBack_Here( + (struct _frame*)frame); + } + Py_XDECREF(frame); +} +#else +static PyCodeObject* __Pyx_CreateCodeObjectForTraceback( + const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = NULL; + PyObject *py_funcname = NULL; + #if PY_MAJOR_VERSION < 3 + PyObject *py_srcfile = NULL; + py_srcfile = PyString_FromString(filename); + if (!py_srcfile) goto bad; + #endif + if (c_line) { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + if (!py_funcname) goto bad; + #else + py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + if (!py_funcname) goto
bad; + funcname = PyUnicode_AsUTF8(py_funcname); + if (!funcname) goto bad; + #endif + } + else { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromString(funcname); + if (!py_funcname) goto bad; + #endif + } + #if PY_MAJOR_VERSION < 3 + py_code = __Pyx_PyCode_New( + 0, + 0, + 0, + 0, + 0, + 0, + __pyx_empty_bytes, /*PyObject *code,*/ + __pyx_empty_tuple, /*PyObject *consts,*/ + __pyx_empty_tuple, /*PyObject *names,*/ + __pyx_empty_tuple, /*PyObject *varnames,*/ + __pyx_empty_tuple, /*PyObject *freevars,*/ + __pyx_empty_tuple, /*PyObject *cellvars,*/ + py_srcfile, /*PyObject *filename,*/ + py_funcname, /*PyObject *name,*/ + py_line, + __pyx_empty_bytes /*PyObject *lnotab*/ + ); + Py_DECREF(py_srcfile); + #else + py_code = PyCode_NewEmpty(filename, funcname, py_line); + #endif + Py_XDECREF(py_funcname); + return py_code; +bad: + Py_XDECREF(py_funcname); + #if PY_MAJOR_VERSION < 3 + Py_XDECREF(py_srcfile); + #endif + return NULL; +} +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = 0; + PyFrameObject *py_frame = 0; + PyThreadState *tstate = __Pyx_PyThreadState_Current; + PyObject *ptype, *pvalue, *ptraceback; + if (c_line) { + c_line = __Pyx_CLineForTraceback(tstate, c_line); + } + py_code = __pyx_find_code_object(c_line ? -c_line : py_line); + if (!py_code) { + __Pyx_ErrFetchInState(tstate, &ptype, &pvalue, &ptraceback); + py_code = __Pyx_CreateCodeObjectForTraceback( + funcname, c_line, py_line, filename); + if (!py_code) { + /* If the code object creation fails, then we should clear the + fetched exception references and propagate the new exception */ + Py_XDECREF(ptype); + Py_XDECREF(pvalue); + Py_XDECREF(ptraceback); + goto bad; + } + __Pyx_ErrRestoreInState(tstate, ptype, pvalue, ptraceback); + __pyx_insert_code_object(c_line ? 
-c_line : py_line, py_code); + } + py_frame = PyFrame_New( + tstate, /*PyThreadState *tstate,*/ + py_code, /*PyCodeObject *code,*/ + __pyx_d, /*PyObject *globals,*/ + 0 /*PyObject *locals*/ + ); + if (!py_frame) goto bad; + __Pyx_PyFrame_SetLineNumber(py_frame, py_line); + PyTraceBack_Here(py_frame); +bad: + Py_XDECREF(py_code); + Py_XDECREF(py_frame); +} +#endif + +#if PY_MAJOR_VERSION < 3 +static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags) { + __Pyx_TypeName obj_type_name; + if (PyObject_CheckBuffer(obj)) return PyObject_GetBuffer(obj, view, flags); + obj_type_name = __Pyx_PyType_GetName(Py_TYPE(obj)); + PyErr_Format(PyExc_TypeError, + "'" __Pyx_FMT_TYPENAME "' does not have the buffer interface", + obj_type_name); + __Pyx_DECREF_TypeName(obj_type_name); + return -1; +} +static void __Pyx_ReleaseBuffer(Py_buffer *view) { + PyObject *obj = view->obj; + if (!obj) return; + if (PyObject_CheckBuffer(obj)) { + PyBuffer_Release(view); + return; + } + if ((0)) {} + view->obj = NULL; + Py_DECREF(obj); +} +#endif + + + /* CIntFromPyVerify */ + #define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0) +#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1) +#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\ + {\ + func_type value = func_value;\ + if (sizeof(target_type) < sizeof(func_type)) {\ + if (unlikely(value != (func_type) (target_type) value)) {\ + func_type zero = 0;\ + if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\ + return (target_type) -1;\ + if (is_unsigned && unlikely(value < zero))\ + goto raise_neg_overflow;\ + else\ + goto raise_overflow;\ + }\ + }\ + return (target_type) value;\ + } + +/* Declarations */ + #if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) + #ifdef __cplusplus + static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { + return ::std::complex< float >(x, y); + } + #else + static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { + return x + y*(__pyx_t_float_complex)_Complex_I; + } + #endif +#else + static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { + __pyx_t_float_complex z; + z.real = x; + z.imag = y; + return z; + } +#endif + +/* Arithmetic */ + #if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) +#else + static CYTHON_INLINE int __Pyx_c_eq_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + return (a.real == b.real) && (a.imag == b.imag); + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sum_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + z.real = a.real + b.real; + z.imag = a.imag + b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_diff_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + z.real = a.real - b.real; + z.imag = a.imag - b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prod_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + z.real = a.real * b.real - a.imag * b.imag; + z.imag = a.real * b.imag + a.imag * b.real; + return z; + } + #if 1 + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + if (b.imag == 0) { + return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / 
b.real); + } else if (fabsf(b.real) >= fabsf(b.imag)) { + if (b.real == 0 && b.imag == 0) { + return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / b.imag); + } else { + float r = b.imag / b.real; + float s = (float)(1.0) / (b.real + b.imag * r); + return __pyx_t_float_complex_from_parts( + (a.real + a.imag * r) * s, (a.imag - a.real * r) * s); + } + } else { + float r = b.real / b.imag; + float s = (float)(1.0) / (b.imag + b.real * r); + return __pyx_t_float_complex_from_parts( + (a.real * r + a.imag) * s, (a.imag * r - a.real) * s); + } + } + #else + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quot_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + if (b.imag == 0) { + return __pyx_t_float_complex_from_parts(a.real / b.real, a.imag / b.real); + } else { + float denom = b.real * b.real + b.imag * b.imag; + return __pyx_t_float_complex_from_parts( + (a.real * b.real + a.imag * b.imag) / denom, + (a.imag * b.real - a.real * b.imag) / denom); + } + } + #endif + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_neg_float(__pyx_t_float_complex a) { + __pyx_t_float_complex z; + z.real = -a.real; + z.imag = -a.imag; + return z; + } + static CYTHON_INLINE int __Pyx_c_is_zero_float(__pyx_t_float_complex a) { + return (a.real == 0) && (a.imag == 0); + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conj_float(__pyx_t_float_complex a) { + __pyx_t_float_complex z; + z.real = a.real; + z.imag = -a.imag; + return z; + } + #if 1 + static CYTHON_INLINE float __Pyx_c_abs_float(__pyx_t_float_complex z) { + #if !defined(HAVE_HYPOT) || defined(_MSC_VER) + return sqrtf(z.real*z.real + z.imag*z.imag); + #else + return hypotf(z.real, z.imag); + #endif + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_pow_float(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + float r, lnr, theta, z_r, z_theta; + if (b.imag == 0 && b.real == (int)b.real) { + if (b.real < 0) { + float denom = a.real * a.real + a.imag * a.imag; + a.real = a.real / denom; + a.imag = -a.imag / denom; + b.real = -b.real; + } + switch ((int)b.real) { + case 0: + z.real = 1; + z.imag = 0; + return z; + case 1: + return a; + case 2: + return __Pyx_c_prod_float(a, a); + case 3: + z = __Pyx_c_prod_float(a, a); + return __Pyx_c_prod_float(z, a); + case 4: + z = __Pyx_c_prod_float(a, a); + return __Pyx_c_prod_float(z, z); + } + } + if (a.imag == 0) { + if (a.real == 0) { + return a; + } else if ((b.imag == 0) && (a.real >= 0)) { + z.real = powf(a.real, b.real); + z.imag = 0; + return z; + } else if (a.real > 0) { + r = a.real; + theta = 0; + } else { + r = -a.real; + theta = atan2f(0.0, -1.0); + } + } else { + r = __Pyx_c_abs_float(a); + theta = atan2f(a.imag, a.real); + } + lnr = logf(r); + z_r = expf(lnr * b.real - theta * b.imag); + z_theta = theta * b.real + lnr * b.imag; + z.real = z_r * cosf(z_theta); + z.imag = z_r * sinf(z_theta); + return z; + } + #endif +#endif + +/* Declarations */ + #if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) + #ifdef __cplusplus + static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { + return ::std::complex< double >(x, y); + } + #else + static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { + return x + y*(__pyx_t_double_complex)_Complex_I; + } + #endif +#else + static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { + __pyx_t_double_complex z; + z.real = x; + z.imag = y; + return z; + } +#endif + +/* Arithmetic */ 
+ #if CYTHON_CCOMPLEX && (1) && (!0 || __cplusplus) +#else + static CYTHON_INLINE int __Pyx_c_eq_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + return (a.real == b.real) && (a.imag == b.imag); + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + z.real = a.real + b.real; + z.imag = a.imag + b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + z.real = a.real - b.real; + z.imag = a.imag - b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + z.real = a.real * b.real - a.imag * b.imag; + z.imag = a.real * b.imag + a.imag * b.real; + return z; + } + #if 1 + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + if (b.imag == 0) { + return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.real); + } else if (fabs(b.real) >= fabs(b.imag)) { + if (b.real == 0 && b.imag == 0) { + return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.imag); + } else { + double r = b.imag / b.real; + double s = (double)(1.0) / (b.real + b.imag * r); + return __pyx_t_double_complex_from_parts( + (a.real + a.imag * r) * s, (a.imag - a.real * r) * s); + } + } else { + double r = b.real / b.imag; + double s = (double)(1.0) / (b.imag + b.real * r); + return __pyx_t_double_complex_from_parts( + (a.real * r + a.imag) * s, (a.imag * r - a.real) * s); + } + } + #else + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + if (b.imag == 0) { + return __pyx_t_double_complex_from_parts(a.real / b.real, a.imag / b.real); + } else { + double denom = b.real * b.real + b.imag * b.imag; + return __pyx_t_double_complex_from_parts( + (a.real * b.real + a.imag * b.imag) / denom, + (a.imag * b.real - a.real * b.imag) / denom); + } + } + #endif + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg_double(__pyx_t_double_complex a) { + __pyx_t_double_complex z; + z.real = -a.real; + z.imag = -a.imag; + return z; + } + static CYTHON_INLINE int __Pyx_c_is_zero_double(__pyx_t_double_complex a) { + return (a.real == 0) && (a.imag == 0); + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj_double(__pyx_t_double_complex a) { + __pyx_t_double_complex z; + z.real = a.real; + z.imag = -a.imag; + return z; + } + #if 1 + static CYTHON_INLINE double __Pyx_c_abs_double(__pyx_t_double_complex z) { + #if !defined(HAVE_HYPOT) || defined(_MSC_VER) + return sqrt(z.real*z.real + z.imag*z.imag); + #else + return hypot(z.real, z.imag); + #endif + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow_double(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + double r, lnr, theta, z_r, z_theta; + if (b.imag == 0 && b.real == (int)b.real) { + if (b.real < 0) { + double denom = a.real * a.real + a.imag * a.imag; + a.real = a.real / denom; + a.imag = -a.imag / denom; + b.real = -b.real; + } + switch ((int)b.real) { + case 0: + z.real = 1; + z.imag = 0; + return z; + case 1: + return a; + case 2: + return __Pyx_c_prod_double(a, a); + case 3: + z = __Pyx_c_prod_double(a, a); + return __Pyx_c_prod_double(z, a); + case 4: + z = __Pyx_c_prod_double(a, a); + return __Pyx_c_prod_double(z, z); + } + } + if (a.imag 
== 0) { + if (a.real == 0) { + return a; + } else if ((b.imag == 0) && (a.real >= 0)) { + z.real = pow(a.real, b.real); + z.imag = 0; + return z; + } else if (a.real > 0) { + r = a.real; + theta = 0; + } else { + r = -a.real; + theta = atan2(0.0, -1.0); + } + } else { + r = __Pyx_c_abs_double(a); + theta = atan2(a.imag, a.real); + } + lnr = log(r); + z_r = exp(lnr * b.real - theta * b.imag); + z_theta = theta * b.real + lnr * b.imag; + z.real = z_r * cos(z_theta); + z.imag = z_r * sin(z_theta); + return z; + } + #endif +#endif + +/* CIntToPy */ + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_int(unsigned int value) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const unsigned int neg_one = (unsigned int) -1, const_zero = (unsigned int) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(unsigned int) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(unsigned int) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(unsigned int) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(unsigned int) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(unsigned int) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + unsigned char *bytes = (unsigned char *)&value; +#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4 + if (is_unsigned) { + return PyLong_FromUnsignedNativeBytes(bytes, sizeof(value), -1); + } else { + return PyLong_FromNativeBytes(bytes, sizeof(value), -1); + } +#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000 + int one = 1; int little = (int)*(unsigned char *)&one; + return _PyLong_FromByteArray(bytes, sizeof(unsigned int), + little, !is_unsigned); +#else + int one = 1; int little = (int)*(unsigned char *)&one; + PyObject *from_bytes, *result = NULL; + PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; + from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes"); + if (!from_bytes) return NULL; + py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(unsigned int)); + if (!py_bytes) goto limited_bad; + order_str = PyUnicode_FromString(little ? 
"little" : "big"); + if (!order_str) goto limited_bad; + arg_tuple = PyTuple_Pack(2, py_bytes, order_str); + if (!arg_tuple) goto limited_bad; + if (!is_unsigned) { + kwds = PyDict_New(); + if (!kwds) goto limited_bad; + if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad; + } + result = PyObject_Call(from_bytes, arg_tuple, kwds); + limited_bad: + Py_XDECREF(kwds); + Py_XDECREF(arg_tuple); + Py_XDECREF(order_str); + Py_XDECREF(py_bytes); + Py_XDECREF(from_bytes); + return result; +#endif + } +} + +/* CIntFromPy */ + static CYTHON_INLINE unsigned int __Pyx_PyInt_As_unsigned_int(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const unsigned int neg_one = (unsigned int) -1, const_zero = (unsigned int) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if ((sizeof(unsigned int) < sizeof(long))) { + __PYX_VERIFY_RETURN_INT(unsigned int, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (unsigned int) val; + } + } +#endif + if (unlikely(!PyLong_Check(x))) { + unsigned int val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (unsigned int) -1; + val = __Pyx_PyInt_As_unsigned_int(tmp); + Py_DECREF(tmp); + return val; + } + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + if (unlikely(__Pyx_PyLong_IsNeg(x))) { + goto raise_neg_overflow; + } else if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(unsigned int, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_DigitCount(x)) { + case 2: + if ((8 * sizeof(unsigned int) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(unsigned int) >= 2 * PyLong_SHIFT)) { + return (unsigned int) (((((unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0])); + } + } + break; + case 3: + if ((8 * sizeof(unsigned int) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(unsigned int) >= 3 * PyLong_SHIFT)) { + return (unsigned int) (((((((unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0])); + } + } + break; + case 4: + if ((8 * sizeof(unsigned int) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(unsigned int) >= 4 * PyLong_SHIFT)) { + return (unsigned int) (((((((((unsigned int)digits[3]) << PyLong_SHIFT) | (unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0])); + } + } + break; + } + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 + 
if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (unsigned int) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if ((sizeof(unsigned int) <= sizeof(unsigned long))) { + __PYX_VERIFY_RETURN_INT_EXC(unsigned int, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(unsigned int) <= sizeof(unsigned PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(unsigned int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(unsigned int, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_SignedDigitCount(x)) { + case -2: + if ((8 * sizeof(unsigned int) - 1 > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(unsigned int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(unsigned int) - 1 > 2 * PyLong_SHIFT)) { + return (unsigned int) (((unsigned int)-1)*(((((unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); + } + } + break; + case 2: + if ((8 * sizeof(unsigned int) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(unsigned int) - 1 > 2 * PyLong_SHIFT)) { + return (unsigned int) ((((((unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); + } + } + break; + case -3: + if ((8 * sizeof(unsigned int) - 1 > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(unsigned int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(unsigned int) - 1 > 3 * PyLong_SHIFT)) { + return (unsigned int) (((unsigned int)-1)*(((((((unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); + } + } + break; + case 3: + if ((8 * sizeof(unsigned int) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(unsigned int) - 1 > 3 * PyLong_SHIFT)) { + return (unsigned int) ((((((((unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); + } + } + break; + case -4: + if ((8 * sizeof(unsigned int) - 1 > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(unsigned int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(unsigned int) - 1 > 4 * PyLong_SHIFT)) { + return (unsigned int) (((unsigned int)-1)*(((((((((unsigned int)digits[3]) << PyLong_SHIFT) | (unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); + } + } + break; + 
case 4: + if ((8 * sizeof(unsigned int) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(unsigned int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(unsigned int) - 1 > 4 * PyLong_SHIFT)) { + return (unsigned int) ((((((((((unsigned int)digits[3]) << PyLong_SHIFT) | (unsigned int)digits[2]) << PyLong_SHIFT) | (unsigned int)digits[1]) << PyLong_SHIFT) | (unsigned int)digits[0]))); + } + } + break; + } + } +#endif + if ((sizeof(unsigned int) <= sizeof(long))) { + __PYX_VERIFY_RETURN_INT_EXC(unsigned int, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(unsigned int) <= sizeof(PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(unsigned int, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { + unsigned int val; + int ret = -1; +#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API + Py_ssize_t bytes_copied = PyLong_AsNativeBytes( + x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); + if (unlikely(bytes_copied == -1)) { + } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { + goto raise_overflow; + } else { + ret = 0; + } +#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + ret = _PyLong_AsByteArray((PyLongObject *)x, + bytes, sizeof(val), + is_little, !is_unsigned); +#else + PyObject *v; + PyObject *stepval = NULL, *mask = NULL, *shift = NULL; + int bits, remaining_bits, is_negative = 0; + int chunk_size = (sizeof(long) < 8) ? 30 : 62; + if (likely(PyLong_CheckExact(x))) { + v = __Pyx_NewRef(x); + } else { + v = PyNumber_Long(x); + if (unlikely(!v)) return (unsigned int) -1; + assert(PyLong_CheckExact(v)); + } + { + int result = PyObject_RichCompareBool(v, Py_False, Py_LT); + if (unlikely(result < 0)) { + Py_DECREF(v); + return (unsigned int) -1; + } + is_negative = result == 1; + } + if (is_unsigned && unlikely(is_negative)) { + Py_DECREF(v); + goto raise_neg_overflow; + } else if (is_negative) { + stepval = PyNumber_Invert(v); + Py_DECREF(v); + if (unlikely(!stepval)) + return (unsigned int) -1; + } else { + stepval = v; + } + v = NULL; + val = (unsigned int) 0; + mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; + shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; + for (bits = 0; bits < (int) sizeof(unsigned int) * 8 - chunk_size; bits += chunk_size) { + PyObject *tmp, *digit; + long idigit; + digit = PyNumber_And(stepval, mask); + if (unlikely(!digit)) goto done; + idigit = PyLong_AsLong(digit); + Py_DECREF(digit); + if (unlikely(idigit < 0)) goto done; + val |= ((unsigned int) idigit) << bits; + tmp = PyNumber_Rshift(stepval, shift); + if (unlikely(!tmp)) goto done; + Py_DECREF(stepval); stepval = tmp; + } + Py_DECREF(shift); shift = NULL; + Py_DECREF(mask); mask = NULL; + { + long idigit = PyLong_AsLong(stepval); + if (unlikely(idigit < 0)) goto done; + remaining_bits = ((int) sizeof(unsigned int) * 8) - bits - (is_unsigned ? 
0 : 1); + if (unlikely(idigit >= (1L << remaining_bits))) + goto raise_overflow; + val |= ((unsigned int) idigit) << bits; + } + if (!is_unsigned) { + if (unlikely(val & (((unsigned int) 1) << (sizeof(unsigned int) * 8 - 1)))) + goto raise_overflow; + if (is_negative) + val = ~val; + } + ret = 0; + done: + Py_XDECREF(shift); + Py_XDECREF(mask); + Py_XDECREF(stepval); +#endif + if (unlikely(ret)) + return (unsigned int) -1; + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to unsigned int"); + return (unsigned int) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to unsigned int"); + return (unsigned int) -1; +} + +/* CIntToPy */ + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const long neg_one = (long) -1, const_zero = (long) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(long) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(long) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); +#endif + } + } else { + if (sizeof(long) <= sizeof(long)) { + return PyInt_FromLong((long) value); +#ifdef HAVE_LONG_LONG + } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); +#endif + } + } + { + unsigned char *bytes = (unsigned char *)&value; +#if !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX >= 0x030d00A4 + if (is_unsigned) { + return PyLong_FromUnsignedNativeBytes(bytes, sizeof(value), -1); + } else { + return PyLong_FromNativeBytes(bytes, sizeof(value), -1); + } +#elif !CYTHON_COMPILING_IN_LIMITED_API && PY_VERSION_HEX < 0x030d0000 + int one = 1; int little = (int)*(unsigned char *)&one; + return _PyLong_FromByteArray(bytes, sizeof(long), + little, !is_unsigned); +#else + int one = 1; int little = (int)*(unsigned char *)&one; + PyObject *from_bytes, *result = NULL; + PyObject *py_bytes = NULL, *arg_tuple = NULL, *kwds = NULL, *order_str = NULL; + from_bytes = PyObject_GetAttrString((PyObject*)&PyLong_Type, "from_bytes"); + if (!from_bytes) return NULL; + py_bytes = PyBytes_FromStringAndSize((char*)bytes, sizeof(long)); + if (!py_bytes) goto limited_bad; + order_str = PyUnicode_FromString(little ? 
"little" : "big"); + if (!order_str) goto limited_bad; + arg_tuple = PyTuple_Pack(2, py_bytes, order_str); + if (!arg_tuple) goto limited_bad; + if (!is_unsigned) { + kwds = PyDict_New(); + if (!kwds) goto limited_bad; + if (PyDict_SetItemString(kwds, "signed", __Pyx_NewRef(Py_True))) goto limited_bad; + } + result = PyObject_Call(from_bytes, arg_tuple, kwds); + limited_bad: + Py_XDECREF(kwds); + Py_XDECREF(arg_tuple); + Py_XDECREF(order_str); + Py_XDECREF(py_bytes); + Py_XDECREF(from_bytes); + return result; +#endif + } +} + +/* FormatTypeName */ + #if CYTHON_COMPILING_IN_LIMITED_API +static __Pyx_TypeName +__Pyx_PyType_GetName(PyTypeObject* tp) +{ + PyObject *name = __Pyx_PyObject_GetAttrStr((PyObject *)tp, + __pyx_n_s_name); + if (unlikely(name == NULL) || unlikely(!PyUnicode_Check(name))) { + PyErr_Clear(); + Py_XDECREF(name); + name = __Pyx_NewRef(__pyx_n_s__13); + } + return name; +} +#endif + +/* CIntFromPy */ + static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const long neg_one = (long) -1, const_zero = (long) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if ((sizeof(long) < sizeof(long))) { + __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (long) val; + } + } +#endif + if (unlikely(!PyLong_Check(x))) { + long val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (long) -1; + val = __Pyx_PyInt_As_long(tmp); + Py_DECREF(tmp); + return val; + } + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + if (unlikely(__Pyx_PyLong_IsNeg(x))) { + goto raise_neg_overflow; + } else if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(long, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_DigitCount(x)) { + case 2: + if ((8 * sizeof(long) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) >= 2 * PyLong_SHIFT)) { + return (long) (((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 3: + if ((8 * sizeof(long) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) >= 3 * PyLong_SHIFT)) { + return (long) (((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 4: + if ((8 * sizeof(long) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) >= 4 * PyLong_SHIFT)) { + return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | 
(long)digits[0])); + } + } + break; + } + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (long) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if ((sizeof(long) <= sizeof(unsigned long))) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(long) <= sizeof(unsigned PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(long, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_SignedDigitCount(x)) { + case -2: + if ((8 * sizeof(long) - 1 > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { + return (long) (((long)-1)*(((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 2: + if ((8 * sizeof(long) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { + return (long) ((((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -3: + if ((8 * sizeof(long) - 1 > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { + return (long) (((long)-1)*(((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 3: + if ((8 * sizeof(long) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { + return (long) ((((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -4: + if ((8 * sizeof(long) - 1 > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 4 * PyLong_SHIFT)) { + return (long) (((long)-1)*(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 4: + if ((8 * sizeof(long) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned 
long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(long) - 1 > 4 * PyLong_SHIFT)) { + return (long) ((((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + } + } +#endif + if ((sizeof(long) <= sizeof(long))) { + __PYX_VERIFY_RETURN_INT_EXC(long, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(long) <= sizeof(PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { + long val; + int ret = -1; +#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API + Py_ssize_t bytes_copied = PyLong_AsNativeBytes( + x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); + if (unlikely(bytes_copied == -1)) { + } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { + goto raise_overflow; + } else { + ret = 0; + } +#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + ret = _PyLong_AsByteArray((PyLongObject *)x, + bytes, sizeof(val), + is_little, !is_unsigned); +#else + PyObject *v; + PyObject *stepval = NULL, *mask = NULL, *shift = NULL; + int bits, remaining_bits, is_negative = 0; + int chunk_size = (sizeof(long) < 8) ? 30 : 62; + if (likely(PyLong_CheckExact(x))) { + v = __Pyx_NewRef(x); + } else { + v = PyNumber_Long(x); + if (unlikely(!v)) return (long) -1; + assert(PyLong_CheckExact(v)); + } + { + int result = PyObject_RichCompareBool(v, Py_False, Py_LT); + if (unlikely(result < 0)) { + Py_DECREF(v); + return (long) -1; + } + is_negative = result == 1; + } + if (is_unsigned && unlikely(is_negative)) { + Py_DECREF(v); + goto raise_neg_overflow; + } else if (is_negative) { + stepval = PyNumber_Invert(v); + Py_DECREF(v); + if (unlikely(!stepval)) + return (long) -1; + } else { + stepval = v; + } + v = NULL; + val = (long) 0; + mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; + shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; + for (bits = 0; bits < (int) sizeof(long) * 8 - chunk_size; bits += chunk_size) { + PyObject *tmp, *digit; + long idigit; + digit = PyNumber_And(stepval, mask); + if (unlikely(!digit)) goto done; + idigit = PyLong_AsLong(digit); + Py_DECREF(digit); + if (unlikely(idigit < 0)) goto done; + val |= ((long) idigit) << bits; + tmp = PyNumber_Rshift(stepval, shift); + if (unlikely(!tmp)) goto done; + Py_DECREF(stepval); stepval = tmp; + } + Py_DECREF(shift); shift = NULL; + Py_DECREF(mask); mask = NULL; + { + long idigit = PyLong_AsLong(stepval); + if (unlikely(idigit < 0)) goto done; + remaining_bits = ((int) sizeof(long) * 8) - bits - (is_unsigned ? 
0 : 1); + if (unlikely(idigit >= (1L << remaining_bits))) + goto raise_overflow; + val |= ((long) idigit) << bits; + } + if (!is_unsigned) { + if (unlikely(val & (((long) 1) << (sizeof(long) * 8 - 1)))) + goto raise_overflow; + if (is_negative) + val = ~val; + } + ret = 0; + done: + Py_XDECREF(shift); + Py_XDECREF(mask); + Py_XDECREF(stepval); +#endif + if (unlikely(ret)) + return (long) -1; + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to long"); + return (long) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to long"); + return (long) -1; +} + +/* CIntFromPy */ + static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#endif + const int neg_one = (int) -1, const_zero = (int) 0; +#ifdef __Pyx_HAS_GCC_DIAGNOSTIC +#pragma GCC diagnostic pop +#endif + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if ((sizeof(int) < sizeof(long))) { + __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (int) val; + } + } +#endif + if (unlikely(!PyLong_Check(x))) { + int val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (int) -1; + val = __Pyx_PyInt_As_int(tmp); + Py_DECREF(tmp); + return val; + } + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + if (unlikely(__Pyx_PyLong_IsNeg(x))) { + goto raise_neg_overflow; + } else if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(int, __Pyx_compact_upylong, __Pyx_PyLong_CompactValueUnsigned(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_DigitCount(x)) { + case 2: + if ((8 * sizeof(int) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) >= 2 * PyLong_SHIFT)) { + return (int) (((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 3: + if ((8 * sizeof(int) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) >= 3 * PyLong_SHIFT)) { + return (int) (((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 4: + if ((8 * sizeof(int) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) >= 4 * PyLong_SHIFT)) { + return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + } + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX < 0x030C00A7 + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + 
return (int) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if ((sizeof(int) <= sizeof(unsigned long))) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned long, PyLong_AsUnsignedLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(int) <= sizeof(unsigned PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) +#endif + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + if (__Pyx_PyLong_IsCompact(x)) { + __PYX_VERIFY_RETURN_INT(int, __Pyx_compact_pylong, __Pyx_PyLong_CompactValue(x)) + } else { + const digit* digits = __Pyx_PyLong_Digits(x); + assert(__Pyx_PyLong_DigitCount(x) > 1); + switch (__Pyx_PyLong_SignedDigitCount(x)) { + case -2: + if ((8 * sizeof(int) - 1 > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { + return (int) (((int)-1)*(((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 2: + if ((8 * sizeof(int) > 1 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 2 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { + return (int) ((((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -3: + if ((8 * sizeof(int) - 1 > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { + return (int) (((int)-1)*(((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 3: + if ((8 * sizeof(int) > 2 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 3 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { + return (int) ((((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -4: + if ((8 * sizeof(int) - 1 > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 4 * PyLong_SHIFT)) { + return (int) (((int)-1)*(((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 4: + if ((8 * sizeof(int) > 3 * PyLong_SHIFT)) { + if ((8 * sizeof(unsigned long) > 4 * PyLong_SHIFT)) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if ((8 * sizeof(int) - 1 > 4 * PyLong_SHIFT)) { + return (int) ((((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + } + 
} +#endif + if ((sizeof(int) <= sizeof(long))) { + __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x)) +#ifdef HAVE_LONG_LONG + } else if ((sizeof(int) <= sizeof(PY_LONG_LONG))) { + __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x)) +#endif + } + } + { + int val; + int ret = -1; +#if PY_VERSION_HEX >= 0x030d00A6 && !CYTHON_COMPILING_IN_LIMITED_API + Py_ssize_t bytes_copied = PyLong_AsNativeBytes( + x, &val, sizeof(val), Py_ASNATIVEBYTES_NATIVE_ENDIAN | (is_unsigned ? Py_ASNATIVEBYTES_UNSIGNED_BUFFER | Py_ASNATIVEBYTES_REJECT_NEGATIVE : 0)); + if (unlikely(bytes_copied == -1)) { + } else if (unlikely(bytes_copied > (Py_ssize_t) sizeof(val))) { + goto raise_overflow; + } else { + ret = 0; + } +#elif PY_VERSION_HEX < 0x030d0000 && !(CYTHON_COMPILING_IN_PYPY || CYTHON_COMPILING_IN_LIMITED_API) || defined(_PyLong_AsByteArray) + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + ret = _PyLong_AsByteArray((PyLongObject *)x, + bytes, sizeof(val), + is_little, !is_unsigned); +#else + PyObject *v; + PyObject *stepval = NULL, *mask = NULL, *shift = NULL; + int bits, remaining_bits, is_negative = 0; + int chunk_size = (sizeof(long) < 8) ? 30 : 62; + if (likely(PyLong_CheckExact(x))) { + v = __Pyx_NewRef(x); + } else { + v = PyNumber_Long(x); + if (unlikely(!v)) return (int) -1; + assert(PyLong_CheckExact(v)); + } + { + int result = PyObject_RichCompareBool(v, Py_False, Py_LT); + if (unlikely(result < 0)) { + Py_DECREF(v); + return (int) -1; + } + is_negative = result == 1; + } + if (is_unsigned && unlikely(is_negative)) { + Py_DECREF(v); + goto raise_neg_overflow; + } else if (is_negative) { + stepval = PyNumber_Invert(v); + Py_DECREF(v); + if (unlikely(!stepval)) + return (int) -1; + } else { + stepval = v; + } + v = NULL; + val = (int) 0; + mask = PyLong_FromLong((1L << chunk_size) - 1); if (unlikely(!mask)) goto done; + shift = PyLong_FromLong(chunk_size); if (unlikely(!shift)) goto done; + for (bits = 0; bits < (int) sizeof(int) * 8 - chunk_size; bits += chunk_size) { + PyObject *tmp, *digit; + long idigit; + digit = PyNumber_And(stepval, mask); + if (unlikely(!digit)) goto done; + idigit = PyLong_AsLong(digit); + Py_DECREF(digit); + if (unlikely(idigit < 0)) goto done; + val |= ((int) idigit) << bits; + tmp = PyNumber_Rshift(stepval, shift); + if (unlikely(!tmp)) goto done; + Py_DECREF(stepval); stepval = tmp; + } + Py_DECREF(shift); shift = NULL; + Py_DECREF(mask); mask = NULL; + { + long idigit = PyLong_AsLong(stepval); + if (unlikely(idigit < 0)) goto done; + remaining_bits = ((int) sizeof(int) * 8) - bits - (is_unsigned ? 
0 : 1); + if (unlikely(idigit >= (1L << remaining_bits))) + goto raise_overflow; + val |= ((int) idigit) << bits; + } + if (!is_unsigned) { + if (unlikely(val & (((int) 1) << (sizeof(int) * 8 - 1)))) + goto raise_overflow; + if (is_negative) + val = ~val; + } + ret = 0; + done: + Py_XDECREF(shift); + Py_XDECREF(mask); + Py_XDECREF(stepval); +#endif + if (unlikely(ret)) + return (int) -1; + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to int"); + return (int) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to int"); + return (int) -1; +} + +/* FastTypeChecks */ + #if CYTHON_COMPILING_IN_CPYTHON +static int __Pyx_InBases(PyTypeObject *a, PyTypeObject *b) { + while (a) { + a = __Pyx_PyType_GetSlot(a, tp_base, PyTypeObject*); + if (a == b) + return 1; + } + return b == &PyBaseObject_Type; +} +static CYTHON_INLINE int __Pyx_IsSubtype(PyTypeObject *a, PyTypeObject *b) { + PyObject *mro; + if (a == b) return 1; + mro = a->tp_mro; + if (likely(mro)) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(mro); + for (i = 0; i < n; i++) { + if (PyTuple_GET_ITEM(mro, i) == (PyObject *)b) + return 1; + } + return 0; + } + return __Pyx_InBases(a, b); +} +static CYTHON_INLINE int __Pyx_IsAnySubtype2(PyTypeObject *cls, PyTypeObject *a, PyTypeObject *b) { + PyObject *mro; + if (cls == a || cls == b) return 1; + mro = cls->tp_mro; + if (likely(mro)) { + Py_ssize_t i, n; + n = PyTuple_GET_SIZE(mro); + for (i = 0; i < n; i++) { + PyObject *base = PyTuple_GET_ITEM(mro, i); + if (base == (PyObject *)a || base == (PyObject *)b) + return 1; + } + return 0; + } + return __Pyx_InBases(cls, a) || __Pyx_InBases(cls, b); +} +#if PY_MAJOR_VERSION == 2 +static int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject* exc_type2) { + PyObject *exception, *value, *tb; + int res; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&exception, &value, &tb); + res = exc_type1 ? 
PyObject_IsSubclass(err, exc_type1) : 0; + if (unlikely(res == -1)) { + PyErr_WriteUnraisable(err); + res = 0; + } + if (!res) { + res = PyObject_IsSubclass(err, exc_type2); + if (unlikely(res == -1)) { + PyErr_WriteUnraisable(err); + res = 0; + } + } + __Pyx_ErrRestore(exception, value, tb); + return res; +} +#else +static CYTHON_INLINE int __Pyx_inner_PyErr_GivenExceptionMatches2(PyObject *err, PyObject* exc_type1, PyObject *exc_type2) { + if (exc_type1) { + return __Pyx_IsAnySubtype2((PyTypeObject*)err, (PyTypeObject*)exc_type1, (PyTypeObject*)exc_type2); + } else { + return __Pyx_IsSubtype((PyTypeObject*)err, (PyTypeObject*)exc_type2); + } +} +#endif +static int __Pyx_PyErr_GivenExceptionMatchesTuple(PyObject *exc_type, PyObject *tuple) { + Py_ssize_t i, n; + assert(PyExceptionClass_Check(exc_type)); + n = PyTuple_GET_SIZE(tuple); +#if PY_MAJOR_VERSION >= 3 + for (i=0; i= 0x030B00A4 + return Py_Version & ~0xFFUL; +#else + const char* rt_version = Py_GetVersion(); + unsigned long version = 0; + unsigned long factor = 0x01000000UL; + unsigned int digit = 0; + int i = 0; + while (factor) { + while ('0' <= rt_version[i] && rt_version[i] <= '9') { + digit = digit * 10 + (unsigned int) (rt_version[i] - '0'); + ++i; + } + version += factor * digit; + if (rt_version[i] != '.') + break; + digit = 0; + factor >>= 8; + ++i; + } + return version; +#endif +} +static int __Pyx_check_binary_version(unsigned long ct_version, unsigned long rt_version, int allow_newer) { + const unsigned long MAJOR_MINOR = 0xFFFF0000UL; + if ((rt_version & MAJOR_MINOR) == (ct_version & MAJOR_MINOR)) + return 0; + if (likely(allow_newer && (rt_version & MAJOR_MINOR) > (ct_version & MAJOR_MINOR))) + return 1; + { + char message[200]; + PyOS_snprintf(message, sizeof(message), + "compile time Python version %d.%d " + "of module '%.100s' " + "%s " + "runtime version %d.%d", + (int) (ct_version >> 24), (int) ((ct_version >> 16) & 0xFF), + __Pyx_MODULE_NAME, + (allow_newer) ? 
"was newer than" : "does not match", + (int) (rt_version >> 24), (int) ((rt_version >> 16) & 0xFF) + ); + return PyErr_WarnEx(NULL, message, 1); + } +} + +/* InitStrings */ + #if PY_MAJOR_VERSION >= 3 +static int __Pyx_InitString(__Pyx_StringTabEntry t, PyObject **str) { + if (t.is_unicode | t.is_str) { + if (t.intern) { + *str = PyUnicode_InternFromString(t.s); + } else if (t.encoding) { + *str = PyUnicode_Decode(t.s, t.n - 1, t.encoding, NULL); + } else { + *str = PyUnicode_FromStringAndSize(t.s, t.n - 1); + } + } else { + *str = PyBytes_FromStringAndSize(t.s, t.n - 1); + } + if (!*str) + return -1; + if (PyObject_Hash(*str) == -1) + return -1; + return 0; +} +#endif +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { + while (t->p) { + #if PY_MAJOR_VERSION >= 3 + __Pyx_InitString(*t, t->p); + #else + if (t->is_unicode) { + *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); + } else if (t->intern) { + *t->p = PyString_InternFromString(t->s); + } else { + *t->p = PyString_FromStringAndSize(t->s, t->n - 1); + } + if (!*t->p) + return -1; + if (PyObject_Hash(*t->p) == -1) + return -1; + #endif + ++t; + } + return 0; +} + +#include +static CYTHON_INLINE Py_ssize_t __Pyx_ssize_strlen(const char *s) { + size_t len = strlen(s); + if (unlikely(len > (size_t) PY_SSIZE_T_MAX)) { + PyErr_SetString(PyExc_OverflowError, "byte string is too long"); + return -1; + } + return (Py_ssize_t) len; +} +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) { + Py_ssize_t len = __Pyx_ssize_strlen(c_str); + if (unlikely(len < 0)) return NULL; + return __Pyx_PyUnicode_FromStringAndSize(c_str, len); +} +static CYTHON_INLINE PyObject* __Pyx_PyByteArray_FromString(const char* c_str) { + Py_ssize_t len = __Pyx_ssize_strlen(c_str); + if (unlikely(len < 0)) return NULL; + return PyByteArray_FromStringAndSize(c_str, len); +} +static CYTHON_INLINE const char* __Pyx_PyObject_AsString(PyObject* o) { + Py_ssize_t ignore; + return __Pyx_PyObject_AsStringAndSize(o, &ignore); +} +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT +#if !CYTHON_PEP393_ENABLED +static const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { + char* defenc_c; + PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); + if (!defenc) return NULL; + defenc_c = PyBytes_AS_STRING(defenc); +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + { + char* end = defenc_c + PyBytes_GET_SIZE(defenc); + char* c; + for (c = defenc_c; c < end; c++) { + if ((unsigned char) (*c) >= 128) { + PyUnicode_AsASCIIString(o); + return NULL; + } + } + } +#endif + *length = PyBytes_GET_SIZE(defenc); + return defenc_c; +} +#else +static CYTHON_INLINE const char* __Pyx_PyUnicode_AsStringAndSize(PyObject* o, Py_ssize_t *length) { + if (unlikely(__Pyx_PyUnicode_READY(o) == -1)) return NULL; +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + if (likely(PyUnicode_IS_ASCII(o))) { + *length = PyUnicode_GET_LENGTH(o); + return PyUnicode_AsUTF8(o); + } else { + PyUnicode_AsASCIIString(o); + return NULL; + } +#else + return PyUnicode_AsUTF8AndSize(o, length); +#endif +} +#endif +#endif +static CYTHON_INLINE const char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT + if ( +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + __Pyx_sys_getdefaultencoding_not_ascii && +#endif + PyUnicode_Check(o)) { + return __Pyx_PyUnicode_AsStringAndSize(o, length); + } else +#endif +#if 
(!CYTHON_COMPILING_IN_PYPY && !CYTHON_COMPILING_IN_LIMITED_API) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE)) + if (PyByteArray_Check(o)) { + *length = PyByteArray_GET_SIZE(o); + return PyByteArray_AS_STRING(o); + } else +#endif + { + char* result; + int r = PyBytes_AsStringAndSize(o, &result, length); + if (unlikely(r < 0)) { + return NULL; + } else { + return result; + } + } +} +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { + int is_true = x == Py_True; + if (is_true | (x == Py_False) | (x == Py_None)) return is_true; + else return PyObject_IsTrue(x); +} +static CYTHON_INLINE int __Pyx_PyObject_IsTrueAndDecref(PyObject* x) { + int retval; + if (unlikely(!x)) return -1; + retval = __Pyx_PyObject_IsTrue(x); + Py_DECREF(x); + return retval; +} +static PyObject* __Pyx_PyNumber_IntOrLongWrongResultType(PyObject* result, const char* type_name) { + __Pyx_TypeName result_type_name = __Pyx_PyType_GetName(Py_TYPE(result)); +#if PY_MAJOR_VERSION >= 3 + if (PyLong_Check(result)) { + if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1, + "__int__ returned non-int (type " __Pyx_FMT_TYPENAME "). " + "The ability to return an instance of a strict subclass of int is deprecated, " + "and may be removed in a future version of Python.", + result_type_name)) { + __Pyx_DECREF_TypeName(result_type_name); + Py_DECREF(result); + return NULL; + } + __Pyx_DECREF_TypeName(result_type_name); + return result; + } +#endif + PyErr_Format(PyExc_TypeError, + "__%.4s__ returned non-%.4s (type " __Pyx_FMT_TYPENAME ")", + type_name, type_name, result_type_name); + __Pyx_DECREF_TypeName(result_type_name); + Py_DECREF(result); + return NULL; +} +static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) { +#if CYTHON_USE_TYPE_SLOTS + PyNumberMethods *m; +#endif + const char *name = NULL; + PyObject *res = NULL; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x) || PyLong_Check(x))) +#else + if (likely(PyLong_Check(x))) +#endif + return __Pyx_NewRef(x); +#if CYTHON_USE_TYPE_SLOTS + m = Py_TYPE(x)->tp_as_number; + #if PY_MAJOR_VERSION < 3 + if (m && m->nb_int) { + name = "int"; + res = m->nb_int(x); + } + else if (m && m->nb_long) { + name = "long"; + res = m->nb_long(x); + } + #else + if (likely(m && m->nb_int)) { + name = "int"; + res = m->nb_int(x); + } + #endif +#else + if (!PyBytes_CheckExact(x) && !PyUnicode_CheckExact(x)) { + res = PyNumber_Int(x); + } +#endif + if (likely(res)) { +#if PY_MAJOR_VERSION < 3 + if (unlikely(!PyInt_Check(res) && !PyLong_Check(res))) { +#else + if (unlikely(!PyLong_CheckExact(res))) { +#endif + return __Pyx_PyNumber_IntOrLongWrongResultType(res, name); + } + } + else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_TypeError, + "an integer is required"); + } + return res; +} +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { + Py_ssize_t ival; + PyObject *x; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_CheckExact(b))) { + if (sizeof(Py_ssize_t) >= sizeof(long)) + return PyInt_AS_LONG(b); + else + return PyInt_AsSsize_t(b); + } +#endif + if (likely(PyLong_CheckExact(b))) { + #if CYTHON_USE_PYLONG_INTERNALS + if (likely(__Pyx_PyLong_IsCompact(b))) { + return __Pyx_PyLong_CompactValue(b); + } else { + const digit* digits = __Pyx_PyLong_Digits(b); + const Py_ssize_t size = __Pyx_PyLong_SignedDigitCount(b); + switch (size) { + case 2: + if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { + return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -2: + if (8 * sizeof(Py_ssize_t) > 2 * 
PyLong_SHIFT) { + return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + } + } + #endif + return PyLong_AsSsize_t(b); + } + x = PyNumber_Index(b); + if (!x) return -1; + ival = PyInt_AsSsize_t(x); + Py_DECREF(x); + return ival; +} +static CYTHON_INLINE Py_hash_t __Pyx_PyIndex_AsHash_t(PyObject* o) { + if (sizeof(Py_hash_t) == sizeof(Py_ssize_t)) { + return (Py_hash_t) __Pyx_PyIndex_AsSsize_t(o); +#if PY_MAJOR_VERSION < 3 + } else if (likely(PyInt_CheckExact(o))) { + return PyInt_AS_LONG(o); +#endif + } else { + Py_ssize_t ival; + PyObject *x; + x = PyNumber_Index(o); + if (!x) return -1; + ival = PyInt_AsLong(x); + Py_DECREF(x); + return ival; + } +} +static CYTHON_INLINE PyObject * __Pyx_PyBool_FromLong(long b) { + return b ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False); +} +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) { + return PyInt_FromSize_t(ival); +} + + +/* #### Code section: utility_code_pragmas_end ### */ +#ifdef _MSC_VER +#pragma warning( pop ) +#endif + + + +/* #### Code section: end ### */ +#endif /* Py_PYTHON_H */ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_overlaps.cpython-37m-x86_64-linux-gnu.so b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_overlaps.cpython-37m-x86_64-linux-gnu.so new file mode 100755 index 0000000000000000000000000000000000000000..51798ca653806dc7b14d5683cbeab5596a3d24df Binary files /dev/null and b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_overlaps.cpython-37m-x86_64-linux-gnu.so differ diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_overlaps.pyx b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_overlaps.pyx new file mode 100644 index 0000000000000000000000000000000000000000..cd543ce481e3da6a5888bf2a64f8ce5c86e28e90 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_overlaps.pyx @@ -0,0 +1,147 @@ +# -------------------------------------------------------- +# Fast R-CNN +# Copyright (c) 2015 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# Written by Sergey Karayev +# -------------------------------------------------------- + +import numpy as np +cimport numpy as np +from cython.parallel import prange, parallel + + +DTYPE = np.float32 +ctypedef float DTYPE_t + + +def bbox_overlaps( + np.ndarray[DTYPE_t, ndim=2] boxes, + np.ndarray[DTYPE_t, ndim=2] query_boxes): + """ + Parameters + ---------- + boxes: (N, 4) ndarray of float + query_boxes: (K, 4) ndarray of float + Returns + ------- + overlaps: (N, K) ndarray of overlap between boxes and query_boxes + """ + 
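    # Illustrative note (a minimal sanity check, not from the original source):
    # the "+ 1" terms below assume inclusive integer pixel coordinates, so a
    # box [x1, y1, x2, y2] = [0, 0, 9, 9] spans 10 x 10 pixels with area 100.
    # With that convention, for example:
    #
    #   boxes = np.array([[0, 0,  9,  9]], dtype=np.float32)
    #   query = np.array([[5, 5, 14, 14]], dtype=np.float32)
    #   bbox_overlaps(boxes, query)
    #   # intersection = 5 * 5 = 25, union = 100 + 100 - 25 = 175
    #   # -> array([[0.142857...]], dtype=float32)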
cdef unsigned int N = boxes.shape[0] + cdef unsigned int K = query_boxes.shape[0] + cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) + cdef DTYPE_t iw, ih, box_area + cdef DTYPE_t ua + cdef unsigned int k, n + for k in range(K): + box_area = ( + (query_boxes[k, 2] - query_boxes[k, 0] + 1) * + (query_boxes[k, 3] - query_boxes[k, 1] + 1) + ) + for n in range(N): + iw = ( + min(boxes[n, 2], query_boxes[k, 2]) - + max(boxes[n, 0], query_boxes[k, 0]) + 1 + ) + if iw > 0: + ih = ( + min(boxes[n, 3], query_boxes[k, 3]) - + max(boxes[n, 1], query_boxes[k, 1]) + 1 + ) + if ih > 0: + ua = float( + (boxes[n, 2] - boxes[n, 0] + 1) * + (boxes[n, 3] - boxes[n, 1] + 1) + + box_area - iw * ih + ) + overlaps[n, k] = iw * ih / ua + return overlaps + +def bbox_intersections( + np.ndarray[DTYPE_t, ndim=2] boxes, + np.ndarray[DTYPE_t, ndim=2] query_boxes): + """ + For each query box compute the intersection ratio covered by boxes + ---------- + Parameters + ---------- + boxes: (N, 4) ndarray of float + query_boxes: (K, 4) ndarray of float + Returns + ------- + overlaps: (N, K) ndarray of intersec between boxes and query_boxes + """ + cdef unsigned int N = boxes.shape[0] + cdef unsigned int K = query_boxes.shape[0] + cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) + cdef DTYPE_t iw, ih, box_area + cdef DTYPE_t ua + cdef unsigned int k, n + for k in range(K): + box_area = ( + (query_boxes[k, 2] - query_boxes[k, 0] + 1) * + (query_boxes[k, 3] - query_boxes[k, 1] + 1) + ) + for n in range(N): + iw = ( + min(boxes[n, 2], query_boxes[k, 2]) - + max(boxes[n, 0], query_boxes[k, 0]) + 1 + ) + if iw > 0: + ih = ( + min(boxes[n, 3], query_boxes[k, 3]) - + max(boxes[n, 1], query_boxes[k, 1]) + 1 + ) + if ih > 0: + intersec[n, k] = iw * ih / box_area + return intersec + +# Compute bounding box voting +def box_vote( + np.ndarray[float, ndim=2] dets_NMS, + np.ndarray[float, ndim=2] dets_all): + cdef np.ndarray[float, ndim=2] dets_voted = np.zeros((dets_NMS.shape[0], dets_NMS.shape[1]), dtype=np.float32) + cdef unsigned int N = dets_NMS.shape[0] + cdef unsigned int M = dets_all.shape[0] + + cdef np.ndarray[float, ndim=1] det + cdef np.ndarray[float, ndim=1] acc_box + cdef float acc_score + + cdef np.ndarray[float, ndim=1] det2 + cdef float bi0, bi1, bit2, bi3 + cdef float iw, ih, ua + + cdef float thresh=0.5 + + for i in range(N): + det = dets_NMS[i, :] + acc_box = np.zeros((4), dtype=np.float32) + acc_score = 0.0 + + for m in range(M): + det2 = dets_all[m, :] + + bi0 = max(det[0], det2[0]) + bi1 = max(det[1], det2[1]) + bi2 = min(det[2], det2[2]) + bi3 = min(det[3], det2[3]) + + iw = bi2 - bi0 + 1 + ih = bi3 - bi1 + 1 + + if not (iw > 0 and ih > 0): + continue + + ua = (det[2] - det[0] + 1) * (det[3] - det[1] + 1) + (det2[2] - det2[0] + 1) * (det2[3] - det2[1] + 1) - iw * ih + ov = iw * ih / ua + + if (ov < thresh): + continue + + acc_box += det2[4] * det2[0:4] + acc_score += det2[4] + + dets_voted[i][0:4] = acc_box / acc_score + dets_voted[i][4] = det[4] # Keep the original score + + return dets_voted diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..3ea755cef42367ac9cbdbf51e5837a53cc914e05 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/box_utils.py @@ -0,0 +1,1338 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , Hao Xiang , +# License: 
TDG-Attribution-NonCommercial-NoDistrib + + +""" +Bounding box related utility functions +""" +import sys + +import numpy as np + +import torch +import torch.nn.functional as F +import opencood.utils.common_utils as common_utils +from opencood.utils.transformation_utils import x1_to_x2, x_to_world +from pyquaternion import Quaternion +import copy + + +def corner_to_center_torch(corner3d, order='lwh'): + corner3d_ = corner3d.cpu().numpy() + return torch.from_numpy(corner_to_center(corner3d_, order)).to(corner3d.device) + +def corner_to_center(corner3d, order='lwh'): + """ + Convert 8 corners to x, y, z, dx, dy, dz, yaw. + yaw in radians + + Parameters + ---------- + corner3d : np.ndarray + (N, 8, 3) + + order : str, for output. + 'lwh' or 'hwl' + + Returns + ------- + box3d : np.ndarray + (N, 7) + """ + assert corner3d.ndim == 3 + batch_size = corner3d.shape[0] + + xyz = np.mean(corner3d[:, [0, 3, 5, 6], :], axis=1) + h = abs(np.mean(corner3d[:, 4:, 2] - corner3d[:, :4, 2], axis=1, + keepdims=True)) + l = (np.sqrt(np.sum((corner3d[:, 0, [0, 1]] - corner3d[:, 3, [0, 1]]) ** 2, + axis=1, keepdims=True)) + + np.sqrt(np.sum((corner3d[:, 2, [0, 1]] - corner3d[:, 1, [0, 1]]) ** 2, + axis=1, keepdims=True)) + + np.sqrt(np.sum((corner3d[:, 4, [0, 1]] - corner3d[:, 7, [0, 1]]) ** 2, + axis=1, keepdims=True)) + + np.sqrt(np.sum((corner3d[:, 5, [0, 1]] - corner3d[:, 6, [0, 1]]) ** 2, + axis=1, keepdims=True))) / 4 + + w = (np.sqrt( + np.sum((corner3d[:, 0, [0, 1]] - corner3d[:, 1, [0, 1]]) ** 2, axis=1, + keepdims=True)) + + np.sqrt(np.sum((corner3d[:, 2, [0, 1]] - corner3d[:, 3, [0, 1]]) ** 2, + axis=1, keepdims=True)) + + np.sqrt(np.sum((corner3d[:, 4, [0, 1]] - corner3d[:, 5, [0, 1]]) ** 2, + axis=1, keepdims=True)) + + np.sqrt(np.sum((corner3d[:, 6, [0, 1]] - corner3d[:, 7, [0, 1]]) ** 2, + axis=1, keepdims=True))) / 4 + + theta = (np.arctan2(corner3d[:, 1, 1] - corner3d[:, 2, 1], + corner3d[:, 1, 0] - corner3d[:, 2, 0]) + + np.arctan2(corner3d[:, 0, 1] - corner3d[:, 3, 1], + corner3d[:, 0, 0] - corner3d[:, 3, 0]) + + np.arctan2(corner3d[:, 5, 1] - corner3d[:, 6, 1], + corner3d[:, 5, 0] - corner3d[:, 6, 0]) + + np.arctan2(corner3d[:, 4, 1] - corner3d[:, 7, 1], + corner3d[:, 4, 0] - corner3d[:, 7, 0]))[:, + np.newaxis] / 4 + + if order == 'lwh': + return np.concatenate([xyz, l, w, h, theta], axis=1).reshape( + batch_size, 7) + elif order == 'hwl': + return np.concatenate([xyz, h, w, l, theta], axis=1).reshape( + batch_size, 7) + else: + sys.exit('Unknown order') + + +def boxes_to_corners2d(boxes3d, order): + """ + 0 -------- 1 + | | + | | + | | + 3 -------- 2 + Parameters + __________ + boxes3d: np.ndarray or torch.Tensor + (N, 7) [x, y, z, dx, dy, dz, heading], (x, y, z) is the box center. + + order : str + 'lwh' or 'hwl' + + Returns: + corners2d: np.ndarray or torch.Tensor + (N, 4, 3), the 4 corners of the bounding box. + + """ + corners3d = boxes_to_corners_3d(boxes3d, order) + corners2d = corners3d[:, :4, :] + return corners2d + + +def boxes2d_to_corners2d(boxes2d, order="lwh"): + """ + 0 -------- 1 + | | + | | + | | + 3 -------- 2 + Parameters + __________ + boxes2d: np.ndarray or torch.Tensor + (..., 5) [x, y, dx, dy, heading], (x, y) is the box center. + + order : str + 'lwh' or 'hwl' + + Returns: + corners2d: np.ndarray or torch.Tensor + (..., 4, 2), the 4 corners of the bounding box. + + """ + assert order == "lwh", \ + "boxes2d_to_corners_2d only supports lwh order for now." 
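    # Illustrative note (hand-worked, not from the original source): the
    # template below produces the corners in the order of the diagram above,
    # i.e. corner 0 = (+dx/2, -dy/2), 1 = (+dx/2, +dy/2), 2 = (-dx/2, +dy/2),
    # 3 = (-dx/2, -dy/2) in the box frame, before the yaw rotation and the
    # translation to the box center are applied. For an axis-aligned box
    # [x, y, dx, dy, yaw] = [0, 0, 4, 2, 0]:
    #
    #   boxes2d_to_corners2d(torch.tensor([[0., 0., 4., 2., 0.]]))
    #   # -> [[[ 2., -1.], [ 2., 1.], [-2., 1.], [-2., -1.]]]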
+ boxes2d, is_numpy = common_utils.check_numpy_to_torch(boxes2d) + template = boxes2d.new_tensor(( + [1, -1], [1, 1], [-1, 1], [-1, -1] + )) / 2 + input_shape = boxes2d.shape + boxes2d = boxes2d.view(-1, 5) + corners2d = boxes2d[:, None, 2:4].repeat(1, 4, 1) * template[None, :, :] + corners2d = common_utils.rotate_points_along_z_2d(corners2d.view(-1, 2), + boxes2d[:, + 4].repeat_interleave( + 4)).view(-1, 4, + 2) + corners2d += boxes2d[:, None, 0:2] + corners2d = corners2d.view(*(input_shape[:-1]), 4, 2) + return corners2d + + +def boxes_to_corners_3d(boxes3d, order): + """ + 4 -------- 5 + /| /| + 7 -------- 6 . + | | | | + . 0 -------- 1 + |/ |/ + 3 -------- 2 + Parameters + __________ + boxes3d: np.ndarray or torch.Tensor + (N, 7) [x, y, z, l, w, h, heading], or [x, y, z, h, w, l, heading] + + (x, y, z) is the box center. + + order : str + 'lwh' or 'hwl' + + Returns: + corners3d: np.ndarray or torch.Tensor + (N, 8, 3), the 8 corners of the bounding box. + + + opv2v's left hand coord + + ^ z + | + | + | . x + |/ + +-------> y + + """ + + boxes3d, is_numpy = common_utils.check_numpy_to_torch(boxes3d) + boxes3d_ = boxes3d + + if order == 'hwl': + boxes3d_ = boxes3d[:, [0, 1, 2, 5, 4, 3, 6]] + + template = boxes3d_.new_tensor(( + [1, -1, -1], [1, 1, -1], [-1, 1, -1], [-1, -1, -1], + [1, -1, 1], [1, 1, 1], [-1, 1, 1], [-1, -1, 1], + )) / 2 + + corners3d = boxes3d_[:, None, 3:6].repeat(1, 8, 1) * template[None, :, :] + corners3d = common_utils.rotate_points_along_z(corners3d.view(-1, 8, 3), + boxes3d_[:, 6]).view(-1, 8, + 3) + corners3d += boxes3d_[:, None, 0:3] + + return corners3d.numpy() if is_numpy else corners3d + + +def box3d_to_2d(box3d): + """ + Convert 3D bounding box to 2D. + + Parameters + ---------- + box3d : np.ndarray + (n, 8, 3) + + Returns + ------- + box2d : np.ndarray + (n, 4, 2), project 3d to 2d. + """ + box2d = box3d[:, :4, :2] + return box2d + + +def corner2d_to_standup_box(box2d): + """ + Find the minmaxx, minmaxy for each 2d box. (N, 4, 2) -> (N, 4) + x1, y1, x2, y2 + + Parameters + ---------- + box2d : np.ndarray + (n, 4, 2), four corners of the 2d bounding box. + + Returns + ------- + standup_box2d : np.ndarray + (n, 4) + """ + N = box2d.shape[0] + standup_boxes2d = np.zeros((N, 4)) + + standup_boxes2d[:, 0] = np.min(box2d[:, :, 0], axis=1) + standup_boxes2d[:, 1] = np.min(box2d[:, :, 1], axis=1) + standup_boxes2d[:, 2] = np.max(box2d[:, :, 0], axis=1) + standup_boxes2d[:, 3] = np.max(box2d[:, :, 1], axis=1) + + return standup_boxes2d + + +def corner_to_standup_box_torch(box_corner): + """ + Find the minmax x and y for each bounding box. + + Parameters + ---------- + box_corner : torch.Tensor + Shape: (N, 8, 3) or (N, 4) + + Returns + ------- + standup_box2d : torch.Tensor + (n, 4) + """ + N = box_corner.shape[0] + standup_boxes2d = torch.zeros((N, 4)) + + standup_boxes2d = standup_boxes2d.to(box_corner.device) + + standup_boxes2d[:, 0] = torch.min(box_corner[:, :, 0], dim=1).values + standup_boxes2d[:, 1] = torch.min(box_corner[:, :, 1], dim=1).values + standup_boxes2d[:, 2] = torch.max(box_corner[:, :, 0], dim=1).values + standup_boxes2d[:, 3] = torch.max(box_corner[:, :, 1], dim=1).values + + return standup_boxes2d + + +def project_box3d(box3d, transformation_matrix): + """ + Project the 3d bounding box to another coordinate system based on the + transfomration matrix. 
+ + Parameters + ---------- + box3d : torch.Tensor or np.ndarray + 3D bounding box, (N, 8, 3) + + transformation_matrix : torch.Tensor or np.ndarray + Transformation matrix, (4, 4) + + Returns + ------- + projected_box3d : torch.Tensor + The projected bounding box, (N, 8, 3) + """ + assert transformation_matrix.shape == (4, 4) + box3d, is_numpy = \ + common_utils.check_numpy_to_torch(box3d) + transformation_matrix, _ = \ + common_utils.check_numpy_to_torch(transformation_matrix) + + # (N, 3, 8) + box3d_corner = box3d.transpose(1, 2) + # (N, 1, 8) + torch_ones = torch.ones((box3d_corner.shape[0], 1, 8)) + torch_ones = torch_ones.to(box3d_corner.device) + # (N, 4, 8) + box3d_corner = torch.cat((box3d_corner, torch_ones), + dim=1) + # (N, 4, 8) + projected_box3d = torch.matmul(transformation_matrix, + box3d_corner) + # (N, 8, 3) + projected_box3d = projected_box3d[:, :3, :].transpose(1, 2) + + return projected_box3d if not is_numpy else projected_box3d.numpy() + + +def project_points_by_matrix_torch(points, transformation_matrix): + """ + Project the points to another coordinate system based on the + transfomration matrix. + + IT NOT USED. LATTER ONE WITH THE SAME NAME WILL BE USED. + + Parameters + ---------- + points : torch.Tensor + 3D points, (N, 3) + + transformation_matrix : torch.Tensor + Transformation matrix, (4, 4) + + Returns + ------- + projected_points : torch.Tensor + The projected points, (N, 3) + """ + # convert to homogeneous coordinates via padding 1 at the last dimension. + # (N, 4) + points_homogeneous = F.pad(points, (0, 1), mode="constant", value=1) + # (N, 4) + projected_points = torch.einsum("ik, jk->ij", points_homogeneous, + transformation_matrix) + return projected_points[:, :3] + + +def get_mask_for_boxes_within_range_torch(boxes, gt_range): + """ + Generate mask to remove the bounding boxes + outside the range. + + Parameters + ---------- + boxes : torch.Tensor + Groundtruth bbx, shape: N,8,3 or N,4,2 + + gt_range: list + [xmin, ymin, zmin, xmax, ymax, zmax] + Returns + ------- + mask: torch.Tensor + The mask for bounding box -- True means the + bbx is within the range and False means the + bbx is outside the range. + """ + + # mask out the gt bounding box out fixed range (-140, -40, -3, 140, 40 1) + device = boxes.device + boundary_lower_range = \ + torch.Tensor(gt_range[:2]).reshape(1, 1, -1).to(device) + boundary_higher_range = \ + torch.Tensor(gt_range[3:5]).reshape(1, 1, -1).to(device) + + mask = torch.all( + torch.all(boxes[:, :, :2] >= boundary_lower_range, + dim=-1) & \ + torch.all(boxes[:, :, :2] <= boundary_higher_range, + dim=-1), dim=-1) + + return mask + + +def mask_boxes_outside_range_numpy(boxes, limit_range, order, + min_num_corners=8, return_mask=False): + """ + Parameters + ---------- + boxes: np.ndarray + (N, 7) [x, y, z, dx, dy, dz, heading], (x, y, z) is the box center + + limit_range: list + [minx, miny, minz, maxx, maxy, maxz] + + min_num_corners: int + The required minimum number of corners to be considered as in range. + + order : str + 'lwh' or 'hwl' + + return_mask : bool + Whether return the mask. + + Returns + ------- + boxes: np.ndarray + The filtered boxes. 
+ """ + assert boxes.shape[1] == 8 or boxes.shape[1] == 7 + + new_boxes = boxes.copy() + if boxes.shape[1] == 7: + new_boxes = boxes_to_corners_3d(new_boxes, order) + + mask = ((new_boxes >= limit_range[0:3]) & + (new_boxes <= limit_range[3:6])).all(axis=2) + mask = mask.sum(axis=1) >= min_num_corners # (N) + + if return_mask: + return boxes[mask], mask + return boxes[mask] + + +def create_bbx(extent): + """ + Create bounding box with 8 corners under obstacle vehicle reference. + + Parameters + ---------- + extent : list + half length, width and height + + Returns + ------- + bbx : np.array + The bounding box with 8 corners, shape: (8, 3) + """ + + bbx = np.array([[extent[0], -extent[1], -extent[2]], + [extent[0], extent[1], -extent[2]], + [-extent[0], extent[1], -extent[2]], + [-extent[0], -extent[1], -extent[2]], + [extent[0], -extent[1], extent[2]], + [extent[0], extent[1], extent[2]], + [-extent[0], extent[1], extent[2]], + [-extent[0], -extent[1], extent[2]]]) + + return bbx + + +def project_world_objects(object_dict, + output_dict, + lidar_pose, + lidar_range, + order, + enlarge_z=False): + """ + Project the objects under world coordinates into another coordinate + based on the provided extrinsic. + + Parameters + ---------- + object_dict : dict + The dictionary contains all objects surrounding a certain cav. + + output_dict : dict + key: object id, value: object bbx (xyzlwhyaw). + + lidar_pose : list + (6, ), lidar pose under world coordinate, [x, y, z, roll, yaw, pitch]. + + lidar_range : list + [minx, miny, minz, maxx, maxy, maxz] + + order : str + 'lwh' or 'hwl' + """ + for object_id, object_content in object_dict.items(): + location = object_content['location'] + rotation = object_content['angle'] + center = [0,0,0] if 'center' not in object_content else object_content['center'] + extent = object_content['extent'] + + object_pose = [location[0] + center[0], + location[1] + center[1], + location[2] + center[2], + rotation[0], rotation[1], rotation[2]] + + + object2lidar = x1_to_x2(object_pose, lidar_pose) + + # shape (3, 8) + bbx = create_bbx(extent).T + # bounding box under ego coordinate shape (4, 8) + bbx = np.r_[bbx, [np.ones(bbx.shape[1])]] + + # project the 8 corners to world coordinate + bbx_lidar = np.dot(object2lidar, bbx).T + bbx_lidar = np.expand_dims(bbx_lidar[:, :3], 0) + bbx_lidar = corner_to_center(bbx_lidar, order=order) + + if enlarge_z: + lidar_range_z_larger = copy.deepcopy(lidar_range) + lidar_range_z_larger[2] -= 10 + lidar_range_z_larger[5] += 10 + lidar_range = lidar_range_z_larger + + bbx_lidar = mask_boxes_outside_range_numpy(bbx_lidar, + lidar_range, + order) + + if bbx_lidar.shape[0] > 0: + output_dict.update({object_id: bbx_lidar}) + + +def project_world_objects_v2x(object_dict, + output_dict, + reference_lidar_pose, + lidar_range, + order, + lidar_np): + """ + Project the objects under world coordinates into another coordinate + based on the provided extrinsic. + + Parameters + ---------- + object_dict : + gt boxes: numpy.ndarray (N,10) + [x,y,z,dx,dy,dz,w,a,b,c], dxdydz=lwh + object_ids: numpy.ndarray (N,) + + output_dict : dict + key: object id, value: object bbx (xyzlwhyaw). + + reference_lidar_pose : list + (6, ), lidar pose under world coordinate, [x, y, z, roll, yaw, pitch]. + + lidar_range : list + [minx, miny, minz, maxx, maxy, maxz] + + order : str + 'lwh' or 'hwl' + + lidar_np: np.ndarray + point cloud in ego coord. 
Used to determine if any lidar point hits the box + + + output_dict: [x,y,z, lwh or hwl, yaw] + """ + from icecream import ic + gt_boxes = object_dict['gt_boxes'] + object_ids = object_dict['object_ids'] + for i, object_content in enumerate(gt_boxes): + x,y,z,dx,dy,dz,w,a,b,c = object_content + + q = Quaternion([w,a,b,c]) + T_world_object = q.transformation_matrix + T_world_object[:3,3] = object_content[:3] + + T_world_lidar = x_to_world(reference_lidar_pose) + + object2lidar = np.linalg.solve(T_world_lidar, T_world_object) # T_lidar_object + + + # shape (3, 8). + # or we can use the create_bbx funcion. + x_corners = dx / 2 * np.array([ 1, 1, -1, -1, 1, 1, -1, -1]) # (8,) + y_corners = dy / 2 * np.array([-1, 1, 1, -1, -1, 1, 1, -1]) + z_corners = dz / 2 * np.array([-1, -1, -1, -1, 1, 1, 1, 1]) + + bbx = np.vstack((x_corners, y_corners, z_corners)) # (3, 8) + + # bounding box under ego coordinate shape (4, 8) + bbx = np.r_[bbx, [np.ones(bbx.shape[1])]] + + # project the 8 corners to world coordinate + bbx_lidar = np.dot(object2lidar, bbx).T # (8, 4) + bbx_lidar = np.expand_dims(bbx_lidar[:, :3], 0) # (1, 8, 3) + bbx_lidar = corner_to_center(bbx_lidar, order=order) + + lidar_range_z_larger = copy.deepcopy(lidar_range) + lidar_range_z_larger[2] -= 1 + lidar_range_z_larger[5] += 1 + + bbx_lidar = mask_boxes_outside_range_numpy(bbx_lidar, + lidar_range_z_larger, + order) + + + if bbx_lidar.shape[0] > 0: + output_dict.update({object_ids[i]: bbx_lidar}) + + +def get_points_in_rotated_box(p, box_corner): + """ + Get points within a rotated bounding box (2D version). + + Parameters + ---------- + p : numpy.array + Points to be tested with shape (N, 2). + box_corner : numpy.array + Corners of bounding box with shape (4, 2). + + Returns + ------- + p_in_box : numpy.array + Points within the box. + + """ + edge1 = box_corner[1, :] - box_corner[0, :] + edge2 = box_corner[3, :] - box_corner[0, :] + p_rel = p - box_corner[0, :].reshape(1, -1) + + l1 = get_projection_length_for_vector_projection(p_rel, edge1) + l2 = get_projection_length_for_vector_projection(p_rel, edge2) + # A point is within the box, if and only after projecting the + # point onto the two edges s.t. p_rel = [edge1, edge2] @ [l1, l2]^T, + # we have 0<=l1<=1 and 0<=l2<=1. + mask = np.logical_and(l1 >= 0, l1 <= 1) + mask = np.logical_and(mask, l2 >= 0) + mask = np.logical_and(mask, l2 <= 1) + p_in_box = p[mask, :] + return p_in_box + + +def get_points_in_rotated_box_3d(p, box_corner): + """ + Get points within a rotated bounding box (3D version). + + Parameters + ---------- + p : numpy.array + Points to be tested with shape (N, 3). + box_corner : numpy.array + Corners of bounding box with shape (8, 3). + + Returns + ------- + p_in_box : numpy.array + Points within the box. + + """ + edge1 = box_corner[1, :] - box_corner[0, :] + edge2 = box_corner[3, :] - box_corner[0, :] + edge3 = box_corner[4, :] - box_corner[0, :] + + p_rel = p - box_corner[0, :].reshape(1, -1) + + l1 = get_projection_length_for_vector_projection(p_rel, edge1) + l2 = get_projection_length_for_vector_projection(p_rel, edge2) + l3 = get_projection_length_for_vector_projection(p_rel, edge3) + # A point is within the box, if and only after projecting the + # point onto the two edges s.t. p_rel = [edge1, edge2] @ [l1, l2]^T, + # we have 0<=l1<=1 and 0<=l2<=1. 
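+    # For the 3D version the same criterion is applied to a third edge
+    # (bottom corner -> top corner), so a point lies inside the box only if
+    # all three normalized projection lengths l1, l2, l3 fall in [0, 1].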
+ mask1 = np.logical_and(l1 >= 0, l1 <= 1) + mask2 = np.logical_and(l2 >= 0, l2 <= 1) + mask3 = np.logical_and(l3 >= 0, l3 <= 1) + + mask = np.logical_and(mask1, mask2) + mask = np.logical_and(mask, mask3) + p_in_box = p[mask, :] + + return p_in_box + + +def get_projection_length_for_vector_projection(a, b): + """ + Get projection length for the Vector projection of a onto b s.t. + a_projected = length * b. (2D version) See + https://en.wikipedia.org/wiki/Vector_projection#Vector_projection_2 + for more details. + + Parameters + ---------- + a : numpy.array + The vectors to be projected with shape (N, 2). + + b : numpy.array + The vector that is projected onto with shape (2). + + Returns + ------- + length : numpy.array + The length of projected a with respect to b. + """ + assert np.sum(b ** 2, axis=-1) > 1e-6 + length = a.dot(b) / np.sum(b ** 2, axis=-1) + return length + + +def nms_rotated(boxes, scores, threshold): + """Performs rorated non-maximum suppression and returns indices of kept + boxes. + + Parameters + ---------- + boxes : torch.tensor + The location preds with shape (N, 4, 2). + + scores : torch.tensor + The predicted confidence score with shape (N,) + + threshold: float + IoU threshold to use for filtering. + + Returns + ------- + An array of index + """ + if boxes.shape[0] == 0: + return np.array([], dtype=np.int32) + boxes = boxes.cpu().detach().numpy() + scores = scores.cpu().detach().numpy() + + polygons = common_utils.convert_format(boxes) + + top = 1000 + # Get indicies of boxes sorted by scores (highest first) + ixs = scores.argsort()[::-1][:top] + + pick = [] + while len(ixs) > 0: + # Pick top box and add its index to the list + i = ixs[0] + pick.append(i) + # Compute IoU of the picked box with the rest + iou = common_utils.compute_iou(polygons[i], polygons[ixs[1:]]) + # Identify boxes with IoU over the threshold. This + # returns indices into ixs[1:], so add 1 to get + # indices into ixs. + remove_ixs = np.where(iou > threshold)[0] + 1 + # Remove indices of the picked and overlapped boxes. + ixs = np.delete(ixs, remove_ixs) + ixs = np.delete(ixs, 0) + + return np.array(pick, dtype=np.int32) + + +def nms_pytorch(boxes: torch.tensor, thresh_iou: float): + """ + Apply non-maximum suppression to avoid detecting too many + overlapping bounding boxes for a given object. + + Parameters + ---------- + boxes : torch.tensor + The location preds along with the class predscores, + Shape: [num_boxes,5]. + thresh_iou : float + (float) The overlap thresh for suppressing unnecessary boxes. 
+ Returns + ------- + A list of index + """ + + # we extract coordinates for every + # prediction box present in P + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + + # we extract the confidence scores as well + scores = boxes[:, 4] + + # calculate area of every block in P + areas = (x2 - x1) * (y2 - y1) + + # sort the prediction boxes in P + # according to their confidence scores + order = scores.argsort() + + # initialise an empty list for + # filtered prediction boxes + keep = [] + + while len(order) > 0: + + # extract the index of the + # prediction with highest score + # we call this prediction S + idx = order[-1] + + # push S in filtered predictions list + keep.append(idx.numpy().item() + if not idx.is_cuda else idx.cpu().detach().numpy().item()) + + # remove S from P + order = order[:-1] + + # sanity check + if len(order) == 0: + break + + # select coordinates of BBoxes according to + # the indices in order + xx1 = torch.index_select(x1, dim=0, index=order) + xx2 = torch.index_select(x2, dim=0, index=order) + yy1 = torch.index_select(y1, dim=0, index=order) + yy2 = torch.index_select(y2, dim=0, index=order) + + # find the coordinates of the intersection boxes + xx1 = torch.max(xx1, x1[idx]) + yy1 = torch.max(yy1, y1[idx]) + xx2 = torch.min(xx2, x2[idx]) + yy2 = torch.min(yy2, y2[idx]) + + # find height and width of the intersection boxes + w = xx2 - xx1 + h = yy2 - yy1 + + # take max with 0.0 to avoid negative w and h + # due to non-overlapping boxes + w = torch.clamp(w, min=0.0) + h = torch.clamp(h, min=0.0) + + # find the intersection area + inter = w * h + + # find the areas of BBoxes according the indices in order + rem_areas = torch.index_select(areas, dim=0, index=order) + + # find the union of every prediction T in P + # with the prediction S + # Note that areas[idx] represents area of S + union = (rem_areas - inter) + areas[idx] + + # find the IoU of every prediction in P with S + IoU = inter / union + + # keep the boxes with IoU less than thresh_iou + mask = IoU < thresh_iou + order = order[mask] + + return keep + + +def remove_large_pred_bbx(bbx_3d): + """ + Remove large bounding box. + + Parameters + ---------- + bbx_3d : torch.Tensor + Predcited 3d bounding box, shape:(N,8,3) + + Returns + ------- + index : torch.Tensor + The keep index. + """ + bbx_x_max = torch.max(bbx_3d[:, :, 0], dim=1)[0] + bbx_x_min = torch.min(bbx_3d[:, :, 0], dim=1)[0] + x_len = bbx_x_max - bbx_x_min + + bbx_y_max = torch.max(bbx_3d[:, :, 1], dim=1)[0] + bbx_y_min = torch.min(bbx_3d[:, :, 1], dim=1)[0] + y_len = bbx_y_max - bbx_y_min + + bbx_z_max = torch.max(bbx_3d[:, :, 1], dim=1)[0] + bbx_z_min = torch.min(bbx_3d[:, :, 1], dim=1)[0] + z_len = bbx_z_max - bbx_z_min + + index = torch.logical_and(x_len <= 6, y_len <= 6) + index = torch.logical_and(index, z_len) + + return index + + +def remove_bbx_abnormal_z(bbx_3d): + """ + Remove bounding box that has negative z axis. + + Parameters + ---------- + bbx_3d : torch.Tensor + Predcited 3d bounding box, shape:(N,8,3) + + Returns + ------- + index : torch.Tensor + The keep index. + """ + bbx_z_min = torch.min(bbx_3d[:, :, 2], dim=1)[0] + bbx_z_max = torch.max(bbx_3d[:, :, 2], dim=1)[0] + # NOTE gjliu: (-3, 5) -> (-100, 100) + index = torch.logical_and(bbx_z_min >= -100, bbx_z_max <= 100) + + return index + + +def project_points_by_matrix_torch(points, transformation_matrix): + """ + Project the points to another coordinate system based on the + transformation matrix. 
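+
+    Unlike the earlier definition of the same name (which is shadowed by this
+    one and therefore never used), this version accepts either np.ndarray or
+    torch.Tensor input and returns the projected points in the same type.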
+ + Parameters + ---------- + points : torch.Tensor + 3D points, (N, 3) + transformation_matrix : torch.Tensor + Transformation matrix, (4, 4) + Returns + ------- + projected_points : torch.Tensor + The projected points, (N, 3) + """ + points, is_numpy = \ + common_utils.check_numpy_to_torch(points) + transformation_matrix, _ = \ + common_utils.check_numpy_to_torch(transformation_matrix) + + # convert to homogeneous coordinates via padding 1 at the last dimension. + # (N, 4) + points_homogeneous = F.pad(points, (0, 1), mode="constant", value=1) + # (N, 4) + projected_points = torch.einsum("ik, jk->ij", points_homogeneous, + transformation_matrix) + + return projected_points[:, :3] if not is_numpy \ + else projected_points[:, :3].numpy() + + +def box_encode( + boxes, + anchors, + encode_angle_to_vector=False, + encode_angle_with_residual=False, + smooth_dim=False, + norm_velo=False +): + """box encode for VoxelNet + Args: + boxes ([N, 7] Tensor): normal boxes: x, y, z, w, l, h, r. + anchors ([N, 7] Tensor): anchors. + """ + + box_ndim = anchors.shape[-1] + + if box_ndim == 7: + xa, ya, za, wa, la, ha, ra = torch.split(anchors, 1, dim=-1) + xg, yg, zg, wg, lg, hg, rg = torch.split(boxes, 1, dim=-1) + else: + xa, ya, za, wa, la, ha, vxa, vya, ra = torch.split(anchors, 1, dim=-1) + xg, yg, zg, wg, lg, hg, vxg, vyg, rg = torch.split(boxes, 1, dim=-1) + + diagonal = torch.sqrt(la ** 2 + wa ** 2) + xt = (xg - xa) / diagonal + yt = (yg - ya) / diagonal + zt = (zg - za) / ha + + if smooth_dim: + lt = lg / la - 1 + wt = wg / wa - 1 + ht = hg / ha - 1 + else: + lt = torch.log(lg / la) + wt = torch.log(wg / wa) + ht = torch.log(hg / ha) + + ret = [xt, yt, zt, wt, lt, ht] + + if box_ndim > 7: + if norm_velo: + vxt = (vxg - vxa) / diagonal + vyt = (vyg - vya) / diagonal + else: + vxt = vxg - vxa + vyt = vyg - vya + ret.extend([vxt, vyt]) + + if encode_angle_to_vector: + rgx = torch.cos(rg) + rgy = torch.sin(rg) + if encode_angle_with_residual: + rax = torch.cos(ra) + ray = torch.sin(ra) + rtx = rgx - rax + rty = rgy - ray + ret.extend([rtx, rty]) + else: + ret.extend([rgx, rgy]) + else: + rt = rg - ra + ret.append(rt) + + return torch.cat(ret, dim=-1) + + +def box_decode( + box_encodings, + anchors, + encode_angle_to_vector=False, + encode_angle_with_residual=False, + bin_loss=False, + smooth_dim=False, + norm_velo=False, +): + """box decode for VoxelNet in lidar + Args: + boxes ([N, 7] Tensor): normal boxes: x, y, z, w, l, h, r + anchors ([N, 7] Tensor): anchors + """ + box_ndim = anchors.shape[-1] + + if box_ndim == 9: # False + xa, ya, za, wa, la, ha, vxa, vya, ra = torch.split(anchors, 1, dim=-1) + if encode_angle_to_vector: + xt, yt, zt, wt, lt, ht, vxt, vyt, rtx, rty = torch.split(box_encodings, 1, dim=-1) + else: + xt, yt, zt, wt, lt, ht, vxt, vyt, rt = torch.split(box_encodings, 1, dim=-1) + + elif box_ndim == 7: + xa, ya, za, wa, la, ha, ra = torch.split(anchors, 1, dim=-1) + if encode_angle_to_vector: # False + xt, yt, zt, wt, lt, ht, rtx, rty = torch.split(box_encodings, 1, dim=-1) + else: + xt, yt, zt, wt, lt, ht, rt = torch.split(box_encodings, 1, dim=-1) + + diagonal = torch.sqrt(la ** 2 + wa ** 2) + xg = xt * diagonal + xa + yg = yt * diagonal + ya + zg = zt * ha + za + + ret = [xg, yg, zg] + + if smooth_dim: # False + lg = (lt + 1) * la + wg = (wt + 1) * wa + hg = (ht + 1) * ha + else: + lg = torch.exp(lt) * la + wg = torch.exp(wt) * wa + hg = torch.exp(ht) * ha + ret.extend([wg, lg, hg]) + + if encode_angle_to_vector: # False + if encode_angle_with_residual: + rax = torch.cos(ra) + ray 
= torch.sin(ra) + rgx = rtx + rax + rgy = rty + ray + rg = torch.atan2(rgy, rgx) + else: + rg = torch.atan2(rty, rtx) + else: + rg = rt + ra + + if box_ndim > 7: # False + if norm_velo: + vxg = vxt * diagonal + vxa + vyg = vyt * diagonal + vya + else: + vxg = vxt + vxa + vyg = vyt + vya + ret.extend([vxg, vyg]) + + ret.append(rg) + + return torch.cat(ret, dim=-1) + + +def project_world_objects_dairv2x(object_list, + output_dict, + lidar_pose, + lidar_range, + order): + """ + Project the objects under world coordinates into another coordinate + based on the provided extrinsic. + + Parameters + ---------- + object_list : list + The list contains all objects surrounding a certain cav. + + output_dict : dict + key: object id, value: object bbx (xyzlwhyaw). + + lidar_pose : list + (6, ), lidar pose under world coordinate, [x, y, z, roll, yaw, pitch]. + + lidar_range : list + [minx, miny, minz, maxx, maxy, maxz] + + order : str + 'lwh' or 'hwl' + """ + i = 0 + + for object_content in object_list: + object_id = i + i = i + 1 + lidar_to_world = x_to_world(lidar_pose) # T_world_lidar + world_to_lidar = np.linalg.inv(lidar_to_world) + + corners_world = np.array(object_content['world_8_points']) # [8,3] + corners_world_homo = np.pad(corners_world, ((0,0), (0,1)), constant_values=1) # [8, 4] + corners_lidar = (world_to_lidar @ corners_world_homo.T).T + + lidar_range_z_larger = copy.deepcopy(lidar_range) + lidar_range_z_larger[2] -= 1 + lidar_range_z_larger[5] += 1 + + bbx_lidar = corners_lidar + bbx_lidar = np.expand_dims(bbx_lidar[:, :3], 0) # [1, 8, 3] + bbx_lidar = corner_to_center(bbx_lidar, order=order) + bbx_lidar = mask_boxes_outside_range_numpy(bbx_lidar, lidar_range_z_larger, order) + if bbx_lidar.shape[0] > 0: + output_dict.update({object_id: bbx_lidar}) + + +def load_single_objects_dairv2x(object_list, + output_dict, + lidar_range, + order): + """ + + Parameters + ---------- + object_list : list + The list contains all objects surrounding a certain cav. + + output_dict : dict + key: object id, value: object bbx (xyzlwhyaw). 
+ + lidar_range : list + [minx, miny, minz, maxx, maxy, maxz] + + order : str + 'lwh' or 'hwl' + """ + + i = 0 + for object_content in object_list: + object_id = i + if 'rotation' not in object_content: + print(object_content) + x = object_content['3d_location']['x'] + y = object_content['3d_location']['y'] + z = object_content['3d_location']['z'] + l = object_content['3d_dimensions']['l'] + h = object_content['3d_dimensions']['h'] + w = object_content['3d_dimensions']['w'] + rotation = object_content['rotation'] + + if isinstance(x, str): # in camera label, xyz are str + x = eval(x) + y = eval(y) + z = eval(z) + + if l==0 or h ==0 or w==0: + continue + i = i + 1 + + lidar_range_z_larger = copy.deepcopy(lidar_range) + lidar_range_z_larger[2] -= 1 + lidar_range_z_larger[5] += 1 + + bbx_lidar = [x,y,z,h,w,l,rotation] if order=="hwl" else [x,y,z,l,w,h,rotation] # suppose order is in ['hwl', 'lwh'] + bbx_lidar = np.array(bbx_lidar).reshape(1,-1) # [1,7] + + bbx_lidar = mask_boxes_outside_range_numpy(bbx_lidar, lidar_range_z_larger, order) + if bbx_lidar.shape[0] > 0: + if object_content['type'] == "Car" or \ + object_content['type'] == "Van" or \ + object_content['type'] == "Truck" or \ + object_content['type'] == "Bus": + output_dict.update({object_id: bbx_lidar}) + + + + +def load_single_objects_dairv2x_hetero(object_list, + output_dict, + lidar_range, + trans_mat, + order): + """ + + Parameters + ---------- + object_list : list + The list contains all objects surrounding a certain cav. + + output_dict : dict + key: object id, value: object bbx (xyzlwhyaw). + + lidar_range : list + [minx, miny, minz, maxx, maxy, maxz] + + order : str + 'lwh' or 'hwl' + """ + + i = 0 + for object_content in object_list: + object_id = i + x = object_content['3d_location']['x'] + y = object_content['3d_location']['y'] + z = object_content['3d_location']['z'] + l = object_content['3d_dimensions']['l'] + h = object_content['3d_dimensions']['h'] + w = object_content['3d_dimensions']['w'] + rotation = object_content['rotation'] + + if isinstance(x, str): # in camera label, xyz are str + x = eval(x) + y = eval(y) + z = eval(z) + + if l==0 or h ==0 or w==0: + continue + i = i + 1 + + lidar_range_z_larger = copy.deepcopy(lidar_range) + lidar_range_z_larger[2] -= 1 + lidar_range_z_larger[5] += 1 + + bbx_lidar = [x,y,z,h,w,l,rotation] if order=="hwl" else [x,y,z,l,w,h,rotation] # suppose order is in ['hwl', 'lwh'] + bbx_lidar = np.array(bbx_lidar).reshape(1,-1) # [1,7] + bbx_lidar_ego = corner_to_center( + project_box3d(boxes_to_corners_3d(bbx_lidar, order), trans_mat) , order=order) + bbx_lidar_ego = mask_boxes_outside_range_numpy(bbx_lidar_ego, lidar_range_z_larger, order) + + if bbx_lidar_ego.shape[0] > 0: + if object_content['type'] == "Car" or \ + object_content['type'] == "Van" or \ + object_content['type'] == "Truck" or \ + object_content['type'] == "Bus": + output_dict.update({object_id: bbx_lidar_ego}) + + + +def box_is_visible(bbx_lidar, visibility_map): + """ + fitler bbx_lidar by visibility map. + + Parameters: + + (0,0)------------px + | ^ x | + | | | + | o---> y | + | | + | | + py-----------------(256,256) + + bbx_lidar : np.ndarray + (1, 7), x, y, z, dx, dy, dz, yaw. dx,dy,dz follows order. + + visibility_map : np.ndarray + (256, 256). Non zero is visible. 
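+
+    A small worked example of the rasterization performed below (0.39 m per
+    pixel, ego at pixel (127, 127), +x up, +y right): a box centre at
+    (x, y) = (10.0, -5.0) gives py = 127 - int(10.0 / 0.39) = 102 and
+    px = 127 + int(-5.0 / 0.39) = 115, so the box counts as visible iff
+    visibility_map[102, 115] > 0.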
+ """ + + x, y = bbx_lidar[0,:2] + + # rasterize x and y + py = 127 - int(x/0.39) + px = 127 + int(y/0.39) + + if py < 0 or py >= 256 or px < 0 or px >= 256: + return False + + return visibility_map[py, px] > 0 + + +def project_world_visible_objects(object_dict, + output_dict, + lidar_pose, + lidar_range, + order, + visibility_map, + enlarge_z = False): + """ + It's used by CameraDataset. Filtered by visibility map. + + Project the objects under world coordinates into another coordinate + based on the provided extrinsic. + + Parameters + ---------- + object_dict : dict + The dictionary contains all objects surrounding a certain cav. + + output_dict : dict + key: object id, value: object bbx (xyzlwhyaw). + + lidar_pose : list + (6, ), lidar pose under world coordinate, [x, y, z, roll, yaw, pitch]. + + lidar_range : list + [minx, miny, minz, maxx, maxy, maxz] + + order : str + 'lwh' or 'hwl' + + visibility_map : np.ndarray + for OPV2V, its 256*256 resolution. 0.39m per pixel. heading up. + """ + for object_id, object_content in object_dict.items(): + location = object_content['location'] + rotation = object_content['angle'] + center = [0,0,0] if 'center' not in object_content else object_content['center'] + extent = object_content['extent'] + + object_pose = [location[0] + center[0], + location[1] + center[1], + location[2] + center[2], + rotation[0], rotation[1], rotation[2]] + + + object2lidar = x1_to_x2(object_pose, lidar_pose) + + # shape (3, 8) + bbx = create_bbx(extent).T + # bounding box under ego coordinate shape (4, 8) + bbx = np.r_[bbx, [np.ones(bbx.shape[1])]] + + # project the 8 corners to world coordinate + bbx_lidar = np.dot(object2lidar, bbx).T + bbx_lidar = np.expand_dims(bbx_lidar[:, :3], 0) + bbx_lidar = corner_to_center(bbx_lidar, order=order) + if enlarge_z: + lidar_range_z_larger = copy.deepcopy(lidar_range) + lidar_range_z_larger[2] -= 10 + lidar_range_z_larger[5] += 10 + lidar_range = lidar_range_z_larger + + bbx_lidar = mask_boxes_outside_range_numpy(bbx_lidar, + lidar_range, + order) + + if bbx_lidar.shape[0] > 0 and box_is_visible(bbx_lidar, visibility_map): + output_dict.update({object_id: bbx_lidar}) + diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/camera_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/camera_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..cd7d365b88a6b73a7d9229f5945c77e216e4b6de --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/camera_utils.py @@ -0,0 +1,334 @@ +from PIL import Image +import numpy as np +import torch +import torchvision +import cv2 +import math +from shapely.geometry import Point, MultiPoint + +def load_camera_data(camera_files, preload=True): + """ + Args: + camera_files: list, + store camera path + shape : tuple + (width, height), resize the image, and overcoming the lazy loading. 
+ Returns: + camera_data_list: list, + list of Image, RGB order + """ + camera_data_list = [] + for camera_file in camera_files: + camera_data = Image.open(camera_file) + if preload: + camera_data = camera_data.copy() + camera_data_list.append(camera_data) + return camera_data_list + + +def sample_augmentation(data_aug_conf, is_train): + """ + https://github.com/nv-tlabs/lift-splat-shoot/blob/d74598cb51101e2143097ab270726a561f81f8fd/src/data.py#L96 + """ + H, W = data_aug_conf['H'], data_aug_conf['W'] + fH, fW = data_aug_conf['final_dim'] + if is_train: + resize = np.random.uniform(*data_aug_conf['resize_lim']) + resize_dims = (int(W*resize), int(H*resize)) + newW, newH = resize_dims + crop_h = int((1 - np.random.uniform(*data_aug_conf['bot_pct_lim']))*newH) - fH + crop_w = int(np.random.uniform(0, max(0, newW - fW))) + crop = (crop_w, crop_h, crop_w + fW, crop_h + fH) # [x_start, y_start, x_end, y_end] + flip = False + if data_aug_conf['rand_flip'] and np.random.choice([0, 1]): + flip = True + rotate = np.random.uniform(*data_aug_conf['rot_lim']) + else: + resize = max(fH/H, fW/W) + resize_dims = (int(W*resize), int(H*resize)) + newW, newH = resize_dims + crop_h = int((1 - np.mean(data_aug_conf['bot_pct_lim']))*newH) - fH + crop_w = int(max(0, newW - fW) / 2) + crop = (crop_w, crop_h, crop_w + fW, crop_h + fH) + flip = False + rotate = 0 + return resize, resize_dims, crop, flip, rotate + + +def img_transform(imgs, post_rot, post_tran, + resize, resize_dims, crop, + flip, rotate): + imgs_output = [] + for img in imgs: + # adjust image + img = img.resize(resize_dims) + img = img.crop(crop) + if flip: + img = img.transpose(method=Image.FLIP_LEFT_RIGHT) + img = img.rotate(rotate) + imgs_output.append(img) + + + # post-homography transformation + post_rot *= resize + post_tran -= torch.Tensor(crop[:2]) + + if flip: + A = torch.Tensor([[-1, 0], [0, 1]]) + b = torch.Tensor([crop[2] - crop[0], 0]) + post_rot = A.matmul(post_rot) + post_tran = A.matmul(post_tran) + b + + A = get_rot(rotate/180*np.pi) + b = torch.Tensor([crop[2] - crop[0], crop[3] - crop[1]]) / 2 # [x_start, y_start, x_end, y_end] + b = A.matmul(-b) + b + post_rot = A.matmul(post_rot) + post_tran = A.matmul(post_tran) + b + + return imgs_output, post_rot, post_tran + +def get_rot(h): + return torch.Tensor([ + [np.cos(h), np.sin(h)], + [-np.sin(h), np.cos(h)], + ]) + +class NormalizeInverse(torchvision.transforms.Normalize): + # https://discuss.pytorch.org/t/simple-way-to-inverse-transform-normalization/4821/8 + def __init__(self, mean, std): + mean = torch.as_tensor(mean) + std = torch.as_tensor(std) + std_inv = 1 / (std + 1e-7) + mean_inv = -mean * std_inv + super().__init__(mean=mean_inv, std=std_inv) + + def __call__(self, tensor): + return super().__call__(tensor.clone()) + + +denormalize_img = torchvision.transforms.Compose(( + NormalizeInverse(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + torchvision.transforms.ToPILImage(), + )) + + +normalize_img = torchvision.transforms.Compose(( + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), +)) + +img_to_tensor = torchvision.transforms.ToTensor() # [0,255] -> [0,1] + + +def gen_dx_bx(xbound, ybound, zbound): + dx = torch.Tensor([row[2] for row in [xbound, ybound, zbound]]) + bx = torch.Tensor([row[0] + row[2]/2.0 for row in [xbound, ybound, zbound]]) + nx = torch.LongTensor([(row[1] - row[0]) / row[2] for row in [xbound, ybound, zbound]]) + + return dx, bx, nx + + +def bin_depths(depth_map, 
mode, depth_min, depth_max, num_bins, target=True): + """ + Converts depth map into bin indices + Args: + depth_map [torch.Tensor(H, W)]: Depth Map + mode [string]: Discretiziation mode (See https://arxiv.org/pdf/2005.13423.pdf for more details) + UD: Uniform discretiziation + LID: Linear increasing discretiziation + SID: Spacing increasing discretiziation + depth_min [float]: Minimum depth value + depth_max [float]: Maximum depth value + num_bins [int]: Number of depth bins + target [bool]: Whether the depth bins indices will be used for a target tensor in loss comparison + Returns: + indices [torch.Tensor(H, W)]: Depth bin indices + """ + if mode == "UD": + bin_size = (depth_max - depth_min) / num_bins + indices = ((depth_map - depth_min) / bin_size) + elif mode == "LID": + bin_size = 2 * (depth_max - depth_min) / (num_bins * (1 + num_bins)) + indices = -0.5 + 0.5 * torch.sqrt(1 + 8 * (depth_map - depth_min) / bin_size) + elif mode == "SID": + indices = num_bins * (torch.log(1 + depth_map) - math.log(1 + depth_min)) / \ + (math.log(1 + depth_max) - math.log(1 + depth_min)) + else: + raise NotImplementedError + + if target: + # Remove indicies outside of bounds + # mask = (indices < 0) | (indices > num_bins) | (~torch.isfinite(indices)) + # indices[mask] = num_bins + indices[indices < 0] = 0 + indices[indices >= num_bins] = num_bins - 1 + indices[~torch.isfinite(indices)] = num_bins - 1 + + # Convert to integer + indices = indices.type(torch.int64) + return indices, None + else: + # mask indices outside of bounds + mask = (indices < 0) | (indices >= num_bins) | (~torch.isfinite(indices)) + indices[indices < 0] = 0 + indices[indices >= num_bins] = num_bins - 1 + indices[~torch.isfinite(indices)] = num_bins - 1 + + # Convert to integer + indices = indices.type(torch.int64) + return indices, ~mask + +def depth_discretization(depth_min, depth_max, num_bins, mode): + if mode == "UD": + bin_size = (depth_max - depth_min) / num_bins + depth_discre = depth_min + bin_size * np.arange(num_bins) + elif mode == "LID": + bin_size = 2 * (depth_max - depth_min) / (num_bins * (1 + num_bins)) + depth_discre = depth_min + bin_size * (np.arange(num_bins) * np.arange(1, 1+num_bins)) / 2 + else: + raise NotImplementedError + return depth_discre + +def indices_to_depth(indices, depth_min, depth_max, num_bins, mode): + if mode == "UD": + bin_size = (depth_max - depth_min) / num_bins + depth = indices * bin_size + depth_min + elif mode == "LID": + bin_size = 2 * (depth_max - depth_min) / (num_bins * (1 + num_bins)) + depth = depth_min + bin_size * (indices * (indices+1)) / 2 + else: + raise NotImplementedError + return depth + +def cumsum_trick(x, geom_feats, ranks): + x = x.cumsum(0) + kept = torch.ones(x.shape[0], device=x.device, dtype=torch.bool) + kept[:-1] = (ranks[1:] != ranks[:-1]) + + x, geom_feats = x[kept], geom_feats[kept] + x = torch.cat((x[:1], x[1:] - x[:-1])) + + return x, geom_feats + + +class QuickCumsum(torch.autograd.Function): + @staticmethod + def forward(ctx, x, geom_feats, ranks): + x = x.cumsum(0) + kept = torch.ones(x.shape[0], device=x.device, dtype=torch.bool) + kept[:-1] = (ranks[1:] != ranks[:-1]) + + x, geom_feats = x[kept], geom_feats[kept] + x = torch.cat((x[:1], x[1:] - x[:-1])) + + # save kept for backward + ctx.save_for_backward(kept) + + # no gradient for geom_feats + ctx.mark_non_differentiable(geom_feats) + + return x, geom_feats + + @staticmethod + def backward(ctx, gradx, gradgeom): + kept, = ctx.saved_tensors + back = torch.cumsum(kept, 0) + back[kept] -= 1 + + val = 
gradx[back] + + return val, None, None + +def coord_3d_to_2d(gt_box3d, int_matrix, ext_matrix, image_H=600, image_W=800, image=None, idx=None): + """ + Projects XYZ points onto the canvas and returns the projected canvas + coordinates. + + Args: + gt_box3d : np.ndarray + shape (N, 8, 3). point coord in world (LiDAR) coordinate. + int_matrix : np.ndarray + shape (4, 4) + ext_matrix : np.ndarray + shape (4, 4), T_wc, transform point in camera coord to world coord. + + Returns: + gt_box2d : np.ndarray + shape (N, 8, 2). pixel coord (u, v) in the image. You may want to flip them for image data indexing. + gt_box2d_mask : np.ndarray (bool) + shape (N,). If false, this box is out of image boundary + fg_mask : np.ndarray + shape (image_H, image_W), 1 means foreground, 0 means background + """ + N = gt_box3d.shape[0] + xyz = gt_box3d.reshape(-1, 3) # (N*8, 3) + + xyz_hom = np.concatenate( + [xyz, np.ones((xyz.shape[0], 1), dtype=np.float32)], axis=1) + + ext_matrix = np.linalg.inv(ext_matrix)[:3,:4] + img_pts = (int_matrix @ ext_matrix @ xyz_hom.T).T + + depth = img_pts[:, 2] + uv = img_pts[:, :2] / depth[:, None] + uv_int = uv.round().astype(np.int32) # [N*8, 2] + + + # o--------> u + # | + # | + # | + # v v + + + valid_mask1 = ((uv_int[:, 0] >= 0) & (uv_int[:, 0] < image_W) & + (uv_int[:, 1] >= 0) & (uv_int[:, 1] < image_H)).reshape(N, 8) + + valid_mask2 = ((depth > 0.5) & (depth < 100)).reshape(N, 8) + gt_box2d_mask = valid_mask1.any(axis=1) & valid_mask2.all(axis=1) # [N, ] + + gt_box2d = uv_int.reshape(N, 8, 2) # [N, 8, 2] + gt_box2d_u = np.clip(gt_box2d[:,:,0], 0, image_W-1) + gt_box2d_v = np.clip(gt_box2d[:,:,1], 0, image_H-1) + gt_box2d = np.stack((gt_box2d_u, gt_box2d_v), axis=-1) # [N, 8, 2] + + # create fg/bg mask + fg_mask = np.zeros((image_H, image_W)) + for gt_box in gt_box2d[gt_box2d_mask]: + u_min = gt_box[:,0].min() + v_min = gt_box[:,1].min() + u_max = gt_box[:,0].max() + v_max = gt_box[:,1].max() + fg_mask[v_min:v_max, u_min:u_max] = 1 + # poly = MultiPoint(gt_box).convex_hull + # cv2.fillConvexPoly(fg_mask, np.array(list(zip(*poly.exterior.coords.xy)), dtype=np.int32), 1) + + DEBUG = False + if DEBUG: + from matplotlib import pyplot as plt + plt.imshow(image) + for i in range(N): + if gt_box2d_mask[i]: + coord2d = gt_box2d[i] + for start, end in [(0, 1), (1, 2), (2, 3), (3, 0), + (0, 4), (1, 5), (2, 6), (3, 7), + (4, 5), (5, 6), (6, 7), (7, 4)]: + plt.plot(coord2d[[start,end]][:,0], coord2d[[start,end]][:,1], marker="o", c='g') + plt.savefig(f"/GPFS/rhome/yifanlu/OpenCOOD/vis_result/dairv2x_lss_vehonly/image_gt_box2d_{idx}.png", dpi=300) + plt.clf() + plt.imshow(fg_mask) + plt.savefig(f"/GPFS/rhome/yifanlu/OpenCOOD/vis_result/dairv2x_lss_vehonly/image_gt_box2d_{idx}_mask.png", dpi=300) + plt.clf() + + + return gt_box2d, gt_box2d_mask, fg_mask + + +def load_intrinsic_DAIR_V2X(int_dict): + # cam_D : [5, ], what'is this... 
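+    # cam_D is presumably the plumb-bob distortion coefficients
+    # [k1, k2, p1, p2, k3]; it is read below but not used -- only the 3x3
+    # intrinsic matrix reshaped from cam_K is returned.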
+ # cam_K : [9, ] + cam_D = int_dict['cam_D'] + cam_K = int_dict['cam_K'] + return np.array(cam_K).reshape(3,3) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/cleanup_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/cleanup_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..b9582e6042a071b5aae4a2f140f213d42957243e --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/cleanup_utils.py @@ -0,0 +1,20 @@ +import glob +import os +import sys + +def clean_all_numeric_checkpoint(path): + """ + remove all intermediate checkpoint except bestval + + path: str, + a path to log directory + """ + file_list = glob.glob(os.path.join(path, "net_epoch[0-9]*.pth")) + for file in file_list: + os.remove(file) + + +if __name__ == "__main__": + path = sys.argv[1] + assert os.path.isdir(path) + clean_all_numeric_checkpoint(path) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/common_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/common_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..96fe1cfd4218b3d81af9e11879657f4094c24396 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/common_utils.py @@ -0,0 +1,330 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , Hao Xiang , +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +Common utilities +""" + +import numpy as np +import torch +from shapely.geometry import Polygon +import json +import pickle +from collections import OrderedDict + +def update_dict(d1,d2): + ''' + credit: https://github.com/yutu-75/update_dict/blob/main/update_dict/update_dict.py + + :param d1: Default nested dictionary,默认嵌套字典; + :param d2: Updated dictionary 需要更新的字典; + :return d1: + Return a dict merged from default and custom + # >>> recursive_update('a', 'b') + Traceback (most recent call last): + ... + TypeError: Params of update_dict should be dicts + # >>> update_dict({'a':{"b":{"c":{"d"}}},"e":{"e1":{"e5":'qwq'}},"e5": {},"ss":"1111"}, + {"e5":'www',"ss":"ssss",'c':{},'ss1':'ss'}) + {'a': {'b': {'c': {}}}, 'e': {'e1': {'e5': 'www'}}, 'e5': 'www', 'ss': 'ssss' + # >>> update_dict({'a':{"b":{"c":{"d":'c'}}},"e":{"e1":{"e5":'qwq'}},"e5": {},"ss":"1111"},{"d":'www'}) + {'a': {'b': {'c': {'d': 'www'}}}, 'e': {'e1': {'e5': 'qwq'}}, 'e5': {}, 'ss': '1111'} + # >>> update_dict({'a': {'c': 1, 'd': {}}, 'b': 4}, {'a': 2}) + {'a': 2, 'b': 4} + ''' + + if not isinstance(d1, dict) or not isinstance(d2, dict): + raise TypeError('Params of update_dict should be dicts') + for i in d1: + if d2.get(i, None) is not None: + d1[i] = d2[i] + if isinstance(d1[i], dict): + update_dict(d1[i],d2) + return d1 + + +def merge_features_to_dict(processed_feature_list, merge=None): + """ + Merge the preprocessed features from different cavs to the same + dictionary. + + Parameters + ---------- + processed_feature_list : list + A list of dictionary containing all processed features from + different cavs. + merge : "stack" or "cat". used for images + + Returns + ------- + merged_feature_dict: dict + key: feature names, value: list of features. 
+ """ + + if len(processed_feature_list) == 0: + return None + + merged_feature_dict = OrderedDict() + + for i in range(len(processed_feature_list)): + for feature_name, feature in processed_feature_list[i].items(): + if feature_name not in merged_feature_dict: + merged_feature_dict[feature_name] = [] + if isinstance(feature, list): + merged_feature_dict[feature_name] += feature + else: + merged_feature_dict[feature_name].append(feature) # merged_feature_dict['coords'] = [f1,f2,f3,f4] + + # stack them + # it usually happens when merging cavs images -> v.shape = [N, Ncam, C, H, W] + # cat them + # it usually happens when merging batches cav images -> v is a list [(N1+N2+...Nn, Ncam, C, H, W))] + if merge=='stack': + for feature_name, features in merged_feature_dict.items(): + merged_feature_dict[feature_name] = torch.stack(features, dim=0) + elif merge=='cat': + for feature_name, features in merged_feature_dict.items(): + merged_feature_dict[feature_name] = torch.cat(features, dim=0) + + return merged_feature_dict + +def load_pkl_files(pkl_path): + with open(pkl_path, 'rb') as f: + data = pickle.load(f) + return data + +def read_json(file_path): + with open(file_path, 'r') as f: + data = json.load(f) + + return data + +def limit_period(val, offset=0.5, period=2*np.pi): + """ + continous part: + [0 - period * offset, period - period * offset) + """ + # 首先,numpy格式数据转换为torch格式 + val, is_numpy = check_numpy_to_torch(val) + # 将方位角限制在[-pi, pi] + ans = val - torch.floor(val / period + offset) * period + return ans.numpy() if is_numpy else ans + + +def check_numpy_to_torch(x): + if isinstance(x, np.ndarray): + return torch.from_numpy(x).float(), True + return x, False + +def check_torch_to_numpy(x): + if isinstance(x, torch.tensor): + return x.cpu().numpy(), True + return x, False + + +def check_contain_nan(x): + if isinstance(x, dict): + return any(check_contain_nan(v) for k, v in x.items()) + if isinstance(x, list): + return any(check_contain_nan(itm) for itm in x) + if isinstance(x, int) or isinstance(x, float): + return False + if isinstance(x, np.ndarray): + return np.any(np.isnan(x)) + return torch.any(x.isnan()).detach().cpu().item() + + +def rotate_points_along_z(points, angle): + """ + Args: + points: (B, N, 3 + C) + angle: (B), radians, angle along z-axis, angle increases x ==> y + Returns: + + """ + points, is_numpy = check_numpy_to_torch(points) + angle, _ = check_numpy_to_torch(angle) + + cosa = torch.cos(angle) + sina = torch.sin(angle) + zeros = angle.new_zeros(points.shape[0]) + ones = angle.new_ones(points.shape[0]) + rot_matrix = torch.stack(( + cosa, sina, zeros, + -sina, cosa, zeros, + zeros, zeros, ones + ), dim=1).view(-1, 3, 3).float() + points_rot = torch.matmul(points[:, :, 0:3].float(), rot_matrix) + points_rot = torch.cat((points_rot, points[:, :, 3:]), dim=-1) + return points_rot.numpy() if is_numpy else points_rot + + +def rotate_points_along_z_2d(points, angle): + """ + Rorate the points along z-axis. + Parameters + ---------- + points : torch.Tensor / np.ndarray + (N, 2). 
+ angle : torch.Tensor / np.ndarray + (N,) + + Returns + ------- + points_rot : torch.Tensor / np.ndarray + Rorated points with shape (N, 2) + + """ + points, is_numpy = check_numpy_to_torch(points) + angle, _ = check_numpy_to_torch(angle) + cosa = torch.cos(angle) + sina = torch.sin(angle) + # (N, 2, 2) + rot_matrix = torch.stack((cosa, sina, -sina, cosa), dim=1).view(-1, 2, + 2).float() + points_rot = torch.einsum("ik, ikj->ij", points.float(), rot_matrix) + return points_rot.numpy() if is_numpy else points_rot + + +def remove_ego_from_objects(objects, ego_id): + """ + Avoid adding ego vehicle to the object dictionary. + + Parameters + ---------- + objects : dict + The dictionary contained all objects. + + ego_id : int + Ego id. + """ + if ego_id in objects: + del objects[ego_id] + + +def retrieve_ego_id(base_data_dict): + """ + Retrieve the ego vehicle id from sample(origin format). + + Parameters + ---------- + base_data_dict : dict + Data sample in origin format. + + Returns + ------- + ego_id : str + The id of ego vehicle. + """ + ego_id = None + + for cav_id, cav_content in base_data_dict.items(): + if cav_content['ego']: + ego_id = cav_id + break + return ego_id + + +def compute_iou(box, boxes): + """ + Compute iou between box and boxes list + Parameters + ---------- + box : shapely.geometry.Polygon + Bounding box Polygon. + + boxes : list + List of shapely.geometry.Polygon. + + Returns + ------- + iou : np.ndarray + Array of iou between box and boxes. + + """ + # Calculate intersection areas + if np.any(np.array([box.union(b).area for b in boxes])==0): + print('debug') + iou = [box.intersection(b).area / box.union(b).area for b in boxes] + + return np.array(iou, dtype=np.float32) + + +def convert_format(boxes_array): + """ + Convert boxes array to shapely.geometry.Polygon format. + Parameters + ---------- + boxes_array : np.ndarray + (N, 4, 2) or (N, 8, 3). + + Returns + ------- + list of converted shapely.geometry.Polygon object. + + """ + polygons = [Polygon([(box[i, 0], box[i, 1]) for i in range(4)]) for box in + boxes_array] + return np.array(polygons) + + +def torch_tensor_to_numpy(torch_tensor): + """ + Convert a torch tensor to numpy. + + Parameters + ---------- + torch_tensor : torch.Tensor + + Returns + ------- + A numpy array. 
+ """ + return torch_tensor.numpy() if not torch_tensor.is_cuda else \ + torch_tensor.cpu().detach().numpy() + + +def get_voxel_centers(voxel_coords, + downsample_times, + voxel_size, + point_cloud_range): + """ + Args: + voxel_coords: (N, 3) + downsample_times: + voxel_size: + point_cloud_range: + + Returns: + + """ + assert voxel_coords.shape[1] == 3 + voxel_centers = voxel_coords[:, [2, 1, 0]].float() # (xyz) + voxel_size = torch.tensor(voxel_size, device=voxel_centers.device).float() * downsample_times + pc_range = torch.tensor(point_cloud_range[0:3], device=voxel_centers.device).float() + voxel_centers = (voxel_centers + 0.5) * voxel_size + pc_range + return voxel_centers + +def scatter_point_inds(indices, point_inds, shape): + ret = -1 * torch.ones(*shape, dtype=point_inds.dtype, device=point_inds.device) # 初始化结果 (8, 21, 800, 704) + ndim = indices.shape[-1] # 获取坐标维度 4 + flattened_indices = indices.view(-1, ndim) # 将坐标展平 (204916, 4) + # 以下两步是经典操作 + slices = [flattened_indices[:, i] for i in range(ndim)] # 分成4个list + ret[slices] = point_inds # 将voxel的索引写入对应位置 + return ret + +def generate_voxel2pinds(sparse_tensor): + """ + 计算有效voxel在原始空间shape中的索引 + """ + device = sparse_tensor.indices.device # 获取device + batch_size = sparse_tensor.batch_size # 获取batch_size + spatial_shape = sparse_tensor.spatial_shape # 获取空间形状 (21, 800, 704) + indices = sparse_tensor.indices.long() # 获取索引 + point_indices = torch.arange(indices.shape[0], device=device, dtype=torch.int32) # 生成索引 (204916,) + output_shape = [batch_size] + list(spatial_shape) # 计算输出形状 (8, 21, 800, 704) + v2pinds_tensor = scatter_point_inds(indices, point_indices, output_shape) + return v2pinds_tensor diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/draco_compression.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/draco_compression.py new file mode 100644 index 0000000000000000000000000000000000000000..00a0fa8bce138148ca2e177c5280ad76c64916d4 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/draco_compression.py @@ -0,0 +1,69 @@ +""" +To use this script, draco [https://github.com/google/draco.git] should be installed. +1. during test, keypoints coordinats and features should be saved as .ply file using + the funcion save_ply. +2. Compress and anylize the CPM size using function draco_compression. 
+""" +import random, os, re +import numpy as np +import torch +from glob import glob +import subprocess + +draco = "/media/hdd/yuan/draco/build_dir/draco_encoder" + + +def save_ply(path, batch_coords, batch_features): + # path = "/media/hdd/yuan/OpenCOOD/opencood/logs/fpvrcnn_intermediate_fusion/cpms/" + dirname = "{:06d}".format(random.randint(0, 999999)) + os.mkdir(path + dirname) + for bi, (coords, features) in enumerate(zip(batch_coords[1:], + batch_features[1:])): + header = "ply\n" \ + "format ascii 1.0\n" \ + f"element vertex {len(coords)}\n" \ + "property float x\n" \ + "property float y\n" \ + "property float z\n" + header = header + "".join([f"property float feat{i}\n" for i in range(32)]) + "end_header" + data = torch.cat([coords, features], dim=1).detach().cpu().numpy() + np.savetxt(path + dirname + f"/{bi + 1}.ply", data, + delimiter=' ', header=header, comments='') + + +def draco_compression(ply_path): + files = glob(os.path.join(ply_path, '*/*.ply')) + cpm_sizes = list(map(draco_compression_one, files)) + return cpm_sizes + + +def draco_compression_one(file): + out_file = file.replace('ply', 'drc') + std_out = subprocess.getoutput(f"{draco} -point_cloud -i {file} -o {out_file}") + size_str = re.findall('[0-9]+ bytes', std_out) + if len(size_str)<1: + print("Compression failed:", file) + cpm_size = 0 + else: + cpm_size = int(size_str[0].split(' ')[0]) + + return cpm_size + + +def cal_avg_num_kpts(ply_path): + files = glob(os.path.join(ply_path, '*/*.ply')) + + def read_vertex_num(file): + with open(file, 'r') as f: + size_str = re.findall('element vertex [0-9]+', f.read())[0] + return float(size_str.split(' ')[-1]) * 4 * 32 / 1024 + + sizes = list(map(read_vertex_num, files)) + + return sizes + + +if __name__=="__main__": + cpm_sizes = cal_avg_num_kpts("/media/hdd/yuan/OpenCOOD/opencood/logs/fpvrcnn_intermediate_fusion/cpms") + # cpm_sizes = draco_compression("/media/hdd/yuan/OpenCOOD/opencood/logs/fpvrcnn_intermediate_fusion/cpms") + print(np.array(cpm_sizes).mean()) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/eval_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/eval_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..5361dae5532af81d84f4595ebb6cb501048630bb --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/eval_utils.py @@ -0,0 +1,253 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu +# License: TDG-Attribution-NonCommercial-NoDistrib + + +import os + +import numpy as np +import torch + +from opencood.utils import common_utils +from opencood.hypes_yaml import yaml_utils + + +def voc_ap(rec, prec): + """ + VOC 2010 Average Precision. + """ + rec.insert(0, 0.0) + rec.append(1.0) + mrec = rec[:] + + prec.insert(0, 0.0) + prec.append(0.0) + mpre = prec[:] + + for i in range(len(mpre) - 2, -1, -1): + mpre[i] = max(mpre[i], mpre[i + 1]) + + i_list = [] + for i in range(1, len(mrec)): + if mrec[i] != mrec[i - 1]: + i_list.append(i) + + ap = 0.0 + for i in i_list: + ap += ((mrec[i] - mrec[i - 1]) * mpre[i]) + return ap, mrec, mpre + + +def caluclate_tp_fp(det_boxes, det_score, gt_boxes, result_stat, iou_thresh): + """ + Calculate the true positive and false positive numbers of the current + frames. + Parameters + ---------- + det_boxes : torch.Tensor + The detection bounding box, shape (N, 8, 3) or (N, 4, 2). + det_score :torch.Tensor + The confidence score for each preditect bounding box. + gt_boxes : torch.Tensor + The groundtruth bounding box. 
+ result_stat: dict + A dictionary contains fp, tp and gt number. + iou_thresh : float + The iou thresh. + """ + # fp, tp and gt in the current frame + fp = [] + tp = [] + gt = gt_boxes.shape[0] + if det_boxes is not None: + # convert bounding boxes to numpy array + det_boxes = common_utils.torch_tensor_to_numpy(det_boxes) + det_score = common_utils.torch_tensor_to_numpy(det_score) + gt_boxes = common_utils.torch_tensor_to_numpy(gt_boxes) + + # sort the prediction bounding box by score + score_order_descend = np.argsort(-det_score) + det_score = det_score[score_order_descend] # from high to low + det_polygon_list = list(common_utils.convert_format(det_boxes)) + gt_polygon_list = list(common_utils.convert_format(gt_boxes)) + + # match prediction and gt bounding box, in confidence descending order + for i in range(score_order_descend.shape[0]): + det_polygon = det_polygon_list[score_order_descend[i]] + ious = common_utils.compute_iou(det_polygon, gt_polygon_list) + + if len(gt_polygon_list) == 0 or np.max(ious) < iou_thresh: + fp.append(1) + tp.append(0) + continue + + fp.append(0) + tp.append(1) + + gt_index = np.argmax(ious) + gt_polygon_list.pop(gt_index) + result_stat[iou_thresh]['score'] += det_score.tolist() + result_stat[iou_thresh]['fp'] += fp + result_stat[iou_thresh]['tp'] += tp + result_stat[iou_thresh]['gt'] += gt + +def caluclate_tp_fp_multiclass(det_boxes_all, det_score_all, gt_boxes_all, result_stat_all, iou_thresh): + """ + Calculate the true positive and false positive numbers of the current + frames. + Parameters + ---------- + det_boxes : torch.Tensor + The detection bounding box, shape (N, 8, 3) or (N, 4, 2). + det_score :torch.Tensor + The confidence score for each preditect bounding box. + gt_boxes : torch.Tensor + The groundtruth bounding box. + result_stat: dict + A dictionary contains fp, tp and gt number. + iou_thresh : float + The iou thresh. + """ + + class_list = [0,1,3] + for c in range(3): + det_boxes = det_boxes_all[c] + det_score = det_score_all[c] + gt_boxes = gt_boxes_all[c] + result_stat = result_stat_all[class_list[c]] + + if gt_boxes is None: + continue + + # fp, tp and gt in the current frame + fp = [] + tp = [] + gt = gt_boxes.shape[0] + if det_boxes is not None: + # convert bounding boxes to numpy array + det_boxes = common_utils.torch_tensor_to_numpy(det_boxes) + det_score = common_utils.torch_tensor_to_numpy(det_score) + gt_boxes = common_utils.torch_tensor_to_numpy(gt_boxes) + + # sort the prediction bounding box by score + score_order_descend = np.argsort(-det_score) + det_score = det_score[score_order_descend] # from high to low + det_polygon_list = list(common_utils.convert_format(det_boxes)) + gt_polygon_list = list(common_utils.convert_format(gt_boxes)) + + # match prediction and gt bounding box, in confidence descending order + for i in range(score_order_descend.shape[0]): + det_polygon = det_polygon_list[score_order_descend[i]] + ious = common_utils.compute_iou(det_polygon, gt_polygon_list) + + if len(gt_polygon_list) == 0 or np.max(ious) < iou_thresh: + fp.append(1) + tp.append(0) + continue + + fp.append(0) + tp.append(1) + + gt_index = np.argmax(ious) + gt_polygon_list.pop(gt_index) + result_stat[iou_thresh]['score'] += det_score.tolist() + result_stat[iou_thresh]['fp'] += fp + result_stat[iou_thresh]['tp'] += tp + result_stat[iou_thresh]['gt'] += gt + +def calculate_ap(result_stat, iou): + """ + Calculate the average precision and recall, and save them into a txt. 
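+
+    The fp/tp lists accumulated by caluclate_tp_fp are sorted by confidence
+    and turned into cumulative counts, from which rec = tp_cum / gt and
+    prec = tp_cum / (tp_cum + fp_cum) are computed and passed to voc_ap for
+    the VOC-2010 interpolation.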
+ Parameters + ---------- + result_stat : dict + A dictionary contains fp, tp and gt number. + iou : float + """ + iou_5 = result_stat[iou] + + fp = np.array(iou_5['fp']) + tp = np.array(iou_5['tp']) + score = np.array(iou_5['score']) + assert len(fp) == len(tp) and len(tp) == len(score) + + sorted_index = np.argsort(-score) + fp = fp[sorted_index].tolist() + tp = tp[sorted_index].tolist() + + gt_total = iou_5['gt'] + + cumsum = 0 + for idx, val in enumerate(fp): + fp[idx] += cumsum + cumsum += val + + cumsum = 0 + for idx, val in enumerate(tp): + tp[idx] += cumsum + cumsum += val + + rec = tp[:] + for idx, val in enumerate(tp): + rec[idx] = float(tp[idx]) / gt_total + + prec = tp[:] + for idx, val in enumerate(tp): + prec[idx] = float(tp[idx]) / (fp[idx] + tp[idx]) + + ap, mrec, mprec = voc_ap(rec[:], prec[:]) + + return ap, mrec, mprec + + +def eval_final_results_multiclass(result_stat_dict, save_path, infer_info=None): + all_class_results = {} + for tpe in result_stat_dict.keys(): + result_stat = result_stat_dict[tpe] + dump_dict = {} + ap_30, mrec_30, mpre_30 = calculate_ap(result_stat, 0.30) + ap_50, mrec_50, mpre_50 = calculate_ap(result_stat, 0.50) + ap_70, mrec_70, mpre_70 = calculate_ap(result_stat, 0.70) + dump_dict.update({'ap30': ap_30, + 'ap50': ap_50, + 'ap70': ap_70, + 'mpre_50': mpre_50, + 'mrec_50': mrec_50, + 'mpre_70': mpre_70, + 'mrec_70': mrec_70, + }) + print('class_{}:\n'.format(tpe),'The Average Precision at IOU 0.3 is %.2f, ' + 'The Average Precision at IOU 0.5 is %.2f, ' + 'The Average Precision at IOU 0.7 is %.2f' % (ap_30, ap_50, ap_70), '\n') + all_class_results[tpe] = dump_dict + if infer_info is None: + yaml_utils.save_yaml(all_class_results, os.path.join(save_path, 'eval.yaml')) + else: + yaml_utils.save_yaml(all_class_results, os.path.join(save_path, f'eval_{infer_info}.yaml')) + return all_class_results, ap_30, ap_50, ap_70 + +def eval_final_results(result_stat, save_path, infer_info=None): + dump_dict = {} + + ap_30, mrec_30, mpre_30 = calculate_ap(result_stat, 0.30) + ap_50, mrec_50, mpre_50 = calculate_ap(result_stat, 0.50) + ap_70, mrec_70, mpre_70 = calculate_ap(result_stat, 0.70) + + dump_dict.update({'ap30': ap_30, + 'ap_50': ap_50, + 'ap_70': ap_70, + 'mpre_50': mpre_50, + 'mrec_50': mrec_50, + 'mpre_70': mpre_70, + 'mrec_70': mrec_70, + }) + if infer_info is None: + yaml_utils.save_yaml(dump_dict, os.path.join(save_path, 'eval.yaml')) + else: + yaml_utils.save_yaml(dump_dict, os.path.join(save_path, f'eval_{infer_info}.yaml')) + + print('The Average Precision at IOU 0.3 is %.2f, ' + 'The Average Precision at IOU 0.5 is %.2f, ' + 'The Average Precision at IOU 0.7 is %.2f' % (ap_30, ap_50, ap_70)) + + return ap_30, ap_50, ap_70 \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/heter_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/heter_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..240c36db0c4be2615d4d8e964f92e208b167e86b --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/heter_utils.py @@ -0,0 +1,155 @@ +""" +Agent Selection Module for Heterogeneous Collaboration. + +Maybe later can use data augment, one sample with different selection setting. 
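+
+The assignment files written by assign_modality / assign_modality_4 below map
+each scenario to a per-CAV modality tag, roughly (scenario and CAV ids are
+illustrative only):
+
+    {
+        "2021_08_18_19_48_05": {"641": "m1", "650": "m3", "659": "m2"},
+        ...
+    }
+
+Adaptor consumes such a file as `modality_assignment`, together with
+`mapping_dict`, to pick which CAV may serve as ego at evaluation time and to
+assign modalities to non-ego CAVs during training.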
+""" +import numpy as np +import random +import os +from collections import OrderedDict +import json + +class Adaptor: + def __init__(self, + ego_modality, + model_modality_list, + modality_assignment, + lidar_channels_dict, + mapping_dict, + cav_preference, + train): + self.ego_modality = ego_modality + self.model_modality_list = model_modality_list + self.modality_assignment = modality_assignment + self.lidar_channels_dict = lidar_channels_dict + self.mapping_dict = mapping_dict + if cav_preference is None: + cav_preference = dict.fromkeys(model_modality_list, 1/len(model_modality_list)) + self.cav_preferece = cav_preference # training, probability for setting non-ego cav modality + self.train = train + + + def reorder_cav_list(self, cav_list, scenario_name): + """ + When evaluation, make the cav that could be ego modality after mapping be the first. + + This can check the training effect of aligner. + + work in basedataset -> reinitialize + """ + if self.train: + # shuffle the cav list + random.shuffle(cav_list) + return cav_list + + assignment = self.modality_assignment[scenario_name] + if assignment[cav_list[0]] not in self.ego_modality: + ego_cav = None + for cav_id, modality in assignment.items(): + if self.mapping_dict[modality] in self.ego_modality: # after mapping the modality is ego + ego_cav = cav_id + break + + if ego_cav is None: + return cav_list + + other_cav = sorted(list(assignment.keys())) + other_cav.remove(ego_cav) + cav_list = [ego_cav] + other_cav + + return cav_list + + def reassign_cav_modality(self, modality_name, idx_in_cav_list): + """ + work in basedataset -> reinitialize + """ + if self.train: + # always assign the ego_modality to idx 0 in cav_list + if idx_in_cav_list == 0: + return np.random.choice(self.ego_modality.split("&")) + return random.choices(list(self.cav_preferece.keys()), weights=self.cav_preferece.values())[0] + else: + return self.mapping_dict[modality_name] + + def unmatched_modality(self, cav_modality): + """ + work in + intermediate_heter_fusion_dataset -> __getitem__ + late_heter_fusion_dataset -> get_item_test + + Returns: + True/False. 
If the input modality is in the model_modality_list + """ + return cav_modality not in self.model_modality_list + + def switch_lidar_channels(self, cav_modality, lidar_file_path): + """ + Currently only support OPV2V + """ + if self.lidar_channels_dict.get(cav_modality, None) == 32: + return lidar_file_path.replace("OPV2V","OPV2V_Hetero").replace(".pcd", "_32.pcd") + if self.lidar_channels_dict.get(cav_modality, None) == 16: + return lidar_file_path.replace("OPV2V","OPV2V_Hetero").replace(".pcd", "_16.pcd") + return lidar_file_path + + +def assign_modality(root_dir="dataset/OPV2V", output_path="opencood/logs/heter_modality_assign/opv2v.json"): + np.random.seed(303) + splits = ['train', 'test', 'validate'] + scenario_cav_modality_dict = OrderedDict() + + for split in splits: + split_path = os.path.join(root_dir, split) + scenario_folders = sorted([os.path.join(split_path, x) + for x in os.listdir(split_path) if + os.path.isdir(os.path.join(split_path, x))]) + + for scenario_folder in scenario_folders: + scenario_name = scenario_folder.split('/')[-1] + scenario_cav_modality_dict[scenario_name] = OrderedDict() + + cav_list = sorted([x for x in os.listdir(scenario_folder) \ + if os.path.isdir(os.path.join(scenario_folder, x))]) + + # randomly exclude one agent to be M3 + M3_agent_idx = np.random.randint(len(cav_list)) + + for j, cav_id in enumerate(cav_list): + + if j == M3_agent_idx: + scenario_cav_modality_dict[scenario_name][cav_id] = "m3" # M3 modality + else: + scenario_cav_modality_dict[scenario_name][cav_id] = 'm'+str(np.random.randint(1,3)) # can be M1 or M2 mdoality + + with open(output_path, "w") as f: + json.dump(scenario_cav_modality_dict, f, indent=4, sort_keys=True) + + +def assign_modality_4(root_dir="dataset/OPV2V", output_path="opencood/logs/heter_modality_assign/opv2v_4modality.json"): + np.random.seed(303) + splits = ['train', 'test', 'validate'] + scenario_cav_modality_dict = OrderedDict() + + for split in splits: + split_path = os.path.join(root_dir, split) + scenario_folders = sorted([os.path.join(split_path, x) + for x in os.listdir(split_path) if + os.path.isdir(os.path.join(split_path, x))]) + + for scenario_folder in scenario_folders: + scenario_name = scenario_folder.split('/')[-1] + scenario_cav_modality_dict[scenario_name] = OrderedDict() + + cav_list = sorted([x for x in os.listdir(scenario_folder) \ + if os.path.isdir(os.path.join(scenario_folder, x))]) + + perm = np.random.permutation(4) + 1 + for j, cav_id in enumerate(cav_list): + scenario_cav_modality_dict[scenario_name][cav_id] = 'm'+str(perm[j%4]) # m1 or m2 or m3 or m4 + + + with open(output_path, "w") as f: + json.dump(scenario_cav_modality_dict, f, indent=4, sort_keys=True) + +if __name__ == "__main__": + assign_modality_4() \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/img2hdf5.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/img2hdf5.py new file mode 100644 index 0000000000000000000000000000000000000000..c68311218e0a60cea0b438e102da74a5ccf12a1d --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/img2hdf5.py @@ -0,0 +1,246 @@ +import os +from multiprocessing import Process +import numpy as np +from tqdm import tqdm +from PIL import Image +import h5py +import sys + + +def load_camera_data(camera_files, preload=True): + """ + Args: + camera_files: list, + store camera path + shape : tuple + (width, height), resize the image, and overcoming the lazy loading. 
+ Returns: + camera_data_list: list, + list of Image, RGB order + """ + camera_data_list = [] + for camera_file in camera_files: + camera_data = Image.open(camera_file) + if preload: + camera_data = camera_data.copy() + camera_data_list.append(camera_data) + return camera_data_list + + +def load_camera_files(cav_path, timestamp, name): + """ + Retrieve the paths to all camera files. + + Parameters + ---------- + cav_path : str + The full file path of current cav. + + timestamp : str + Current timestamp + + Returns + ------- + camera_files : list + The list containing all camera png file paths. + """ + camera0_file = os.path.join(cav_path, + timestamp + f'_{name}0.png') + camera1_file = os.path.join(cav_path, + timestamp + f'_{name}1.png') + camera2_file = os.path.join(cav_path, + timestamp + f'_{name}2.png') + camera3_file = os.path.join(cav_path, + timestamp + f'_{name}3.png') + + return [camera0_file, camera1_file, camera2_file, camera3_file] + + +def load_depth_files(cav_path, timestamp, name): + """ + Retrieve the paths to all camera files. + + Parameters + ---------- + cav_path : str + The full file path of current cav. + + timestamp : str + Current timestamp + + Returns + ------- + camera_files : list + The list containing all camera png file paths. + """ + camera0_file = os.path.join(cav_path, + timestamp + f'_{name}0.png').replace("OPV2V", "OPV2V_Hetero") + camera1_file = os.path.join(cav_path, + timestamp + f'_{name}1.png').replace("OPV2V", "OPV2V_Hetero") + camera2_file = os.path.join(cav_path, + timestamp + f'_{name}2.png').replace("OPV2V", "OPV2V_Hetero") + camera3_file = os.path.join(cav_path, + timestamp + f'_{name}3.png').replace("OPV2V", "OPV2V_Hetero") + + return [camera0_file, camera1_file, camera2_file, camera3_file] + +def parallel_transform(scenario_folders): + print("subprocess...") + for scenario_folder in scenario_folders: + cav_list = sorted(os.listdir(scenario_folder)) + + assert len(cav_list) > 0 + + # loop over all CAV data + for (j, cav_id) in enumerate(cav_list): + cav_path = os.path.join(scenario_folder, cav_id) + if not os.path.isdir(cav_path): + continue + + yaml_files = \ + sorted([os.path.join(cav_path, x) + for x in os.listdir(cav_path) if + x.endswith('.yaml')]) + timestamps = [] + + # extract timestamp + for file in yaml_files: + res = file.split('/')[-1] + timestamp = res.replace('.yaml', '') + timestamps.append(timestamp) + + for timestamp in timestamps: + if os.path.exists(os.path.join(cav_path, timestamp+"_imgs.hdf5")): + continue + camera_files = load_camera_files(cav_path, timestamp, name="camera") + depth_files = load_depth_files(cav_path, timestamp, name="depth") + + if not os.path.exists(depth_files[0]): + # record the scene + print(cav_path) + continue + try: + tmp_data = Image.open(depth_files[0]) + tmp_data = tmp_data.copy() + except: + print(cav_path) + continue + + camera_data = load_camera_data(camera_files, True) + depth_data = load_camera_data(depth_files, True) + print(os.path.join(cav_path, timestamp+"_imgs.hdf5")) + with h5py.File(os.path.join(cav_path, timestamp+"_imgs.hdf5"), "w") as f: + for i in range(4): + f.create_dataset(f"camera{i}", data=camera_data[i]) + for i in range(4): + f.create_dataset(f"depth{i}", data=depth_data[i]) + +def parallel_check(scenario_folders): + print("subprocess...") + for scenario_folder in scenario_folders: + cav_list = sorted(os.listdir(scenario_folder)) + + assert len(cav_list) > 0 + + # loop over all CAV data + for (j, cav_id) in enumerate(cav_list): + cav_path = os.path.join(scenario_folder, 
cav_id) + if not os.path.isdir(cav_path): + continue + + yaml_files = \ + sorted([os.path.join(cav_path, x) + for x in os.listdir(cav_path) if + x.endswith('.yaml')]) + timestamps = [] + + # extract timestamp + for file in yaml_files: + res = file.split('/')[-1] + timestamp = res.replace('.yaml', '') + timestamps.append(timestamp) + + for timestamp in timestamps: + if os.path.exists(os.path.join(cav_path, timestamp+"_imgs.hdf5")): + continue + camera_files = load_camera_files(cav_path, timestamp, name="camera") + depth_files = load_depth_files(cav_path, timestamp, name="depth") + + if not os.path.exists(depth_files[0]): + # record the scene + print(depth_files[0]) + # break + try: + tmp_data = Image.open(depth_files[0]) + tmp_data = tmp_data.copy() + except: + print(cav_path) + break + + + +def parallel_cleaup(scenario_folders): + print("subprocess...") + for scenario_folder in tqdm(scenario_folders): + cav_list = sorted(os.listdir(scenario_folder)) + + assert len(cav_list) > 0 + + # loop over all CAV data + for (j, cav_id) in enumerate(cav_list): + cav_path = os.path.join(scenario_folder, cav_id) + if not os.path.isdir(cav_path): + continue + + yaml_files = \ + sorted([os.path.join(cav_path, x) + for x in os.listdir(cav_path) if + x.endswith('.yaml')]) + timestamps = [] + + # extract timestamp + for file in yaml_files: + res = file.split('/')[-1] + timestamp = res.replace('.yaml', '') + timestamps.append(timestamp) + + for timestamp in timestamps: + if os.path.exists(os.path.join(cav_path, timestamp+"_imgs.hdf5")): + print(os.path.join(cav_path, timestamp+"_imgs.hdf5")) + os.remove(os.path.join(cav_path, timestamp+"_imgs.hdf5")) + +if __name__=="__main__": + + MP_NUM = 8 + MACHINE_NUM = 1 + + if MACHINE_NUM != 1: + machine_idx = eval(sys.argv[1]) # 0,1,2,3 + + split_folders = [f"/GPFS/rhome/yifanlu/workspace/OpenCOODv2/dataset/OPV2V/{split}" for split in ['train', 'validate', 'test']] + scenario_folders = [] + print(split_folders) + + for root_dir in split_folders: + scenario_folders += sorted([os.path.join(root_dir, x) + for x in os.listdir(root_dir) if + os.path.isdir(os.path.join(root_dir, x))]) + + """ + single machine + """ + if MACHINE_NUM == 1: + mp_split = np.array_split(scenario_folders, MP_NUM) + mp_split = [x.tolist() for x in mp_split] + + for i in range(MP_NUM): + p = Process(target=parallel_check, args=(mp_split[i],)) + p.start() + + if MACHINE_NUM > 1: + mp_split = np.array_split(scenario_folders, MP_NUM * MACHINE_NUM) + mp_split = [x.tolist() for x in mp_split] + + for i in range(machine_idx*MP_NUM, (machine_idx+1)*MP_NUM): + p = Process(target=parallel_transform, args=(mp_split[i],)) + p.start() \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/keypoint_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/keypoint_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..3cba263a80779edce6e81a9d26583580bc4a6f0b --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/keypoint_utils.py @@ -0,0 +1,374 @@ +import open3d as o3d +import numpy as np +import cv2 +from matplotlib import pyplot as plt +from opencood.utils.subsampling_utils import get_random_subset +from multiprocessing import Process + +vis = False + +def mask_points_by_range(points, limit_range, return_mask=False): + if len(limit_range) == 6: + mask = (points[:, 0] > limit_range[0]) & \ + (points[:, 0] < limit_range[3]) & \ + (points[:, 1] > limit_range[1]) & \ + (points[:, 1] < limit_range[4]) & \ + 
(points[:, 2] > limit_range[2]) & \ + (points[:, 2] < limit_range[5]) + elif len(limit_range) == 4: + mask = (points[:, 0] > limit_range[0]) & \ + (points[:, 0] < limit_range[2]) & \ + (points[:, 1] > limit_range[1]) & \ + (points[:, 1] < limit_range[3]) + + points_mask = points[mask] + + if return_mask: + return points_mask, mask + else: + return points_mask + +def project_bev(pcd_np, lidar_range, voxel_size): + """ project pcd to bev + Args: + pcd_np: np.ndarray, (N, 3) + + lidar_range: list + range for bev, [x_min, y_min, z_min, x_max, y_max, z_max] + + Return + bev: np.array, (H, W), + H = (y_max - y_min) / voxel_size + W = (x_max - x_min) / voxel_size + + pcd_np_with_idx: np.ndarray, (N_, 4) + last index show it belongs to which grid + """ + [x_min, y_min, z_min, x_max, y_max, z_max] = lidar_range + + pcd_crop_np, mask = mask_points_by_range(pcd_np, lidar_range, return_mask=True) + + pcd_np_with_idx = np.zeros((pcd_np.shape[0], 4)) + pcd_np_with_idx[:,:3] = pcd_np + + H = round((y_max - y_min) / voxel_size) + W = round((x_max - x_min) / voxel_size) + # print(f"BEV map with shape ({H}, {W}).") + + bev = np.zeros((H, W), dtype=np.uint8) + for i, (x,y,z) in enumerate(pcd_np): + y_idx = int((y - y_min) / voxel_size) + x_idx = int((x - x_min) / voxel_size) + if mask[i]: + bev[y_idx, x_idx] = 255 + pcd_np_with_idx[i][3] = y_idx * W + x_idx + + if vis: + plt.imshow(bev) + plt.show() + + return bev, pcd_np_with_idx + +def line_detection(bev_img): + """ + Should we really need detect line? + Is edge enough to use? + """ + edges = cv2.Canny(bev_img, 100, 200) + if vis: + plt.imshow(edges) + plt.show() + + rho = 1 # distance resolution in pixels of the Hough grid + theta = np.pi / 180 # angular resolution in radians of the Hough grid + threshold = 25 # minimum number of votes (intersections in Hough grid cell) + min_line_length = 20 # minimum number of pixels making up a line + max_line_gap = 20 # maximum gap in pixels between connectable line segments + + line_image = np.copy(bev_img) * 0 # creating a blank to draw lines on + + # Run Hough on edge detected image + # Output "lines" is an array containing endpoints of detected line segments + lines = cv2.HoughLinesP(edges, rho, theta, threshold, np.array([]), + min_line_length, max_line_gap) + + for line in lines: + for x1,y1,x2,y2 in line: + cv2.line(line_image,(x1,y1),(x2,y2),(255),1) + + if vis: + plt.imshow(line_image) + plt.show() + + return line_image + + +def get_point_in_voxels(pcd_np, rows, cols, lidar_range, voxel_size, pcd_with_idx): + """ use indice in image to filter point cloud, then sample within it. 
+ Args: + pcd_np: [N, 3] + rows: [M,] non zero index -> row + cols: [M,] non zero index -> col + pcd_with_idx: [N, 4] + Returns: + points_select: [N_, 3] + """ + [x_min, y_min, z_min, x_max, y_max, z_max] = lidar_range + H = round((y_max - y_min) / voxel_size) + W = round((x_max - x_min) / voxel_size) + + M = rows.shape[0] + points_select = np.zeros((0,4)) + + for i in range(M): + # voxel_range = [x_min + voxel_size * cols[i], + # y_min + voxel_size * rows[i], + # x_min + voxel_size * (cols[i] + 1), + # y_min + voxel_size * (rows[i] + 1)] + # points_in_voxel = mask_points_by_range(pcd_np, voxel_range) + + # if not points_in_voxel.any(): + # continue + + points_in_voxel = pcd_with_idx[pcd_with_idx[:,3]==(rows[i]*W + cols[i])] + if not points_in_voxel.any(): + continue + points_select = np.concatenate((points_select, points_in_voxel), axis=0) + + points_select = points_select[:,:3] + + return points_select + + +def get_keypoints(pcd_all_np, pcd_select_np, n_samples, mode = 'farthest'): + if pcd_select_np.shape[0] >= n_samples: + keypoints = get_random_subset(pcd_select_np, n_samples, mode) + else: + keypoints = get_random_subset(pcd_all_np, n_samples - pcd_select_np.shape[0], mode) + keypoints = np.concatenate((keypoints, pcd_select_np), axis=0) + + return keypoints + +def bev_sample(pcd_np, lidar_range, n_samples, mode, voxel_size=0.2, all_samples=False): + """ + Args: + pcd_np: + [N, 3] or [N, 4] + lidar_range: + list len = 4 or len = 6, please use this to remove ground + all_samples: + if True, not use n_samples to subsampling + Returns: + keypoints: np.ndarray + [n_samples, 3] + """ + + pcd_np = pcd_np[:,:3] + print(1) + bev_img, pcd_with_idx = project_bev(pcd_np, lidar_range, voxel_size) + print(2) + lines = line_detection(bev_img) + rows, cols = np.nonzero(lines) + print(3) + points_select = get_point_in_voxels(pcd_np, rows, cols, lidar_range, voxel_size, pcd_with_idx) + print(4) + + if all_samples: + keypoints = points_select + else: + keypoints = get_keypoints(pcd_np, points_select, n_samples, mode) + + print(keypoints.shape) + + return keypoints + +def seq_generate(): + dirs = ["/GPFS/rhome/yifanlu/workspace/dataset/OPV2V/train", + "/GPFS/rhome/yifanlu/workspace/dataset/OPV2V/validate" + "/GPFS/rhome/yifanlu/workspace/dataset/OPV2V/test"] + + + kp_store_path = '/GPFS/rhome/yifanlu/workspace/OpenCOOD/keypoints_file/bev_keypoints' + lidar_range = [-140, -80, -1.5, 140, 80, 1] + n_samples = 1500 + + import os + import opencood.utils.pcd_utils as pcd_utils + + for root_dir in dirs: + scenario_folders = sorted([os.path.join(root_dir, x) + for x in os.listdir(root_dir) if + os.path.isdir(os.path.join(root_dir, x))]) + scenario_folders_name = sorted([x + for x in os.listdir(root_dir) if + os.path.isdir(os.path.join(root_dir, x))]) + # Structure: {scenario_id : {cav_1 : {timestamp1 : {yaml: path, + # lidar: path, cameras:list of path}}}} + + # loop over all scenarios + for (i, scenario_folder) in enumerate(scenario_folders): + # at least 1 cav should show up + cav_list = sorted([x for x in os.listdir(scenario_folder) + if os.path.isdir( + os.path.join(scenario_folder, x))]) + assert len(cav_list) > 0 + + # loop over all CAV data + for (j, cav_id) in enumerate(cav_list): + + # save all yaml files to the dictionary + cav_path = os.path.join(scenario_folder, cav_id) + + yaml_files = \ + sorted([os.path.join(cav_path, x) + for x in os.listdir(cav_path) if + x.endswith('.yaml')]) + timestamps = [] + + # extract timestamp + for file in yaml_files: + res = file.split('/')[-1] + + timestamp = 
res.replace('.yaml', '') + timestamps.append(timestamp) + + + for timestamp in timestamps: + + yaml_file = os.path.join(cav_path, + timestamp + '.yaml') + lidar_file = os.path.join(cav_path, + timestamp + '.pcd') + + # when init the dataset, it read over all pcd files. + # it maybe slow, but no need to perform keypoint sampling for each time.\ + kp_path = f"{kp_store_path}/{scenario_folders_name[i]}/{cav_id}/{timestamp}.npy" + kp_dir = kp_path.rsplit('/',1)[0] # before filename + + if not os.path.exists(kp_dir): + os.makedirs(kp_dir) + + if not os.path.exists(kp_path): + pcd_np = pcd_utils.pcd_to_np(lidar_file) + kp_file = bev_sample(pcd_np, + lidar_range, + n_samples, + mode='uniform', + all_samples=True) + + np.save(kp_path, kp_file) + + +def parallel_generate(scenario_folder, scenario_folder_name): + + kp_store_path = '/GPFS/rhome/yifanlu/workspace/OpenCOOD/keypoints_file/bev_keypoints' + lidar_range = [-140, -80, -1.5, 140, 80, 1] + + cav_list = sorted([x for x in os.listdir(scenario_folder) + if os.path.isdir( + os.path.join(scenario_folder, x))]) + assert len(cav_list) > 0 + print(cav_list) + + # loop over all CAV data + for (j, cav_id) in enumerate(cav_list): + print(cav_id) + # save all yaml files to the dictionary + cav_path = os.path.join(scenario_folder, cav_id) + + yaml_files = \ + sorted([os.path.join(cav_path, x) + for x in os.listdir(cav_path) if + x.endswith('.yaml')]) + timestamps = [] + + # extract timestamp + for file in yaml_files: + res = file.split('/')[-1] + + timestamp = res.replace('.yaml', '') + timestamps.append(timestamp) + + + for timestamp in timestamps: + + yaml_file = os.path.join(cav_path, + timestamp + '.yaml') + lidar_file = os.path.join(cav_path, + timestamp + '.pcd') + + # when init the dataset, it read over all pcd files. 
+ # it maybe slow, but no need to perform keypoint sampling for each time.\ + target = [250,500,750,1000,1250,1500,2000,2500] + kp_paths = [f"{kp_store_path}/{scenario_folder_name}/{cav_id}/{timestamp}.npy"] + kp_paths += [f"{kp_store_path}_{n_samples}/{scenario_folder_name}/{cav_id}/{timestamp}.npy" for n_samples in target] + flag = True + for kp_path in kp_paths: + if not os.path.exists(kp_path): + flag = False + if flag: + continue + + + pcd_np = pcd_utils.pcd_to_np(lidar_file)[:,:3] + + all_keypoint = bev_sample(pcd_np, + lidar_range, + np.inf, + mode='uniform', + all_samples=True) + + kp_path = f"{kp_store_path}/{scenario_folder_name}/{cav_id}/{timestamp}.npy" + kp_dir = kp_path.rsplit('/',1)[0] # before filename + if not os.path.exists(kp_dir): + os.makedirs(kp_dir) + + if not os.path.exists(kp_path): + np.save(kp_path, all_keypoint) + print(f"saving to {kp_path}") + + + for n_samples in target: + kp_path = f"{kp_store_path}_{n_samples}/{scenario_folder_name}/{cav_id}/{timestamp}.npy" + kp_dir = kp_path.rsplit('/',1)[0] # before filename + + if not os.path.exists(kp_dir): + os.makedirs(kp_dir) + + select_keypoint = get_keypoints(pcd_np, all_keypoint, n_samples) + + if not os.path.exists(kp_path): + np.save(kp_path, select_keypoint) + print(f"saving to {kp_path}") + + + + +if __name__=="__main__": + dirs = ["/GPFS/rhome/yifanlu/workspace/dataset/OPV2V/train", + "/GPFS/rhome/yifanlu/workspace/dataset/OPV2V/validate", + "/GPFS/rhome/yifanlu/workspace/dataset/OPV2V/test"] + + import os + import opencood.utils.pcd_utils as pcd_utils + + scenario_folders = [] + scenario_folders_name = [] + + for root_dir in dirs: + scenario_folders += sorted([os.path.join(root_dir, x) + for x in os.listdir(root_dir) if + os.path.isdir(os.path.join(root_dir, x))]) + scenario_folders_name += sorted([x + for x in os.listdir(root_dir) if + os.path.isdir(os.path.join(root_dir, x))]) + + + + scenario_folders = ['/GPFS/rhome/yifanlu/workspace/OpenCOOD/dataset_link/validate/2021_08_21_17_30_41'] + scenario_folders_name = ['2021_08_21_17_30_41'] + num = len(scenario_folders) + + for i in range(num): + p = Process(target=parallel_generate, args=(scenario_folders[i],scenario_folders_name[i])) + p.start() \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/max_consensus.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/max_consensus.py new file mode 100644 index 0000000000000000000000000000000000000000..294afda55f7f642b0056fcee46b759325fe01955 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/max_consensus.py @@ -0,0 +1,131 @@ +import numpy as np +from sklearn.neighbors import NearestNeighbors + + +def max_consunsus_hierarchical(pointsl, pointsr, loc_l, loc_r, resolution=None, radius=1, point_labels=None, label_weights=None, **kwargs): + max_err = kwargs['search_range']# np.array([1, 1, 6]) + min_cons = kwargs['min_cons'] + min_match_acc_points = kwargs['min_match_acc_points'] + pointsl_out, pointsr_out, T, tf_local, cons, matched_pointsl, matched_pointsr = max_consensus2(pointsl, pointsr, -max_err, max_err, + resolution, radius, loc_l, loc_r, + point_labels=point_labels, label_weights=label_weights) + + if matched_pointsl is not None and len(matched_pointsl) > min_match_acc_points: + T, tf = estimate_tf_2d(matched_pointsl, matched_pointsr, pointsl, pointsr_out) + tf_local = tf + tf_local[:2] = tf_local[:2] = tf_local[:2] - loc_r[0, :2] + loc_l[0, :2] + pointsr_homo = np.concatenate([pointsr, np.ones((len(pointsr), 1))], 
axis=1).T + pointsr_out = (T @ pointsr_homo).T + else: + return None, None, None + + if cons < min_cons: + return None, None, None + return T, tf_local, pointsr_out + + +def max_consensus2(pointsl, pointsr, xyr_min, xyr_max, resolotion, radius, loc_l=None, loc_r=None, point_labels=None, label_weights=None): + tf_matrices, tf_params, tf_params_local = construct_tfs(xyr_min, xyr_max, resolotion, loc_l, loc_r) + rotl, _, _ = construct_tfs(xyr_min[2:], xyr_max[2:], resolotion[2:]) + pointr_homo = np.concatenate([pointsr, np.ones((len(pointsr), 1))], axis=1).T + # pointl_homo = np.concatenate([pointsl, np.ones((len(pointsl), 1))], axis=1).T + pointr_transformed = np.einsum('...ij, ...jk', tf_matrices, np.tile(pointr_homo,(len(tf_matrices), 1, 1))).transpose(0, 2, 1) + pointr_transformed_s = pointr_transformed.reshape(-1, 3)[:, :2] + cur_cons = 0 + pointl_out = pointsl + pointr_out = pointsr + match_T, match_tf_local, matched_pointsl, matched_pointsr = None, None, None, None + # r1 = 0 + for R in rotl[:, :2, :2]: + pointl_transformed = np.einsum('ij, jk', R, pointsl.T).T + nbrs = NearestNeighbors(n_neighbors=1, radius=radius, algorithm='auto').fit(pointl_transformed) + distances, indices = nbrs.kneighbors(pointr_transformed_s) + mask = (distances < radius) + lbll, lblr = point_labels + plus = (np.logical_and(lbll[indices] > 2, mask)).reshape(len(tf_matrices), len(pointsr)) + mask = mask.reshape(len(tf_matrices), len(pointsr)) + pointr_consensus = mask.sum(axis=1) + plus.sum(axis=1) * label_weights[-1] + best_match = np.argmax(pointr_consensus) + match_consensus = pointr_consensus[best_match] + if match_consensus > cur_cons: + pointr_out = pointr_transformed[best_match] + match_T = tf_matrices[best_match] + match_tf_local = tf_params_local[best_match] + accurate_points_mask = plus[best_match] + selected_indices = indices.reshape(len(tf_matrices), len(pointsr))[best_match][accurate_points_mask] + matched_pointsl = pointsl[selected_indices] + matched_pointsr = pointsr[accurate_points_mask] + # r1 = np.arctan2(R[1, 0], R[0, 0]) + pointl_out = pointl_transformed + cur_cons = match_consensus + return pointl_out, pointr_out, match_T, match_tf_local, cur_cons, matched_pointsl, matched_pointsr + + +def max_consensus1(pointsl, pointsr, xyr_min, xyr_max, resolotion, radius, loc_l=None, loc_r=None, point_labels=None, label_weights=None): + tf_matrices, tf_params, tf_params_local = construct_tfs(xyr_min, xyr_max, resolotion, loc_l, loc_r) + pointr_homo = np.concatenate([pointsr, np.ones((len(pointsr), 1))], axis=1).T + pointr_transformed = np.einsum('...ij, ...jk', tf_matrices, np.tile(pointr_homo,(len(tf_matrices), 1, 1))).transpose(0, 2, 1) + pointr_transformed_s = pointr_transformed.reshape(-1, 3)[:, :2] + + nbrs = NearestNeighbors(n_neighbors=1, radius=radius, algorithm='auto').fit(pointsl) + distances, indices = nbrs.kneighbors(pointr_transformed_s) + mask = (distances < radius) + lbll, lblr = point_labels + plus = (np.logical_and(lbll[indices] > 2, mask)).reshape(len(tf_matrices), len(pointsr)) + mask = mask.reshape(len(tf_matrices), len(pointsr)) + pointr_consensus = mask.sum(axis=1) + plus.sum(axis=1) * label_weights[-1] + best_match = np.argmax(pointr_consensus) + match_consensus = pointr_consensus[best_match] + pointr_out = pointr_transformed[best_match] + match_tf = tf_params[best_match] + match_T = tf_matrices[best_match] + match_tf_local = tf_params_local[best_match] + accurate_points_mask = plus[best_match] + selected_indices = indices.reshape(len(tf_matrices), 
len(pointsr))[best_match][accurate_points_mask] + matched_pointsl = pointsl[selected_indices] + matched_pointsr = pointsr[accurate_points_mask] + return pointr_out, match_T, match_tf_local, match_consensus, matched_pointsl, matched_pointsr + + +def construct_tfs(xyr_min, xyr_max, resolution, loc_l=None, loc_r=None): + input = [np.arange(xyr_min[i], xyr_max[i], resolution[i]) for i in range(len(xyr_min))] + grid = np.meshgrid(*input) + grid = [a.reshape(-1) for a in grid] + tf_parames_local = np.stack(grid, axis=1) + tf_parames_local[:, -1] = tf_parames_local[:, -1] / 180 * np.pi + tf_parames = np.copy(tf_parames_local) + if loc_r is not None: + tf_parames[:, :-1] = tf_parames_local[:, :2] + loc_r[:, :2] - loc_l[:, :2] + sina = np.sin(tf_parames[:, -1]) + cosa = np.cos(tf_parames[:, -1]) + zeros = np.zeros(len(tf_parames), dtype=sina.dtype) + ones = np.ones(len(tf_parames), dtype=sina.dtype) + x = tf_parames[:, 0] if len(xyr_min)>1 else zeros + y = tf_parames[:, 1] if len(xyr_min)>1 else zeros + tfs = np.array([[cosa, -sina, x], + [sina, cosa, y], + [zeros, zeros, ones]]).transpose(2, 0, 1) + return tfs, tf_parames, tf_parames_local + + +def estimate_tf_2d(pointsr, pointsl, pointsl_all, pointsr_all): + # 1 reduce by the center of mass + l_mean = pointsl.mean(axis=0) + r_mean = pointsr.mean(axis=0) + l_reduced = pointsl - l_mean + r_reduced = pointsr - r_mean + # 2 compute the rotation + Sxx = (l_reduced[:, 0] * r_reduced[:, 0]).sum() + Syy = (l_reduced[:, 1] * r_reduced[:, 1]).sum() + Sxy = (l_reduced[:, 0] * r_reduced[:, 1]).sum() + Syx = (l_reduced[:, 1] * r_reduced[:, 0]).sum() + theta = np.arctan2(Sxy - Syx, Sxx + Syy) # / np.pi * 180 + sa = np.sin(theta) + ca = np.cos(theta) + T = np.array([[ca, -sa, 0], + [sa, ca, 0], + [0, 0, 1]]) + t = r_mean.reshape(2, 1) - T[:2, :2] @ l_mean.reshape(2, 1) + # T = T.T + T[:2, 2:] = t + return T, np.array([*t.squeeze(), theta]) diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/model_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/model_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..c2fa36e16f2def23ba38ad6cd38f66db96467b9e --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/model_utils.py @@ -0,0 +1,322 @@ +import torch +import torch.nn as nn +from collections import OrderedDict + +def fix_bn(m): + classname = m.__class__.__name__ + if classname.find('BatchNorm') != -1: + m.eval() +def unfix_bn(m): + classname = m.__class__.__name__ + if classname.find('BatchNorm') != -1: + m.train() + +def has_trainable_params(module: torch.nn.Module) -> bool: + any_require_grad = any(p.requires_grad for p in module.parameters()) + any_bn_in_train_mode = any(m.training for m in module.modules() if isinstance(m, (torch.nn.BatchNorm1d, torch.nn.BatchNorm2d, torch.nn.BatchNorm3d))) + return any_require_grad or any_bn_in_train_mode + +def has_untrainable_params(module: torch.nn.Module) -> bool: + any_not_require_grad = any((not p.requires_grad) for p in module.parameters()) + any_bn_in_eval_mode = any((not m.training) for m in module.modules() if isinstance(m, (torch.nn.BatchNorm1d, torch.nn.BatchNorm2d, torch.nn.BatchNorm3d))) + return any_not_require_grad or any_bn_in_eval_mode + +def check_trainable_module(model): + appeared_module_list = [] + has_trainable_list = [] + has_untrainable_list = [] + for name, module in model.named_modules(): + if any([name.startswith(appeared_module_name) for appeared_module_name in appeared_module_list]) or name=='': # the whole model 
has name '' + continue + appeared_module_list.append(name) + + if has_trainable_params(module): + has_trainable_list.append(name) + if has_untrainable_params(module): + has_untrainable_list.append(name) + + print("=========Those modules have trainable component=========") + print(*has_trainable_list,sep='\n',end='\n\n') + print("=========Those modules have untrainable component=========") + print(*has_untrainable_list,sep='\n',end='\n\n') + + +def load_model_dict(model, pretrained_dict): + """ load pretrained state dict, keys may not match with model + + Args: + model: nn.Module + + pretrained_dict: collections.OrderedDict + + """ + # 1. filter out unnecessary keys + model_dict = model.state_dict() + pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} + # 2. overwrite entries in the existing state dict + model_dict.update(pretrained_dict) + # 3. load the new state dict + model.load_state_dict(model_dict) + return model + + +def weight_init(m): + if isinstance(m, nn.Linear): + nn.init.xavier_normal_(m.weight.data, gain=0.1) + if hasattr(m.bias, 'data'): + nn.init.constant_(m.bias.data, 0) + + elif isinstance(m, nn.Conv2d): + nn.init.xavier_normal_(m.weight, gain=0.1) + # if hasattr(m, 'bias'): + # nn.init.constant_(m.bias, 0) + + # elif isinstance(m, nn.BatchNorm2d): + # nn.init.xavier_normal_(m.weight, gain=0.05) + # nn.init.constant_(m.bias, 0) + +def rename_model_dict_keys(pretrained_dict_path, rename_dict): + """ load pretrained state dict, keys may not match with model + + Args: + model: nn.Module + + pretrained_dict: collections.OrderedDict + + """ + pretrained_dict = torch.load(pretrained_dict_path) + # 1. filter out unnecessary keys + for oldname, newname in rename_dict.items(): + if oldname.endswith("*"): + _oldnames = list(pretrained_dict.keys()) + _oldnames = [x for x in _oldnames if x.startswith(oldname[:-1])] + for _oldname in _oldnames: + if newname != "": + _newname = _oldname.replace(oldname[:-1], newname[:-1]) + pretrained_dict[_newname] = pretrained_dict[_oldname] + pretrained_dict.pop(_oldname) + else: + if newname != "": + pretrained_dict[newname] = pretrained_dict[oldname] + pretrained_dict.pop(oldname) + torch.save(pretrained_dict, pretrained_dict_path) + + +def compose_model(model1, keyname1, model2, keyname2, output_model): + pretrained_dict1 = torch.load(model1) + pretrained_dict2 = torch.load(model2) + + new_dict = OrderedDict() + for keyname in keyname1: + if keyname.endswith("*"): + _oldnames = list(pretrained_dict1.keys()) + _oldnames = [x for x in _oldnames if x.startswith(keyname[:-1])] + for _oldname in _oldnames: + new_dict[_oldname] = pretrained_dict1[_oldname] + + for keyname in keyname2: + if keyname.endswith("*"): + _oldnames = list(pretrained_dict2.keys()) + _oldnames = [x for x in _oldnames if x.startswith(keyname[:-1])] + for _oldname in _oldnames: + new_dict[_oldname] = pretrained_dict2[_oldname] + + torch.save(new_dict, output_model) + + +def switch_model_dict_keys(pretrained_dict_path, switch_dict): + """ load pretrained state dict, keys may not match with model + + Args: + model: nn.Module + + pretrained_dict: collections.OrderedDict + + switch_dict: {"cls_head_lidar": "cls_head_camera"} + """ + pretrained_dict = torch.load(pretrained_dict_path) + # 1. 
filter out unnecessary keys + for key1, key2 in switch_dict.items(): + all_model_keys = list(pretrained_dict.keys()) + all_key1_weight = [x for x in all_model_keys if x.startswith(key1)] + for key1_weight_name in all_key1_weight: + key2_weight_name = key1_weight_name.replace(key1, key2) + + pretrained_dict[key1_weight_name], pretrained_dict[key2_weight_name] = \ + pretrained_dict[key2_weight_name], pretrained_dict[key1_weight_name] + + torch.save(pretrained_dict, pretrained_dict_path) + + +def rename_m3_to_m4(pretrain_dict): + new_dict = OrderedDict() + for oldname, v in pretrain_dict.items(): + if 'm3.' in oldname: + print(oldname) + newname = oldname.replace("m3.","m4.") + new_dict[newname] = pretrain_dict[oldname] + else: + new_dict[oldname] = pretrain_dict[oldname] + return new_dict + +def create_m1m2m3m4_inter_model(m1m2m3_heter_model, m1m2m4_heter_model): + final_model = OrderedDict() + for k, v in m1m2m3_heter_model.items(): + if k not in final_model: + final_model[k] = v + + for k, v in m1m2m4_heter_model.items(): + if k not in final_model: + final_model[k] = v + + return final_model + +def create_m1m2m3m4_model(m1_late_model, m2_late_model, m3_late_model, m4_late_model): + final_model = OrderedDict() + for k, v in m1_late_model.items(): + if k not in final_model: + final_model[k] = v + else: + print(k, 'is already added.') + print('m1 finish') + + for k, v in m2_late_model.items(): + if k not in final_model: + final_model[k] = v + else: + print(k, 'is already added.') + print('m2 finish') + + for k, v in m3_late_model.items(): + if k not in final_model: + final_model[k] = v + else: + print(k, 'is already added.') + print('m3 finish') + + for k, v in m4_late_model.items(): + if k not in final_model: + final_model[k] = v + else: + print(k, 'is already added.') + print('m4 finish') + + return final_model + +def make_final_inter_model(m1m2m3_heter_model_path, + m1m2m3tom4_heter_model_path): + m1m2m3_heter_model = torch.load(m1m2m3_heter_model_path) + + m1m2m3tom4_heter_model = torch.load(m1m2m3tom4_heter_model_path) + m1m2m4_heter_model = rename_m3_to_m4(m1m2m3tom4_heter_model) + + + final_model = create_m1m2m3m4_inter_model(m1m2m3_heter_model, + m1m2m4_heter_model) + return final_model + + +def make_m1m2m3m4_model(m1_model_path, + m2_model_path, + m3_model_path, + m4_model_path, + out_path): + + m1_model = torch.load(m1_model_path, map_location='cpu') + m2_model = torch.load(m2_model_path, map_location='cpu') + m3_model = torch.load(m3_model_path, map_location='cpu') + m4_model = torch.load(m4_model_path, map_location='cpu') + + + final_model = create_m1m2m3m4_model(m1_model, + m2_model, + m3_model, + m4_model) + torch.save(final_model, out_path) + +def single_model_to_inter_model(model_path, output_path, modality_name='m1'): + pretrain_dict = torch.load(model_path) + newdict = OrderedDict() + for k,v in pretrain_dict.items(): + if k.startswith('shrink') or k.startswith('shrink') or k.startswith('cls_head') \ + or k.startswith('reg_head') or k.startswith('dir_head'): + newdict[k.replace(f"_{modality_name}", '')] = v + elif k.startswith('layer'): + newdict[k.replace(f"layers_{modality_name}", 'backbone')] = v + else: + newdict[k] = v + torch.save(newdict, output_path) + + +if __name__ == "__main__": + + make_m1m2m3m4_model( + 'opencood/logs/FedHCP_opv2v_m1_pointpillars_140.8_40_align_to_m3_singlesup/net_epoch_bestval_at29.pth', + 'opencood/logs/FedHCP_opv2v_m2_LSSeff_140.8_40_align_to_m3_singlesup_warp/net_epoch_bestval_at21.pth', + 
'opencood/logs/FedHCP_opv2v_m4_LSSres_140.8_40_align_to_m3_singlesup_warp/net_epoch_bestval_at25.pth', + 'opencood/logs/FedHCP_opv2v_m4_LSSres_140.8_40_align_to_m3_singlesup_warp/net_epoch_bestval_at25.pth', + 'opencood/logs/FedHCP_final_m3base_new/net_epoch1.pth' + ) + + + # make_m1m2m3m4_model( + # '/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/FedHCP_PRETRAIN/single_modality_intermediate/m1_pointpillar_msmax_epoch25.pth', + # '/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/FedHCP_PRETRAIN/single_modality/m2_LSSeff_epoch21.pth', + # '/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/FedHCP_PRETRAIN/single_modality/m3_SECOND32_epoch29.pth', + # '/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/FedHCP_PRETRAIN/single_modality/m4_LSSres_epoch33.pth', + # '/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/FedHCP_PRETRAIN/align_to_m1_4_modality/m1_pointpillar_col_m2_LSSeff_m3_SECOND32_m4_LSSres.pth' + # ) + + # m3_model = torch.load('/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/A_opv2v_heter_pretrain/m4_pretrain/LSSres_net_epoch_bestval_at13.pth') + # m4_model = rename_m3_to_m4(m3_model) + # torch.save(m3_model, "/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/A_opv2v_heter_pretrain/m4_pretrain/LSSres_net_epoch_bestval_at13_m4.pth") + + # finel_model = make_final_inter_model(m1m2m3_heter_model_path='/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/m1_pointpillars_m2_lsseff_m3_SECOND_sharedhead_convnext_block3/net_epoch_bestval_at19.pth', + # m1m2m3tom4_heter_model_path='/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/m1_pointpillars_m2_lsseff_m3_lssres_sharedhead_convnext_block3/net_epoch_bestval_at9.pth') + + # torch.save(finel_model, "/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/m1_pointpillars_m2_lsseff_m3_SECOND_m4_lssres/net_epoch1.pth") + + # final_model = make_final_late_model() + # torch.save(final_model, "/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/m1_pointpillars_m2_lsseff_m3_SECOND_m4_lssres_late/net_epoch1.pth") + + # dict_path = "/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/A_v2xset_heter_lidar_and_camera_pretrain_switch_layer2_layer3_shrink_head/net_epoch1.pth" + # switch_dict = {"lidar_backbone.resnet.layer1": "camera_backbone.resnet.layer1", + # "lidar_backbone.renset.layer2": "camera_backbone.resnet.layer2", + # "cls_head_lidar": "cls_head_camera", + # "reg_head_lidar": "reg_head_camera", + # "dir_head_lidar": "dir_head_camera", + # "shrink_lidar":"shrink_camera"} + # switch_model_dict_keys(dict_path, switch_dict) + + # dict_path = "/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/A_v2xset_heter_camera_pretrain_8x_64/net_epoch1.pth" + # rename_dict = {"camera_encoder.*": "", + # "camera_backbone.*": "", + # "shrink_camera.*": "", + # "cls_head_camera.*": "", + # "reg_head_camera.*": "", + # "dir_head_camera.*": "",} + # rename_model_dict_keys(dict_path, rename_dict) + + # dict_path = "/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/v2xset_heter_late_fusion/net_epoch_bestval_at28.pth" + # rename_dict = {"camencode.*": "camera_encoder.camencode.*", + # "bevencode.*": "camera_encoder.bevencode.*", + # "head.cls_head.*": "cls_head_camera.*", + # "head.reg_head.*": "reg_head_camera.*", + # "head.dir_head.*": "dir_head_camera.*", + # "shrink_conv.*": "shrink_camera.*"} + # rename_model_dict_keys(dict_path, rename_dict) + + # model1 = "/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/A_opv2v_heter_pretrain/m1m2_pretrain_for_late_fusion/m1_pointpillars_m2_lsseff.pth" # 
lidar + # model2 = "/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/A_opv2v_heter_pretrain/m3_pretrain/LSSres_net_epoch_bestval_at13.pth" # cam + # output_model = "/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/A_opv2v_heter_pretrain/m1m2m3_pretrain_for_late_fusion/m1_pointpillars_m2_lsseff_m3_lssres.pth" + # keyname1 = ['*',] + # keyname2 = ['*',] + # compose_model(model1, keyname1, model2, keyname2, output_model) + + # dict_path = "/GPFS/rhome/yifanlu/workspace/OpenCOODv2/opencood/logs/v2xset_heter_late_fusion/net_epoch1.pth" + # rename_dict = {"camera_encoder.*": "", + # "head.cls_head_camera.*": "", + # "head.reg_head_camera.*": "", + # "head.dir_head_camera.*": "", + # "shrink_camera.*": ""} + # rename_model_dict_keys(dict_path, rename_dict) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/occ_render.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/occ_render.py new file mode 100644 index 0000000000000000000000000000000000000000..2135cc7263f6ce6646a3785e56b69c6880fecfad --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/occ_render.py @@ -0,0 +1,147 @@ +""" +Functions to render occupancy map from bounding boxes +""" + + +import os +import copy +import re +import io +import logging +import json +import numpy as np +import torch +import carla +import cv2 +import math +import datetime +import pathlib +import torch.utils.data as data +from torchvision import transforms +from PIL import Image +from skimage.measure import block_reduce +import time +import matplotlib.pyplot as plt +import torch.nn.functional as F +import pygame + +def box2occ(infer_result): + + det_range = [36, 12, 12, 12, 0.25] + + attrib_list = ['pred_box_tensor', 'pred_score', 'gt_box_tensor'] + for attrib in attrib_list: + if isinstance(infer_result[attrib], list): + infer_result_tensor = [] + for i in range(len(infer_result[attrib])): + if infer_result[attrib][i] is not None: + infer_result_tensor.append(infer_result[attrib][i]) + if len(infer_result_tensor)>0: + infer_result[attrib] = torch.cat(infer_result_tensor, dim=0) + else: + infer_result[attrib] = None + + ### filte out ego box + if not infer_result['pred_box_tensor'] is None: + if len(infer_result['pred_box_tensor']) > 0: + tmp = infer_result['pred_box_tensor'][:,:,0].clone() + infer_result['pred_box_tensor'][:,:,0]=infer_result['pred_box_tensor'][:,:,1] + infer_result['pred_box_tensor'][:,:,1] = tmp + # measurements = car_data_raw[0]['measurements'] + num_object = infer_result['pred_box_tensor'].shape[0] + # if num_object > 0: + object_list = [] + # transform from lidar pose to ego pose + for i in range(num_object): + transformed_box = infer_result['pred_box_tensor'][i].cpu().numpy() + transformed_box[:,1] += 1.3 + + + location_box = np.mean(transformed_box[:4,:2], 0) + if np.linalg.norm(location_box) < 1.4: + continue + object_list.append(torch.from_numpy(transformed_box)) + if len(object_list) > 0: + processed_pred_box = torch.stack(object_list, dim=0) + else: + processed_pred_box = infer_result['pred_box_tensor'][:0] + else: + processed_pred_box = [] # infer_result['pred_box_tensor'] + + ### turn boxes into occupancy map + if len(processed_pred_box) > 0: + occ_map = turn_traffic_into_map(processed_pred_box[:,:4,:2].cpu(), det_range) + else: + occ_map = turn_traffic_into_map(processed_pred_box, det_range) + + # # N, K, H, W, C=7 + # occ_map = turn_traffic_into_map(pred_traffic, self.det_range) + occ_map_shape = occ_map.shape + occ_map = 
torch.from_numpy(occ_map).cuda().contiguous().view((-1, 1) + occ_map_shape[1:])
+
+    return occ_map
+
+def transform_2d_points(xyz, r1, t1_x, t1_y, r2, t2_x, t2_y):
+    """
+    Build a rotation matrix and take the dot product.
+    """
+    # z value to 1 for rotation
+    xy1 = xyz.copy()
+    xy1[:, 2] = 1
+
+    c, s = np.cos(r1), np.sin(r1)
+    # rotate by angle r1: transform from the r1 vehicle frame to the world frame
+    r1_to_world = np.matrix([[c, -s, t1_x], [s, c, t1_y], [0, 0, 1]])
+
+    # np.dot converts to a matrix, so we explicitly change it back to an array
+    world = np.asarray(r1_to_world @ xy1.T)
+
+    c, s = np.cos(r2), np.sin(r2)
+    r2_to_world = np.matrix([[c, -s, t2_x], [s, c, t2_y], [0, 0, 1]])
+    # world frame -> r2 frame
+    # if r1==r2, do nothing
+    world_to_r2 = np.linalg.inv(r2_to_world)
+
+    out = np.asarray(world_to_r2 @ world).T
+    # reset z-coordinate
+    out[:, 2] = xyz[:, 2]
+
+    return out
+
+def turn_traffic_into_map(all_bbox, det_range):
+    data_total = []
+    for idx in range(1):
+
+        if len(all_bbox) == 0:
+            all_bbox = np.zeros((1,4,2))
+        # plt.cla()
+
+        fig = plt.figure(figsize=(6, 12), dpi=16)
+        plt.gca().xaxis.set_major_locator(plt.NullLocator())
+        plt.gca().yaxis.set_major_locator(plt.NullLocator())
+        plt.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0, hspace = 0, wspace = 0)
+        plt.margins(0,0)
+        ax = plt.gca()
+        ax.set_facecolor("black")
+
+        plt.xlim((-det_range[2], det_range[3]))
+        plt.ylim((-det_range[1], det_range[0]))
+
+        for i in range(len(all_bbox)):
+            plt.fill(all_bbox[i,:,0], all_bbox[i,:,1], color = 'white')
+
+        # plt.axis('off')
+        # If we haven't already shown or saved the plot, then we need to
+        # draw the figure first...
+        fig.canvas.draw()
+
+        # Now we can save it to a numpy array.
+        data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
+        data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))
+        # H=192, W=96, 3
+        data_total.append(data[:, :, 0])
+        # plt.savefig('/GPFS/public/InterFuser/results/cop3/pnp/multiclass_finetune_fusion_none/test.png')
+        plt.close()
+
+    occ_map = np.stack(data_total, axis=0) # B * T_p, H, W
+    return occ_map
\ No newline at end of file
diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/pcd_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/pcd_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..20acff058e8ef0368c783cf3369524c916dc5eb2
--- /dev/null
+++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/pcd_utils.py
@@ -0,0 +1,235 @@
+# -*- coding: utf-8 -*-
+# Author: Runsheng Xu , Hao Xiang ,
+# License: TDG-Attribution-NonCommercial-NoDistrib
+
+
+"""
+Utility functions related to point cloud
+"""
+
+import open3d as o3d
+import numpy as np
+from pypcd import pypcd
+
+def pcd_to_np(pcd_file):
+    """
+    Read pcd and return numpy array.
+
+    Parameters
+    ----------
+    pcd_file : str
+        The pcd file that contains the point cloud.
+
+    Returns
+    -------
+    pcd_np : np.ndarray
+        The lidar data in numpy format, shape:(n, 4)
+
+    """
+    pcd = o3d.io.read_point_cloud(pcd_file)
+
+    xyz = np.asarray(pcd.points)
+    # we save the intensity in the first channel
+    intensity = np.expand_dims(np.asarray(pcd.colors)[:, 0], -1)
+    pcd_np = np.hstack((xyz, intensity))
+
+    return np.asarray(pcd_np, dtype=np.float32)
+
+
+def mask_points_by_range(points, limit_range):
+    """
+    Remove the lidar points out of the boundary.
+
+    Parameters
+    ----------
+    points : np.ndarray
+        Lidar points under lidar sensor coordinate system.
+ + limit_range : list + [x_min, y_min, z_min, x_max, y_max, z_max] + + Returns + ------- + points : np.ndarray + Filtered lidar points. + """ + + mask = (points[:, 0] > limit_range[0]) & (points[:, 0] < limit_range[3])\ + & (points[:, 1] > limit_range[1]) & ( + points[:, 1] < limit_range[4]) \ + & (points[:, 2] > limit_range[2]) & ( + points[:, 2] < limit_range[5]) + + points = points[mask] + + return points + + +def mask_ego_points(points): + """ + Remove the lidar points of the ego vehicle itself. + + Parameters + ---------- + points : np.ndarray + Lidar points under lidar sensor coordinate system. + + Returns + ------- + points : np.ndarray + Filtered lidar points. + """ + mask = (points[:, 0] >= -1.95) & (points[:, 0] <= 2.95) \ + & (points[:, 1] >= -1.1) & (points[:, 1] <= 1.1) + points = points[np.logical_not(mask)] + + return points + +def mask_ego_points_v2(points): + """ + Remove the lidar points of the ego vehicle itself. + + Parameters + ---------- + points : np.ndarray + Lidar points under lidar sensor coordinate system. + + Returns + ------- + points : np.ndarray + Filtered lidar points. + """ + mask = (points[:, 0] >= -2.95) & (points[:, 0] <= 1.95) \ + & (points[:, 1] >= -1.1) & (points[:, 1] <= 1.1) + points = points[np.logical_not(mask)] + + return points + + +def shuffle_points(points): + shuffle_idx = np.random.permutation(points.shape[0]) + points = points[shuffle_idx] + + return points + + +def lidar_project(lidar_data, extrinsic): + """ + Given the extrinsic matrix, project lidar data to another space. + + Parameters + ---------- + lidar_data : np.ndarray + Lidar data, shape: (n, 4) + + extrinsic : np.ndarray + Extrinsic matrix, shape: (4, 4) + + Returns + ------- + projected_lidar : np.ndarray + Projected lida data, shape: (n, 4) + """ + + lidar_xyz = lidar_data[:, :3].T + # (3, n) -> (4, n), homogeneous transformation + lidar_xyz = np.r_[lidar_xyz, [np.ones(lidar_xyz.shape[1])]] + lidar_int = lidar_data[:, 3] + + # transform to ego vehicle space, (3, n) + project_lidar_xyz = np.dot(extrinsic, lidar_xyz)[:3, :] + # (n, 3) + project_lidar_xyz = project_lidar_xyz.T + # concatenate the intensity with xyz, (n, 4) + projected_lidar = np.hstack((project_lidar_xyz, + np.expand_dims(lidar_int, -1))) + + return projected_lidar + + +def projected_lidar_stack(projected_lidar_list): + """ + Stack all projected lidar together. + + Parameters + ---------- + projected_lidar_list : list + The list containing all projected lidar. + + Returns + ------- + stack_lidar : np.ndarray + Stack all projected lidar data together. + """ + stack_lidar = [] + for lidar_data in projected_lidar_list: + stack_lidar.append(lidar_data) + + return np.vstack(stack_lidar) + + +def downsample_lidar(pcd_np, num): + """ + Downsample the lidar points to a certain number. + + Parameters + ---------- + pcd_np : np.ndarray + The lidar points, (n, 4). + + num : int + The downsample target number. + + Returns + ------- + pcd_np : np.ndarray + The downsampled lidar points. + """ + assert pcd_np.shape[0] >= num + + selected_index = np.random.choice((pcd_np.shape[0]), + num, + replace=False) + pcd_np = pcd_np[selected_index] + + return pcd_np + + +def downsample_lidar_minimum(pcd_np_list): + """ + Given a list of pcd, find the minimum number and downsample all + point clouds to the minimum number. + + Parameters + ---------- + pcd_np_list : list + A list of pcd numpy array(n, 4). + Returns + ------- + pcd_np_list : list + Downsampled point clouds. 
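+
+    Examples
+    --------
+    Illustrative only; random arrays stand in for real lidar scans.
+
+    >>> pcd_list = [np.random.rand(1000, 4), np.random.rand(800, 4)]
+    >>> pcd_list = downsample_lidar_minimum(pcd_list)  # both entries now hold 800 points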
+ """ + minimum = np.Inf + + for i in range(len(pcd_np_list)): + num = pcd_np_list[i].shape[0] + minimum = num if minimum > num else minimum + + for (i, pcd_np) in enumerate(pcd_np_list): + pcd_np_list[i] = downsample_lidar(pcd_np, minimum) + + return pcd_np_list + +def read_pcd(pcd_path): + pcd = pypcd.PointCloud.from_path(pcd_path) + time = None + pcd_np_points = np.zeros((pcd.points, 4), dtype=np.float32) + pcd_np_points[:, 0] = np.transpose(pcd.pc_data["x"]) + pcd_np_points[:, 1] = np.transpose(pcd.pc_data["y"]) + pcd_np_points[:, 2] = np.transpose(pcd.pc_data["z"]) + pcd_np_points[:, 3] = np.transpose(pcd.pc_data["intensity"]) / 256.0 + del_index = np.where(np.isnan(pcd_np_points))[0] + pcd_np_points = np.delete(pcd_np_points, del_index, axis=0) + return pcd_np_points, time \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/pose_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/pose_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..9ae569febb73240ff48600fde4f34cac0367e646 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/pose_utils.py @@ -0,0 +1,191 @@ +import numpy as np +import torch +import torch.distributions as dist + +def add_noise_data_dict(data_dict, noise_setting): + """ Update the base data dict. + We retrieve lidar_pose and add_noise to it. + And set a clean pose. + """ + if noise_setting['add_noise']: + for cav_id, cav_content in data_dict.items(): + cav_content['params']['lidar_pose_clean'] = cav_content['params']['lidar_pose'] # 6 dof pose + + if "laplace" in noise_setting['args'].keys() and noise_setting['args']['laplace'] is True: + cav_content['params']['lidar_pose'] = cav_content['params']['lidar_pose'] + \ + generate_noise_laplace( # we just use the same key name + noise_setting['args']['pos_std'], + noise_setting['args']['rot_std'], + noise_setting['args']['pos_mean'], + noise_setting['args']['rot_mean'] + ) + else: + cav_content['params']['lidar_pose'] = cav_content['params']['lidar_pose'] + \ + generate_noise( + noise_setting['args']['pos_std'], + noise_setting['args']['rot_std'], + noise_setting['args']['pos_mean'], + noise_setting['args']['rot_mean'] + ) + + else: + for cav_id, cav_content in data_dict.items(): + cav_content['params']['lidar_pose_clean'] = cav_content['params']['lidar_pose'] # 6 dof pose + + + return data_dict + +def add_noise_data_dict_asymmetric(data_dict, noise_setting): + """ Update the base data dict. + We retrieve lidar_pose and add_noise to it. + And set a clean pose. 
+ This function add pose error noise for agents with asymmetric detection range + """ + if noise_setting['add_noise']: + for cav_id, cav_content in data_dict.items(): + cav_content['params']['lidar_pose_clean'] = cav_content['params']['lidar_pose'] # 6 dof pose + + if "laplace" in noise_setting['args'].keys() and noise_setting['args']['laplace'] is True: + noise = generate_noise_laplace( # we just use the same key name + noise_setting['args']['pos_std'], + noise_setting['args']['rot_std'], + noise_setting['args']['pos_mean'], + noise_setting['args']['rot_mean'] + ) + cav_content['params']['lidar_pose'] = cav_content['params']['lidar_pose'] + noise + cav_content['params']['map_pose'] = cav_content['params']['map_pose'] + noise + else: + noise = generate_noise( + noise_setting['args']['pos_std'], + noise_setting['args']['rot_std'], + noise_setting['args']['pos_mean'], + noise_setting['args']['rot_mean'] + ) + cav_content['params']['lidar_pose'] = cav_content['params']['lidar_pose'] + noise + cav_content['params']['map_pose'] = cav_content['params']['map_pose'] + noise + else: + for cav_id, cav_content in data_dict.items(): + cav_content['params']['lidar_pose_clean'] = cav_content['params']['lidar_pose'] # 6 dof pose + + + return data_dict + + +def generate_noise(pos_std, rot_std, pos_mean=0, rot_mean=0): + """ Add localization error to the 6dof pose + Noise includes position (x,y) and rotation (yaw). + We use gaussian distribution to generate noise. + + Args: + + pos_std : float + std of gaussian dist, in meter + + rot_std : float + std of gaussian dist, in degree + + pos_mean : float + mean of gaussian dist, in meter + + rot_mean : float + mean of gaussian dist, in degree + + Returns: + pose_noise: np.ndarray, [6,] + [x, y, z, roll, yaw, pitch] + """ + + xy = np.random.normal(pos_mean, pos_std, size=(2)) + yaw = np.random.normal(rot_mean, rot_std, size=(1)) + + pose_noise = np.array([xy[0], xy[1], 0, 0, yaw[0], 0]) + + + return pose_noise + + + +def generate_noise_laplace(pos_b, rot_b, pos_mu=0, rot_mu=0): + """ Add localization error to the 6dof pose + Noise includes position (x,y) and rotation (yaw). + We use laplace distribution to generate noise. + + Args: + + pos_b : float + parameter b of laplace dist, in meter + + rot_b : float + parameter b of laplace dist, in degree + + pos_mu : float + mean of laplace dist, in meter + + rot_mu : float + mean of laplace dist, in degree + + Returns: + pose_noise: np.ndarray, [6,] + [x, y, z, roll, yaw, pitch] + """ + + xy = np.random.laplace(pos_mu, pos_b, size=(2)) + yaw = np.random.laplace(rot_mu, rot_b, size=(1)) + + pose_noise = np.array([xy[0], xy[1], 0, 0, yaw[0], 0]) + return pose_noise + + +def generate_noise_torch(pose, pos_std, rot_std, pos_mean=0, rot_mean=0): + """ only used for v2vnet robust. + rotation noise is sampled from von_mises distribution + + Args: + pose : Tensor, [N. 
6] + including [x, y, z, roll, yaw, pitch] + + pos_std : float + std of gaussian dist, in meter + + rot_std : float + std of gaussian dist, in degree + + pos_mean : float + mean of gaussian dist, in meter + + rot_mean : float + mean of gaussian dist, in degree + + Returns: + pose_noisy: Tensor, [N, 6] + noisy pose + """ + + N = pose.shape[0] + noise = torch.zeros_like(pose, device=pose.device) + concentration = (180 / (np.pi * rot_std)) ** 2 + + noise[:, :2] = torch.normal(pos_mean, pos_std, size=(N, 2), device=pose.device) + noise[:, 4] = dist.von_mises.VonMises(loc=rot_mean, concentration=concentration).sample((N,)).to(noise.device) + + + return noise + + +def remove_z_axis(T): + """ remove rotation/translation related to z-axis + Args: + T: np.ndarray + [4, 4] + Returns: + T: np.ndarray + [4, 4] + """ + T[2,3] = 0 # z-trans + T[0,2] = 0 + T[1,2] = 0 + T[2,0] = 0 + T[2,1] = 0 + T[2,2] = 1 + + return T \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/setup.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..09eda12a658db87893c88101aa702cebc6f4deac --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/setup.py @@ -0,0 +1,8 @@ +from distutils.core import setup +from Cython.Build import cythonize +import numpy +setup( + name='box overlaps', + ext_modules=cythonize('opencood/utils/box_overlaps.pyx'), + include_dirs=[numpy.get_include()] +) \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/spconv_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/spconv_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..344f519508b38cfc9f86da5fb42291d5e4264046 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/spconv_utils.py @@ -0,0 +1,164 @@ +import spconv +import torch +import numpy as np +from spconv.modules import SparseModule +from opencood.utils.box_utils import project_points_by_matrix_torch +from torch_scatter import scatter +from icecream import ic + +class RemoveDuplicate(SparseModule): + """ + Only keep one when duplicated + """ + def forward(self, x: spconv.SparseConvTensor): + inds = x.indices + spatial_shape = [x.batch_size, *x.spatial_shape] + spatial_stride = [0] * len(spatial_shape) + val = 1 + for i in range(inds.shape[1] - 1, -1, -1): + spatial_stride[i] = val + val *= spatial_shape[i] + indices_index = inds[:, -1].clone() + + for i in range(len(spatial_shape) - 1): + indices_index += spatial_stride[i] * inds[:, i] + + _, unique_inds = torch.unique(indices_index, return_inverse=True) + unique_inds = torch.unique(unique_inds) + new_inds = inds[unique_inds] + new_features = x.features[unique_inds] + res = spconv.SparseConvTensor(new_features, new_inds, x.spatial_shape, + x.batch_size, x.grid) + return res + +class MergeDuplicate(SparseModule): + def __init__(self, reduce="max"): + super().__init__() + self.reduce = reduce + def forward(self, x: spconv.SparseConvTensor): + inds = x.indices + spatial_shape = [x.batch_size, *x.spatial_shape] + spatial_stride = [0] * len(spatial_shape) + val = 1 + for i in range(inds.shape[1] - 1, -1, -1): + spatial_stride[i] = val + val *= spatial_shape[i] + indices_index = inds[:, -1].clone() + + for i in range(len(spatial_shape) - 1): + indices_index += spatial_stride[i] * inds[:, i] + + _, unique_inds = torch.unique(indices_index, return_inverse=True) # [0, 1, 0] + + 
scatter_feature = x.features # [N_point, features] + scatter_indices = unique_inds # [N_point, ] + + out_feature = scatter(scatter_feature, scatter_indices, dim=0, reduce=self.reduce) # [N', num_features] + out_indices = scatter(scatter_indices, scatter_indices, dim=0, reduce="mean") + out_indices = inds[out_indices] # [N', ndim+1] + + res = spconv.SparseConvTensor(out_feature, out_indices, x.spatial_shape, + x.batch_size, x.grid) + return res + + +def fuseSparseTensor(x_list): + """ + Suppose same spatial shape. + Need eliminate same pos tensor later + """ + new_features = torch.cat([x.features for x in x_list], dim=0) + new_indice = torch.cat([x.indices for x in x_list], dim=0) + res = spconv.SparseConvTensor(new_features, new_indice, x_list[0].spatial_shape, + x_list[0].batch_size, x_list[0].grid) + return res + + +class warpSparseTensor(SparseModule): + """ + warp the sparse tensor. + 1. Retrieve the indices + 2. turn indices to grid point + 3. transform grid point + 4. turn back to indices + 5. construct new sparse tensor + Args: + x: SparseTensor, + spatial_shape:(z,y,x) + transformation: torch.Tensor + [4,4] + voxel_size: torch.Tensor + [v_x, v_y, v_z] + range3d: list + [xmin, xmax, ymin, ymax, zmin, zmax] + + """ + def indices_to_point(self, indices, transformation_matrix, voxel_size, range3d): + """ + indices: [batch_id, z, y, x] + """ + indices_xyz = indices[:,[3,2,1]].clone().double() # [x, y, z] + indices_xyz[:,0] += torch.div(range3d[0], voxel_size[0]) + indices_xyz[:,1] += torch.div(range3d[1], voxel_size[1]) + indices_xyz[:,2] += torch.div(range3d[2], voxel_size[2]) + indices_xyz += 0.5 + + points_xyz = indices_xyz * voxel_size # [N_points, 3] + points_xyz_new = project_points_by_matrix_torch(points_xyz, transformation_matrix) + + return points_xyz_new + + def construct_new_tensor(self, x, points_xyz, voxel_size, range3d): + """ + points_new: tensor + [N_points, ndim + 1], first dim is batch id + """ + mask = (points_xyz[:, 0] > range3d[0]) & (points_xyz[:, 0] < range3d[3])\ + & (points_xyz[:, 1] > range3d[1]) & (points_xyz[:, 1] < range3d[4]) \ + & (points_xyz[:, 2] > range3d[2]) & (points_xyz[:, 2] < range3d[5]) + + features_new = x.features[mask] + points_xyz = points_xyz[mask] + new_indices = x.indices[mask].clone() + + new_indices_xyz = torch.div(points_xyz, voxel_size) + + new_indices_xyz[:,0] -= torch.div(range3d[0], voxel_size[0]) + new_indices_xyz[:,1] -= torch.div(range3d[1], voxel_size[1]) + new_indices_xyz[:,2] -= torch.div(range3d[2], voxel_size[2]) + + new_indices[:,1:] = new_indices_xyz[:,[2,1,0]].long() + + return spconv.SparseConvTensor(features_new, new_indices, x.spatial_shape, x.batch_size, x.grid) + + + def forward(self, x, transformation_matrix, voxel_size, range3d): + points_new = self.indices_to_point(x.indices, transformation_matrix, voxel_size, range3d) + return self.construct_new_tensor(x, points_new, voxel_size, range3d) + + + + +def test(): + feature1 = torch.randn(2,8) + feature2 = torch.randn(2,8) + indices1 = torch.Tensor([[0,0,1,2],[0,0,2,3]]) + indices2 = torch.Tensor([[0,0,1,3],[0,0,2,4]]) + spatial_shape = (4,3,5) # z,y,x + batch_size = 1 + + voxel_size = (0.4, 0.4, 1) + pc_range = [-40, -40, -3, 40, 40, 1] + tfm = torch.eye(4) + tfm[1,3] += 2 + warpsp = warpSparseTensor() + sp1 = spconv.SparseConvTensor(feature1, indices1,spatial_shape, batch_size) + sp2 = warpsp(sp1, tfm, voxel_size, pc_range) + ic(sp1.features) + ic(sp1.indices) + ic(sp2.features) + ic(sp2.indices) + + +if __name__ == "__main__": + test() \ No newline at end of file 
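
A brief usage sketch for the pose-noise helpers defined in pose_utils.py above. It is not part of the original scripts: the import path, the std/mean values, and the minimal data_dict layout are illustrative assumptions (only the fields that add_noise_data_dict actually touches are included).

import numpy as np
from opencood.utils.pose_utils import add_noise_data_dict  # assumed import path

# noise_setting layout as consumed by add_noise_data_dict; the values are made up
noise_setting = {
    'add_noise': True,
    'args': {'pos_std': 0.2, 'rot_std': 0.2, 'pos_mean': 0, 'rot_mean': 0}
}

# minimal data_dict: one cav with a 6-DoF lidar pose [x, y, z, roll, yaw, pitch]
data_dict = {
    0: {'params': {'lidar_pose': np.array([10.0, 2.0, 1.9, 0.0, 90.0, 0.0])}}
}

data_dict = add_noise_data_dict(data_dict, noise_setting)
print(data_dict[0]['params']['lidar_pose'])        # perturbed pose used downstream
print(data_dict[0]['params']['lidar_pose_clean'])  # original pose preserved alongside it
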
diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/subsampling_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/subsampling_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..b8b6fd566361abe0d630541cfd58bc995e200842 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/subsampling_utils.py @@ -0,0 +1,254 @@ +""" +https://github.com/AmnonDrory/BestBuddiesRegistration/blob/main/code/bb_pc/utils/subsampling.py +""" + +import numpy as np +import open3d as o3d +import pandas as pd +from copy import deepcopy + +num_features = 3 + +def calc_bin_inds(PC, n_bins, axis, mode): + N = PC.shape[0] + if "adaptive" in mode: + inds = np.round(np.linspace(0, N, n_bins + 1)).astype(int) + s = np.sort(PC[:, axis]) + thresh = s[inds[1:]-1] + else: # "equally_spaced" + thresh = np.linspace(np.min(PC[:,axis]), np.max(PC[:,axis]), n_bins + 1) + thresh = thresh[1:] + + bin_ind = np.zeros(N) + np.nan + for i in range(n_bins): + is_cur = (PC[:, axis] <= thresh[i]) & np.isnan(bin_ind) + bin_ind[is_cur] = i + + assert np.sum(np.isnan(bin_ind)) == 0, "Error: not all samples were assigned to a bin" + + return bin_ind + +def voxelGrid_filter_inner(PC, num_samples, mode): + + if "equal_nbins_per_axis" in mode: + n_bins = int(np.ceil(num_samples ** (1. / 3))) + n_bins_x = n_bins + n_bins_y = n_bins + n_bins_z = n_bins + else: + span = [] + for axis in range(3): + span.append( np.max(PC[:,axis])-np.min(PC[:,axis]) ) + normalized_num_samples = num_samples * (span[0]**2 / (span[1]*span[2])) + n_bins_x = int(np.ceil(normalized_num_samples ** (1. / 3))) + n_bins_y = int(np.ceil(n_bins_x * (span[1]/span[0]))) + n_bins_z = int(np.ceil(n_bins_x * (span[2] / span[0]))) + assert (n_bins_x * n_bins_y * n_bins_z) >= num_samples, "Error" + x_bin_inds = calc_bin_inds(PC, n_bins_x, 0, mode) + y_bin_inds = calc_bin_inds(PC, n_bins_y, 1, mode) + z_bin_inds = calc_bin_inds(PC, n_bins_z, 2, mode) + + data = np.hstack([x_bin_inds.reshape([-1,1]), + y_bin_inds.reshape([-1,1]), + z_bin_inds.reshape([-1,1]), + PC]) + + df = pd.DataFrame(data, columns=['x_ind', 'y_ind', 'z_ind', 'x', 'y', 'z']) + newPC = np.array(df.groupby(['x_ind', 'y_ind', 'z_ind']).mean()) + + return newPC + +def voxelGrid_filter(PC, num_requested_samples, mode): + """ + Sub-sample a point cloud by defining a grid of voxels, and returning the average point in each one. + + :param PC: Nx3 array, point cloud, each row is a sample + :param num_samples: numbver of requested samples + :param mode: list of strings, can contain any of the following: + "exact_number" - return exactly num_requested_samples, otherwise may return more than requested number (but never less) + "equal_nbins_per_axis" - same number of bins for each axis (x,y,z). Otherwise the bins are cube shaped, and usually a different number of bins fits in each of the dimensions. + "adaptive" - smaller bins where there is more data. Otherwise, all bins are the same size. 
+ :return: newPC - a point cloud with approximately num_requested_samples + """ + num_samples = num_requested_samples + N = PC.shape[0] + done = False + MAX_ATTEMPTS = 40 + ACCELERATION_FACTOR = 2 + MAX_DIVERGENCE_TIME = 4 + TOLERANCE = 0.05 + rel_history = [] + newPC_history = [] + while not done: + newPC = voxelGrid_filter_inner(PC, num_samples, mode) + new_N = newPC.shape[0] + newPC_history.append(newPC) + relative_error_in_size = (new_N/float(num_requested_samples)) -1 + rel_history.append(relative_error_in_size) + if (relative_error_in_size < 0) or (relative_error_in_size > TOLERANCE): + best_ind = np.argmin(np.abs(rel_history)) + if (len(rel_history) - best_ind > MAX_DIVERGENCE_TIME) and (np.max(rel_history) > 0): + done = True + else: + num_samples = int(np.ceil(num_samples*float(num_requested_samples)/new_N)) + if (np.max(rel_history) < 0): + num_samples = int(ACCELERATION_FACTOR*num_samples) + + else: + done = True + + if len(rel_history) >= MAX_ATTEMPTS: + done = True + + if len(rel_history) >= MAX_ATTEMPTS: + assert False, "voxelGrid_filter could not supply required number of samples" + print("Error: voxelGrid_filter could not supply required number of samples, recovering") + best_ind = np.argmax(rel_history) + return newPC_history[best_ind] + + rel_history_above_only = np.array(rel_history) + rel_history_above_only[rel_history_above_only<0] = np.inf + best_ind_above = np.argmin(rel_history_above_only) + + newPC = newPC_history[best_ind_above] + if 'exact_number' in mode: + p = np.random.permutation(newPC.shape[0]) + inds = p[:num_requested_samples] + newPC = newPC[inds,:] + + return newPC + +def voxel_filter(pcd, N): + # pcd is of open3d point cloud class + if "numpy" in str(type(pcd)): + tmp = o3d.geometry.PointCloud() + tmp.points = o3d.utility.Vector3dVector(pcd) + pcd = tmp + K = np.shape(pcd.points)[0] + vs = 1e-3 + while K>N: + pcd = o3d.geometry.voxel_down_sample(pcd, voxel_size=vs) + vs *= 2 + K = np.shape(pcd.points)[0] + return pcd + +def calc_distances(p0, points): + return ((p0 - points) ** 2).sum(axis=1) + +def fps_from_given_pc(pts, K, given_pc): + """ + copied from https://github.com/orendv/learning_to_sample/blob/master/reconstruction/src/sample_net_point_net_ae.py + :param self: + :param pts: + :param K: + :param given_pc: + :return: + """ + farthest_pts = np.zeros((K, 3)) + t = given_pc.shape[0] + farthest_pts[0:t,:] = given_pc + + distances = calc_distances(farthest_pts[0], pts) + for i in range(1, t): + distances = np.minimum(distances, calc_distances(farthest_pts[i,:], pts)) + + for i in range(t, K): + farthest_pts[i,:] = pts[np.argmax(distances),:] + distances = np.minimum(distances, calc_distances(farthest_pts[i,:], pts)) + return farthest_pts + +def get_random_subset(PC, num_samples, mode="farthest", submode=None, allow_overask=False): + """ + Subsample a point cloud, using either of various methods + + :param PC: + :param num_samples: + :param mode: + :param n_bins: + :param submode: Relevant for the "r_normalized" and "r_squared_normalized" methods. 
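+ :param allow_overask: if True, return the full point cloud instead of asserting when more samples are requested than there are points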
+ :return: + """ + if num_samples > PC.shape[0]: + if allow_overask: + return PC + else: + assert False, "Error: requesting more samples than there are" + + if PC.shape[0] == num_samples: + result = PC + if mode == "uniform": + inds = np.random.permutation(PC.shape[0])[:num_samples] + result = PC[inds, :] + elif mode == "farthest": + first_ind = np.random.permutation(PC.shape[0])[0] + result = fps_from_given_pc(PC, num_samples, PC[first_ind:(first_ind+1), :]) + elif "voxel" in mode: + if submode is None: + submode = ["equal_nbins_per_axis"] + + # The voxelGrid subsampling algorithm has no randomality. + # we force it to have some by rendomly removing a small subset of the points + + keep_fraction = 0.9 + num_keep = int(PC.shape[0]*keep_fraction) + if num_samples < num_keep: + PC = get_random_subset(PC, num_keep, mode="uniform") + result = voxelGrid_filter(PC, num_samples, submode) + + else: + assert False, "unknown mode" + + return result + +def subsample_fraction(PC, fraction): + N = PC.shape[0] + subset_size = int(np.round(N * fraction)) + inds = np.random.permutation(N)[:subset_size] + return PC[inds,:] + + +def keep_closest(PC, max_dist): + R = np.sqrt(np.sum(PC ** 2, axis=1)) + return PC[R <= max_dist, :] + + +def fit_plane(PC): + xy1 = deepcopy(PC) + xy1[:, 2] = 1 + z = PC[:, 2] + abc, _, _, _ = np.linalg.lstsq(xy1, z, rcond=None) + return abc + + +def is_on_plane(PC, abc, thickness): + all_xy1 = deepcopy(PC) + all_xy1[:, 2] = 1 + predicted_road_z = np.matmul(all_xy1, abc.reshape([-1, 1])).flatten() + res = np.abs(PC[:, 2] - predicted_road_z) <= thickness + return res + +def remove_road(PC): + mode = "plane" # "constant_height" + local_PC = keep_closest(PC, 10) + count, bin_edges = np.histogram(local_PC[:, 2], 100) + bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:]) + ind_of_most_frequent = np.argmax(count) + road_z = bin_centers[ind_of_most_frequent] + road_thickness = 0.5 # meters + if mode == "constant_height": + is_road = np.abs(PC[:, 2] - road_z) <= road_thickness + elif mode == "plane": + raw_is_road = np.abs(local_PC[:, 2] - road_z) <= road_thickness + raw_road_points = local_PC[raw_is_road, :] + xy1 = deepcopy(raw_road_points) + xy1[:, 2] = 1 + z = raw_road_points[:, 2] + abc, _, _, _ = np.linalg.lstsq(xy1, z, rcond=None) + all_xy1 = deepcopy(PC) + all_xy1[:, 2] = 1 + predicted_road_z = np.matmul(all_xy1, abc.reshape([-1, 1])).flatten() + is_road = np.abs(PC[:, 2] - predicted_road_z) <= road_thickness + else: + assert False, "unknown mode" + + return PC[~is_road, :] \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/transformation_utils.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/transformation_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..c574dbe6f48f5e0d70abfcd5207e39f6097c5293 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/transformation_utils.py @@ -0,0 +1,548 @@ +# -*- coding: utf-8 -*- +# Author: Runsheng Xu , Hao Xiang , +# License: TDG-Attribution-NonCommercial-NoDistrib + + +""" +Transformation utils +""" + +from re import X +import numpy as np +import torch +from icecream import ic +from pyquaternion import Quaternion +from opencood.utils.common_utils import check_numpy_to_torch + +def regroup(x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + +def get_pairwise_transformation(base_data_dict, max_cav, proj_first): + """ + Get pair-wise 
transformation matrix accross different agents. + + Parameters + ---------- + base_data_dict : dict + Key : cav id, item: transformation matrix to ego, lidar points. + + max_cav : int + The maximum number of cav, default 5 + + Return + ------ + pairwise_t_matrix : np.array + The pairwise transformation matrix across each cav. + shape: (L, L, 4, 4), L is the max cav number in a scene + pairwise_t_matrix[i, j] is Tji, i_to_j + """ + pairwise_t_matrix = np.tile(np.eye(4), (max_cav, max_cav, 1, 1)) # (L, L, 4, 4) + + if proj_first: + # if lidar projected to ego first, then the pairwise matrix + # becomes identity + # no need to warp again in fusion time. + + # pairwise_t_matrix[:, :] = np.identity(4) + return pairwise_t_matrix + else: + t_list = [] + + # save all transformation matrix in a list in order first. + for cav_id, cav_content in base_data_dict.items(): + lidar_pose = cav_content['params']['lidar_pose'] + t_list.append(x_to_world(lidar_pose)) # Twx + + for i in range(len(t_list)): + for j in range(len(t_list)): + # identity matrix to self + if i != j: + # i->j: TiPi=TjPj, Tj^(-1)TiPi = Pj + # t_matrix = np.dot(np.linalg.inv(t_list[j]), t_list[i]) + t_matrix = np.linalg.solve(t_list[j], t_list[i]) # Tjw*Twi = Tji + pairwise_t_matrix[i, j] = t_matrix + + return pairwise_t_matrix + +def get_pairwise_transformation_asymmetric(base_data_dict, max_cav, proj_first): + """ + Get pair-wise transformation matrix accross different agents with detection range being asymmetric. + + Parameters + ---------- + base_data_dict : dict + Key : cav id, item: transformation matrix to ego, lidar points. + + max_cav : int + The maximum number of cav, default 5 + + Return + ------ + pairwise_t_matrix : np.array + The pairwise transformation matrix across each cav. + shape: (L, L, 4, 4), L is the max cav number in a scene + pairwise_t_matrix[i, j] is Tji, i_to_j + """ + pairwise_t_matrix = np.tile(np.eye(4), (max_cav, max_cav, 1, 1)) # (L, L, 4, 4) + + if proj_first: + # if lidar projected to ego first, then the pairwise matrix + # becomes identity + # no need to warp again in fusion time. + + # pairwise_t_matrix[:, :] = np.identity(4) + return pairwise_t_matrix + else: + t_list = [] + # save all transformation matrix in a list in order first. + for cav_id, cav_content in base_data_dict.items(): + lidar_pose = cav_content['params']['map_pose'] + t_list.append(x_to_world(lidar_pose)) # Twx + + for i in range(len(t_list)): + for j in range(len(t_list)): + # identity matrix to self + if i != j: + # i->j: TiPi=TjPj, Tj^(-1)TiPi = Pj + # t_matrix = np.dot(np.linalg.inv(t_list[j]), t_list[i]) + t_matrix = np.linalg.solve(t_list[j], t_list[i]) # Tjw*Twi = Tji + pairwise_t_matrix[i, j] = t_matrix + + return pairwise_t_matrix + +def normalize_pairwise_tfm(pairwise_t_matrix, H, W, discrete_ratio, downsample_rate=1): + """ + normalize the pairwise transformation matrix to affine matrix need by torch.nn.functional.affine_grid() + + pairwise_t_matrix: torch.tensor + [B, L, L, 4, 4], B batchsize, L max_cav + H: num. + Feature map height + W: num. + Feature map width + discrete_ratio * downsample_rate: num. 
+ One pixel on the feature map corresponds to the actual physical distance + """ + + pairwise_t_matrix = pairwise_t_matrix[:,:,:,[0, 1],:][:,:,:,:,[0, 1, 3]] # [B, L, L, 2, 3] + pairwise_t_matrix[...,0,1] = pairwise_t_matrix[...,0,1] * H / W + pairwise_t_matrix[...,1,0] = pairwise_t_matrix[...,1,0] * W / H + pairwise_t_matrix[...,0,2] = pairwise_t_matrix[...,0,2] / (downsample_rate * discrete_ratio * W) * 2 + pairwise_t_matrix[...,1,2] = pairwise_t_matrix[...,1,2] / (downsample_rate * discrete_ratio * H) * 2 + + return pairwise_t_matrix + +def pose_to_tfm(pose): + """ Transform batch of pose to tfm + Args: + pose: torch.Tensor or np.ndarray + [N, 3], x, y, yaw, in degree + [N, 6], x, y, z, roll, yaw, pitch, in degree + + roll and pitch follows carla coordinate + Returns: + tfm: torch.Tensor + [N, 4, 4] + """ + + pose_tensor, is_np = check_numpy_to_torch(pose) + pose = pose_tensor + + + if pose.shape[1] == 3: + N = pose.shape[0] + x = pose[:,0] + y = pose[:,1] + yaw = pose[:,2] + + tfm = torch.eye(4, device=pose.device).view(1,4,4).repeat(N,1,1) + tfm[:,0,0] = torch.cos(torch.deg2rad(yaw)) + tfm[:,0,1] = - torch.sin(torch.deg2rad(yaw)) + tfm[:,1,0] = torch.sin(torch.deg2rad(yaw)) + tfm[:,1,1] = torch.cos(torch.deg2rad(yaw)) + tfm[:,0,3] = x + tfm[:,1,3] = y + + elif pose.shape[1] == 6: + N = pose.shape[0] + x = pose[:,0] + y = pose[:,1] + z = pose[:,2] + roll = pose[:,3] + yaw = pose[:,4] + pitch = pose[:,5] + + c_y = torch.cos(torch.deg2rad(yaw)) + s_y = torch.sin(torch.deg2rad(yaw)) + c_r = torch.cos(torch.deg2rad(roll)) + s_r = torch.sin(torch.deg2rad(roll)) + c_p = torch.cos(torch.deg2rad(pitch)) + s_p = torch.sin(torch.deg2rad(pitch)) + + tfm = torch.eye(4, device=pose.device).view(1,4,4).repeat(N,1,1) + + # translation matrix + tfm[:, 0, 3] = x + tfm[:, 1, 3] = y + tfm[:, 2, 3] = z + + # rotation matrix + tfm[:, 0, 0] = c_p * c_y + tfm[:, 0, 1] = c_y * s_p * s_r - s_y * c_r + tfm[:, 0, 2] = -c_y * s_p * c_r - s_y * s_r + tfm[:, 1, 0] = s_y * c_p + tfm[:, 1, 1] = s_y * s_p * s_r + c_y * c_r + tfm[:, 1, 2] = -s_y * s_p * c_r + c_y * s_r + tfm[:, 2, 0] = s_p + tfm[:, 2, 1] = -c_p * s_r + tfm[:, 2, 2] = c_p * c_r + + if is_np: + tfm = tfm.numpy() + + return tfm + + + + +def tfm_to_pose(tfm: np.ndarray): + """ + turn transformation matrix to [x, y, z, roll, yaw, pitch] + we use radians format. + tfm is pose in transformation format, and XYZ order, i.e. roll-pitch-yaw + """ + # There forumlas are designed from x_to_world, but equal to the one below. 
+ yaw = np.degrees(np.arctan2(tfm[1,0], tfm[0,0])) # clockwise in carla + roll = np.degrees(np.arctan2(-tfm[2,1], tfm[2,2])) # but counter-clockwise in carla + pitch = np.degrees(np.arctan2(tfm[2,0], ((tfm[2,1]**2 + tfm[2,2]**2) ** 0.5)) ) # but counter-clockwise in carla + + + # These formulas are designed for consistent axis orientation + # yaw = np.degrees(np.arctan2(tfm[1,0], tfm[0,0])) # clockwise in carla + # roll = np.degrees(np.arctan2(tfm[2,1], tfm[2,2])) # but counter-clockwise in carla + # pitch = np.degrees(np.arctan2(-tfm[2,0], ((tfm[2,1]**2 + tfm[2,2]**2) ** 0.5)) ) # but counter-clockwise in carla + + # roll = - roll + # pitch = - pitch + + x, y, z = tfm[:3,3] + return([x, y, z, roll, yaw, pitch]) + +def tfm_to_xycs_torch(tfm: torch.Tensor): + """ + similar to tfm_to_pose_torch, + return x/y/cos(yaw)/sin(yaw) + """ + x = tfm[:,0,3] + y = tfm[:,1,3] + + cos = tfm[:,0,0] + sin = tfm[:,1,0] + + pose = torch.stack([x,y,cos,sin]).T # (N, 4) + + return pose + +def xycs_to_tfm_torch(xycs: torch.Tensor): + """ + Args: xycs + [N, 4] + """ + N = xycs.shape[0] + tfm = torch.eye(4, device=xycs.device).view(1,4,4).repeat(N,1,1) + + x, y, cos, sin = xycs[:,0], xycs[:,1], xycs[:,2], xycs[:,3] + + tfm[:,0,0] = cos + tfm[:,0,1] = - sin + tfm[:,1,0] = sin + tfm[:,1,1] = cos + tfm[:,0,3] = x + tfm[:,1,3] = y + + return tfm + +def tfm_to_pose_torch(tfm: torch.Tensor, dof: int): + """ + turn transformation matrix to [x, y, z, roll, yaw, pitch] + we use degree format. + tfm is pose in transformation format, and XYZ order, i.e. roll-pitch-yaw + + Args: + tfm: [N, 4, 4] + dof: 3 or 6 + Returns: + 6dof pose: [N, 6] + """ + + # There forumlas are designed from x_to_world, but equal to the one below. + yaw = torch.rad2deg(torch.atan2(tfm[:,1,0], tfm[:,0,0])) # clockwise in carla + roll = torch.rad2deg(torch.atan2(-tfm[:,2,1], tfm[:,2,2])) # but counter-clockwise in carla + pitch = torch.rad2deg(torch.atan2(tfm[:,2,0], (tfm[:,2,1]**2 + tfm[:,2,2]**2) ** 0.5)) # but counter-clockwise in carla + + # These formulas are designed for consistent axis orientation + # yaw = torch.rad2deg(torch.atan2(tfm[:,1,0], tfm[:,0,0])) # clockwise in carla + # roll = torch.rad2deg(torch.atan2(tfm[:,2,1], tfm[:,2,2])) # but counter-clockwise in carla + # pitch = torch.rad2deg(torch.atan2(-tfm[:,2,0], (tfm[:,2,1]**2 + tfm[:,2,2]**2) ** 0.5)) # but counter-clockwise in carla + + # roll = - roll + # pitch = - pitch + + x = tfm[:,0,3] + y = tfm[:,1,3] + z = tfm[:,2,3] + + if dof == 6: + pose = torch.stack([x,y,z,roll,yaw,pitch]).T # (N, 6) + elif dof == 3: + pose = torch.stack([x,y,yaw]).T + else: + raise("Only support returning 3dof/6dof pose.") + + return pose + + +def x_to_world(pose): + """ + The transformation matrix from x-coordinate system to carla world system + Also is the pose in world coordinate: T_world_x + + Parameters + ---------- + pose : list + [x, y, z, roll, yaw, pitch], degree + + Returns + ------- + matrix : np.ndarray + The transformation matrix. 
+ """ + x, y, z, roll, yaw, pitch = pose[:] + + # used for rotation matrix + c_y = np.cos(np.radians(yaw)) + s_y = np.sin(np.radians(yaw)) + c_r = np.cos(np.radians(roll)) + s_r = np.sin(np.radians(roll)) + c_p = np.cos(np.radians(pitch)) + s_p = np.sin(np.radians(pitch)) + + matrix = np.identity(4) + + # translation matrix + matrix[0, 3] = x + matrix[1, 3] = y + matrix[2, 3] = z + + # rotation matrix + matrix[0, 0] = c_p * c_y + matrix[0, 1] = c_y * s_p * s_r - s_y * c_r + matrix[0, 2] = -c_y * s_p * c_r - s_y * s_r + matrix[1, 0] = s_y * c_p + matrix[1, 1] = s_y * s_p * s_r + c_y * c_r + matrix[1, 2] = -s_y * s_p * c_r + c_y * s_r + matrix[2, 0] = s_p + matrix[2, 1] = -c_p * s_r + matrix[2, 2] = c_p * c_r + + return matrix + + +def x1_to_x2(x1, x2): + """ + Transformation matrix from x1 to x2. T_x2_x1 + + Parameters + ---------- + x1 : list + The pose of x1 under world coordinates. + x2 : list + The pose of x2 under world coordinates. + + yaw, pitch, roll in degree + + Returns + ------- + transformation_matrix : np.ndarray + The transformation matrix. + + """ + x1_to_world = x_to_world(x1) # wP = x1_to_world * 1P, so x1_to_world is Tw1 + x2_to_world = x_to_world(x2) # Tw2 + world_to_x2 = np.linalg.inv(x2_to_world) # T2w + + transformation_matrix = np.dot(world_to_x2, x1_to_world) # T2w * Tw1 = T21 + return transformation_matrix + + +def dist_to_continuous(p_dist, displacement_dist, res, downsample_rate): + """ + Convert points discretized format to continuous space for BEV representation. + Parameters + ---------- + p_dist : numpy.array + Points in discretized coorindates. + + displacement_dist : numpy.array + Discretized coordinates of bottom left origin. + + res : float + Discretization resolution. + + downsample_rate : int + Dowmsamping rate. + + Returns + ------- + p_continuous : numpy.array + Points in continuous coorindates. + + """ + p_dist = np.copy(p_dist) + p_dist = p_dist + displacement_dist + p_continuous = p_dist * res * downsample_rate + return p_continuous + + +def get_pairwise_transformation_torch(lidar_poses, max_cav, record_len, dof): + """ + Get pair-wise transformation matrix accross different agents. + Designed for batch data + + Parameters + ---------- + lidar_poses : tensor, [N, 3] or [N, 6] + 3 or 6 dof pose of lidar. + + max_cav : int + The maximum number of cav, default 5 + + record: list + shape (B) + + dof: int, 3 or 6 + + Return + ------ + pairwise_t_matrix : np.array + The pairwise transformation matrix across each cav. + shape: (B, L, L, 4, 4), L is the max cav number in a scene + pairwise_t_matrix[i, j] is Tji, i_to_j + """ + def regroup(x, record_len): + cum_sum_len = torch.cumsum(record_len, dim=0) + split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) + return split_x + + B = len(record_len) + lidar_poses_list = regroup(lidar_poses, record_len) + + pairwise_t_matrix = torch.eye(4, device=lidar_poses.device).view(1,1,1,4,4).repeat(B, max_cav, max_cav, 1, 1) # (B, L, L, 4, 4) + # save all transformation matrix in a list in order first. + for b in range(B): + lidar_poses = lidar_poses_list[b] # [N_cav, 3] or [N_cav, 6]. 
+ t_list = pose_to_tfm(lidar_poses) # Twx, [N_cav, 4, 4] + + for i in range(len(t_list)): + for j in range(len(t_list)): + # identity matrix to self + if i != j: + # i->j: TiPi=TjPj, Tj^(-1)TiPi = Pj + # t_matrix = np.dot(np.linalg.inv(t_list[j]), t_list[i]) + t_matrix = torch.linalg.solve(t_list[j], t_list[i]) # Tjw*Twi = Tji + pairwise_t_matrix[b][i, j] = t_matrix + + return pairwise_t_matrix + + +def get_relative_transformation(lidar_poses): + """ + Args: + lidar_pose: np.ndarray + [N, dof], lidar pose in world coordinate + N is the agent number, dof is 3/6. + + [x, y, z, roll, yaw, pitch], degree + + Returns: + relative transformation, in ego's coordinate + """ + N = lidar_poses.shape[0] + dof = lidar_poses.shape[1] + + if dof == 3: + full_lidar_poses = np.zeros((N, 6)) + full_lidar_poses[:,[0,1,4]] = lidar_poses + lidar_poses = full_lidar_poses + + relative_t_matrix = np.eye(4).reshape(1,4,4).repeat(N, axis=0) # [N, 4, 4] + for i in range(1, N): + relative_t_matrix[i] = x1_to_x2(lidar_poses[i], lidar_poses[0]) + + return relative_t_matrix + + + +def muilt_coord(rotationA2B, translationA2B, rotationB2C, translationB2C): + rotationA2B = np.array(rotationA2B).reshape(3, 3) + rotationB2C = np.array(rotationB2C).reshape(3, 3) + rotation = np.dot(rotationB2C, rotationA2B) + translationA2B = np.array(translationA2B).reshape(3, 1) + translationB2C = np.array(translationB2C).reshape(3, 1) + translation = np.dot(rotationB2C, translationA2B) + translationB2C + + return rotation, translation + + +def veh_side_rot_and_trans_to_trasnformation_matrix(lidar_to_novatel_json_file,novatel_to_world_json_file): + matrix = np.empty([4,4]) + rotationA2B = lidar_to_novatel_json_file["transform"]["rotation"] + translationA2B = lidar_to_novatel_json_file["transform"]["translation"] + rotationB2C = novatel_to_world_json_file["rotation"] + translationB2C = novatel_to_world_json_file["translation"] + rotation,translation = muilt_coord(rotationA2B, translationA2B, rotationB2C, translationB2C) + matrix[0:3, 0:3] = rotation + matrix[:, 3][0:3] = np.array(translation)[:, 0] + matrix[3, 0:3] = 0 + matrix[3, 3] = 1 + + return matrix + +def inf_side_rot_and_trans_to_trasnformation_matrix(json_file,system_error_offset): + matrix = np.empty([4,4]) + matrix[0:3, 0:3] = json_file["rotation"] + translation = np.array(json_file["translation"]) + translation[0][0] = translation[0][0] + system_error_offset["delta_x"] + translation[1][0] = translation[1][0] + system_error_offset["delta_y"] #为啥有[1][0]??? 
--> translation是(3,1)的 + matrix[:, 3][0:3] = translation[:, 0] + matrix[3, 0:3] = 0 + matrix[3, 3] = 1 + + return matrix + +def rot_and_trans_to_trasnformation_matrix(json_file): + matrix = np.empty([4,4]) + matrix[0:3, 0:3] = json_file["rotation"] + matrix[:, 3][0:3] = np.array(json_file["translation"])[:, 0] + matrix[3, 0:3] = 0 + matrix[3, 3] = 1 + + return matrix + + +def test(): + random_pose = np.random.randn(6) + tfm = x_to_world(random_pose) + pose_result = tfm_to_pose(tfm) + tfm2 = x_to_world(pose_result) + + print(random_pose) + print(pose_result) + print() + print(tfm) + print(tfm2) + +if __name__ == "__main__": + test() \ No newline at end of file diff --git a/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/waypoint2map.py b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/waypoint2map.py new file mode 100644 index 0000000000000000000000000000000000000000..f22e74b06ad9222f76cbc0d9febd72dec731f827 --- /dev/null +++ b/v2xverse_late_multiclass_2025_01_28_08_49_56/scripts/utils/waypoint2map.py @@ -0,0 +1,164 @@ +''' +Functions: Transform waypoints to bev maps. +''' + +import numpy as np +import torch.nn.functional as F +import torch + +def global2grid(waypoints, grid_coord=[96,288,1/2,3/4], det_range=[-36,-12,-10,36,12,10]): + X, Y, r_x, r_y = grid_coord + center_y, center_x = Y * r_y, X * r_x + waypoints *= grid_coord[1]/(det_range[3]-det_range[0]) + waypoints[:,:,0] = waypoints[:,:,0] + center_x + waypoints[:,:,1] = waypoints[:,:,1] + center_y + return waypoints + +def waypoints2map(waypoints, grid_coord=[192,96,3/4,1/2]): + Y, X, r_y, r_x = grid_coord + B, N, _ = waypoints.shape # [B, N, 2] + bev_map = np.zeros([B, Y, X]) + grids = global2grid(waypoints) + grids = np.array(grids, dtype=np.uint8) + batch_idx = np.repeat(np.arange(B),N) + x_idx = grids[:,:,0].flatten() + y_idx = grids[:,:,1].flatten() + valid_mask = (y_idx > (-1)) * (y_idx < Y) * (x_idx > (-1)) * (x_idx < X) + valid_idx = np.where(valid_mask*1)[0] + bev_map[batch_idx[valid_idx], y_idx[valid_idx], x_idx[valid_idx]] = 1 + # print(bev_map.sum()) + # print(len(valid_idx)) + return bev_map + +def gradcam_resize(bev_map, scale=50): + ''' + bev_map: [B,Y,X] torch.tensor + ''' + bev_map = torch.Tensor(bev_map) + bev_map = bev_map.unsqueeze(1) + bev_map_expand = F.max_pool2d(bev_map, scale, stride=1, padding=(scale-1)//2) + return bev_map_expand.squeeze(1) + + +def gaussian_2d(shape, sigma=1): + """Generate gaussian map. + + Args: + shape (list[int]): Shape of the map. + sigma (float): Sigma to generate gaussian map. + Defaults to 1. + + Returns: + np.ndarray: Generated gaussian map. + """ + m, n = [(ss - 1.) / 2. for ss in shape] + y, x = np.ogrid[-m:m + 1, -n:n + 1] + + h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) + h[h < np.finfo(h.dtype).eps * h.max()] = 0 + return h + + +def gaussian_radius(det_size, min_overlap=0.5): + """Get radius of gaussian. + + Args: + det_size (tuple[torch.Tensor]): Size of the detection result. + min_overlap (float): Gaussian_overlap. Defaults to 0.5. + + Returns: + torch.Tensor: Computed radius. 
+ """ + height, width = det_size + + a1 = 1 + b1 = (height + width) + c1 = width * height * (1 - min_overlap) / (1 + min_overlap) + sq1 = np.sqrt(b1**2 - 4 * a1 * c1) + r1 = (b1 + sq1) / (2 * a1) + + a2 = 4 + b2 = 2 * (height + width) + c2 = (1 - min_overlap) * width * height + sq2 = np.sqrt(b2**2 - 4 * a2 * c2) + r2 = (b2 + sq2) / (2 * a2) + + a3 = 4 * min_overlap + b3 = -2 * min_overlap * (height + width) + c3 = (min_overlap - 1) * width * height + sq3 = np.sqrt(b3**2 - 4 * a3 * c3) + r3 = (b3 + sq3) / (2 * a3) + return min(r1, r2, r3) + + +def draw_gaussian(heatmap, center, radius, ratio=5, k=1): + """Get gaussian masked heatmap. + + Args: + heatmap (torch.Tensor): Heatmap to be masked. + center (torch.Tensor): Center coord of the heatmap. + radius (int): Radius of gausian. + K (int): Multiple of masked_gaussian. Defaults to 1. + + Returns: + torch.Tensor: Masked heatmap. + """ + diameter = 2 * radius + 1 + gaussian = gaussian_2d((diameter, diameter), sigma=diameter/ratio ) + + # x, y = int(center[0]), int(center[1]) + x, y = int(center[1]), int(center[0]) + + height, width = heatmap.shape[0:2] + + left, right = min(x, radius), min(width - x, radius + 1) + top, bottom = min(y, radius), min(height - y, radius + 1) + + masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] + masked_gaussian = gaussian[radius - top:radius + bottom, + radius - left:radius + right] + + if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: + # torch.max(masked_heatmap, masked_gaussian * k, out=masked_heatmap) + np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) + # masked_heatmap = np.max([masked_heatmap[None,], (masked_gaussian * k)[None,]], axis=0)[0] + # heatmap[y - top:y + bottom, x - left:x + right] = masked_heatmap + return heatmap + +def draw_heatmap(heatmap, x, y, radius=50, sigma=5): + feature_map_size = heatmap.shape + + # throw out not in range objects to avoid out of array + # area when creating the heatmap + if not (0 <= x < feature_map_size[0] + and 0 <= y < feature_map_size[1]): + return heatmap + + heatmap = draw_gaussian(heatmap, (x,y), radius, sigma) + return heatmap + + +def waypoints2map_radius(waypoints, radius=40, sigma_reverse=5, grid_coord=[96,288,1/2,3/4], det_range=[-36,-12,-10,36,12,10]): + + waypoints[:,:,1] *= -1 + X, Y, r_x, r_y = grid_coord + B, N, _ = waypoints.shape # [B, N, 2] + bev_map = np.zeros([B, X, Y]) + grids = global2grid(waypoints, grid_coord=grid_coord, det_range=det_range) + # grids = np.array(grids, dtype=np.uint8) + batch_idx = np.repeat(np.arange(B),N) + x_idx = grids[:,:,0].flatten() + y_idx = grids[:,:,1].flatten() + valid_mask = (y_idx > (-1)) * (y_idx < Y) * (x_idx > (-1)) * (x_idx < X) + valid_idx = np.where(valid_mask*1)[0] + + radius *= grid_coord[0]/96*24/(det_range[4]-det_range[1]) + radius = int(radius) + + for i in valid_idx: + b = batch_idx[i] + x = x_idx[i] + y = y_idx[i] + bev_map[b] = draw_heatmap(bev_map[b], x, y, radius, sigma_reverse) + + return bev_map