#!/usr/bin/env python3
"""
Preprocess the Dynamic Replica dataset.

This script reads frame annotations (stored in compressed JSON files),
loads images, depth maps, optical flow, and camera parameters, and saves
processed images, depth maps, flow files, and camera metadata (intrinsics
and poses) to an output directory organized by split, sequence, and camera view.

Usage:
    python preprocess_dynamic_replica.py --root_dir /path/to/data_dynamic_replica \
        --out_dir /path/to/processed_dynamic_replica \
        [--splits train valid test] \
        [--num_processes 8]
"""
import argparse
import gzip
import os
import os.path as osp
import re
import shutil
from collections import defaultdict
from dataclasses import dataclass
from multiprocessing import Pool, cpu_count
from typing import List, Optional

# Enable OpenEXR support in OpenCV. This must be set before cv2 is imported,
# otherwise OpenCV ignores it.
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"

import cv2
import numpy as np
import torch
from PIL import Image
from pytorch3d.implicitron.dataset.types import (
    FrameAnnotation as ImplicitronFrameAnnotation,
    load_dataclass,
)
from tqdm import tqdm

# Magic number identifying Middlebury .flo files.
TAG_CHAR = np.array([202021.25], np.float32)
def readFlow(fn):
    """Read a .flo optical-flow file in Middlebury format."""
    with open(fn, "rb") as f:
        magic = np.fromfile(f, np.float32, count=1)
        if magic != TAG_CHAR:
            print("Magic number incorrect. Invalid .flo file")
            return None
        w = int(np.fromfile(f, np.int32, count=1)[0])
        h = int(np.fromfile(f, np.int32, count=1)[0])
        # The payload is stored row-major as interleaved (u, v) pairs.
        data = np.fromfile(f, np.float32, count=2 * w * h)
        return np.resize(data, (h, w, 2))
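# For completeness, a minimal counterpart to readFlow. It is not used by this
# script; it is a sketch of the Middlebury .flo layout (magic tag, width,
# height, then interleaved row-major (u, v) float32 pairs).
def writeFlow(fn, flow):
    """Write an (H, W, 2) float32 flow array as a Middlebury .flo file."""
    flow = np.asarray(flow, dtype=np.float32)
    h, w = flow.shape[:2]
    with open(fn, "wb") as f:
        TAG_CHAR.tofile(f)
        np.array([w, h], dtype=np.int32).tofile(f)
        flow.tofile(f)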
def readPFM(file):
    """Read a .pfm file, returning an (H, W) or (H, W, 3) float array."""
    with open(file, "rb") as f:
        header = f.readline().rstrip()
        if header == b"PF":
            color = True
        elif header == b"Pf":
            color = False
        else:
            raise Exception("Not a PFM file.")
        dim_match = re.match(rb"^(\d+)\s(\d+)\s$", f.readline())
        if dim_match:
            width, height = map(int, dim_match.groups())
        else:
            raise Exception("Malformed PFM header.")
        # A negative scale marks little-endian data.
        scale = float(f.readline().rstrip())
        endian = "<" if scale < 0 else ">"
        if scale < 0:
            scale = -scale
        data = np.fromfile(f, endian + "f")
        shape = (height, width, 3) if color else (height, width)
        data = np.reshape(data, shape)
        # PFM stores rows bottom-to-top, so flip vertically.
        data = np.flipud(data)
        return data
def read_gen(file_name, pil=False):
    """Dispatch on file extension and load an image, array, or flow file."""
    ext = osp.splitext(file_name)[-1].lower()
    if ext in [".png", ".jpeg", ".ppm", ".jpg"]:
        return Image.open(file_name)
    elif ext in [".bin", ".raw"]:
        return np.load(file_name)
    elif ext == ".flo":
        return readFlow(file_name).astype(np.float32)
    elif ext == ".pfm":
        flow = readPFM(file_name).astype(np.float32)
        # Drop the last channel of color PFMs; flow only needs two.
        return flow if len(flow.shape) == 2 else flow[:, :, :-1]
    return []
def _load_16big_png_depth(depth_png):
    """Load a depth map stored as float16 values bit-cast into a 16-bit PNG."""
    with Image.open(depth_png) as depth_pil:
        # The PNG holds uint16 pixels whose raw bits encode float16 depth.
        depth = (
            np.frombuffer(np.array(depth_pil, dtype=np.uint16), dtype=np.float16)
            .astype(np.float32)
            .reshape((depth_pil.size[1], depth_pil.size[0]))
        )
    return depth
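# Illustrative inverse of the loader above (not used by this script): a depth
# map would be stored by bit-casting float16 values to uint16 and saving them
# as a 16-bit grayscale PNG, which is exactly what _load_16big_png_depth undoes.
def _save_16big_png_depth(depth, depth_png):
    depth_f16 = np.asarray(depth, dtype=np.float16)
    # Reinterpret the float16 bytes as uint16 so PIL writes a 16-bit PNG.
    Image.fromarray(depth_f16.view(np.uint16)).save(depth_png)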
@dataclass
class DynamicReplicaFrameAnnotation(ImplicitronFrameAnnotation):
    """A dataclass used to load annotations from .json for Dynamic Replica."""

    camera_name: Optional[str] = None
    instance_id_map_path: Optional[str] = None
    flow_forward: Optional[str] = None
    flow_forward_mask: Optional[str] = None
    flow_backward: Optional[str] = None
    flow_backward_mask: Optional[str] = None
    trajectories: Optional[str] = None
def _get_pytorch3d_camera(entry_viewpoint, image_size, scale: float):
    """
    Convert the camera parameters stored in an annotation from the dataset's
    NDC conventions to pixel-space intrinsics and a rotation/translation pair.

    Args:
        entry_viewpoint: the `viewpoint` field of a frame annotation.
        image_size: (height, width) of the frame.
        scale: unused; kept for signature compatibility.

    Returns:
        R, tvec, focal_length_px, principal_point_px
    """
    assert entry_viewpoint is not None
    principal_point = torch.tensor(entry_viewpoint.principal_point, dtype=torch.float)
    focal_length = torch.tensor(entry_viewpoint.focal_length, dtype=torch.float)
    # Half image size in (width, height) order, since image_size is (H, W).
    half_image_size_wh_orig = (
        torch.tensor(list(reversed(image_size)), dtype=torch.float) / 2.0
    )
    # Rescale intrinsics from the stored NDC convention to pixels.
    fmt = entry_viewpoint.intrinsics_format
    if fmt.lower() == "ndc_norm_image_bounds":
        rescale = half_image_size_wh_orig
    elif fmt.lower() == "ndc_isotropic":
        rescale = half_image_size_wh_orig.min()
    else:
        raise ValueError(f"Unknown intrinsics format: {fmt}")
    principal_point_px = half_image_size_wh_orig - principal_point * rescale
    focal_length_px = focal_length * rescale
    # Flip the x/y axes of the translation (and of a copy of the rotation) to
    # leave PyTorch3D's convention; note the returned R is the original
    # annotation rotation.
    R = torch.tensor(entry_viewpoint.R, dtype=torch.float)
    T = torch.tensor(entry_viewpoint.T, dtype=torch.float)
    R_pytorch3d = R.clone()
    T_pytorch3d = T.clone()
    T_pytorch3d[..., :2] *= -1
    R_pytorch3d[..., :, :2] *= -1
    tvec = T_pytorch3d
    return R, tvec, focal_length_px, principal_point_px
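# A quick sanity check of the NDC -> pixel conversion above, using a hand-made
# stand-in viewpoint (illustrative only, not real Dynamic Replica data): for a
# 480x640 image in "ndc_norm_image_bounds" format, a principal point of (0, 0)
# should land at the image center (320, 240):
#
#     from types import SimpleNamespace
#     vp = SimpleNamespace(
#         R=torch.eye(3).tolist(), T=[0.0, 0.0, 0.0],
#         focal_length=[1.0, 1.0], principal_point=[0.0, 0.0],
#         intrinsics_format="ndc_norm_image_bounds",
#     )
#     _, _, focal_px, pp_px = _get_pytorch3d_camera(vp, (480, 640), scale=1.0)
#     # focal_px == tensor([320., 240.]); pp_px == tensor([320., 240.])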
# Global configuration for splits; the default root and output directories can
# be overridden via command-line arguments.
SPLITS = ["train", "valid", "test"]
def process_split_data(args):
    """
    Process all frames for a given split.

    Reads the frame annotation file for the split, groups frames by sequence
    and camera, and for each frame loads the image, depth map, and optical
    flows (if available), computes the camera intrinsics and pose (via
    _get_pytorch3d_camera), and saves the processed data.
    """
    split, root_dir, out_dir = args
    split_dir = osp.join(root_dir, split)
    # The frame annotations are stored in a compressed JSON file.
    frame_annotations_file = osp.join(split_dir, f"frame_annotations_{split}.jgz")
    with gzip.open(frame_annotations_file, "rt", encoding="utf8") as zipfile:
        frame_annots_list = load_dataclass(
            zipfile, List[DynamicReplicaFrameAnnotation]
        )
    # Group frames by sequence and camera.
    seq_annot = defaultdict(lambda: defaultdict(list))
    for frame_annot in frame_annots_list:
        seq_annot[frame_annot.sequence_name][frame_annot.camera_name].append(
            frame_annot
        )
    # Process each sequence.
    for seq_name in tqdm(seq_annot.keys(), desc=f"Processing split '{split}'"):
        # For each camera view (e.g., 'left', 'right'), create output directories.
        for cam in ["left", "right"]:
            out_img_dir = osp.join(out_dir, split, seq_name, cam, "rgb")
            out_depth_dir = osp.join(out_dir, split, seq_name, cam, "depth")
            out_fflow_dir = osp.join(out_dir, split, seq_name, cam, "flow_forward")
            out_bflow_dir = osp.join(out_dir, split, seq_name, cam, "flow_backward")
            out_cam_dir = osp.join(out_dir, split, seq_name, cam, "cam")
            for d in (out_img_dir, out_depth_dir, out_fflow_dir, out_bflow_dir, out_cam_dir):
                os.makedirs(d, exist_ok=True)
            for framedata in tqdm(
                seq_annot[seq_name][cam], desc=f"Seq {seq_name} [{cam}]", leave=False
            ):
                timestamp = framedata.frame_timestamp
                im_path = osp.join(split_dir, framedata.image.path)
                depth_path = osp.join(split_dir, framedata.depth.path)
                # Flow annotations may be missing for some frames, so guard
                # against absent entries before dereferencing their paths.
                has_fflow = (
                    framedata.flow_forward is not None
                    and framedata.flow_forward["path"]
                )
                has_bflow = (
                    framedata.flow_backward is not None
                    and framedata.flow_backward["path"]
                )
                if has_fflow:
                    flow_forward_path = osp.join(
                        split_dir, framedata.flow_forward["path"]
                    )
                    flow_forward_mask_path = osp.join(
                        split_dir, framedata.flow_forward_mask["path"]
                    )
                if has_bflow:
                    flow_backward_path = osp.join(
                        split_dir, framedata.flow_backward["path"]
                    )
                    flow_backward_mask_path = osp.join(
                        split_dir, framedata.flow_backward_mask["path"]
                    )
                # Ensure required files exist.
                assert os.path.isfile(im_path), im_path
                assert os.path.isfile(depth_path), depth_path
                if has_fflow:
                    assert os.path.isfile(flow_forward_path), flow_forward_path
                    assert os.path.isfile(flow_forward_mask_path), flow_forward_mask_path
                if has_bflow:
                    assert os.path.isfile(flow_backward_path), flow_backward_path
                    assert os.path.isfile(flow_backward_mask_path), flow_backward_mask_path
                viewpoint = framedata.viewpoint
                # Load the depth map (float16 bit-cast stored in a 16-bit PNG).
                depth = _load_16big_png_depth(depth_path)
                # Process optical flow if available; each frame's flow and its
                # validity mask are bundled into a single .npz file.
                if has_fflow:
                    flow_forward = cv2.imread(flow_forward_path, cv2.IMREAD_UNCHANGED)
                    flow_forward_mask = cv2.imread(
                        flow_forward_mask_path, cv2.IMREAD_UNCHANGED
                    )
                    np.savez(
                        osp.join(out_fflow_dir, f"{timestamp}.npz"),
                        flow=flow_forward,
                        mask=flow_forward_mask,
                    )
                if has_bflow:
                    flow_backward = cv2.imread(flow_backward_path, cv2.IMREAD_UNCHANGED)
                    flow_backward_mask = cv2.imread(
                        flow_backward_mask_path, cv2.IMREAD_UNCHANGED
                    )
                    np.savez(
                        osp.join(out_bflow_dir, f"{timestamp}.npz"),
                        flow=flow_backward,
                        mask=flow_backward_mask,
                    )
                # Get camera parameters and assemble pixel-space intrinsics.
                R, t, focal, pp = _get_pytorch3d_camera(
                    viewpoint, framedata.image.size, scale=1.0
                )
                intrinsics = np.eye(3)
                intrinsics[0, 0] = focal[0].item()
                intrinsics[1, 1] = focal[1].item()
                intrinsics[0, 2] = pp[0].item()
                intrinsics[1, 2] = pp[1].item()
                # Invert the extrinsics [R|t] into a camera-to-world pose
                # [R^T | -R^T t].
                pose = np.eye(4)
                pose[:3, :3] = R.numpy().T
                pose[:3, 3] = -R.numpy().T @ t.numpy()
                # Define output file paths.
                out_img_path = osp.join(out_img_dir, f"{timestamp}.png")
                out_depth_path = osp.join(out_depth_dir, f"{timestamp}.npy")
                out_cam_path = osp.join(out_cam_dir, f"{timestamp}.npz")
                # Copy the RGB image; save depth and camera metadata.
                shutil.copy(im_path, out_img_path)
                np.save(out_depth_path, depth)
                np.savez(out_cam_path, intrinsics=intrinsics, pose=pose)
    # (Optionally, summary information could be returned here.)
    return None
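# Illustrative helper (not part of the pipeline): read one processed frame back
# from the layout written above. `frame_dir` is assumed to be
# out_dir/<split>/<sequence>/<left|right>.
def load_processed_frame(frame_dir, timestamp):
    rgb = np.array(Image.open(osp.join(frame_dir, "rgb", f"{timestamp}.png")))
    depth = np.load(osp.join(frame_dir, "depth", f"{timestamp}.npy"))
    cam = np.load(osp.join(frame_dir, "cam", f"{timestamp}.npz"))
    return rgb, depth, cam["intrinsics"], cam["pose"]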
def main():
    parser = argparse.ArgumentParser(
        description="Preprocess Dynamic Replica dataset: convert raw annotations, "
        "images, depth, and flow files to a processed format."
    )
    parser.add_argument(
        "--root_dir",
        type=str,
        required=True,
        help="Root directory of the Dynamic Replica data.",
    )
    parser.add_argument(
        "--out_dir",
        type=str,
        required=True,
        help="Output directory for processed data.",
    )
    parser.add_argument(
        "--splits",
        type=str,
        nargs="+",
        default=SPLITS,
        help="List of splits to process (default: train valid test).",
    )
    parser.add_argument(
        "--num_processes",
        type=int,
        default=cpu_count(),
        help="Number of processes to use (default: number of CPU cores).",
    )
    args = parser.parse_args()
    os.makedirs(args.out_dir, exist_ok=True)
    # One task per split; each worker process handles a whole split.
    tasks = [(split, args.root_dir, args.out_dir) for split in args.splits]
    print("Processing splits:", args.splits)
    with Pool(processes=args.num_processes) as pool:
        list(
            tqdm(
                pool.imap(process_split_data, tasks),
                total=len(tasks),
                desc="Overall Progress",
            )
        )


if __name__ == "__main__":
    main()