Spaces:
Running
on
L4
Running
on
L4
#!/usr/bin/env python3 | |
""" | |
Preprocess Script for Point Odyssey Dataset | |
This script processes the Point Odyssey dataset by: | |
- Copying RGB images. | |
- Converting 16-bit depth images to a normalized float32 depth map. | |
- Inverting camera extrinsic matrices to obtain poses. | |
- Saving intrinsics and computed poses in a structured output directory. | |
The dataset is expected to have subdirectories for each split (e.g., train, test, val), | |
with each split containing multiple sequence directories. Each sequence directory must | |
contain the following: | |
- An 'rgbs' folder with .jpg images. | |
- A 'depths' folder with .png depth images. | |
- An 'anno.npz' file with 'intrinsics' and 'extrinsics' arrays. | |
Usage: | |
python preprocess_point_odyssey.py --input_dir /path/to/input_dataset --output_dir /path/to/output_dataset | |
""" | |
import os | |
import argparse | |
import shutil | |
import numpy as np | |
import cv2 | |
from tqdm import tqdm | |
def process_sequence(seq_dir, out_seq_dir): | |
""" | |
Process a single sequence: | |
- Verifies that required folders/files exist. | |
- Loads camera annotations. | |
- Processes each frame: copies the RGB image, processes the depth map, | |
computes the camera pose, and saves the results. | |
Args: | |
seq_dir (str): Directory of the sequence (should contain 'rgbs', 'depths', and 'anno.npz'). | |
out_seq_dir (str): Output directory where processed files will be saved. | |
""" | |
# Define input subdirectories and annotation file | |
img_dir = os.path.join(seq_dir, "rgbs") | |
depth_dir = os.path.join(seq_dir, "depths") | |
cam_file = os.path.join(seq_dir, "anno.npz") | |
# Ensure all necessary files/folders exist | |
if not ( | |
os.path.exists(img_dir) | |
and os.path.exists(depth_dir) | |
and os.path.exists(cam_file) | |
): | |
raise FileNotFoundError(f"Missing required data in {seq_dir}") | |
# Create output subdirectories for images, depth maps, and camera parameters | |
out_img_dir = os.path.join(out_seq_dir, "rgb") | |
out_depth_dir = os.path.join(out_seq_dir, "depth") | |
out_cam_dir = os.path.join(out_seq_dir, "cam") | |
os.makedirs(out_img_dir, exist_ok=True) | |
os.makedirs(out_depth_dir, exist_ok=True) | |
os.makedirs(out_cam_dir, exist_ok=True) | |
# Load camera annotations | |
annotations = np.load(cam_file) | |
cam_ints = annotations["intrinsics"].astype(np.float32) | |
cam_exts = annotations["extrinsics"].astype(np.float32) | |
# List and sort image and depth filenames | |
rgbs = sorted([f for f in os.listdir(img_dir) if f.endswith(".jpg")]) | |
depths = sorted([f for f in os.listdir(depth_dir) if f.endswith(".png")]) | |
# Ensure that the number of intrinsics, extrinsics, RGB images, and depth images match | |
if not (len(cam_ints) == len(cam_exts) == len(rgbs) == len(depths)): | |
raise ValueError( | |
f"Mismatch in sequence {seq_dir}: " | |
f"{len(cam_ints)} intrinsics, {len(cam_exts)} extrinsics, {len(rgbs)} images, {len(depths)} depths." | |
) | |
# Skip sequence if it has already been processed | |
if len(os.listdir(out_img_dir)) == len(rgbs): | |
return | |
# Process each frame in the sequence | |
for i in tqdm(range(len(cam_exts)), desc="Processing frames", leave=False): | |
# Extract frame index from filenames | |
basename_img = rgbs[i].split(".")[0].split("_")[-1] | |
basename_depth = depths[i].split(".")[0].split("_")[-1] | |
if int(basename_img) != i or int(basename_depth) != i: | |
raise ValueError( | |
f"Frame index mismatch in sequence {seq_dir} for frame {i}" | |
) | |
img_path = os.path.join(img_dir, rgbs[i]) | |
depth_path = os.path.join(depth_dir, depths[i]) | |
# Retrieve intrinsics and compute camera pose by inverting the extrinsic matrix | |
intrins = cam_ints[i] | |
cam_extrinsic = cam_exts[i] | |
pose = np.linalg.inv(cam_extrinsic) | |
if np.any(np.isinf(pose)) or np.any(np.isnan(pose)): | |
raise ValueError( | |
f"Invalid pose computed from extrinsics for frame {i} in {seq_dir}" | |
) | |
# Read and process depth image | |
depth_16bit = cv2.imread(depth_path, cv2.IMREAD_ANYDEPTH) | |
depth = depth_16bit.astype(np.float32) / 65535.0 * 1000.0 | |
# Save processed files: copy the RGB image and save depth and camera parameters | |
basename = basename_img # or str(i) | |
out_img_path = os.path.join(out_img_dir, basename + ".jpg") | |
shutil.copyfile(img_path, out_img_path) | |
np.save(os.path.join(out_depth_dir, basename + ".npy"), depth) | |
np.savez( | |
os.path.join(out_cam_dir, basename + ".npz"), intrinsics=intrins, pose=pose | |
) | |
def process_split(split_dir, out_split_dir): | |
""" | |
Process all sequences within a data split (e.g., train, test, or val). | |
Args: | |
split_dir (str): Directory for the split. | |
out_split_dir (str): Output directory for the processed split. | |
""" | |
sequences = sorted( | |
[d for d in os.listdir(split_dir) if os.path.isdir(os.path.join(split_dir, d))] | |
) | |
for seq in tqdm( | |
sequences, desc=f"Processing sequences in {os.path.basename(split_dir)}" | |
): | |
seq_dir = os.path.join(split_dir, seq) | |
out_seq_dir = os.path.join(out_split_dir, seq) | |
process_sequence(seq_dir, out_seq_dir) | |
def main(): | |
parser = argparse.ArgumentParser( | |
description="Preprocess Point Odyssey dataset by processing images, depth maps, and camera parameters." | |
) | |
parser.add_argument( | |
"--input_dir", | |
type=str, | |
required=True, | |
help="Path to the root input dataset directory.", | |
) | |
parser.add_argument( | |
"--output_dir", | |
type=str, | |
required=True, | |
help="Path to the root output directory where processed data will be stored.", | |
) | |
args = parser.parse_args() | |
# Define the expected dataset splits | |
splits = ["train", "test", "val"] | |
for split in splits: | |
split_dir = os.path.join(args.input_dir, split) | |
out_split_dir = os.path.join(args.output_dir, split) | |
if not os.path.exists(split_dir): | |
print( | |
f"Warning: Split directory {split_dir} does not exist. Skipping this split." | |
) | |
continue | |
os.makedirs(out_split_dir, exist_ok=True) | |
process_split(split_dir, out_split_dir) | |
if __name__ == "__main__": | |
main() | |