vmem / extern /CUT3R /datasets_preprocess /preprocess_point_odyssey.py
liguang0115's picture
Add initial project structure with core files, configurations, and sample images
2df809d
#!/usr/bin/env python3
"""
Preprocess Script for Point Odyssey Dataset
This script processes the Point Odyssey dataset by:
- Copying RGB images.
- Converting 16-bit depth images to float32 depth maps (raw value / 65535 * 1000).
- Inverting camera extrinsic matrices to obtain poses.
- Saving intrinsics and computed poses in a structured output directory.
The dataset is expected to have subdirectories for each split (e.g., train, test, val),
with each split containing multiple sequence directories. Each sequence directory must
contain the following:
- An 'rgbs' folder with .jpg images.
- A 'depths' folder with .png depth images.
- An 'anno.npz' file with 'intrinsics' and 'extrinsics' arrays.
Usage:
python preprocess_point_odyssey.py --input_dir /path/to/input_dataset --output_dir /path/to/output_dataset
"""
import os
import argparse
import shutil
import numpy as np
import cv2
from tqdm import tqdm
def process_sequence(seq_dir, out_seq_dir):
    """
    Process a single sequence.

    For every frame the RGB image is copied, the depth PNG is converted to a
    float32 array (raw value / 65535 * 1000), and the inverse of the extrinsic
    matrix is saved as the camera pose together with the intrinsics.

    Args:
        seq_dir (str): Directory of the sequence (should contain 'rgbs', 'depths', and 'anno.npz').
        out_seq_dir (str): Output directory where processed files will be saved
            (in 'rgb', 'depth' and 'cam' subdirectories).

    Raises:
        FileNotFoundError: If 'rgbs', 'depths' or 'anno.npz' is missing from seq_dir.
        ValueError: If the numbers of intrinsics, extrinsics, images and depth maps
            differ, if a filename index does not match its position in the sorted
            listing, if a depth image cannot be read, or if an inverted extrinsic
            contains non-finite values.
        numpy.linalg.LinAlgError: If an extrinsic matrix is singular.
    """
    # Define input subdirectories and annotation file
    img_dir = os.path.join(seq_dir, "rgbs")
    depth_dir = os.path.join(seq_dir, "depths")
    cam_file = os.path.join(seq_dir, "anno.npz")

    # Ensure all necessary files/folders exist
    if not (
        os.path.exists(img_dir)
        and os.path.exists(depth_dir)
        and os.path.exists(cam_file)
    ):
        raise FileNotFoundError(f"Missing required data in {seq_dir}")

    # Create output subdirectories for images, depth maps, and camera parameters
    out_img_dir = os.path.join(out_seq_dir, "rgb")
    out_depth_dir = os.path.join(out_seq_dir, "depth")
    out_cam_dir = os.path.join(out_seq_dir, "cam")
    out_dirs = (out_img_dir, out_depth_dir, out_cam_dir)
    for out_dir in out_dirs:
        os.makedirs(out_dir, exist_ok=True)

    # Load camera annotations; the context manager closes the .npz archive
    # as soon as both arrays have been materialised.
    with np.load(cam_file) as annotations:
        cam_ints = annotations["intrinsics"].astype(np.float32)
        cam_exts = annotations["extrinsics"].astype(np.float32)

    # List and sort image and depth filenames
    rgbs = sorted(f for f in os.listdir(img_dir) if f.endswith(".jpg"))
    depths = sorted(f for f in os.listdir(depth_dir) if f.endswith(".png"))

    # Ensure that the number of intrinsics, extrinsics, RGB images, and depth images match
    if not (len(cam_ints) == len(cam_exts) == len(rgbs) == len(depths)):
        raise ValueError(
            f"Mismatch in sequence {seq_dir}: "
            f"{len(cam_ints)} intrinsics, {len(cam_exts)} extrinsics, {len(rgbs)} images, {len(depths)} depths."
        )

    # Skip the sequence only if *every* output folder is complete. The RGB copy
    # is written first for each frame, so checking the rgb folder alone would
    # treat a run interrupted during the last frame as finished.
    num_frames = len(rgbs)
    if all(len(os.listdir(out_dir)) == num_frames for out_dir in out_dirs):
        return

    # Process each frame in the sequence
    frames = tqdm(
        zip(rgbs, depths), total=num_frames, desc="Processing frames", leave=False
    )
    for i, (rgb_name, depth_name) in enumerate(frames):
        # Extract frame index from filenames (text after the last '_' and before the first '.')
        basename_img = rgb_name.split(".")[0].split("_")[-1]
        basename_depth = depth_name.split(".")[0].split("_")[-1]
        if int(basename_img) != i or int(basename_depth) != i:
            raise ValueError(
                f"Frame index mismatch in sequence {seq_dir} for frame {i}"
            )
        img_path = os.path.join(img_dir, rgb_name)
        depth_path = os.path.join(depth_dir, depth_name)

        # Retrieve intrinsics and compute camera pose by inverting the extrinsic matrix
        intrins = cam_ints[i]
        pose = np.linalg.inv(cam_exts[i])
        if not np.all(np.isfinite(pose)):
            raise ValueError(
                f"Invalid pose computed from extrinsics for frame {i} in {seq_dir}"
            )

        # Read and process depth image. cv2.imread signals failure by returning
        # None instead of raising, so check before using the result.
        depth_16bit = cv2.imread(depth_path, cv2.IMREAD_ANYDEPTH)
        if depth_16bit is None:
            raise ValueError(f"Failed to read depth image {depth_path}")
        depth = depth_16bit.astype(np.float32) / 65535.0 * 1000.0

        # Save processed files: copy the RGB image and save depth and camera parameters
        basename = basename_img
        shutil.copyfile(img_path, os.path.join(out_img_dir, basename + ".jpg"))
        np.save(os.path.join(out_depth_dir, basename + ".npy"), depth)
        np.savez(
            os.path.join(out_cam_dir, basename + ".npz"), intrinsics=intrins, pose=pose
        )
def process_split(split_dir, out_split_dir):
    """
    Run process_sequence on every sequence directory found in one data split.

    Only immediate subdirectories of split_dir are treated as sequences; they
    are visited in sorted name order and each is written to a subdirectory of
    the same name under out_split_dir.

    Args:
        split_dir (str): Directory for the split.
        out_split_dir (str): Output directory for the processed split.
    """
    seq_names = [
        entry
        for entry in os.listdir(split_dir)
        if os.path.isdir(os.path.join(split_dir, entry))
    ]
    seq_names.sort()

    progress = tqdm(
        seq_names, desc=f"Processing sequences in {os.path.basename(split_dir)}"
    )
    for seq_name in progress:
        process_sequence(
            os.path.join(split_dir, seq_name),
            os.path.join(out_split_dir, seq_name),
        )
def main():
    """
    Command-line entry point.

    Parses --input_dir and --output_dir (both required), then processes the
    'train', 'test' and 'val' splits in that order. A split whose input
    directory does not exist is reported with a warning and skipped; its
    output directory is not created.
    """
    arg_parser = argparse.ArgumentParser(
        description="Preprocess Point Odyssey dataset by processing images, depth maps, and camera parameters."
    )
    required_paths = (
        ("--input_dir", "Path to the root input dataset directory."),
        (
            "--output_dir",
            "Path to the root output directory where processed data will be stored.",
        ),
    )
    for flag, help_text in required_paths:
        arg_parser.add_argument(flag, type=str, required=True, help=help_text)
    options = arg_parser.parse_args()

    # Expected dataset splits, processed in this order
    for split_name in ("train", "test", "val"):
        src_split = os.path.join(options.input_dir, split_name)
        dst_split = os.path.join(options.output_dir, split_name)
        if os.path.exists(src_split):
            os.makedirs(dst_split, exist_ok=True)
            process_split(src_split, dst_split)
        else:
            print(
                f"Warning: Split directory {src_split} does not exist. Skipping this split."
            )
# Run the preprocessing pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()