Spaces:
Running
on
L4
Running
on
L4
#!/usr/bin/env python3 | |
""" | |
Preprocess the Matterport3D (MP3D) dataset. | |
This script reads camera parameters and overlap data from a configuration file, | |
processes RGB images and corresponding depth images, adjusts camera poses using a | |
conversion matrix, and then saves the processed images, depth maps, and camera | |
metadata into separate output directories. | |
Usage: | |
python preprocess_mp3d.py --root_dir /path/to/data_mp3d/v1/scans \ | |
--out_dir /path/to/processed_mp3d | |
""" | |
import os | |
import numpy as np | |
import cv2 | |
import shutil | |
from concurrent.futures import ProcessPoolExecutor, as_completed | |
from tqdm import tqdm | |
import argparse | |
def process_image(args): | |
""" | |
Process a single image: reads the RGB image and depth image, normalizes the depth, | |
adjusts the camera pose using a conversion matrix, and saves the processed outputs. | |
Parameters: | |
args: tuple containing | |
(i, paths, K, pose, img_dir, depth_dir, out_rgb_dir, out_depth_dir, out_cam_dir, R_conv) | |
where: | |
i - the frame index | |
paths - tuple of (depth filename, RGB filename) | |
K - camera intrinsics matrix (3x3 NumPy array) | |
pose - camera pose (4x4 NumPy array) | |
img_dir - directory containing RGB images | |
depth_dir - directory containing depth images | |
out_rgb_dir - output directory for processed RGB images | |
out_depth_dir - output directory for processed depth maps | |
out_cam_dir - output directory for processed camera metadata | |
R_conv - a 4x4 conversion matrix (NumPy array) | |
Returns: | |
None if successful, or an error string if processing fails. | |
""" | |
( | |
i, | |
paths, | |
K, | |
pose, | |
img_dir, | |
depth_dir, | |
out_rgb_dir, | |
out_depth_dir, | |
out_cam_dir, | |
R_conv, | |
) = args | |
depth_path, img_path = paths | |
img_path_full = os.path.join(img_dir, img_path) | |
depth_path_full = os.path.join(depth_dir, depth_path) | |
try: | |
# Read depth image using OpenCV (assumed to be stored with 16-bit depth) | |
depth = cv2.imread(depth_path_full, cv2.IMREAD_ANYDEPTH).astype(np.float32) | |
depth = depth / 4000.0 # Normalize depth (adjust this factor as needed) | |
# Adjust the camera pose with the conversion matrix | |
pose_adjusted = pose @ R_conv | |
# Generate output filenames using a zero-padded frame index. | |
basename = f"{i:06d}" | |
out_img_path = os.path.join(out_rgb_dir, basename + ".png") | |
out_depth_path = os.path.join(out_depth_dir, basename + ".npy") | |
out_cam_path = os.path.join(out_cam_dir, basename + ".npz") | |
# Copy the RGB image. | |
shutil.copyfile(img_path_full, out_img_path) | |
# Save the depth map. | |
np.save(out_depth_path, depth) | |
# Save the camera intrinsics and adjusted pose. | |
np.savez(out_cam_path, intrinsics=K, pose=pose_adjusted) | |
except Exception as e: | |
return f"Error processing image {img_path}: {e}" | |
return None | |
def main(): | |
parser = argparse.ArgumentParser( | |
description="Preprocess MP3D scans: convert and save RGB images, depth maps, and camera metadata." | |
) | |
parser.add_argument( | |
"--root_dir", | |
type=str, | |
default="/path/to/data_mp3d/v1/scans", | |
help="Root directory of the raw MP3D data.", | |
) | |
parser.add_argument( | |
"--out_dir", | |
type=str, | |
default="/path/to/processed_mp3d", | |
help="Output directory for processed MP3D data.", | |
) | |
args = parser.parse_args() | |
root = args.root_dir | |
out_dir = args.out_dir | |
# List sequence directories (each scan is stored as a separate directory). | |
seqs = sorted([d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d))]) | |
# Define a conversion matrix from MP3D to the desired coordinate system. | |
R_conv = np.array( | |
[[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]], dtype=np.float32 | |
) | |
for seq in tqdm(seqs, desc="Sequences"): | |
# The sequence directory structure assumes that images and depth files are stored | |
# under a subdirectory with the same name as the sequence. | |
seq_dir = os.path.join(root, seq, seq) | |
img_dir = os.path.join(seq_dir, "undistorted_color_images") | |
depth_dir = os.path.join(seq_dir, "undistorted_depth_images") | |
cam_file = os.path.join(seq_dir, "undistorted_camera_parameters", f"{seq}.conf") | |
overlap_file = os.path.join(seq_dir, "image_overlap_data", f"{seq}_iis.txt") | |
# Read overlap data and save it (optional). | |
overlap = [] | |
with open(overlap_file, "r") as f: | |
for line in f: | |
parts = line.split() | |
overlap.append([int(parts[1]), int(parts[2]), float(parts[3])]) | |
overlap = np.array(overlap) | |
os.makedirs(os.path.join(out_dir, seq), exist_ok=True) | |
np.save(os.path.join(out_dir, seq, "overlap.npy"), overlap) | |
# Read camera parameters from a configuration file. | |
intrinsics = [] | |
camera_poses = [] | |
image_files = [] | |
with open(cam_file, "r") as file: | |
lines = file.readlines() | |
current_intrinsics = None | |
for line in lines: | |
parts = line.split() | |
if not parts: | |
continue | |
if parts[0] == "intrinsics_matrix": | |
# Extract intrinsic parameters. | |
fx, cx, fy, cy = ( | |
float(parts[1]), | |
float(parts[3]), | |
float(parts[5]), | |
float(parts[6]), | |
) | |
current_intrinsics = np.array( | |
[[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float32 | |
) | |
elif parts[0] == "scan": | |
# Read the image filenames and camera pose. | |
depth_image = parts[1] | |
color_image = parts[2] | |
image_files.append((depth_image, color_image)) | |
matrix_values = list(map(float, parts[3:])) | |
camera_pose = np.array(matrix_values).reshape(4, 4) | |
camera_poses.append(camera_pose) | |
if current_intrinsics is not None: | |
intrinsics.append(current_intrinsics.copy()) | |
if not (len(image_files) == len(intrinsics) == len(camera_poses)): | |
print(f"Inconsistent data in sequence {seq}") | |
continue | |
# Prepare output directories. | |
out_rgb_dir = os.path.join(out_dir, seq, "rgb") | |
out_depth_dir = os.path.join(out_dir, seq, "depth") | |
out_cam_dir = os.path.join(out_dir, seq, "cam") | |
os.makedirs(out_rgb_dir, exist_ok=True) | |
os.makedirs(out_depth_dir, exist_ok=True) | |
os.makedirs(out_cam_dir, exist_ok=True) | |
tasks = [] | |
for i, (paths, K, pose) in enumerate( | |
zip(image_files, intrinsics, camera_poses) | |
): | |
args_task = ( | |
i, | |
paths, | |
K, | |
pose, | |
img_dir, | |
depth_dir, | |
out_rgb_dir, | |
out_depth_dir, | |
out_cam_dir, | |
R_conv, | |
) | |
tasks.append(args_task) | |
num_workers = os.cpu_count() // 2 | |
with ProcessPoolExecutor(max_workers=num_workers) as executor: | |
futures = {executor.submit(process_image, task): task[0] for task in tasks} | |
for future in tqdm( | |
as_completed(futures), total=len(futures), desc=f"Processing {seq}" | |
): | |
error = future.result() | |
if error: | |
print(error) | |
if __name__ == "__main__": | |
main() | |