#!/usr/bin/env python3
"""
Preprocess the MVS Synth dataset.
This script processes each sequence in a given dataset directory by:
- Reading the RGB image (PNG), the depth map (EXR), and the camera parameters (JSON).
- Computing the camera-to-world pose by inverting the extrinsic matrix and applying an axis-conversion matrix.
- Building the camera intrinsics matrix from the provided focal lengths and principal point.
- Copying the RGB image (under a .jpg extension), saving the depth map as a NumPy array, and saving the intrinsics and pose as an NPZ file.
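Expected input layout (per sequence <seq> under --root_dir):
    <seq>/images/<basename>.png, <seq>/depths/<basename>.exr, <seq>/poses/<basename>.json
Output layout (per sequence under --out_dir):
    <seq>/rgb/<basename>.jpg, <seq>/depth/<basename>.npy, <seq>/cam/<basename>.npz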
Usage:
python preprocess_mvs_synth.py --root_dir /path/to/data_mvs_synth/GTAV_720/ \
--out_dir /path/to/processed_mvs_synth \
--num_workers 32
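The processed outputs of a frame can be loaded back with, for example:
    depth = np.load("<out_dir>/<seq>/depth/<basename>.npy")
    cam = np.load("<out_dir>/<seq>/cam/<basename>.npz")
    K, pose = cam["intrinsics"], cam["pose"]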
"""

import os
import shutil
import json
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm import tqdm
import numpy as np
import argparse

# OpenCV's EXR reader is gated behind this flag; set it before importing cv2 so
# that the EXR depth files can be decoded.
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"
import cv2

# World-frame conversion matrix applied to each camera-to-world pose (adjust if a
# different world convention is needed).
R_conv = np.array(
[[0, 1, 0, 0], [1, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]], dtype=np.float32
)
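# Left-multiplying a pose by R_conv swaps the x and y components of the world
# coordinates; the z axis is left unchanged.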


def process_basename(seq, basename, root_dir, out_dir):
"""
Process a single frame identified by 'basename' within a given sequence.
Reads the RGB image, depth (EXR) file, and camera parameters (JSON file),
computes the adjusted camera pose, builds the camera intrinsics matrix,
and saves the processed outputs.
Parameters:
seq (str): The sequence (subdirectory) name.
basename (str): The basename of the file (without extension).
root_dir (str): Root directory containing the raw data.
out_dir (str): Output directory where processed data will be saved.
Returns:
None on success, or an error string on failure.
"""
try:
# Define input directories.
seq_dir = os.path.join(root_dir, seq)
img_dir = os.path.join(seq_dir, "images")
depth_dir = os.path.join(seq_dir, "depths")
cam_dir = os.path.join(seq_dir, "poses")
# Define input file paths.
img_path = os.path.join(img_dir, basename + ".png")
depth_path = os.path.join(depth_dir, basename + ".exr")
cam_path = os.path.join(cam_dir, basename + ".json")
# Define output directories.
out_seq_dir = os.path.join(out_dir, seq)
out_img_dir = os.path.join(out_seq_dir, "rgb")
out_depth_dir = os.path.join(out_seq_dir, "depth")
out_cam_dir = os.path.join(out_seq_dir, "cam")
os.makedirs(out_img_dir, exist_ok=True)
os.makedirs(out_depth_dir, exist_ok=True)
os.makedirs(out_cam_dir, exist_ok=True)
# Define output file paths.
out_img_path = os.path.join(out_img_dir, basename + ".jpg")
out_depth_path = os.path.join(out_depth_dir, basename + ".npy")
out_cam_path = os.path.join(out_cam_dir, basename + ".npz")
# Read and process camera parameters.
with open(cam_path, "r") as f:
cam_data = json.load(f)
c_x = cam_data["c_x"]
c_y = cam_data["c_y"]
f_x = cam_data["f_x"]
f_y = cam_data["f_y"]
extrinsic = np.array(cam_data["extrinsic"])
# Invert extrinsic matrix to obtain camera-to-world pose.
pose = np.linalg.inv(extrinsic)
# Apply conversion matrix.
pose = R_conv @ pose
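        # 'pose' now maps homogeneous camera coordinates to the converted world
        # frame; its last column holds the camera center in that frame.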
# Build a simple intrinsics matrix.
intrinsics = np.array(
[[f_x, 0, c_x], [0, f_y, c_y], [0, 0, 1]], dtype=np.float32
)
if np.any(np.isinf(pose)) or np.any(np.isnan(pose)):
raise ValueError(f"Invalid pose for {basename}")
        # Read the depth image (EXR). cv2.imread returns None if the file cannot be decoded.
        depth = cv2.imread(depth_path, cv2.IMREAD_ANYDEPTH)
        if depth is None:
            raise ValueError(f"Could not read depth file {depth_path}")
        depth = depth.astype(np.float32)
        depth[np.isinf(depth)] = 0.0  # Zero out infinite (invalid) depth values.
        # Save the processed data (the RGB image is copied byte-for-byte).
        shutil.copyfile(img_path, out_img_path)
np.save(out_depth_path, depth)
np.savez(out_cam_path, intrinsics=intrinsics, pose=pose)
except Exception as e:
return f"Error processing {seq}/{basename}: {e}"
return None


def main():
parser = argparse.ArgumentParser(
description="Preprocess MVS Synth dataset: convert images, depth, and camera data."
)
parser.add_argument(
"--root_dir",
type=str,
default="/path/to/data_mvs_synth/GTAV_720/",
help="Root directory of the raw MVS Synth data.",
)
parser.add_argument(
"--out_dir",
type=str,
default="/path/to/processed_mvs_synth",
help="Output directory for processed data.",
)
parser.add_argument(
"--num_workers", type=int, default=32, help="Number of parallel workers."
)
args = parser.parse_args()
root_dir = args.root_dir
out_dir = args.out_dir
# Get list of sequence directories.
seqs = sorted(
[d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))]
)
# Pre-create output directories for each sequence.
for seq in seqs:
out_seq_dir = os.path.join(out_dir, seq)
os.makedirs(os.path.join(out_seq_dir, "rgb"), exist_ok=True)
os.makedirs(os.path.join(out_seq_dir, "depth"), exist_ok=True)
os.makedirs(os.path.join(out_seq_dir, "cam"), exist_ok=True)
# Build list of processing tasks.
tasks = []
for seq in seqs:
seq_dir = os.path.join(root_dir, seq)
img_dir = os.path.join(seq_dir, "images")
basenames = sorted([d[:-4] for d in os.listdir(img_dir) if d.endswith(".png")])
for basename in basenames:
tasks.append((seq, basename, root_dir, out_dir))
num_workers = args.num_workers
print(f"Processing {len(tasks)} tasks using {num_workers} workers...")
with ProcessPoolExecutor(max_workers=num_workers) as executor:
futures = {executor.submit(process_basename, *task): task[1] for task in tasks}
for future in tqdm(
as_completed(futures), total=len(futures), desc="Processing"
):
error = future.result()
if error:
print(error)


if __name__ == "__main__":
main()