vmem / extern /CUT3R /datasets_preprocess /preprocess_urbansyn.py
liguang0115's picture
Add initial project structure with core files, configurations, and sample images
2df809d
#!/usr/bin/env python3
"""
Preprocess Script for UrbanSyn Dataset
This script:
1. Reads RGB, depth (EXR), and semantic segmentation (class) files from an UrbanSyn dataset directory.
2. Retrieves camera intrinsics from a JSON metadata file.
3. Rescales images, depth maps, and masks to a fixed resolution (e.g., 640×480).
4. Saves processed data (RGB, .npy depth, .png sky mask, and .npz intrinsics) in an organized structure.
Usage:
python preprocess_urbansyn.py \
--input_dir /path/to/data_urbansyn \
--output_dir /path/to/processed_urbansyn
"""
import os
import json
import argparse
import shutil
from concurrent.futures import ProcessPoolExecutor, as_completed
import cv2
import numpy as np
from tqdm import tqdm
from PIL import Image
# Make sure OpenCV EXR support is enabled
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"
# Custom "cropping" module (ensure cropping.py is available/installed)
import cropping
def process_basename(
basename,
rgb_dir,
depth_dir,
class_dir,
cam_info,
out_rgb_dir,
out_depth_dir,
out_mask_dir,
out_cam_dir,
):
"""
Process a single file triplet (RGB, depth, class) for a given basename.
Args:
basename (str): Base name without file extension (e.g., 'image_0001').
rgb_dir (str): Directory containing RGB .png files.
depth_dir (str): Directory containing .exr depth files.
class_dir (str): Directory containing class .png files (semantic segmentation).
cam_info (dict): Dictionary with camera metadata (focal length, sensor size).
out_rgb_dir (str): Output directory for rescaled RGB images.
out_depth_dir (str): Output directory for rescaled depth files.
out_mask_dir (str): Output directory for sky masks.
out_cam_dir (str): Output directory for camera intrinsics.
Returns:
str or None:
- Returns None if successful.
- Returns an error message if something fails.
"""
# Construct output file paths
out_img_path = os.path.join(out_rgb_dir, f"{basename}.png")
out_depth_path = os.path.join(out_depth_dir, f"{basename}.npy")
out_mask_path = os.path.join(out_mask_dir, f"{basename}.png")
out_cam_path = os.path.join(out_cam_dir, f"{basename}.npz")
# Skip if already processed
if (
os.path.exists(out_img_path)
and os.path.exists(out_depth_path)
and os.path.exists(out_mask_path)
and os.path.exists(out_cam_path)
):
return None
try:
# Build file paths
img_file = os.path.join(rgb_dir, f"{basename}.png")
depth_file = os.path.join(depth_dir, f'{basename.replace("rgb", "depth")}.exr')
class_file = os.path.join(class_dir, basename.replace("rgb", "ss") + ".png")
# 1. Read RGB image
img = cv2.imread(img_file, cv2.IMREAD_UNCHANGED)
if img is None:
return f"Error: Could not read image file {img_file}"
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # Convert BGR -> RGB
H, W = img.shape[:2]
# 2. Read depth from EXR
depth = cv2.imread(depth_file, cv2.IMREAD_UNCHANGED)
if depth is None:
# Attempt fallback if there's a '.exr.1' file
alt_exr_1 = depth_file + ".1"
if os.path.exists(alt_exr_1):
temp_exr = depth_file.replace(".exr", "_tmp.exr")
os.rename(alt_exr_1, temp_exr)
depth = cv2.imread(temp_exr, cv2.IMREAD_UNCHANGED)
if depth is None:
return f"Error reading depth file (fallback) {temp_exr}"
depth *= 1e5
else:
return f"Error reading depth file {depth_file}"
else:
depth *= 1e5 # multiply by 1e5, consistent with your original code
# 3. Read class image, build sky mask
cl = cv2.imread(class_file, cv2.IMREAD_UNCHANGED)
if cl is None:
return f"Error: Could not read class file {class_file}"
sky_mask = (cl[..., 0] == 10).astype(np.uint8) # class ID 10 => sky
# 4. Build camera intrinsics
f_mm = cam_info["focalLength_mm"]
w_mm = cam_info["sensorWidth_mm"]
h_mm = cam_info["sensorHeight_mm"]
K = np.eye(3, dtype=np.float32)
K[0, 0] = f_mm / w_mm * W
K[1, 1] = f_mm / h_mm * H
K[0, 2] = W / 2
K[1, 2] = H / 2
# 5. Combine depth + sky_mask in a single array for rescaling
depth_with_mask = np.stack([depth, sky_mask], axis=-1)
# 6. Rescale to desired size
image_pil = Image.fromarray(img)
image_rescaled, depth_with_mask_rescaled, K_rescaled = (
cropping.rescale_image_depthmap(
image_pil, depth_with_mask, K, output_resolution=(640, 480)
)
)
# Write outputs
image_rescaled.save(out_img_path)
np.save(out_depth_path, depth_with_mask_rescaled[..., 0])
cv2.imwrite(
out_mask_path, (depth_with_mask_rescaled[..., 1] * 255).astype(np.uint8)
)
np.savez(out_cam_path, intrinsics=K_rescaled)
except Exception as e:
return f"Error processing {basename}: {e}"
return None
def main():
parser = argparse.ArgumentParser(
description="Preprocess UrbanSyn dataset by loading RGB/Depth/Seg "
"and rescaling them with camera intrinsics."
)
parser.add_argument(
"--input_dir", required=True, help="Path to the UrbanSyn dataset directory."
)
parser.add_argument(
"--output_dir",
required=True,
help="Path to the directory where processed data will be stored.",
)
args = parser.parse_args()
input_dir = os.path.abspath(args.input_dir)
output_dir = os.path.abspath(args.output_dir)
os.makedirs(output_dir, exist_ok=True)
# Define input subdirectories
rgb_dir = os.path.join(input_dir, "rgb")
depth_dir = os.path.join(input_dir, "depth")
class_dir = os.path.join(input_dir, "ss")
meta_file = os.path.join(input_dir, "camera_metadata.json")
# Define output subdirectories
out_rgb_dir = os.path.join(output_dir, "rgb")
out_depth_dir = os.path.join(output_dir, "depth")
out_mask_dir = os.path.join(output_dir, "sky_mask")
out_cam_dir = os.path.join(output_dir, "cam")
for d in [out_rgb_dir, out_depth_dir, out_mask_dir, out_cam_dir]:
os.makedirs(d, exist_ok=True)
# Gather basenames from RGB files
basenames = sorted(
[
os.path.splitext(fname)[0]
for fname in os.listdir(rgb_dir)
if fname.endswith(".png")
]
)
if not basenames:
print(f"No RGB .png files found in {rgb_dir}. Exiting.")
return
# Load camera metadata
if not os.path.isfile(meta_file):
print(f"Error: metadata file not found at {meta_file}. Exiting.")
return
with open(meta_file, "r") as f:
cam_info_full = json.load(f)
cam_info = cam_info_full["parameters"][0]["Camera"]
# Process in parallel
num_workers = max(1, os.cpu_count() or 1)
with ProcessPoolExecutor(max_workers=num_workers) as executor:
futures = {
executor.submit(
process_basename,
basename,
rgb_dir,
depth_dir,
class_dir,
cam_info,
out_rgb_dir,
out_depth_dir,
out_mask_dir,
out_cam_dir,
): basename
for basename in basenames
}
# Use tqdm for progress
for future in tqdm(
as_completed(futures), total=len(futures), desc="Processing UrbanSyn"
):
error = future.result()
if error:
print(error)
if __name__ == "__main__":
main()