Spaces:
Running
on
L4
Running
on
L4
#!/usr/bin/env python3 | |
""" | |
Preprocess Script for UrbanSyn Dataset | |
This script: | |
1. Reads RGB, depth (EXR), and semantic segmentation (class) files from an UrbanSyn dataset directory. | |
2. Retrieves camera intrinsics from a JSON metadata file. | |
3. Rescales images, depth maps, and masks to a fixed resolution (e.g., 640×480). | |
4. Saves processed data (RGB, .npy depth, .png sky mask, and .npz intrinsics) in an organized structure. | |
Usage: | |
python preprocess_urbansyn.py \ | |
--input_dir /path/to/data_urbansyn \ | |
--output_dir /path/to/processed_urbansyn | |
""" | |
import os | |
import json | |
import argparse | |
import shutil | |
from concurrent.futures import ProcessPoolExecutor, as_completed | |
import cv2 | |
import numpy as np | |
from tqdm import tqdm | |
from PIL import Image | |
# Make sure OpenCV EXR support is enabled | |
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1" | |
# Custom "cropping" module (ensure cropping.py is available/installed) | |
import cropping | |
def process_basename( | |
basename, | |
rgb_dir, | |
depth_dir, | |
class_dir, | |
cam_info, | |
out_rgb_dir, | |
out_depth_dir, | |
out_mask_dir, | |
out_cam_dir, | |
): | |
""" | |
Process a single file triplet (RGB, depth, class) for a given basename. | |
Args: | |
basename (str): Base name without file extension (e.g., 'image_0001'). | |
rgb_dir (str): Directory containing RGB .png files. | |
depth_dir (str): Directory containing .exr depth files. | |
class_dir (str): Directory containing class .png files (semantic segmentation). | |
cam_info (dict): Dictionary with camera metadata (focal length, sensor size). | |
out_rgb_dir (str): Output directory for rescaled RGB images. | |
out_depth_dir (str): Output directory for rescaled depth files. | |
out_mask_dir (str): Output directory for sky masks. | |
out_cam_dir (str): Output directory for camera intrinsics. | |
Returns: | |
str or None: | |
- Returns None if successful. | |
- Returns an error message if something fails. | |
""" | |
# Construct output file paths | |
out_img_path = os.path.join(out_rgb_dir, f"{basename}.png") | |
out_depth_path = os.path.join(out_depth_dir, f"{basename}.npy") | |
out_mask_path = os.path.join(out_mask_dir, f"{basename}.png") | |
out_cam_path = os.path.join(out_cam_dir, f"{basename}.npz") | |
# Skip if already processed | |
if ( | |
os.path.exists(out_img_path) | |
and os.path.exists(out_depth_path) | |
and os.path.exists(out_mask_path) | |
and os.path.exists(out_cam_path) | |
): | |
return None | |
try: | |
# Build file paths | |
img_file = os.path.join(rgb_dir, f"{basename}.png") | |
depth_file = os.path.join(depth_dir, f'{basename.replace("rgb", "depth")}.exr') | |
class_file = os.path.join(class_dir, basename.replace("rgb", "ss") + ".png") | |
# 1. Read RGB image | |
img = cv2.imread(img_file, cv2.IMREAD_UNCHANGED) | |
if img is None: | |
return f"Error: Could not read image file {img_file}" | |
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # Convert BGR -> RGB | |
H, W = img.shape[:2] | |
# 2. Read depth from EXR | |
depth = cv2.imread(depth_file, cv2.IMREAD_UNCHANGED) | |
if depth is None: | |
# Attempt fallback if there's a '.exr.1' file | |
alt_exr_1 = depth_file + ".1" | |
if os.path.exists(alt_exr_1): | |
temp_exr = depth_file.replace(".exr", "_tmp.exr") | |
os.rename(alt_exr_1, temp_exr) | |
depth = cv2.imread(temp_exr, cv2.IMREAD_UNCHANGED) | |
if depth is None: | |
return f"Error reading depth file (fallback) {temp_exr}" | |
depth *= 1e5 | |
else: | |
return f"Error reading depth file {depth_file}" | |
else: | |
depth *= 1e5 # multiply by 1e5, consistent with your original code | |
# 3. Read class image, build sky mask | |
cl = cv2.imread(class_file, cv2.IMREAD_UNCHANGED) | |
if cl is None: | |
return f"Error: Could not read class file {class_file}" | |
sky_mask = (cl[..., 0] == 10).astype(np.uint8) # class ID 10 => sky | |
# 4. Build camera intrinsics | |
f_mm = cam_info["focalLength_mm"] | |
w_mm = cam_info["sensorWidth_mm"] | |
h_mm = cam_info["sensorHeight_mm"] | |
K = np.eye(3, dtype=np.float32) | |
K[0, 0] = f_mm / w_mm * W | |
K[1, 1] = f_mm / h_mm * H | |
K[0, 2] = W / 2 | |
K[1, 2] = H / 2 | |
# 5. Combine depth + sky_mask in a single array for rescaling | |
depth_with_mask = np.stack([depth, sky_mask], axis=-1) | |
# 6. Rescale to desired size | |
image_pil = Image.fromarray(img) | |
image_rescaled, depth_with_mask_rescaled, K_rescaled = ( | |
cropping.rescale_image_depthmap( | |
image_pil, depth_with_mask, K, output_resolution=(640, 480) | |
) | |
) | |
# Write outputs | |
image_rescaled.save(out_img_path) | |
np.save(out_depth_path, depth_with_mask_rescaled[..., 0]) | |
cv2.imwrite( | |
out_mask_path, (depth_with_mask_rescaled[..., 1] * 255).astype(np.uint8) | |
) | |
np.savez(out_cam_path, intrinsics=K_rescaled) | |
except Exception as e: | |
return f"Error processing {basename}: {e}" | |
return None | |
def main(): | |
parser = argparse.ArgumentParser( | |
description="Preprocess UrbanSyn dataset by loading RGB/Depth/Seg " | |
"and rescaling them with camera intrinsics." | |
) | |
parser.add_argument( | |
"--input_dir", required=True, help="Path to the UrbanSyn dataset directory." | |
) | |
parser.add_argument( | |
"--output_dir", | |
required=True, | |
help="Path to the directory where processed data will be stored.", | |
) | |
args = parser.parse_args() | |
input_dir = os.path.abspath(args.input_dir) | |
output_dir = os.path.abspath(args.output_dir) | |
os.makedirs(output_dir, exist_ok=True) | |
# Define input subdirectories | |
rgb_dir = os.path.join(input_dir, "rgb") | |
depth_dir = os.path.join(input_dir, "depth") | |
class_dir = os.path.join(input_dir, "ss") | |
meta_file = os.path.join(input_dir, "camera_metadata.json") | |
# Define output subdirectories | |
out_rgb_dir = os.path.join(output_dir, "rgb") | |
out_depth_dir = os.path.join(output_dir, "depth") | |
out_mask_dir = os.path.join(output_dir, "sky_mask") | |
out_cam_dir = os.path.join(output_dir, "cam") | |
for d in [out_rgb_dir, out_depth_dir, out_mask_dir, out_cam_dir]: | |
os.makedirs(d, exist_ok=True) | |
# Gather basenames from RGB files | |
basenames = sorted( | |
[ | |
os.path.splitext(fname)[0] | |
for fname in os.listdir(rgb_dir) | |
if fname.endswith(".png") | |
] | |
) | |
if not basenames: | |
print(f"No RGB .png files found in {rgb_dir}. Exiting.") | |
return | |
# Load camera metadata | |
if not os.path.isfile(meta_file): | |
print(f"Error: metadata file not found at {meta_file}. Exiting.") | |
return | |
with open(meta_file, "r") as f: | |
cam_info_full = json.load(f) | |
cam_info = cam_info_full["parameters"][0]["Camera"] | |
# Process in parallel | |
num_workers = max(1, os.cpu_count() or 1) | |
with ProcessPoolExecutor(max_workers=num_workers) as executor: | |
futures = { | |
executor.submit( | |
process_basename, | |
basename, | |
rgb_dir, | |
depth_dir, | |
class_dir, | |
cam_info, | |
out_rgb_dir, | |
out_depth_dir, | |
out_mask_dir, | |
out_cam_dir, | |
): basename | |
for basename in basenames | |
} | |
# Use tqdm for progress | |
for future in tqdm( | |
as_completed(futures), total=len(futures), desc="Processing UrbanSyn" | |
): | |
error = future.result() | |
if error: | |
print(error) | |
if __name__ == "__main__": | |
main() | |