Spaces:

liguang0115
/

vmem

Running on L4

App Files Files Community

vmem / extern /CUT3R /datasets_preprocess /preprocess_mp3d.py

liguang0115

Add initial project structure with core files, configurations, and sample images

2df809d 14 days ago

raw

history blame contribute delete

7.84 kB

	#!/usr/bin/env python3
	"""
	Preprocess the Matterport3D (MP3D) dataset.

	This script reads camera parameters and overlap data from a configuration file,
	processes RGB images and corresponding depth images, adjusts camera poses using a
	conversion matrix, and then saves the processed images, depth maps, and camera
	metadata into separate output directories.

	Usage:
	python preprocess_mp3d.py --root_dir /path/to/data_mp3d/v1/scans \
	--out_dir /path/to/processed_mp3d
	"""

	import os
	import numpy as np
	import cv2
	import shutil
	from concurrent.futures import ProcessPoolExecutor, as_completed
	from tqdm import tqdm
	import argparse


	def process_image(args):
	"""
	Process a single image: reads the RGB image and depth image, normalizes the depth,
	adjusts the camera pose using a conversion matrix, and saves the processed outputs.

	Parameters:
	args: tuple containing
	(i, paths, K, pose, img_dir, depth_dir, out_rgb_dir, out_depth_dir, out_cam_dir, R_conv)
	where:
	i - the frame index
	paths - tuple of (depth filename, RGB filename)
	K - camera intrinsics matrix (3x3 NumPy array)
	pose - camera pose (4x4 NumPy array)
	img_dir - directory containing RGB images
	depth_dir - directory containing depth images
	out_rgb_dir - output directory for processed RGB images
	out_depth_dir - output directory for processed depth maps
	out_cam_dir - output directory for processed camera metadata
	R_conv - a 4x4 conversion matrix (NumPy array)
	Returns:
	None if successful, or an error string if processing fails.
	"""
	(
	i,
	paths,
	K,
	pose,
	img_dir,
	depth_dir,
	out_rgb_dir,
	out_depth_dir,
	out_cam_dir,
	R_conv,
	) = args

	depth_path, img_path = paths
	img_path_full = os.path.join(img_dir, img_path)
	depth_path_full = os.path.join(depth_dir, depth_path)

	try:
	# Read depth image using OpenCV (assumed to be stored with 16-bit depth)
	depth = cv2.imread(depth_path_full, cv2.IMREAD_ANYDEPTH).astype(np.float32)
	depth = depth / 4000.0 # Normalize depth (adjust this factor as needed)

	# Adjust the camera pose with the conversion matrix
	pose_adjusted = pose @ R_conv

	# Generate output filenames using a zero-padded frame index.
	basename = f"{i:06d}"
	out_img_path = os.path.join(out_rgb_dir, basename + ".png")
	out_depth_path = os.path.join(out_depth_dir, basename + ".npy")
	out_cam_path = os.path.join(out_cam_dir, basename + ".npz")

	# Copy the RGB image.
	shutil.copyfile(img_path_full, out_img_path)

	# Save the depth map.
	np.save(out_depth_path, depth)

	# Save the camera intrinsics and adjusted pose.
	np.savez(out_cam_path, intrinsics=K, pose=pose_adjusted)

	except Exception as e:
	return f"Error processing image {img_path}: {e}"

	return None


	def main():
	parser = argparse.ArgumentParser(
	description="Preprocess MP3D scans: convert and save RGB images, depth maps, and camera metadata."
	)
	parser.add_argument(
	"--root_dir",
	type=str,
	default="/path/to/data_mp3d/v1/scans",
	help="Root directory of the raw MP3D data.",
	)
	parser.add_argument(
	"--out_dir",
	type=str,
	default="/path/to/processed_mp3d",
	help="Output directory for processed MP3D data.",
	)
	args = parser.parse_args()

	root = args.root_dir
	out_dir = args.out_dir

	# List sequence directories (each scan is stored as a separate directory).
	seqs = sorted([d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d))])

	# Define a conversion matrix from MP3D to the desired coordinate system.
	R_conv = np.array(
	[[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]], dtype=np.float32
	)

	for seq in tqdm(seqs, desc="Sequences"):
	# The sequence directory structure assumes that images and depth files are stored
	# under a subdirectory with the same name as the sequence.
	seq_dir = os.path.join(root, seq, seq)

	img_dir = os.path.join(seq_dir, "undistorted_color_images")
	depth_dir = os.path.join(seq_dir, "undistorted_depth_images")
	cam_file = os.path.join(seq_dir, "undistorted_camera_parameters", f"{seq}.conf")
	overlap_file = os.path.join(seq_dir, "image_overlap_data", f"{seq}_iis.txt")

	# Read overlap data and save it (optional).
	overlap = []
	with open(overlap_file, "r") as f:
	for line in f:
	parts = line.split()
	overlap.append([int(parts[1]), int(parts[2]), float(parts[3])])
	overlap = np.array(overlap)
	os.makedirs(os.path.join(out_dir, seq), exist_ok=True)
	np.save(os.path.join(out_dir, seq, "overlap.npy"), overlap)

	# Read camera parameters from a configuration file.
	intrinsics = []
	camera_poses = []
	image_files = []

	with open(cam_file, "r") as file:
	lines = file.readlines()
	current_intrinsics = None
	for line in lines:
	parts = line.split()
	if not parts:
	continue
	if parts[0] == "intrinsics_matrix":
	# Extract intrinsic parameters.
	fx, cx, fy, cy = (
	float(parts[1]),
	float(parts[3]),
	float(parts[5]),
	float(parts[6]),
	)
	current_intrinsics = np.array(
	[[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float32
	)
	elif parts[0] == "scan":
	# Read the image filenames and camera pose.
	depth_image = parts[1]
	color_image = parts[2]
	image_files.append((depth_image, color_image))
	matrix_values = list(map(float, parts[3:]))
	camera_pose = np.array(matrix_values).reshape(4, 4)
	camera_poses.append(camera_pose)
	if current_intrinsics is not None:
	intrinsics.append(current_intrinsics.copy())

	if not (len(image_files) == len(intrinsics) == len(camera_poses)):
	print(f"Inconsistent data in sequence {seq}")
	continue

	# Prepare output directories.
	out_rgb_dir = os.path.join(out_dir, seq, "rgb")
	out_depth_dir = os.path.join(out_dir, seq, "depth")
	out_cam_dir = os.path.join(out_dir, seq, "cam")
	os.makedirs(out_rgb_dir, exist_ok=True)
	os.makedirs(out_depth_dir, exist_ok=True)
	os.makedirs(out_cam_dir, exist_ok=True)

	tasks = []
	for i, (paths, K, pose) in enumerate(
	zip(image_files, intrinsics, camera_poses)
	):
	args_task = (
	i,
	paths,
	K,
	pose,
	img_dir,
	depth_dir,
	out_rgb_dir,
	out_depth_dir,
	out_cam_dir,
	R_conv,
	)
	tasks.append(args_task)

	num_workers = os.cpu_count() // 2
	with ProcessPoolExecutor(max_workers=num_workers) as executor:
	futures = {executor.submit(process_image, task): task[0] for task in tasks}
	for future in tqdm(
	as_completed(futures), total=len(futures), desc=f"Processing {seq}"
	):
	error = future.result()
	if error:
	print(error)


	if __name__ == "__main__":
	main()