#!/usr/bin/env python3
"""
Preprocess the MVS Synth dataset.
This script processes each sequence in a given dataset directory by:
- Reading the RGB image (PNG), the depth map (EXR), and the camera parameters (JSON).
- Computing the camera-to-world pose by inverting the extrinsic matrix and applying an axis-conversion matrix.
- Building the camera intrinsics matrix from the provided focal lengths and principal point.
- Copying the RGB image (under a .jpg extension), saving the depth map as a NumPy array, and saving the intrinsics and pose as an NPZ file.
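Expected input layout (per sequence <seq> under --root_dir):
    <seq>/images/<basename>.png, <seq>/depths/<basename>.exr, <seq>/poses/<basename>.json
Output layout (per sequence under --out_dir):
    <seq>/rgb/<basename>.jpg, <seq>/depth/<basename>.npy, <seq>/cam/<basename>.npz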
Usage:
python preprocess_mvs_synth.py --root_dir /path/to/data_mvs_synth/GTAV_720/ \
--out_dir /path/to/processed_mvs_synth \
--num_workers 32
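The processed outputs of a frame can be loaded back with, for example:
    depth = np.load("<out_dir>/<seq>/depth/<basename>.npy")
    cam = np.load("<out_dir>/<seq>/cam/<basename>.npz")
    K, pose = cam["intrinsics"], cam["pose"]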
"""

import os
import shutil
import json
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm import tqdm
import numpy as np
import argparse

# OpenCV's EXR reader is gated behind this flag; set it before importing cv2 so
# that the EXR depth files can be decoded.
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"
import cv2

# World-frame conversion matrix applied to each camera-to-world pose (adjust if a
# different world convention is needed).
R_conv = np.array(
[[0, 1, 0, 0], [1, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]], dtype=np.float32
)
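# Left-multiplying a pose by R_conv swaps the x and y components of the world
# coordinates; the z axis is left unchanged.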


def process_basename(seq, basename, root_dir, out_dir):
"""
Process a single frame identified by 'basename' within a given sequence.
Reads the RGB image, depth (EXR) file, and camera parameters (JSON file),
computes the adjusted camera pose, builds the camera intrinsics matrix,
and saves the processed outputs.
Parameters:
seq (str): The sequence (subdirectory) name.
basename (str): The basename of the file (without extension).
root_dir (str): Root directory containing the raw data.
out_dir (str): Output directory where processed data will be saved.
Returns:
None on success, or an error string on failure.
"""
try:
# Define input directories.
seq_dir = os.path.join(root_dir, seq)
img_dir = os.path.join(seq_dir, "images")
depth_dir = os.path.join(seq_dir, "depths")
cam_dir = os.path.join(seq_dir, "poses")
# Define input file paths.
img_path = os.path.join(img_dir, basename + ".png")
depth_path = os.path.join(depth_dir, basename + ".exr")
cam_path = os.path.join(cam_dir, basename + ".json")
# Define output directories.
out_seq_dir = os.path.join(out_dir, seq)
out_img_dir = os.path.join(out_seq_dir, "rgb")
out_depth_dir = os.path.join(out_seq_dir, "depth")
out_cam_dir = os.path.join(out_seq_dir, "cam")
os.makedirs(out_img_dir, exist_ok=True)
os.makedirs(out_depth_dir, exist_ok=True)
os.makedirs(out_cam_dir, exist_ok=True)
# Define output file paths.
out_img_path = os.path.join(out_img_dir, basename + ".jpg")
out_depth_path = os.path.join(out_depth_dir, basename + ".npy")
out_cam_path = os.path.join(out_cam_dir, basename + ".npz")
# Read and process camera parameters.
with open(cam_path, "r") as f:
cam_data = json.load(f)
c_x = cam_data["c_x"]
c_y = cam_data["c_y"]
f_x = cam_data["f_x"]
f_y = cam_data["f_y"]
extrinsic = np.array(cam_data["extrinsic"])
# Invert extrinsic matrix to obtain camera-to-world pose.
pose = np.linalg.inv(extrinsic)
# Apply conversion matrix.
pose = R_conv @ pose
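        # 'pose' now maps homogeneous camera coordinates to the converted world
        # frame; its last column holds the camera center in that frame.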
# Build a simple intrinsics matrix.
intrinsics = np.array(
[[f_x, 0, c_x], [0, f_y, c_y], [0, 0, 1]], dtype=np.float32
)
if np.any(np.isinf(pose)) or np.any(np.isnan(pose)):
raise ValueError(f"Invalid pose for {basename}")
        # Read the depth image (EXR). cv2.imread returns None if the file cannot be decoded.
        depth = cv2.imread(depth_path, cv2.IMREAD_ANYDEPTH)
        if depth is None:
            raise ValueError(f"Could not read depth file {depth_path}")
        depth = depth.astype(np.float32)
        depth[np.isinf(depth)] = 0.0  # Zero out infinite (invalid) depth values.
        # Save the processed data (the RGB image is copied byte-for-byte).
        shutil.copyfile(img_path, out_img_path)
np.save(out_depth_path, depth)
np.savez(out_cam_path, intrinsics=intrinsics, pose=pose)
except Exception as e:
return f"Error processing {seq}/{basename}: {e}"
return None


def main():
parser = argparse.ArgumentParser(
description="Preprocess MVS Synth dataset: convert images, depth, and camera data."
)
parser.add_argument(
"--root_dir",
type=str,
default="/path/to/data_mvs_synth/GTAV_720/",
help="Root directory of the raw MVS Synth data.",
)
parser.add_argument(
"--out_dir",
type=str,
default="/path/to/processed_mvs_synth",
help="Output directory for processed data.",
)
parser.add_argument(
"--num_workers", type=int, default=32, help="Number of parallel workers."
)
args = parser.parse_args()
root_dir = args.root_dir
out_dir = args.out_dir
# Get list of sequence directories.
seqs = sorted(
[d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))]
)
# Pre-create output directories for each sequence.
for seq in seqs:
out_seq_dir = os.path.join(out_dir, seq)
os.makedirs(os.path.join(out_seq_dir, "rgb"), exist_ok=True)
os.makedirs(os.path.join(out_seq_dir, "depth"), exist_ok=True)
os.makedirs(os.path.join(out_seq_dir, "cam"), exist_ok=True)
# Build list of processing tasks.
tasks = []
for seq in seqs:
seq_dir = os.path.join(root_dir, seq)
img_dir = os.path.join(seq_dir, "images")
basenames = sorted([d[:-4] for d in os.listdir(img_dir) if d.endswith(".png")])
for basename in basenames:
tasks.append((seq, basename, root_dir, out_dir))
num_workers = args.num_workers
print(f"Processing {len(tasks)} tasks using {num_workers} workers...")
with ProcessPoolExecutor(max_workers=num_workers) as executor:
futures = {executor.submit(process_basename, *task): task[1] for task in tasks}
for future in tqdm(
as_completed(futures), total=len(futures), desc="Processing"
):
error = future.result()
if error:
print(error)


if __name__ == "__main__":
main()