#!/usr/bin/env python3
"""
HOI4D Preprocessing Script
This script processes HOI4D data by:
1. Searching specific subdirectories for RGB and depth images.
2. Reading camera intrinsics from a .npy file (one per high-level scene).
3. Rescaling the RGB images and depth maps to a fixed output resolution
(e.g., 640x480) using the 'cropping' module.
4. Saving results (RGB, .npy depth, .npz camera intrinsics) in a new directory structure.
Usage:
python preprocess_hoi4d.py \
--root_dir /path/to/HOI4D_release \
--cam_root /path/to/camera_params \
--out_dir /path/to/processed_hoi4d
"""

import argparse
import glob
import os
from concurrent.futures import ProcessPoolExecutor

import cv2
import numpy as np
from PIL import Image
from tqdm import tqdm

import src.dust3r.datasets.utils.cropping as cropping
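# NOTE: the 'cropping' import above assumes this script is run from the CUT3R
# repository root (or that the repo's 'src' package is otherwise on PYTHONPATH).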


def parse_arguments():
    """
    Parse command-line arguments for HOI4D preprocessing.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(
        description="Preprocess HOI4D dataset by rescaling RGB and depth images."
    )
    parser.add_argument("--root_dir", required=True,
                        help="Path to the HOI4D_release directory.")
    parser.add_argument("--cam_root", required=True,
                        help="Path to the directory containing camera intrinsics.")
    parser.add_argument("--out_dir", required=True,
                        help="Path to the directory where processed files will be saved.")
    parser.add_argument("--max_workers", type=int, default=None,
                        help="Number of parallel workers. Defaults to half of the available CPU cores.")
    return parser.parse_args()


def process_image(args):
    """
    Process a single image and depth map:
      - Loads the image (using PIL) and the depth map (using OpenCV).
      - Converts depth from millimeters to meters (divides by 1000).
      - Rescales both using 'cropping.rescale_image_depthmap'.
      - Saves the rescaled image (.png), depth (.npy), and camera intrinsics (.npz).

    Args:
        args (tuple): A tuple of:
            (img_path, depth_path, out_img_path, out_depth_path, out_cam_path, intrinsics)

    Returns:
        None. Errors are printed to the console but do not stop the workflow.
    """
    img_path, depth_path, out_img_path, out_depth_path, out_cam_path, intrinsics = args
    try:
        # Load image
        img = Image.open(img_path)
        # Load depth (in mm) and convert to meters
        depth = cv2.imread(depth_path, cv2.IMREAD_ANYDEPTH)
        if depth is None:
            raise ValueError(f"Could not read depth image: {depth_path}")
        depth = depth.astype(np.float32) / 1000.0
        # Rescale image and depth map; pass a copy of the intrinsics so the
        # array shared across tasks is never mutated
        img_rescaled, depth_rescaled, intrinsics_rescaled = cropping.rescale_image_depthmap(
            img, depth, intrinsics.copy(), (640, 480)
        )
        # Save processed data
        img_rescaled.save(out_img_path)          # PNG image
        np.save(out_depth_path, depth_rescaled)  # Depth .npy
        np.savez(out_cam_path, intrinsics=intrinsics_rescaled)  # Rescaled intrinsics
    except Exception as e:
        print(f"Error processing {img_path}: {e}")


def main():
    args = parse_arguments()
    root = args.root_dir
    cam_root = args.cam_root
    out_dir = args.out_dir
    os.makedirs(out_dir, exist_ok=True)

    # Collect sequence directories with a glob pattern,
    # e.g.: root/ZY2021*/H*/C*/N*/S*/s*/T*
    scene_dirs = glob.glob(os.path.join(root, "ZY2021*", "H*", "C*", "N*", "S*", "s*", "T*"))

    # Build tasks
    tasks = []
    for scene_dir in tqdm(scene_dirs, desc="Collecting scenes"):
        # Build a flat output sub-directory name,
        # e.g. "ZY202101/H1/C1/N1/S1/s1/T1" -> "ZY202101_H1_C1_N1_S1_s1_T1"
        scene_relpath = os.path.relpath(scene_dir, root)
        scene_name = "_".join(scene_relpath.split(os.sep))

        # Load camera intrinsics from a .npy file in cam_root; the first token
        # of scene_relpath selects the relevant file,
        # e.g. "ZY202101" -> "cam_root/ZY202101/intrin.npy" (adjust logic as needed)
        top_level = scene_relpath.split(os.sep)[0]
        cam_file = os.path.join(cam_root, top_level, "intrin.npy")
        if not os.path.isfile(cam_file):
            print(f"Warning: Camera file not found: {cam_file}. Skipping {scene_dir}")
            continue
        intrinsics = np.load(cam_file)

        # Input directories for this sequence
        rgb_dir = os.path.join(scene_dir, "align_rgb")
        depth_dir = os.path.join(scene_dir, "align_depth")

        # Output directories
        out_rgb_dir = os.path.join(out_dir, scene_name, "rgb")
        out_depth_dir = os.path.join(out_dir, scene_name, "depth")
        out_cam_dir = os.path.join(out_dir, scene_name, "cam")
        os.makedirs(out_rgb_dir, exist_ok=True)
        os.makedirs(out_depth_dir, exist_ok=True)
        os.makedirs(out_cam_dir, exist_ok=True)

        # Find all image paths
        img_paths = sorted(glob.glob(os.path.join(rgb_dir, "*.jpg")))

        # Build a task for each image
        for img_path in img_paths:
            basename = os.path.splitext(os.path.basename(img_path))[0]
            depth_path = os.path.join(depth_dir, f"{basename}.png")
            out_img_path = os.path.join(out_rgb_dir, f"{basename}.png")
            out_depth_path = os.path.join(out_depth_dir, f"{basename}.npy")
            out_cam_path = os.path.join(out_cam_dir, f"{basename}.npz")

            # Skip if already processed
            if (os.path.exists(out_img_path) and os.path.exists(out_depth_path)
                    and os.path.exists(out_cam_path)):
                continue

            tasks.append((
                img_path,
                depth_path,
                out_img_path,
                out_depth_path,
                out_cam_path,
                intrinsics,
            ))

    # Process tasks in parallel; default to half the CPU cores
    # (os.cpu_count() can return None, hence the fallback)
    max_workers = args.max_workers
    if max_workers is None:
        max_workers = max(1, (os.cpu_count() or 1) // 2)
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        list(tqdm(
            executor.map(process_image, tasks),
            total=len(tasks),
            desc="Processing images",
        ))


if __name__ == "__main__":
    main()
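
# Resulting output layout (one flattened folder per sequence):
#   <out_dir>/ZY202101_H1_C1_N1_S1_s1_T1/rgb/<frame>.png
#   <out_dir>/ZY202101_H1_C1_N1_S1_s1_T1/depth/<frame>.npy  (float32, meters)
#   <out_dir>/ZY202101_H1_C1_N1_S1_s1_T1/cam/<frame>.npz    (key: 'intrinsics')
# A processed frame can be loaded back for sanity checking with, e.g.:
#   depth = np.load(".../depth/<frame>.npy")
#   K = np.load(".../cam/<frame>.npz")["intrinsics"]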