#!/usr/bin/env python3 """ Preprocess scenes by sorting images and generating image/video collections. This script processes scenes in parallel using a thread pool, updating metadata with sorted images, trajectories, intrinsics, and generating pair, image collection, and video collection data. The processed metadata is saved to a new file in each scene directory. Usage: python generate_set_arkitscenes.py --root /path/to/data --splits Training Test --max_interval 5.0 --num_workers 8 """ import os import os.path as osp import argparse import numpy as np from concurrent.futures import ThreadPoolExecutor, as_completed from tqdm import tqdm def get_timestamp(img_name): """ Extract the timestamp from an image filename. Assumes the timestamp is the last underscore-separated token in the name (before the file extension). Args: img_name (str): The image filename. Returns: float: The extracted timestamp. """ return float(img_name[:-4].split("_")[-1]) def process_scene(root, split, scene, max_interval): """ Process a single scene by sorting its images by timestamp, updating trajectories, intrinsics, and pairings, and generating image/video collections. Args: root (str): Root directory of the dataset. split (str): The dataset split (e.g., 'Training', 'Test'). scene (str): The scene identifier. max_interval (float): Maximum allowed time interval (in seconds) between images to consider them in the same video collection. """ scene_dir = osp.join(root, split, scene) metadata_path = osp.join(scene_dir, "scene_metadata.npz") # Load the scene metadata with np.load(metadata_path) as data: images = data["images"] trajectories = data["trajectories"] intrinsics = data["intrinsics"] pairs = data["pairs"] # Sort images by timestep imgs_with_indices = sorted(enumerate(images), key=lambda x: x[1]) indices, images = zip(*imgs_with_indices) indices = np.array(indices) index2sorted = {index: i for i, index in enumerate(indices)} # Reorder trajectories and intrinsics based on the new image order trajectories = trajectories[indices] intrinsics = intrinsics[indices] # Update pair indices (each pair is (id1, id2, score)) pairs = [(index2sorted[id1], index2sorted[id2], score) for id1, id2, score in pairs] # Form image_collection: mapping from an image id to a list of (other image id, score) image_collection = {} for id1, id2, score in pairs: image_collection.setdefault(id1, []).append((id2, score)) # Form video_collection: for each image, collect subsequent images within the max_interval time window video_collection = {} for i, image in enumerate(images): j = i + 1 for j in range(i + 1, len(images)): if get_timestamp(images[j]) - get_timestamp(image) > max_interval: break video_collection[i] = list(range(i + 1, j)) # Save the new metadata output_path = osp.join(scene_dir, "new_scene_metadata.npz") np.savez( output_path, images=images, trajectories=trajectories, intrinsics=intrinsics, pairs=pairs, image_collection=image_collection, video_collection=video_collection, ) print(f"Processed scene: {scene}") def main(args): """ Main function to process scenes across specified dataset splits in parallel. """ root = args.root splits = args.splits max_interval = args.max_interval num_workers = args.num_workers futures = [] # Create a ThreadPoolExecutor for parallel processing with ThreadPoolExecutor(max_workers=num_workers) as executor: for split in splits: all_meta_path = osp.join(root, split, "all_metadata.npz") with np.load(all_meta_path) as data: scenes = data["scenes"] # Submit processing tasks for each scene in the current split for scene in scenes: futures.append( executor.submit(process_scene, root, split, scene, max_interval) ) # Use tqdm to display a progress bar as futures complete for future in tqdm( as_completed(futures), total=len(futures), desc="Processing scenes" ): # This will raise any exceptions caught during scene processing. future.result() if __name__ == "__main__": parser = argparse.ArgumentParser( description="Preprocess scene data to update metadata with sorted images and collections." ) parser.add_argument( "--root", type=str, default="", help="Root directory containing the dataset splits.", ) parser.add_argument( "--splits", type=str, nargs="+", default=["Training", "Test"], help="List of dataset splits to process (e.g., Training Test).", ) parser.add_argument( "--max_interval", type=float, default=5.0, help="Maximum time interval (in seconds) between images to consider them in the same video sequence.", ) parser.add_argument( "--num_workers", type=int, default=8, help="Number of worker threads for parallel processing.", ) args = parser.parse_args() main(args)