#!/usr/bin/env python3
"""
Preprocess processed_scannetpp scenes to update scene metadata.

This script reads each scene's "scene_metadata.npz", sorts images by name
(which orders them by timestamp within each capture prefix), updates
trajectories, intrinsics, and pair indices, and builds two collections:

  - image_collection: for each image, stores pairs (other image index, score).
  - video_collection: for each image, groups subsequent images whose timestamps
    differ by at most a given max_interval (and share the same first character
    in the image name).

The new metadata is saved as "new_scene_metadata.npz" in each scene folder.

Usage:
    python generate_set_scannetpp.py --root /path/to/processed_scannetpp \
        --max_interval 150 --num_workers 8
"""
import argparse
import os.path as osp
from concurrent.futures import ThreadPoolExecutor, as_completed

import numpy as np
from tqdm import tqdm


def get_timestamp(img_name):
    """
    Convert an image name to an integer timestamp.

    If the image name starts with "DSC", the timestamp is the integer that
    follows "DSC". Otherwise, the name is assumed to contain an underscore,
    and the integer after the first underscore is used.

    Args:
        img_name (str): The image basename (without extension).

    Returns:
        int: The extracted timestamp.
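
    Examples (illustrative names, not taken from a specific scene):
        >>> get_timestamp("DSC00123")
        123
        >>> get_timestamp("frame_000456")
        456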
""" | |
if img_name.startswith("DSC"): | |
return int(img_name[3:]) | |
else: | |
return int(img_name.split("_")[1]) | |


def process_scene(root, scene, max_interval):
    """
    Process a single scene: sort images, update trajectories/intrinsics/pairs,
    and build the image and video collections, then save the updated metadata.

    Args:
        root (str): Root directory containing scene folders.
        scene (str): Scene folder name.
        max_interval (int): Maximum allowed timestamp difference for video grouping.
    """
    scene_dir = osp.join(root, scene)
    metadata_path = osp.join(scene_dir, "scene_metadata.npz")
    with np.load(metadata_path, allow_pickle=True) as data:
        images = data["images"]
        trajectories = data["trajectories"]
        intrinsics = data["intrinsics"]
        pairs = data["pairs"]

    # Sort images by name, which orders them by timestamp within each capture prefix.
    imgs_with_indices = sorted(enumerate(images), key=lambda x: x[1])
    indices, images = zip(*imgs_with_indices)
    indices = np.array(indices)
    # Map each original image index to its position in the sorted order.
    index2sorted = {index: i for i, index in enumerate(indices)}
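    # For example (hypothetical ordering): if the sorted original indices are
    # [2, 0, 1], then index2sorted == {2: 0, 0: 1, 1: 2}.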

    # Update trajectories and intrinsics arrays according to the new order.
    trajectories = trajectories[indices]
    intrinsics = intrinsics[indices]

    # Remap pairs (each pair is (id1, id2, score)) to the new sorted indices.
    pairs = [(index2sorted[id1], index2sorted[id2], score) for id1, id2, score in pairs]

    # Build image_collection: keep a pair only if both image files exist on disk.
    image_collection = {}
    for id1, id2, score in pairs:
        img1 = images[id1]
        img2 = images[id2]
        img1_path = osp.join(scene_dir, "images", img1 + ".jpg")
        img2_path = osp.join(scene_dir, "images", img2 + ".jpg")
        if not (osp.exists(img1_path) and osp.exists(img2_path)):
            continue
        if id1 not in image_collection:
            image_collection[id1] = []
        image_collection[id1].append((id2, score))

    # Build video_collection: for each image, group subsequent images as long as:
    #   1. their timestamp differs from the current image's by at most max_interval, and
    #   2. their name starts with the same character as the current image's name.
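    # For example, with max_interval=150 and hypothetical names: starting from
    # "DSC00100", the images "DSC00150" and "DSC00200" would be grouped, but the
    # loop stops at "DSC00300" (interval 200 > 150) or at "frame_000210"
    # (different first character).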
    video_collection = {}
    for i, image in enumerate(images):
        img_path = osp.join(scene_dir, "images", image + ".jpg")
        if not osp.exists(img_path):
            continue
        video_collection[i] = []
        for j in range(i + 1, len(images)):
            next_img_path = osp.join(scene_dir, "images", images[j] + ".jpg")
            if not osp.exists(next_img_path):
                continue
            if (
                get_timestamp(images[j]) - get_timestamp(image) > max_interval
                or images[j][0] != image[0]
            ):
                break
            video_collection[i].append(j)

    # Save the updated metadata to a new file. The two collections are Python
    # dicts, so np.savez pickles them as object arrays; reading them back
    # requires np.load(..., allow_pickle=True), as done above for scene_metadata.npz.
    out_path = osp.join(scene_dir, "new_scene_metadata.npz")
    np.savez(
        out_path,
        images=images,
        trajectories=trajectories,
        intrinsics=intrinsics,
        pairs=pairs,
        image_collection=image_collection,
        video_collection=video_collection,
    )
    print(f"Processed scene: {scene}")


def main(args):
    root = args.root
    max_interval = args.max_interval
    num_workers = args.num_workers

    # Load the list of scenes from the 'all_metadata.npz' file.
    all_metadata_path = osp.join(root, "all_metadata.npz")
    with np.load(all_metadata_path, allow_pickle=True) as data:
        scenes = data["scenes"]

    # Process scenes in parallel.
    futures = []
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        for scene in scenes:
            futures.append(executor.submit(process_scene, root, scene, max_interval))
        for future in tqdm(
            as_completed(futures), total=len(futures), desc="Processing scenes"
        ):
            # This will re-raise any exception from process_scene.
            future.result()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Preprocess processed_scannetpp scenes to update scene metadata."
    )
    parser.add_argument(
        "--root",
        type=str,
        required=True,
        help="Root directory containing processed_scannetpp scene folders.",
    )
    parser.add_argument(
        "--max_interval",
        type=int,
        default=150,
        help="Maximum timestamp interval for grouping images (default: 150).",
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=8,
        help="Number of worker threads for parallel processing (default: 8).",
    )
    args = parser.parse_args()
    main(args)