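"""Preprocess TartanAir sequences for CUT3R.

Walks every environment / difficulty / subscene under --tartanair_dir, copies the
left-camera RGB, depth, forward flow, and flow masks into a flat per-frame layout
under --output_dir, and writes a {frame:06d}_cam.npz per frame holding the
camera-to-world pose (converted from TartanAir's NED convention) and the fixed
pinhole intrinsics.
"""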
import argparse
import random
import gzip
import json
import os
import os.path as osp
import torch
import PIL.Image
from PIL import Image
import numpy as np
import cv2
from tqdm import tqdm
import matplotlib.pyplot as plt
import shutil
import src.dust3r.datasets.utils.cropping as cropping # noqa
from scipy.spatial.transform import Rotation as R


def get_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--tartanair_dir",
        default="data/tartanair",
    )
    parser.add_argument(
        "--output_dir",
        default="data/mast3r_data/processed_tartanair",
    )
    return parser


def main(rootdir, outdir):
    os.makedirs(outdir, exist_ok=True)
    envs = [
        f for f in sorted(os.listdir(rootdir)) if os.path.isdir(osp.join(rootdir, f))
    ]
    for env in tqdm(envs):
        for difficulty in ["Easy", "Hard"]:
            subscenes = [
                f
                for f in os.listdir(osp.join(rootdir, env, difficulty))
                if os.path.isdir(osp.join(rootdir, env, difficulty, f))
            ]
            for subscene in tqdm(subscenes):
                frame_dir = osp.join(rootdir, env, difficulty, subscene)
                rgb_dir = osp.join(frame_dir, "image_left")
                depth_dir = osp.join(frame_dir, "depth_left")
                flow_dir = osp.join(frame_dir, "flow")
                # TartanAir left camera: 640x480 frames with fx = fy = 320,
                # cx = 320, cy = 240, shared by every sequence.
                intrinsics = np.array(
                    [[320.0, 0.0, 320.0], [0.0, 320.0, 240.0], [0.0, 0.0, 1.0]]
                ).astype(np.float32)
                # pose_left.txt holds one "x y z qx qy qz qw" line per frame (NED convention).
                poses = np.loadtxt(osp.join(frame_dir, "pose_left.txt"))
                frame_num = len(poses)
                os.makedirs(osp.join(outdir, env, difficulty, subscene), exist_ok=True)
                # Flow is stored per consecutive frame pair (one flow + one mask file),
                # so the flow folder holds 2 * (frame_num - 1) files.
                assert (
                    len(os.listdir(rgb_dir))
                    == len(os.listdir(depth_dir))
                    == len(os.listdir(flow_dir)) // 2 + 1
                    == frame_num
                )
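                # Per frame, the output folder receives {i:06d}_rgb.png, _depth.npy,
                # _flow.npy / _mask.npy (except for the last frame), and _cam.npz.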
                for i in tqdm(range(frame_num)):
                    rgb_path = osp.join(rgb_dir, f"{i:06d}_left.png")
                    out_rgb_path = osp.join(
                        outdir, env, difficulty, subscene, f"{i:06d}_rgb.png"
                    )
                    depth_path = osp.join(depth_dir, f"{i:06d}_left_depth.npy")
                    out_depth_path = osp.join(
                        outdir, env, difficulty, subscene, f"{i:06d}_depth.npy"
                    )
                    # Forward flow and its mask link frame i to frame i+1; the last frame has none.
                    if i < frame_num - 1:
                        fflow_path = osp.join(flow_dir, f"{i:06d}_{i+1:06d}_flow.npy")
                        mask_path = osp.join(flow_dir, f"{i:06d}_{i+1:06d}_mask.npy")
                    else:
                        fflow_path = None
                        mask_path = None
                    out_fflow_path = (
                        osp.join(outdir, env, difficulty, subscene, f"{i:06d}_flow.npy")
                        if fflow_path is not None
                        else None
                    )
                    out_mask_path = (
                        osp.join(outdir, env, difficulty, subscene, f"{i:06d}_mask.npy")
                        if mask_path is not None
                        else None
                    )
                    # Build camera-to-world from the NED pose, then permute axes so the
                    # camera frame is x-right, y-down, z-forward.
                    pose = poses[i]
                    x, y, z, qx, qy, qz, qw = pose
                    rotation = R.from_quat([qx, qy, qz, qw]).as_matrix()
                    c2w = np.eye(4)
                    c2w[:3, :3] = rotation
                    c2w[:3, 3] = [x, y, z]
                    w2c = np.linalg.inv(c2w)
                    w2c = w2c[[1, 2, 0, 3]]  # NED rows (x-fwd, y-right, z-down) -> (right, down, fwd)
                    c2w = np.linalg.inv(w2c)
                    K = intrinsics
                    # Copy RGB/depth (and flow/mask when present) into the flat output layout.
                    shutil.copy(rgb_path, out_rgb_path)
                    shutil.copy(depth_path, out_depth_path)
                    if fflow_path is not None:
                        shutil.copy(fflow_path, out_fflow_path)
                    if mask_path is not None:
                        shutil.copy(mask_path, out_mask_path)
                    # Store the per-frame camera pose and intrinsics next to the copies.
                    np.savez(
                        osp.join(outdir, env, difficulty, subscene, f"{i:06d}_cam.npz"),
                        camera_pose=c2w.astype(np.float32),
                        camera_intrinsics=K.astype(np.float32),
                    )


if __name__ == "__main__":
    parser = get_parser()
    args = parser.parse_args()
    main(args.tartanair_dir, args.output_dir)
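# Example invocation (paths are the argparse defaults; assumes the script lives at
# datasets_preprocess/preprocess_tartanair.py and TartanAir is unpacked as
# <env>/<Easy|Hard>/<subscene>/):
#   python datasets_preprocess/preprocess_tartanair.py \
#       --tartanair_dir data/tartanair \
#       --output_dir data/mast3r_data/processed_tartanair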