# -----------------------------------------------------------------------------
# Adapted from https://github.com/anibali/h36m-fetch
# Original license: Copyright (c) Aiden Nibali, under the Apache License.
# -----------------------------------------------------------------------------
import argparse
import os
import pickle
import tarfile
import xml.etree.ElementTree as ET
from os.path import join

import cv2
import numpy as np
from spacepy import pycdf
class PreprocessH36m:
    """Preprocess Human3.6M dataset.

    Args:
        metadata (str): Path to metadata.xml.
        original_dir (str): Directory of the original dataset with all files
            compressed. Specifically, .tgz files belonging to subject 1
            should be placed under the subdirectory 's1'.
        extracted_dir (str): Directory of the extracted files. If not given, it
            will be placed under the same parent directory as original_dir.
        processed_dir (str): Directory of the processed files. If not given, it
            will be placed under the same parent directory as original_dir.
        sample_rate (int): Downsample FPS to `1 / sample_rate`. Default: 5.
    """

    def __init__(self,
                 metadata,
                 original_dir,
                 extracted_dir=None,
                 processed_dir=None,
                 sample_rate=5):
        self.metadata = metadata
        self.original_dir = original_dir
        self.sample_rate = sample_rate

        if extracted_dir is None:
            self.extracted_dir = join(
                os.path.dirname(os.path.abspath(self.original_dir)),
                'extracted')
        else:
            self.extracted_dir = extracted_dir

        if processed_dir is None:
            self.processed_dir = join(
                os.path.dirname(os.path.abspath(self.original_dir)),
                'processed')
        else:
            self.processed_dir = processed_dir

        # Filled in by _load_metadata() from metadata.xml.
        self.subjects = []
        self.sequence_mappings = {}
        self.action_names = {}
        self.camera_ids = []
        self._load_metadata()

        self.subjects_annot = ['S1', 'S5', 'S6', 'S7', 'S8', 'S9', 'S11']
        self.subjects_splits = {
            'train': ['S1', 'S5', 'S6', 'S7', 'S8'],
            'test': ['S9', 'S11']
        }
        self.extract_files = ['Videos', 'D2_Positions', 'D3_Positions_mono']
        # Indices of the 17 joints kept from the 32 raw joint annotations.
        self.movable_joints = [
            0, 1, 2, 3, 6, 7, 8, 12, 13, 14, 15, 17, 18, 19, 25, 26, 27
        ]
        # Enlarge the tight keypoint bounding box by this factor.
        self.scale_factor = 1.2
        self.image_sizes = {
            '54138969': {
                'width': 1000,
                'height': 1002
            },
            '55011271': {
                'width': 1000,
                'height': 1000
            },
            '58860488': {
                'width': 1000,
                'height': 1000
            },
            '60457274': {
                'width': 1000,
                'height': 1002
            }
        }

    def extract_tgz(self):
        """Extract files from self.extract_files."""
        os.makedirs(self.extracted_dir, exist_ok=True)
        for subject in self.subjects_annot:
            cur_dir = join(self.original_dir, subject.lower())
            for file in self.extract_files:
                filename = join(cur_dir, file + '.tgz')
                # Report the archive actually being extracted.
                print(f'Extracting {filename} ...')
                with tarfile.open(filename) as tar:
                    tar.extractall(self.extracted_dir)
        print('Extraction done.\n')

    def generate_cameras_file(self):
        """Generate cameras.pkl which contains camera parameters for 11
        subjects each with 4 cameras."""
        cameras = {}
        for subject in range(1, 12):
            for camera in range(4):
                key = (f'S{subject}', self.camera_ids[camera])
                cameras[key] = self._get_camera_params(camera, subject)

        out_dir = join(self.processed_dir, 'annotation_body3d')
        # This method may run before generate_annotations(), so the output
        # directory cannot be assumed to exist yet.
        os.makedirs(out_dir, exist_ok=True)
        out_file = join(out_dir, 'cameras.pkl')
        with open(out_file, 'wb') as fout:
            pickle.dump(cameras, fout)
        print(f'Camera parameters have been written to "{out_file}".\n')

    def generate_annotations(self):
        """Generate annotations for training and testing data."""
        output_dir = join(self.processed_dir, 'annotation_body3d',
                          f'fps{50 // self.sample_rate}')
        os.makedirs(output_dir, exist_ok=True)

        for data_split in ('train', 'test'):
            imgnames_all = []
            centers_all = []
            scales_all = []
            kps2d_all = []
            kps3d_all = []
            for subject in self.subjects_splits[data_split]:
                for action, subaction in self.sequence_mappings[subject].keys(
                ):
                    if action == '1':
                        # exclude action "_ALL"
                        continue
                    for camera in self.camera_ids:
                        imgnames, centers, scales, kps2d, kps3d\
                            = self._load_annotations(
                                subject, action, subaction, camera)
                        imgnames_all.append(imgnames)
                        centers_all.append(centers)
                        scales_all.append(scales)
                        kps2d_all.append(kps2d)
                        kps3d_all.append(kps3d)

            imgnames_all = np.concatenate(imgnames_all)
            centers_all = np.concatenate(centers_all)
            scales_all = np.concatenate(scales_all)
            kps2d_all = np.concatenate(kps2d_all)
            kps3d_all = np.concatenate(kps3d_all)

            out_file = join(output_dir, f'h36m_{data_split}.npz')
            np.savez(
                out_file,
                imgname=imgnames_all,
                center=centers_all,
                scale=scales_all,
                part=kps2d_all,
                S=kps3d_all)

            print(
                f'All annotations of {data_split}ing data have been written to'
                f' "{out_file}". {len(imgnames_all)} samples in total.\n')

            if data_split == 'train':
                kps_3d_all = kps3d_all[..., :3]  # remove visibility
                mean_3d, std_3d = self._get_pose_stats(kps_3d_all)

                kps_2d_all = kps2d_all[..., :2]  # remove visibility
                mean_2d, std_2d = self._get_pose_stats(kps_2d_all)

                # centered around root
                # the root keypoint is 0-index
                kps_3d_rel = kps_3d_all[..., 1:, :] - kps_3d_all[..., :1, :]
                mean_3d_rel, std_3d_rel = self._get_pose_stats(kps_3d_rel)

                kps_2d_rel = kps_2d_all[..., 1:, :] - kps_2d_all[..., :1, :]
                mean_2d_rel, std_2d_rel = self._get_pose_stats(kps_2d_rel)

                stats = {
                    'joint3d_stats': {
                        'mean': mean_3d,
                        'std': std_3d
                    },
                    'joint2d_stats': {
                        'mean': mean_2d,
                        'std': std_2d
                    },
                    'joint3d_rel_stats': {
                        'mean': mean_3d_rel,
                        'std': std_3d_rel
                    },
                    'joint2d_rel_stats': {
                        'mean': mean_2d_rel,
                        'std': std_2d_rel
                    }
                }
                for name, stat_dict in stats.items():
                    out_file = join(output_dir, f'{name}.pkl')
                    with open(out_file, 'wb') as f:
                        pickle.dump(stat_dict, f)
                    print(f'Create statistic data file: {out_file}')

    @staticmethod
    def _get_pose_stats(kps):
        """Get statistic information `mean` and `std` of pose data.

        Args:
            kps (ndarray): keypoints in shape [..., K, C] where K and C is
                the keypoint category number and dimension.

        Returns:
            mean (ndarray): [K, C]
            std (ndarray): [K, C]
        """
        assert kps.ndim > 2
        K, C = kps.shape[-2:]
        kps = kps.reshape(-1, K, C)
        mean = kps.mean(axis=0)
        std = kps.std(axis=0)
        return mean, std

    def _load_metadata(self):
        """Load meta data from metadata.xml."""
        assert os.path.exists(self.metadata)

        tree = ET.parse(self.metadata)
        root = tree.getroot()

        for i, tr in enumerate(root.find('mapping')):
            if i == 0:
                # First row lists the subject names.
                _, _, *self.subjects = [td.text for td in tr]
                self.sequence_mappings \
                    = {subject: {} for subject in self.subjects}
            elif i < 33:
                # Following rows map (action, subaction) to file prefixes.
                action_id, subaction_id, *prefixes = [td.text for td in tr]
                for subject, prefix in zip(self.subjects, prefixes):
                    self.sequence_mappings[subject][(action_id, subaction_id)]\
                        = prefix

        for i, elem in enumerate(root.find('actionnames')):
            action_id = str(i + 1)
            self.action_names[action_id] = elem.text

        self.camera_ids \
            = [elem.text for elem in root.find('dbcameras/index2id')]

        # Raw camera parameters are stored as a flat number list in 'w0'.
        w0 = root.find('w0')
        self.cameras_raw = [float(num) for num in w0.text[1:-1].split()]

    def _get_base_filename(self, subject, action, subaction, camera):
        """Get base filename given subject, action, subaction and camera."""
        return f'{self.sequence_mappings[subject][(action, subaction)]}' + \
            f'.{camera}'

    def _get_camera_params(self, camera, subject):
        """Get camera parameters given camera id and subject id."""
        metadata_slice = np.zeros(15)
        start = 6 * (camera * 11 + (subject - 1))

        metadata_slice[:6] = self.cameras_raw[start:start + 6]
        metadata_slice[6:] = self.cameras_raw[265 + camera * 9 - 1:265 +
                                              (camera + 1) * 9 - 1]

        # extrinsics
        x, y, z = -metadata_slice[0], metadata_slice[1], -metadata_slice[2]

        R_x = np.array([[1, 0, 0], [0, np.cos(x), np.sin(x)],
                        [0, -np.sin(x), np.cos(x)]])
        R_y = np.array([[np.cos(y), 0, np.sin(y)], [0, 1, 0],
                        [-np.sin(y), 0, np.cos(y)]])
        R_z = np.array([[np.cos(z), np.sin(z), 0], [-np.sin(z),
                                                    np.cos(z), 0], [0, 0, 1]])
        R = (R_x @ R_y @ R_z).T
        T = metadata_slice[3:6].reshape(-1, 1)
        # convert unit from millimeter to meter
        T *= 0.001

        # intrinsics
        c = metadata_slice[8:10, None]
        f = metadata_slice[6:8, None]

        # distortion
        k = metadata_slice[10:13, None]
        p = metadata_slice[13:15, None]

        return {
            'R': R,
            'T': T,
            'c': c,
            'f': f,
            'k': k,
            'p': p,
            'w': self.image_sizes[self.camera_ids[camera]]['width'],
            'h': self.image_sizes[self.camera_ids[camera]]['height'],
            'name': f'camera{camera + 1}',
            'id': self.camera_ids[camera]
        }

    def _load_annotations(self, subject, action, subaction, camera):
        """Load annotations for a sequence."""
        subj_dir = join(self.extracted_dir, subject)
        basename = self._get_base_filename(subject, action, subaction, camera)

        # load 2D keypoints
        with pycdf.CDF(
                join(subj_dir, 'MyPoseFeatures', 'D2_Positions',
                     basename + '.cdf')) as cdf:
            kps_2d = np.array(cdf['Pose'])

        num_frames = kps_2d.shape[1]
        kps_2d = kps_2d.reshape((num_frames, 32, 2))[::self.sample_rate,
                                                     self.movable_joints]
        # Append a visibility flag (all ones) as the last channel.
        kps_2d = np.concatenate([kps_2d, np.ones((len(kps_2d), 17, 1))],
                                axis=2)

        # load 3D keypoints
        with pycdf.CDF(
                join(subj_dir, 'MyPoseFeatures', 'D3_Positions_mono',
                     basename + '.cdf')) as cdf:
            kps_3d = np.array(cdf['Pose'])

        # convert unit from millimeter to meter
        kps_3d = kps_3d.reshape(
            (num_frames, 32, 3))[::self.sample_rate,
                                 self.movable_joints] / 1000.
        kps_3d = np.concatenate([kps_3d, np.ones((len(kps_3d), 17, 1))],
                                axis=2)

        # calculate bounding boxes
        bboxes = np.stack([
            np.min(kps_2d[:, :, 0], axis=1),
            np.min(kps_2d[:, :, 1], axis=1),
            np.max(kps_2d[:, :, 0], axis=1),
            np.max(kps_2d[:, :, 1], axis=1)
        ],
                          axis=1)
        centers = np.stack([(bboxes[:, 0] + bboxes[:, 2]) / 2,
                            (bboxes[:, 1] + bboxes[:, 3]) / 2],
                           axis=1)
        # scale is the enlarged longest bbox side, in units of 200 pixels
        scales = self.scale_factor * np.max(
            bboxes[:, 2:] - bboxes[:, :2], axis=1) / 200

        # extract frames and save imgnames
        imgnames = []
        video_path = join(subj_dir, 'Videos', basename + '.mp4')
        sub_base = subject + '_' + basename.replace(' ', '_')
        img_dir = join(self.processed_dir, 'images', subject, sub_base)
        os.makedirs(img_dir, exist_ok=True)
        prefix = join(subject, sub_base, sub_base)

        cap = cv2.VideoCapture(video_path)
        i = 0
        while True:
            success, img = cap.read()
            if not success:
                break
            if i % self.sample_rate == 0:
                imgname = f'{prefix}_{i + 1:06d}.jpg'
                imgnames.append(imgname)
                dest_path = join(self.processed_dir, 'images', imgname)
                if not os.path.exists(dest_path):
                    cv2.imwrite(dest_path, img)
                if len(imgnames) == len(centers):
                    break
            i += 1
        cap.release()
        imgnames = np.array(imgnames)

        print(f'Annotations for sequence "{subject} {basename}" are loaded. '
              f'{len(imgnames)} samples in total.')

        return imgnames, centers, scales, kps_2d, kps_3d
def parse_args():
    """Build the command-line interface and return the parsed arguments."""
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--metadata', type=str, required=True, help='Path to metadata.xml')
    cli.add_argument(
        '--original',
        type=str,
        required=True,
        help='Directory of the original dataset with all files compressed. '
        'Specifically, .tgz files belonging to subject 1 should be placed '
        'under the subdirectory "s1".')
    cli.add_argument(
        '--extracted',
        type=str,
        default=None,
        help='Directory of the extracted files. If not given, it will be '
        'placed under the same parent directory as original_dir.')
    cli.add_argument(
        '--processed',
        type=str,
        default=None,
        help='Directory of the processed files. If not given, it will be '
        'placed under the same parent directory as original_dir.')
    cli.add_argument(
        '--sample_rate',
        type=int,
        default=5,
        help='Downsample FPS to `1 / sample_rate`. Default: 5.')
    return cli.parse_args()
if __name__ == '__main__':
    # Full pipeline: extract archives, dump camera parameters, then
    # generate per-split annotations.
    cli_args = parse_args()
    preprocessor = PreprocessH36m(
        metadata=cli_args.metadata,
        original_dir=cli_args.original,
        extracted_dir=cli_args.extracted,
        processed_dir=cli_args.processed,
        sample_rate=cli_args.sample_rate)
    preprocessor.extract_tgz()
    preprocessor.generate_cameras_file()
    preprocessor.generate_annotations()