mmaction2
/
projects
/gesture_recognition
/configs
/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py
default_scope = 'mmpose' | |
default_hooks = dict( | |
timer=dict(type='IterTimerHook'), | |
logger=dict(type='LoggerHook', interval=50), | |
param_scheduler=dict(type='ParamSchedulerHook'), | |
checkpoint=dict( | |
type='CheckpointHook', | |
interval=10, | |
save_best='AUC', | |
rule='greater', | |
max_keep_ckpts=1), | |
sampler_seed=dict(type='DistSamplerSeedHook'), | |
visualization=dict(type='PoseVisualizationHook', enable=False)) | |
custom_hooks = [ | |
dict( | |
type='EMAHook', | |
ema_type='ExpMomentumEMA', | |
momentum=0.0002, | |
update_buffers=True, | |
priority=49), | |
dict( | |
type='mmdet.PipelineSwitchHook', | |
switch_epoch=180, | |
switch_pipeline=[ | |
dict(type='LoadImage', file_client_args=dict(backend='disk')), | |
dict(type='GetBBoxCenterScale'), | |
dict( | |
type='RandomBBoxTransform', | |
shift_factor=0.0, | |
scale_factor=[0.75, 1.25], | |
rotate_factor=180), | |
dict(type='RandomFlip', direction='horizontal'), | |
dict(type='TopdownAffine', input_size=(256, 256)), | |
dict(type='mmdet.YOLOXHSVRandomAug'), | |
dict( | |
type='Albumentation', | |
transforms=[ | |
dict(type='Blur', p=0.1), | |
dict(type='MedianBlur', p=0.1), | |
dict( | |
type='CoarseDropout', | |
max_holes=1, | |
max_height=0.4, | |
max_width=0.4, | |
min_holes=1, | |
min_height=0.2, | |
min_width=0.2, | |
p=0.5) | |
]), | |
dict( | |
type='GenerateTarget', | |
encoder=dict( | |
type='SimCCLabel', | |
input_size=(256, 256), | |
sigma=(5.66, 5.66), | |
simcc_split_ratio=2.0, | |
normalize=False, | |
use_dark=False)), | |
dict(type='PackPoseInputs') | |
]) | |
] | |
env_cfg = dict( | |
cudnn_benchmark=False, | |
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), | |
dist_cfg=dict(backend='nccl')) | |
vis_backends = [dict(type='LocalVisBackend')] | |
visualizer = dict( | |
type='PoseLocalVisualizer', | |
vis_backends=[dict(type='LocalVisBackend')], | |
name='visualizer') | |
log_processor = dict( | |
type='LogProcessor', window_size=50, by_epoch=True, num_digits=6) | |
log_level = 'INFO' | |
load_from = None | |
resume = False | |
file_client_args = dict(backend='disk') | |
train_cfg = dict(by_epoch=True, max_epochs=210, val_interval=10) | |
val_cfg = dict() | |
test_cfg = dict() | |
max_epochs = 210 | |
stage2_num_epochs = 30 | |
base_lr = 0.004 | |
randomness = dict(seed=21) | |
optim_wrapper = dict( | |
type='OptimWrapper', | |
optimizer=dict(type='AdamW', lr=0.004, weight_decay=0.05), | |
paramwise_cfg=dict( | |
norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) | |
param_scheduler = [ | |
dict( | |
type='LinearLR', start_factor=1e-05, by_epoch=False, begin=0, | |
end=1000), | |
dict( | |
type='CosineAnnealingLR', | |
eta_min=0.0002, | |
begin=105, | |
end=210, | |
T_max=105, | |
by_epoch=True, | |
convert_to_iter_based=True) | |
] | |
auto_scale_lr = dict(base_batch_size=256) | |
codec = dict( | |
type='SimCCLabel', | |
input_size=(256, 256), | |
sigma=(5.66, 5.66), | |
simcc_split_ratio=2.0, | |
normalize=False, | |
use_dark=False) | |
model = dict( | |
type='TopdownPoseEstimator', | |
data_preprocessor=dict( | |
type='PoseDataPreprocessor', | |
mean=[123.675, 116.28, 103.53], | |
std=[58.395, 57.12, 57.375], | |
bgr_to_rgb=True), | |
backbone=dict( | |
_scope_='mmdet', | |
type='CSPNeXt', | |
arch='P5', | |
expand_ratio=0.5, | |
deepen_factor=0.67, | |
widen_factor=0.75, | |
out_indices=(4, ), | |
channel_attention=True, | |
norm_cfg=dict(type='SyncBN'), | |
act_cfg=dict(type='SiLU'), | |
init_cfg=dict( | |
type='Pretrained', | |
prefix='backbone.', | |
checkpoint=('https://download.openmmlab.com/mmpose/v1/projects/' | |
'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-' | |
'f2f7d6f6_20230130.pth'))), | |
head=dict( | |
type='RTMCCHead', | |
in_channels=768, | |
out_channels=21, | |
input_size=(256, 256), | |
in_featuremap_size=(8, 8), | |
simcc_split_ratio=2.0, | |
final_layer_kernel_size=7, | |
gau_cfg=dict( | |
hidden_dims=256, | |
s=128, | |
expansion_factor=2, | |
dropout_rate=0.0, | |
drop_path=0.0, | |
act_fn='SiLU', | |
use_rel_bias=False, | |
pos_enc=False), | |
loss=dict( | |
type='KLDiscretLoss', | |
use_target_weight=True, | |
beta=10.0, | |
label_softmax=True), | |
decoder=dict( | |
type='SimCCLabel', | |
input_size=(256, 256), | |
sigma=(5.66, 5.66), | |
simcc_split_ratio=2.0, | |
normalize=False, | |
use_dark=False)), | |
test_cfg=dict(flip_test=True)) | |
dataset_type = 'CocoWholeBodyHandDataset' | |
data_mode = 'topdown' | |
data_root = 'data/coco/' | |
train_pipeline = [ | |
dict(type='LoadImage', file_client_args=dict(backend='disk')), | |
dict(type='GetBBoxCenterScale'), | |
dict( | |
type='RandomBBoxTransform', scale_factor=[0.5, 1.5], | |
rotate_factor=180), | |
dict(type='RandomFlip', direction='horizontal'), | |
dict(type='TopdownAffine', input_size=(256, 256)), | |
dict(type='mmdet.YOLOXHSVRandomAug'), | |
dict( | |
type='Albumentation', | |
transforms=[ | |
dict(type='Blur', p=0.1), | |
dict(type='MedianBlur', p=0.1), | |
dict( | |
type='CoarseDropout', | |
max_holes=1, | |
max_height=0.4, | |
max_width=0.4, | |
min_holes=1, | |
min_height=0.2, | |
min_width=0.2, | |
p=1.0) | |
]), | |
dict( | |
type='GenerateTarget', | |
encoder=dict( | |
type='SimCCLabel', | |
input_size=(256, 256), | |
sigma=(5.66, 5.66), | |
simcc_split_ratio=2.0, | |
normalize=False, | |
use_dark=False)), | |
dict(type='PackPoseInputs') | |
] | |
val_pipeline = [ | |
dict(type='LoadImage', file_client_args=dict(backend='disk')), | |
dict(type='GetBBoxCenterScale'), | |
dict(type='TopdownAffine', input_size=(256, 256)), | |
dict(type='PackPoseInputs') | |
] | |
train_pipeline_stage2 = [ | |
dict(type='LoadImage', file_client_args=dict(backend='disk')), | |
dict(type='GetBBoxCenterScale'), | |
dict( | |
type='RandomBBoxTransform', | |
shift_factor=0.0, | |
scale_factor=[0.75, 1.25], | |
rotate_factor=180), | |
dict(type='RandomFlip', direction='horizontal'), | |
dict(type='TopdownAffine', input_size=(256, 256)), | |
dict(type='mmdet.YOLOXHSVRandomAug'), | |
dict( | |
type='Albumentation', | |
transforms=[ | |
dict(type='Blur', p=0.1), | |
dict(type='MedianBlur', p=0.1), | |
dict( | |
type='CoarseDropout', | |
max_holes=1, | |
max_height=0.4, | |
max_width=0.4, | |
min_holes=1, | |
min_height=0.2, | |
min_width=0.2, | |
p=0.5) | |
]), | |
dict( | |
type='GenerateTarget', | |
encoder=dict( | |
type='SimCCLabel', | |
input_size=(256, 256), | |
sigma=(5.66, 5.66), | |
simcc_split_ratio=2.0, | |
normalize=False, | |
use_dark=False)), | |
dict(type='PackPoseInputs') | |
] | |
train_dataloader = dict( | |
batch_size=32, | |
num_workers=10, | |
persistent_workers=True, | |
sampler=dict(type='DefaultSampler', shuffle=True), | |
dataset=dict( | |
type='CocoWholeBodyHandDataset', | |
data_root='data/coco/', | |
data_mode='topdown', | |
ann_file='annotations/coco_wholebody_train_v1.0.json', | |
data_prefix=dict(img='train2017/'), | |
pipeline=[ | |
dict(type='LoadImage', file_client_args=dict(backend='disk')), | |
dict(type='GetBBoxCenterScale'), | |
dict( | |
type='RandomBBoxTransform', | |
scale_factor=[0.5, 1.5], | |
rotate_factor=180), | |
dict(type='RandomFlip', direction='horizontal'), | |
dict(type='TopdownAffine', input_size=(256, 256)), | |
dict(type='mmdet.YOLOXHSVRandomAug'), | |
dict( | |
type='Albumentation', | |
transforms=[ | |
dict(type='Blur', p=0.1), | |
dict(type='MedianBlur', p=0.1), | |
dict( | |
type='CoarseDropout', | |
max_holes=1, | |
max_height=0.4, | |
max_width=0.4, | |
min_holes=1, | |
min_height=0.2, | |
min_width=0.2, | |
p=1.0) | |
]), | |
dict( | |
type='GenerateTarget', | |
encoder=dict( | |
type='SimCCLabel', | |
input_size=(256, 256), | |
sigma=(5.66, 5.66), | |
simcc_split_ratio=2.0, | |
normalize=False, | |
use_dark=False)), | |
dict(type='PackPoseInputs') | |
])) | |
val_dataloader = dict( | |
batch_size=32, | |
num_workers=10, | |
persistent_workers=True, | |
drop_last=False, | |
sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), | |
dataset=dict( | |
type='CocoWholeBodyHandDataset', | |
data_root='data/coco/', | |
data_mode='topdown', | |
ann_file='annotations/coco_wholebody_val_v1.0.json', | |
data_prefix=dict(img='val2017/'), | |
test_mode=True, | |
pipeline=[ | |
dict(type='LoadImage', file_client_args=dict(backend='disk')), | |
dict(type='GetBBoxCenterScale'), | |
dict(type='TopdownAffine', input_size=(256, 256)), | |
dict(type='PackPoseInputs') | |
])) | |
test_dataloader = dict( | |
batch_size=32, | |
num_workers=10, | |
persistent_workers=True, | |
drop_last=False, | |
sampler=dict(type='DefaultSampler', shuffle=False, round_up=False), | |
dataset=dict( | |
type='CocoWholeBodyHandDataset', | |
data_root='data/coco/', | |
data_mode='topdown', | |
ann_file='annotations/coco_wholebody_val_v1.0.json', | |
data_prefix=dict(img='val2017/'), | |
test_mode=True, | |
pipeline=[ | |
dict(type='LoadImage', file_client_args=dict(backend='disk')), | |
dict(type='GetBBoxCenterScale'), | |
dict(type='TopdownAffine', input_size=(256, 256)), | |
dict(type='PackPoseInputs') | |
])) | |
val_evaluator = [ | |
dict(type='PCKAccuracy', thr=0.2), | |
dict(type='AUC'), | |
dict(type='EPE') | |
] | |
test_evaluator = [ | |
dict(type='PCKAccuracy', thr=0.2), | |
dict(type='AUC'), | |
dict(type='EPE') | |
] | |