mmaction2
/
projects
/actionclip
/configs
/actionclip_vit-base-p32-res224-clip-pre_g8xb16_1x1x8_k400-rgb.py
custom_imports = dict(imports='models') | |
num_segs = 8 | |
model = dict( | |
type='ActionClip', | |
clip_arch='ViT-B/32', | |
num_adapter_segs=num_segs, | |
num_adapter_layers=6, | |
to_float32=True, | |
labels_or_label_file='configs/label_map_k400.txt', | |
data_preprocessor=dict( | |
type='ActionDataPreprocessor', | |
mean=[122.771, 116.746, 104.093], | |
std=[68.500, 66.632, 70.323], | |
format_shape='NCHW')) | |
dataset_type = 'VideoDataset' | |
data_root = 'data/kinetics400/videos_train' | |
data_root_val = 'data/kinetics400/videos_val' | |
ann_file_train = 'data/kinetics400/kinetics400_train_list_videos.txt' | |
ann_file_val = 'data/kinetics400/kinetics400_val_list_videos.txt' | |
ann_file_test = 'data/kinetics400/kinetics400_val_list_videos.txt' | |
file_client_args = dict(io_backend='disk') | |
file_client_args = dict( | |
io_backend='petrel', | |
path_mapping=dict( | |
{'data/kinetics400/': 's3://openmmlab/datasets/action/Kinetics400/'})) | |
train_pipeline = [ | |
dict(type='DecordInit', **file_client_args), | |
dict( | |
type='SampleFrames', clip_len=1, frame_interval=1, num_clips=num_segs), | |
dict(type='DecordDecode'), | |
dict(type='Resize', scale=(-1, 256)), | |
dict(type='RandomResizedCrop'), | |
dict( | |
type='MultiScaleCrop', | |
input_size=224, | |
scales=(1, .875, .75, .66), | |
random_crop=False, | |
num_fixed_crops=13, | |
max_wh_scale_gap=1), | |
dict(type='Resize', scale=(224, 224), keep_ratio=False), | |
dict(type='Flip', flip_ratio=0.5), | |
dict(type='FormatShape', input_format='NCHW'), | |
dict(type='PackActionInputs') | |
] | |
val_pipeline = [ | |
dict(type='DecordInit', **file_client_args), | |
dict( | |
type='SampleFrames', | |
clip_len=1, | |
frame_interval=1, | |
num_clips=num_segs, | |
test_mode=True), | |
dict(type='DecordDecode'), | |
dict(type='Resize', scale=(-1, 256)), | |
dict(type='CenterCrop', crop_size=224), | |
dict(type='FormatShape', input_format='NCHW'), | |
dict(type='PackActionInputs') | |
] | |
test_pipeline = val_pipeline | |
train_dataloader = dict( | |
batch_size=16, | |
num_workers=16, | |
persistent_workers=True, | |
sampler=dict(type='DefaultSampler', shuffle=True), | |
dataset=dict( | |
type=dataset_type, | |
ann_file=ann_file_train, | |
data_prefix=dict(video=data_root), | |
pipeline=train_pipeline)) | |
val_dataloader = dict( | |
batch_size=16, | |
num_workers=16, | |
persistent_workers=True, | |
sampler=dict(type='DefaultSampler', shuffle=False), | |
dataset=dict( | |
type=dataset_type, | |
ann_file=ann_file_val, | |
data_prefix=dict(video=data_root_val), | |
pipeline=val_pipeline, | |
test_mode=True)) | |
test_dataloader = dict( | |
batch_size=1, | |
num_workers=16, | |
persistent_workers=True, | |
sampler=dict(type='DefaultSampler', shuffle=False), | |
dataset=dict( | |
type=dataset_type, | |
ann_file=ann_file_test, | |
data_prefix=dict(video=data_root_val), | |
pipeline=test_pipeline, | |
test_mode=True)) | |
val_evaluator = dict(type='AccMetric') | |
test_evaluator = val_evaluator | |
train_cfg = dict( | |
type='EpochBasedTrainLoop', max_epochs=50, val_begin=1, val_interval=1) | |
val_cfg = dict(type='ValLoop') | |
test_cfg = dict(type='TestLoop') | |
optim_wrapper = dict( | |
optimizer=dict( | |
type='AdamW', lr=5e-6, betas=(0.9, 0.98), eps=1e-08, weight_decay=0.2), | |
paramwise_cfg=dict(custom_keys=dict(adapter=dict(lr_mult=10)))) | |
param_scheduler = [ | |
dict( | |
type='LinearLR', | |
start_factor=0.01, | |
by_epoch=True, | |
begin=0, | |
end=5, | |
convert_to_iter_based=True), | |
dict( | |
type='CosineAnnealingLR', | |
T_max=45, | |
eta_min=0, | |
by_epoch=True, | |
begin=5, | |
end=50, | |
convert_to_iter_based=True) | |
] | |
# Default setting for scaling LR automatically | |
# - `enable` means enable scaling LR automatically | |
# or not by default. | |
# - `base_batch_size` = (8 GPUs) x (16 samples per GPU). | |
auto_scale_lr = dict(enable=False, base_batch_size=128) | |
default_scope = 'mmaction' | |
default_hooks = dict( | |
runtime_info=dict(type='RuntimeInfoHook'), | |
timer=dict(type='IterTimerHook'), | |
logger=dict(type='LoggerHook', interval=100, ignore_last=False), | |
param_scheduler=dict(type='ParamSchedulerHook'), | |
checkpoint=dict( | |
type='CheckpointHook', interval=1, save_best='auto', max_keep_ckpts=5), | |
sampler_seed=dict(type='DistSamplerSeedHook'), | |
sync_buffers=dict(type='SyncBuffersHook')) | |
env_cfg = dict( | |
cudnn_benchmark=False, | |
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), | |
dist_cfg=dict(backend='nccl')) | |
log_processor = dict(type='LogProcessor', window_size=20, by_epoch=True) | |
vis_backends = [dict(type='LocalVisBackend')] | |
visualizer = dict(type='ActionVisualizer', vis_backends=vis_backends) | |
log_level = 'INFO' | |
load_from = None | |
resume = False | |