ann_file_train = 'data/kinetics400/kinetics400_train_list_videos.txt' ann_file_val = 'data/kinetics400/kinetics400_val_list_videos.txt' auto_scale_lr = dict(base_batch_size=256, enable=False) data_root = 'data/kinetics400/videos_train' data_root_val = 'data/kinetics400/videos_val' dataset_type = 'VideoDataset' default_hooks = dict( checkpoint=dict( interval=3, max_keep_ckpts=3, save_best='auto', type='CheckpointHook'), logger=dict(ignore_last=False, interval=20, type='LoggerHook'), param_scheduler=dict(type='ParamSchedulerHook'), runtime_info=dict(type='RuntimeInfoHook'), sampler_seed=dict(type='DistSamplerSeedHook'), sync_buffers=dict(type='SyncBuffersHook'), timer=dict(type='IterTimerHook')) default_scope = 'mmaction' env_cfg = dict( cudnn_benchmark=False, dist_cfg=dict(backend='nccl'), mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) file_client_args = dict(io_backend='disk') load_from = None log_level = 'INFO' log_processor = dict(by_epoch=True, type='LogProcessor', window_size=20) model = dict( backbone=dict( depth=50, norm_eval=False, pretrained='https://download.pytorch.org/models/resnet50-11ad3fa6.pth', type='ResNet'), cls_head=dict( average_clips='prob', consensus=dict(dim=1, type='AvgConsensus'), dropout_ratio=0.4, in_channels=2048, init_std=0.01, num_classes=400, spatial_type='avg', type='TSNHead'), data_preprocessor=dict( format_shape='NCHW', mean=[ 123.675, 116.28, 103.53, ], std=[ 58.395, 57.12, 57.375, ], type='ActionDataPreprocessor'), test_cfg=None, train_cfg=None, type='Recognizer2D') optim_wrapper = dict( clip_grad=dict(max_norm=40, norm_type=2), optimizer=dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0001)) param_scheduler = [ dict( begin=0, by_epoch=True, end=100, gamma=0.1, milestones=[ 40, 80, ], type='MultiStepLR'), ] resume = False test_cfg = dict(type='TestLoop') test_dataloader = dict( batch_size=1, dataset=dict( ann_file='data/kinetics400/kinetics400_val_list_videos.txt', data_prefix=dict(video='data/kinetics400/videos_val'), pipeline=[ dict(io_backend='disk', type='DecordInit'), dict( clip_len=1, frame_interval=1, num_clips=25, test_mode=True, type='SampleFrames'), dict(type='DecordDecode'), dict(scale=( -1, 256, ), type='Resize'), dict(crop_size=224, type='TenCrop'), dict(input_format='NCHW', type='FormatShape'), dict(type='PackActionInputs'), ], test_mode=True, type='VideoDataset'), num_workers=8, persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) test_evaluator = dict(type='AccMetric') test_pipeline = [ dict(io_backend='disk', type='DecordInit'), dict( clip_len=1, frame_interval=1, num_clips=25, test_mode=True, type='SampleFrames'), dict(type='DecordDecode'), dict(scale=( -1, 256, ), type='Resize'), dict(crop_size=224, type='TenCrop'), dict(input_format='NCHW', type='FormatShape'), dict(type='PackActionInputs'), ] train_cfg = dict( max_epochs=100, type='EpochBasedTrainLoop', val_begin=1, val_interval=1) train_dataloader = dict( batch_size=32, dataset=dict( ann_file='data/kinetics400/kinetics400_train_list_videos.txt', data_prefix=dict(video='data/kinetics400/videos_train'), pipeline=[ dict(io_backend='disk', type='DecordInit'), dict( clip_len=1, frame_interval=1, num_clips=8, type='SampleFrames'), dict(type='DecordDecode'), dict(scale=( -1, 256, ), type='Resize'), dict( input_size=224, max_wh_scale_gap=1, random_crop=False, scales=( 1, 0.875, 0.75, 0.66, ), type='MultiScaleCrop'), dict(keep_ratio=False, scale=( 224, 224, ), type='Resize'), dict(flip_ratio=0.5, type='Flip'), dict(input_format='NCHW', type='FormatShape'), dict(type='PackActionInputs'), ], type='VideoDataset'), num_workers=8, persistent_workers=True, sampler=dict(shuffle=True, type='DefaultSampler')) train_pipeline = [ dict(io_backend='disk', type='DecordInit'), dict(clip_len=1, frame_interval=1, num_clips=8, type='SampleFrames'), dict(type='DecordDecode'), dict(scale=( -1, 256, ), type='Resize'), dict( input_size=224, max_wh_scale_gap=1, random_crop=False, scales=( 1, 0.875, 0.75, 0.66, ), type='MultiScaleCrop'), dict(keep_ratio=False, scale=( 224, 224, ), type='Resize'), dict(flip_ratio=0.5, type='Flip'), dict(input_format='NCHW', type='FormatShape'), dict(type='PackActionInputs'), ] val_cfg = dict(type='ValLoop') val_dataloader = dict( batch_size=32, dataset=dict( ann_file='data/kinetics400/kinetics400_val_list_videos.txt', data_prefix=dict(video='data/kinetics400/videos_val'), pipeline=[ dict(io_backend='disk', type='DecordInit'), dict( clip_len=1, frame_interval=1, num_clips=8, test_mode=True, type='SampleFrames'), dict(type='DecordDecode'), dict(scale=( -1, 256, ), type='Resize'), dict(crop_size=224, type='CenterCrop'), dict(input_format='NCHW', type='FormatShape'), dict(type='PackActionInputs'), ], test_mode=True, type='VideoDataset'), num_workers=8, persistent_workers=True, sampler=dict(shuffle=False, type='DefaultSampler')) val_evaluator = dict(type='AccMetric') val_pipeline = [ dict(io_backend='disk', type='DecordInit'), dict( clip_len=1, frame_interval=1, num_clips=8, test_mode=True, type='SampleFrames'), dict(type='DecordDecode'), dict(scale=( -1, 256, ), type='Resize'), dict(crop_size=224, type='CenterCrop'), dict(input_format='NCHW', type='FormatShape'), dict(type='PackActionInputs'), ] vis_backends = [ dict(type='LocalVisBackend'), ] visualizer = dict( type='ActionVisualizer', vis_backends=[ dict(type='LocalVisBackend'), ])