---
# Configuration for the 'facecaption' dataset.
# One key per line: a '#' preceded by whitespace starts a comment that runs to
# the end of the physical line, so keys must never share a line with a comment
# that precedes them.

# Root path for images.
image_root: ''
# Root path for the JSON annotation files.
ann_root: ''
dataset: 'facecaption'
config: './configs'
# Empty string means no pretrained value is supplied here.
pretrained: ''
intermediate_hidden_state: false

# Size of the ViT model: 'base' or 'large'.
vit: 'base'
image_size: 224
batch_size_train: 80
batch_size_test: 80
queue_size: 61440
alpha: 0.4
k_test: 256

# Optimizer.
# Learning rates are written with an explicit '.0' mantissa so that YAML 1.1
# loaders resolve them as floats; a bare '3e-5' is read as a string there.
weight_decay: 0.05
init_lr: 3.0e-5
min_lr: 1.0e-6
warmup_lr: 1.0e-6
lr_decay_rate: 0.9
max_epoch: 15
warmup_steps: 20000