# Image-to-Text
# Chinese
# English
# FLIP / configs / pretrain.yaml
# OpenFace-CQUPT
# Upload 16 files
# 3d7aa36 verified
# raw
# history blame contribute delete
# 436 Bytes
# Data locations. Both values are empty strings in this file; presumably they
# are filled in per environment before a run (confirm against the launcher).
image_root: ''  # path to the image files
ann_root: ''  # path to the JSON annotation files
dataset: 'facecaption'
config: './configs'

# Checkpoint setting; left empty here.
# NOTE(review): how an empty value is handled depends on the consuming code.
pretrained: ''
intermediate_hidden_state: false

# Size of the ViT model: 'base' or 'large'.
vit: 'base'
image_size: 224

batch_size_train: 80
batch_size_test: 80

# 61440 = 768 * batch_size_train.
# NOTE(review): if the consumer needs queue_size to be a multiple of the
# training batch size, keep the two in step when changing either — confirm.
queue_size: 61440
# NOTE(review): alpha and k_test are not described in this file; check the
# training/evaluation code for their exact meaning before tuning them.
alpha: 0.4
k_test: 256
# optimizer
# Learning rates use an explicit decimal point ("3.0e-5", not "3e-5"):
# YAML 1.1 loaders such as PyYAML resolve the dot-less exponent form as a
# string, whereas both 1.1 and 1.2 loaders read the dotted form as a float.
weight_decay: 0.05
init_lr: 3.0e-5
min_lr: 1.0e-6
warmup_lr: 1.0e-6
lr_decay_rate: 0.9
max_epoch: 15
warmup_steps: 20000