_wandb: | |
value: | |
cli_version: 0.18.5 | |
m: [] | |
python_version: 3.11.10 | |
t: | |
"1": | |
- 1 | |
- 11 | |
- 41 | |
- 49 | |
- 51 | |
- 55 | |
- 71 | |
- 83 | |
- 98 | |
- 105 | |
"2": | |
- 1 | |
- 11 | |
- 41 | |
- 49 | |
- 51 | |
- 55 | |
- 71 | |
- 83 | |
- 98 | |
- 105 | |
"3": | |
- 2 | |
- 13 | |
- 16 | |
- 23 | |
- 55 | |
- 61 | |
"4": 3.11.10 | |
"5": 0.18.5 | |
"6": 4.46.0.dev0 | |
"8": | |
- 5 | |
"12": 0.18.5 | |
"13": linux-x86_64 | |
data_cfgs: | |
value: | |
eval_data_files: null | |
eval_datasets: /home/align-anything/datasets/40k_ti2t_filtered | |
eval_name: null | |
eval_optional_args: [] | |
eval_size: null | |
eval_split: train | |
eval_subset: null | |
eval_template: AA_TI2T | |
train_data_files: null | |
train_datasets: /data/align-anything/jiayi/ti2t/refine_gen_wo_res/1021_13b_2000 | |
train_name: null | |
train_optional_args: [] | |
train_size: null | |
train_split: train | |
train_subset: null | |
train_template: AA_TI2T_Critique | |
logger_cfgs: | |
value: | |
cache_dir: null | |
log_project: align-anything | |
log_run_name: rm | |
log_type: wandb | |
output_dir: /data/align-anything/jiayi/ti2t/rm_10_22_ours_13b_2000 | |
save_interval: 598 | |
model_cfgs: | |
value: | |
model_max_length: 2048 | |
model_name_or_path: /data/models/llava/llava-1.5-13b-hf | |
trust_remote_code: true | |
special_tokens: | |
value: null | |
train_cfgs: | |
value: | |
adam_betas: | |
- 0.9 | |
- 0.95 | |
bf16: true | |
ds_cfgs: ds_z3_config.json | |
epochs: 5 | |
eval_interval: 10 | |
eval_strategy: epoch | |
fp16: false | |
freeze_language_model: false | |
freeze_mm_proj: false | |
freeze_vision_tower: true | |
gradient_accumulation_steps: 1 | |
gradient_checkpointing: true | |
learning_rate: 3e-05 | |
lr_scheduler_type: constant_with_warmup | |
lr_warmup_ratio: 0.03 | |
per_device_eval_batch_size: 4 | |
per_device_train_batch_size: 4 | |
regularization: 0.001 | |
seed: 42 | |
weight_decay: 0 | |