|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.6019110676397562, |
|
"eval_steps": 500, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.000000000000001e-06, |
|
"logits/chosen": -58.747344970703125, |
|
"logits/rejected": -59.84019470214844, |
|
"logps/chosen": -150.3143768310547, |
|
"logps/rejected": -179.38966369628906, |
|
"loss": 0.5314, |
|
"rewards/accuracies": 0.7318750023841858, |
|
"rewards/chosen": -0.8851571679115295, |
|
"rewards/margins": 0.6559739708900452, |
|
"rewards/rejected": -1.5411310195922852, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.68421052631579e-06, |
|
"logits/chosen": -64.05355072021484, |
|
"logits/rejected": -65.13407897949219, |
|
"logps/chosen": -155.71795654296875, |
|
"logps/rejected": -190.02166748046875, |
|
"loss": 0.4485, |
|
"rewards/accuracies": 0.7715625166893005, |
|
"rewards/chosen": -1.3824467658996582, |
|
"rewards/margins": 1.1924123764038086, |
|
"rewards/rejected": -2.5748589038848877, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.263157894736842e-06, |
|
"logits/chosen": -64.59612274169922, |
|
"logits/rejected": -65.59567260742188, |
|
"logps/chosen": -158.51266479492188, |
|
"logps/rejected": -191.44497680664062, |
|
"loss": 0.4208, |
|
"rewards/accuracies": 0.7871875166893005, |
|
"rewards/chosen": -1.4413859844207764, |
|
"rewards/margins": 1.3812304735183716, |
|
"rewards/rejected": -2.8226163387298584, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.842105263157895e-06, |
|
"logits/chosen": -65.70255279541016, |
|
"logits/rejected": -66.5833969116211, |
|
"logps/chosen": -156.6477508544922, |
|
"logps/rejected": -195.2394561767578, |
|
"loss": 0.4062, |
|
"rewards/accuracies": 0.7973437309265137, |
|
"rewards/chosen": -1.4948838949203491, |
|
"rewards/margins": 1.513500690460205, |
|
"rewards/rejected": -3.0083847045898438, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.421052631578948e-06, |
|
"logits/chosen": -65.27395629882812, |
|
"logits/rejected": -66.06521606445312, |
|
"logps/chosen": -156.45945739746094, |
|
"logps/rejected": -196.63790893554688, |
|
"loss": 0.3898, |
|
"rewards/accuracies": 0.7978125214576721, |
|
"rewards/chosen": -1.6718982458114624, |
|
"rewards/margins": 1.6537814140319824, |
|
"rewards/rejected": -3.3256795406341553, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 200, |
|
"max_steps": 5000, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|