{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9952153110047847,
  "eval_steps": 500,
  "global_step": 52,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 8.333333333333333e-08,
      "logits/chosen": -2.6334049701690674,
      "logits/rejected": -2.5699148178100586,
      "logps/chosen": -266.709716796875,
      "logps/pi_response": -177.38482666015625,
      "logps/ref_response": -177.38482666015625,
      "logps/rejected": -273.04791259765625,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.907293218369498e-07,
      "logits/chosen": -2.7238454818725586,
      "logits/rejected": -2.659656524658203,
      "logps/chosen": -230.67950439453125,
      "logps/pi_response": -167.97642517089844,
      "logps/ref_response": -168.9660186767578,
      "logps/rejected": -255.850830078125,
      "loss": 0.6806,
      "rewards/accuracies": 0.5798611044883728,
      "rewards/chosen": -0.017340650781989098,
      "rewards/margins": 0.022774934768676758,
      "rewards/rejected": -0.040115587413311005,
      "step": 10
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.941700805287168e-07,
      "logits/chosen": -2.693298816680908,
      "logits/rejected": -2.6650853157043457,
      "logps/chosen": -245.6147918701172,
      "logps/pi_response": -143.6698455810547,
      "logps/ref_response": -151.20733642578125,
      "logps/rejected": -294.1537170410156,
      "loss": 0.6104,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.19365832209587097,
      "rewards/margins": 0.2653965353965759,
      "rewards/rejected": -0.4590548574924469,
      "step": 20
    },
    {
      "epoch": 0.57,
      "learning_rate": 2.3293939665883228e-07,
      "logits/chosen": -2.6871800422668457,
      "logits/rejected": -2.6462528705596924,
      "logps/chosen": -267.20556640625,
      "logps/pi_response": -165.4456024169922,
      "logps/ref_response": -163.42379760742188,
      "logps/rejected": -344.3558349609375,
      "loss": 0.5928,
      "rewards/accuracies": 0.703125,
      "rewards/chosen": -0.35205698013305664,
      "rewards/margins": 0.48441195487976074,
      "rewards/rejected": -0.8364689946174622,
      "step": 30
    },
    {
      "epoch": 0.77,
      "learning_rate": 7.936171419533652e-08,
      "logits/chosen": -2.6936826705932617,
      "logits/rejected": -2.644627094268799,
      "logps/chosen": -287.3558349609375,
      "logps/pi_response": -172.02587890625,
      "logps/ref_response": -168.6425323486328,
      "logps/rejected": -370.65753173828125,
      "loss": 0.5471,
      "rewards/accuracies": 0.703125,
      "rewards/chosen": -0.3708304762840271,
      "rewards/margins": 0.5322133898735046,
      "rewards/rejected": -0.9030438661575317,
      "step": 40
    },
    {
      "epoch": 0.96,
      "learning_rate": 2.328513490917311e-09,
      "logits/chosen": -2.6171915531158447,
      "logits/rejected": -2.5323662757873535,
      "logps/chosen": -264.52960205078125,
      "logps/pi_response": -189.58047485351562,
      "logps/ref_response": -165.90725708007812,
      "logps/rejected": -367.69598388671875,
      "loss": 0.5436,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -0.42130571603775024,
      "rewards/margins": 0.6634918451309204,
      "rewards/rejected": -1.0847976207733154,
      "step": 50
    },
    {
      "epoch": 1.0,
      "step": 52,
      "total_flos": 0.0,
      "train_loss": 0.5947528710732093,
      "train_runtime": 3144.2013,
      "train_samples_per_second": 4.241,
      "train_steps_per_second": 0.017
    }
  ],
  "logging_steps": 10,
  "max_steps": 52,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}