{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.99581589958159,
  "eval_steps": 500,
  "global_step": 119,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 4.166666666666666e-08,
      "logits/chosen": -2.7608747482299805,
      "logits/rejected": -2.7489399909973145,
      "logps/chosen": -156.13702392578125,
      "logps/rejected": -214.59707641601562,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.1666666666666667e-07,
      "logits/chosen": -2.7645390033721924,
      "logits/rejected": -2.704571008682251,
      "logps/chosen": -192.93963623046875,
      "logps/rejected": -218.26573181152344,
      "loss": 0.6928,
      "rewards/accuracies": 0.4444444477558136,
      "rewards/chosen": 0.0014097224920988083,
      "rewards/margins": 0.0015009460039436817,
      "rewards/rejected": -9.122348274104297e-05,
      "step": 10
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.931352528237397e-07,
      "logits/chosen": -2.6521761417388916,
      "logits/rejected": -2.614973545074463,
      "logps/chosen": -236.9346466064453,
      "logps/rejected": -204.9691162109375,
      "loss": 0.686,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -0.07020659744739532,
      "rewards/margins": 0.029431456699967384,
      "rewards/rejected": -0.09963803738355637,
      "step": 20
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.658920803689553e-07,
      "logits/chosen": -2.56916880607605,
      "logits/rejected": -2.5743496417999268,
      "logps/chosen": -238.0299072265625,
      "logps/rejected": -208.71658325195312,
      "loss": 0.685,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": -0.2192213088274002,
      "rewards/margins": 0.04515828937292099,
      "rewards/rejected": -0.2643795907497406,
      "step": 30
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.201712553872657e-07,
      "logits/chosen": -2.51991605758667,
      "logits/rejected": -2.5045738220214844,
      "logps/chosen": -258.259033203125,
      "logps/rejected": -245.266845703125,
      "loss": 0.6645,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.2803400158882141,
      "rewards/margins": 0.07451216131448746,
      "rewards/rejected": -0.35485216975212097,
      "step": 40
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.598859066780754e-07,
      "logits/chosen": -2.519888401031494,
      "logits/rejected": -2.50127911567688,
      "logps/chosen": -257.92901611328125,
      "logps/rejected": -247.06884765625,
      "loss": 0.6622,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -0.3478809893131256,
      "rewards/margins": 0.08011214435100555,
      "rewards/rejected": -0.42799311876296997,
      "step": 50
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.9019570347986706e-07,
      "logits/chosen": -2.436131000518799,
      "logits/rejected": -2.4517111778259277,
      "logps/chosen": -284.99066162109375,
      "logps/rejected": -233.84280395507812,
      "loss": 0.66,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -0.3800959289073944,
      "rewards/margins": 0.2344251424074173,
      "rewards/rejected": -0.6145211458206177,
      "step": 60
    },
    {
      "epoch": 0.59,
      "learning_rate": 2.1706525253979534e-07,
      "logits/chosen": -2.4360086917877197,
      "logits/rejected": -2.440263032913208,
      "logps/chosen": -264.9581604003906,
      "logps/rejected": -267.7023010253906,
      "loss": 0.6551,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -0.4374156892299652,
      "rewards/margins": 0.17235831916332245,
      "rewards/rejected": -0.6097739338874817,
      "step": 70
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.4675360263490295e-07,
      "logits/chosen": -2.4380290508270264,
      "logits/rejected": -2.4010090827941895,
      "logps/chosen": -236.95388793945312,
      "logps/rejected": -257.6996154785156,
      "loss": 0.6616,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.4010644853115082,
      "rewards/margins": 0.13567090034484863,
      "rewards/rejected": -0.536735475063324,
      "step": 80
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.527854855097224e-08,
      "logits/chosen": -2.469369888305664,
      "logits/rejected": -2.4206488132476807,
      "logps/chosen": -275.33831787109375,
      "logps/rejected": -252.06930541992188,
      "loss": 0.6579,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.4325632154941559,
      "rewards/margins": 0.17887099087238312,
      "rewards/rejected": -0.611434280872345,
      "step": 90
    },
    {
      "epoch": 0.84,
      "learning_rate": 3.790158337517127e-08,
      "logits/chosen": -2.4000065326690674,
      "logits/rejected": -2.405733585357666,
      "logps/chosen": -256.33343505859375,
      "logps/rejected": -261.50762939453125,
      "loss": 0.6521,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.44618505239486694,
      "rewards/margins": 0.14844560623168945,
      "rewards/rejected": -0.5946307182312012,
      "step": 100
    },
    {
      "epoch": 0.92,
      "learning_rate": 8.677580722139671e-09,
      "logits/chosen": -2.432537078857422,
      "logits/rejected": -2.425053358078003,
      "logps/chosen": -262.3310546875,
      "logps/rejected": -261.58697509765625,
      "loss": 0.6741,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -0.6105460524559021,
      "rewards/margins": -0.03789714723825455,
      "rewards/rejected": -0.5726489424705505,
      "step": 110
    },
    {
      "epoch": 1.0,
      "step": 119,
      "total_flos": 0.0,
      "train_loss": 0.66752010233262,
      "train_runtime": 1991.7897,
      "train_samples_per_second": 7.673,
      "train_steps_per_second": 0.06
    }
  ],
  "logging_steps": 10,
  "max_steps": 119,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}