|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 34.8421052631579, |
|
"eval_steps": 500, |
|
"global_step": 140, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.4210526315789473, |
|
"grad_norm": 0.6825495362281799, |
|
"learning_rate": 0.00014285714285714287, |
|
"logits/chosen": -0.9514083862304688, |
|
"logits/rejected": -1.0533627271652222, |
|
"logps/chosen": -39.20041275024414, |
|
"logps/rejected": -21.37519073486328, |
|
"loss": 0.5664, |
|
"rewards/accuracies": 0.7743055820465088, |
|
"rewards/chosen": 0.15898703038692474, |
|
"rewards/margins": 0.5821112990379333, |
|
"rewards/rejected": -0.42312419414520264, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 4.842105263157895, |
|
"grad_norm": 0.08670935779809952, |
|
"learning_rate": 0.00019888308262251285, |
|
"logits/chosen": -0.13068915903568268, |
|
"logits/rejected": -0.16997402906417847, |
|
"logps/chosen": -74.40914916992188, |
|
"logps/rejected": -151.8779296875, |
|
"loss": 0.0471, |
|
"rewards/accuracies": 0.9635416865348816, |
|
"rewards/chosen": -3.3677978515625, |
|
"rewards/margins": 10.11207389831543, |
|
"rewards/rejected": -13.479872703552246, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 7.421052631578947, |
|
"grad_norm": 0.0034703314304351807, |
|
"learning_rate": 0.00019214762118704076, |
|
"logits/chosen": 0.19064301252365112, |
|
"logits/rejected": 0.24179647862911224, |
|
"logps/chosen": -95.65469360351562, |
|
"logps/rejected": -240.3935089111328, |
|
"loss": 0.0286, |
|
"rewards/accuracies": 0.9664474129676819, |
|
"rewards/chosen": -5.480339050292969, |
|
"rewards/margins": 16.84355354309082, |
|
"rewards/rejected": -22.323888778686523, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 9.842105263157894, |
|
"grad_norm": 0.0037726943846791983, |
|
"learning_rate": 0.00017971325072229226, |
|
"logits/chosen": 0.08421485126018524, |
|
"logits/rejected": 0.1458778977394104, |
|
"logps/chosen": -170.75747680664062, |
|
"logps/rejected": -437.0244140625, |
|
"loss": 0.0258, |
|
"rewards/accuracies": 0.9670138955116272, |
|
"rewards/chosen": -12.99338150024414, |
|
"rewards/margins": 28.99999237060547, |
|
"rewards/rejected": -41.993377685546875, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 12.421052631578947, |
|
"grad_norm": 0.0033487407490611076, |
|
"learning_rate": 0.00016234898018587337, |
|
"logits/chosen": -0.04333849251270294, |
|
"logits/rejected": -0.01762447878718376, |
|
"logps/chosen": -186.37686157226562, |
|
"logps/rejected": -464.90765380859375, |
|
"loss": 0.0272, |
|
"rewards/accuracies": 0.9671053290367126, |
|
"rewards/chosen": -14.553586959838867, |
|
"rewards/margins": 30.223119735717773, |
|
"rewards/rejected": -44.77670669555664, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 14.842105263157894, |
|
"grad_norm": 0.003097180975601077, |
|
"learning_rate": 0.00014112871031306119, |
|
"logits/chosen": -0.13475045561790466, |
|
"logits/rejected": -0.1398223638534546, |
|
"logps/chosen": -184.91830444335938, |
|
"logps/rejected": -461.1513977050781, |
|
"loss": 0.0244, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -14.403005599975586, |
|
"rewards/margins": 29.99542999267578, |
|
"rewards/rejected": -44.398433685302734, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 17.42105263157895, |
|
"grad_norm": 0.0024468335323035717, |
|
"learning_rate": 0.00011736481776669306, |
|
"logits/chosen": -0.1959075629711151, |
|
"logits/rejected": -0.22540684044361115, |
|
"logps/chosen": -185.68663024902344, |
|
"logps/rejected": -458.96826171875, |
|
"loss": 0.0261, |
|
"rewards/accuracies": 0.9684211015701294, |
|
"rewards/chosen": -14.49423885345459, |
|
"rewards/margins": 29.691282272338867, |
|
"rewards/rejected": -44.185516357421875, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 19.842105263157894, |
|
"grad_norm": 0.002537067048251629, |
|
"learning_rate": 9.252699064135758e-05, |
|
"logits/chosen": -0.22896860539913177, |
|
"logits/rejected": -0.2724004089832306, |
|
"logps/chosen": -184.9608154296875, |
|
"logps/rejected": -460.61468505859375, |
|
"loss": 0.0266, |
|
"rewards/accuracies": 0.9659722447395325, |
|
"rewards/chosen": -14.424090385437012, |
|
"rewards/margins": 29.924333572387695, |
|
"rewards/rejected": -44.34842300415039, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 22.42105263157895, |
|
"grad_norm": 0.0032824031077325344, |
|
"learning_rate": 6.815133497483157e-05, |
|
"logits/chosen": -0.24728278815746307, |
|
"logits/rejected": -0.2936950922012329, |
|
"logps/chosen": -185.9248046875, |
|
"logps/rejected": -467.2992858886719, |
|
"loss": 0.0287, |
|
"rewards/accuracies": 0.9651316404342651, |
|
"rewards/chosen": -14.498348236083984, |
|
"rewards/margins": 30.518945693969727, |
|
"rewards/rejected": -45.01729202270508, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 24.842105263157894, |
|
"grad_norm": 0.003522921120747924, |
|
"learning_rate": 4.574537361342407e-05, |
|
"logits/chosen": -0.2618289887905121, |
|
"logits/rejected": -0.311085045337677, |
|
"logps/chosen": -187.0611572265625, |
|
"logps/rejected": -466.8361511230469, |
|
"loss": 0.0258, |
|
"rewards/accuracies": 0.9670138955116272, |
|
"rewards/chosen": -14.643547058105469, |
|
"rewards/margins": 30.329221725463867, |
|
"rewards/rejected": -44.97277069091797, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 27.42105263157895, |
|
"grad_norm": 0.0024658790789544582, |
|
"learning_rate": 2.669481281701739e-05, |
|
"logits/chosen": -0.27023741602897644, |
|
"logits/rejected": -0.3240560293197632, |
|
"logps/chosen": -186.94210815429688, |
|
"logps/rejected": -473.1488952636719, |
|
"loss": 0.0276, |
|
"rewards/accuracies": 0.9664474129676819, |
|
"rewards/chosen": -14.606731414794922, |
|
"rewards/margins": 30.998502731323242, |
|
"rewards/rejected": -45.6052360534668, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 29.842105263157894, |
|
"grad_norm": 0.0034348091576248407, |
|
"learning_rate": 1.2177842662977135e-05, |
|
"logits/chosen": -0.27230748534202576, |
|
"logits/rejected": -0.32499459385871887, |
|
"logps/chosen": -185.62405395507812, |
|
"logps/rejected": -470.46063232421875, |
|
"loss": 0.0252, |
|
"rewards/accuracies": 0.9677083492279053, |
|
"rewards/chosen": -14.492985725402832, |
|
"rewards/margins": 30.83690071105957, |
|
"rewards/rejected": -45.329891204833984, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 32.421052631578945, |
|
"grad_norm": 0.0032501835376024246, |
|
"learning_rate": 3.092271377092215e-06, |
|
"logits/chosen": -0.27767735719680786, |
|
"logits/rejected": -0.3315570652484894, |
|
"logps/chosen": -187.55230712890625, |
|
"logps/rejected": -470.9134521484375, |
|
"loss": 0.0287, |
|
"rewards/accuracies": 0.9651316404342651, |
|
"rewards/chosen": -14.68417739868164, |
|
"rewards/margins": 30.694440841674805, |
|
"rewards/rejected": -45.37861633300781, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 34.8421052631579, |
|
"grad_norm": 0.003052822547033429, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.27554523944854736, |
|
"logits/rejected": -0.32890552282333374, |
|
"logps/chosen": -185.5701904296875, |
|
"logps/rejected": -472.9700927734375, |
|
"loss": 0.0244, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -14.46578311920166, |
|
"rewards/margins": 31.117090225219727, |
|
"rewards/rejected": -45.58286666870117, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 34.8421052631579, |
|
"step": 140, |
|
"total_flos": 3.1072679368851456e+17, |
|
"train_loss": 0.06661632827350071, |
|
"train_runtime": 6674.5686, |
|
"train_samples_per_second": 6.319, |
|
"train_steps_per_second": 0.021 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 140, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 35, |
|
"save_steps": 70, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.1072679368851456e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|