danushkhanna's picture
Upload folder using huggingface_hub
f420eab verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 34.8421052631579,
"eval_steps": 500,
"global_step": 140,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 2.4210526315789473,
"grad_norm": 0.6825495362281799,
"learning_rate": 0.00014285714285714287,
"logits/chosen": -0.9514083862304688,
"logits/rejected": -1.0533627271652222,
"logps/chosen": -39.20041275024414,
"logps/rejected": -21.37519073486328,
"loss": 0.5664,
"rewards/accuracies": 0.7743055820465088,
"rewards/chosen": 0.15898703038692474,
"rewards/margins": 0.5821112990379333,
"rewards/rejected": -0.42312419414520264,
"step": 10
},
{
"epoch": 4.842105263157895,
"grad_norm": 0.08670935779809952,
"learning_rate": 0.00019888308262251285,
"logits/chosen": -0.13068915903568268,
"logits/rejected": -0.16997402906417847,
"logps/chosen": -74.40914916992188,
"logps/rejected": -151.8779296875,
"loss": 0.0471,
"rewards/accuracies": 0.9635416865348816,
"rewards/chosen": -3.3677978515625,
"rewards/margins": 10.11207389831543,
"rewards/rejected": -13.479872703552246,
"step": 20
},
{
"epoch": 7.421052631578947,
"grad_norm": 0.0034703314304351807,
"learning_rate": 0.00019214762118704076,
"logits/chosen": 0.19064301252365112,
"logits/rejected": 0.24179647862911224,
"logps/chosen": -95.65469360351562,
"logps/rejected": -240.3935089111328,
"loss": 0.0286,
"rewards/accuracies": 0.9664474129676819,
"rewards/chosen": -5.480339050292969,
"rewards/margins": 16.84355354309082,
"rewards/rejected": -22.323888778686523,
"step": 30
},
{
"epoch": 9.842105263157894,
"grad_norm": 0.0037726943846791983,
"learning_rate": 0.00017971325072229226,
"logits/chosen": 0.08421485126018524,
"logits/rejected": 0.1458778977394104,
"logps/chosen": -170.75747680664062,
"logps/rejected": -437.0244140625,
"loss": 0.0258,
"rewards/accuracies": 0.9670138955116272,
"rewards/chosen": -12.99338150024414,
"rewards/margins": 28.99999237060547,
"rewards/rejected": -41.993377685546875,
"step": 40
},
{
"epoch": 12.421052631578947,
"grad_norm": 0.0033487407490611076,
"learning_rate": 0.00016234898018587337,
"logits/chosen": -0.04333849251270294,
"logits/rejected": -0.01762447878718376,
"logps/chosen": -186.37686157226562,
"logps/rejected": -464.90765380859375,
"loss": 0.0272,
"rewards/accuracies": 0.9671053290367126,
"rewards/chosen": -14.553586959838867,
"rewards/margins": 30.223119735717773,
"rewards/rejected": -44.77670669555664,
"step": 50
},
{
"epoch": 14.842105263157894,
"grad_norm": 0.003097180975601077,
"learning_rate": 0.00014112871031306119,
"logits/chosen": -0.13475045561790466,
"logits/rejected": -0.1398223638534546,
"logps/chosen": -184.91830444335938,
"logps/rejected": -461.1513977050781,
"loss": 0.0244,
"rewards/accuracies": 0.96875,
"rewards/chosen": -14.403005599975586,
"rewards/margins": 29.99542999267578,
"rewards/rejected": -44.398433685302734,
"step": 60
},
{
"epoch": 17.42105263157895,
"grad_norm": 0.0024468335323035717,
"learning_rate": 0.00011736481776669306,
"logits/chosen": -0.1959075629711151,
"logits/rejected": -0.22540684044361115,
"logps/chosen": -185.68663024902344,
"logps/rejected": -458.96826171875,
"loss": 0.0261,
"rewards/accuracies": 0.9684211015701294,
"rewards/chosen": -14.49423885345459,
"rewards/margins": 29.691282272338867,
"rewards/rejected": -44.185516357421875,
"step": 70
},
{
"epoch": 19.842105263157894,
"grad_norm": 0.002537067048251629,
"learning_rate": 9.252699064135758e-05,
"logits/chosen": -0.22896860539913177,
"logits/rejected": -0.2724004089832306,
"logps/chosen": -184.9608154296875,
"logps/rejected": -460.61468505859375,
"loss": 0.0266,
"rewards/accuracies": 0.9659722447395325,
"rewards/chosen": -14.424090385437012,
"rewards/margins": 29.924333572387695,
"rewards/rejected": -44.34842300415039,
"step": 80
},
{
"epoch": 22.42105263157895,
"grad_norm": 0.0032824031077325344,
"learning_rate": 6.815133497483157e-05,
"logits/chosen": -0.24728278815746307,
"logits/rejected": -0.2936950922012329,
"logps/chosen": -185.9248046875,
"logps/rejected": -467.2992858886719,
"loss": 0.0287,
"rewards/accuracies": 0.9651316404342651,
"rewards/chosen": -14.498348236083984,
"rewards/margins": 30.518945693969727,
"rewards/rejected": -45.01729202270508,
"step": 90
},
{
"epoch": 24.842105263157894,
"grad_norm": 0.003522921120747924,
"learning_rate": 4.574537361342407e-05,
"logits/chosen": -0.2618289887905121,
"logits/rejected": -0.311085045337677,
"logps/chosen": -187.0611572265625,
"logps/rejected": -466.8361511230469,
"loss": 0.0258,
"rewards/accuracies": 0.9670138955116272,
"rewards/chosen": -14.643547058105469,
"rewards/margins": 30.329221725463867,
"rewards/rejected": -44.97277069091797,
"step": 100
},
{
"epoch": 27.42105263157895,
"grad_norm": 0.0024658790789544582,
"learning_rate": 2.669481281701739e-05,
"logits/chosen": -0.27023741602897644,
"logits/rejected": -0.3240560293197632,
"logps/chosen": -186.94210815429688,
"logps/rejected": -473.1488952636719,
"loss": 0.0276,
"rewards/accuracies": 0.9664474129676819,
"rewards/chosen": -14.606731414794922,
"rewards/margins": 30.998502731323242,
"rewards/rejected": -45.6052360534668,
"step": 110
},
{
"epoch": 29.842105263157894,
"grad_norm": 0.0034348091576248407,
"learning_rate": 1.2177842662977135e-05,
"logits/chosen": -0.27230748534202576,
"logits/rejected": -0.32499459385871887,
"logps/chosen": -185.62405395507812,
"logps/rejected": -470.46063232421875,
"loss": 0.0252,
"rewards/accuracies": 0.9677083492279053,
"rewards/chosen": -14.492985725402832,
"rewards/margins": 30.83690071105957,
"rewards/rejected": -45.329891204833984,
"step": 120
},
{
"epoch": 32.421052631578945,
"grad_norm": 0.0032501835376024246,
"learning_rate": 3.092271377092215e-06,
"logits/chosen": -0.27767735719680786,
"logits/rejected": -0.3315570652484894,
"logps/chosen": -187.55230712890625,
"logps/rejected": -470.9134521484375,
"loss": 0.0287,
"rewards/accuracies": 0.9651316404342651,
"rewards/chosen": -14.68417739868164,
"rewards/margins": 30.694440841674805,
"rewards/rejected": -45.37861633300781,
"step": 130
},
{
"epoch": 34.8421052631579,
"grad_norm": 0.003052822547033429,
"learning_rate": 0.0,
"logits/chosen": -0.27554523944854736,
"logits/rejected": -0.32890552282333374,
"logps/chosen": -185.5701904296875,
"logps/rejected": -472.9700927734375,
"loss": 0.0244,
"rewards/accuracies": 0.96875,
"rewards/chosen": -14.46578311920166,
"rewards/margins": 31.117090225219727,
"rewards/rejected": -45.58286666870117,
"step": 140
},
{
"epoch": 34.8421052631579,
"step": 140,
"total_flos": 3.1072679368851456e+17,
"train_loss": 0.06661632827350071,
"train_runtime": 6674.5686,
"train_samples_per_second": 6.319,
"train_steps_per_second": 0.021
}
],
"logging_steps": 10,
"max_steps": 140,
"num_input_tokens_seen": 0,
"num_train_epochs": 35,
"save_steps": 70,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.1072679368851456e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}