Llama3-8b / trainer_state.json
sakshi-rumsan's picture
Upload folder using huggingface_hub
f44e9bd verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.06,
"eval_steps": 500,
"global_step": 30,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.002,
"grad_norm": 60.206947326660156,
"learning_rate": 4.8333333333333334e-05,
"logits/chosen": -1.116684913635254,
"logits/rejected": -1.2412071228027344,
"logps/chosen": -80.99737548828125,
"logps/rejected": -208.84976196289062,
"loss": 0.8252,
"rewards/accuracies": 0.75,
"rewards/chosen": 19.664356231689453,
"rewards/margins": 11.318536758422852,
"rewards/rejected": 8.345821380615234,
"step": 1
},
{
"epoch": 0.004,
"grad_norm": 18.464937210083008,
"learning_rate": 4.666666666666667e-05,
"logits/chosen": -1.2304766178131104,
"logits/rejected": -1.1085100173950195,
"logps/chosen": -44.50049591064453,
"logps/rejected": -95.98702239990234,
"loss": 1.0253,
"rewards/accuracies": 0.75,
"rewards/chosen": 9.29911994934082,
"rewards/margins": 5.345921516418457,
"rewards/rejected": 3.9531972408294678,
"step": 2
},
{
"epoch": 0.006,
"grad_norm": 21.883440017700195,
"learning_rate": 4.5e-05,
"logits/chosen": -1.190551519393921,
"logits/rejected": -0.9011133313179016,
"logps/chosen": -102.20394897460938,
"logps/rejected": -98.943603515625,
"loss": 0.3271,
"rewards/accuracies": 0.75,
"rewards/chosen": 14.595328330993652,
"rewards/margins": 10.861374855041504,
"rewards/rejected": 3.7339534759521484,
"step": 3
},
{
"epoch": 0.008,
"grad_norm": 6.222319643711671e-05,
"learning_rate": 4.3333333333333334e-05,
"logits/chosen": -0.4864046573638916,
"logits/rejected": -0.9188252091407776,
"logps/chosen": -127.2591781616211,
"logps/rejected": -402.8990783691406,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 18.617298126220703,
"rewards/margins": 22.434566497802734,
"rewards/rejected": -3.817267656326294,
"step": 4
},
{
"epoch": 0.01,
"grad_norm": 0.3858901858329773,
"learning_rate": 4.166666666666667e-05,
"logits/chosen": -1.0422379970550537,
"logits/rejected": -0.8618497848510742,
"logps/chosen": -73.0203857421875,
"logps/rejected": -301.347412109375,
"loss": 0.0053,
"rewards/accuracies": 1.0,
"rewards/chosen": 25.020734786987305,
"rewards/margins": 27.691850662231445,
"rewards/rejected": -2.6711173057556152,
"step": 5
},
{
"epoch": 0.012,
"grad_norm": 0.0033887920435518026,
"learning_rate": 4e-05,
"logits/chosen": -1.2303388118743896,
"logits/rejected": -0.7831270694732666,
"logps/chosen": -85.19882202148438,
"logps/rejected": -286.6585693359375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 22.92217254638672,
"rewards/margins": 28.057987213134766,
"rewards/rejected": -5.135812282562256,
"step": 6
},
{
"epoch": 0.014,
"grad_norm": 0.01970483362674713,
"learning_rate": 3.8333333333333334e-05,
"logits/chosen": -1.266550064086914,
"logits/rejected": -0.7790937423706055,
"logps/chosen": -104.19335174560547,
"logps/rejected": -176.42501831054688,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/chosen": 12.132671356201172,
"rewards/margins": 16.39078140258789,
"rewards/rejected": -4.2581095695495605,
"step": 7
},
{
"epoch": 0.016,
"grad_norm": 207.87570190429688,
"learning_rate": 3.6666666666666666e-05,
"logits/chosen": -1.1942288875579834,
"logits/rejected": -0.5387299060821533,
"logps/chosen": -330.1429748535156,
"logps/rejected": -239.27011108398438,
"loss": 3.0167,
"rewards/accuracies": 0.75,
"rewards/chosen": 9.409097671508789,
"rewards/margins": 14.1237154006958,
"rewards/rejected": -4.714616298675537,
"step": 8
},
{
"epoch": 0.018,
"grad_norm": 0.0003472985699772835,
"learning_rate": 3.5e-05,
"logits/chosen": -1.2635735273361206,
"logits/rejected": -0.413591206073761,
"logps/chosen": -185.6806640625,
"logps/rejected": -166.2919921875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 24.54703712463379,
"rewards/margins": 25.826885223388672,
"rewards/rejected": -1.279848337173462,
"step": 9
},
{
"epoch": 0.02,
"grad_norm": 0.05313471704721451,
"learning_rate": 3.3333333333333335e-05,
"logits/chosen": -1.0146455764770508,
"logits/rejected": -0.8916444182395935,
"logps/chosen": -209.36700439453125,
"logps/rejected": -468.6343994140625,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/chosen": 22.948640823364258,
"rewards/margins": 28.48465919494629,
"rewards/rejected": -5.536019325256348,
"step": 10
},
{
"epoch": 0.022,
"grad_norm": 142.11721801757812,
"learning_rate": 3.1666666666666666e-05,
"logits/chosen": -0.9070144891738892,
"logits/rejected": -0.6460937261581421,
"logps/chosen": -159.49267578125,
"logps/rejected": -283.4989929199219,
"loss": 1.5215,
"rewards/accuracies": 0.75,
"rewards/chosen": 18.536209106445312,
"rewards/margins": 17.37179183959961,
"rewards/rejected": 1.1644160747528076,
"step": 11
},
{
"epoch": 0.024,
"grad_norm": 2.082591663565836e-07,
"learning_rate": 3e-05,
"logits/chosen": -1.2319185733795166,
"logits/rejected": -0.69745934009552,
"logps/chosen": -118.86239624023438,
"logps/rejected": -291.8245544433594,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 15.314111709594727,
"rewards/margins": 24.709243774414062,
"rewards/rejected": -9.395132064819336,
"step": 12
},
{
"epoch": 0.026,
"grad_norm": 0.2945432960987091,
"learning_rate": 2.8333333333333335e-05,
"logits/chosen": -1.1761776208877563,
"logits/rejected": -0.7650622725486755,
"logps/chosen": -149.94891357421875,
"logps/rejected": -175.96791076660156,
"loss": 0.0014,
"rewards/accuracies": 1.0,
"rewards/chosen": 13.455711364746094,
"rewards/margins": 17.48712158203125,
"rewards/rejected": -4.031410217285156,
"step": 13
},
{
"epoch": 0.028,
"grad_norm": 1.873824954032898,
"learning_rate": 2.6666666666666667e-05,
"logits/chosen": -0.8233789801597595,
"logits/rejected": -1.152618646621704,
"logps/chosen": -146.26565551757812,
"logps/rejected": -507.0345458984375,
"loss": 0.0099,
"rewards/accuracies": 1.0,
"rewards/chosen": 20.955230712890625,
"rewards/margins": 25.935028076171875,
"rewards/rejected": -4.979795932769775,
"step": 14
},
{
"epoch": 0.03,
"grad_norm": 28.382314682006836,
"learning_rate": 2.5e-05,
"logits/chosen": -1.1895701885223389,
"logits/rejected": -0.8884562849998474,
"logps/chosen": -93.35548400878906,
"logps/rejected": -189.5703125,
"loss": 0.1604,
"rewards/accuracies": 1.0,
"rewards/chosen": 11.567254066467285,
"rewards/margins": 15.98021411895752,
"rewards/rejected": -4.412960052490234,
"step": 15
},
{
"epoch": 0.032,
"grad_norm": 0.0003264884580858052,
"learning_rate": 2.3333333333333336e-05,
"logits/chosen": -1.0162668228149414,
"logits/rejected": -0.9473219513893127,
"logps/chosen": -269.1289367675781,
"logps/rejected": -664.7567138671875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 15.435200691223145,
"rewards/margins": 40.45425796508789,
"rewards/rejected": -25.01905632019043,
"step": 16
},
{
"epoch": 0.034,
"grad_norm": 39.93281936645508,
"learning_rate": 2.1666666666666667e-05,
"logits/chosen": -1.261150598526001,
"logits/rejected": -0.5414608120918274,
"logps/chosen": -169.63702392578125,
"logps/rejected": -218.22845458984375,
"loss": 2.1803,
"rewards/accuracies": 0.75,
"rewards/chosen": 19.81353759765625,
"rewards/margins": 21.26726722717285,
"rewards/rejected": -1.4537286758422852,
"step": 17
},
{
"epoch": 0.036,
"grad_norm": 0.3639410138130188,
"learning_rate": 2e-05,
"logits/chosen": -1.1032836437225342,
"logits/rejected": -0.42093324661254883,
"logps/chosen": -250.34255981445312,
"logps/rejected": -339.02777099609375,
"loss": 0.001,
"rewards/accuracies": 1.0,
"rewards/chosen": 29.49982452392578,
"rewards/margins": 29.380714416503906,
"rewards/rejected": 0.11910903453826904,
"step": 18
},
{
"epoch": 0.038,
"grad_norm": 91.46900939941406,
"learning_rate": 1.8333333333333333e-05,
"logits/chosen": -1.382272720336914,
"logits/rejected": -1.3159658908843994,
"logps/chosen": -157.1051788330078,
"logps/rejected": -93.73989868164062,
"loss": 4.247,
"rewards/accuracies": 0.25,
"rewards/chosen": 1.4054617881774902,
"rewards/margins": -1.071337103843689,
"rewards/rejected": 2.4767990112304688,
"step": 19
},
{
"epoch": 0.04,
"grad_norm": 0.07625256478786469,
"learning_rate": 1.6666666666666667e-05,
"logits/chosen": -1.2797192335128784,
"logits/rejected": -0.41436293721199036,
"logps/chosen": -233.0648956298828,
"logps/rejected": -280.9295959472656,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/chosen": 14.561626434326172,
"rewards/margins": 24.783857345581055,
"rewards/rejected": -10.222232818603516,
"step": 20
},
{
"epoch": 0.042,
"grad_norm": 0.0006307783187367022,
"learning_rate": 1.5e-05,
"logits/chosen": -1.200671672821045,
"logits/rejected": -0.8377301692962646,
"logps/chosen": -181.38812255859375,
"logps/rejected": -400.5192565917969,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 20.238813400268555,
"rewards/margins": 29.048757553100586,
"rewards/rejected": -8.809943199157715,
"step": 21
},
{
"epoch": 0.044,
"grad_norm": 3.4681459510466084e-05,
"learning_rate": 1.3333333333333333e-05,
"logits/chosen": -1.0809839963912964,
"logits/rejected": -1.0748283863067627,
"logps/chosen": -210.24159240722656,
"logps/rejected": -478.85113525390625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 14.9466552734375,
"rewards/margins": 30.102947235107422,
"rewards/rejected": -15.156291007995605,
"step": 22
},
{
"epoch": 0.046,
"grad_norm": 1.9936389435315505e-05,
"learning_rate": 1.1666666666666668e-05,
"logits/chosen": -1.1689856052398682,
"logits/rejected": -0.40747758746147156,
"logps/chosen": -278.3692932128906,
"logps/rejected": -260.0756530761719,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 22.959720611572266,
"rewards/margins": 28.647663116455078,
"rewards/rejected": -5.687943458557129,
"step": 23
},
{
"epoch": 0.048,
"grad_norm": 2.3284033886739053e-05,
"learning_rate": 1e-05,
"logits/chosen": -0.9488776922225952,
"logits/rejected": -0.9771822094917297,
"logps/chosen": -107.5537109375,
"logps/rejected": -420.6499328613281,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 13.997261047363281,
"rewards/margins": 30.08607292175293,
"rewards/rejected": -16.08881187438965,
"step": 24
},
{
"epoch": 0.05,
"grad_norm": 0.00019201346731279045,
"learning_rate": 8.333333333333334e-06,
"logits/chosen": -1.0477547645568848,
"logits/rejected": -0.8248160481452942,
"logps/chosen": -200.435546875,
"logps/rejected": -410.39874267578125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 12.744895935058594,
"rewards/margins": 28.151798248291016,
"rewards/rejected": -15.406902313232422,
"step": 25
},
{
"epoch": 0.052,
"grad_norm": 9.264203981729224e-06,
"learning_rate": 6.666666666666667e-06,
"logits/chosen": -0.795575737953186,
"logits/rejected": -0.8969402313232422,
"logps/chosen": -187.31686401367188,
"logps/rejected": -461.4927673339844,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 17.49878692626953,
"rewards/margins": 30.602787017822266,
"rewards/rejected": -13.10400104522705,
"step": 26
},
{
"epoch": 0.054,
"grad_norm": 0.0012664368841797113,
"learning_rate": 5e-06,
"logits/chosen": -1.146821141242981,
"logits/rejected": -1.307145118713379,
"logps/chosen": -82.47093963623047,
"logps/rejected": -296.2177734375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.118415832519531,
"rewards/margins": 22.095069885253906,
"rewards/rejected": -11.976654052734375,
"step": 27
},
{
"epoch": 0.056,
"grad_norm": 24.4777889251709,
"learning_rate": 3.3333333333333333e-06,
"logits/chosen": -1.1492491960525513,
"logits/rejected": -0.9397568702697754,
"logps/chosen": -207.41566467285156,
"logps/rejected": -408.0826110839844,
"loss": 0.219,
"rewards/accuracies": 0.75,
"rewards/chosen": 14.095235824584961,
"rewards/margins": 25.648344039916992,
"rewards/rejected": -11.553108215332031,
"step": 28
},
{
"epoch": 0.058,
"grad_norm": 0.1801503300666809,
"learning_rate": 1.6666666666666667e-06,
"logits/chosen": -1.2135852575302124,
"logits/rejected": -0.22595801949501038,
"logps/chosen": -190.64984130859375,
"logps/rejected": -219.69593811035156,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.875904083251953,
"rewards/margins": 18.048542022705078,
"rewards/rejected": -8.172636985778809,
"step": 29
},
{
"epoch": 0.06,
"grad_norm": 0.11778837442398071,
"learning_rate": 0.0,
"logits/chosen": -0.8404010534286499,
"logits/rejected": -0.7674828171730042,
"logps/chosen": -144.8819580078125,
"logps/rejected": -553.426513671875,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": 18.967082977294922,
"rewards/margins": 35.870948791503906,
"rewards/rejected": -16.903865814208984,
"step": 30
}
],
"logging_steps": 1,
"max_steps": 30,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}