{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.4339419978517722,
"eval_steps": 500,
"global_step": 800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04296455424274973,
"grad_norm": 1.6397182941436768,
"learning_rate": 5e-06,
"loss": 3.4918,
"step": 10
},
{
"epoch": 0.08592910848549946,
"grad_norm": 1.1852494478225708,
"learning_rate": 4.998023493068255e-06,
"loss": 3.1439,
"step": 20
},
{
"epoch": 0.1288936627282492,
"grad_norm": 0.813017725944519,
"learning_rate": 4.99209709753674e-06,
"loss": 2.7821,
"step": 30
},
{
"epoch": 0.17185821697099893,
"grad_norm": 0.8324390053749084,
"learning_rate": 4.982230184254934e-06,
"loss": 2.5004,
"step": 40
},
{
"epoch": 0.21482277121374865,
"grad_norm": 0.7486415505409241,
"learning_rate": 4.968438354840834e-06,
"loss": 2.2932,
"step": 50
},
{
"epoch": 0.2577873254564984,
"grad_norm": 0.8176449537277222,
"learning_rate": 4.950743417011591e-06,
"loss": 2.0388,
"step": 60
},
{
"epoch": 0.3007518796992481,
"grad_norm": 1.1593029499053955,
"learning_rate": 4.929173350101025e-06,
"loss": 1.798,
"step": 70
},
{
"epoch": 0.34371643394199786,
"grad_norm": 0.8432386517524719,
"learning_rate": 4.903762260818552e-06,
"loss": 1.5069,
"step": 80
},
{
"epoch": 0.3866809881847476,
"grad_norm": 0.5978631973266602,
"learning_rate": 4.874550329319457e-06,
"loss": 1.2985,
"step": 90
},
{
"epoch": 0.4296455424274973,
"grad_norm": 0.43345287442207336,
"learning_rate": 4.84158374567182e-06,
"loss": 1.1899,
"step": 100
},
{
"epoch": 0.47261009667024706,
"grad_norm": 0.41255077719688416,
"learning_rate": 4.804914636820517e-06,
"loss": 1.207,
"step": 110
},
{
"epoch": 0.5155746509129968,
"grad_norm": 0.3531062602996826,
"learning_rate": 4.764600984163809e-06,
"loss": 1.097,
"step": 120
},
{
"epoch": 0.5585392051557465,
"grad_norm": 0.3125358819961548,
"learning_rate": 4.72070653187283e-06,
"loss": 1.0538,
"step": 130
},
{
"epoch": 0.6015037593984962,
"grad_norm": 0.3715002238750458,
"learning_rate": 4.673300686098957e-06,
"loss": 1.0238,
"step": 140
},
{
"epoch": 0.644468313641246,
"grad_norm": 0.3236304819583893,
"learning_rate": 4.622458405228411e-06,
"loss": 1.0329,
"step": 150
},
{
"epoch": 0.6874328678839957,
"grad_norm": 0.389879435300827,
"learning_rate": 4.568260081357644e-06,
"loss": 1.0452,
"step": 160
},
{
"epoch": 0.7303974221267454,
"grad_norm": 0.3259812593460083,
"learning_rate": 4.510791413176912e-06,
"loss": 1.0407,
"step": 170
},
{
"epoch": 0.7733619763694952,
"grad_norm": 0.3405851125717163,
"learning_rate": 4.450143270463031e-06,
"loss": 1.0219,
"step": 180
},
{
"epoch": 0.8163265306122449,
"grad_norm": 0.35685139894485474,
"learning_rate": 4.386411550395576e-06,
"loss": 0.9988,
"step": 190
},
{
"epoch": 0.8592910848549946,
"grad_norm": 0.3287046253681183,
"learning_rate": 4.319697025923736e-06,
"loss": 1.0164,
"step": 200
},
{
"epoch": 0.9022556390977443,
"grad_norm": 0.33687785267829895,
"learning_rate": 4.250105186423564e-06,
"loss": 0.9938,
"step": 210
},
{
"epoch": 0.9452201933404941,
"grad_norm": 0.40256965160369873,
"learning_rate": 4.177746070897593e-06,
"loss": 1.001,
"step": 220
},
{
"epoch": 0.9881847475832438,
"grad_norm": 0.48085835576057434,
"learning_rate": 4.10273409398055e-06,
"loss": 1.0145,
"step": 230
},
{
"epoch": 1.0300751879699248,
"grad_norm": 0.4048563241958618,
"learning_rate": 4.025187865026311e-06,
"loss": 0.9861,
"step": 240
},
{
"epoch": 1.0730397422126745,
"grad_norm": 0.34213146567344666,
"learning_rate": 3.945230000562121e-06,
"loss": 0.9843,
"step": 250
},
{
"epoch": 1.1160042964554242,
"grad_norm": 0.3930899500846863,
"learning_rate": 3.862986930406669e-06,
"loss": 0.9539,
"step": 260
},
{
"epoch": 1.158968850698174,
"grad_norm": 0.41636350750923157,
"learning_rate": 3.7785886977585562e-06,
"loss": 0.9629,
"step": 270
},
{
"epoch": 1.2019334049409238,
"grad_norm": 0.3695228397846222,
"learning_rate": 3.6921687535712657e-06,
"loss": 0.9778,
"step": 280
},
{
"epoch": 1.2448979591836735,
"grad_norm": 0.3623664677143097,
"learning_rate": 3.6038637455397802e-06,
"loss": 0.9613,
"step": 290
},
{
"epoch": 1.2878625134264232,
"grad_norm": 0.4789970815181732,
"learning_rate": 3.513813302032485e-06,
"loss": 0.9671,
"step": 300
},
{
"epoch": 1.330827067669173,
"grad_norm": 0.4214249849319458,
"learning_rate": 3.4221598113100196e-06,
"loss": 0.9597,
"step": 310
},
{
"epoch": 1.3737916219119226,
"grad_norm": 0.4314541518688202,
"learning_rate": 3.32904819638017e-06,
"loss": 0.9872,
"step": 320
},
{
"epoch": 1.4167561761546725,
"grad_norm": 0.45763522386550903,
"learning_rate": 3.234625685844803e-06,
"loss": 1.006,
"step": 330
},
{
"epoch": 1.459720730397422,
"grad_norm": 0.41263076663017273,
"learning_rate": 3.139041581101187e-06,
"loss": 0.973,
"step": 340
},
{
"epoch": 1.502685284640172,
"grad_norm": 0.5277674198150635,
"learning_rate": 3.0424470202657953e-06,
"loss": 0.9525,
"step": 350
},
{
"epoch": 1.5456498388829216,
"grad_norm": 0.5219724178314209,
"learning_rate": 2.9449947391938768e-06,
"loss": 0.9516,
"step": 360
},
{
"epoch": 1.5886143931256713,
"grad_norm": 0.47409552335739136,
"learning_rate": 2.8468388299726714e-06,
"loss": 0.9599,
"step": 370
},
{
"epoch": 1.631578947368421,
"grad_norm": 0.4466871917247772,
"learning_rate": 2.7481344972701545e-06,
"loss": 0.952,
"step": 380
},
{
"epoch": 1.6745435016111707,
"grad_norm": 0.4826906621456146,
"learning_rate": 2.64903781292455e-06,
"loss": 0.9492,
"step": 390
},
{
"epoch": 1.7175080558539206,
"grad_norm": 0.4338424801826477,
"learning_rate": 2.5497054691626754e-06,
"loss": 0.9584,
"step": 400
},
{
"epoch": 1.76047261009667,
"grad_norm": 0.6052074432373047,
"learning_rate": 2.4502945308373246e-06,
"loss": 0.9446,
"step": 410
},
{
"epoch": 1.80343716433942,
"grad_norm": 0.5061154961585999,
"learning_rate": 2.3509621870754505e-06,
"loss": 0.9369,
"step": 420
},
{
"epoch": 1.8464017185821697,
"grad_norm": 0.47927358746528625,
"learning_rate": 2.2518655027298468e-06,
"loss": 1.0038,
"step": 430
},
{
"epoch": 1.8893662728249194,
"grad_norm": 0.4831957221031189,
"learning_rate": 2.15316117002733e-06,
"loss": 0.9501,
"step": 440
},
{
"epoch": 1.9323308270676691,
"grad_norm": 0.4502115249633789,
"learning_rate": 2.055005260806125e-06,
"loss": 0.9333,
"step": 450
},
{
"epoch": 1.9752953813104188,
"grad_norm": 0.6315404772758484,
"learning_rate": 1.957552979734205e-06,
"loss": 0.9279,
"step": 460
},
{
"epoch": 2.0171858216970997,
"grad_norm": 0.5396577715873718,
"learning_rate": 1.8609584188988135e-06,
"loss": 0.9254,
"step": 470
},
{
"epoch": 2.0601503759398496,
"grad_norm": 0.49860578775405884,
"learning_rate": 1.7653743141551983e-06,
"loss": 0.9307,
"step": 480
},
{
"epoch": 2.1031149301825995,
"grad_norm": 0.4839128255844116,
"learning_rate": 1.6709518036198307e-06,
"loss": 0.9491,
"step": 490
},
{
"epoch": 2.146079484425349,
"grad_norm": 0.4922753572463989,
"learning_rate": 1.5778401886899808e-06,
"loss": 0.9209,
"step": 500
},
{
"epoch": 2.189044038668099,
"grad_norm": 0.6326885223388672,
"learning_rate": 1.4861866979675155e-06,
"loss": 0.9163,
"step": 510
},
{
"epoch": 2.2320085929108484,
"grad_norm": 0.5038188099861145,
"learning_rate": 1.3961362544602215e-06,
"loss": 0.934,
"step": 520
},
{
"epoch": 2.2749731471535983,
"grad_norm": 0.5759682059288025,
"learning_rate": 1.3078312464287355e-06,
"loss": 0.9335,
"step": 530
},
{
"epoch": 2.317937701396348,
"grad_norm": 0.5074816942214966,
"learning_rate": 1.2214113022414448e-06,
"loss": 0.9194,
"step": 540
},
{
"epoch": 2.3609022556390977,
"grad_norm": 0.5167147517204285,
"learning_rate": 1.1370130695933317e-06,
"loss": 0.9442,
"step": 550
},
{
"epoch": 2.4038668098818476,
"grad_norm": 0.4864475131034851,
"learning_rate": 1.0547699994378787e-06,
"loss": 0.9248,
"step": 560
},
{
"epoch": 2.446831364124597,
"grad_norm": 0.5640864968299866,
"learning_rate": 9.74812134973689e-07,
"loss": 0.9063,
"step": 570
},
{
"epoch": 2.489795918367347,
"grad_norm": 0.5532556772232056,
"learning_rate": 8.972659060194505e-07,
"loss": 0.9045,
"step": 580
},
{
"epoch": 2.5327604726100965,
"grad_norm": 0.539094090461731,
"learning_rate": 8.222539291024079e-07,
"loss": 0.9115,
"step": 590
},
{
"epoch": 2.5757250268528464,
"grad_norm": 0.5470451712608337,
"learning_rate": 7.49894813576437e-07,
"loss": 0.9261,
"step": 600
},
{
"epoch": 2.6186895810955964,
"grad_norm": 0.6289984583854675,
"learning_rate": 6.803029740762648e-07,
"loss": 0.9183,
"step": 610
},
{
"epoch": 2.661654135338346,
"grad_norm": 0.6060165166854858,
"learning_rate": 6.135884496044245e-07,
"loss": 0.9308,
"step": 620
},
{
"epoch": 2.7046186895810957,
"grad_norm": 0.5452494025230408,
"learning_rate": 5.4985672953697e-07,
"loss": 0.9702,
"step": 630
},
{
"epoch": 2.7475832438238452,
"grad_norm": 0.577720582485199,
"learning_rate": 4.892085868230881e-07,
"loss": 0.9176,
"step": 640
},
{
"epoch": 2.790547798066595,
"grad_norm": 0.5641165375709534,
"learning_rate": 4.317399186423574e-07,
"loss": 0.9452,
"step": 650
},
{
"epoch": 2.833512352309345,
"grad_norm": 0.5322738289833069,
"learning_rate": 3.7754159477158994e-07,
"loss": 0.9271,
"step": 660
},
{
"epoch": 2.8764769065520945,
"grad_norm": 0.6401134133338928,
"learning_rate": 3.266993139010438e-07,
"loss": 0.9151,
"step": 670
},
{
"epoch": 2.919441460794844,
"grad_norm": 0.653149425983429,
"learning_rate": 2.792934681271708e-07,
"loss": 0.8951,
"step": 680
},
{
"epoch": 2.962406015037594,
"grad_norm": 0.5724528431892395,
"learning_rate": 2.3539901583619186e-07,
"loss": 0.9102,
"step": 690
},
{
"epoch": 3.004296455424275,
"grad_norm": 0.5610048174858093,
"learning_rate": 1.9508536317948358e-07,
"loss": 0.8989,
"step": 700
},
{
"epoch": 3.0472610096670247,
"grad_norm": 0.6965809464454651,
"learning_rate": 1.584162543281806e-07,
"loss": 0.9079,
"step": 710
},
{
"epoch": 3.090225563909774,
"grad_norm": 0.594599723815918,
"learning_rate": 1.2544967068054332e-07,
"loss": 0.9214,
"step": 720
},
{
"epoch": 3.133190118152524,
"grad_norm": 0.5906569361686707,
"learning_rate": 9.623773918144896e-08,
"loss": 0.9054,
"step": 730
},
{
"epoch": 3.176154672395274,
"grad_norm": 0.5923225283622742,
"learning_rate": 7.082664989897486e-08,
"loss": 0.9305,
"step": 740
},
{
"epoch": 3.2191192266380235,
"grad_norm": 0.5577116012573242,
"learning_rate": 4.9256582988409795e-08,
"loss": 0.9231,
"step": 750
},
{
"epoch": 3.2620837808807734,
"grad_norm": 0.5889368057250977,
"learning_rate": 3.15616451591666e-08,
"loss": 0.9072,
"step": 760
},
{
"epoch": 3.305048335123523,
"grad_norm": 0.5589332580566406,
"learning_rate": 1.7769815745066476e-08,
"loss": 0.9138,
"step": 770
},
{
"epoch": 3.348012889366273,
"grad_norm": 0.646337628364563,
"learning_rate": 7.90290246326042e-09,
"loss": 0.9175,
"step": 780
},
{
"epoch": 3.3909774436090228,
"grad_norm": 0.6335355043411255,
"learning_rate": 1.976506931745392e-09,
"loss": 0.9135,
"step": 790
},
{
"epoch": 3.4339419978517722,
"grad_norm": 0.7010214924812317,
"learning_rate": 0.0,
"loss": 0.9279,
"step": 800
}
],
"logging_steps": 10,
"max_steps": 800,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.9266685807744512e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}