{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.12040939193257075,
"eval_steps": 200,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0012040939193257074,
"grad_norm": 4.470886707305908,
"learning_rate": 1.9975915221579964e-05,
"loss": 2.6688,
"step": 5
},
{
"epoch": 0.002408187838651415,
"grad_norm": 4.067092418670654,
"learning_rate": 1.9951830443159926e-05,
"loss": 2.2509,
"step": 10
},
{
"epoch": 0.003612281757977122,
"grad_norm": 4.133108615875244,
"learning_rate": 1.9927745664739885e-05,
"loss": 2.3732,
"step": 15
},
{
"epoch": 0.00481637567730283,
"grad_norm": 3.4890763759613037,
"learning_rate": 1.9903660886319848e-05,
"loss": 2.3632,
"step": 20
},
{
"epoch": 0.006020469596628537,
"grad_norm": 4.1045308113098145,
"learning_rate": 1.987957610789981e-05,
"loss": 2.5203,
"step": 25
},
{
"epoch": 0.007224563515954244,
"grad_norm": 4.26784086227417,
"learning_rate": 1.985549132947977e-05,
"loss": 2.3349,
"step": 30
},
{
"epoch": 0.008428657435279952,
"grad_norm": 4.144766330718994,
"learning_rate": 1.983140655105973e-05,
"loss": 2.4106,
"step": 35
},
{
"epoch": 0.00963275135460566,
"grad_norm": 3.9538934230804443,
"learning_rate": 1.9807321772639694e-05,
"loss": 2.3192,
"step": 40
},
{
"epoch": 0.010836845273931367,
"grad_norm": 3.9219865798950195,
"learning_rate": 1.9783236994219656e-05,
"loss": 2.2012,
"step": 45
},
{
"epoch": 0.012040939193257074,
"grad_norm": 3.391493797302246,
"learning_rate": 1.9759152215799615e-05,
"loss": 2.4064,
"step": 50
},
{
"epoch": 0.013245033112582781,
"grad_norm": 4.393350124359131,
"learning_rate": 1.9735067437379577e-05,
"loss": 2.1513,
"step": 55
},
{
"epoch": 0.014449127031908489,
"grad_norm": 3.6243207454681396,
"learning_rate": 1.971098265895954e-05,
"loss": 2.2788,
"step": 60
},
{
"epoch": 0.015653220951234198,
"grad_norm": 3.642468214035034,
"learning_rate": 1.9686897880539502e-05,
"loss": 2.2244,
"step": 65
},
{
"epoch": 0.016857314870559904,
"grad_norm": 4.0894670486450195,
"learning_rate": 1.966281310211946e-05,
"loss": 2.3622,
"step": 70
},
{
"epoch": 0.018061408789885613,
"grad_norm": 4.033819198608398,
"learning_rate": 1.9638728323699423e-05,
"loss": 2.3803,
"step": 75
},
{
"epoch": 0.01926550270921132,
"grad_norm": 3.535987138748169,
"learning_rate": 1.9614643545279385e-05,
"loss": 2.2161,
"step": 80
},
{
"epoch": 0.020469596628537028,
"grad_norm": 3.541868209838867,
"learning_rate": 1.9590558766859348e-05,
"loss": 2.1721,
"step": 85
},
{
"epoch": 0.021673690547862733,
"grad_norm": 4.147072792053223,
"learning_rate": 1.9566473988439307e-05,
"loss": 2.3239,
"step": 90
},
{
"epoch": 0.022877784467188442,
"grad_norm": 3.4949986934661865,
"learning_rate": 1.954238921001927e-05,
"loss": 2.265,
"step": 95
},
{
"epoch": 0.024081878386514148,
"grad_norm": 3.793950319290161,
"learning_rate": 1.951830443159923e-05,
"loss": 2.152,
"step": 100
},
{
"epoch": 0.025285972305839857,
"grad_norm": 3.9355053901672363,
"learning_rate": 1.949421965317919e-05,
"loss": 2.2534,
"step": 105
},
{
"epoch": 0.026490066225165563,
"grad_norm": 3.255175828933716,
"learning_rate": 1.9470134874759156e-05,
"loss": 2.2971,
"step": 110
},
{
"epoch": 0.027694160144491272,
"grad_norm": 3.650298595428467,
"learning_rate": 1.9446050096339115e-05,
"loss": 2.1228,
"step": 115
},
{
"epoch": 0.028898254063816978,
"grad_norm": 3.1906814575195312,
"learning_rate": 1.9421965317919077e-05,
"loss": 2.0995,
"step": 120
},
{
"epoch": 0.030102347983142687,
"grad_norm": 3.8122494220733643,
"learning_rate": 1.939788053949904e-05,
"loss": 2.1651,
"step": 125
},
{
"epoch": 0.031306441902468396,
"grad_norm": 3.8269336223602295,
"learning_rate": 1.9373795761079e-05,
"loss": 2.1731,
"step": 130
},
{
"epoch": 0.0325105358217941,
"grad_norm": 3.75238037109375,
"learning_rate": 1.934971098265896e-05,
"loss": 2.3071,
"step": 135
},
{
"epoch": 0.03371462974111981,
"grad_norm": 3.538330078125,
"learning_rate": 1.9325626204238923e-05,
"loss": 2.3015,
"step": 140
},
{
"epoch": 0.034918723660445516,
"grad_norm": 3.497131586074829,
"learning_rate": 1.9301541425818882e-05,
"loss": 2.128,
"step": 145
},
{
"epoch": 0.036122817579771226,
"grad_norm": 3.6173276901245117,
"learning_rate": 1.9277456647398845e-05,
"loss": 2.1792,
"step": 150
},
{
"epoch": 0.03732691149909693,
"grad_norm": 3.2987892627716064,
"learning_rate": 1.9253371868978807e-05,
"loss": 2.0246,
"step": 155
},
{
"epoch": 0.03853100541842264,
"grad_norm": 3.1787831783294678,
"learning_rate": 1.922928709055877e-05,
"loss": 2.2108,
"step": 160
},
{
"epoch": 0.039735099337748346,
"grad_norm": 3.5422236919403076,
"learning_rate": 1.920520231213873e-05,
"loss": 2.1738,
"step": 165
},
{
"epoch": 0.040939193257074055,
"grad_norm": 3.7987539768218994,
"learning_rate": 1.918111753371869e-05,
"loss": 2.1161,
"step": 170
},
{
"epoch": 0.04214328717639976,
"grad_norm": 3.2058522701263428,
"learning_rate": 1.9157032755298653e-05,
"loss": 2.0808,
"step": 175
},
{
"epoch": 0.04334738109572547,
"grad_norm": 3.00519061088562,
"learning_rate": 1.9132947976878615e-05,
"loss": 2.1412,
"step": 180
},
{
"epoch": 0.044551475015051176,
"grad_norm": 3.4471330642700195,
"learning_rate": 1.9108863198458578e-05,
"loss": 2.1695,
"step": 185
},
{
"epoch": 0.045755568934376885,
"grad_norm": 3.394496440887451,
"learning_rate": 1.9084778420038536e-05,
"loss": 1.9532,
"step": 190
},
{
"epoch": 0.04695966285370259,
"grad_norm": 3.03004789352417,
"learning_rate": 1.90606936416185e-05,
"loss": 2.0659,
"step": 195
},
{
"epoch": 0.048163756773028296,
"grad_norm": 3.4260365962982178,
"learning_rate": 1.903660886319846e-05,
"loss": 2.0792,
"step": 200
},
{
"epoch": 0.048163756773028296,
"eval_loss": 2.1430513858795166,
"eval_runtime": 16.4051,
"eval_samples_per_second": 6.096,
"eval_steps_per_second": 0.792,
"step": 200
},
{
"epoch": 0.049367850692354005,
"grad_norm": 4.670680999755859,
"learning_rate": 1.901252408477842e-05,
"loss": 2.0952,
"step": 205
},
{
"epoch": 0.050571944611679714,
"grad_norm": 3.510042667388916,
"learning_rate": 1.8988439306358382e-05,
"loss": 2.195,
"step": 210
},
{
"epoch": 0.05177603853100542,
"grad_norm": 3.0459847450256348,
"learning_rate": 1.8964354527938345e-05,
"loss": 2.2117,
"step": 215
},
{
"epoch": 0.052980132450331126,
"grad_norm": 4.36016321182251,
"learning_rate": 1.8940269749518304e-05,
"loss": 2.1191,
"step": 220
},
{
"epoch": 0.054184226369656835,
"grad_norm": 3.0498242378234863,
"learning_rate": 1.891618497109827e-05,
"loss": 2.0838,
"step": 225
},
{
"epoch": 0.055388320288982544,
"grad_norm": 3.218038558959961,
"learning_rate": 1.889210019267823e-05,
"loss": 2.1118,
"step": 230
},
{
"epoch": 0.056592414208308246,
"grad_norm": 3.3144683837890625,
"learning_rate": 1.886801541425819e-05,
"loss": 2.2176,
"step": 235
},
{
"epoch": 0.057796508127633955,
"grad_norm": 3.2364652156829834,
"learning_rate": 1.8843930635838153e-05,
"loss": 2.112,
"step": 240
},
{
"epoch": 0.059000602046959665,
"grad_norm": 3.291278839111328,
"learning_rate": 1.8819845857418112e-05,
"loss": 2.144,
"step": 245
},
{
"epoch": 0.060204695966285374,
"grad_norm": 3.65297794342041,
"learning_rate": 1.8795761078998074e-05,
"loss": 2.2597,
"step": 250
},
{
"epoch": 0.061408789885611076,
"grad_norm": 3.2321982383728027,
"learning_rate": 1.8771676300578037e-05,
"loss": 2.1618,
"step": 255
},
{
"epoch": 0.06261288380493679,
"grad_norm": 3.352842330932617,
"learning_rate": 1.8747591522158e-05,
"loss": 2.006,
"step": 260
},
{
"epoch": 0.0638169777242625,
"grad_norm": 3.5657215118408203,
"learning_rate": 1.8723506743737958e-05,
"loss": 2.2253,
"step": 265
},
{
"epoch": 0.0650210716435882,
"grad_norm": 3.060060739517212,
"learning_rate": 1.869942196531792e-05,
"loss": 2.1187,
"step": 270
},
{
"epoch": 0.06622516556291391,
"grad_norm": 3.473719835281372,
"learning_rate": 1.8675337186897883e-05,
"loss": 2.0299,
"step": 275
},
{
"epoch": 0.06742925948223961,
"grad_norm": 3.1167919635772705,
"learning_rate": 1.8651252408477845e-05,
"loss": 2.0381,
"step": 280
},
{
"epoch": 0.06863335340156532,
"grad_norm": 3.815816640853882,
"learning_rate": 1.8627167630057804e-05,
"loss": 2.1624,
"step": 285
},
{
"epoch": 0.06983744732089103,
"grad_norm": 3.2820959091186523,
"learning_rate": 1.8603082851637766e-05,
"loss": 2.0819,
"step": 290
},
{
"epoch": 0.07104154124021674,
"grad_norm": 3.568885087966919,
"learning_rate": 1.857899807321773e-05,
"loss": 2.0749,
"step": 295
},
{
"epoch": 0.07224563515954245,
"grad_norm": 3.424076795578003,
"learning_rate": 1.855491329479769e-05,
"loss": 2.129,
"step": 300
},
{
"epoch": 0.07344972907886815,
"grad_norm": 3.2800493240356445,
"learning_rate": 1.853082851637765e-05,
"loss": 2.2067,
"step": 305
},
{
"epoch": 0.07465382299819386,
"grad_norm": 3.487868547439575,
"learning_rate": 1.8506743737957612e-05,
"loss": 2.124,
"step": 310
},
{
"epoch": 0.07585791691751957,
"grad_norm": 3.3999245166778564,
"learning_rate": 1.8482658959537575e-05,
"loss": 1.9888,
"step": 315
},
{
"epoch": 0.07706201083684527,
"grad_norm": 3.973482370376587,
"learning_rate": 1.8458574181117533e-05,
"loss": 2.0592,
"step": 320
},
{
"epoch": 0.07826610475617098,
"grad_norm": 2.9601657390594482,
"learning_rate": 1.8434489402697496e-05,
"loss": 2.1022,
"step": 325
},
{
"epoch": 0.07947019867549669,
"grad_norm": 3.260118246078491,
"learning_rate": 1.8410404624277458e-05,
"loss": 1.9763,
"step": 330
},
{
"epoch": 0.0806742925948224,
"grad_norm": 3.509838819503784,
"learning_rate": 1.838631984585742e-05,
"loss": 2.0284,
"step": 335
},
{
"epoch": 0.08187838651414811,
"grad_norm": 4.363494396209717,
"learning_rate": 1.8362235067437383e-05,
"loss": 2.0479,
"step": 340
},
{
"epoch": 0.08308248043347381,
"grad_norm": 3.2578630447387695,
"learning_rate": 1.8338150289017342e-05,
"loss": 2.0488,
"step": 345
},
{
"epoch": 0.08428657435279951,
"grad_norm": 3.2846531867980957,
"learning_rate": 1.8314065510597304e-05,
"loss": 2.0876,
"step": 350
},
{
"epoch": 0.08549066827212523,
"grad_norm": 3.3275203704833984,
"learning_rate": 1.8289980732177266e-05,
"loss": 2.0564,
"step": 355
},
{
"epoch": 0.08669476219145093,
"grad_norm": 3.1368625164031982,
"learning_rate": 1.8265895953757225e-05,
"loss": 2.1533,
"step": 360
},
{
"epoch": 0.08789885611077664,
"grad_norm": 3.3824191093444824,
"learning_rate": 1.8241811175337188e-05,
"loss": 2.1821,
"step": 365
},
{
"epoch": 0.08910295003010235,
"grad_norm": 3.5150134563446045,
"learning_rate": 1.821772639691715e-05,
"loss": 2.0292,
"step": 370
},
{
"epoch": 0.09030704394942805,
"grad_norm": 3.421921730041504,
"learning_rate": 1.8193641618497112e-05,
"loss": 1.9862,
"step": 375
},
{
"epoch": 0.09151113786875377,
"grad_norm": 3.616887092590332,
"learning_rate": 1.8169556840077075e-05,
"loss": 2.0158,
"step": 380
},
{
"epoch": 0.09271523178807947,
"grad_norm": 5.063056945800781,
"learning_rate": 1.8145472061657034e-05,
"loss": 2.0579,
"step": 385
},
{
"epoch": 0.09391932570740517,
"grad_norm": 3.5242559909820557,
"learning_rate": 1.8121387283236996e-05,
"loss": 2.0272,
"step": 390
},
{
"epoch": 0.09512341962673089,
"grad_norm": 3.2852962017059326,
"learning_rate": 1.809730250481696e-05,
"loss": 2.077,
"step": 395
},
{
"epoch": 0.09632751354605659,
"grad_norm": 3.710927963256836,
"learning_rate": 1.8073217726396917e-05,
"loss": 2.1271,
"step": 400
},
{
"epoch": 0.09632751354605659,
"eval_loss": 2.0655810832977295,
"eval_runtime": 16.3755,
"eval_samples_per_second": 6.107,
"eval_steps_per_second": 0.794,
"step": 400
},
{
"epoch": 0.0975316074653823,
"grad_norm": 3.5019216537475586,
"learning_rate": 1.804913294797688e-05,
"loss": 2.1081,
"step": 405
},
{
"epoch": 0.09873570138470801,
"grad_norm": 3.5533690452575684,
"learning_rate": 1.8025048169556842e-05,
"loss": 2.0751,
"step": 410
},
{
"epoch": 0.09993979530403371,
"grad_norm": 3.4970240592956543,
"learning_rate": 1.8000963391136804e-05,
"loss": 2.066,
"step": 415
},
{
"epoch": 0.10114388922335943,
"grad_norm": 3.0926427841186523,
"learning_rate": 1.7976878612716763e-05,
"loss": 2.0516,
"step": 420
},
{
"epoch": 0.10234798314268513,
"grad_norm": 3.747452974319458,
"learning_rate": 1.7952793834296726e-05,
"loss": 2.0721,
"step": 425
},
{
"epoch": 0.10355207706201083,
"grad_norm": 3.3113677501678467,
"learning_rate": 1.7928709055876688e-05,
"loss": 2.1527,
"step": 430
},
{
"epoch": 0.10475617098133655,
"grad_norm": 3.357912063598633,
"learning_rate": 1.7904624277456647e-05,
"loss": 2.0113,
"step": 435
},
{
"epoch": 0.10596026490066225,
"grad_norm": 3.023893356323242,
"learning_rate": 1.7880539499036613e-05,
"loss": 2.1332,
"step": 440
},
{
"epoch": 0.10716435881998795,
"grad_norm": 3.3027355670928955,
"learning_rate": 1.785645472061657e-05,
"loss": 1.9699,
"step": 445
},
{
"epoch": 0.10836845273931367,
"grad_norm": 5.3524932861328125,
"learning_rate": 1.7832369942196534e-05,
"loss": 2.0182,
"step": 450
},
{
"epoch": 0.10957254665863937,
"grad_norm": 3.200258731842041,
"learning_rate": 1.7808285163776496e-05,
"loss": 2.007,
"step": 455
},
{
"epoch": 0.11077664057796509,
"grad_norm": 3.286268949508667,
"learning_rate": 1.7784200385356455e-05,
"loss": 2.0907,
"step": 460
},
{
"epoch": 0.11198073449729079,
"grad_norm": 3.15291428565979,
"learning_rate": 1.7760115606936417e-05,
"loss": 2.0468,
"step": 465
},
{
"epoch": 0.11318482841661649,
"grad_norm": 3.3798069953918457,
"learning_rate": 1.773603082851638e-05,
"loss": 1.9927,
"step": 470
},
{
"epoch": 0.11438892233594221,
"grad_norm": 3.4220967292785645,
"learning_rate": 1.771194605009634e-05,
"loss": 2.1326,
"step": 475
},
{
"epoch": 0.11559301625526791,
"grad_norm": 3.379628896713257,
"learning_rate": 1.76878612716763e-05,
"loss": 1.9202,
"step": 480
},
{
"epoch": 0.11679711017459361,
"grad_norm": 3.3020846843719482,
"learning_rate": 1.7663776493256263e-05,
"loss": 2.1176,
"step": 485
},
{
"epoch": 0.11800120409391933,
"grad_norm": 3.2711665630340576,
"learning_rate": 1.7639691714836226e-05,
"loss": 2.0865,
"step": 490
},
{
"epoch": 0.11920529801324503,
"grad_norm": 3.239253520965576,
"learning_rate": 1.7615606936416188e-05,
"loss": 1.9284,
"step": 495
},
{
"epoch": 0.12040939193257075,
"grad_norm": 3.4960460662841797,
"learning_rate": 1.7591522157996147e-05,
"loss": 2.0088,
"step": 500
}
],
"logging_steps": 5,
"max_steps": 4152,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 529407992473776.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}