{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.993485342019544,
  "eval_steps": 500,
  "global_step": 1533,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03257328990228013,
      "grad_norm": 10.128602027893066,
      "learning_rate": 2.5974025974025972e-05,
      "loss": 1.7762,
      "step": 10
    },
    {
      "epoch": 0.06514657980456026,
      "grad_norm": 11.557284355163574,
      "learning_rate": 5.1948051948051944e-05,
      "loss": 0.5874,
      "step": 20
    },
    {
      "epoch": 0.09771986970684039,
      "grad_norm": 1.7021989822387695,
      "learning_rate": 7.792207792207793e-05,
      "loss": 0.2361,
      "step": 30
    },
    {
      "epoch": 0.13029315960912052,
      "grad_norm": 2.088670015335083,
      "learning_rate": 0.00010389610389610389,
      "loss": 0.176,
      "step": 40
    },
    {
      "epoch": 0.16286644951140064,
      "grad_norm": 2.070906400680542,
      "learning_rate": 0.00012987012987012987,
      "loss": 0.1546,
      "step": 50
    },
    {
      "epoch": 0.19543973941368079,
      "grad_norm": 1.9704210758209229,
      "learning_rate": 0.00015584415584415587,
      "loss": 0.1463,
      "step": 60
    },
    {
      "epoch": 0.2280130293159609,
      "grad_norm": 1.2319754362106323,
      "learning_rate": 0.00018181818181818183,
      "loss": 0.1306,
      "step": 70
    },
    {
      "epoch": 0.26058631921824105,
      "grad_norm": 1.4384595155715942,
      "learning_rate": 0.0001999979049808708,
      "loss": 0.1218,
      "step": 80
    },
    {
      "epoch": 0.2931596091205212,
      "grad_norm": 1.3861066102981567,
      "learning_rate": 0.00019996066263830531,
      "loss": 0.1031,
      "step": 90
    },
    {
      "epoch": 0.3257328990228013,
      "grad_norm": 1.3069312572479248,
      "learning_rate": 0.00019987688427197897,
      "loss": 0.1125,
      "step": 100
    },
    {
      "epoch": 0.3583061889250814,
      "grad_norm": 1.1740531921386719,
      "learning_rate": 0.0001997466088843548,
      "loss": 0.0984,
      "step": 110
    },
    {
      "epoch": 0.39087947882736157,
      "grad_norm": 0.831253170967102,
      "learning_rate": 0.00019956989712427577,
      "loss": 0.1013,
      "step": 120
    },
    {
      "epoch": 0.4234527687296417,
      "grad_norm": 1.1512802839279175,
      "learning_rate": 0.0001993468312587303,
      "loss": 0.0827,
      "step": 130
    },
    {
      "epoch": 0.4560260586319218,
      "grad_norm": 0.7890095710754395,
      "learning_rate": 0.00019907751513455302,
      "loss": 0.0747,
      "step": 140
    },
    {
      "epoch": 0.48859934853420195,
      "grad_norm": 0.8635206818580627,
      "learning_rate": 0.00019876207413008015,
      "loss": 0.0911,
      "step": 150
    },
    {
      "epoch": 0.5211726384364821,
      "grad_norm": 0.984118640422821,
      "learning_rate": 0.00019840065509677988,
      "loss": 0.0708,
      "step": 160
    },
    {
      "epoch": 0.5537459283387622,
      "grad_norm": 1.1899775266647339,
      "learning_rate": 0.00019799342629088702,
      "loss": 0.0759,
      "step": 170
    },
    {
      "epoch": 0.5863192182410424,
      "grad_norm": 0.8736202120780945,
      "learning_rate": 0.00019754057729507227,
      "loss": 0.0747,
      "step": 180
    },
    {
      "epoch": 0.6188925081433225,
      "grad_norm": 0.6165608763694763,
      "learning_rate": 0.0001970423189301833,
      "loss": 0.0697,
      "step": 190
    },
    {
      "epoch": 0.6514657980456026,
      "grad_norm": 1.2145845890045166,
      "learning_rate": 0.00019649888315709843,
      "loss": 0.0768,
      "step": 200
    },
    {
      "epoch": 0.6840390879478827,
      "grad_norm": 0.7146447896957397,
      "learning_rate": 0.00019591052296873888,
      "loss": 0.0575,
      "step": 210
    },
    {
      "epoch": 0.7166123778501629,
      "grad_norm": 1.0241068601608276,
      "learning_rate": 0.00019527751227228963,
      "loss": 0.0688,
      "step": 220
    },
    {
      "epoch": 0.749185667752443,
      "grad_norm": 0.8579204678535461,
      "learning_rate": 0.00019460014576168358,
      "loss": 0.0544,
      "step": 230
    },
    {
      "epoch": 0.7817589576547231,
      "grad_norm": 0.5284867286682129,
      "learning_rate": 0.0001938787387804088,
      "loss": 0.0516,
      "step": 240
    },
    {
      "epoch": 0.8143322475570033,
      "grad_norm": 0.6213068962097168,
      "learning_rate": 0.00019311362717470268,
      "loss": 0.0501,
      "step": 250
    },
    {
      "epoch": 0.8469055374592834,
      "grad_norm": 0.7320097088813782,
      "learning_rate": 0.00019230516713720052,
      "loss": 0.0581,
      "step": 260
    },
    {
      "epoch": 0.8794788273615635,
      "grad_norm": 0.36288219690322876,
      "learning_rate": 0.00019145373504111279,
      "loss": 0.0478,
      "step": 270
    },
    {
      "epoch": 0.9120521172638436,
      "grad_norm": 0.6096987724304199,
      "learning_rate": 0.00019055972726500695,
      "loss": 0.0548,
      "step": 280
    },
    {
      "epoch": 0.9446254071661238,
      "grad_norm": 0.7391396760940552,
      "learning_rate": 0.0001896235600082759,
      "loss": 0.0444,
      "step": 290
    },
    {
      "epoch": 0.9771986970684039,
      "grad_norm": 0.7820376753807068,
      "learning_rate": 0.00018864566909737937,
      "loss": 0.0566,
      "step": 300
    },
    {
      "epoch": 1.009771986970684,
      "grad_norm": 0.3828499913215637,
      "learning_rate": 0.00018762650978294758,
      "loss": 0.0533,
      "step": 310
    },
    {
      "epoch": 1.0423452768729642,
      "grad_norm": 0.49720415472984314,
      "learning_rate": 0.0001865665565278424,
      "loss": 0.0582,
      "step": 320
    },
    {
      "epoch": 1.0749185667752443,
      "grad_norm": 0.4968441128730774,
      "learning_rate": 0.00018546630278627437,
      "loss": 0.0473,
      "step": 330
    },
    {
      "epoch": 1.1074918566775245,
      "grad_norm": 0.812972366809845,
      "learning_rate": 0.00018432626077407829,
      "loss": 0.0382,
      "step": 340
    },
    {
      "epoch": 1.1400651465798046,
      "grad_norm": 0.4055006802082062,
      "learning_rate": 0.00018314696123025454,
      "loss": 0.043,
      "step": 350
    },
    {
      "epoch": 1.1726384364820848,
      "grad_norm": 0.6709649562835693,
      "learning_rate": 0.0001819289531698871,
      "loss": 0.0343,
      "step": 360
    },
    {
      "epoch": 1.205211726384365,
      "grad_norm": 0.6343613862991333,
      "learning_rate": 0.0001806728036285532,
      "loss": 0.0388,
      "step": 370
    },
    {
      "epoch": 1.237785016286645,
      "grad_norm": 0.44080787897109985,
      "learning_rate": 0.00017937909739834367,
      "loss": 0.0419,
      "step": 380
    },
    {
      "epoch": 1.2703583061889252,
      "grad_norm": 0.714772641658783,
      "learning_rate": 0.00017804843675561677,
      "loss": 0.0476,
      "step": 390
    },
    {
      "epoch": 1.3029315960912053,
      "grad_norm": 0.4127221703529358,
      "learning_rate": 0.00017668144118061262,
      "loss": 0.0354,
      "step": 400
    },
    {
      "epoch": 1.3355048859934853,
      "grad_norm": 0.44271162152290344,
      "learning_rate": 0.00017527874706905805,
      "loss": 0.0385,
      "step": 410
    },
    {
      "epoch": 1.3680781758957654,
      "grad_norm": 0.6225533485412598,
      "learning_rate": 0.00017384100743589697,
      "loss": 0.0665,
      "step": 420
    },
    {
      "epoch": 1.4006514657980456,
      "grad_norm": 0.4418353736400604,
      "learning_rate": 0.0001723688916112835,
      "loss": 0.0554,
      "step": 430
    },
    {
      "epoch": 1.4332247557003257,
      "grad_norm": 0.3992595672607422,
      "learning_rate": 0.00017086308492897983,
      "loss": 0.0522,
      "step": 440
    },
    {
      "epoch": 1.4657980456026058,
      "grad_norm": 0.45478880405426025,
      "learning_rate": 0.0001693242884073035,
      "loss": 0.0398,
      "step": 450
    },
    {
      "epoch": 1.498371335504886,
      "grad_norm": 0.5688516497612,
      "learning_rate": 0.00016775321842277312,
      "loss": 0.032,
      "step": 460
    },
    {
      "epoch": 1.5309446254071661,
      "grad_norm": 0.4958943724632263,
      "learning_rate": 0.00016615060637660388,
      "loss": 0.0422,
      "step": 470
    },
    {
      "epoch": 1.5635179153094463,
      "grad_norm": 0.4975818991661072,
      "learning_rate": 0.00016451719835420877,
      "loss": 0.0317,
      "step": 480
    },
    {
      "epoch": 1.5960912052117264,
      "grad_norm": 0.5288175344467163,
      "learning_rate": 0.00016285375477786322,
      "loss": 0.0412,
      "step": 490
    },
    {
      "epoch": 1.6286644951140063,
      "grad_norm": 0.6416569352149963,
      "learning_rate": 0.0001611610500526957,
      "loss": 0.0275,
      "step": 500
    },
    {
      "epoch": 1.6612377850162865,
      "grad_norm": 0.42349156737327576,
      "learning_rate": 0.00015943987220616855,
      "loss": 0.029,
      "step": 510
    },
    {
      "epoch": 1.6938110749185666,
      "grad_norm": 0.2572161853313446,
      "learning_rate": 0.00015769102252121702,
      "loss": 0.0308,
      "step": 520
    },
    {
      "epoch": 1.7263843648208468,
      "grad_norm": 0.5944684147834778,
      "learning_rate": 0.0001559153151632171,
      "loss": 0.0354,
      "step": 530
    },
    {
      "epoch": 1.758957654723127,
      "grad_norm": 0.5154451727867126,
      "learning_rate": 0.0001541135768009566,
      "loss": 0.0424,
      "step": 540
    },
    {
      "epoch": 1.791530944625407,
      "grad_norm": 0.5298171043395996,
      "learning_rate": 0.00015228664622178467,
      "loss": 0.0263,
      "step": 550
    },
    {
      "epoch": 1.8241042345276872,
      "grad_norm": 0.3582160472869873,
      "learning_rate": 0.00015043537394112007,
      "loss": 0.0386,
      "step": 560
    },
    {
      "epoch": 1.8566775244299674,
      "grad_norm": 0.6350813508033752,
      "learning_rate": 0.0001485606218064993,
      "loss": 0.0444,
      "step": 570
    },
    {
      "epoch": 1.8892508143322475,
      "grad_norm": 0.6265957355499268,
      "learning_rate": 0.00014666326259634918,
      "loss": 0.0417,
      "step": 580
    },
    {
      "epoch": 1.9218241042345277,
      "grad_norm": 0.41339483857154846,
      "learning_rate": 0.00014474417961367065,
      "loss": 0.0309,
      "step": 590
    },
    {
      "epoch": 1.9543973941368078,
      "grad_norm": 0.41287094354629517,
      "learning_rate": 0.000142804266274823,
      "loss": 0.0308,
      "step": 600
    },
    {
      "epoch": 1.986970684039088,
      "grad_norm": 0.5833337903022766,
      "learning_rate": 0.00014084442569359964,
      "loss": 0.0346,
      "step": 610
    },
    {
      "epoch": 2.019543973941368,
      "grad_norm": 0.3714321255683899,
      "learning_rate": 0.00013886557026078955,
      "loss": 0.0337,
      "step": 620
    },
    {
      "epoch": 2.0521172638436482,
      "grad_norm": 0.2712138295173645,
      "learning_rate": 0.0001368686212194199,
      "loss": 0.0196,
      "step": 630
    },
    {
      "epoch": 2.0846905537459284,
      "grad_norm": 0.6208952069282532,
      "learning_rate": 0.00013485450823587725,
      "loss": 0.0288,
      "step": 640
    },
    {
      "epoch": 2.1172638436482085,
      "grad_norm": 0.34229573607444763,
      "learning_rate": 0.00013282416896710778,
      "loss": 0.0246,
      "step": 650
    },
    {
      "epoch": 2.1498371335504887,
      "grad_norm": 0.4573960304260254,
      "learning_rate": 0.00013077854862409696,
      "loss": 0.0249,
      "step": 660
    },
    {
      "epoch": 2.182410423452769,
      "grad_norm": 0.3675532042980194,
      "learning_rate": 0.0001287185995318333,
      "loss": 0.022,
      "step": 670
    },
    {
      "epoch": 2.214983713355049,
      "grad_norm": 0.5372172594070435,
      "learning_rate": 0.0001266452806859594,
      "loss": 0.0343,
      "step": 680
    },
    {
      "epoch": 2.247557003257329,
      "grad_norm": 0.46904900670051575,
      "learning_rate": 0.00012455955730631804,
      "loss": 0.0233,
      "step": 690
    },
    {
      "epoch": 2.2801302931596092,
      "grad_norm": 0.4394093155860901,
      "learning_rate": 0.00012246240038760043,
      "loss": 0.0209,
      "step": 700
    },
    {
      "epoch": 2.3127035830618894,
      "grad_norm": 0.3127492368221283,
      "learning_rate": 0.00012035478624730608,
      "loss": 0.0321,
      "step": 710
    },
    {
      "epoch": 2.3452768729641695,
      "grad_norm": 0.601370096206665,
      "learning_rate": 0.00011823769607122479,
      "loss": 0.0243,
      "step": 720
    },
    {
      "epoch": 2.3778501628664497,
      "grad_norm": 0.5871070623397827,
      "learning_rate": 0.00011611211545665184,
      "loss": 0.0337,
      "step": 730
    },
    {
      "epoch": 2.41042345276873,
      "grad_norm": 0.3546801805496216,
      "learning_rate": 0.00011397903395354996,
      "loss": 0.0288,
      "step": 740
    },
    {
      "epoch": 2.44299674267101,
      "grad_norm": 0.8319407105445862,
      "learning_rate": 0.0001118394446038708,
      "loss": 0.0337,
      "step": 750
    },
    {
      "epoch": 2.47557003257329,
      "grad_norm": 0.5210663080215454,
      "learning_rate": 0.00010969434347925076,
      "loss": 0.026,
      "step": 760
    },
    {
      "epoch": 2.5081433224755703,
      "grad_norm": 0.5834184288978577,
      "learning_rate": 0.00010754472921729661,
      "loss": 0.0282,
      "step": 770
    },
    {
      "epoch": 2.5407166123778504,
      "grad_norm": 0.42890864610671997,
      "learning_rate": 0.00010539160255667623,
      "loss": 0.028,
      "step": 780
    },
    {
      "epoch": 2.5732899022801305,
      "grad_norm": 0.4473400413990021,
      "learning_rate": 0.00010323596587123145,
      "loss": 0.025,
      "step": 790
    },
    {
      "epoch": 2.6058631921824107,
      "grad_norm": 0.5189303159713745,
      "learning_rate": 0.00010107882270332952,
      "loss": 0.0293,
      "step": 800
    },
    {
      "epoch": 2.6384364820846904,
      "grad_norm": 0.43365001678466797,
      "learning_rate": 9.892117729667052e-05,
      "loss": 0.0175,
      "step": 810
    },
    {
      "epoch": 2.6710097719869705,
      "grad_norm": 0.28346696496009827,
      "learning_rate": 9.676403412876856e-05,
      "loss": 0.0334,
      "step": 820
    },
    {
      "epoch": 2.7035830618892507,
      "grad_norm": 0.3956477642059326,
      "learning_rate": 9.460839744332378e-05,
      "loss": 0.0271,
      "step": 830
    },
    {
      "epoch": 2.736156351791531,
      "grad_norm": 0.30705949664115906,
      "learning_rate": 9.245527078270341e-05,
      "loss": 0.0217,
      "step": 840
    },
    {
      "epoch": 2.768729641693811,
      "grad_norm": 0.40188854932785034,
      "learning_rate": 9.030565652074926e-05,
      "loss": 0.019,
      "step": 850
    },
    {
      "epoch": 2.801302931596091,
      "grad_norm": 0.3447129428386688,
      "learning_rate": 8.816055539612924e-05,
      "loss": 0.028,
      "step": 860
    },
    {
      "epoch": 2.8338762214983713,
      "grad_norm": 0.38768622279167175,
      "learning_rate": 8.602096604645009e-05,
      "loss": 0.0218,
      "step": 870
    },
    {
      "epoch": 2.8664495114006514,
      "grad_norm": 0.26912721991539,
      "learning_rate": 8.388788454334817e-05,
      "loss": 0.0173,
      "step": 880
    },
    {
      "epoch": 2.8990228013029316,
      "grad_norm": 0.33078861236572266,
      "learning_rate": 8.176230392877523e-05,
      "loss": 0.0233,
      "step": 890
    },
    {
      "epoch": 2.9315960912052117,
      "grad_norm": 0.24832488596439362,
      "learning_rate": 7.964521375269396e-05,
      "loss": 0.0171,
      "step": 900
    },
    {
      "epoch": 2.964169381107492,
      "grad_norm": 0.6595136523246765,
      "learning_rate": 7.753759961239964e-05,
      "loss": 0.0272,
      "step": 910
    },
    {
      "epoch": 2.996742671009772,
      "grad_norm": 0.2780207097530365,
      "learning_rate": 7.544044269368197e-05,
      "loss": 0.0338,
      "step": 920
    },
    {
      "epoch": 3.029315960912052,
      "grad_norm": 0.7173179388046265,
      "learning_rate": 7.335471931404063e-05,
      "loss": 0.0365,
      "step": 930
    },
    {
      "epoch": 3.0618892508143323,
      "grad_norm": 0.33753442764282227,
      "learning_rate": 7.128140046816671e-05,
      "loss": 0.0195,
      "step": 940
    },
    {
      "epoch": 3.0944625407166124,
      "grad_norm": 0.35064950585365295,
      "learning_rate": 6.922145137590306e-05,
      "loss": 0.02,
      "step": 950
    },
    {
      "epoch": 3.1270358306188926,
      "grad_norm": 0.39598166942596436,
      "learning_rate": 6.717583103289229e-05,
      "loss": 0.0203,
      "step": 960
    },
    {
      "epoch": 3.1596091205211727,
      "grad_norm": 0.18257524073123932,
      "learning_rate": 6.514549176412275e-05,
      "loss": 0.0134,
      "step": 970
    },
    {
      "epoch": 3.192182410423453,
      "grad_norm": 0.4458347260951996,
      "learning_rate": 6.313137878058013e-05,
      "loss": 0.0236,
      "step": 980
    },
    {
      "epoch": 3.224755700325733,
      "grad_norm": 0.22742605209350586,
      "learning_rate": 6.113442973921046e-05,
      "loss": 0.0208,
      "step": 990
    },
    {
      "epoch": 3.257328990228013,
      "grad_norm": 0.1858537793159485,
      "learning_rate": 5.9155574306400395e-05,
      "loss": 0.0218,
      "step": 1000
    },
    {
      "epoch": 3.2899022801302933,
      "grad_norm": 0.24626286327838898,
      "learning_rate": 5.7195733725176994e-05,
      "loss": 0.0232,
      "step": 1010
    },
    {
      "epoch": 3.3224755700325734,
      "grad_norm": 0.2719153165817261,
      "learning_rate": 5.525582038632934e-05,
      "loss": 0.0148,
      "step": 1020
    },
    {
      "epoch": 3.3550488599348536,
      "grad_norm": 0.218730166554451,
      "learning_rate": 5.333673740365083e-05,
      "loss": 0.0157,
      "step": 1030
    },
    {
      "epoch": 3.3876221498371337,
      "grad_norm": 0.20292945206165314,
      "learning_rate": 5.1439378193500707e-05,
      "loss": 0.0143,
      "step": 1040
    },
    {
      "epoch": 3.420195439739414,
      "grad_norm": 0.2846449017524719,
      "learning_rate": 4.956462605887994e-05,
      "loss": 0.0177,
      "step": 1050
    },
    {
      "epoch": 3.4527687296416936,
      "grad_norm": 0.322721391916275,
      "learning_rate": 4.771335377821535e-05,
      "loss": 0.0224,
      "step": 1060
    },
    {
      "epoch": 3.4853420195439737,
      "grad_norm": 0.1719449758529663,
      "learning_rate": 4.588642319904343e-05,
      "loss": 0.0234,
      "step": 1070
    },
    {
      "epoch": 3.517915309446254,
      "grad_norm": 0.44704851508140564,
      "learning_rate": 4.408468483678293e-05,
      "loss": 0.019,
      "step": 1080
    },
    {
      "epoch": 3.550488599348534,
      "grad_norm": 0.4159814417362213,
      "learning_rate": 4.230897747878303e-05,
      "loss": 0.0156,
      "step": 1090
    },
    {
      "epoch": 3.583061889250814,
      "grad_norm": 0.19604472815990448,
      "learning_rate": 4.056012779383145e-05,
      "loss": 0.0158,
      "step": 1100
    },
    {
      "epoch": 3.6156351791530943,
      "grad_norm": 0.19116809964179993,
      "learning_rate": 3.883894994730428e-05,
      "loss": 0.0174,
      "step": 1110
    },
    {
      "epoch": 3.6482084690553744,
      "grad_norm": 0.3637801706790924,
      "learning_rate": 3.714624522213681e-05,
      "loss": 0.0162,
      "step": 1120
    },
    {
      "epoch": 3.6807817589576546,
      "grad_norm": 0.1877295821905136,
      "learning_rate": 3.548280164579126e-05,
      "loss": 0.0142,
      "step": 1130
    },
    {
      "epoch": 3.7133550488599347,
      "grad_norm": 0.1830226182937622,
      "learning_rate": 3.384939362339614e-05,
      "loss": 0.0119,
      "step": 1140
    },
    {
      "epoch": 3.745928338762215,
      "grad_norm": 0.15163740515708923,
      "learning_rate": 3.224678157722689e-05,
      "loss": 0.0181,
      "step": 1150
    },
    {
      "epoch": 3.778501628664495,
      "grad_norm": 0.2479788213968277,
      "learning_rate": 3.067571159269651e-05,
      "loss": 0.0138,
      "step": 1160
    },
    {
      "epoch": 3.811074918566775,
      "grad_norm": 0.6171669960021973,
      "learning_rate": 2.913691507102019e-05,
      "loss": 0.0197,
      "step": 1170
    },
    {
      "epoch": 3.8436482084690553,
      "grad_norm": 0.18519634008407593,
      "learning_rate": 2.763110838871651e-05,
      "loss": 0.0137,
      "step": 1180
    },
    {
      "epoch": 3.8762214983713354,
      "grad_norm": 0.26303982734680176,
      "learning_rate": 2.6158992564103058e-05,
      "loss": 0.0172,
      "step": 1190
    },
    {
      "epoch": 3.9087947882736156,
      "grad_norm": 0.28331807255744934,
      "learning_rate": 2.4721252930941974e-05,
      "loss": 0.0168,
      "step": 1200
    },
    {
      "epoch": 3.9413680781758957,
      "grad_norm": 0.20530906319618225,
      "learning_rate": 2.3318558819387404e-05,
      "loss": 0.0199,
      "step": 1210
    },
    {
      "epoch": 3.973941368078176,
      "grad_norm": 0.16924133896827698,
      "learning_rate": 2.1951563244383233e-05,
      "loss": 0.0146,
      "step": 1220
    },
    {
      "epoch": 4.006514657980456,
      "grad_norm": 0.13186028599739075,
      "learning_rate": 2.0620902601656345e-05,
      "loss": 0.0124,
      "step": 1230
    },
    {
      "epoch": 4.039087947882736,
      "grad_norm": 0.24360792338848114,
      "learning_rate": 1.9327196371446776e-05,
      "loss": 0.0119,
      "step": 1240
    },
    {
      "epoch": 4.071661237785016,
      "grad_norm": 0.09876150637865067,
      "learning_rate": 1.807104683011289e-05,
      "loss": 0.012,
      "step": 1250
    },
    {
      "epoch": 4.1042345276872965,
      "grad_norm": 0.2283184826374054,
      "learning_rate": 1.6853038769745467e-05,
      "loss": 0.0142,
      "step": 1260
    },
    {
      "epoch": 4.136807817589577,
      "grad_norm": 0.32383596897125244,
      "learning_rate": 1.5673739225921758e-05,
      "loss": 0.012,
      "step": 1270
    },
    {
      "epoch": 4.169381107491857,
      "grad_norm": 0.2783248722553253,
      "learning_rate": 1.4533697213725662e-05,
      "loss": 0.0163,
      "step": 1280
    },
    {
      "epoch": 4.201954397394137,
      "grad_norm": 0.17678265273571014,
      "learning_rate": 1.3433443472157613e-05,
      "loss": 0.012,
      "step": 1290
    },
    {
      "epoch": 4.234527687296417,
      "grad_norm": 0.25102487206459045,
      "learning_rate": 1.237349021705243e-05,
      "loss": 0.0158,
      "step": 1300
    },
    {
      "epoch": 4.267100977198697,
      "grad_norm": 0.15461167693138123,
      "learning_rate": 1.1354330902620636e-05,
      "loss": 0.0126,
      "step": 1310
    },
    {
      "epoch": 4.299674267100977,
      "grad_norm": 0.24122057855129242,
      "learning_rate": 1.0376439991724096e-05,
      "loss": 0.0168,
      "step": 1320
    },
    {
      "epoch": 4.3322475570032575,
      "grad_norm": 0.14669205248355865,
      "learning_rate": 9.440272734993072e-06,
      "loss": 0.0179,
      "step": 1330
    },
    {
      "epoch": 4.364820846905538,
      "grad_norm": 0.32440969347953796,
      "learning_rate": 8.546264958887219e-06,
      "loss": 0.0197,
      "step": 1340
    },
    {
      "epoch": 4.397394136807818,
      "grad_norm": 0.14456795156002045,
      "learning_rate": 7.694832862799505e-06,
      "loss": 0.0111,
      "step": 1350
    },
    {
      "epoch": 4.429967426710098,
      "grad_norm": 0.17956456542015076,
      "learning_rate": 6.886372825297349e-06,
      "loss": 0.0085,
      "step": 1360
    },
    {
      "epoch": 4.462540716612378,
      "grad_norm": 0.30424752831459045,
      "learning_rate": 6.12126121959119e-06,
      "loss": 0.0207,
      "step": 1370
    },
    {
      "epoch": 4.495114006514658,
      "grad_norm": 0.18671758472919464,
      "learning_rate": 5.399854238316437e-06,
      "loss": 0.013,
      "step": 1380
    },
    {
      "epoch": 4.527687296416938,
      "grad_norm": 0.3565406799316406,
      "learning_rate": 4.722487727710368e-06,
      "loss": 0.0165,
      "step": 1390
    },
    {
      "epoch": 4.5602605863192185,
      "grad_norm": 0.26344749331474304,
      "learning_rate": 4.089477031261113e-06,
      "loss": 0.0148,
      "step": 1400
    },
    {
      "epoch": 4.592833876221499,
      "grad_norm": 0.18339155614376068,
      "learning_rate": 3.5011168429016083e-06,
      "loss": 0.0195,
      "step": 1410
    },
    {
      "epoch": 4.625407166123779,
      "grad_norm": 0.2598022222518921,
      "learning_rate": 2.95768106981672e-06,
      "loss": 0.0135,
      "step": 1420
    },
    {
      "epoch": 4.657980456026059,
      "grad_norm": 0.3853515684604645,
      "learning_rate": 2.4594227049277386e-06,
      "loss": 0.0177,
      "step": 1430
    },
    {
      "epoch": 4.690553745928339,
      "grad_norm": 0.13664180040359497,
      "learning_rate": 2.006573709112991e-06,
      "loss": 0.0086,
      "step": 1440
    },
    {
      "epoch": 4.723127035830619,
      "grad_norm": 0.1015399917960167,
      "learning_rate": 1.5993449032201458e-06,
      "loss": 0.0116,
      "step": 1450
    },
    {
      "epoch": 4.755700325732899,
      "grad_norm": 0.18885648250579834,
      "learning_rate": 1.237925869919887e-06,
      "loss": 0.0175,
      "step": 1460
    },
    {
      "epoch": 4.7882736156351795,
      "grad_norm": 0.18131224811077118,
      "learning_rate": 9.224848654469931e-07,
      "loss": 0.0088,
      "step": 1470
    },
    {
      "epoch": 4.82084690553746,
      "grad_norm": 0.194551482796669,
      "learning_rate": 6.531687412697496e-07,
      "loss": 0.014,
      "step": 1480
    },
    {
      "epoch": 4.85342019543974,
      "grad_norm": 0.23798178136348724,
      "learning_rate": 4.3010287572422537e-07,
      "loss": 0.0097,
      "step": 1490
    },
    {
      "epoch": 4.88599348534202,
      "grad_norm": 0.141094371676445,
      "learning_rate": 2.5339111564521844e-07,
      "loss": 0.0151,
      "step": 1500
    },
    {
      "epoch": 4.918566775244299,
      "grad_norm": 0.17839759588241577,
      "learning_rate": 1.2311572802105043e-07,
      "loss": 0.0097,
      "step": 1510
    },
    {
      "epoch": 4.95114006514658,
      "grad_norm": 0.3124100863933563,
      "learning_rate": 3.933736169471347e-08,
      "loss": 0.0098,
      "step": 1520
    },
    {
      "epoch": 4.9837133550488595,
      "grad_norm": 0.17664480209350586,
      "learning_rate": 2.0950191292112842e-09,
      "loss": 0.0142,
      "step": 1530
    },
    {
      "epoch": 4.993485342019544,
      "step": 1533,
      "total_flos": 5.185032946443418e+16,
      "train_loss": 0.05182663513868756,
      "train_runtime": 722.6992,
      "train_samples_per_second": 33.939,
      "train_steps_per_second": 2.121
    }
  ],
  "logging_steps": 10,
  "max_steps": 1533,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.185032946443418e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}