{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.981670061099797,
"eval_steps": 1.0,
"global_step": 1225,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004073319755600814,
"grad_norm": 34.300819396972656,
"learning_rate": 8.130081300813008e-09,
"loss": 1.59619802236557,
"step": 1
},
{
"epoch": 0.008146639511201629,
"grad_norm": 30.720197677612305,
"learning_rate": 1.6260162601626016e-08,
"loss": 1.468272864818573,
"step": 2
},
{
"epoch": 0.012219959266802444,
"grad_norm": 30.16754722595215,
"learning_rate": 2.4390243902439023e-08,
"loss": 1.3843095302581787,
"step": 3
},
{
"epoch": 0.016293279022403257,
"grad_norm": 38.58047103881836,
"learning_rate": 3.252032520325203e-08,
"loss": 1.7031245231628418,
"step": 4
},
{
"epoch": 0.020366598778004074,
"grad_norm": 30.89760971069336,
"learning_rate": 4.065040650406504e-08,
"loss": 1.4844104647636414,
"step": 5
},
{
"epoch": 0.024439918533604887,
"grad_norm": 34.434993743896484,
"learning_rate": 4.878048780487805e-08,
"loss": 1.574910283088684,
"step": 6
},
{
"epoch": 0.028513238289205704,
"grad_norm": 32.540470123291016,
"learning_rate": 5.6910569105691055e-08,
"loss": 1.4606674909591675,
"step": 7
},
{
"epoch": 0.032586558044806514,
"grad_norm": 36.41299819946289,
"learning_rate": 6.504065040650406e-08,
"loss": 1.553576111793518,
"step": 8
},
{
"epoch": 0.03665987780040733,
"grad_norm": 34.50511932373047,
"learning_rate": 7.317073170731706e-08,
"loss": 1.3344553709030151,
"step": 9
},
{
"epoch": 0.04073319755600815,
"grad_norm": 27.898704528808594,
"learning_rate": 8.130081300813008e-08,
"loss": 1.3406395316123962,
"step": 10
},
{
"epoch": 0.04480651731160896,
"grad_norm": 29.29271125793457,
"learning_rate": 8.943089430894309e-08,
"loss": 1.4415303468704224,
"step": 11
},
{
"epoch": 0.048879837067209775,
"grad_norm": 28.2354736328125,
"learning_rate": 9.75609756097561e-08,
"loss": 1.2696096301078796,
"step": 12
},
{
"epoch": 0.05295315682281059,
"grad_norm": 35.44163131713867,
"learning_rate": 1.0569105691056911e-07,
"loss": 1.598312497138977,
"step": 13
},
{
"epoch": 0.05702647657841141,
"grad_norm": 26.94402313232422,
"learning_rate": 1.1382113821138211e-07,
"loss": 1.3497812747955322,
"step": 14
},
{
"epoch": 0.06109979633401222,
"grad_norm": 37.78248977661133,
"learning_rate": 1.219512195121951e-07,
"loss": 1.5689660906791687,
"step": 15
},
{
"epoch": 0.06517311608961303,
"grad_norm": 31.73078155517578,
"learning_rate": 1.3008130081300813e-07,
"loss": 1.525648295879364,
"step": 16
},
{
"epoch": 0.06924643584521385,
"grad_norm": 27.77250862121582,
"learning_rate": 1.3821138211382114e-07,
"loss": 1.304672360420227,
"step": 17
},
{
"epoch": 0.07331975560081466,
"grad_norm": 28.092498779296875,
"learning_rate": 1.4634146341463413e-07,
"loss": 1.346445381641388,
"step": 18
},
{
"epoch": 0.07739307535641547,
"grad_norm": 30.995866775512695,
"learning_rate": 1.5447154471544717e-07,
"loss": 1.447025179862976,
"step": 19
},
{
"epoch": 0.0814663951120163,
"grad_norm": 28.858421325683594,
"learning_rate": 1.6260162601626016e-07,
"loss": 1.3801668882369995,
"step": 20
},
{
"epoch": 0.0855397148676171,
"grad_norm": 31.91228485107422,
"learning_rate": 1.7073170731707317e-07,
"loss": 1.4577875137329102,
"step": 21
},
{
"epoch": 0.08961303462321792,
"grad_norm": 31.215259552001953,
"learning_rate": 1.7886178861788619e-07,
"loss": 1.4091373682022095,
"step": 22
},
{
"epoch": 0.09368635437881874,
"grad_norm": 30.24734115600586,
"learning_rate": 1.8699186991869917e-07,
"loss": 1.4649581909179688,
"step": 23
},
{
"epoch": 0.09775967413441955,
"grad_norm": 31.560291290283203,
"learning_rate": 1.951219512195122e-07,
"loss": 1.5308585166931152,
"step": 24
},
{
"epoch": 0.10183299389002037,
"grad_norm": 27.27391242980957,
"learning_rate": 2.032520325203252e-07,
"loss": 1.5144553780555725,
"step": 25
},
{
"epoch": 0.10590631364562118,
"grad_norm": 29.813785552978516,
"learning_rate": 2.1138211382113822e-07,
"loss": 1.519466757774353,
"step": 26
},
{
"epoch": 0.109979633401222,
"grad_norm": 24.201751708984375,
"learning_rate": 2.195121951219512e-07,
"loss": 1.3116011023521423,
"step": 27
},
{
"epoch": 0.11405295315682282,
"grad_norm": 27.95865249633789,
"learning_rate": 2.2764227642276422e-07,
"loss": 1.4637184143066406,
"step": 28
},
{
"epoch": 0.11812627291242363,
"grad_norm": 26.65915870666504,
"learning_rate": 2.3577235772357723e-07,
"loss": 1.4885194301605225,
"step": 29
},
{
"epoch": 0.12219959266802444,
"grad_norm": 27.386289596557617,
"learning_rate": 2.439024390243902e-07,
"loss": 1.3836334347724915,
"step": 30
},
{
"epoch": 0.12627291242362526,
"grad_norm": 25.87419319152832,
"learning_rate": 2.520325203252032e-07,
"loss": 1.3642336130142212,
"step": 31
},
{
"epoch": 0.13034623217922606,
"grad_norm": 26.620105743408203,
"learning_rate": 2.6016260162601625e-07,
"loss": 1.3461121916770935,
"step": 32
},
{
"epoch": 0.13441955193482688,
"grad_norm": 22.665058135986328,
"learning_rate": 2.682926829268293e-07,
"loss": 1.2577590942382812,
"step": 33
},
{
"epoch": 0.1384928716904277,
"grad_norm": 23.679920196533203,
"learning_rate": 2.764227642276423e-07,
"loss": 1.2572017908096313,
"step": 34
},
{
"epoch": 0.1425661914460285,
"grad_norm": 25.136371612548828,
"learning_rate": 2.8455284552845527e-07,
"loss": 1.2670851349830627,
"step": 35
},
{
"epoch": 0.14663951120162932,
"grad_norm": 21.567337036132812,
"learning_rate": 2.9268292682926825e-07,
"loss": 1.242683231830597,
"step": 36
},
{
"epoch": 0.15071283095723015,
"grad_norm": 20.61647605895996,
"learning_rate": 3.008130081300813e-07,
"loss": 1.279579222202301,
"step": 37
},
{
"epoch": 0.15478615071283094,
"grad_norm": 20.656513214111328,
"learning_rate": 3.0894308943089434e-07,
"loss": 1.2040475606918335,
"step": 38
},
{
"epoch": 0.15885947046843177,
"grad_norm": 22.86530876159668,
"learning_rate": 3.170731707317073e-07,
"loss": 1.2522715330123901,
"step": 39
},
{
"epoch": 0.1629327902240326,
"grad_norm": 20.22757911682129,
"learning_rate": 3.252032520325203e-07,
"loss": 1.2012774348258972,
"step": 40
},
{
"epoch": 0.1670061099796334,
"grad_norm": 23.09739875793457,
"learning_rate": 3.333333333333333e-07,
"loss": 1.2088268399238586,
"step": 41
},
{
"epoch": 0.1710794297352342,
"grad_norm": 22.845685958862305,
"learning_rate": 3.4146341463414634e-07,
"loss": 1.0982880592346191,
"step": 42
},
{
"epoch": 0.17515274949083504,
"grad_norm": 19.80814552307129,
"learning_rate": 3.4959349593495933e-07,
"loss": 1.1271469593048096,
"step": 43
},
{
"epoch": 0.17922606924643583,
"grad_norm": 20.553686141967773,
"learning_rate": 3.5772357723577237e-07,
"loss": 1.0008204579353333,
"step": 44
},
{
"epoch": 0.18329938900203666,
"grad_norm": 16.66282844543457,
"learning_rate": 3.6585365853658536e-07,
"loss": 0.9251897931098938,
"step": 45
},
{
"epoch": 0.18737270875763748,
"grad_norm": 15.797308921813965,
"learning_rate": 3.7398373983739835e-07,
"loss": 1.0191328525543213,
"step": 46
},
{
"epoch": 0.19144602851323828,
"grad_norm": 13.579208374023438,
"learning_rate": 3.821138211382114e-07,
"loss": 0.774791806936264,
"step": 47
},
{
"epoch": 0.1955193482688391,
"grad_norm": 14.556002616882324,
"learning_rate": 3.902439024390244e-07,
"loss": 1.0026790797710419,
"step": 48
},
{
"epoch": 0.19959266802443992,
"grad_norm": 14.489509582519531,
"learning_rate": 3.9837398373983736e-07,
"loss": 0.9430837631225586,
"step": 49
},
{
"epoch": 0.20366598778004075,
"grad_norm": 12.495223999023438,
"learning_rate": 4.065040650406504e-07,
"loss": 0.8999880254268646,
"step": 50
},
{
"epoch": 0.20773930753564154,
"grad_norm": 11.441575050354004,
"learning_rate": 4.146341463414634e-07,
"loss": 0.8320233225822449,
"step": 51
},
{
"epoch": 0.21181262729124237,
"grad_norm": 10.894216537475586,
"learning_rate": 4.2276422764227643e-07,
"loss": 0.8139239549636841,
"step": 52
},
{
"epoch": 0.2158859470468432,
"grad_norm": 10.404220581054688,
"learning_rate": 4.308943089430894e-07,
"loss": 0.8323288261890411,
"step": 53
},
{
"epoch": 0.219959266802444,
"grad_norm": 10.463072776794434,
"learning_rate": 4.390243902439024e-07,
"loss": 0.882573276758194,
"step": 54
},
{
"epoch": 0.2240325865580448,
"grad_norm": 10.669075012207031,
"learning_rate": 4.471544715447154e-07,
"loss": 0.749780923128128,
"step": 55
},
{
"epoch": 0.22810590631364563,
"grad_norm": 10.453638076782227,
"learning_rate": 4.5528455284552844e-07,
"loss": 0.7727148830890656,
"step": 56
},
{
"epoch": 0.23217922606924643,
"grad_norm": 11.427080154418945,
"learning_rate": 4.634146341463415e-07,
"loss": 0.8585084676742554,
"step": 57
},
{
"epoch": 0.23625254582484725,
"grad_norm": 8.558117866516113,
"learning_rate": 4.7154471544715447e-07,
"loss": 0.7314337491989136,
"step": 58
},
{
"epoch": 0.24032586558044808,
"grad_norm": 9.031648635864258,
"learning_rate": 4.796747967479675e-07,
"loss": 0.701579749584198,
"step": 59
},
{
"epoch": 0.24439918533604887,
"grad_norm": 8.817708969116211,
"learning_rate": 4.878048780487804e-07,
"loss": 0.7815204560756683,
"step": 60
},
{
"epoch": 0.2484725050916497,
"grad_norm": 8.00804615020752,
"learning_rate": 4.959349593495934e-07,
"loss": 0.655106246471405,
"step": 61
},
{
"epoch": 0.2525458248472505,
"grad_norm": 6.538842678070068,
"learning_rate": 5.040650406504064e-07,
"loss": 0.6697916388511658,
"step": 62
},
{
"epoch": 0.25661914460285135,
"grad_norm": 7.5446553230285645,
"learning_rate": 5.121951219512195e-07,
"loss": 0.7426944077014923,
"step": 63
},
{
"epoch": 0.2606924643584521,
"grad_norm": 6.402474403381348,
"learning_rate": 5.203252032520325e-07,
"loss": 0.6401277780532837,
"step": 64
},
{
"epoch": 0.26476578411405294,
"grad_norm": 7.257569313049316,
"learning_rate": 5.284552845528455e-07,
"loss": 0.6731106042861938,
"step": 65
},
{
"epoch": 0.26883910386965376,
"grad_norm": 6.263636589050293,
"learning_rate": 5.365853658536586e-07,
"loss": 0.5806022882461548,
"step": 66
},
{
"epoch": 0.2729124236252546,
"grad_norm": 5.273800849914551,
"learning_rate": 5.447154471544715e-07,
"loss": 0.5338439792394638,
"step": 67
},
{
"epoch": 0.2769857433808554,
"grad_norm": 5.2786149978637695,
"learning_rate": 5.528455284552846e-07,
"loss": 0.5390533208847046,
"step": 68
},
{
"epoch": 0.28105906313645623,
"grad_norm": 4.901702404022217,
"learning_rate": 5.609756097560975e-07,
"loss": 0.5899032056331635,
"step": 69
},
{
"epoch": 0.285132382892057,
"grad_norm": 4.853933811187744,
"learning_rate": 5.691056910569105e-07,
"loss": 0.5600310862064362,
"step": 70
},
{
"epoch": 0.2892057026476578,
"grad_norm": 4.680273532867432,
"learning_rate": 5.772357723577236e-07,
"loss": 0.5319355428218842,
"step": 71
},
{
"epoch": 0.29327902240325865,
"grad_norm": 3.7406885623931885,
"learning_rate": 5.853658536585365e-07,
"loss": 0.508156955242157,
"step": 72
},
{
"epoch": 0.2973523421588595,
"grad_norm": 4.389779567718506,
"learning_rate": 5.934959349593496e-07,
"loss": 0.49855048954486847,
"step": 73
},
{
"epoch": 0.3014256619144603,
"grad_norm": 4.23866081237793,
"learning_rate": 6.016260162601626e-07,
"loss": 0.5242476612329483,
"step": 74
},
{
"epoch": 0.3054989816700611,
"grad_norm": 4.1824951171875,
"learning_rate": 6.097560975609756e-07,
"loss": 0.532037615776062,
"step": 75
},
{
"epoch": 0.3095723014256619,
"grad_norm": 3.7223150730133057,
"learning_rate": 6.178861788617887e-07,
"loss": 0.46959882974624634,
"step": 76
},
{
"epoch": 0.3136456211812627,
"grad_norm": 3.545388698577881,
"learning_rate": 6.260162601626016e-07,
"loss": 0.4825982600450516,
"step": 77
},
{
"epoch": 0.31771894093686354,
"grad_norm": 3.6351099014282227,
"learning_rate": 6.341463414634146e-07,
"loss": 0.5095209777355194,
"step": 78
},
{
"epoch": 0.32179226069246436,
"grad_norm": 3.243072271347046,
"learning_rate": 6.422764227642276e-07,
"loss": 0.4842926263809204,
"step": 79
},
{
"epoch": 0.3258655804480652,
"grad_norm": 3.5646300315856934,
"learning_rate": 6.504065040650406e-07,
"loss": 0.4908552020788193,
"step": 80
},
{
"epoch": 0.329938900203666,
"grad_norm": 3.5380759239196777,
"learning_rate": 6.585365853658536e-07,
"loss": 0.4536065459251404,
"step": 81
},
{
"epoch": 0.3340122199592668,
"grad_norm": 3.128525495529175,
"learning_rate": 6.666666666666666e-07,
"loss": 0.47657161951065063,
"step": 82
},
{
"epoch": 0.3380855397148676,
"grad_norm": 3.3621485233306885,
"learning_rate": 6.747967479674797e-07,
"loss": 0.43791596591472626,
"step": 83
},
{
"epoch": 0.3421588594704684,
"grad_norm": 3.39066219329834,
"learning_rate": 6.829268292682927e-07,
"loss": 0.42947711050510406,
"step": 84
},
{
"epoch": 0.34623217922606925,
"grad_norm": 3.7795698642730713,
"learning_rate": 6.910569105691057e-07,
"loss": 0.4219910502433777,
"step": 85
},
{
"epoch": 0.35030549898167007,
"grad_norm": 3.633206367492676,
"learning_rate": 6.991869918699187e-07,
"loss": 0.4253977984189987,
"step": 86
},
{
"epoch": 0.3543788187372709,
"grad_norm": 3.6160175800323486,
"learning_rate": 7.073170731707316e-07,
"loss": 0.449339896440506,
"step": 87
},
{
"epoch": 0.35845213849287166,
"grad_norm": 3.30557918548584,
"learning_rate": 7.154471544715447e-07,
"loss": 0.45001736283302307,
"step": 88
},
{
"epoch": 0.3625254582484725,
"grad_norm": 3.1727640628814697,
"learning_rate": 7.235772357723577e-07,
"loss": 0.4165496975183487,
"step": 89
},
{
"epoch": 0.3665987780040733,
"grad_norm": 3.073976516723633,
"learning_rate": 7.317073170731707e-07,
"loss": 0.4443822205066681,
"step": 90
},
{
"epoch": 0.37067209775967414,
"grad_norm": 3.129105567932129,
"learning_rate": 7.398373983739837e-07,
"loss": 0.4265598952770233,
"step": 91
},
{
"epoch": 0.37474541751527496,
"grad_norm": 3.1485190391540527,
"learning_rate": 7.479674796747967e-07,
"loss": 0.3882734924554825,
"step": 92
},
{
"epoch": 0.3788187372708758,
"grad_norm": 3.1610565185546875,
"learning_rate": 7.560975609756097e-07,
"loss": 0.37010858952999115,
"step": 93
},
{
"epoch": 0.38289205702647655,
"grad_norm": 3.039264440536499,
"learning_rate": 7.642276422764228e-07,
"loss": 0.400989294052124,
"step": 94
},
{
"epoch": 0.3869653767820774,
"grad_norm": 2.9321980476379395,
"learning_rate": 7.723577235772358e-07,
"loss": 0.3771343380212784,
"step": 95
},
{
"epoch": 0.3910386965376782,
"grad_norm": 2.807072162628174,
"learning_rate": 7.804878048780488e-07,
"loss": 0.4001482129096985,
"step": 96
},
{
"epoch": 0.395112016293279,
"grad_norm": 2.8286941051483154,
"learning_rate": 7.886178861788617e-07,
"loss": 0.4234430640935898,
"step": 97
},
{
"epoch": 0.39918533604887985,
"grad_norm": 2.9245986938476562,
"learning_rate": 7.967479674796747e-07,
"loss": 0.3854667395353317,
"step": 98
},
{
"epoch": 0.40325865580448067,
"grad_norm": 3.015875816345215,
"learning_rate": 8.048780487804878e-07,
"loss": 0.38027653098106384,
"step": 99
},
{
"epoch": 0.4073319755600815,
"grad_norm": 2.907216787338257,
"learning_rate": 8.130081300813008e-07,
"loss": 0.34937676787376404,
"step": 100
},
{
"epoch": 0.41140529531568226,
"grad_norm": 3.131850004196167,
"learning_rate": 8.211382113821138e-07,
"loss": 0.4414845108985901,
"step": 101
},
{
"epoch": 0.4154786150712831,
"grad_norm": 2.9019775390625,
"learning_rate": 8.292682926829268e-07,
"loss": 0.3990558981895447,
"step": 102
},
{
"epoch": 0.4195519348268839,
"grad_norm": 2.9362523555755615,
"learning_rate": 8.373983739837398e-07,
"loss": 0.41413092613220215,
"step": 103
},
{
"epoch": 0.42362525458248473,
"grad_norm": 3.0895473957061768,
"learning_rate": 8.455284552845529e-07,
"loss": 0.3904542028903961,
"step": 104
},
{
"epoch": 0.42769857433808556,
"grad_norm": 2.9235992431640625,
"learning_rate": 8.536585365853657e-07,
"loss": 0.3995140939950943,
"step": 105
},
{
"epoch": 0.4317718940936864,
"grad_norm": 2.919102668762207,
"learning_rate": 8.617886178861788e-07,
"loss": 0.32857778668403625,
"step": 106
},
{
"epoch": 0.43584521384928715,
"grad_norm": 2.831698417663574,
"learning_rate": 8.699186991869918e-07,
"loss": 0.3507983237504959,
"step": 107
},
{
"epoch": 0.439918533604888,
"grad_norm": 2.952693223953247,
"learning_rate": 8.780487804878048e-07,
"loss": 0.37046514451503754,
"step": 108
},
{
"epoch": 0.4439918533604888,
"grad_norm": 3.315002679824829,
"learning_rate": 8.861788617886179e-07,
"loss": 0.391086682677269,
"step": 109
},
{
"epoch": 0.4480651731160896,
"grad_norm": 2.7241294384002686,
"learning_rate": 8.943089430894308e-07,
"loss": 0.3864188492298126,
"step": 110
},
{
"epoch": 0.45213849287169044,
"grad_norm": 2.782064199447632,
"learning_rate": 9.024390243902439e-07,
"loss": 0.38219109177589417,
"step": 111
},
{
"epoch": 0.45621181262729127,
"grad_norm": 4.001572132110596,
"learning_rate": 9.105691056910569e-07,
"loss": 0.3784598410129547,
"step": 112
},
{
"epoch": 0.46028513238289204,
"grad_norm": 2.607434034347534,
"learning_rate": 9.186991869918699e-07,
"loss": 0.3763512521982193,
"step": 113
},
{
"epoch": 0.46435845213849286,
"grad_norm": 2.97188138961792,
"learning_rate": 9.26829268292683e-07,
"loss": 0.36788034439086914,
"step": 114
},
{
"epoch": 0.4684317718940937,
"grad_norm": 2.9631524085998535,
"learning_rate": 9.349593495934958e-07,
"loss": 0.3696867823600769,
"step": 115
},
{
"epoch": 0.4725050916496945,
"grad_norm": 2.5895049571990967,
"learning_rate": 9.430894308943089e-07,
"loss": 0.3349902927875519,
"step": 116
},
{
"epoch": 0.47657841140529533,
"grad_norm": 2.600832462310791,
"learning_rate": 9.512195121951218e-07,
"loss": 0.34966227412223816,
"step": 117
},
{
"epoch": 0.48065173116089616,
"grad_norm": 3.0639443397521973,
"learning_rate": 9.59349593495935e-07,
"loss": 0.38310858607292175,
"step": 118
},
{
"epoch": 0.4847250509164969,
"grad_norm": 2.6944706439971924,
"learning_rate": 9.67479674796748e-07,
"loss": 0.3360476493835449,
"step": 119
},
{
"epoch": 0.48879837067209775,
"grad_norm": 2.8398237228393555,
"learning_rate": 9.756097560975609e-07,
"loss": 0.39176714420318604,
"step": 120
},
{
"epoch": 0.49287169042769857,
"grad_norm": 2.8028745651245117,
"learning_rate": 9.83739837398374e-07,
"loss": 0.37909021973609924,
"step": 121
},
{
"epoch": 0.4969450101832994,
"grad_norm": 2.6169185638427734,
"learning_rate": 9.918699186991869e-07,
"loss": 0.37069061398506165,
"step": 122
},
{
"epoch": 0.5010183299389002,
"grad_norm": 2.572046995162964,
"learning_rate": 1e-06,
"loss": 0.3428824096918106,
"step": 123
},
{
"epoch": 0.505091649694501,
"grad_norm": 2.7804417610168457,
"learning_rate": 9.999979682219186e-07,
"loss": 0.3680119812488556,
"step": 124
},
{
"epoch": 0.5091649694501018,
"grad_norm": 2.5910799503326416,
"learning_rate": 9.999918729041868e-07,
"loss": 0.33467385172843933,
"step": 125
},
{
"epoch": 0.5132382892057027,
"grad_norm": 2.8417587280273438,
"learning_rate": 9.999817140963419e-07,
"loss": 0.35100705921649933,
"step": 126
},
{
"epoch": 0.5173116089613035,
"grad_norm": 2.905728340148926,
"learning_rate": 9.999674918809457e-07,
"loss": 0.32811686396598816,
"step": 127
},
{
"epoch": 0.5213849287169042,
"grad_norm": 2.5878095626831055,
"learning_rate": 9.99949206373584e-07,
"loss": 0.32490645349025726,
"step": 128
},
{
"epoch": 0.5254582484725051,
"grad_norm": 2.9762229919433594,
"learning_rate": 9.999268577228648e-07,
"loss": 0.3934018760919571,
"step": 129
},
{
"epoch": 0.5295315682281059,
"grad_norm": 2.792989492416382,
"learning_rate": 9.99900446110418e-07,
"loss": 0.3315049111843109,
"step": 130
},
{
"epoch": 0.5336048879837068,
"grad_norm": 2.6891062259674072,
"learning_rate": 9.998699717508945e-07,
"loss": 0.3097301423549652,
"step": 131
},
{
"epoch": 0.5376782077393075,
"grad_norm": 2.92191481590271,
"learning_rate": 9.99835434891962e-07,
"loss": 0.34749817848205566,
"step": 132
},
{
"epoch": 0.5417515274949084,
"grad_norm": 2.980543851852417,
"learning_rate": 9.99796835814306e-07,
"loss": 0.3367327153682709,
"step": 133
},
{
"epoch": 0.5458248472505092,
"grad_norm": 2.50433611869812,
"learning_rate": 9.99754174831625e-07,
"loss": 0.3090934008359909,
"step": 134
},
{
"epoch": 0.5498981670061099,
"grad_norm": 2.869647979736328,
"learning_rate": 9.9970745229063e-07,
"loss": 0.35603591799736023,
"step": 135
},
{
"epoch": 0.5539714867617108,
"grad_norm": 2.6435837745666504,
"learning_rate": 9.9965666857104e-07,
"loss": 0.3288918733596802,
"step": 136
},
{
"epoch": 0.5580448065173116,
"grad_norm": 2.7970142364501953,
"learning_rate": 9.996018240855806e-07,
"loss": 0.3878723680973053,
"step": 137
},
{
"epoch": 0.5621181262729125,
"grad_norm": 2.593043327331543,
"learning_rate": 9.995429192799788e-07,
"loss": 0.3534126281738281,
"step": 138
},
{
"epoch": 0.5661914460285132,
"grad_norm": 2.8867013454437256,
"learning_rate": 9.994799546329602e-07,
"loss": 0.38061630725860596,
"step": 139
},
{
"epoch": 0.570264765784114,
"grad_norm": 2.589017152786255,
"learning_rate": 9.994129306562458e-07,
"loss": 0.37725748121738434,
"step": 140
},
{
"epoch": 0.5743380855397149,
"grad_norm": 2.369696617126465,
"learning_rate": 9.993418478945472e-07,
"loss": 0.32034583389759064,
"step": 141
},
{
"epoch": 0.5784114052953157,
"grad_norm": 2.6410069465637207,
"learning_rate": 9.992667069255618e-07,
"loss": 0.36017628014087677,
"step": 142
},
{
"epoch": 0.5824847250509165,
"grad_norm": 2.597259283065796,
"learning_rate": 9.991875083599688e-07,
"loss": 0.32577911019325256,
"step": 143
},
{
"epoch": 0.5865580448065173,
"grad_norm": 2.761859655380249,
"learning_rate": 9.991042528414237e-07,
"loss": 0.33353830873966217,
"step": 144
},
{
"epoch": 0.5906313645621182,
"grad_norm": 2.7634713649749756,
"learning_rate": 9.990169410465536e-07,
"loss": 0.33604632318019867,
"step": 145
},
{
"epoch": 0.594704684317719,
"grad_norm": 2.820897340774536,
"learning_rate": 9.98925573684951e-07,
"loss": 0.3069554716348648,
"step": 146
},
{
"epoch": 0.5987780040733197,
"grad_norm": 2.856700897216797,
"learning_rate": 9.98830151499169e-07,
"loss": 0.33896636962890625,
"step": 147
},
{
"epoch": 0.6028513238289206,
"grad_norm": 2.9203782081604004,
"learning_rate": 9.987306752647142e-07,
"loss": 0.35070909559726715,
"step": 148
},
{
"epoch": 0.6069246435845214,
"grad_norm": 2.679352283477783,
"learning_rate": 9.986271457900414e-07,
"loss": 0.3325359970331192,
"step": 149
},
{
"epoch": 0.6109979633401222,
"grad_norm": 2.4953606128692627,
"learning_rate": 9.98519563916546e-07,
"loss": 0.32330869138240814,
"step": 150
},
{
"epoch": 0.615071283095723,
"grad_norm": 2.618744134902954,
"learning_rate": 9.98407930518558e-07,
"loss": 0.33912393450737,
"step": 151
},
{
"epoch": 0.6191446028513238,
"grad_norm": 2.6512296199798584,
"learning_rate": 9.982922465033348e-07,
"loss": 0.3045920431613922,
"step": 152
},
{
"epoch": 0.6232179226069247,
"grad_norm": 2.7606050968170166,
"learning_rate": 9.981725128110532e-07,
"loss": 0.32916732132434845,
"step": 153
},
{
"epoch": 0.6272912423625254,
"grad_norm": 2.95037841796875,
"learning_rate": 9.980487304148024e-07,
"loss": 0.36757831275463104,
"step": 154
},
{
"epoch": 0.6313645621181263,
"grad_norm": 2.890489339828491,
"learning_rate": 9.97920900320576e-07,
"loss": 0.36117151379585266,
"step": 155
},
{
"epoch": 0.6354378818737271,
"grad_norm": 2.7488858699798584,
"learning_rate": 9.97789023567263e-07,
"loss": 0.35026322305202484,
"step": 156
},
{
"epoch": 0.639511201629328,
"grad_norm": 2.5479671955108643,
"learning_rate": 9.976531012266413e-07,
"loss": 0.308156818151474,
"step": 157
},
{
"epoch": 0.6435845213849287,
"grad_norm": 2.717344045639038,
"learning_rate": 9.975131344033664e-07,
"loss": 0.29827529191970825,
"step": 158
},
{
"epoch": 0.6476578411405295,
"grad_norm": 2.569551467895508,
"learning_rate": 9.973691242349648e-07,
"loss": 0.3232528269290924,
"step": 159
},
{
"epoch": 0.6517311608961304,
"grad_norm": 3.0013420581817627,
"learning_rate": 9.972210718918233e-07,
"loss": 0.3270832598209381,
"step": 160
},
{
"epoch": 0.6558044806517311,
"grad_norm": 2.7339162826538086,
"learning_rate": 9.970689785771798e-07,
"loss": 0.3668155074119568,
"step": 161
},
{
"epoch": 0.659877800407332,
"grad_norm": 2.6689724922180176,
"learning_rate": 9.969128455271137e-07,
"loss": 0.32853490114212036,
"step": 162
},
{
"epoch": 0.6639511201629328,
"grad_norm": 3.042081117630005,
"learning_rate": 9.967526740105358e-07,
"loss": 0.3487651199102402,
"step": 163
},
{
"epoch": 0.6680244399185336,
"grad_norm": 2.4641284942626953,
"learning_rate": 9.965884653291783e-07,
"loss": 0.35704147815704346,
"step": 164
},
{
"epoch": 0.6720977596741344,
"grad_norm": 2.6836225986480713,
"learning_rate": 9.964202208175833e-07,
"loss": 0.33587950468063354,
"step": 165
},
{
"epoch": 0.6761710794297352,
"grad_norm": 2.2905988693237305,
"learning_rate": 9.962479418430932e-07,
"loss": 0.3061918318271637,
"step": 166
},
{
"epoch": 0.6802443991853361,
"grad_norm": 2.4772934913635254,
"learning_rate": 9.960716298058381e-07,
"loss": 0.2896444499492645,
"step": 167
},
{
"epoch": 0.6843177189409368,
"grad_norm": 2.6987321376800537,
"learning_rate": 9.958912861387258e-07,
"loss": 0.3374595195055008,
"step": 168
},
{
"epoch": 0.6883910386965377,
"grad_norm": 2.6165449619293213,
"learning_rate": 9.9570691230743e-07,
"loss": 0.33027225732803345,
"step": 169
},
{
"epoch": 0.6924643584521385,
"grad_norm": 3.1326680183410645,
"learning_rate": 9.955185098103771e-07,
"loss": 0.3138381540775299,
"step": 170
},
{
"epoch": 0.6965376782077393,
"grad_norm": 2.5313732624053955,
"learning_rate": 9.953260801787356e-07,
"loss": 0.31824737787246704,
"step": 171
},
{
"epoch": 0.7006109979633401,
"grad_norm": 2.529325008392334,
"learning_rate": 9.951296249764025e-07,
"loss": 0.298155277967453,
"step": 172
},
{
"epoch": 0.7046843177189409,
"grad_norm": 2.6821744441986084,
"learning_rate": 9.949291457999916e-07,
"loss": 0.33296874165534973,
"step": 173
},
{
"epoch": 0.7087576374745418,
"grad_norm": 2.588157892227173,
"learning_rate": 9.947246442788193e-07,
"loss": 0.31226691603660583,
"step": 174
},
{
"epoch": 0.7128309572301426,
"grad_norm": 2.7822420597076416,
"learning_rate": 9.945161220748927e-07,
"loss": 0.322743222117424,
"step": 175
},
{
"epoch": 0.7169042769857433,
"grad_norm": 2.379702091217041,
"learning_rate": 9.943035808828953e-07,
"loss": 0.3056500107049942,
"step": 176
},
{
"epoch": 0.7209775967413442,
"grad_norm": 2.4450721740722656,
"learning_rate": 9.94087022430173e-07,
"loss": 0.3037564754486084,
"step": 177
},
{
"epoch": 0.725050916496945,
"grad_norm": 2.5885887145996094,
"learning_rate": 9.938664484767205e-07,
"loss": 0.327587828040123,
"step": 178
},
{
"epoch": 0.7291242362525459,
"grad_norm": 2.613290309906006,
"learning_rate": 9.936418608151675e-07,
"loss": 0.33323927223682404,
"step": 179
},
{
"epoch": 0.7331975560081466,
"grad_norm": 2.6541707515716553,
"learning_rate": 9.93413261270763e-07,
"loss": 0.3316569924354553,
"step": 180
},
{
"epoch": 0.7372708757637475,
"grad_norm": 2.646383047103882,
"learning_rate": 9.931806517013612e-07,
"loss": 0.35486292839050293,
"step": 181
},
{
"epoch": 0.7413441955193483,
"grad_norm": 2.5270328521728516,
"learning_rate": 9.92944033997406e-07,
"loss": 0.3157142102718353,
"step": 182
},
{
"epoch": 0.745417515274949,
"grad_norm": 2.5851869583129883,
"learning_rate": 9.927034100819163e-07,
"loss": 0.3013855814933777,
"step": 183
},
{
"epoch": 0.7494908350305499,
"grad_norm": 2.75219988822937,
"learning_rate": 9.924587819104695e-07,
"loss": 0.3420049250125885,
"step": 184
},
{
"epoch": 0.7535641547861507,
"grad_norm": 2.436596632003784,
"learning_rate": 9.922101514711865e-07,
"loss": 0.3062688261270523,
"step": 185
},
{
"epoch": 0.7576374745417516,
"grad_norm": 2.9479236602783203,
"learning_rate": 9.919575207847145e-07,
"loss": 0.31793762743473053,
"step": 186
},
{
"epoch": 0.7617107942973523,
"grad_norm": 2.5482208728790283,
"learning_rate": 9.917008919042116e-07,
"loss": 0.3306496888399124,
"step": 187
},
{
"epoch": 0.7657841140529531,
"grad_norm": 2.609839677810669,
"learning_rate": 9.914402669153295e-07,
"loss": 0.29324449598789215,
"step": 188
},
{
"epoch": 0.769857433808554,
"grad_norm": 2.5740039348602295,
"learning_rate": 9.91175647936197e-07,
"loss": 0.3193310797214508,
"step": 189
},
{
"epoch": 0.7739307535641547,
"grad_norm": 2.3878629207611084,
"learning_rate": 9.909070371174019e-07,
"loss": 0.3040658235549927,
"step": 190
},
{
"epoch": 0.7780040733197556,
"grad_norm": 2.755152463912964,
"learning_rate": 9.906344366419746e-07,
"loss": 0.33930477499961853,
"step": 191
},
{
"epoch": 0.7820773930753564,
"grad_norm": 2.58367657661438,
"learning_rate": 9.9035784872537e-07,
"loss": 0.3244568109512329,
"step": 192
},
{
"epoch": 0.7861507128309573,
"grad_norm": 2.350712537765503,
"learning_rate": 9.90077275615449e-07,
"loss": 0.2779058516025543,
"step": 193
},
{
"epoch": 0.790224032586558,
"grad_norm": 2.7418465614318848,
"learning_rate": 9.897927195924608e-07,
"loss": 0.32641272246837616,
"step": 194
},
{
"epoch": 0.7942973523421588,
"grad_norm": 2.516510009765625,
"learning_rate": 9.895041829690238e-07,
"loss": 0.3083319664001465,
"step": 195
},
{
"epoch": 0.7983706720977597,
"grad_norm": 2.7772316932678223,
"learning_rate": 9.892116680901084e-07,
"loss": 0.30357757210731506,
"step": 196
},
{
"epoch": 0.8024439918533605,
"grad_norm": 2.5389041900634766,
"learning_rate": 9.88915177333015e-07,
"loss": 0.30694054067134857,
"step": 197
},
{
"epoch": 0.8065173116089613,
"grad_norm": 2.7129383087158203,
"learning_rate": 9.886147131073579e-07,
"loss": 0.3402569591999054,
"step": 198
},
{
"epoch": 0.8105906313645621,
"grad_norm": 2.654186248779297,
"learning_rate": 9.883102778550434e-07,
"loss": 0.3343619704246521,
"step": 199
},
{
"epoch": 0.814663951120163,
"grad_norm": 2.380168914794922,
"learning_rate": 9.880018740502508e-07,
"loss": 0.3020651191473007,
"step": 200
},
{
"epoch": 0.8187372708757638,
"grad_norm": 2.771951198577881,
"learning_rate": 9.876895041994127e-07,
"loss": 0.30565840005874634,
"step": 201
},
{
"epoch": 0.8228105906313645,
"grad_norm": 2.4966540336608887,
"learning_rate": 9.873731708411939e-07,
"loss": 0.3085058331489563,
"step": 202
},
{
"epoch": 0.8268839103869654,
"grad_norm": 2.5919551849365234,
"learning_rate": 9.870528765464711e-07,
"loss": 0.34540820121765137,
"step": 203
},
{
"epoch": 0.8309572301425662,
"grad_norm": 3.0668885707855225,
"learning_rate": 9.867286239183122e-07,
"loss": 0.3307037353515625,
"step": 204
},
{
"epoch": 0.835030549898167,
"grad_norm": 2.4281554222106934,
"learning_rate": 9.864004155919544e-07,
"loss": 0.28929875791072845,
"step": 205
},
{
"epoch": 0.8391038696537678,
"grad_norm": 2.5561623573303223,
"learning_rate": 9.860682542347838e-07,
"loss": 0.3272414803504944,
"step": 206
},
{
"epoch": 0.8431771894093686,
"grad_norm": 2.824591636657715,
"learning_rate": 9.85732142546313e-07,
"loss": 0.3192295432090759,
"step": 207
},
{
"epoch": 0.8472505091649695,
"grad_norm": 2.643718719482422,
"learning_rate": 9.853920832581597e-07,
"loss": 0.31284041702747345,
"step": 208
},
{
"epoch": 0.8513238289205702,
"grad_norm": 2.6777195930480957,
"learning_rate": 9.850480791340236e-07,
"loss": 0.3136574327945709,
"step": 209
},
{
"epoch": 0.8553971486761711,
"grad_norm": 2.5229766368865967,
"learning_rate": 9.847001329696652e-07,
"loss": 0.3047819435596466,
"step": 210
},
{
"epoch": 0.8594704684317719,
"grad_norm": 2.659447431564331,
"learning_rate": 9.843482475928818e-07,
"loss": 0.3642407953739166,
"step": 211
},
{
"epoch": 0.8635437881873728,
"grad_norm": 2.697049379348755,
"learning_rate": 9.839924258634853e-07,
"loss": 0.3134022653102875,
"step": 212
},
{
"epoch": 0.8676171079429735,
"grad_norm": 2.629868745803833,
"learning_rate": 9.83632670673279e-07,
"loss": 0.306331992149353,
"step": 213
},
{
"epoch": 0.8716904276985743,
"grad_norm": 2.4997003078460693,
"learning_rate": 9.832689849460339e-07,
"loss": 0.3142865002155304,
"step": 214
},
{
"epoch": 0.8757637474541752,
"grad_norm": 2.826869010925293,
"learning_rate": 9.829013716374647e-07,
"loss": 0.2904099076986313,
"step": 215
},
{
"epoch": 0.879837067209776,
"grad_norm": 2.6697499752044678,
"learning_rate": 9.825298337352058e-07,
"loss": 0.29838354885578156,
"step": 216
},
{
"epoch": 0.8839103869653768,
"grad_norm": 2.5330023765563965,
"learning_rate": 9.821543742587876e-07,
"loss": 0.3052047789096832,
"step": 217
},
{
"epoch": 0.8879837067209776,
"grad_norm": 2.806683301925659,
"learning_rate": 9.817749962596114e-07,
"loss": 0.3121778964996338,
"step": 218
},
{
"epoch": 0.8920570264765784,
"grad_norm": 2.718122720718384,
"learning_rate": 9.81391702820925e-07,
"loss": 0.32955022156238556,
"step": 219
},
{
"epoch": 0.8961303462321792,
"grad_norm": 2.346466541290283,
"learning_rate": 9.81004497057797e-07,
"loss": 0.291049063205719,
"step": 220
},
{
"epoch": 0.90020366598778,
"grad_norm": 2.4048361778259277,
"learning_rate": 9.806133821170924e-07,
"loss": 0.30249159038066864,
"step": 221
},
{
"epoch": 0.9042769857433809,
"grad_norm": 2.681546688079834,
"learning_rate": 9.80218361177446e-07,
"loss": 0.362154021859169,
"step": 222
},
{
"epoch": 0.9083503054989817,
"grad_norm": 2.792266368865967,
"learning_rate": 9.798194374492375e-07,
"loss": 0.28344525396823883,
"step": 223
},
{
"epoch": 0.9124236252545825,
"grad_norm": 2.507050037384033,
"learning_rate": 9.794166141745646e-07,
"loss": 0.2935172915458679,
"step": 224
},
{
"epoch": 0.9164969450101833,
"grad_norm": 2.7160379886627197,
"learning_rate": 9.790098946272177e-07,
"loss": 0.3005199581384659,
"step": 225
},
{
"epoch": 0.9205702647657841,
"grad_norm": 2.666494131088257,
"learning_rate": 9.785992821126518e-07,
"loss": 0.30710943043231964,
"step": 226
},
{
"epoch": 0.924643584521385,
"grad_norm": 2.699313163757324,
"learning_rate": 9.781847799679615e-07,
"loss": 0.3164513558149338,
"step": 227
},
{
"epoch": 0.9287169042769857,
"grad_norm": 2.49406099319458,
"learning_rate": 9.777663915618517e-07,
"loss": 0.3061770647764206,
"step": 228
},
{
"epoch": 0.9327902240325866,
"grad_norm": 2.552093029022217,
"learning_rate": 9.773441202946121e-07,
"loss": 0.2973909080028534,
"step": 229
},
{
"epoch": 0.9368635437881874,
"grad_norm": 2.5773231983184814,
"learning_rate": 9.76917969598089e-07,
"loss": 0.31120532751083374,
"step": 230
},
{
"epoch": 0.9409368635437881,
"grad_norm": 2.653515100479126,
"learning_rate": 9.76487942935657e-07,
"loss": 0.3365926146507263,
"step": 231
},
{
"epoch": 0.945010183299389,
"grad_norm": 2.670433282852173,
"learning_rate": 9.760540438021907e-07,
"loss": 0.3196941614151001,
"step": 232
},
{
"epoch": 0.9490835030549898,
"grad_norm": 2.892035961151123,
"learning_rate": 9.756162757240373e-07,
"loss": 0.33982205390930176,
"step": 233
},
{
"epoch": 0.9531568228105907,
"grad_norm": 2.5157856941223145,
"learning_rate": 9.751746422589872e-07,
"loss": 0.2537951096892357,
"step": 234
},
{
"epoch": 0.9572301425661914,
"grad_norm": 2.6808388233184814,
"learning_rate": 9.747291469962452e-07,
"loss": 0.2846526652574539,
"step": 235
},
{
"epoch": 0.9613034623217923,
"grad_norm": 2.451559066772461,
"learning_rate": 9.742797935564011e-07,
"loss": 0.29611095786094666,
"step": 236
},
{
"epoch": 0.9653767820773931,
"grad_norm": 2.7313358783721924,
"learning_rate": 9.738265855914012e-07,
"loss": 0.3275996297597885,
"step": 237
},
{
"epoch": 0.9694501018329938,
"grad_norm": 2.5593299865722656,
"learning_rate": 9.733695267845171e-07,
"loss": 0.2993656247854233,
"step": 238
},
{
"epoch": 0.9735234215885947,
"grad_norm": 2.6013288497924805,
"learning_rate": 9.729086208503173e-07,
"loss": 0.31615155935287476,
"step": 239
},
{
"epoch": 0.9775967413441955,
"grad_norm": 2.5403575897216797,
"learning_rate": 9.72443871534636e-07,
"loss": 0.2843424677848816,
"step": 240
},
{
"epoch": 0.9816700610997964,
"grad_norm": 2.4495410919189453,
"learning_rate": 9.719752826145432e-07,
"loss": 0.2987358868122101,
"step": 241
},
{
"epoch": 0.9857433808553971,
"grad_norm": 2.719775676727295,
"learning_rate": 9.715028578983136e-07,
"loss": 0.34320636093616486,
"step": 242
},
{
"epoch": 0.9898167006109979,
"grad_norm": 2.7152929306030273,
"learning_rate": 9.71026601225396e-07,
"loss": 0.2937510758638382,
"step": 243
},
{
"epoch": 0.9938900203665988,
"grad_norm": 2.4305663108825684,
"learning_rate": 9.705465164663817e-07,
"loss": 0.29807206988334656,
"step": 244
},
{
"epoch": 0.9979633401221996,
"grad_norm": 2.322704792022705,
"learning_rate": 9.700626075229738e-07,
"loss": 0.3189048618078232,
"step": 245
},
{
"epoch": 1.0,
"grad_norm": 2.322704792022705,
"learning_rate": 9.695748783279544e-07,
"loss": 0.3195984363555908,
"step": 246
},
{
"epoch": 1.0040733197556009,
"grad_norm": 2.505608081817627,
"learning_rate": 9.690833328451532e-07,
"loss": 0.24639207869768143,
"step": 247
},
{
"epoch": 1.0081466395112015,
"grad_norm": 2.5328471660614014,
"learning_rate": 9.68587975069416e-07,
"loss": 0.3204229325056076,
"step": 248
},
{
"epoch": 1.0122199592668024,
"grad_norm": 2.462449073791504,
"learning_rate": 9.680888090265713e-07,
"loss": 0.29177573323249817,
"step": 249
},
{
"epoch": 1.0162932790224033,
"grad_norm": 2.4300286769866943,
"learning_rate": 9.67585838773397e-07,
"loss": 0.2859930992126465,
"step": 250
},
{
"epoch": 1.0203665987780042,
"grad_norm": 2.4073538780212402,
"learning_rate": 9.67079068397589e-07,
"loss": 0.253083311021328,
"step": 251
},
{
"epoch": 1.0244399185336048,
"grad_norm": 2.4259159564971924,
"learning_rate": 9.66568502017727e-07,
"loss": 0.27018117904663086,
"step": 252
},
{
"epoch": 1.0285132382892057,
"grad_norm": 2.490452766418457,
"learning_rate": 9.660541437832416e-07,
"loss": 0.270308181643486,
"step": 253
},
{
"epoch": 1.0325865580448066,
"grad_norm": 2.2988967895507812,
"learning_rate": 9.655359978743798e-07,
"loss": 0.2865062654018402,
"step": 254
},
{
"epoch": 1.0366598778004072,
"grad_norm": 2.3249478340148926,
"learning_rate": 9.650140685021716e-07,
"loss": 0.30297039449214935,
"step": 255
},
{
"epoch": 1.0407331975560081,
"grad_norm": 2.530956506729126,
"learning_rate": 9.644883599083957e-07,
"loss": 0.2875078618526459,
"step": 256
},
{
"epoch": 1.044806517311609,
"grad_norm": 2.462820053100586,
"learning_rate": 9.639588763655448e-07,
"loss": 0.3308262676000595,
"step": 257
},
{
"epoch": 1.0488798370672097,
"grad_norm": 2.733877658843994,
"learning_rate": 9.634256221767912e-07,
"loss": 0.3172256797552109,
"step": 258
},
{
"epoch": 1.0529531568228105,
"grad_norm": 2.5849740505218506,
"learning_rate": 9.628886016759515e-07,
"loss": 0.24582470953464508,
"step": 259
},
{
"epoch": 1.0570264765784114,
"grad_norm": 2.495885133743286,
"learning_rate": 9.623478192274517e-07,
"loss": 0.29177717864513397,
"step": 260
},
{
"epoch": 1.0610997963340123,
"grad_norm": 2.7560508251190186,
"learning_rate": 9.618032792262914e-07,
"loss": 0.2618248611688614,
"step": 261
},
{
"epoch": 1.065173116089613,
"grad_norm": 2.5949246883392334,
"learning_rate": 9.612549860980087e-07,
"loss": 0.29752814769744873,
"step": 262
},
{
"epoch": 1.0692464358452138,
"grad_norm": 2.5723235607147217,
"learning_rate": 9.60702944298643e-07,
"loss": 0.283334344625473,
"step": 263
},
{
"epoch": 1.0733197556008147,
"grad_norm": 2.5463922023773193,
"learning_rate": 9.601471583147002e-07,
"loss": 0.29830360412597656,
"step": 264
},
{
"epoch": 1.0773930753564154,
"grad_norm": 2.4415295124053955,
"learning_rate": 9.595876326631153e-07,
"loss": 0.278356209397316,
"step": 265
},
{
"epoch": 1.0814663951120163,
"grad_norm": 2.4167826175689697,
"learning_rate": 9.590243718912164e-07,
"loss": 0.2482159435749054,
"step": 266
},
{
"epoch": 1.0855397148676171,
"grad_norm": 2.4946138858795166,
"learning_rate": 9.584573805766867e-07,
"loss": 0.2892283797264099,
"step": 267
},
{
"epoch": 1.089613034623218,
"grad_norm": 2.585139036178589,
"learning_rate": 9.578866633275286e-07,
"loss": 0.28597134351730347,
"step": 268
},
{
"epoch": 1.0936863543788187,
"grad_norm": 2.5749940872192383,
"learning_rate": 9.573122247820253e-07,
"loss": 0.24586065858602524,
"step": 269
},
{
"epoch": 1.0977596741344195,
"grad_norm": 2.6099436283111572,
"learning_rate": 9.567340696087032e-07,
"loss": 0.2968917638063431,
"step": 270
},
{
"epoch": 1.1018329938900204,
"grad_norm": 2.5843560695648193,
"learning_rate": 9.561522025062946e-07,
"loss": 0.3214084059000015,
"step": 271
},
{
"epoch": 1.105906313645621,
"grad_norm": 2.7599520683288574,
"learning_rate": 9.555666282036984e-07,
"loss": 0.2851483225822449,
"step": 272
},
{
"epoch": 1.109979633401222,
"grad_norm": 2.340637683868408,
"learning_rate": 9.549773514599428e-07,
"loss": 0.2864815294742584,
"step": 273
},
{
"epoch": 1.1140529531568228,
"grad_norm": 2.682030200958252,
"learning_rate": 9.543843770641463e-07,
"loss": 0.2536205053329468,
"step": 274
},
{
"epoch": 1.1181262729124237,
"grad_norm": 2.3765580654144287,
"learning_rate": 9.537877098354784e-07,
"loss": 0.24970313906669617,
"step": 275
},
{
"epoch": 1.1221995926680244,
"grad_norm": 2.5046305656433105,
"learning_rate": 9.531873546231208e-07,
"loss": 0.2925817370414734,
"step": 276
},
{
"epoch": 1.1262729124236253,
"grad_norm": 2.487609386444092,
"learning_rate": 9.525833163062273e-07,
"loss": 0.28858567774295807,
"step": 277
},
{
"epoch": 1.1303462321792261,
"grad_norm": 2.5617923736572266,
"learning_rate": 9.519755997938856e-07,
"loss": 0.29943670332431793,
"step": 278
},
{
"epoch": 1.1344195519348268,
"grad_norm": 2.5144989490509033,
"learning_rate": 9.51364210025076e-07,
"loss": 0.26054797321558,
"step": 279
},
{
"epoch": 1.1384928716904277,
"grad_norm": 2.393367290496826,
"learning_rate": 9.507491519686315e-07,
"loss": 0.2766056954860687,
"step": 280
},
{
"epoch": 1.1425661914460286,
"grad_norm": 2.5914998054504395,
"learning_rate": 9.501304306231983e-07,
"loss": 0.2934035658836365,
"step": 281
},
{
"epoch": 1.1466395112016294,
"grad_norm": 2.6555488109588623,
"learning_rate": 9.495080510171942e-07,
"loss": 0.27582375705242157,
"step": 282
},
{
"epoch": 1.15071283095723,
"grad_norm": 2.4485878944396973,
"learning_rate": 9.488820182087682e-07,
"loss": 0.27037250250577927,
"step": 283
},
{
"epoch": 1.154786150712831,
"grad_norm": 2.5796926021575928,
"learning_rate": 9.482523372857592e-07,
"loss": 0.25297392159700394,
"step": 284
},
{
"epoch": 1.1588594704684319,
"grad_norm": 2.4900271892547607,
"learning_rate": 9.476190133656548e-07,
"loss": 0.2928699851036072,
"step": 285
},
{
"epoch": 1.1629327902240325,
"grad_norm": 2.378876209259033,
"learning_rate": 9.469820515955497e-07,
"loss": 0.3021889925003052,
"step": 286
},
{
"epoch": 1.1670061099796334,
"grad_norm": 2.377063751220703,
"learning_rate": 9.463414571521036e-07,
"loss": 0.26095758378505707,
"step": 287
},
{
"epoch": 1.1710794297352343,
"grad_norm": 2.347393274307251,
"learning_rate": 9.456972352414997e-07,
"loss": 0.24494879692792892,
"step": 288
},
{
"epoch": 1.175152749490835,
"grad_norm": 2.3959014415740967,
"learning_rate": 9.450493910994017e-07,
"loss": 0.28228186070919037,
"step": 289
},
{
"epoch": 1.1792260692464358,
"grad_norm": 2.5725162029266357,
"learning_rate": 9.443979299909117e-07,
"loss": 0.3038964122533798,
"step": 290
},
{
"epoch": 1.1832993890020367,
"grad_norm": 3.050133228302002,
"learning_rate": 9.437428572105274e-07,
"loss": 0.28559453785419464,
"step": 291
},
{
"epoch": 1.1873727087576376,
"grad_norm": 2.4305083751678467,
"learning_rate": 9.430841780820988e-07,
"loss": 0.26872922480106354,
"step": 292
},
{
"epoch": 1.1914460285132382,
"grad_norm": 2.3637914657592773,
"learning_rate": 9.424218979587852e-07,
"loss": 0.2783532440662384,
"step": 293
},
{
"epoch": 1.195519348268839,
"grad_norm": 2.4774560928344727,
"learning_rate": 9.417560222230114e-07,
"loss": 0.2842411994934082,
"step": 294
},
{
"epoch": 1.19959266802444,
"grad_norm": 2.5469460487365723,
"learning_rate": 9.410865562864245e-07,
"loss": 0.2584770694375038,
"step": 295
},
{
"epoch": 1.2036659877800409,
"grad_norm": 2.3821113109588623,
"learning_rate": 9.404135055898495e-07,
"loss": 0.29300089180469513,
"step": 296
},
{
"epoch": 1.2077393075356415,
"grad_norm": 2.7043542861938477,
"learning_rate": 9.397368756032444e-07,
"loss": 0.2970714569091797,
"step": 297
},
{
"epoch": 1.2118126272912424,
"grad_norm": 2.353179693222046,
"learning_rate": 9.390566718256578e-07,
"loss": 0.28464287519454956,
"step": 298
},
{
"epoch": 1.2158859470468433,
"grad_norm": 2.615365743637085,
"learning_rate": 9.383728997851819e-07,
"loss": 0.2843187004327774,
"step": 299
},
{
"epoch": 1.219959266802444,
"grad_norm": 2.526761054992676,
"learning_rate": 9.376855650389089e-07,
"loss": 0.29214321076869965,
"step": 300
},
{
"epoch": 1.2240325865580448,
"grad_norm": 2.416611909866333,
"learning_rate": 9.369946731728854e-07,
"loss": 0.2746543139219284,
"step": 301
},
{
"epoch": 1.2281059063136457,
"grad_norm": 2.6475841999053955,
"learning_rate": 9.363002298020673e-07,
"loss": 0.32700057327747345,
"step": 302
},
{
"epoch": 1.2321792260692463,
"grad_norm": 2.7061877250671387,
"learning_rate": 9.356022405702739e-07,
"loss": 0.26850761473178864,
"step": 303
},
{
"epoch": 1.2362525458248472,
"grad_norm": 2.578963279724121,
"learning_rate": 9.349007111501419e-07,
"loss": 0.2758927643299103,
"step": 304
},
{
"epoch": 1.240325865580448,
"grad_norm": 2.476339340209961,
"learning_rate": 9.341956472430801e-07,
"loss": 0.3046765774488449,
"step": 305
},
{
"epoch": 1.2443991853360488,
"grad_norm": 2.594074249267578,
"learning_rate": 9.334870545792217e-07,
"loss": 0.30949874222278595,
"step": 306
},
{
"epoch": 1.2484725050916496,
"grad_norm": 2.311086416244507,
"learning_rate": 9.32774938917379e-07,
"loss": 0.27652885019779205,
"step": 307
},
{
"epoch": 1.2525458248472505,
"grad_norm": 2.5869719982147217,
"learning_rate": 9.320593060449958e-07,
"loss": 0.2891390472650528,
"step": 308
},
{
"epoch": 1.2566191446028514,
"grad_norm": 2.7591726779937744,
"learning_rate": 9.313401617781012e-07,
"loss": 0.2366793006658554,
"step": 309
},
{
"epoch": 1.260692464358452,
"grad_norm": 2.394120693206787,
"learning_rate": 9.306175119612612e-07,
"loss": 0.2750696837902069,
"step": 310
},
{
"epoch": 1.264765784114053,
"grad_norm": 2.542830228805542,
"learning_rate": 9.29891362467532e-07,
"loss": 0.29537804424762726,
"step": 311
},
{
"epoch": 1.2688391038696538,
"grad_norm": 2.63010573387146,
"learning_rate": 9.291617191984123e-07,
"loss": 0.28230538964271545,
"step": 312
},
{
"epoch": 1.2729124236252547,
"grad_norm": 2.4835424423217773,
"learning_rate": 9.284285880837946e-07,
"loss": 0.2597920596599579,
"step": 313
},
{
"epoch": 1.2769857433808554,
"grad_norm": 2.4826974868774414,
"learning_rate": 9.276919750819181e-07,
"loss": 0.28388310968875885,
"step": 314
},
{
"epoch": 1.2810590631364562,
"grad_norm": 2.495941400527954,
"learning_rate": 9.269518861793193e-07,
"loss": 0.2510681226849556,
"step": 315
},
{
"epoch": 1.2851323828920571,
"grad_norm": 2.3886849880218506,
"learning_rate": 9.262083273907837e-07,
"loss": 0.28582368791103363,
"step": 316
},
{
"epoch": 1.2892057026476578,
"grad_norm": 2.443054437637329,
"learning_rate": 9.254613047592974e-07,
"loss": 0.2807822525501251,
"step": 317
},
{
"epoch": 1.2932790224032586,
"grad_norm": 2.2685866355895996,
"learning_rate": 9.247108243559971e-07,
"loss": 0.2636859193444252,
"step": 318
},
{
"epoch": 1.2973523421588595,
"grad_norm": 2.4459731578826904,
"learning_rate": 9.239568922801212e-07,
"loss": 0.28120698034763336,
"step": 319
},
{
"epoch": 1.3014256619144602,
"grad_norm": 2.4667344093322754,
"learning_rate": 9.231995146589605e-07,
"loss": 0.27484674006700516,
"step": 320
},
{
"epoch": 1.305498981670061,
"grad_norm": 2.432385206222534,
"learning_rate": 9.22438697647808e-07,
"loss": 0.3200181722640991,
"step": 321
},
{
"epoch": 1.309572301425662,
"grad_norm": 2.594827175140381,
"learning_rate": 9.21674447429909e-07,
"loss": 0.26146167516708374,
"step": 322
},
{
"epoch": 1.3136456211812626,
"grad_norm": 2.6781699657440186,
"learning_rate": 9.209067702164108e-07,
"loss": 0.30198925733566284,
"step": 323
},
{
"epoch": 1.3177189409368635,
"grad_norm": 2.382455348968506,
"learning_rate": 9.201356722463124e-07,
"loss": 0.2656544893980026,
"step": 324
},
{
"epoch": 1.3217922606924644,
"grad_norm": 2.3920562267303467,
"learning_rate": 9.193611597864137e-07,
"loss": 0.30542662739753723,
"step": 325
},
{
"epoch": 1.3258655804480652,
"grad_norm": 2.5210139751434326,
"learning_rate": 9.185832391312642e-07,
"loss": 0.33505627512931824,
"step": 326
},
{
"epoch": 1.3299389002036661,
"grad_norm": 2.530137777328491,
"learning_rate": 9.178019166031128e-07,
"loss": 0.28073740005493164,
"step": 327
},
{
"epoch": 1.3340122199592668,
"grad_norm": 2.6757373809814453,
"learning_rate": 9.170171985518551e-07,
"loss": 0.2617020010948181,
"step": 328
},
{
"epoch": 1.3380855397148677,
"grad_norm": 2.737130880355835,
"learning_rate": 9.162290913549831e-07,
"loss": 0.31688614189624786,
"step": 329
},
{
"epoch": 1.3421588594704685,
"grad_norm": 2.4106035232543945,
"learning_rate": 9.154376014175325e-07,
"loss": 0.25045838952064514,
"step": 330
},
{
"epoch": 1.3462321792260692,
"grad_norm": 2.5006155967712402,
"learning_rate": 9.146427351720307e-07,
"loss": 0.2350526750087738,
"step": 331
},
{
"epoch": 1.35030549898167,
"grad_norm": 2.443948268890381,
"learning_rate": 9.138444990784453e-07,
"loss": 0.3193846642971039,
"step": 332
},
{
"epoch": 1.354378818737271,
"grad_norm": 2.5271644592285156,
"learning_rate": 9.130428996241304e-07,
"loss": 0.2833334505558014,
"step": 333
},
{
"epoch": 1.3584521384928716,
"grad_norm": 2.3211746215820312,
"learning_rate": 9.122379433237748e-07,
"loss": 0.27067267149686813,
"step": 334
},
{
"epoch": 1.3625254582484725,
"grad_norm": 2.5455574989318848,
"learning_rate": 9.11429636719349e-07,
"loss": 0.3177233636379242,
"step": 335
},
{
"epoch": 1.3665987780040734,
"grad_norm": 2.9286653995513916,
"learning_rate": 9.106179863800513e-07,
"loss": 0.30554522573947906,
"step": 336
},
{
"epoch": 1.370672097759674,
"grad_norm": 2.452594757080078,
"learning_rate": 9.098029989022557e-07,
"loss": 0.29210953414440155,
"step": 337
},
{
"epoch": 1.374745417515275,
"grad_norm": 2.5022428035736084,
"learning_rate": 9.089846809094563e-07,
"loss": 0.25522106885910034,
"step": 338
},
{
"epoch": 1.3788187372708758,
"grad_norm": 2.6464457511901855,
"learning_rate": 9.081630390522157e-07,
"loss": 0.287350669503212,
"step": 339
},
{
"epoch": 1.3828920570264764,
"grad_norm": 2.3021206855773926,
"learning_rate": 9.073380800081096e-07,
"loss": 0.2737245708703995,
"step": 340
},
{
"epoch": 1.3869653767820773,
"grad_norm": 2.5468103885650635,
"learning_rate": 9.065098104816726e-07,
"loss": 0.26481032371520996,
"step": 341
},
{
"epoch": 1.3910386965376782,
"grad_norm": 2.3847126960754395,
"learning_rate": 9.056782372043445e-07,
"loss": 0.270496666431427,
"step": 342
},
{
"epoch": 1.395112016293279,
"grad_norm": 2.586550235748291,
"learning_rate": 9.048433669344144e-07,
"loss": 0.32853636145591736,
"step": 343
},
{
"epoch": 1.39918533604888,
"grad_norm": 2.5621798038482666,
"learning_rate": 9.04005206456967e-07,
"loss": 0.2707676887512207,
"step": 344
},
{
"epoch": 1.4032586558044806,
"grad_norm": 2.405062198638916,
"learning_rate": 9.031637625838264e-07,
"loss": 0.285464346408844,
"step": 345
},
{
"epoch": 1.4073319755600815,
"grad_norm": 2.382589817047119,
"learning_rate": 9.023190421535016e-07,
"loss": 0.2704601585865021,
"step": 346
},
{
"epoch": 1.4114052953156824,
"grad_norm": 2.2916622161865234,
"learning_rate": 9.014710520311306e-07,
"loss": 0.2510330229997635,
"step": 347
},
{
"epoch": 1.415478615071283,
"grad_norm": 2.3999521732330322,
"learning_rate": 9.006197991084241e-07,
"loss": 0.2853076159954071,
"step": 348
},
{
"epoch": 1.419551934826884,
"grad_norm": 2.6907191276550293,
"learning_rate": 8.997652903036104e-07,
"loss": 0.27804453670978546,
"step": 349
},
{
"epoch": 1.4236252545824848,
"grad_norm": 2.3953359127044678,
"learning_rate": 8.989075325613784e-07,
"loss": 0.24225353449583054,
"step": 350
},
{
"epoch": 1.4276985743380854,
"grad_norm": 2.5458950996398926,
"learning_rate": 8.980465328528218e-07,
"loss": 0.29082323610782623,
"step": 351
},
{
"epoch": 1.4317718940936863,
"grad_norm": 2.570434093475342,
"learning_rate": 8.971822981753818e-07,
"loss": 0.2637802064418793,
"step": 352
},
{
"epoch": 1.4358452138492872,
"grad_norm": 2.608654260635376,
"learning_rate": 8.963148355527908e-07,
"loss": 0.267157182097435,
"step": 353
},
{
"epoch": 1.4399185336048879,
"grad_norm": 2.505756139755249,
"learning_rate": 8.954441520350149e-07,
"loss": 0.31443025171756744,
"step": 354
},
{
"epoch": 1.4439918533604887,
"grad_norm": 2.8767051696777344,
"learning_rate": 8.945702546981968e-07,
"loss": 0.28173527121543884,
"step": 355
},
{
"epoch": 1.4480651731160896,
"grad_norm": 2.5839850902557373,
"learning_rate": 8.936931506445984e-07,
"loss": 0.2735389843583107,
"step": 356
},
{
"epoch": 1.4521384928716905,
"grad_norm": 2.380516529083252,
"learning_rate": 8.928128470025429e-07,
"loss": 0.2868722528219223,
"step": 357
},
{
"epoch": 1.4562118126272914,
"grad_norm": 2.373098134994507,
"learning_rate": 8.919293509263567e-07,
"loss": 0.266360841691494,
"step": 358
},
{
"epoch": 1.460285132382892,
"grad_norm": 2.440216541290283,
"learning_rate": 8.910426695963118e-07,
"loss": 0.26618482172489166,
"step": 359
},
{
"epoch": 1.464358452138493,
"grad_norm": 2.374094247817993,
"learning_rate": 8.901528102185669e-07,
"loss": 0.27816291898489,
"step": 360
},
{
"epoch": 1.4684317718940938,
"grad_norm": 2.3657944202423096,
"learning_rate": 8.892597800251093e-07,
"loss": 0.25230376422405243,
"step": 361
},
{
"epoch": 1.4725050916496945,
"grad_norm": 2.3009097576141357,
"learning_rate": 8.883635862736956e-07,
"loss": 0.26984117925167084,
"step": 362
},
{
"epoch": 1.4765784114052953,
"grad_norm": 2.9458165168762207,
"learning_rate": 8.874642362477929e-07,
"loss": 0.29643698036670685,
"step": 363
},
{
"epoch": 1.4806517311608962,
"grad_norm": 2.8554880619049072,
"learning_rate": 8.865617372565199e-07,
"loss": 0.256381556391716,
"step": 364
},
{
"epoch": 1.4847250509164969,
"grad_norm": 2.5279104709625244,
"learning_rate": 8.856560966345876e-07,
"loss": 0.24138055741786957,
"step": 365
},
{
"epoch": 1.4887983706720977,
"grad_norm": 2.4175782203674316,
"learning_rate": 8.847473217422388e-07,
"loss": 0.27801232039928436,
"step": 366
},
{
"epoch": 1.4928716904276986,
"grad_norm": 2.7168712615966797,
"learning_rate": 8.838354199651891e-07,
"loss": 0.28772565722465515,
"step": 367
},
{
"epoch": 1.4969450101832993,
"grad_norm": 2.5258262157440186,
"learning_rate": 8.829203987145669e-07,
"loss": 0.2848212271928787,
"step": 368
},
{
"epoch": 1.5010183299389002,
"grad_norm": 2.424790859222412,
"learning_rate": 8.820022654268525e-07,
"loss": 0.2653035372495651,
"step": 369
},
{
"epoch": 1.505091649694501,
"grad_norm": 2.416537284851074,
"learning_rate": 8.810810275638182e-07,
"loss": 0.27215129137039185,
"step": 370
},
{
"epoch": 1.5091649694501017,
"grad_norm": 2.5008385181427,
"learning_rate": 8.801566926124677e-07,
"loss": 0.2523413300514221,
"step": 371
},
{
"epoch": 1.5132382892057028,
"grad_norm": 2.5779659748077393,
"learning_rate": 8.79229268084975e-07,
"loss": 0.2584775537252426,
"step": 372
},
{
"epoch": 1.5173116089613035,
"grad_norm": 2.631009340286255,
"learning_rate": 8.782987615186234e-07,
"loss": 0.27790168672800064,
"step": 373
},
{
"epoch": 1.5213849287169041,
"grad_norm": 2.4295825958251953,
"learning_rate": 8.773651804757443e-07,
"loss": 0.26932528614997864,
"step": 374
},
{
"epoch": 1.5254582484725052,
"grad_norm": 2.6135988235473633,
"learning_rate": 8.764285325436555e-07,
"loss": 0.28256936371326447,
"step": 375
},
{
"epoch": 1.5295315682281059,
"grad_norm": 2.5016775131225586,
"learning_rate": 8.754888253346002e-07,
"loss": 0.28559644520282745,
"step": 376
},
{
"epoch": 1.5336048879837068,
"grad_norm": 2.5780680179595947,
"learning_rate": 8.745460664856844e-07,
"loss": 0.2705298289656639,
"step": 377
},
{
"epoch": 1.5376782077393076,
"grad_norm": 2.7102303504943848,
"learning_rate": 8.736002636588151e-07,
"loss": 0.27591292560100555,
"step": 378
},
{
"epoch": 1.5417515274949083,
"grad_norm": 2.520955801010132,
"learning_rate": 8.72651424540638e-07,
"loss": 0.2621312141418457,
"step": 379
},
{
"epoch": 1.5458248472505092,
"grad_norm": 3.2938437461853027,
"learning_rate": 8.716995568424754e-07,
"loss": 0.2692745327949524,
"step": 380
},
{
"epoch": 1.54989816700611,
"grad_norm": 2.4736831188201904,
"learning_rate": 8.70744668300263e-07,
"loss": 0.24611817300319672,
"step": 381
},
{
"epoch": 1.5539714867617107,
"grad_norm": 2.3731565475463867,
"learning_rate": 8.697867666744871e-07,
"loss": 0.2787970006465912,
"step": 382
},
{
"epoch": 1.5580448065173116,
"grad_norm": 2.5973782539367676,
"learning_rate": 8.688258597501219e-07,
"loss": 0.3056941330432892,
"step": 383
},
{
"epoch": 1.5621181262729125,
"grad_norm": 2.490133285522461,
"learning_rate": 8.678619553365658e-07,
"loss": 0.28115689754486084,
"step": 384
},
{
"epoch": 1.5661914460285131,
"grad_norm": 2.7315447330474854,
"learning_rate": 8.668950612675783e-07,
"loss": 0.2873499393463135,
"step": 385
},
{
"epoch": 1.570264765784114,
"grad_norm": 2.518087387084961,
"learning_rate": 8.659251854012161e-07,
"loss": 0.28173641860485077,
"step": 386
},
{
"epoch": 1.5743380855397149,
"grad_norm": 2.214815378189087,
"learning_rate": 8.649523356197695e-07,
"loss": 0.2376307100057602,
"step": 387
},
{
"epoch": 1.5784114052953155,
"grad_norm": 2.2534103393554688,
"learning_rate": 8.639765198296977e-07,
"loss": 0.2530301362276077,
"step": 388
},
{
"epoch": 1.5824847250509166,
"grad_norm": 2.5998353958129883,
"learning_rate": 8.629977459615654e-07,
"loss": 0.26012372225522995,
"step": 389
},
{
"epoch": 1.5865580448065173,
"grad_norm": 2.4811596870422363,
"learning_rate": 8.620160219699777e-07,
"loss": 0.26469268649816513,
"step": 390
},
{
"epoch": 1.5906313645621182,
"grad_norm": 2.4212377071380615,
"learning_rate": 8.610313558335156e-07,
"loss": 0.2749471068382263,
"step": 391
},
{
"epoch": 1.594704684317719,
"grad_norm": 2.3209803104400635,
"learning_rate": 8.600437555546716e-07,
"loss": 0.2666809409856796,
"step": 392
},
{
"epoch": 1.5987780040733197,
"grad_norm": 2.6138010025024414,
"learning_rate": 8.59053229159784e-07,
"loss": 0.2631463557481766,
"step": 393
},
{
"epoch": 1.6028513238289206,
"grad_norm": 2.4465487003326416,
"learning_rate": 8.580597846989721e-07,
"loss": 0.2840096950531006,
"step": 394
},
{
"epoch": 1.6069246435845215,
"grad_norm": 2.781353235244751,
"learning_rate": 8.570634302460706e-07,
"loss": 0.26131805777549744,
"step": 395
},
{
"epoch": 1.6109979633401221,
"grad_norm": 2.4190244674682617,
"learning_rate": 8.560641738985641e-07,
"loss": 0.28362704813480377,
"step": 396
},
{
"epoch": 1.615071283095723,
"grad_norm": 2.461918354034424,
"learning_rate": 8.550620237775213e-07,
"loss": 0.2682417184114456,
"step": 397
},
{
"epoch": 1.6191446028513239,
"grad_norm": 2.4705612659454346,
"learning_rate": 8.540569880275286e-07,
"loss": 0.2832501232624054,
"step": 398
},
{
"epoch": 1.6232179226069245,
"grad_norm": 2.401193380355835,
"learning_rate": 8.530490748166244e-07,
"loss": 0.2732377126812935,
"step": 399
},
{
"epoch": 1.6272912423625254,
"grad_norm": 2.7205650806427,
"learning_rate": 8.520382923362328e-07,
"loss": 0.2597970813512802,
"step": 400
},
{
"epoch": 1.6313645621181263,
"grad_norm": 2.6480019092559814,
"learning_rate": 8.510246488010964e-07,
"loss": 0.2697395384311676,
"step": 401
},
{
"epoch": 1.635437881873727,
"grad_norm": 2.5444276332855225,
"learning_rate": 8.5000815244921e-07,
"loss": 0.26779498159885406,
"step": 402
},
{
"epoch": 1.639511201629328,
"grad_norm": 2.3247053623199463,
"learning_rate": 8.489888115417537e-07,
"loss": 0.2565397247672081,
"step": 403
},
{
"epoch": 1.6435845213849287,
"grad_norm": 2.8525049686431885,
"learning_rate": 8.479666343630256e-07,
"loss": 0.29496023058891296,
"step": 404
},
{
"epoch": 1.6476578411405294,
"grad_norm": 2.5987389087677,
"learning_rate": 8.469416292203746e-07,
"loss": 0.24137818068265915,
"step": 405
},
{
"epoch": 1.6517311608961305,
"grad_norm": 2.4682445526123047,
"learning_rate": 8.459138044441323e-07,
"loss": 0.3068127781152725,
"step": 406
},
{
"epoch": 1.6558044806517311,
"grad_norm": 2.43457293510437,
"learning_rate": 8.448831683875464e-07,
"loss": 0.2586989253759384,
"step": 407
},
{
"epoch": 1.659877800407332,
"grad_norm": 2.446392297744751,
"learning_rate": 8.438497294267116e-07,
"loss": 0.2845039367675781,
"step": 408
},
{
"epoch": 1.663951120162933,
"grad_norm": 2.4405159950256348,
"learning_rate": 8.428134959605027e-07,
"loss": 0.2668181359767914,
"step": 409
},
{
"epoch": 1.6680244399185336,
"grad_norm": 2.398472309112549,
"learning_rate": 8.417744764105053e-07,
"loss": 0.294752761721611,
"step": 410
},
{
"epoch": 1.6720977596741344,
"grad_norm": 2.485675096511841,
"learning_rate": 8.407326792209481e-07,
"loss": 0.27970798313617706,
"step": 411
},
{
"epoch": 1.6761710794297353,
"grad_norm": 2.326519250869751,
"learning_rate": 8.396881128586338e-07,
"loss": 0.27439168095588684,
"step": 412
},
{
"epoch": 1.680244399185336,
"grad_norm": 2.5076746940612793,
"learning_rate": 8.386407858128706e-07,
"loss": 0.24190914630889893,
"step": 413
},
{
"epoch": 1.6843177189409368,
"grad_norm": 2.418656349182129,
"learning_rate": 8.375907065954028e-07,
"loss": 0.302260160446167,
"step": 414
},
{
"epoch": 1.6883910386965377,
"grad_norm": 2.431206226348877,
"learning_rate": 8.365378837403427e-07,
"loss": 0.24484054744243622,
"step": 415
},
{
"epoch": 1.6924643584521384,
"grad_norm": 2.1974987983703613,
"learning_rate": 8.354823258040995e-07,
"loss": 0.23830442130565643,
"step": 416
},
{
"epoch": 1.6965376782077393,
"grad_norm": 2.531301498413086,
"learning_rate": 8.344240413653111e-07,
"loss": 0.23077362775802612,
"step": 417
},
{
"epoch": 1.7006109979633401,
"grad_norm": 2.3643598556518555,
"learning_rate": 8.333630390247741e-07,
"loss": 0.2691231817007065,
"step": 418
},
{
"epoch": 1.7046843177189408,
"grad_norm": 2.429917335510254,
"learning_rate": 8.322993274053738e-07,
"loss": 0.26382092386484146,
"step": 419
},
{
"epoch": 1.708757637474542,
"grad_norm": 2.3184382915496826,
"learning_rate": 8.312329151520139e-07,
"loss": 0.24146142601966858,
"step": 420
},
{
"epoch": 1.7128309572301426,
"grad_norm": 2.3974194526672363,
"learning_rate": 8.301638109315465e-07,
"loss": 0.28932487964630127,
"step": 421
},
{
"epoch": 1.7169042769857432,
"grad_norm": 2.445939064025879,
"learning_rate": 8.290920234327019e-07,
"loss": 0.2816842496395111,
"step": 422
},
{
"epoch": 1.7209775967413443,
"grad_norm": 2.517059087753296,
"learning_rate": 8.280175613660175e-07,
"loss": 0.28957797586917877,
"step": 423
},
{
"epoch": 1.725050916496945,
"grad_norm": 2.7085039615631104,
"learning_rate": 8.269404334637666e-07,
"loss": 0.2929697036743164,
"step": 424
},
{
"epoch": 1.7291242362525459,
"grad_norm": 2.3027985095977783,
"learning_rate": 8.258606484798896e-07,
"loss": 0.2602064907550812,
"step": 425
},
{
"epoch": 1.7331975560081467,
"grad_norm": 2.4261462688446045,
"learning_rate": 8.247782151899195e-07,
"loss": 0.2826506048440933,
"step": 426
},
{
"epoch": 1.7372708757637474,
"grad_norm": 2.6706831455230713,
"learning_rate": 8.236931423909138e-07,
"loss": 0.25307597219944,
"step": 427
},
{
"epoch": 1.7413441955193483,
"grad_norm": 2.6151740550994873,
"learning_rate": 8.226054389013808e-07,
"loss": 0.31587836146354675,
"step": 428
},
{
"epoch": 1.7454175152749491,
"grad_norm": 2.3904056549072266,
"learning_rate": 8.215151135612088e-07,
"loss": 0.2650100588798523,
"step": 429
},
{
"epoch": 1.7494908350305498,
"grad_norm": 2.48195219039917,
"learning_rate": 8.204221752315948e-07,
"loss": 0.24214383959770203,
"step": 430
},
{
"epoch": 1.7535641547861507,
"grad_norm": 2.28658390045166,
"learning_rate": 8.193266327949708e-07,
"loss": 0.26451194286346436,
"step": 431
},
{
"epoch": 1.7576374745417516,
"grad_norm": 2.365696430206299,
"learning_rate": 8.182284951549335e-07,
"loss": 0.26112615317106247,
"step": 432
},
{
"epoch": 1.7617107942973522,
"grad_norm": 2.467344284057617,
"learning_rate": 8.171277712361708e-07,
"loss": 0.2644128352403641,
"step": 433
},
{
"epoch": 1.765784114052953,
"grad_norm": 2.6279022693634033,
"learning_rate": 8.160244699843899e-07,
"loss": 0.26811327785253525,
"step": 434
},
{
"epoch": 1.769857433808554,
"grad_norm": 2.196678638458252,
"learning_rate": 8.149186003662437e-07,
"loss": 0.2764963060617447,
"step": 435
},
{
"epoch": 1.7739307535641546,
"grad_norm": 2.582368850708008,
"learning_rate": 8.138101713692587e-07,
"loss": 0.24663043022155762,
"step": 436
},
{
"epoch": 1.7780040733197557,
"grad_norm": 2.4632701873779297,
"learning_rate": 8.12699192001762e-07,
"loss": 0.24030117690563202,
"step": 437
},
{
"epoch": 1.7820773930753564,
"grad_norm": 2.549862861633301,
"learning_rate": 8.115856712928077e-07,
"loss": 0.2775857746601105,
"step": 438
},
{
"epoch": 1.7861507128309573,
"grad_norm": 2.460521936416626,
"learning_rate": 8.104696182921039e-07,
"loss": 0.24370034784078598,
"step": 439
},
{
"epoch": 1.7902240325865582,
"grad_norm": 2.489588975906372,
"learning_rate": 8.093510420699386e-07,
"loss": 0.2672967165708542,
"step": 440
},
{
"epoch": 1.7942973523421588,
"grad_norm": 2.442493438720703,
"learning_rate": 8.082299517171061e-07,
"loss": 0.2774495333433151,
"step": 441
},
{
"epoch": 1.7983706720977597,
"grad_norm": 2.6533353328704834,
"learning_rate": 8.071063563448339e-07,
"loss": 0.27777694165706635,
"step": 442
},
{
"epoch": 1.8024439918533606,
"grad_norm": 2.546030282974243,
"learning_rate": 8.059802650847077e-07,
"loss": 0.2475699484348297,
"step": 443
},
{
"epoch": 1.8065173116089612,
"grad_norm": 2.603074789047241,
"learning_rate": 8.048516870885977e-07,
"loss": 0.2610132396221161,
"step": 444
},
{
"epoch": 1.810590631364562,
"grad_norm": 2.5384109020233154,
"learning_rate": 8.037206315285841e-07,
"loss": 0.26541996002197266,
"step": 445
},
{
"epoch": 1.814663951120163,
"grad_norm": 2.5843305587768555,
"learning_rate": 8.025871075968826e-07,
"loss": 0.26443643867969513,
"step": 446
},
{
"epoch": 1.8187372708757636,
"grad_norm": 2.428213119506836,
"learning_rate": 8.014511245057691e-07,
"loss": 0.27873335778713226,
"step": 447
},
{
"epoch": 1.8228105906313645,
"grad_norm": 2.622931480407715,
"learning_rate": 8.003126914875063e-07,
"loss": 0.2784544825553894,
"step": 448
},
{
"epoch": 1.8268839103869654,
"grad_norm": 2.4025309085845947,
"learning_rate": 7.991718177942666e-07,
"loss": 0.22745566070079803,
"step": 449
},
{
"epoch": 1.830957230142566,
"grad_norm": 2.426877498626709,
"learning_rate": 7.980285126980591e-07,
"loss": 0.2900083065032959,
"step": 450
},
{
"epoch": 1.8350305498981672,
"grad_norm": 2.506085157394409,
"learning_rate": 7.968827854906528e-07,
"loss": 0.3019551932811737,
"step": 451
},
{
"epoch": 1.8391038696537678,
"grad_norm": 2.428899049758911,
"learning_rate": 7.95734645483501e-07,
"loss": 0.2602303624153137,
"step": 452
},
{
"epoch": 1.8431771894093685,
"grad_norm": 2.7357733249664307,
"learning_rate": 7.945841020076669e-07,
"loss": 0.2674318701028824,
"step": 453
},
{
"epoch": 1.8472505091649696,
"grad_norm": 2.492051124572754,
"learning_rate": 7.934311644137463e-07,
"loss": 0.29544439911842346,
"step": 454
},
{
"epoch": 1.8513238289205702,
"grad_norm": 2.751485586166382,
"learning_rate": 7.922758420717928e-07,
"loss": 0.2730901688337326,
"step": 455
},
{
"epoch": 1.8553971486761711,
"grad_norm": 2.469752550125122,
"learning_rate": 7.911181443712407e-07,
"loss": 0.28135350346565247,
"step": 456
},
{
"epoch": 1.859470468431772,
"grad_norm": 2.5486271381378174,
"learning_rate": 7.89958080720829e-07,
"loss": 0.2647327482700348,
"step": 457
},
{
"epoch": 1.8635437881873727,
"grad_norm": 2.328063726425171,
"learning_rate": 7.887956605485258e-07,
"loss": 0.29177480936050415,
"step": 458
},
{
"epoch": 1.8676171079429735,
"grad_norm": 2.3431217670440674,
"learning_rate": 7.876308933014502e-07,
"loss": 0.2461807057261467,
"step": 459
},
{
"epoch": 1.8716904276985744,
"grad_norm": 2.346773862838745,
"learning_rate": 7.864637884457961e-07,
"loss": 0.2737877368927002,
"step": 460
},
{
"epoch": 1.875763747454175,
"grad_norm": 2.6286261081695557,
"learning_rate": 7.852943554667561e-07,
"loss": 0.29079559445381165,
"step": 461
},
{
"epoch": 1.879837067209776,
"grad_norm": 2.3207788467407227,
"learning_rate": 7.841226038684434e-07,
"loss": 0.261492520570755,
"step": 462
},
{
"epoch": 1.8839103869653768,
"grad_norm": 2.4097938537597656,
"learning_rate": 7.829485431738148e-07,
"loss": 0.2779378667473793,
"step": 463
},
{
"epoch": 1.8879837067209775,
"grad_norm": 2.580665349960327,
"learning_rate": 7.817721829245935e-07,
"loss": 0.26885151863098145,
"step": 464
},
{
"epoch": 1.8920570264765784,
"grad_norm": 2.5190136432647705,
"learning_rate": 7.805935326811912e-07,
"loss": 0.2348337173461914,
"step": 465
},
{
"epoch": 1.8961303462321792,
"grad_norm": 2.6823647022247314,
"learning_rate": 7.794126020226309e-07,
"loss": 0.31435824930667877,
"step": 466
},
{
"epoch": 1.90020366598778,
"grad_norm": 2.678208827972412,
"learning_rate": 7.782294005464686e-07,
"loss": 0.2894388735294342,
"step": 467
},
{
"epoch": 1.904276985743381,
"grad_norm": 2.519721269607544,
"learning_rate": 7.770439378687161e-07,
"loss": 0.3156396448612213,
"step": 468
},
{
"epoch": 1.9083503054989817,
"grad_norm": 2.6619021892547607,
"learning_rate": 7.758562236237614e-07,
"loss": 0.2675836831331253,
"step": 469
},
{
"epoch": 1.9124236252545825,
"grad_norm": 2.2732584476470947,
"learning_rate": 7.746662674642912e-07,
"loss": 0.21649424731731415,
"step": 470
},
{
"epoch": 1.9164969450101834,
"grad_norm": 2.302424669265747,
"learning_rate": 7.734740790612136e-07,
"loss": 0.2436254546046257,
"step": 471
},
{
"epoch": 1.920570264765784,
"grad_norm": 2.3057138919830322,
"learning_rate": 7.722796681035769e-07,
"loss": 0.25994570553302765,
"step": 472
},
{
"epoch": 1.924643584521385,
"grad_norm": 2.327444076538086,
"learning_rate": 7.710830442984937e-07,
"loss": 0.2572202906012535,
"step": 473
},
{
"epoch": 1.9287169042769858,
"grad_norm": 2.3709921836853027,
"learning_rate": 7.698842173710599e-07,
"loss": 0.2600872740149498,
"step": 474
},
{
"epoch": 1.9327902240325865,
"grad_norm": 2.463008165359497,
"learning_rate": 7.686831970642766e-07,
"loss": 0.2489926964044571,
"step": 475
},
{
"epoch": 1.9368635437881874,
"grad_norm": 2.4627251625061035,
"learning_rate": 7.674799931389708e-07,
"loss": 0.2903194725513458,
"step": 476
},
{
"epoch": 1.9409368635437882,
"grad_norm": 2.9676828384399414,
"learning_rate": 7.662746153737156e-07,
"loss": 0.2674560844898224,
"step": 477
},
{
"epoch": 1.945010183299389,
"grad_norm": 2.448298692703247,
"learning_rate": 7.65067073564752e-07,
"loss": 0.3171275407075882,
"step": 478
},
{
"epoch": 1.9490835030549898,
"grad_norm": 2.524946451187134,
"learning_rate": 7.638573775259077e-07,
"loss": 0.2637060284614563,
"step": 479
},
{
"epoch": 1.9531568228105907,
"grad_norm": 2.4001801013946533,
"learning_rate": 7.62645537088518e-07,
"loss": 0.23155802488327026,
"step": 480
},
{
"epoch": 1.9572301425661913,
"grad_norm": 2.287005662918091,
"learning_rate": 7.614315621013469e-07,
"loss": 0.27207742631435394,
"step": 481
},
{
"epoch": 1.9613034623217924,
"grad_norm": 2.453338623046875,
"learning_rate": 7.60215462430505e-07,
"loss": 0.2895518094301224,
"step": 482
},
{
"epoch": 1.965376782077393,
"grad_norm": 2.5567643642425537,
"learning_rate": 7.58997247959371e-07,
"loss": 0.2574731409549713,
"step": 483
},
{
"epoch": 1.9694501018329937,
"grad_norm": 2.365522623062134,
"learning_rate": 7.577769285885108e-07,
"loss": 0.22622792422771454,
"step": 484
},
{
"epoch": 1.9735234215885948,
"grad_norm": 2.2915074825286865,
"learning_rate": 7.56554514235597e-07,
"loss": 0.26031681150197983,
"step": 485
},
{
"epoch": 1.9775967413441955,
"grad_norm": 2.651094436645508,
"learning_rate": 7.553300148353284e-07,
"loss": 0.2904250845313072,
"step": 486
},
{
"epoch": 1.9816700610997964,
"grad_norm": 2.416658639907837,
"learning_rate": 7.541034403393489e-07,
"loss": 0.2949088215827942,
"step": 487
},
{
"epoch": 1.9857433808553973,
"grad_norm": 2.4140050411224365,
"learning_rate": 7.528748007161676e-07,
"loss": 0.2634105682373047,
"step": 488
},
{
"epoch": 1.989816700610998,
"grad_norm": 2.684359550476074,
"learning_rate": 7.516441059510764e-07,
"loss": 0.27155014872550964,
"step": 489
},
{
"epoch": 1.9938900203665988,
"grad_norm": 2.4172725677490234,
"learning_rate": 7.5041136604607e-07,
"loss": 0.25667136907577515,
"step": 490
},
{
"epoch": 1.9979633401221997,
"grad_norm": 2.3175907135009766,
"learning_rate": 7.491765910197643e-07,
"loss": 0.2724708914756775,
"step": 491
},
{
"epoch": 2.0,
"grad_norm": 2.5458946228027344,
"learning_rate": 7.479397909073143e-07,
"loss": 0.2475520372390747,
"step": 492
},
{
"epoch": 2.0040733197556007,
"grad_norm": 2.372309446334839,
"learning_rate": 7.467009757603336e-07,
"loss": 0.25162914395332336,
"step": 493
},
{
"epoch": 2.0081466395112018,
"grad_norm": 2.3240795135498047,
"learning_rate": 7.454601556468121e-07,
"loss": 0.24550767987966537,
"step": 494
},
{
"epoch": 2.0122199592668024,
"grad_norm": 2.3425838947296143,
"learning_rate": 7.442173406510341e-07,
"loss": 0.21918359398841858,
"step": 495
},
{
"epoch": 2.016293279022403,
"grad_norm": 2.2755539417266846,
"learning_rate": 7.429725408734968e-07,
"loss": 0.24949443340301514,
"step": 496
},
{
"epoch": 2.020366598778004,
"grad_norm": 2.490926742553711,
"learning_rate": 7.417257664308276e-07,
"loss": 0.2353430986404419,
"step": 497
},
{
"epoch": 2.024439918533605,
"grad_norm": 2.390965461730957,
"learning_rate": 7.404770274557028e-07,
"loss": 0.25412893295288086,
"step": 498
},
{
"epoch": 2.0285132382892055,
"grad_norm": 2.11749005317688,
"learning_rate": 7.392263340967641e-07,
"loss": 0.21416524052619934,
"step": 499
},
{
"epoch": 2.0325865580448066,
"grad_norm": 2.317534923553467,
"learning_rate": 7.379736965185368e-07,
"loss": 0.24323680251836777,
"step": 500
},
{
"epoch": 2.0366598778004072,
"grad_norm": 2.29274845123291,
"learning_rate": 7.367191249013472e-07,
"loss": 0.2301274538040161,
"step": 501
},
{
"epoch": 2.0407331975560083,
"grad_norm": 2.2650434970855713,
"learning_rate": 7.354626294412402e-07,
"loss": 0.1820373833179474,
"step": 502
},
{
"epoch": 2.044806517311609,
"grad_norm": 2.6822898387908936,
"learning_rate": 7.342042203498951e-07,
"loss": 0.24766084551811218,
"step": 503
},
{
"epoch": 2.0488798370672097,
"grad_norm": 2.583789348602295,
"learning_rate": 7.329439078545438e-07,
"loss": 0.24215728789567947,
"step": 504
},
{
"epoch": 2.0529531568228108,
"grad_norm": 2.484274387359619,
"learning_rate": 7.316817021978883e-07,
"loss": 0.23763683438301086,
"step": 505
},
{
"epoch": 2.0570264765784114,
"grad_norm": 2.4976799488067627,
"learning_rate": 7.304176136380149e-07,
"loss": 0.2570403814315796,
"step": 506
},
{
"epoch": 2.061099796334012,
"grad_norm": 2.323218584060669,
"learning_rate": 7.291516524483136e-07,
"loss": 0.21409639716148376,
"step": 507
},
{
"epoch": 2.065173116089613,
"grad_norm": 2.562451124191284,
"learning_rate": 7.278838289173933e-07,
"loss": 0.2541456073522568,
"step": 508
},
{
"epoch": 2.069246435845214,
"grad_norm": 2.538037061691284,
"learning_rate": 7.266141533489983e-07,
"loss": 0.2855856567621231,
"step": 509
},
{
"epoch": 2.0733197556008145,
"grad_norm": 2.348076343536377,
"learning_rate": 7.253426360619242e-07,
"loss": 0.2544526904821396,
"step": 510
},
{
"epoch": 2.0773930753564156,
"grad_norm": 2.2017970085144043,
"learning_rate": 7.240692873899351e-07,
"loss": 0.21077334135770798,
"step": 511
},
{
"epoch": 2.0814663951120163,
"grad_norm": 2.3848230838775635,
"learning_rate": 7.227941176816787e-07,
"loss": 0.2178598716855049,
"step": 512
},
{
"epoch": 2.085539714867617,
"grad_norm": 2.4316163063049316,
"learning_rate": 7.215171373006024e-07,
"loss": 0.21304114907979965,
"step": 513
},
{
"epoch": 2.089613034623218,
"grad_norm": 2.342197895050049,
"learning_rate": 7.202383566248692e-07,
"loss": 0.2186000794172287,
"step": 514
},
{
"epoch": 2.0936863543788187,
"grad_norm": 2.3286263942718506,
"learning_rate": 7.189577860472731e-07,
"loss": 0.25711505115032196,
"step": 515
},
{
"epoch": 2.0977596741344193,
"grad_norm": 2.196580410003662,
"learning_rate": 7.176754359751555e-07,
"loss": 0.20909912884235382,
"step": 516
},
{
"epoch": 2.1018329938900204,
"grad_norm": 2.197080373764038,
"learning_rate": 7.163913168303191e-07,
"loss": 0.23562318086624146,
"step": 517
},
{
"epoch": 2.105906313645621,
"grad_norm": 2.5636260509490967,
"learning_rate": 7.151054390489444e-07,
"loss": 0.25697334110736847,
"step": 518
},
{
"epoch": 2.109979633401222,
"grad_norm": 2.6412978172302246,
"learning_rate": 7.138178130815047e-07,
"loss": 0.24819861352443695,
"step": 519
},
{
"epoch": 2.114052953156823,
"grad_norm": 2.4628520011901855,
"learning_rate": 7.125284493926809e-07,
"loss": 0.20018430054187775,
"step": 520
},
{
"epoch": 2.1181262729124235,
"grad_norm": 2.6035919189453125,
"learning_rate": 7.112373584612763e-07,
"loss": 0.2615906372666359,
"step": 521
},
{
"epoch": 2.1221995926680246,
"grad_norm": 2.499178409576416,
"learning_rate": 7.099445507801323e-07,
"loss": 0.25786396861076355,
"step": 522
},
{
"epoch": 2.1262729124236253,
"grad_norm": 2.356799364089966,
"learning_rate": 7.086500368560419e-07,
"loss": 0.24771813303232193,
"step": 523
},
{
"epoch": 2.130346232179226,
"grad_norm": 2.3499882221221924,
"learning_rate": 7.073538272096651e-07,
"loss": 0.2333463951945305,
"step": 524
},
{
"epoch": 2.134419551934827,
"grad_norm": 2.328732490539551,
"learning_rate": 7.060559323754435e-07,
"loss": 0.20822811126708984,
"step": 525
},
{
"epoch": 2.1384928716904277,
"grad_norm": 2.24107027053833,
"learning_rate": 7.047563629015141e-07,
"loss": 0.2279072254896164,
"step": 526
},
{
"epoch": 2.1425661914460283,
"grad_norm": 2.3738133907318115,
"learning_rate": 7.03455129349624e-07,
"loss": 0.23648229241371155,
"step": 527
},
{
"epoch": 2.1466395112016294,
"grad_norm": 2.2748498916625977,
"learning_rate": 7.021522422950443e-07,
"loss": 0.21738301217556,
"step": 528
},
{
"epoch": 2.15071283095723,
"grad_norm": 2.4731128215789795,
"learning_rate": 7.008477123264847e-07,
"loss": 0.23567791283130646,
"step": 529
},
{
"epoch": 2.1547861507128308,
"grad_norm": 2.5964107513427734,
"learning_rate": 6.995415500460067e-07,
"loss": 0.25774678587913513,
"step": 530
},
{
"epoch": 2.158859470468432,
"grad_norm": 2.7168128490448,
"learning_rate": 6.982337660689377e-07,
"loss": 0.25715047866106033,
"step": 531
},
{
"epoch": 2.1629327902240325,
"grad_norm": 2.4718027114868164,
"learning_rate": 6.96924371023785e-07,
"loss": 0.21807514131069183,
"step": 532
},
{
"epoch": 2.167006109979633,
"grad_norm": 2.3053858280181885,
"learning_rate": 6.956133755521496e-07,
"loss": 0.22069019079208374,
"step": 533
},
{
"epoch": 2.1710794297352343,
"grad_norm": 2.3815200328826904,
"learning_rate": 6.943007903086387e-07,
"loss": 0.2234855741262436,
"step": 534
},
{
"epoch": 2.175152749490835,
"grad_norm": 2.331794261932373,
"learning_rate": 6.929866259607797e-07,
"loss": 0.2392452359199524,
"step": 535
},
{
"epoch": 2.179226069246436,
"grad_norm": 2.5421111583709717,
"learning_rate": 6.916708931889344e-07,
"loss": 0.2579016238451004,
"step": 536
},
{
"epoch": 2.1832993890020367,
"grad_norm": 2.235800266265869,
"learning_rate": 6.903536026862104e-07,
"loss": 0.21762673556804657,
"step": 537
},
{
"epoch": 2.1873727087576373,
"grad_norm": 2.3409221172332764,
"learning_rate": 6.890347651583759e-07,
"loss": 0.21134832501411438,
"step": 538
},
{
"epoch": 2.1914460285132384,
"grad_norm": 2.29571795463562,
"learning_rate": 6.877143913237713e-07,
"loss": 0.22032570838928223,
"step": 539
},
{
"epoch": 2.195519348268839,
"grad_norm": 2.3287696838378906,
"learning_rate": 6.863924919132236e-07,
"loss": 0.22613044828176498,
"step": 540
},
{
"epoch": 2.1995926680244398,
"grad_norm": 2.2647016048431396,
"learning_rate": 6.850690776699573e-07,
"loss": 0.20803789049386978,
"step": 541
},
{
"epoch": 2.203665987780041,
"grad_norm": 2.5180113315582275,
"learning_rate": 6.837441593495086e-07,
"loss": 0.25074785202741623,
"step": 542
},
{
"epoch": 2.2077393075356415,
"grad_norm": 2.4430267810821533,
"learning_rate": 6.824177477196377e-07,
"loss": 0.23671245574951172,
"step": 543
},
{
"epoch": 2.211812627291242,
"grad_norm": 2.4262306690216064,
"learning_rate": 6.810898535602411e-07,
"loss": 0.23878173530101776,
"step": 544
},
{
"epoch": 2.2158859470468433,
"grad_norm": 2.2304294109344482,
"learning_rate": 6.797604876632632e-07,
"loss": 0.2229127734899521,
"step": 545
},
{
"epoch": 2.219959266802444,
"grad_norm": 2.443239450454712,
"learning_rate": 6.784296608326107e-07,
"loss": 0.22968536615371704,
"step": 546
},
{
"epoch": 2.224032586558045,
"grad_norm": 2.5297110080718994,
"learning_rate": 6.770973838840622e-07,
"loss": 0.2511328458786011,
"step": 547
},
{
"epoch": 2.2281059063136457,
"grad_norm": 2.648350238800049,
"learning_rate": 6.757636676451823e-07,
"loss": 0.2674560844898224,
"step": 548
},
{
"epoch": 2.2321792260692463,
"grad_norm": 2.1822292804718018,
"learning_rate": 6.744285229552327e-07,
"loss": 0.19531694799661636,
"step": 549
},
{
"epoch": 2.2362525458248474,
"grad_norm": 2.4335954189300537,
"learning_rate": 6.730919606650841e-07,
"loss": 0.23324161767959595,
"step": 550
},
{
"epoch": 2.240325865580448,
"grad_norm": 2.5245561599731445,
"learning_rate": 6.717539916371288e-07,
"loss": 0.22070679813623428,
"step": 551
},
{
"epoch": 2.2443991853360488,
"grad_norm": 2.4031879901885986,
"learning_rate": 6.704146267451908e-07,
"loss": 0.20751216262578964,
"step": 552
},
{
"epoch": 2.24847250509165,
"grad_norm": 2.4731009006500244,
"learning_rate": 6.690738768744394e-07,
"loss": 0.23825813084840775,
"step": 553
},
{
"epoch": 2.2525458248472505,
"grad_norm": 2.62092924118042,
"learning_rate": 6.677317529212993e-07,
"loss": 0.23570290952920914,
"step": 554
},
{
"epoch": 2.256619144602851,
"grad_norm": 2.4778685569763184,
"learning_rate": 6.663882657933626e-07,
"loss": 0.22070936858654022,
"step": 555
},
{
"epoch": 2.2606924643584523,
"grad_norm": 2.3577542304992676,
"learning_rate": 6.650434264093e-07,
"loss": 0.2068188190460205,
"step": 556
},
{
"epoch": 2.264765784114053,
"grad_norm": 2.421339988708496,
"learning_rate": 6.636972456987725e-07,
"loss": 0.25278185307979584,
"step": 557
},
{
"epoch": 2.2688391038696536,
"grad_norm": 2.4010040760040283,
"learning_rate": 6.623497346023417e-07,
"loss": 0.24000639468431473,
"step": 558
},
{
"epoch": 2.2729124236252547,
"grad_norm": 2.54424786567688,
"learning_rate": 6.610009040713818e-07,
"loss": 0.27193646132946014,
"step": 559
},
{
"epoch": 2.2769857433808554,
"grad_norm": 2.5175139904022217,
"learning_rate": 6.596507650679899e-07,
"loss": 0.23332953453063965,
"step": 560
},
{
"epoch": 2.281059063136456,
"grad_norm": 2.4466259479522705,
"learning_rate": 6.582993285648976e-07,
"loss": 0.2428213581442833,
"step": 561
},
{
"epoch": 2.285132382892057,
"grad_norm": 2.4644064903259277,
"learning_rate": 6.569466055453807e-07,
"loss": 0.21874462068080902,
"step": 562
},
{
"epoch": 2.2892057026476578,
"grad_norm": 2.250108480453491,
"learning_rate": 6.555926070031716e-07,
"loss": 0.20208529382944107,
"step": 563
},
{
"epoch": 2.293279022403259,
"grad_norm": 2.5665433406829834,
"learning_rate": 6.542373439423683e-07,
"loss": 0.22964468598365784,
"step": 564
},
{
"epoch": 2.2973523421588595,
"grad_norm": 2.6082041263580322,
"learning_rate": 6.528808273773459e-07,
"loss": 0.24122490733861923,
"step": 565
},
{
"epoch": 2.30142566191446,
"grad_norm": 2.4170379638671875,
"learning_rate": 6.515230683326669e-07,
"loss": 0.21298009902238846,
"step": 566
},
{
"epoch": 2.3054989816700613,
"grad_norm": 2.651233196258545,
"learning_rate": 6.501640778429917e-07,
"loss": 0.24708368629217148,
"step": 567
},
{
"epoch": 2.309572301425662,
"grad_norm": 2.462790012359619,
"learning_rate": 6.488038669529886e-07,
"loss": 0.2247847020626068,
"step": 568
},
{
"epoch": 2.3136456211812626,
"grad_norm": 2.2574470043182373,
"learning_rate": 6.474424467172442e-07,
"loss": 0.21436495333909988,
"step": 569
},
{
"epoch": 2.3177189409368637,
"grad_norm": 2.4516212940216064,
"learning_rate": 6.460798282001738e-07,
"loss": 0.2516315132379532,
"step": 570
},
{
"epoch": 2.3217922606924644,
"grad_norm": 2.514031410217285,
"learning_rate": 6.447160224759311e-07,
"loss": 0.2250988855957985,
"step": 571
},
{
"epoch": 2.325865580448065,
"grad_norm": 2.3617303371429443,
"learning_rate": 6.433510406283185e-07,
"loss": 0.23347290605306625,
"step": 572
},
{
"epoch": 2.329938900203666,
"grad_norm": 2.392488479614258,
"learning_rate": 6.419848937506964e-07,
"loss": 0.25712814927101135,
"step": 573
},
{
"epoch": 2.3340122199592668,
"grad_norm": 2.4351253509521484,
"learning_rate": 6.406175929458944e-07,
"loss": 0.23134000599384308,
"step": 574
},
{
"epoch": 2.3380855397148674,
"grad_norm": 2.351980447769165,
"learning_rate": 6.392491493261198e-07,
"loss": 0.24626444280147552,
"step": 575
},
{
"epoch": 2.3421588594704685,
"grad_norm": 2.2502481937408447,
"learning_rate": 6.37879574012867e-07,
"loss": 0.2008371651172638,
"step": 576
},
{
"epoch": 2.346232179226069,
"grad_norm": 2.413408041000366,
"learning_rate": 6.36508878136829e-07,
"loss": 0.24356064200401306,
"step": 577
},
{
"epoch": 2.35030549898167,
"grad_norm": 2.358431577682495,
"learning_rate": 6.351370728378049e-07,
"loss": 0.25313031673431396,
"step": 578
},
{
"epoch": 2.354378818737271,
"grad_norm": 2.580531358718872,
"learning_rate": 6.337641692646106e-07,
"loss": 0.2431691735982895,
"step": 579
},
{
"epoch": 2.3584521384928716,
"grad_norm": 2.4596192836761475,
"learning_rate": 6.323901785749871e-07,
"loss": 0.2600405141711235,
"step": 580
},
{
"epoch": 2.3625254582484727,
"grad_norm": 2.41133975982666,
"learning_rate": 6.310151119355118e-07,
"loss": 0.1907319650053978,
"step": 581
},
{
"epoch": 2.3665987780040734,
"grad_norm": 2.5591516494750977,
"learning_rate": 6.296389805215054e-07,
"loss": 0.2532680034637451,
"step": 582
},
{
"epoch": 2.370672097759674,
"grad_norm": 2.362185478210449,
"learning_rate": 6.282617955169425e-07,
"loss": 0.2242288738489151,
"step": 583
},
{
"epoch": 2.374745417515275,
"grad_norm": 2.438898801803589,
"learning_rate": 6.268835681143602e-07,
"loss": 0.2321534976363182,
"step": 584
},
{
"epoch": 2.378818737270876,
"grad_norm": 2.066927433013916,
"learning_rate": 6.255043095147678e-07,
"loss": 0.2276027500629425,
"step": 585
},
{
"epoch": 2.3828920570264764,
"grad_norm": 2.347784996032715,
"learning_rate": 6.241240309275545e-07,
"loss": 0.2264810875058174,
"step": 586
},
{
"epoch": 2.3869653767820775,
"grad_norm": 2.444033145904541,
"learning_rate": 6.227427435703995e-07,
"loss": 0.2255028337240219,
"step": 587
},
{
"epoch": 2.391038696537678,
"grad_norm": 2.2944633960723877,
"learning_rate": 6.213604586691805e-07,
"loss": 0.2329559102654457,
"step": 588
},
{
"epoch": 2.395112016293279,
"grad_norm": 2.346848964691162,
"learning_rate": 6.199771874578819e-07,
"loss": 0.22230961173772812,
"step": 589
},
{
"epoch": 2.39918533604888,
"grad_norm": 2.4055089950561523,
"learning_rate": 6.185929411785042e-07,
"loss": 0.22255368530750275,
"step": 590
},
{
"epoch": 2.4032586558044806,
"grad_norm": 2.5555362701416016,
"learning_rate": 6.172077310809724e-07,
"loss": 0.25746987015008926,
"step": 591
},
{
"epoch": 2.4073319755600817,
"grad_norm": 2.5292749404907227,
"learning_rate": 6.15821568423045e-07,
"loss": 0.1872299611568451,
"step": 592
},
{
"epoch": 2.4114052953156824,
"grad_norm": 2.6285977363586426,
"learning_rate": 6.144344644702211e-07,
"loss": 0.25099916756153107,
"step": 593
},
{
"epoch": 2.415478615071283,
"grad_norm": 2.830077886581421,
"learning_rate": 6.130464304956504e-07,
"loss": 0.26932021975517273,
"step": 594
},
{
"epoch": 2.4195519348268837,
"grad_norm": 2.4076106548309326,
"learning_rate": 6.116574777800412e-07,
"loss": 0.25139734894037247,
"step": 595
},
{
"epoch": 2.423625254582485,
"grad_norm": 2.45658540725708,
"learning_rate": 6.102676176115681e-07,
"loss": 0.2526377737522125,
"step": 596
},
{
"epoch": 2.4276985743380854,
"grad_norm": 2.455595016479492,
"learning_rate": 6.088768612857807e-07,
"loss": 0.20270772278308868,
"step": 597
},
{
"epoch": 2.4317718940936865,
"grad_norm": 2.5158026218414307,
"learning_rate": 6.074852201055121e-07,
"loss": 0.2484816238284111,
"step": 598
},
{
"epoch": 2.435845213849287,
"grad_norm": 2.8085286617279053,
"learning_rate": 6.060927053807863e-07,
"loss": 0.2813955247402191,
"step": 599
},
{
"epoch": 2.439918533604888,
"grad_norm": 2.644648313522339,
"learning_rate": 6.046993284287267e-07,
"loss": 0.2188793271780014,
"step": 600
},
{
"epoch": 2.443991853360489,
"grad_norm": 2.362947702407837,
"learning_rate": 6.033051005734647e-07,
"loss": 0.2174539864063263,
"step": 601
},
{
"epoch": 2.4480651731160896,
"grad_norm": 2.469346284866333,
"learning_rate": 6.019100331460466e-07,
"loss": 0.24562832713127136,
"step": 602
},
{
"epoch": 2.4521384928716903,
"grad_norm": 2.338634729385376,
"learning_rate": 6.005141374843419e-07,
"loss": 0.23371918499469757,
"step": 603
},
{
"epoch": 2.4562118126272914,
"grad_norm": 2.3929550647735596,
"learning_rate": 5.991174249329514e-07,
"loss": 0.23879797011613846,
"step": 604
},
{
"epoch": 2.460285132382892,
"grad_norm": 2.257073163986206,
"learning_rate": 5.977199068431153e-07,
"loss": 0.22032149881124496,
"step": 605
},
{
"epoch": 2.4643584521384927,
"grad_norm": 2.408695936203003,
"learning_rate": 5.963215945726198e-07,
"loss": 0.2490229532122612,
"step": 606
},
{
"epoch": 2.468431771894094,
"grad_norm": 2.3911116123199463,
"learning_rate": 5.949224994857057e-07,
"loss": 0.23558590561151505,
"step": 607
},
{
"epoch": 2.4725050916496945,
"grad_norm": 2.351417303085327,
"learning_rate": 5.93522632952976e-07,
"loss": 0.22598809003829956,
"step": 608
},
{
"epoch": 2.4765784114052956,
"grad_norm": 2.258190870285034,
"learning_rate": 5.921220063513034e-07,
"loss": 0.17976737767457962,
"step": 609
},
{
"epoch": 2.480651731160896,
"grad_norm": 2.431304454803467,
"learning_rate": 5.907206310637375e-07,
"loss": 0.209254652261734,
"step": 610
},
{
"epoch": 2.484725050916497,
"grad_norm": 2.3682570457458496,
"learning_rate": 5.893185184794128e-07,
"loss": 0.23237604647874832,
"step": 611
},
{
"epoch": 2.4887983706720975,
"grad_norm": 2.473200798034668,
"learning_rate": 5.879156799934554e-07,
"loss": 0.21567458659410477,
"step": 612
},
{
"epoch": 2.4928716904276986,
"grad_norm": 2.312518358230591,
"learning_rate": 5.865121270068916e-07,
"loss": 0.20658506453037262,
"step": 613
},
{
"epoch": 2.4969450101832993,
"grad_norm": 2.5240719318389893,
"learning_rate": 5.851078709265541e-07,
"loss": 0.2315971404314041,
"step": 614
},
{
"epoch": 2.5010183299389004,
"grad_norm": 2.444183349609375,
"learning_rate": 5.837029231649898e-07,
"loss": 0.248056061565876,
"step": 615
},
{
"epoch": 2.505091649694501,
"grad_norm": 2.928251028060913,
"learning_rate": 5.82297295140367e-07,
"loss": 0.190412700176239,
"step": 616
},
{
"epoch": 2.5091649694501017,
"grad_norm": 2.3546931743621826,
"learning_rate": 5.808909982763825e-07,
"loss": 0.2290133684873581,
"step": 617
},
{
"epoch": 2.513238289205703,
"grad_norm": 2.4045937061309814,
"learning_rate": 5.794840440021686e-07,
"loss": 0.20809811353683472,
"step": 618
},
{
"epoch": 2.5173116089613035,
"grad_norm": 2.304481029510498,
"learning_rate": 5.780764437522012e-07,
"loss": 0.21659423410892487,
"step": 619
},
{
"epoch": 2.521384928716904,
"grad_norm": 2.5403997898101807,
"learning_rate": 5.766682089662054e-07,
"loss": 0.24090662598609924,
"step": 620
},
{
"epoch": 2.525458248472505,
"grad_norm": 2.365463972091675,
"learning_rate": 5.752593510890635e-07,
"loss": 0.22003071755170822,
"step": 621
},
{
"epoch": 2.529531568228106,
"grad_norm": 2.4152722358703613,
"learning_rate": 5.738498815707219e-07,
"loss": 0.22138181328773499,
"step": 622
},
{
"epoch": 2.5336048879837065,
"grad_norm": 2.3184590339660645,
"learning_rate": 5.724398118660973e-07,
"loss": 0.22900409996509552,
"step": 623
},
{
"epoch": 2.5376782077393076,
"grad_norm": 2.4730820655822754,
"learning_rate": 5.710291534349849e-07,
"loss": 0.2507135570049286,
"step": 624
},
{
"epoch": 2.5417515274949083,
"grad_norm": 2.771104097366333,
"learning_rate": 5.696179177419642e-07,
"loss": 0.2613483816385269,
"step": 625
},
{
"epoch": 2.5458248472505094,
"grad_norm": 2.334442377090454,
"learning_rate": 5.682061162563061e-07,
"loss": 0.19708166271448135,
"step": 626
},
{
"epoch": 2.54989816700611,
"grad_norm": 2.361771821975708,
"learning_rate": 5.667937604518798e-07,
"loss": 0.2150387316942215,
"step": 627
},
{
"epoch": 2.5539714867617107,
"grad_norm": 2.5752477645874023,
"learning_rate": 5.653808618070597e-07,
"loss": 0.25150199234485626,
"step": 628
},
{
"epoch": 2.5580448065173114,
"grad_norm": 2.685345411300659,
"learning_rate": 5.639674318046317e-07,
"loss": 0.22753620892763138,
"step": 629
},
{
"epoch": 2.5621181262729125,
"grad_norm": 2.3737359046936035,
"learning_rate": 5.625534819317004e-07,
"loss": 0.2346680760383606,
"step": 630
},
{
"epoch": 2.566191446028513,
"grad_norm": 2.5365428924560547,
"learning_rate": 5.61139023679595e-07,
"loss": 0.27340593934059143,
"step": 631
},
{
"epoch": 2.5702647657841142,
"grad_norm": 2.5359578132629395,
"learning_rate": 5.597240685437765e-07,
"loss": 0.21642443537712097,
"step": 632
},
{
"epoch": 2.574338085539715,
"grad_norm": 2.651430130004883,
"learning_rate": 5.583086280237446e-07,
"loss": 0.2263542339205742,
"step": 633
},
{
"epoch": 2.5784114052953155,
"grad_norm": 2.7270631790161133,
"learning_rate": 5.568927136229432e-07,
"loss": 0.2491724044084549,
"step": 634
},
{
"epoch": 2.5824847250509166,
"grad_norm": 2.3526268005371094,
"learning_rate": 5.554763368486674e-07,
"loss": 0.21600161492824554,
"step": 635
},
{
"epoch": 2.5865580448065173,
"grad_norm": 2.307650327682495,
"learning_rate": 5.540595092119708e-07,
"loss": 0.19877354055643082,
"step": 636
},
{
"epoch": 2.5906313645621184,
"grad_norm": 2.3778960704803467,
"learning_rate": 5.526422422275707e-07,
"loss": 0.2133459597826004,
"step": 637
},
{
"epoch": 2.594704684317719,
"grad_norm": 2.5685219764709473,
"learning_rate": 5.512245474137546e-07,
"loss": 0.2637478709220886,
"step": 638
},
{
"epoch": 2.5987780040733197,
"grad_norm": 2.3442726135253906,
"learning_rate": 5.498064362922882e-07,
"loss": 0.2268994301557541,
"step": 639
},
{
"epoch": 2.6028513238289204,
"grad_norm": 2.780430793762207,
"learning_rate": 5.483879203883194e-07,
"loss": 0.24991512298583984,
"step": 640
},
{
"epoch": 2.6069246435845215,
"grad_norm": 2.5000061988830566,
"learning_rate": 5.469690112302863e-07,
"loss": 0.2421465590596199,
"step": 641
},
{
"epoch": 2.610997963340122,
"grad_norm": 2.5583932399749756,
"learning_rate": 5.455497203498231e-07,
"loss": 0.22834140062332153,
"step": 642
},
{
"epoch": 2.6150712830957232,
"grad_norm": 2.4214282035827637,
"learning_rate": 5.441300592816662e-07,
"loss": 0.24269834905862808,
"step": 643
},
{
"epoch": 2.619144602851324,
"grad_norm": 2.352479934692383,
"learning_rate": 5.427100395635607e-07,
"loss": 0.23605701327323914,
"step": 644
},
{
"epoch": 2.6232179226069245,
"grad_norm": 2.3162059783935547,
"learning_rate": 5.412896727361662e-07,
"loss": 0.2196669727563858,
"step": 645
},
{
"epoch": 2.627291242362525,
"grad_norm": 2.438443183898926,
"learning_rate": 5.398689703429634e-07,
"loss": 0.23577384650707245,
"step": 646
},
{
"epoch": 2.6313645621181263,
"grad_norm": 2.6072158813476562,
"learning_rate": 5.384479439301605e-07,
"loss": 0.22815095633268356,
"step": 647
},
{
"epoch": 2.635437881873727,
"grad_norm": 2.495229959487915,
"learning_rate": 5.370266050465983e-07,
"loss": 0.21826496720314026,
"step": 648
},
{
"epoch": 2.639511201629328,
"grad_norm": 2.4141361713409424,
"learning_rate": 5.356049652436579e-07,
"loss": 0.2289327010512352,
"step": 649
},
{
"epoch": 2.6435845213849287,
"grad_norm": 2.461887836456299,
"learning_rate": 5.341830360751658e-07,
"loss": 0.24295459687709808,
"step": 650
},
{
"epoch": 2.6476578411405294,
"grad_norm": 2.4014673233032227,
"learning_rate": 5.327608290972998e-07,
"loss": 0.23736917972564697,
"step": 651
},
{
"epoch": 2.6517311608961305,
"grad_norm": 2.3798553943634033,
"learning_rate": 5.313383558684957e-07,
"loss": 0.21394247561693192,
"step": 652
},
{
"epoch": 2.655804480651731,
"grad_norm": 2.433729648590088,
"learning_rate": 5.299156279493535e-07,
"loss": 0.22641988098621368,
"step": 653
},
{
"epoch": 2.6598778004073322,
"grad_norm": 2.801403284072876,
"learning_rate": 5.284926569025428e-07,
"loss": 0.19061069190502167,
"step": 654
},
{
"epoch": 2.663951120162933,
"grad_norm": 2.345667839050293,
"learning_rate": 5.270694542927088e-07,
"loss": 0.2233218401670456,
"step": 655
},
{
"epoch": 2.6680244399185336,
"grad_norm": 2.7157375812530518,
"learning_rate": 5.256460316863791e-07,
"loss": 0.268241822719574,
"step": 656
},
{
"epoch": 2.672097759674134,
"grad_norm": 2.566452980041504,
"learning_rate": 5.242224006518694e-07,
"loss": 0.23765669763088226,
"step": 657
},
{
"epoch": 2.6761710794297353,
"grad_norm": 2.395622491836548,
"learning_rate": 5.227985727591888e-07,
"loss": 0.2502652183175087,
"step": 658
},
{
"epoch": 2.680244399185336,
"grad_norm": 2.3431246280670166,
"learning_rate": 5.213745595799462e-07,
"loss": 0.2402183562517166,
"step": 659
},
{
"epoch": 2.684317718940937,
"grad_norm": 2.374241352081299,
"learning_rate": 5.199503726872573e-07,
"loss": 0.23906593769788742,
"step": 660
},
{
"epoch": 2.6883910386965377,
"grad_norm": 2.4988749027252197,
"learning_rate": 5.185260236556484e-07,
"loss": 0.23993954807519913,
"step": 661
},
{
"epoch": 2.6924643584521384,
"grad_norm": 2.4878809452056885,
"learning_rate": 5.171015240609644e-07,
"loss": 0.23122139275074005,
"step": 662
},
{
"epoch": 2.696537678207739,
"grad_norm": 2.6239712238311768,
"learning_rate": 5.156768854802734e-07,
"loss": 0.2619614750146866,
"step": 663
},
{
"epoch": 2.70061099796334,
"grad_norm": 3.4436697959899902,
"learning_rate": 5.142521194917733e-07,
"loss": 0.24487808346748352,
"step": 664
},
{
"epoch": 2.704684317718941,
"grad_norm": 2.537623643875122,
"learning_rate": 5.128272376746971e-07,
"loss": 0.2131681889295578,
"step": 665
},
{
"epoch": 2.708757637474542,
"grad_norm": 2.5551114082336426,
"learning_rate": 5.114022516092194e-07,
"loss": 0.2128780037164688,
"step": 666
},
{
"epoch": 2.7128309572301426,
"grad_norm": 2.3989691734313965,
"learning_rate": 5.099771728763623e-07,
"loss": 0.24740803986787796,
"step": 667
},
{
"epoch": 2.716904276985743,
"grad_norm": 2.2155869007110596,
"learning_rate": 5.085520130579005e-07,
"loss": 0.19293303787708282,
"step": 668
},
{
"epoch": 2.7209775967413443,
"grad_norm": 2.773521661758423,
"learning_rate": 5.07126783736268e-07,
"loss": 0.24105177074670792,
"step": 669
},
{
"epoch": 2.725050916496945,
"grad_norm": 2.3562657833099365,
"learning_rate": 5.057014964944634e-07,
"loss": 0.2287985384464264,
"step": 670
},
{
"epoch": 2.729124236252546,
"grad_norm": 2.4878883361816406,
"learning_rate": 5.042761629159566e-07,
"loss": 0.2295224368572235,
"step": 671
},
{
"epoch": 2.7331975560081467,
"grad_norm": 2.9576029777526855,
"learning_rate": 5.028507945845932e-07,
"loss": 0.24781977385282516,
"step": 672
},
{
"epoch": 2.7372708757637474,
"grad_norm": 2.7853589057922363,
"learning_rate": 5.014254030845021e-07,
"loss": 0.2594607025384903,
"step": 673
},
{
"epoch": 2.741344195519348,
"grad_norm": 2.313760280609131,
"learning_rate": 5e-07,
"loss": 0.2149473801255226,
"step": 674
},
{
"epoch": 2.745417515274949,
"grad_norm": 2.6287894248962402,
"learning_rate": 4.98574596915498e-07,
"loss": 0.2292870730161667,
"step": 675
},
{
"epoch": 2.74949083503055,
"grad_norm": 2.3970835208892822,
"learning_rate": 4.971492054154068e-07,
"loss": 0.24747398495674133,
"step": 676
},
{
"epoch": 2.753564154786151,
"grad_norm": 2.5344555377960205,
"learning_rate": 4.957238370840436e-07,
"loss": 0.24396724253892899,
"step": 677
},
{
"epoch": 2.7576374745417516,
"grad_norm": 2.3144986629486084,
"learning_rate": 4.942985035055366e-07,
"loss": 0.2170693725347519,
"step": 678
},
{
"epoch": 2.7617107942973522,
"grad_norm": 2.3190531730651855,
"learning_rate": 4.928732162637321e-07,
"loss": 0.22908472269773483,
"step": 679
},
{
"epoch": 2.765784114052953,
"grad_norm": 2.263834238052368,
"learning_rate": 4.914479869420994e-07,
"loss": 0.22652582079172134,
"step": 680
},
{
"epoch": 2.769857433808554,
"grad_norm": 2.4412286281585693,
"learning_rate": 4.900228271236377e-07,
"loss": 0.22030826658010483,
"step": 681
},
{
"epoch": 2.7739307535641546,
"grad_norm": 2.443682909011841,
"learning_rate": 4.885977483907804e-07,
"loss": 0.22983287274837494,
"step": 682
},
{
"epoch": 2.7780040733197557,
"grad_norm": 2.451335906982422,
"learning_rate": 4.871727623253028e-07,
"loss": 0.22219268232584,
"step": 683
},
{
"epoch": 2.7820773930753564,
"grad_norm": 2.3448593616485596,
"learning_rate": 4.857478805082267e-07,
"loss": 0.2025885134935379,
"step": 684
},
{
"epoch": 2.786150712830957,
"grad_norm": 2.2007482051849365,
"learning_rate": 4.843231145197266e-07,
"loss": 0.2085644006729126,
"step": 685
},
{
"epoch": 2.790224032586558,
"grad_norm": 2.331252336502075,
"learning_rate": 4.828984759390356e-07,
"loss": 0.20427662134170532,
"step": 686
},
{
"epoch": 2.794297352342159,
"grad_norm": 2.243265151977539,
"learning_rate": 4.814739763443515e-07,
"loss": 0.2013532519340515,
"step": 687
},
{
"epoch": 2.79837067209776,
"grad_norm": 2.581249475479126,
"learning_rate": 4.800496273127429e-07,
"loss": 0.2316228449344635,
"step": 688
},
{
"epoch": 2.8024439918533606,
"grad_norm": 2.5763373374938965,
"learning_rate": 4.786254404200538e-07,
"loss": 0.2278730794787407,
"step": 689
},
{
"epoch": 2.8065173116089612,
"grad_norm": 2.4369475841522217,
"learning_rate": 4.772014272408114e-07,
"loss": 0.22459274530410767,
"step": 690
},
{
"epoch": 2.810590631364562,
"grad_norm": 2.458749294281006,
"learning_rate": 4.757775993481306e-07,
"loss": 0.21142005175352097,
"step": 691
},
{
"epoch": 2.814663951120163,
"grad_norm": 2.3797214031219482,
"learning_rate": 4.743539683136209e-07,
"loss": 0.23049592226743698,
"step": 692
},
{
"epoch": 2.8187372708757636,
"grad_norm": 2.5822553634643555,
"learning_rate": 4.7293054570729126e-07,
"loss": 0.2521442621946335,
"step": 693
},
{
"epoch": 2.8228105906313647,
"grad_norm": 2.489661931991577,
"learning_rate": 4.715073430974573e-07,
"loss": 0.24500177055597305,
"step": 694
},
{
"epoch": 2.8268839103869654,
"grad_norm": 2.592890977859497,
"learning_rate": 4.7008437205064634e-07,
"loss": 0.2384454905986786,
"step": 695
},
{
"epoch": 2.830957230142566,
"grad_norm": 2.5368080139160156,
"learning_rate": 4.686616441315043e-07,
"loss": 0.25886720418930054,
"step": 696
},
{
"epoch": 2.835030549898167,
"grad_norm": 2.4768271446228027,
"learning_rate": 4.672391709027002e-07,
"loss": 0.2593514025211334,
"step": 697
},
{
"epoch": 2.839103869653768,
"grad_norm": 2.3749191761016846,
"learning_rate": 4.658169639248342e-07,
"loss": 0.19730783253908157,
"step": 698
},
{
"epoch": 2.8431771894093685,
"grad_norm": 2.4880032539367676,
"learning_rate": 4.643950347563421e-07,
"loss": 0.22945521771907806,
"step": 699
},
{
"epoch": 2.8472505091649696,
"grad_norm": 2.3967182636260986,
"learning_rate": 4.6297339495340165e-07,
"loss": 0.24117758870124817,
"step": 700
},
{
"epoch": 2.8513238289205702,
"grad_norm": 2.144585609436035,
"learning_rate": 4.615520560698397e-07,
"loss": 0.181608684360981,
"step": 701
},
{
"epoch": 2.855397148676171,
"grad_norm": 2.489408493041992,
"learning_rate": 4.601310296570366e-07,
"loss": 0.27073855698108673,
"step": 702
},
{
"epoch": 2.859470468431772,
"grad_norm": 2.4170804023742676,
"learning_rate": 4.5871032726383385e-07,
"loss": 0.20901280641555786,
"step": 703
},
{
"epoch": 2.8635437881873727,
"grad_norm": 2.4658868312835693,
"learning_rate": 4.572899604364392e-07,
"loss": 0.22699516266584396,
"step": 704
},
{
"epoch": 2.8676171079429738,
"grad_norm": 2.32704758644104,
"learning_rate": 4.5586994071833377e-07,
"loss": 0.2194635197520256,
"step": 705
},
{
"epoch": 2.8716904276985744,
"grad_norm": 2.540724277496338,
"learning_rate": 4.5445027965017683e-07,
"loss": 0.23322076350450516,
"step": 706
},
{
"epoch": 2.875763747454175,
"grad_norm": 2.4795303344726562,
"learning_rate": 4.5303098876971373e-07,
"loss": 0.22777557373046875,
"step": 707
},
{
"epoch": 2.8798370672097757,
"grad_norm": 2.5126326084136963,
"learning_rate": 4.516120796116806e-07,
"loss": 0.2254505679011345,
"step": 708
},
{
"epoch": 2.883910386965377,
"grad_norm": 2.6765761375427246,
"learning_rate": 4.5019356370771185e-07,
"loss": 0.23741194605827332,
"step": 709
},
{
"epoch": 2.8879837067209775,
"grad_norm": 2.261955976486206,
"learning_rate": 4.487754525862453e-07,
"loss": 0.22391848266124725,
"step": 710
},
{
"epoch": 2.8920570264765786,
"grad_norm": 2.5763208866119385,
"learning_rate": 4.473577577724293e-07,
"loss": 0.2411726713180542,
"step": 711
},
{
"epoch": 2.8961303462321792,
"grad_norm": 2.489358425140381,
"learning_rate": 4.459404907880292e-07,
"loss": 0.25273367017507553,
"step": 712
},
{
"epoch": 2.90020366598778,
"grad_norm": 2.3726134300231934,
"learning_rate": 4.4452366315133256e-07,
"loss": 0.20239847898483276,
"step": 713
},
{
"epoch": 2.904276985743381,
"grad_norm": 2.5708673000335693,
"learning_rate": 4.43107286377057e-07,
"loss": 0.23958415538072586,
"step": 714
},
{
"epoch": 2.9083503054989817,
"grad_norm": 2.319916248321533,
"learning_rate": 4.4169137197625537e-07,
"loss": 0.2106548771262169,
"step": 715
},
{
"epoch": 2.9124236252545828,
"grad_norm": 2.5788509845733643,
"learning_rate": 4.4027593145622357e-07,
"loss": 0.2554449290037155,
"step": 716
},
{
"epoch": 2.9164969450101834,
"grad_norm": 2.4445812702178955,
"learning_rate": 4.388609763204051e-07,
"loss": 0.22007200866937637,
"step": 717
},
{
"epoch": 2.920570264765784,
"grad_norm": 2.534118175506592,
"learning_rate": 4.3744651806829967e-07,
"loss": 0.24423322826623917,
"step": 718
},
{
"epoch": 2.9246435845213847,
"grad_norm": 2.4413700103759766,
"learning_rate": 4.3603256819536817e-07,
"loss": 0.22403902560472488,
"step": 719
},
{
"epoch": 2.928716904276986,
"grad_norm": 2.397890567779541,
"learning_rate": 4.3461913819294035e-07,
"loss": 0.2157697230577469,
"step": 720
},
{
"epoch": 2.9327902240325865,
"grad_norm": 2.469019651412964,
"learning_rate": 4.332062395481203e-07,
"loss": 0.22792110592126846,
"step": 721
},
{
"epoch": 2.9368635437881876,
"grad_norm": 2.254711866378784,
"learning_rate": 4.3179388374369396e-07,
"loss": 0.22459496557712555,
"step": 722
},
{
"epoch": 2.9409368635437882,
"grad_norm": 2.2734055519104004,
"learning_rate": 4.3038208225803594e-07,
"loss": 0.22033336013555527,
"step": 723
},
{
"epoch": 2.945010183299389,
"grad_norm": 2.565237283706665,
"learning_rate": 4.289708465650151e-07,
"loss": 0.24250654131174088,
"step": 724
},
{
"epoch": 2.9490835030549896,
"grad_norm": 2.408484935760498,
"learning_rate": 4.275601881339027e-07,
"loss": 0.22946030646562576,
"step": 725
},
{
"epoch": 2.9531568228105907,
"grad_norm": 2.5278210639953613,
"learning_rate": 4.261501184292782e-07,
"loss": 0.22008834779262543,
"step": 726
},
{
"epoch": 2.9572301425661913,
"grad_norm": 2.2692785263061523,
"learning_rate": 4.2474064891093655e-07,
"loss": 0.22004914283752441,
"step": 727
},
{
"epoch": 2.9613034623217924,
"grad_norm": 2.8039700984954834,
"learning_rate": 4.2333179103379445e-07,
"loss": 0.22936520725488663,
"step": 728
},
{
"epoch": 2.965376782077393,
"grad_norm": 2.5510387420654297,
"learning_rate": 4.2192355624779884e-07,
"loss": 0.2276434227824211,
"step": 729
},
{
"epoch": 2.9694501018329937,
"grad_norm": 2.512366771697998,
"learning_rate": 4.205159559978313e-07,
"loss": 0.23083247244358063,
"step": 730
},
{
"epoch": 2.973523421588595,
"grad_norm": 2.492103338241577,
"learning_rate": 4.1910900172361763e-07,
"loss": 0.2253756895661354,
"step": 731
},
{
"epoch": 2.9775967413441955,
"grad_norm": 2.4701900482177734,
"learning_rate": 4.1770270485963294e-07,
"loss": 0.2475346326828003,
"step": 732
},
{
"epoch": 2.9816700610997966,
"grad_norm": 2.8806581497192383,
"learning_rate": 4.162970768350102e-07,
"loss": 0.27113020420074463,
"step": 733
},
{
"epoch": 2.9857433808553973,
"grad_norm": 2.374631643295288,
"learning_rate": 4.148921290734459e-07,
"loss": 0.22385699301958084,
"step": 734
},
{
"epoch": 2.989816700610998,
"grad_norm": 2.5273098945617676,
"learning_rate": 4.134878729931083e-07,
"loss": 0.2260419949889183,
"step": 735
},
{
"epoch": 2.9938900203665986,
"grad_norm": 2.4065239429473877,
"learning_rate": 4.120843200065447e-07,
"loss": 0.2051657810807228,
"step": 736
},
{
"epoch": 2.9979633401221997,
"grad_norm": 2.419027328491211,
"learning_rate": 4.106814815205873e-07,
"loss": 0.22403547167778015,
"step": 737
},
{
"epoch": 3.0,
"grad_norm": 2.419027328491211,
"learning_rate": 4.092793689362625e-07,
"loss": 0.2715803384780884,
"step": 738
},
{
"epoch": 3.0040733197556007,
"grad_norm": 2.3535571098327637,
"learning_rate": 4.078779936486965e-07,
"loss": 0.21160051226615906,
"step": 739
},
{
"epoch": 3.0081466395112018,
"grad_norm": 2.379765272140503,
"learning_rate": 4.06477367047024e-07,
"loss": 0.22423982620239258,
"step": 740
},
{
"epoch": 3.0122199592668024,
"grad_norm": 2.3597190380096436,
"learning_rate": 4.050775005142943e-07,
"loss": 0.22356468439102173,
"step": 741
},
{
"epoch": 3.016293279022403,
"grad_norm": 2.3479771614074707,
"learning_rate": 4.036784054273803e-07,
"loss": 0.1949443370103836,
"step": 742
},
{
"epoch": 3.020366598778004,
"grad_norm": 2.1719963550567627,
"learning_rate": 4.0228009315688463e-07,
"loss": 0.16644436120986938,
"step": 743
},
{
"epoch": 3.024439918533605,
"grad_norm": 2.2947304248809814,
"learning_rate": 4.0088257506704853e-07,
"loss": 0.21931639313697815,
"step": 744
},
{
"epoch": 3.0285132382892055,
"grad_norm": 2.383701801300049,
"learning_rate": 3.994858625156582e-07,
"loss": 0.2001979798078537,
"step": 745
},
{
"epoch": 3.0325865580448066,
"grad_norm": 2.237454891204834,
"learning_rate": 3.9808996685395344e-07,
"loss": 0.20605531334877014,
"step": 746
},
{
"epoch": 3.0366598778004072,
"grad_norm": 2.3682029247283936,
"learning_rate": 3.966948994265354e-07,
"loss": 0.21250516921281815,
"step": 747
},
{
"epoch": 3.0407331975560083,
"grad_norm": 2.4874134063720703,
"learning_rate": 3.953006715712733e-07,
"loss": 0.21023060381412506,
"step": 748
},
{
"epoch": 3.044806517311609,
"grad_norm": 2.21878719329834,
"learning_rate": 3.939072946192139e-07,
"loss": 0.22278980910778046,
"step": 749
},
{
"epoch": 3.0488798370672097,
"grad_norm": 2.3506107330322266,
"learning_rate": 3.9251477989448795e-07,
"loss": 0.2010231390595436,
"step": 750
},
{
"epoch": 3.0529531568228108,
"grad_norm": 2.3747055530548096,
"learning_rate": 3.9112313871421937e-07,
"loss": 0.18845809996128082,
"step": 751
},
{
"epoch": 3.0570264765784114,
"grad_norm": 2.1489953994750977,
"learning_rate": 3.897323823884318e-07,
"loss": 0.19643110036849976,
"step": 752
},
{
"epoch": 3.061099796334012,
"grad_norm": 2.3836452960968018,
"learning_rate": 3.8834252221995877e-07,
"loss": 0.22072193771600723,
"step": 753
},
{
"epoch": 3.065173116089613,
"grad_norm": 2.418147563934326,
"learning_rate": 3.8695356950434945e-07,
"loss": 0.2128564417362213,
"step": 754
},
{
"epoch": 3.069246435845214,
"grad_norm": 2.341552734375,
"learning_rate": 3.855655355297789e-07,
"loss": 0.21925050020217896,
"step": 755
},
{
"epoch": 3.0733197556008145,
"grad_norm": 2.7095723152160645,
"learning_rate": 3.8417843157695497e-07,
"loss": 0.19232793152332306,
"step": 756
},
{
"epoch": 3.0773930753564156,
"grad_norm": 2.275331497192383,
"learning_rate": 3.827922689190275e-07,
"loss": 0.203746996819973,
"step": 757
},
{
"epoch": 3.0814663951120163,
"grad_norm": 2.3823604583740234,
"learning_rate": 3.8140705882149585e-07,
"loss": 0.20427367091178894,
"step": 758
},
{
"epoch": 3.085539714867617,
"grad_norm": 3.103059768676758,
"learning_rate": 3.8002281254211815e-07,
"loss": 0.22299692034721375,
"step": 759
},
{
"epoch": 3.089613034623218,
"grad_norm": 2.3718128204345703,
"learning_rate": 3.7863954133081966e-07,
"loss": 0.18740925192832947,
"step": 760
},
{
"epoch": 3.0936863543788187,
"grad_norm": 2.494307518005371,
"learning_rate": 3.772572564296004e-07,
"loss": 0.18933183699846268,
"step": 761
},
{
"epoch": 3.0977596741344193,
"grad_norm": 2.2291083335876465,
"learning_rate": 3.7587596907244545e-07,
"loss": 0.20075885951519012,
"step": 762
},
{
"epoch": 3.1018329938900204,
"grad_norm": 2.42327618598938,
"learning_rate": 3.744956904852321e-07,
"loss": 0.20842499285936356,
"step": 763
},
{
"epoch": 3.105906313645621,
"grad_norm": 2.4771366119384766,
"learning_rate": 3.7311643188563967e-07,
"loss": 0.20921579748392105,
"step": 764
},
{
"epoch": 3.109979633401222,
"grad_norm": 2.459636688232422,
"learning_rate": 3.717382044830575e-07,
"loss": 0.20464809238910675,
"step": 765
},
{
"epoch": 3.114052953156823,
"grad_norm": 2.4493255615234375,
"learning_rate": 3.7036101947849456e-07,
"loss": 0.19754133373498917,
"step": 766
},
{
"epoch": 3.1181262729124235,
"grad_norm": 2.426888942718506,
"learning_rate": 3.6898488806448807e-07,
"loss": 0.22133717685937881,
"step": 767
},
{
"epoch": 3.1221995926680246,
"grad_norm": 2.7352089881896973,
"learning_rate": 3.6760982142501284e-07,
"loss": 0.19161057472229004,
"step": 768
},
{
"epoch": 3.1262729124236253,
"grad_norm": 2.271554946899414,
"learning_rate": 3.6623583073538965e-07,
"loss": 0.1958870366215706,
"step": 769
},
{
"epoch": 3.130346232179226,
"grad_norm": 2.6821932792663574,
"learning_rate": 3.6486292716219514e-07,
"loss": 0.20984943211078644,
"step": 770
},
{
"epoch": 3.134419551934827,
"grad_norm": 2.6320438385009766,
"learning_rate": 3.634911218631711e-07,
"loss": 0.20410407334566116,
"step": 771
},
{
"epoch": 3.1384928716904277,
"grad_norm": 2.2215492725372314,
"learning_rate": 3.6212042598713296e-07,
"loss": 0.21011168509721756,
"step": 772
},
{
"epoch": 3.1425661914460283,
"grad_norm": 2.485713005065918,
"learning_rate": 3.607508506738803e-07,
"loss": 0.20080970227718353,
"step": 773
},
{
"epoch": 3.1466395112016294,
"grad_norm": 2.4904892444610596,
"learning_rate": 3.5938240705410537e-07,
"loss": 0.21116189658641815,
"step": 774
},
{
"epoch": 3.15071283095723,
"grad_norm": 2.4921395778656006,
"learning_rate": 3.580151062493036e-07,
"loss": 0.1990169882774353,
"step": 775
},
{
"epoch": 3.1547861507128308,
"grad_norm": 2.323054790496826,
"learning_rate": 3.566489593716816e-07,
"loss": 0.19865593314170837,
"step": 776
},
{
"epoch": 3.158859470468432,
"grad_norm": 2.533299684524536,
"learning_rate": 3.5528397752406894e-07,
"loss": 0.19558026641607285,
"step": 777
},
{
"epoch": 3.1629327902240325,
"grad_norm": 2.2772974967956543,
"learning_rate": 3.5392017179982613e-07,
"loss": 0.18929005414247513,
"step": 778
},
{
"epoch": 3.167006109979633,
"grad_norm": 2.433457374572754,
"learning_rate": 3.5255755328275584e-07,
"loss": 0.2181885540485382,
"step": 779
},
{
"epoch": 3.1710794297352343,
"grad_norm": 2.350560426712036,
"learning_rate": 3.511961330470115e-07,
"loss": 0.19552721083164215,
"step": 780
},
{
"epoch": 3.175152749490835,
"grad_norm": 2.4112911224365234,
"learning_rate": 3.498359221570083e-07,
"loss": 0.20244888216257095,
"step": 781
},
{
"epoch": 3.179226069246436,
"grad_norm": 2.4253125190734863,
"learning_rate": 3.484769316673331e-07,
"loss": 0.22686784714460373,
"step": 782
},
{
"epoch": 3.1832993890020367,
"grad_norm": 2.535693645477295,
"learning_rate": 3.471191726226541e-07,
"loss": 0.22888771444559097,
"step": 783
},
{
"epoch": 3.1873727087576373,
"grad_norm": 2.3272151947021484,
"learning_rate": 3.4576265605763185e-07,
"loss": 0.20377343893051147,
"step": 784
},
{
"epoch": 3.1914460285132384,
"grad_norm": 2.6908118724823,
"learning_rate": 3.444073929968284e-07,
"loss": 0.20916848629713058,
"step": 785
},
{
"epoch": 3.195519348268839,
"grad_norm": 2.3921995162963867,
"learning_rate": 3.4305339445461923e-07,
"loss": 0.1810828298330307,
"step": 786
},
{
"epoch": 3.1995926680244398,
"grad_norm": 2.4223461151123047,
"learning_rate": 3.417006714351024e-07,
"loss": 0.2161688134074211,
"step": 787
},
{
"epoch": 3.203665987780041,
"grad_norm": 2.4449656009674072,
"learning_rate": 3.4034923493201007e-07,
"loss": 0.18497437238693237,
"step": 788
},
{
"epoch": 3.2077393075356415,
"grad_norm": 2.400811195373535,
"learning_rate": 3.3899909592861816e-07,
"loss": 0.1974419429898262,
"step": 789
},
{
"epoch": 3.211812627291242,
"grad_norm": 2.381777763366699,
"learning_rate": 3.3765026539765827e-07,
"loss": 0.18730898946523666,
"step": 790
},
{
"epoch": 3.2158859470468433,
"grad_norm": 2.3965096473693848,
"learning_rate": 3.3630275430122747e-07,
"loss": 0.17667900025844574,
"step": 791
},
{
"epoch": 3.219959266802444,
"grad_norm": 2.2706997394561768,
"learning_rate": 3.349565735907e-07,
"loss": 0.21489760279655457,
"step": 792
},
{
"epoch": 3.224032586558045,
"grad_norm": 2.5842676162719727,
"learning_rate": 3.336117342066375e-07,
"loss": 0.21414875984191895,
"step": 793
},
{
"epoch": 3.2281059063136457,
"grad_norm": 2.563056707382202,
"learning_rate": 3.3226824707870073e-07,
"loss": 0.22138936817646027,
"step": 794
},
{
"epoch": 3.2321792260692463,
"grad_norm": 2.35392689704895,
"learning_rate": 3.3092612312556075e-07,
"loss": 0.20381484925746918,
"step": 795
},
{
"epoch": 3.2362525458248474,
"grad_norm": 2.532590389251709,
"learning_rate": 3.2958537325480924e-07,
"loss": 0.21896713972091675,
"step": 796
},
{
"epoch": 3.240325865580448,
"grad_norm": 2.476938247680664,
"learning_rate": 3.282460083628713e-07,
"loss": 0.21506690233945847,
"step": 797
},
{
"epoch": 3.2443991853360488,
"grad_norm": 2.544861078262329,
"learning_rate": 3.2690803933491576e-07,
"loss": 0.22808198630809784,
"step": 798
},
{
"epoch": 3.24847250509165,
"grad_norm": 2.4569203853607178,
"learning_rate": 3.255714770447674e-07,
"loss": 0.2176097184419632,
"step": 799
},
{
"epoch": 3.2525458248472505,
"grad_norm": 2.4378373622894287,
"learning_rate": 3.242363323548177e-07,
"loss": 0.19017792493104935,
"step": 800
},
{
"epoch": 3.256619144602851,
"grad_norm": 2.4247865676879883,
"learning_rate": 3.229026161159378e-07,
"loss": 0.2059084177017212,
"step": 801
},
{
"epoch": 3.2606924643584523,
"grad_norm": 2.5162672996520996,
"learning_rate": 3.215703391673893e-07,
"loss": 0.19650442153215408,
"step": 802
},
{
"epoch": 3.264765784114053,
"grad_norm": 2.570923328399658,
"learning_rate": 3.202395123367367e-07,
"loss": 0.2457878440618515,
"step": 803
},
{
"epoch": 3.2688391038696536,
"grad_norm": 2.5794918537139893,
"learning_rate": 3.189101464397591e-07,
"loss": 0.1937229335308075,
"step": 804
},
{
"epoch": 3.2729124236252547,
"grad_norm": 2.4221835136413574,
"learning_rate": 3.1758225228036227e-07,
"loss": 0.17538277059793472,
"step": 805
},
{
"epoch": 3.2769857433808554,
"grad_norm": 2.418198347091675,
"learning_rate": 3.1625584065049155e-07,
"loss": 0.20809274911880493,
"step": 806
},
{
"epoch": 3.281059063136456,
"grad_norm": 2.56083607673645,
"learning_rate": 3.1493092233004277e-07,
"loss": 0.2350323647260666,
"step": 807
},
{
"epoch": 3.285132382892057,
"grad_norm": 2.516634702682495,
"learning_rate": 3.136075080867765e-07,
"loss": 0.1945355385541916,
"step": 808
},
{
"epoch": 3.2892057026476578,
"grad_norm": 2.5624663829803467,
"learning_rate": 3.1228560867622854e-07,
"loss": 0.21247724443674088,
"step": 809
},
{
"epoch": 3.293279022403259,
"grad_norm": 2.6713180541992188,
"learning_rate": 3.1096523484162407e-07,
"loss": 0.20905288308858871,
"step": 810
},
{
"epoch": 3.2973523421588595,
"grad_norm": 2.5594656467437744,
"learning_rate": 3.0964639731378947e-07,
"loss": 0.21280484646558762,
"step": 811
},
{
"epoch": 3.30142566191446,
"grad_norm": 2.5005033016204834,
"learning_rate": 3.0832910681106565e-07,
"loss": 0.1858508661389351,
"step": 812
},
{
"epoch": 3.3054989816700613,
"grad_norm": 2.5059120655059814,
"learning_rate": 3.070133740392202e-07,
"loss": 0.17269013077020645,
"step": 813
},
{
"epoch": 3.309572301425662,
"grad_norm": 2.484297752380371,
"learning_rate": 3.0569920969136135e-07,
"loss": 0.21136271953582764,
"step": 814
},
{
"epoch": 3.3136456211812626,
"grad_norm": 2.5234310626983643,
"learning_rate": 3.043866244478505e-07,
"loss": 0.18130285292863846,
"step": 815
},
{
"epoch": 3.3177189409368637,
"grad_norm": 2.338294267654419,
"learning_rate": 3.0307562897621485e-07,
"loss": 0.20136955380439758,
"step": 816
},
{
"epoch": 3.3217922606924644,
"grad_norm": 2.490870714187622,
"learning_rate": 3.0176623393106235e-07,
"loss": 0.18799114972352982,
"step": 817
},
{
"epoch": 3.325865580448065,
"grad_norm": 2.756178140640259,
"learning_rate": 3.0045844995399327e-07,
"loss": 0.17684923857450485,
"step": 818
},
{
"epoch": 3.329938900203666,
"grad_norm": 2.6265006065368652,
"learning_rate": 2.9915228767351535e-07,
"loss": 0.20372115820646286,
"step": 819
},
{
"epoch": 3.3340122199592668,
"grad_norm": 2.3552069664001465,
"learning_rate": 2.978477577049556e-07,
"loss": 0.17303332686424255,
"step": 820
},
{
"epoch": 3.3380855397148674,
"grad_norm": 2.3262510299682617,
"learning_rate": 2.965448706503761e-07,
"loss": 0.22534170746803284,
"step": 821
},
{
"epoch": 3.3421588594704685,
"grad_norm": 2.619785785675049,
"learning_rate": 2.952436370984859e-07,
"loss": 0.2306053191423416,
"step": 822
},
{
"epoch": 3.346232179226069,
"grad_norm": 2.4102282524108887,
"learning_rate": 2.939440676245566e-07,
"loss": 0.2103247046470642,
"step": 823
},
{
"epoch": 3.35030549898167,
"grad_norm": 2.646646738052368,
"learning_rate": 2.926461727903349e-07,
"loss": 0.21324314177036285,
"step": 824
},
{
"epoch": 3.354378818737271,
"grad_norm": 2.454810619354248,
"learning_rate": 2.9134996314395817e-07,
"loss": 0.1971464827656746,
"step": 825
},
{
"epoch": 3.3584521384928716,
"grad_norm": 2.4926371574401855,
"learning_rate": 2.900554492198677e-07,
"loss": 0.19652055203914642,
"step": 826
},
{
"epoch": 3.3625254582484727,
"grad_norm": 2.251152992248535,
"learning_rate": 2.887626415387237e-07,
"loss": 0.22214417904615402,
"step": 827
},
{
"epoch": 3.3665987780040734,
"grad_norm": 2.5524468421936035,
"learning_rate": 2.8747155060731937e-07,
"loss": 0.21327239274978638,
"step": 828
},
{
"epoch": 3.370672097759674,
"grad_norm": 2.429053783416748,
"learning_rate": 2.8618218691849545e-07,
"loss": 0.2205718532204628,
"step": 829
},
{
"epoch": 3.374745417515275,
"grad_norm": 2.5766923427581787,
"learning_rate": 2.8489456095105566e-07,
"loss": 0.23655060678720474,
"step": 830
},
{
"epoch": 3.378818737270876,
"grad_norm": 2.752376079559326,
"learning_rate": 2.836086831696809e-07,
"loss": 0.24048178642988205,
"step": 831
},
{
"epoch": 3.3828920570264764,
"grad_norm": 2.4999852180480957,
"learning_rate": 2.8232456402484463e-07,
"loss": 0.16257788240909576,
"step": 832
},
{
"epoch": 3.3869653767820775,
"grad_norm": 2.4269254207611084,
"learning_rate": 2.8104221395272674e-07,
"loss": 0.2149578034877777,
"step": 833
},
{
"epoch": 3.391038696537678,
"grad_norm": 2.354278326034546,
"learning_rate": 2.797616433751309e-07,
"loss": 0.19306989759206772,
"step": 834
},
{
"epoch": 3.395112016293279,
"grad_norm": 2.495457649230957,
"learning_rate": 2.784828626993976e-07,
"loss": 0.16760513186454773,
"step": 835
},
{
"epoch": 3.39918533604888,
"grad_norm": 2.299818754196167,
"learning_rate": 2.772058823183212e-07,
"loss": 0.1885610893368721,
"step": 836
},
{
"epoch": 3.4032586558044806,
"grad_norm": 2.441559076309204,
"learning_rate": 2.7593071261006473e-07,
"loss": 0.19247783720493317,
"step": 837
},
{
"epoch": 3.4073319755600817,
"grad_norm": 2.5072267055511475,
"learning_rate": 2.746573639380758e-07,
"loss": 0.16321063041687012,
"step": 838
},
{
"epoch": 3.4114052953156824,
"grad_norm": 2.3393125534057617,
"learning_rate": 2.7338584665100195e-07,
"loss": 0.1672486811876297,
"step": 839
},
{
"epoch": 3.415478615071283,
"grad_norm": 2.5106165409088135,
"learning_rate": 2.7211617108260674e-07,
"loss": 0.20449652522802353,
"step": 840
},
{
"epoch": 3.4195519348268837,
"grad_norm": 2.5324196815490723,
"learning_rate": 2.708483475516865e-07,
"loss": 0.2249668836593628,
"step": 841
},
{
"epoch": 3.423625254582485,
"grad_norm": 2.581486225128174,
"learning_rate": 2.695823863619853e-07,
"loss": 0.2402847856283188,
"step": 842
},
{
"epoch": 3.4276985743380854,
"grad_norm": 2.535104274749756,
"learning_rate": 2.683182978021118e-07,
"loss": 0.1992955058813095,
"step": 843
},
{
"epoch": 3.4317718940936865,
"grad_norm": 2.2774689197540283,
"learning_rate": 2.6705609214545585e-07,
"loss": 0.19953173398971558,
"step": 844
},
{
"epoch": 3.435845213849287,
"grad_norm": 2.4968085289001465,
"learning_rate": 2.65795779650105e-07,
"loss": 0.19426950812339783,
"step": 845
},
{
"epoch": 3.439918533604888,
"grad_norm": 2.459174156188965,
"learning_rate": 2.6453737055875974e-07,
"loss": 0.1926077976822853,
"step": 846
},
{
"epoch": 3.443991853360489,
"grad_norm": 2.254603624343872,
"learning_rate": 2.632808750986527e-07,
"loss": 0.1818878874182701,
"step": 847
},
{
"epoch": 3.4480651731160896,
"grad_norm": 2.502915382385254,
"learning_rate": 2.620263034814632e-07,
"loss": 0.17121271044015884,
"step": 848
},
{
"epoch": 3.4521384928716903,
"grad_norm": 2.478640079498291,
"learning_rate": 2.6077366590323605e-07,
"loss": 0.17577876895666122,
"step": 849
},
{
"epoch": 3.4562118126272914,
"grad_norm": 2.799896478652954,
"learning_rate": 2.5952297254429725e-07,
"loss": 0.2371537759900093,
"step": 850
},
{
"epoch": 3.460285132382892,
"grad_norm": 2.4503631591796875,
"learning_rate": 2.582742335691722e-07,
"loss": 0.18308546394109726,
"step": 851
},
{
"epoch": 3.4643584521384927,
"grad_norm": 2.511253595352173,
"learning_rate": 2.5702745912650327e-07,
"loss": 0.1863907426595688,
"step": 852
},
{
"epoch": 3.468431771894094,
"grad_norm": 2.441244602203369,
"learning_rate": 2.5578265934896586e-07,
"loss": 0.17125633358955383,
"step": 853
},
{
"epoch": 3.4725050916496945,
"grad_norm": 2.5986480712890625,
"learning_rate": 2.54539844353188e-07,
"loss": 0.20211002230644226,
"step": 854
},
{
"epoch": 3.4765784114052956,
"grad_norm": 2.5657708644866943,
"learning_rate": 2.5329902423966636e-07,
"loss": 0.18166958540678024,
"step": 855
},
{
"epoch": 3.480651731160896,
"grad_norm": 2.759941577911377,
"learning_rate": 2.5206020909268575e-07,
"loss": 0.2045290172100067,
"step": 856
},
{
"epoch": 3.484725050916497,
"grad_norm": 2.3406291007995605,
"learning_rate": 2.508234089802356e-07,
"loss": 0.20272044837474823,
"step": 857
},
{
"epoch": 3.4887983706720975,
"grad_norm": 2.5458176136016846,
"learning_rate": 2.4958863395392985e-07,
"loss": 0.20205383747816086,
"step": 858
},
{
"epoch": 3.4928716904276986,
"grad_norm": 2.6369478702545166,
"learning_rate": 2.483558940489235e-07,
"loss": 0.21245616674423218,
"step": 859
},
{
"epoch": 3.4969450101832993,
"grad_norm": 2.407942533493042,
"learning_rate": 2.4712519928383245e-07,
"loss": 0.20060960948467255,
"step": 860
},
{
"epoch": 3.5010183299389004,
"grad_norm": 2.491145133972168,
"learning_rate": 2.45896559660651e-07,
"loss": 0.2001042366027832,
"step": 861
},
{
"epoch": 3.505091649694501,
"grad_norm": 2.3934686183929443,
"learning_rate": 2.4466998516467176e-07,
"loss": 0.20040663331747055,
"step": 862
},
{
"epoch": 3.5091649694501017,
"grad_norm": 2.350095510482788,
"learning_rate": 2.4344548576440293e-07,
"loss": 0.1889752671122551,
"step": 863
},
{
"epoch": 3.513238289205703,
"grad_norm": 2.4073328971862793,
"learning_rate": 2.4222307141148906e-07,
"loss": 0.18654479831457138,
"step": 864
},
{
"epoch": 3.5173116089613035,
"grad_norm": 2.4485344886779785,
"learning_rate": 2.4100275204062897e-07,
"loss": 0.21063948422670364,
"step": 865
},
{
"epoch": 3.521384928716904,
"grad_norm": 2.4200923442840576,
"learning_rate": 2.397845375694949e-07,
"loss": 0.1860312521457672,
"step": 866
},
{
"epoch": 3.525458248472505,
"grad_norm": 2.775789260864258,
"learning_rate": 2.3856843789865303e-07,
"loss": 0.2309775874018669,
"step": 867
},
{
"epoch": 3.529531568228106,
"grad_norm": 2.7096245288848877,
"learning_rate": 2.3735446291148176e-07,
"loss": 0.2163269817829132,
"step": 868
},
{
"epoch": 3.5336048879837065,
"grad_norm": 2.4550371170043945,
"learning_rate": 2.361426224740924e-07,
"loss": 0.1779681146144867,
"step": 869
},
{
"epoch": 3.5376782077393076,
"grad_norm": 2.2727365493774414,
"learning_rate": 2.3493292643524799e-07,
"loss": 0.19602014124393463,
"step": 870
},
{
"epoch": 3.5417515274949083,
"grad_norm": 2.445028066635132,
"learning_rate": 2.3372538462628422e-07,
"loss": 0.19011924415826797,
"step": 871
},
{
"epoch": 3.5458248472505094,
"grad_norm": 2.537092447280884,
"learning_rate": 2.3252000686102912e-07,
"loss": 0.22465527802705765,
"step": 872
},
{
"epoch": 3.54989816700611,
"grad_norm": 2.5350615978240967,
"learning_rate": 2.3131680293572336e-07,
"loss": 0.2048494815826416,
"step": 873
},
{
"epoch": 3.5539714867617107,
"grad_norm": 2.435441255569458,
"learning_rate": 2.3011578262894015e-07,
"loss": 0.22786639630794525,
"step": 874
},
{
"epoch": 3.5580448065173114,
"grad_norm": 2.409935235977173,
"learning_rate": 2.2891695570150631e-07,
"loss": 0.1882152482867241,
"step": 875
},
{
"epoch": 3.5621181262729125,
"grad_norm": 2.6188433170318604,
"learning_rate": 2.2772033189642321e-07,
"loss": 0.21629157662391663,
"step": 876
},
{
"epoch": 3.566191446028513,
"grad_norm": 2.4537856578826904,
"learning_rate": 2.2652592093878665e-07,
"loss": 0.19376155734062195,
"step": 877
},
{
"epoch": 3.5702647657841142,
"grad_norm": 2.438338279724121,
"learning_rate": 2.2533373253570875e-07,
"loss": 0.22396929562091827,
"step": 878
},
{
"epoch": 3.574338085539715,
"grad_norm": 2.4425530433654785,
"learning_rate": 2.2414377637623865e-07,
"loss": 0.1896074339747429,
"step": 879
},
{
"epoch": 3.5784114052953155,
"grad_norm": 2.46877384185791,
"learning_rate": 2.2295606213128387e-07,
"loss": 0.1916242465376854,
"step": 880
},
{
"epoch": 3.5824847250509166,
"grad_norm": 2.4224820137023926,
"learning_rate": 2.2177059945353115e-07,
"loss": 0.23046725243330002,
"step": 881
},
{
"epoch": 3.5865580448065173,
"grad_norm": 2.678439140319824,
"learning_rate": 2.2058739797736914e-07,
"loss": 0.1764805093407631,
"step": 882
},
{
"epoch": 3.5906313645621184,
"grad_norm": 2.5862419605255127,
"learning_rate": 2.1940646731880885e-07,
"loss": 0.1832810789346695,
"step": 883
},
{
"epoch": 3.594704684317719,
"grad_norm": 2.3499772548675537,
"learning_rate": 2.1822781707540667e-07,
"loss": 0.19466058164834976,
"step": 884
},
{
"epoch": 3.5987780040733197,
"grad_norm": 2.416409492492676,
"learning_rate": 2.1705145682618502e-07,
"loss": 0.20160751044750214,
"step": 885
},
{
"epoch": 3.6028513238289204,
"grad_norm": 2.7396693229675293,
"learning_rate": 2.1587739613155653e-07,
"loss": 0.2220790833234787,
"step": 886
},
{
"epoch": 3.6069246435845215,
"grad_norm": 2.438356876373291,
"learning_rate": 2.1470564453324392e-07,
"loss": 0.1909223347902298,
"step": 887
},
{
"epoch": 3.610997963340122,
"grad_norm": 2.637033224105835,
"learning_rate": 2.1353621155420393e-07,
"loss": 0.19567319750785828,
"step": 888
},
{
"epoch": 3.6150712830957232,
"grad_norm": 2.6245222091674805,
"learning_rate": 2.1236910669855006e-07,
"loss": 0.19575632363557816,
"step": 889
},
{
"epoch": 3.619144602851324,
"grad_norm": 2.6102139949798584,
"learning_rate": 2.112043394514742e-07,
"loss": 0.19621288776397705,
"step": 890
},
{
"epoch": 3.6232179226069245,
"grad_norm": 2.5597336292266846,
"learning_rate": 2.100419192791708e-07,
"loss": 0.17981623113155365,
"step": 891
},
{
"epoch": 3.627291242362525,
"grad_norm": 2.345719575881958,
"learning_rate": 2.088818556287592e-07,
"loss": 0.18147233873605728,
"step": 892
},
{
"epoch": 3.6313645621181263,
"grad_norm": 2.4701764583587646,
"learning_rate": 2.0772415792820713e-07,
"loss": 0.20388461649417877,
"step": 893
},
{
"epoch": 3.635437881873727,
"grad_norm": 2.6218173503875732,
"learning_rate": 2.0656883558625348e-07,
"loss": 0.20968149602413177,
"step": 894
},
{
"epoch": 3.639511201629328,
"grad_norm": 2.433162212371826,
"learning_rate": 2.054158979923331e-07,
"loss": 0.23324060440063477,
"step": 895
},
{
"epoch": 3.6435845213849287,
"grad_norm": 2.5269041061401367,
"learning_rate": 2.042653545164989e-07,
"loss": 0.22314583510160446,
"step": 896
},
{
"epoch": 3.6476578411405294,
"grad_norm": 2.772693634033203,
"learning_rate": 2.0311721450934732e-07,
"loss": 0.17379353195428848,
"step": 897
},
{
"epoch": 3.6517311608961305,
"grad_norm": 2.4417383670806885,
"learning_rate": 2.0197148730194085e-07,
"loss": 0.196005217730999,
"step": 898
},
{
"epoch": 3.655804480651731,
"grad_norm": 2.5393450260162354,
"learning_rate": 2.0082818220573332e-07,
"loss": 0.21743719279766083,
"step": 899
},
{
"epoch": 3.6598778004073322,
"grad_norm": 2.476929187774658,
"learning_rate": 1.9968730851249388e-07,
"loss": 0.20515238493680954,
"step": 900
},
{
"epoch": 3.663951120162933,
"grad_norm": 2.441044569015503,
"learning_rate": 1.9854887549423082e-07,
"loss": 0.1906992271542549,
"step": 901
},
{
"epoch": 3.6680244399185336,
"grad_norm": 2.5373618602752686,
"learning_rate": 1.9741289240311754e-07,
"loss": 0.20473621785640717,
"step": 902
},
{
"epoch": 3.672097759674134,
"grad_norm": 2.405190944671631,
"learning_rate": 1.962793684714158e-07,
"loss": 0.21750831604003906,
"step": 903
},
{
"epoch": 3.6761710794297353,
"grad_norm": 2.40073299407959,
"learning_rate": 1.9514831291140228e-07,
"loss": 0.19307416677474976,
"step": 904
},
{
"epoch": 3.680244399185336,
"grad_norm": 2.7207555770874023,
"learning_rate": 1.940197349152923e-07,
"loss": 0.2060309201478958,
"step": 905
},
{
"epoch": 3.684317718940937,
"grad_norm": 2.4056124687194824,
"learning_rate": 1.9289364365516607e-07,
"loss": 0.1817646324634552,
"step": 906
},
{
"epoch": 3.6883910386965377,
"grad_norm": 2.5248098373413086,
"learning_rate": 1.9177004828289383e-07,
"loss": 0.20411433279514313,
"step": 907
},
{
"epoch": 3.6924643584521384,
"grad_norm": 2.365913152694702,
"learning_rate": 1.9064895793006153e-07,
"loss": 0.1928766593337059,
"step": 908
},
{
"epoch": 3.696537678207739,
"grad_norm": 2.5754306316375732,
"learning_rate": 1.8953038170789615e-07,
"loss": 0.21009906381368637,
"step": 909
},
{
"epoch": 3.70061099796334,
"grad_norm": 2.641087055206299,
"learning_rate": 1.8841432870719226e-07,
"loss": 0.21582353860139847,
"step": 910
},
{
"epoch": 3.704684317718941,
"grad_norm": 2.567742347717285,
"learning_rate": 1.8730080799823815e-07,
"loss": 0.19261349737644196,
"step": 911
},
{
"epoch": 3.708757637474542,
"grad_norm": 2.521796703338623,
"learning_rate": 1.861898286307413e-07,
"loss": 0.20253480970859528,
"step": 912
},
{
"epoch": 3.7128309572301426,
"grad_norm": 2.504960298538208,
"learning_rate": 1.8508139963375646e-07,
"loss": 0.196384996175766,
"step": 913
},
{
"epoch": 3.716904276985743,
"grad_norm": 2.448503017425537,
"learning_rate": 1.8397553001561012e-07,
"loss": 0.22694706171751022,
"step": 914
},
{
"epoch": 3.7209775967413443,
"grad_norm": 2.448093891143799,
"learning_rate": 1.8287222876382912e-07,
"loss": 0.18733669072389603,
"step": 915
},
{
"epoch": 3.725050916496945,
"grad_norm": 2.4246034622192383,
"learning_rate": 1.8177150484506642e-07,
"loss": 0.19822125136852264,
"step": 916
},
{
"epoch": 3.729124236252546,
"grad_norm": 2.5895848274230957,
"learning_rate": 1.806733672050293e-07,
"loss": 0.18903044611215591,
"step": 917
},
{
"epoch": 3.7331975560081467,
"grad_norm": 2.5084774494171143,
"learning_rate": 1.7957782476840528e-07,
"loss": 0.2244538515806198,
"step": 918
},
{
"epoch": 3.7372708757637474,
"grad_norm": 2.436156749725342,
"learning_rate": 1.78484886438791e-07,
"loss": 0.19061604887247086,
"step": 919
},
{
"epoch": 3.741344195519348,
"grad_norm": 2.3762974739074707,
"learning_rate": 1.7739456109861912e-07,
"loss": 0.18180037289857864,
"step": 920
},
{
"epoch": 3.745417515274949,
"grad_norm": 2.5922470092773438,
"learning_rate": 1.763068576090862e-07,
"loss": 0.21426790952682495,
"step": 921
},
{
"epoch": 3.74949083503055,
"grad_norm": 2.582359790802002,
"learning_rate": 1.7522178481008054e-07,
"loss": 0.20259950309991837,
"step": 922
},
{
"epoch": 3.753564154786151,
"grad_norm": 2.3691251277923584,
"learning_rate": 1.7413935152011055e-07,
"loss": 0.21478895843029022,
"step": 923
},
{
"epoch": 3.7576374745417516,
"grad_norm": 2.511850595474243,
"learning_rate": 1.7305956653623343e-07,
"loss": 0.20169981569051743,
"step": 924
},
{
"epoch": 3.7617107942973522,
"grad_norm": 2.342623472213745,
"learning_rate": 1.719824386339827e-07,
"loss": 0.21613454818725586,
"step": 925
},
{
"epoch": 3.765784114052953,
"grad_norm": 2.58245587348938,
"learning_rate": 1.7090797656729804e-07,
"loss": 0.1654214784502983,
"step": 926
},
{
"epoch": 3.769857433808554,
"grad_norm": 2.3268702030181885,
"learning_rate": 1.6983618906845332e-07,
"loss": 0.18952950835227966,
"step": 927
},
{
"epoch": 3.7739307535641546,
"grad_norm": 2.42199444770813,
"learning_rate": 1.6876708484798608e-07,
"loss": 0.16338826343417168,
"step": 928
},
{
"epoch": 3.7780040733197557,
"grad_norm": 2.2870736122131348,
"learning_rate": 1.677006725946261e-07,
"loss": 0.18167713284492493,
"step": 929
},
{
"epoch": 3.7820773930753564,
"grad_norm": 2.5234158039093018,
"learning_rate": 1.6663696097522585e-07,
"loss": 0.1719643920660019,
"step": 930
},
{
"epoch": 3.786150712830957,
"grad_norm": 2.5721402168273926,
"learning_rate": 1.6557595863468886e-07,
"loss": 0.1708434671163559,
"step": 931
},
{
"epoch": 3.790224032586558,
"grad_norm": 2.392375946044922,
"learning_rate": 1.6451767419590062e-07,
"loss": 0.20495689660310745,
"step": 932
},
{
"epoch": 3.794297352342159,
"grad_norm": 2.815039873123169,
"learning_rate": 1.6346211625965732e-07,
"loss": 0.1895192414522171,
"step": 933
},
{
"epoch": 3.79837067209776,
"grad_norm": 2.6176204681396484,
"learning_rate": 1.6240929340459703e-07,
"loss": 0.19631709158420563,
"step": 934
},
{
"epoch": 3.8024439918533606,
"grad_norm": 2.4265332221984863,
"learning_rate": 1.6135921418712955e-07,
"loss": 0.19473101943731308,
"step": 935
},
{
"epoch": 3.8065173116089612,
"grad_norm": 2.5783004760742188,
"learning_rate": 1.6031188714136623e-07,
"loss": 0.2178090512752533,
"step": 936
},
{
"epoch": 3.810590631364562,
"grad_norm": 2.534191846847534,
"learning_rate": 1.5926732077905203e-07,
"loss": 0.16174981743097305,
"step": 937
},
{
"epoch": 3.814663951120163,
"grad_norm": 2.6376049518585205,
"learning_rate": 1.582255235894947e-07,
"loss": 0.22103732079267502,
"step": 938
},
{
"epoch": 3.8187372708757636,
"grad_norm": 2.646101474761963,
"learning_rate": 1.571865040394973e-07,
"loss": 0.21458610147237778,
"step": 939
},
{
"epoch": 3.8228105906313647,
"grad_norm": 2.335294246673584,
"learning_rate": 1.561502705732883e-07,
"loss": 0.21040001511573792,
"step": 940
},
{
"epoch": 3.8268839103869654,
"grad_norm": 2.527137279510498,
"learning_rate": 1.5511683161245365e-07,
"loss": 0.19486284255981445,
"step": 941
},
{
"epoch": 3.830957230142566,
"grad_norm": 2.3123862743377686,
"learning_rate": 1.540861955558676e-07,
"loss": 0.18999101221561432,
"step": 942
},
{
"epoch": 3.835030549898167,
"grad_norm": 2.6126627922058105,
"learning_rate": 1.5305837077962542e-07,
"loss": 0.20924139767885208,
"step": 943
},
{
"epoch": 3.839103869653768,
"grad_norm": 2.4893648624420166,
"learning_rate": 1.5203336563697444e-07,
"loss": 0.18669888377189636,
"step": 944
},
{
"epoch": 3.8431771894093685,
"grad_norm": 2.406766414642334,
"learning_rate": 1.5101118845824628e-07,
"loss": 0.18617846816778183,
"step": 945
},
{
"epoch": 3.8472505091649696,
"grad_norm": 2.2685155868530273,
"learning_rate": 1.4999184755079004e-07,
"loss": 0.1924063339829445,
"step": 946
},
{
"epoch": 3.8513238289205702,
"grad_norm": 2.443790912628174,
"learning_rate": 1.4897535119890364e-07,
"loss": 0.1853998601436615,
"step": 947
},
{
"epoch": 3.855397148676171,
"grad_norm": 2.320080041885376,
"learning_rate": 1.4796170766376727e-07,
"loss": 0.21173150092363358,
"step": 948
},
{
"epoch": 3.859470468431772,
"grad_norm": 2.5018677711486816,
"learning_rate": 1.4695092518337554e-07,
"loss": 0.1776503399014473,
"step": 949
},
{
"epoch": 3.8635437881873727,
"grad_norm": 2.493777275085449,
"learning_rate": 1.459430119724715e-07,
"loss": 0.17813850194215775,
"step": 950
},
{
"epoch": 3.8676171079429738,
"grad_norm": 2.4366796016693115,
"learning_rate": 1.4493797622247867e-07,
"loss": 0.19571475684642792,
"step": 951
},
{
"epoch": 3.8716904276985744,
"grad_norm": 2.559058427810669,
"learning_rate": 1.439358261014359e-07,
"loss": 0.19421598315238953,
"step": 952
},
{
"epoch": 3.875763747454175,
"grad_norm": 2.4444687366485596,
"learning_rate": 1.4293656975392937e-07,
"loss": 0.21629701554775238,
"step": 953
},
{
"epoch": 3.8798370672097757,
"grad_norm": 2.388728380203247,
"learning_rate": 1.4194021530102783e-07,
"loss": 0.19527916610240936,
"step": 954
},
{
"epoch": 3.883910386965377,
"grad_norm": 2.286078929901123,
"learning_rate": 1.4094677084021588e-07,
"loss": 0.1616881936788559,
"step": 955
},
{
"epoch": 3.8879837067209775,
"grad_norm": 2.5002880096435547,
"learning_rate": 1.3995624444532844e-07,
"loss": 0.19413011521100998,
"step": 956
},
{
"epoch": 3.8920570264765786,
"grad_norm": 2.6207797527313232,
"learning_rate": 1.3896864416648452e-07,
"loss": 0.15424808859825134,
"step": 957
},
{
"epoch": 3.8961303462321792,
"grad_norm": 2.5780515670776367,
"learning_rate": 1.3798397803002237e-07,
"loss": 0.20987361669540405,
"step": 958
},
{
"epoch": 3.90020366598778,
"grad_norm": 2.439293622970581,
"learning_rate": 1.370022540384347e-07,
"loss": 0.19203339517116547,
"step": 959
},
{
"epoch": 3.904276985743381,
"grad_norm": 2.810973644256592,
"learning_rate": 1.360234801703023e-07,
"loss": 0.25363121181726456,
"step": 960
},
{
"epoch": 3.9083503054989817,
"grad_norm": 2.6023387908935547,
"learning_rate": 1.3504766438023042e-07,
"loss": 0.21091164648532867,
"step": 961
},
{
"epoch": 3.9124236252545828,
"grad_norm": 2.3644959926605225,
"learning_rate": 1.3407481459878366e-07,
"loss": 0.21235870569944382,
"step": 962
},
{
"epoch": 3.9164969450101834,
"grad_norm": 2.3764562606811523,
"learning_rate": 1.3310493873242167e-07,
"loss": 0.1808951571583748,
"step": 963
},
{
"epoch": 3.920570264765784,
"grad_norm": 2.3367507457733154,
"learning_rate": 1.321380446634342e-07,
"loss": 0.2204703390598297,
"step": 964
},
{
"epoch": 3.9246435845213847,
"grad_norm": 2.595667600631714,
"learning_rate": 1.3117414024987823e-07,
"loss": 0.19948850572109222,
"step": 965
},
{
"epoch": 3.928716904276986,
"grad_norm": 2.6602208614349365,
"learning_rate": 1.3021323332551294e-07,
"loss": 0.22697525471448898,
"step": 966
},
{
"epoch": 3.9327902240325865,
"grad_norm": 2.483463764190674,
"learning_rate": 1.2925533169973695e-07,
"loss": 0.17191919684410095,
"step": 967
},
{
"epoch": 3.9368635437881876,
"grad_norm": 2.4962165355682373,
"learning_rate": 1.283004431575246e-07,
"loss": 0.20009542256593704,
"step": 968
},
{
"epoch": 3.9409368635437882,
"grad_norm": 2.5772528648376465,
"learning_rate": 1.273485754593619e-07,
"loss": 0.2056511491537094,
"step": 969
},
{
"epoch": 3.945010183299389,
"grad_norm": 2.4633169174194336,
"learning_rate": 1.26399736341185e-07,
"loss": 0.19378910213708878,
"step": 970
},
{
"epoch": 3.9490835030549896,
"grad_norm": 2.5064918994903564,
"learning_rate": 1.254539335143156e-07,
"loss": 0.2224956750869751,
"step": 971
},
{
"epoch": 3.9531568228105907,
"grad_norm": 2.723224401473999,
"learning_rate": 1.2451117466539985e-07,
"loss": 0.20264600962400436,
"step": 972
},
{
"epoch": 3.9572301425661913,
"grad_norm": 2.501723289489746,
"learning_rate": 1.235714674563445e-07,
"loss": 0.17863625288009644,
"step": 973
},
{
"epoch": 3.9613034623217924,
"grad_norm": 2.344696521759033,
"learning_rate": 1.226348195242557e-07,
"loss": 0.23713428527116776,
"step": 974
},
{
"epoch": 3.965376782077393,
"grad_norm": 2.421300172805786,
"learning_rate": 1.2170123848137648e-07,
"loss": 0.20281652361154556,
"step": 975
},
{
"epoch": 3.9694501018329937,
"grad_norm": 2.5802454948425293,
"learning_rate": 1.2077073191502496e-07,
"loss": 0.21466045081615448,
"step": 976
},
{
"epoch": 3.973523421588595,
"grad_norm": 2.3439295291900635,
"learning_rate": 1.1984330738753218e-07,
"loss": 0.1579430103302002,
"step": 977
},
{
"epoch": 3.9775967413441955,
"grad_norm": 2.491952896118164,
"learning_rate": 1.1891897243618183e-07,
"loss": 0.1682949811220169,
"step": 978
},
{
"epoch": 3.9816700610997966,
"grad_norm": 2.3951187133789062,
"learning_rate": 1.1799773457314766e-07,
"loss": 0.20190123468637466,
"step": 979
},
{
"epoch": 3.9857433808553973,
"grad_norm": 2.4365179538726807,
"learning_rate": 1.1707960128543314e-07,
"loss": 0.18969932198524475,
"step": 980
},
{
"epoch": 3.989816700610998,
"grad_norm": 2.2501721382141113,
"learning_rate": 1.1616458003481084e-07,
"loss": 0.16813133656978607,
"step": 981
},
{
"epoch": 3.9938900203665986,
"grad_norm": 2.550407886505127,
"learning_rate": 1.1525267825776114e-07,
"loss": 0.19014200568199158,
"step": 982
},
{
"epoch": 3.9979633401221997,
"grad_norm": 2.3324475288391113,
"learning_rate": 1.1434390336541238e-07,
"loss": 0.21611415594816208,
"step": 983
},
{
"epoch": 4.0,
"grad_norm": 2.8509156703948975,
"learning_rate": 1.1343826274347995e-07,
"loss": 0.2155466079711914,
"step": 984
},
{
"epoch": 4.004073319755601,
"grad_norm": 2.371936321258545,
"learning_rate": 1.125357637522072e-07,
"loss": 0.21221815049648285,
"step": 985
},
{
"epoch": 4.008146639511201,
"grad_norm": 2.5109193325042725,
"learning_rate": 1.1163641372630445e-07,
"loss": 0.19040236622095108,
"step": 986
},
{
"epoch": 4.012219959266803,
"grad_norm": 2.3751659393310547,
"learning_rate": 1.1074021997489074e-07,
"loss": 0.17234822362661362,
"step": 987
},
{
"epoch": 4.0162932790224035,
"grad_norm": 2.3197624683380127,
"learning_rate": 1.0984718978143287e-07,
"loss": 0.1829347461462021,
"step": 988
},
{
"epoch": 4.020366598778004,
"grad_norm": 2.230381965637207,
"learning_rate": 1.0895733040368815e-07,
"loss": 0.1833319142460823,
"step": 989
},
{
"epoch": 4.024439918533605,
"grad_norm": 2.195852279663086,
"learning_rate": 1.0807064907364321e-07,
"loss": 0.1787407100200653,
"step": 990
},
{
"epoch": 4.0285132382892055,
"grad_norm": 2.331566572189331,
"learning_rate": 1.0718715299745717e-07,
"loss": 0.16609113663434982,
"step": 991
},
{
"epoch": 4.032586558044806,
"grad_norm": 2.600375175476074,
"learning_rate": 1.0630684935540168e-07,
"loss": 0.2246694266796112,
"step": 992
},
{
"epoch": 4.036659877800408,
"grad_norm": 2.4915621280670166,
"learning_rate": 1.0542974530180327e-07,
"loss": 0.1964123621582985,
"step": 993
},
{
"epoch": 4.040733197556008,
"grad_norm": 2.434880256652832,
"learning_rate": 1.0455584796498512e-07,
"loss": 0.19002247601747513,
"step": 994
},
{
"epoch": 4.044806517311609,
"grad_norm": 2.3584625720977783,
"learning_rate": 1.0368516444720915e-07,
"loss": 0.16979426890611649,
"step": 995
},
{
"epoch": 4.04887983706721,
"grad_norm": 2.557974338531494,
"learning_rate": 1.0281770182461813e-07,
"loss": 0.20458902418613434,
"step": 996
},
{
"epoch": 4.05295315682281,
"grad_norm": 2.3859493732452393,
"learning_rate": 1.0195346714717812e-07,
"loss": 0.16649136692285538,
"step": 997
},
{
"epoch": 4.057026476578411,
"grad_norm": 2.168416738510132,
"learning_rate": 1.0109246743862155e-07,
"loss": 0.15576700866222382,
"step": 998
},
{
"epoch": 4.0610997963340125,
"grad_norm": 2.2460920810699463,
"learning_rate": 1.0023470969638953e-07,
"loss": 0.1829686239361763,
"step": 999
},
{
"epoch": 4.065173116089613,
"grad_norm": 2.477757215499878,
"learning_rate": 9.938020089157595e-08,
"loss": 0.20117176324129105,
"step": 1000
},
{
"epoch": 4.069246435845214,
"grad_norm": 2.3903391361236572,
"learning_rate": 9.85289479688694e-08,
"loss": 0.1714942306280136,
"step": 1001
},
{
"epoch": 4.0733197556008145,
"grad_norm": 2.4241321086883545,
"learning_rate": 9.768095784649833e-08,
"loss": 0.1976744383573532,
"step": 1002
},
{
"epoch": 4.077393075356415,
"grad_norm": 2.3917348384857178,
"learning_rate": 9.68362374161737e-08,
"loss": 0.18097112327814102,
"step": 1003
},
{
"epoch": 4.081466395112017,
"grad_norm": 2.710338830947876,
"learning_rate": 9.599479354303308e-08,
"loss": 0.21830029785633087,
"step": 1004
},
{
"epoch": 4.085539714867617,
"grad_norm": 2.2726480960845947,
"learning_rate": 9.515663306558568e-08,
"loss": 0.1627790406346321,
"step": 1005
},
{
"epoch": 4.089613034623218,
"grad_norm": 2.563021183013916,
"learning_rate": 9.432176279565557e-08,
"loss": 0.17855525016784668,
"step": 1006
},
{
"epoch": 4.093686354378819,
"grad_norm": 2.2680087089538574,
"learning_rate": 9.349018951832738e-08,
"loss": 0.17709980905056,
"step": 1007
},
{
"epoch": 4.097759674134419,
"grad_norm": 2.483131170272827,
"learning_rate": 9.266191999189044e-08,
"loss": 0.18354866653680801,
"step": 1008
},
{
"epoch": 4.10183299389002,
"grad_norm": 2.2514729499816895,
"learning_rate": 9.18369609477842e-08,
"loss": 0.17437510192394257,
"step": 1009
},
{
"epoch": 4.1059063136456215,
"grad_norm": 2.2802059650421143,
"learning_rate": 9.10153190905436e-08,
"loss": 0.18543030321598053,
"step": 1010
},
{
"epoch": 4.109979633401222,
"grad_norm": 2.3787717819213867,
"learning_rate": 9.019700109774436e-08,
"loss": 0.16922692209482193,
"step": 1011
},
{
"epoch": 4.114052953156823,
"grad_norm": 3.4778051376342773,
"learning_rate": 8.938201361994846e-08,
"loss": 0.20204298198223114,
"step": 1012
},
{
"epoch": 4.1181262729124235,
"grad_norm": 2.3797998428344727,
"learning_rate": 8.857036328065098e-08,
"loss": 0.19148864597082138,
"step": 1013
},
{
"epoch": 4.122199592668024,
"grad_norm": 2.5149481296539307,
"learning_rate": 8.776205667622527e-08,
"loss": 0.18599937111139297,
"step": 1014
},
{
"epoch": 4.126272912423625,
"grad_norm": 2.5857813358306885,
"learning_rate": 8.695710037586957e-08,
"loss": 0.22922367602586746,
"step": 1015
},
{
"epoch": 4.130346232179226,
"grad_norm": 2.539100408554077,
"learning_rate": 8.615550092155477e-08,
"loss": 0.21377216279506683,
"step": 1016
},
{
"epoch": 4.134419551934827,
"grad_norm": 2.3746533393859863,
"learning_rate": 8.535726482796918e-08,
"loss": 0.17763541638851166,
"step": 1017
},
{
"epoch": 4.138492871690428,
"grad_norm": 2.46425199508667,
"learning_rate": 8.456239858246755e-08,
"loss": 0.1617206409573555,
"step": 1018
},
{
"epoch": 4.142566191446028,
"grad_norm": 2.2839174270629883,
"learning_rate": 8.37709086450168e-08,
"loss": 0.17701925337314606,
"step": 1019
},
{
"epoch": 4.146639511201629,
"grad_norm": 2.5324790477752686,
"learning_rate": 8.29828014481449e-08,
"loss": 0.18900563567876816,
"step": 1020
},
{
"epoch": 4.1507128309572305,
"grad_norm": 2.781304359436035,
"learning_rate": 8.219808339688722e-08,
"loss": 0.2167806699872017,
"step": 1021
},
{
"epoch": 4.154786150712831,
"grad_norm": 2.32759165763855,
"learning_rate": 8.141676086873573e-08,
"loss": 0.15807153284549713,
"step": 1022
},
{
"epoch": 4.158859470468432,
"grad_norm": 2.4917571544647217,
"learning_rate": 8.063884021358624e-08,
"loss": 0.19083156436681747,
"step": 1023
},
{
"epoch": 4.1629327902240325,
"grad_norm": 2.430983543395996,
"learning_rate": 7.986432775368756e-08,
"loss": 0.1890602707862854,
"step": 1024
},
{
"epoch": 4.167006109979633,
"grad_norm": 2.4754414558410645,
"learning_rate": 7.909322978358912e-08,
"loss": 0.1957196220755577,
"step": 1025
},
{
"epoch": 4.171079429735234,
"grad_norm": 2.4021310806274414,
"learning_rate": 7.832555257009105e-08,
"loss": 0.1860102415084839,
"step": 1026
},
{
"epoch": 4.175152749490835,
"grad_norm": 2.6945855617523193,
"learning_rate": 7.75613023521921e-08,
"loss": 0.207361102104187,
"step": 1027
},
{
"epoch": 4.179226069246436,
"grad_norm": 2.431584358215332,
"learning_rate": 7.68004853410395e-08,
"loss": 0.17377741634845734,
"step": 1028
},
{
"epoch": 4.183299389002037,
"grad_norm": 2.4929258823394775,
"learning_rate": 7.604310771987882e-08,
"loss": 0.18458444625139236,
"step": 1029
},
{
"epoch": 4.187372708757637,
"grad_norm": 2.394940137863159,
"learning_rate": 7.528917564400289e-08,
"loss": 0.16298695653676987,
"step": 1030
},
{
"epoch": 4.191446028513238,
"grad_norm": 2.4729163646698,
"learning_rate": 7.45386952407026e-08,
"loss": 0.19453544914722443,
"step": 1031
},
{
"epoch": 4.195519348268839,
"grad_norm": 2.406733751296997,
"learning_rate": 7.379167260921621e-08,
"loss": 0.18493016809225082,
"step": 1032
},
{
"epoch": 4.19959266802444,
"grad_norm": 2.6580498218536377,
"learning_rate": 7.304811382068077e-08,
"loss": 0.2018485963344574,
"step": 1033
},
{
"epoch": 4.203665987780041,
"grad_norm": 2.409303903579712,
"learning_rate": 7.230802491808191e-08,
"loss": 0.18995004892349243,
"step": 1034
},
{
"epoch": 4.2077393075356415,
"grad_norm": 2.679262399673462,
"learning_rate": 7.157141191620548e-08,
"loss": 0.18551041930913925,
"step": 1035
},
{
"epoch": 4.211812627291242,
"grad_norm": 2.4415478706359863,
"learning_rate": 7.083828080158783e-08,
"loss": 0.19411860406398773,
"step": 1036
},
{
"epoch": 4.215885947046843,
"grad_norm": 2.6177053451538086,
"learning_rate": 7.010863753246798e-08,
"loss": 0.18280935287475586,
"step": 1037
},
{
"epoch": 4.219959266802444,
"grad_norm": 2.4606175422668457,
"learning_rate": 6.938248803873887e-08,
"loss": 0.17776849120855331,
"step": 1038
},
{
"epoch": 4.224032586558045,
"grad_norm": 2.459441900253296,
"learning_rate": 6.86598382218988e-08,
"loss": 0.18201512843370438,
"step": 1039
},
{
"epoch": 4.228105906313646,
"grad_norm": 2.5489375591278076,
"learning_rate": 6.794069395500418e-08,
"loss": 0.183207206428051,
"step": 1040
},
{
"epoch": 4.232179226069246,
"grad_norm": 2.3916819095611572,
"learning_rate": 6.722506108262111e-08,
"loss": 0.17223259061574936,
"step": 1041
},
{
"epoch": 4.236252545824847,
"grad_norm": 2.3839707374572754,
"learning_rate": 6.651294542077846e-08,
"loss": 0.18934150785207748,
"step": 1042
},
{
"epoch": 4.240325865580448,
"grad_norm": 2.680551052093506,
"learning_rate": 6.580435275691987e-08,
"loss": 0.19397541880607605,
"step": 1043
},
{
"epoch": 4.244399185336049,
"grad_norm": 2.7202107906341553,
"learning_rate": 6.509928884985799e-08,
"loss": 0.20066425204277039,
"step": 1044
},
{
"epoch": 4.24847250509165,
"grad_norm": 2.458122730255127,
"learning_rate": 6.439775942972609e-08,
"loss": 0.19551938772201538,
"step": 1045
},
{
"epoch": 4.2525458248472505,
"grad_norm": 2.5357983112335205,
"learning_rate": 6.369977019793271e-08,
"loss": 0.20287911593914032,
"step": 1046
},
{
"epoch": 4.256619144602851,
"grad_norm": 2.4644596576690674,
"learning_rate": 6.300532682711456e-08,
"loss": 0.20531214773654938,
"step": 1047
},
{
"epoch": 4.260692464358452,
"grad_norm": 2.4811034202575684,
"learning_rate": 6.231443496109117e-08,
"loss": 0.16013487428426743,
"step": 1048
},
{
"epoch": 4.2647657841140525,
"grad_norm": 2.415850877761841,
"learning_rate": 6.16271002148181e-08,
"loss": 0.18164420872926712,
"step": 1049
},
{
"epoch": 4.268839103869654,
"grad_norm": 2.4283230304718018,
"learning_rate": 6.094332817434211e-08,
"loss": 0.1915610432624817,
"step": 1050
},
{
"epoch": 4.272912423625255,
"grad_norm": 2.526712656021118,
"learning_rate": 6.026312439675551e-08,
"loss": 0.2085861638188362,
"step": 1051
},
{
"epoch": 4.276985743380855,
"grad_norm": 2.4190962314605713,
"learning_rate": 5.9586494410150565e-08,
"loss": 0.17680321633815765,
"step": 1052
},
{
"epoch": 4.281059063136456,
"grad_norm": 2.567119598388672,
"learning_rate": 5.891344371357549e-08,
"loss": 0.15278445929288864,
"step": 1053
},
{
"epoch": 4.285132382892057,
"grad_norm": 2.6076533794403076,
"learning_rate": 5.824397777698858e-08,
"loss": 0.20571757853031158,
"step": 1054
},
{
"epoch": 4.289205702647658,
"grad_norm": 2.405752658843994,
"learning_rate": 5.7578102041214936e-08,
"loss": 0.1562228798866272,
"step": 1055
},
{
"epoch": 4.293279022403259,
"grad_norm": 2.524439811706543,
"learning_rate": 5.691582191790123e-08,
"loss": 0.20757433027029037,
"step": 1056
},
{
"epoch": 4.2973523421588595,
"grad_norm": 2.387105703353882,
"learning_rate": 5.6257142789472576e-08,
"loss": 0.1555587202310562,
"step": 1057
},
{
"epoch": 4.30142566191446,
"grad_norm": 2.396641969680786,
"learning_rate": 5.560207000908823e-08,
"loss": 0.18656989187002182,
"step": 1058
},
{
"epoch": 4.305498981670061,
"grad_norm": 2.687645196914673,
"learning_rate": 5.4950608900598326e-08,
"loss": 0.21030457317829132,
"step": 1059
},
{
"epoch": 4.3095723014256615,
"grad_norm": 2.4285991191864014,
"learning_rate": 5.430276475850026e-08,
"loss": 0.19240695238113403,
"step": 1060
},
{
"epoch": 4.313645621181263,
"grad_norm": 2.5999672412872314,
"learning_rate": 5.3658542847896395e-08,
"loss": 0.18602756410837173,
"step": 1061
},
{
"epoch": 4.317718940936864,
"grad_norm": 2.3800199031829834,
"learning_rate": 5.30179484044504e-08,
"loss": 0.17461714148521423,
"step": 1062
},
{
"epoch": 4.321792260692464,
"grad_norm": 2.4830610752105713,
"learning_rate": 5.2380986634345204e-08,
"loss": 0.210846908390522,
"step": 1063
},
{
"epoch": 4.325865580448065,
"grad_norm": 2.3398642539978027,
"learning_rate": 5.1747662714240806e-08,
"loss": 0.18094143271446228,
"step": 1064
},
{
"epoch": 4.329938900203666,
"grad_norm": 2.7047762870788574,
"learning_rate": 5.1117981791231726e-08,
"loss": 0.21604597568511963,
"step": 1065
},
{
"epoch": 4.334012219959266,
"grad_norm": 2.4406027793884277,
"learning_rate": 5.049194898280579e-08,
"loss": 0.19460473209619522,
"step": 1066
},
{
"epoch": 4.338085539714868,
"grad_norm": 2.416376829147339,
"learning_rate": 4.986956937680165e-08,
"loss": 0.18469534814357758,
"step": 1067
},
{
"epoch": 4.3421588594704685,
"grad_norm": 2.3338382244110107,
"learning_rate": 4.925084803136853e-08,
"loss": 0.16407328099012375,
"step": 1068
},
{
"epoch": 4.346232179226069,
"grad_norm": 2.4940404891967773,
"learning_rate": 4.863578997492407e-08,
"loss": 0.19178733974695206,
"step": 1069
},
{
"epoch": 4.35030549898167,
"grad_norm": 2.358466863632202,
"learning_rate": 4.802440020611426e-08,
"loss": 0.18211781978607178,
"step": 1070
},
{
"epoch": 4.3543788187372705,
"grad_norm": 2.4675703048706055,
"learning_rate": 4.7416683693772495e-08,
"loss": 0.20454513281583786,
"step": 1071
},
{
"epoch": 4.358452138492872,
"grad_norm": 2.691836357116699,
"learning_rate": 4.6812645376879236e-08,
"loss": 0.20274968445301056,
"step": 1072
},
{
"epoch": 4.362525458248473,
"grad_norm": 2.635066032409668,
"learning_rate": 4.621229016452155e-08,
"loss": 0.18851862847805023,
"step": 1073
},
{
"epoch": 4.366598778004073,
"grad_norm": 2.4487643241882324,
"learning_rate": 4.561562293585364e-08,
"loss": 0.1614024117588997,
"step": 1074
},
{
"epoch": 4.370672097759674,
"grad_norm": 2.554473638534546,
"learning_rate": 4.5022648540057274e-08,
"loss": 0.18884680420160294,
"step": 1075
},
{
"epoch": 4.374745417515275,
"grad_norm": 2.5699715614318848,
"learning_rate": 4.443337179630174e-08,
"loss": 0.18893162161111832,
"step": 1076
},
{
"epoch": 4.378818737270876,
"grad_norm": 2.412440061569214,
"learning_rate": 4.384779749370554e-08,
"loss": 0.18390610069036484,
"step": 1077
},
{
"epoch": 4.382892057026477,
"grad_norm": 2.4353432655334473,
"learning_rate": 4.3265930391296726e-08,
"loss": 0.19943667203187943,
"step": 1078
},
{
"epoch": 4.3869653767820775,
"grad_norm": 2.5288360118865967,
"learning_rate": 4.2687775217974674e-08,
"loss": 0.15438083559274673,
"step": 1079
},
{
"epoch": 4.391038696537678,
"grad_norm": 2.6911988258361816,
"learning_rate": 4.2113336672471245e-08,
"loss": 0.1961485669016838,
"step": 1080
},
{
"epoch": 4.395112016293279,
"grad_norm": 2.4872078895568848,
"learning_rate": 4.1542619423313276e-08,
"loss": 0.17868036031723022,
"step": 1081
},
{
"epoch": 4.3991853360488795,
"grad_norm": 2.635779619216919,
"learning_rate": 4.097562810878369e-08,
"loss": 0.18501786887645721,
"step": 1082
},
{
"epoch": 4.403258655804481,
"grad_norm": 2.4047963619232178,
"learning_rate": 4.041236733688474e-08,
"loss": 0.1908714473247528,
"step": 1083
},
{
"epoch": 4.407331975560082,
"grad_norm": 2.582057476043701,
"learning_rate": 3.985284168529995e-08,
"loss": 0.19209705293178558,
"step": 1084
},
{
"epoch": 4.411405295315682,
"grad_norm": 2.320059061050415,
"learning_rate": 3.929705570135711e-08,
"loss": 0.17084672302007675,
"step": 1085
},
{
"epoch": 4.415478615071283,
"grad_norm": 2.5907821655273438,
"learning_rate": 3.874501390199148e-08,
"loss": 0.18601538985967636,
"step": 1086
},
{
"epoch": 4.419551934826884,
"grad_norm": 2.4668545722961426,
"learning_rate": 3.819672077370856e-08,
"loss": 0.17312505841255188,
"step": 1087
},
{
"epoch": 4.423625254582484,
"grad_norm": 2.8270812034606934,
"learning_rate": 3.7652180772548393e-08,
"loss": 0.15925130993127823,
"step": 1088
},
{
"epoch": 4.427698574338086,
"grad_norm": 2.352118968963623,
"learning_rate": 3.711139832404853e-08,
"loss": 0.1883508414030075,
"step": 1089
},
{
"epoch": 4.4317718940936865,
"grad_norm": 2.426455020904541,
"learning_rate": 3.657437782320888e-08,
"loss": 0.19179560989141464,
"step": 1090
},
{
"epoch": 4.435845213849287,
"grad_norm": 2.449763059616089,
"learning_rate": 3.604112363445522e-08,
"loss": 0.17501786351203918,
"step": 1091
},
{
"epoch": 4.439918533604888,
"grad_norm": 2.496755361557007,
"learning_rate": 3.551164009160429e-08,
"loss": 0.17811695486307144,
"step": 1092
},
{
"epoch": 4.4439918533604885,
"grad_norm": 2.3908495903015137,
"learning_rate": 3.49859314978283e-08,
"loss": 0.16527411341667175,
"step": 1093
},
{
"epoch": 4.44806517311609,
"grad_norm": 2.344762086868286,
"learning_rate": 3.446400212562017e-08,
"loss": 0.1888575553894043,
"step": 1094
},
{
"epoch": 4.452138492871691,
"grad_norm": 2.396965503692627,
"learning_rate": 3.3945856216758384e-08,
"loss": 0.19079575687646866,
"step": 1095
},
{
"epoch": 4.456211812627291,
"grad_norm": 2.5549583435058594,
"learning_rate": 3.343149798227301e-08,
"loss": 0.16446177661418915,
"step": 1096
},
{
"epoch": 4.460285132382892,
"grad_norm": 2.4434475898742676,
"learning_rate": 3.2920931602411105e-08,
"loss": 0.20346921682357788,
"step": 1097
},
{
"epoch": 4.464358452138493,
"grad_norm": 2.409318208694458,
"learning_rate": 3.241416122660312e-08,
"loss": 0.1957874372601509,
"step": 1098
},
{
"epoch": 4.468431771894093,
"grad_norm": 2.5695855617523193,
"learning_rate": 3.191119097342876e-08,
"loss": 0.20461179316043854,
"step": 1099
},
{
"epoch": 4.472505091649695,
"grad_norm": 2.499413013458252,
"learning_rate": 3.141202493058376e-08,
"loss": 0.19887447357177734,
"step": 1100
},
{
"epoch": 4.4765784114052956,
"grad_norm": 2.511868476867676,
"learning_rate": 3.0916667154846644e-08,
"loss": 0.21206560730934143,
"step": 1101
},
{
"epoch": 4.480651731160896,
"grad_norm": 2.3708322048187256,
"learning_rate": 3.042512167204569e-08,
"loss": 0.19482630491256714,
"step": 1102
},
{
"epoch": 4.484725050916497,
"grad_norm": 2.418764114379883,
"learning_rate": 2.9937392477026256e-08,
"loss": 0.15218085050582886,
"step": 1103
},
{
"epoch": 4.4887983706720975,
"grad_norm": 2.526432752609253,
"learning_rate": 2.9453483533618208e-08,
"loss": 0.19341377913951874,
"step": 1104
},
{
"epoch": 4.492871690427698,
"grad_norm": 2.466676950454712,
"learning_rate": 2.8973398774603976e-08,
"loss": 0.20285610854625702,
"step": 1105
},
{
"epoch": 4.4969450101833,
"grad_norm": 2.626288890838623,
"learning_rate": 2.84971421016863e-08,
"loss": 0.21343690901994705,
"step": 1106
},
{
"epoch": 4.5010183299389,
"grad_norm": 2.4493844509124756,
"learning_rate": 2.8024717385456752e-08,
"loss": 0.1735098510980606,
"step": 1107
},
{
"epoch": 4.505091649694501,
"grad_norm": 2.614302158355713,
"learning_rate": 2.755612846536398e-08,
"loss": 0.2130608856678009,
"step": 1108
},
{
"epoch": 4.509164969450102,
"grad_norm": 2.4521331787109375,
"learning_rate": 2.7091379149682682e-08,
"loss": 0.16439496725797653,
"step": 1109
},
{
"epoch": 4.513238289205702,
"grad_norm": 2.3728718757629395,
"learning_rate": 2.6630473215482895e-08,
"loss": 0.17642652988433838,
"step": 1110
},
{
"epoch": 4.517311608961304,
"grad_norm": 2.4868416786193848,
"learning_rate": 2.6173414408598826e-08,
"loss": 0.19394922256469727,
"step": 1111
},
{
"epoch": 4.521384928716905,
"grad_norm": 2.190237283706665,
"learning_rate": 2.5720206443598736e-08,
"loss": 0.15093064308166504,
"step": 1112
},
{
"epoch": 4.525458248472505,
"grad_norm": 2.2836077213287354,
"learning_rate": 2.52708530037547e-08,
"loss": 0.16533400863409042,
"step": 1113
},
{
"epoch": 4.529531568228106,
"grad_norm": 2.499483823776245,
"learning_rate": 2.4825357741012686e-08,
"loss": 0.18762270361185074,
"step": 1114
},
{
"epoch": 4.5336048879837065,
"grad_norm": 2.6462838649749756,
"learning_rate": 2.438372427596258e-08,
"loss": 0.1893811672925949,
"step": 1115
},
{
"epoch": 4.537678207739307,
"grad_norm": 2.453942060470581,
"learning_rate": 2.394595619780926e-08,
"loss": 0.18495626747608185,
"step": 1116
},
{
"epoch": 4.541751527494909,
"grad_norm": 2.721647024154663,
"learning_rate": 2.3512057064343006e-08,
"loss": 0.19172073900699615,
"step": 1117
},
{
"epoch": 4.545824847250509,
"grad_norm": 2.3400092124938965,
"learning_rate": 2.3082030401910868e-08,
"loss": 0.18133129179477692,
"step": 1118
},
{
"epoch": 4.54989816700611,
"grad_norm": 2.5185601711273193,
"learning_rate": 2.2655879705387683e-08,
"loss": 0.18830690532922745,
"step": 1119
},
{
"epoch": 4.553971486761711,
"grad_norm": 2.7922866344451904,
"learning_rate": 2.223360843814831e-08,
"loss": 0.17920264601707458,
"step": 1120
},
{
"epoch": 4.558044806517311,
"grad_norm": 2.25423526763916,
"learning_rate": 2.181522003203862e-08,
"loss": 0.1760845184326172,
"step": 1121
},
{
"epoch": 4.562118126272912,
"grad_norm": 2.8005192279815674,
"learning_rate": 2.1400717887348096e-08,
"loss": 0.17970797419548035,
"step": 1122
},
{
"epoch": 4.566191446028514,
"grad_norm": 2.4870920181274414,
"learning_rate": 2.0990105372782342e-08,
"loss": 0.19520121812820435,
"step": 1123
},
{
"epoch": 4.570264765784114,
"grad_norm": 2.4267923831939697,
"learning_rate": 2.058338582543534e-08,
"loss": 0.18133552372455597,
"step": 1124
},
{
"epoch": 4.574338085539715,
"grad_norm": 2.4744374752044678,
"learning_rate": 2.0180562550762557e-08,
"loss": 0.20324096083641052,
"step": 1125
},
{
"epoch": 4.5784114052953155,
"grad_norm": 2.4335596561431885,
"learning_rate": 1.9781638822553958e-08,
"loss": 0.19217759370803833,
"step": 1126
},
{
"epoch": 4.582484725050916,
"grad_norm": 2.4571220874786377,
"learning_rate": 1.9386617882907576e-08,
"loss": 0.1853143498301506,
"step": 1127
},
{
"epoch": 4.586558044806518,
"grad_norm": 2.7526497840881348,
"learning_rate": 1.8995502942202866e-08,
"loss": 0.15644675493240356,
"step": 1128
},
{
"epoch": 4.590631364562118,
"grad_norm": 2.5981698036193848,
"learning_rate": 1.8608297179074973e-08,
"loss": 0.1661221832036972,
"step": 1129
},
{
"epoch": 4.594704684317719,
"grad_norm": 2.5742714405059814,
"learning_rate": 1.8225003740388545e-08,
"loss": 0.1731417402625084,
"step": 1130
},
{
"epoch": 4.59877800407332,
"grad_norm": 2.418259620666504,
"learning_rate": 1.7845625741212455e-08,
"loss": 0.17661213874816895,
"step": 1131
},
{
"epoch": 4.60285132382892,
"grad_norm": 2.518803358078003,
"learning_rate": 1.7470166264794252e-08,
"loss": 0.18510932475328445,
"step": 1132
},
{
"epoch": 4.606924643584521,
"grad_norm": 2.540313482284546,
"learning_rate": 1.709862836253534e-08,
"loss": 0.1894361674785614,
"step": 1133
},
{
"epoch": 4.610997963340123,
"grad_norm": 2.5993340015411377,
"learning_rate": 1.6731015053966012e-08,
"loss": 0.17485319077968597,
"step": 1134
},
{
"epoch": 4.615071283095723,
"grad_norm": 2.4957704544067383,
"learning_rate": 1.6367329326720856e-08,
"loss": 0.19985325634479523,
"step": 1135
},
{
"epoch": 4.619144602851324,
"grad_norm": 2.43721604347229,
"learning_rate": 1.6007574136514712e-08,
"loss": 0.18134716898202896,
"step": 1136
},
{
"epoch": 4.6232179226069245,
"grad_norm": 2.3459789752960205,
"learning_rate": 1.5651752407118247e-08,
"loss": 0.17994606494903564,
"step": 1137
},
{
"epoch": 4.627291242362525,
"grad_norm": 2.4452524185180664,
"learning_rate": 1.5299867030334813e-08,
"loss": 0.2009306699037552,
"step": 1138
},
{
"epoch": 4.631364562118126,
"grad_norm": 2.472385883331299,
"learning_rate": 1.4951920865976298e-08,
"loss": 0.18430952727794647,
"step": 1139
},
{
"epoch": 4.635437881873727,
"grad_norm": 2.39799165725708,
"learning_rate": 1.4607916741840254e-08,
"loss": 0.16439027339220047,
"step": 1140
},
{
"epoch": 4.639511201629328,
"grad_norm": 2.3951544761657715,
"learning_rate": 1.426785745368686e-08,
"loss": 0.17046649008989334,
"step": 1141
},
{
"epoch": 4.643584521384929,
"grad_norm": 2.521660566329956,
"learning_rate": 1.3931745765216218e-08,
"loss": 0.19015470147132874,
"step": 1142
},
{
"epoch": 4.647657841140529,
"grad_norm": 2.4929487705230713,
"learning_rate": 1.3599584408045705e-08,
"loss": 0.195196270942688,
"step": 1143
},
{
"epoch": 4.65173116089613,
"grad_norm": 2.4070982933044434,
"learning_rate": 1.3271376081687934e-08,
"loss": 0.18301567435264587,
"step": 1144
},
{
"epoch": 4.655804480651732,
"grad_norm": 2.6266772747039795,
"learning_rate": 1.2947123453528885e-08,
"loss": 0.19102585315704346,
"step": 1145
},
{
"epoch": 4.659877800407332,
"grad_norm": 2.549821138381958,
"learning_rate": 1.2626829158806084e-08,
"loss": 0.1963408663868904,
"step": 1146
},
{
"epoch": 4.663951120162933,
"grad_norm": 2.394179344177246,
"learning_rate": 1.2310495800587295e-08,
"loss": 0.17852062731981277,
"step": 1147
},
{
"epoch": 4.6680244399185336,
"grad_norm": 2.6832633018493652,
"learning_rate": 1.1998125949749194e-08,
"loss": 0.21865876764059067,
"step": 1148
},
{
"epoch": 4.672097759674134,
"grad_norm": 2.521888494491577,
"learning_rate": 1.168972214495667e-08,
"loss": 0.2011728659272194,
"step": 1149
},
{
"epoch": 4.676171079429735,
"grad_norm": 2.46395206451416,
"learning_rate": 1.1385286892642066e-08,
"loss": 0.2030443474650383,
"step": 1150
},
{
"epoch": 4.680244399185336,
"grad_norm": 2.5812623500823975,
"learning_rate": 1.1084822666984905e-08,
"loss": 0.20721124857664108,
"step": 1151
},
{
"epoch": 4.684317718940937,
"grad_norm": 2.2974133491516113,
"learning_rate": 1.0788331909891646e-08,
"loss": 0.164021335542202,
"step": 1152
},
{
"epoch": 4.688391038696538,
"grad_norm": 2.2822930812835693,
"learning_rate": 1.0495817030976018e-08,
"loss": 0.14888529479503632,
"step": 1153
},
{
"epoch": 4.692464358452138,
"grad_norm": 2.5392258167266846,
"learning_rate": 1.0207280407539265e-08,
"loss": 0.1809844821691513,
"step": 1154
},
{
"epoch": 4.696537678207739,
"grad_norm": 2.4750709533691406,
"learning_rate": 9.922724384551051e-09,
"loss": 0.1907663643360138,
"step": 1155
},
{
"epoch": 4.70061099796334,
"grad_norm": 2.3656814098358154,
"learning_rate": 9.642151274630028e-09,
"loss": 0.1763923540711403,
"step": 1156
},
{
"epoch": 4.704684317718941,
"grad_norm": 2.6838109493255615,
"learning_rate": 9.365563358025353e-09,
"loss": 0.1965761035680771,
"step": 1157
},
{
"epoch": 4.708757637474542,
"grad_norm": 2.723710775375366,
"learning_rate": 9.092962882598144e-09,
"loss": 0.20456959307193756,
"step": 1158
},
{
"epoch": 4.712830957230143,
"grad_norm": 2.6089608669281006,
"learning_rate": 8.824352063803053e-09,
"loss": 0.19157623499631882,
"step": 1159
},
{
"epoch": 4.716904276985743,
"grad_norm": 2.320207357406616,
"learning_rate": 8.559733084670396e-09,
"loss": 0.1886928454041481,
"step": 1160
},
{
"epoch": 4.720977596741344,
"grad_norm": 2.386166572570801,
"learning_rate": 8.29910809578832e-09,
"loss": 0.16740842908620834,
"step": 1161
},
{
"epoch": 4.725050916496945,
"grad_norm": 2.4986507892608643,
"learning_rate": 8.042479215285503e-09,
"loss": 0.18630076944828033,
"step": 1162
},
{
"epoch": 4.729124236252546,
"grad_norm": 2.5233304500579834,
"learning_rate": 7.789848528813537e-09,
"loss": 0.17323897778987885,
"step": 1163
},
{
"epoch": 4.733197556008147,
"grad_norm": 2.336219072341919,
"learning_rate": 7.541218089530454e-09,
"loss": 0.18500008434057236,
"step": 1164
},
{
"epoch": 4.737270875763747,
"grad_norm": 2.426069736480713,
"learning_rate": 7.296589918083684e-09,
"loss": 0.1783820241689682,
"step": 1165
},
{
"epoch": 4.741344195519348,
"grad_norm": 2.6765623092651367,
"learning_rate": 7.055966002594005e-09,
"loss": 0.207184799015522,
"step": 1166
},
{
"epoch": 4.745417515274949,
"grad_norm": 2.5222582817077637,
"learning_rate": 6.819348298638839e-09,
"loss": 0.2139280065894127,
"step": 1167
},
{
"epoch": 4.74949083503055,
"grad_norm": 2.328120470046997,
"learning_rate": 6.5867387292369295e-09,
"loss": 0.15476347506046295,
"step": 1168
},
{
"epoch": 4.753564154786151,
"grad_norm": 2.431166887283325,
"learning_rate": 6.358139184832412e-09,
"loss": 0.19098040461540222,
"step": 1169
},
{
"epoch": 4.757637474541752,
"grad_norm": 2.378601551055908,
"learning_rate": 6.1335515232793786e-09,
"loss": 0.1613384410738945,
"step": 1170
},
{
"epoch": 4.761710794297352,
"grad_norm": 2.562450647354126,
"learning_rate": 5.91297756982706e-09,
"loss": 0.18494479358196259,
"step": 1171
},
{
"epoch": 4.765784114052953,
"grad_norm": 2.483607292175293,
"learning_rate": 5.696419117104667e-09,
"loss": 0.18615225702524185,
"step": 1172
},
{
"epoch": 4.7698574338085535,
"grad_norm": 2.571909189224243,
"learning_rate": 5.483877925107127e-09,
"loss": 0.15820663422346115,
"step": 1173
},
{
"epoch": 4.773930753564155,
"grad_norm": 2.3289737701416016,
"learning_rate": 5.275355721180541e-09,
"loss": 0.17181526124477386,
"step": 1174
},
{
"epoch": 4.778004073319756,
"grad_norm": 2.635626792907715,
"learning_rate": 5.070854200008356e-09,
"loss": 0.18911509215831757,
"step": 1175
},
{
"epoch": 4.782077393075356,
"grad_norm": 2.4061386585235596,
"learning_rate": 4.870375023597384e-09,
"loss": 0.19500445574522018,
"step": 1176
},
{
"epoch": 4.786150712830957,
"grad_norm": 2.6663994789123535,
"learning_rate": 4.67391982126436e-09,
"loss": 0.18807929754257202,
"step": 1177
},
{
"epoch": 4.790224032586558,
"grad_norm": 2.541975259780884,
"learning_rate": 4.481490189622794e-09,
"loss": 0.2035481184720993,
"step": 1178
},
{
"epoch": 4.794297352342159,
"grad_norm": 2.6013271808624268,
"learning_rate": 4.293087692569974e-09,
"loss": 0.1858099028468132,
"step": 1179
},
{
"epoch": 4.79837067209776,
"grad_norm": 2.4530036449432373,
"learning_rate": 4.108713861274038e-09,
"loss": 0.1730939969420433,
"step": 1180
},
{
"epoch": 4.802443991853361,
"grad_norm": 2.538501501083374,
"learning_rate": 3.928370194161923e-09,
"loss": 0.19924624264240265,
"step": 1181
},
{
"epoch": 4.806517311608961,
"grad_norm": 2.5811445713043213,
"learning_rate": 3.7520581569068785e-09,
"loss": 0.170890673995018,
"step": 1182
},
{
"epoch": 4.810590631364562,
"grad_norm": 2.467304229736328,
"learning_rate": 3.579779182416587e-09,
"loss": 0.19038555771112442,
"step": 1183
},
{
"epoch": 4.814663951120163,
"grad_norm": 2.435349702835083,
"learning_rate": 3.4115346708216695e-09,
"loss": 0.19310712069272995,
"step": 1184
},
{
"epoch": 4.818737270875764,
"grad_norm": 2.652028799057007,
"learning_rate": 3.247325989464089e-09,
"loss": 0.18610060214996338,
"step": 1185
},
{
"epoch": 4.822810590631365,
"grad_norm": 2.5963809490203857,
"learning_rate": 3.0871544728863217e-09,
"loss": 0.18380168080329895,
"step": 1186
},
{
"epoch": 4.826883910386965,
"grad_norm": 2.5820107460021973,
"learning_rate": 2.9310214228202014e-09,
"loss": 0.1849624440073967,
"step": 1187
},
{
"epoch": 4.830957230142566,
"grad_norm": 2.3870606422424316,
"learning_rate": 2.778928108176648e-09,
"loss": 0.1531737670302391,
"step": 1188
},
{
"epoch": 4.835030549898167,
"grad_norm": 2.368928909301758,
"learning_rate": 2.630875765035068e-09,
"loss": 0.16189177334308624,
"step": 1189
},
{
"epoch": 4.839103869653767,
"grad_norm": 2.522529363632202,
"learning_rate": 2.4868655966334696e-09,
"loss": 0.18868660926818848,
"step": 1190
},
{
"epoch": 4.843177189409369,
"grad_norm": 2.583470582962036,
"learning_rate": 2.346898773358752e-09,
"loss": 0.21254148334264755,
"step": 1191
},
{
"epoch": 4.84725050916497,
"grad_norm": 2.4336440563201904,
"learning_rate": 2.2109764327368774e-09,
"loss": 0.17807240039110184,
"step": 1192
},
{
"epoch": 4.85132382892057,
"grad_norm": 2.539942741394043,
"learning_rate": 2.079099679424101e-09,
"loss": 0.167090006172657,
"step": 1193
},
{
"epoch": 4.855397148676171,
"grad_norm": 2.442253351211548,
"learning_rate": 1.9512695851975346e-09,
"loss": 0.16879206895828247,
"step": 1194
},
{
"epoch": 4.8594704684317716,
"grad_norm": 2.496445894241333,
"learning_rate": 1.827487188946708e-09,
"loss": 0.1911175176501274,
"step": 1195
},
{
"epoch": 4.863543788187373,
"grad_norm": 2.352762222290039,
"learning_rate": 1.7077534966650765e-09,
"loss": 0.1814107596874237,
"step": 1196
},
{
"epoch": 4.867617107942974,
"grad_norm": 2.6556501388549805,
"learning_rate": 1.59206948144186e-09,
"loss": 0.16374102234840393,
"step": 1197
},
{
"epoch": 4.871690427698574,
"grad_norm": 2.3610963821411133,
"learning_rate": 1.4804360834539398e-09,
"loss": 0.1801617071032524,
"step": 1198
},
{
"epoch": 4.875763747454175,
"grad_norm": 2.3272058963775635,
"learning_rate": 1.372854209958585e-09,
"loss": 0.15738121420145035,
"step": 1199
},
{
"epoch": 4.879837067209776,
"grad_norm": 2.4722721576690674,
"learning_rate": 1.2693247352856817e-09,
"loss": 0.17832408845424652,
"step": 1200
},
{
"epoch": 4.883910386965377,
"grad_norm": 2.7059404850006104,
"learning_rate": 1.169848500830961e-09,
"loss": 0.19288718700408936,
"step": 1201
},
{
"epoch": 4.887983706720978,
"grad_norm": 2.5134401321411133,
"learning_rate": 1.0744263150489486e-09,
"loss": 0.16510120779275894,
"step": 1202
},
{
"epoch": 4.892057026476579,
"grad_norm": 2.3228554725646973,
"learning_rate": 9.830589534464139e-10,
"loss": 0.1725562885403633,
"step": 1203
},
{
"epoch": 4.896130346232179,
"grad_norm": 2.576732635498047,
"learning_rate": 8.957471585762655e-10,
"loss": 0.19249098747968674,
"step": 1204
},
{
"epoch": 4.90020366598778,
"grad_norm": 3.2054977416992188,
"learning_rate": 8.124916400311654e-10,
"loss": 0.19399181008338928,
"step": 1205
},
{
"epoch": 4.904276985743381,
"grad_norm": 2.5403683185577393,
"learning_rate": 7.332930744380905e-10,
"loss": 0.21494056284427643,
"step": 1206
},
{
"epoch": 4.908350305498981,
"grad_norm": 2.5669965744018555,
"learning_rate": 6.581521054526695e-10,
"loss": 0.213642418384552,
"step": 1207
},
{
"epoch": 4.912423625254583,
"grad_norm": 2.3600635528564453,
"learning_rate": 5.870693437540764e-10,
"loss": 0.17898286134004593,
"step": 1208
},
{
"epoch": 4.916496945010183,
"grad_norm": 2.6826629638671875,
"learning_rate": 5.20045367039812e-10,
"loss": 0.18938665091991425,
"step": 1209
},
{
"epoch": 4.920570264765784,
"grad_norm": 2.3554890155792236,
"learning_rate": 4.570807200212634e-10,
"loss": 0.18093843758106232,
"step": 1210
},
{
"epoch": 4.924643584521385,
"grad_norm": 2.351396083831787,
"learning_rate": 3.9817591441926313e-10,
"loss": 0.17892272025346756,
"step": 1211
},
{
"epoch": 4.928716904276985,
"grad_norm": 2.481386423110962,
"learning_rate": 3.4333142895975886e-10,
"loss": 0.18873242288827896,
"step": 1212
},
{
"epoch": 4.932790224032587,
"grad_norm": 2.3141331672668457,
"learning_rate": 2.9254770936998354e-10,
"loss": 0.15687178820371628,
"step": 1213
},
{
"epoch": 4.936863543788188,
"grad_norm": 2.5041322708129883,
"learning_rate": 2.458251683750134e-10,
"loss": 0.19991402328014374,
"step": 1214
},
{
"epoch": 4.940936863543788,
"grad_norm": 2.6752336025238037,
"learning_rate": 2.0316418569416015e-10,
"loss": 0.2112450748682022,
"step": 1215
},
{
"epoch": 4.945010183299389,
"grad_norm": 2.4857139587402344,
"learning_rate": 1.6456510803797286e-10,
"loss": 0.16738834232091904,
"step": 1216
},
{
"epoch": 4.94908350305499,
"grad_norm": 2.5286214351654053,
"learning_rate": 1.3002824910551824e-10,
"loss": 0.19572018086910248,
"step": 1217
},
{
"epoch": 4.953156822810591,
"grad_norm": 2.585662841796875,
"learning_rate": 9.955388958177158e-11,
"loss": 0.21787291765213013,
"step": 1218
},
{
"epoch": 4.957230142566192,
"grad_norm": 2.3523364067077637,
"learning_rate": 7.314227713522969e-11,
"loss": 0.18198108673095703,
"step": 1219
},
{
"epoch": 4.961303462321792,
"grad_norm": 2.401686191558838,
"learning_rate": 5.079362641602358e-11,
"loss": 0.1640736535191536,
"step": 1220
},
{
"epoch": 4.965376782077393,
"grad_norm": 2.5535383224487305,
"learning_rate": 3.250811905419759e-11,
"loss": 0.20682203024625778,
"step": 1221
},
{
"epoch": 4.969450101832994,
"grad_norm": 2.499046564102173,
"learning_rate": 1.8285903658099566e-11,
"loss": 0.16233273595571518,
"step": 1222
},
{
"epoch": 4.973523421588594,
"grad_norm": 2.4775757789611816,
"learning_rate": 8.127095813215135e-12,
"loss": 0.16816455125808716,
"step": 1223
},
{
"epoch": 4.977596741344195,
"grad_norm": 2.65974760055542,
"learning_rate": 2.0317780814460563e-12,
"loss": 0.20536164939403534,
"step": 1224
},
{
"epoch": 4.981670061099797,
"grad_norm": 2.462040901184082,
"learning_rate": 0.0,
"loss": 0.19432584941387177,
"step": 1225
}
],
"logging_steps": 1.0,
"max_steps": 1225,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 100.0,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}