{
"best_metric": 11.5,
"best_model_checkpoint": "miner_id_24/checkpoint-300",
"epoch": 0.16387286197125397,
"eval_steps": 300,
"global_step": 1200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0001365607183093783,
"grad_norm": 9.676669833424967e-06,
"learning_rate": 2e-05,
"loss": 46.0,
"step": 1
},
{
"epoch": 0.0001365607183093783,
"eval_loss": 11.5,
"eval_runtime": 20.0819,
"eval_samples_per_second": 153.571,
"eval_steps_per_second": 76.786,
"step": 1
},
{
"epoch": 0.0002731214366187566,
"grad_norm": 8.113798685371876e-06,
"learning_rate": 4e-05,
"loss": 46.0,
"step": 2
},
{
"epoch": 0.0004096821549281349,
"grad_norm": 4.353820713731693e-06,
"learning_rate": 6e-05,
"loss": 46.0,
"step": 3
},
{
"epoch": 0.0005462428732375132,
"grad_norm": 5.62772765988484e-06,
"learning_rate": 8e-05,
"loss": 46.0,
"step": 4
},
{
"epoch": 0.0006828035915468915,
"grad_norm": 6.43259363641846e-06,
"learning_rate": 0.0001,
"loss": 46.0,
"step": 5
},
{
"epoch": 0.0008193643098562698,
"grad_norm": 9.933234650816303e-06,
"learning_rate": 0.00012,
"loss": 46.0,
"step": 6
},
{
"epoch": 0.0009559250281656481,
"grad_norm": 6.729349024681142e-06,
"learning_rate": 0.00014,
"loss": 46.0,
"step": 7
},
{
"epoch": 0.0010924857464750264,
"grad_norm": 7.317645668081241e-06,
"learning_rate": 0.00016,
"loss": 46.0,
"step": 8
},
{
"epoch": 0.0012290464647844047,
"grad_norm": 7.1731265052221715e-06,
"learning_rate": 0.00018,
"loss": 46.0,
"step": 9
},
{
"epoch": 0.001365607183093783,
"grad_norm": 7.621179065608885e-06,
"learning_rate": 0.0002,
"loss": 46.0,
"step": 10
},
{
"epoch": 0.0015021679014031613,
"grad_norm": 1.141078882938018e-05,
"learning_rate": 0.00019999994480149276,
"loss": 46.0,
"step": 11
},
{
"epoch": 0.0016387286197125396,
"grad_norm": 9.140413567365613e-06,
"learning_rate": 0.00019999977920603197,
"loss": 46.0,
"step": 12
},
{
"epoch": 0.001775289338021918,
"grad_norm": 1.240484562003985e-05,
"learning_rate": 0.0001999995032138004,
"loss": 46.0,
"step": 13
},
{
"epoch": 0.0019118500563312963,
"grad_norm": 1.1790569260483608e-05,
"learning_rate": 0.00019999911682510278,
"loss": 46.0,
"step": 14
},
{
"epoch": 0.0020484107746406746,
"grad_norm": 1.051307117450051e-05,
"learning_rate": 0.00019999862004036568,
"loss": 46.0,
"step": 15
},
{
"epoch": 0.0021849714929500527,
"grad_norm": 9.398099791724235e-06,
"learning_rate": 0.0001999980128601375,
"loss": 46.0,
"step": 16
},
{
"epoch": 0.0023215322112594312,
"grad_norm": 7.582193120470038e-06,
"learning_rate": 0.00019999729528508855,
"loss": 46.0,
"step": 17
},
{
"epoch": 0.0024580929295688093,
"grad_norm": 1.940154834301211e-05,
"learning_rate": 0.00019999646731601103,
"loss": 46.0,
"step": 18
},
{
"epoch": 0.002594653647878188,
"grad_norm": 7.919963536551222e-06,
"learning_rate": 0.00019999552895381902,
"loss": 46.0,
"step": 19
},
{
"epoch": 0.002731214366187566,
"grad_norm": 1.3096555449010339e-05,
"learning_rate": 0.0001999944801995484,
"loss": 46.0,
"step": 20
},
{
"epoch": 0.0028677750844969445,
"grad_norm": 1.2133096788602415e-05,
"learning_rate": 0.00019999332105435696,
"loss": 46.0,
"step": 21
},
{
"epoch": 0.0030043358028063226,
"grad_norm": 3.7125832022866234e-05,
"learning_rate": 0.00019999205151952437,
"loss": 46.0,
"step": 22
},
{
"epoch": 0.003140896521115701,
"grad_norm": 1.1414869732107036e-05,
"learning_rate": 0.0001999906715964522,
"loss": 46.0,
"step": 23
},
{
"epoch": 0.0032774572394250793,
"grad_norm": 1.2319793313508853e-05,
"learning_rate": 0.0001999891812866638,
"loss": 46.0,
"step": 24
},
{
"epoch": 0.003414017957734458,
"grad_norm": 1.1083939170930535e-05,
"learning_rate": 0.00019998758059180447,
"loss": 46.0,
"step": 25
},
{
"epoch": 0.003550578676043836,
"grad_norm": 1.0406022738607135e-05,
"learning_rate": 0.00019998586951364125,
"loss": 46.0,
"step": 26
},
{
"epoch": 0.0036871393943532144,
"grad_norm": 8.916207661968656e-06,
"learning_rate": 0.0001999840480540632,
"loss": 46.0,
"step": 27
},
{
"epoch": 0.0038237001126625926,
"grad_norm": 8.375522156711668e-06,
"learning_rate": 0.0001999821162150811,
"loss": 46.0,
"step": 28
},
{
"epoch": 0.003960260830971971,
"grad_norm": 5.647367288474925e-06,
"learning_rate": 0.00019998007399882765,
"loss": 46.0,
"step": 29
},
{
"epoch": 0.004096821549281349,
"grad_norm": 2.2721666027791798e-05,
"learning_rate": 0.00019997792140755746,
"loss": 46.0,
"step": 30
},
{
"epoch": 0.004233382267590727,
"grad_norm": 1.8854540030588396e-05,
"learning_rate": 0.00019997565844364688,
"loss": 46.0,
"step": 31
},
{
"epoch": 0.004369942985900105,
"grad_norm": 8.220870768127497e-06,
"learning_rate": 0.00019997328510959413,
"loss": 46.0,
"step": 32
},
{
"epoch": 0.004506503704209484,
"grad_norm": 7.226680736494018e-06,
"learning_rate": 0.00019997080140801932,
"loss": 46.0,
"step": 33
},
{
"epoch": 0.0046430644225188625,
"grad_norm": 4.141399131185608e-06,
"learning_rate": 0.0001999682073416644,
"loss": 46.0,
"step": 34
},
{
"epoch": 0.004779625140828241,
"grad_norm": 1.651368074817583e-05,
"learning_rate": 0.00019996550291339311,
"loss": 46.0,
"step": 35
},
{
"epoch": 0.004916185859137619,
"grad_norm": 2.9052245736238547e-05,
"learning_rate": 0.00019996268812619107,
"loss": 46.0,
"step": 36
},
{
"epoch": 0.005052746577446998,
"grad_norm": 9.355511792819016e-06,
"learning_rate": 0.00019995976298316576,
"loss": 46.0,
"step": 37
},
{
"epoch": 0.005189307295756376,
"grad_norm": 1.852245804911945e-05,
"learning_rate": 0.00019995672748754638,
"loss": 46.0,
"step": 38
},
{
"epoch": 0.005325868014065754,
"grad_norm": 2.2482845452032052e-05,
"learning_rate": 0.0001999535816426841,
"loss": 46.0,
"step": 39
},
{
"epoch": 0.005462428732375132,
"grad_norm": 4.135522249271162e-05,
"learning_rate": 0.0001999503254520518,
"loss": 46.0,
"step": 40
},
{
"epoch": 0.005598989450684511,
"grad_norm": 4.387225635582581e-05,
"learning_rate": 0.0001999469589192442,
"loss": 46.0,
"step": 41
},
{
"epoch": 0.005735550168993889,
"grad_norm": 1.1143504707433749e-05,
"learning_rate": 0.00019994348204797788,
"loss": 46.0,
"step": 42
},
{
"epoch": 0.005872110887303267,
"grad_norm": 1.7061322068911977e-05,
"learning_rate": 0.00019993989484209118,
"loss": 46.0,
"step": 43
},
{
"epoch": 0.006008671605612645,
"grad_norm": 3.7752193748019636e-05,
"learning_rate": 0.0001999361973055443,
"loss": 46.0,
"step": 44
},
{
"epoch": 0.006145232323922024,
"grad_norm": 2.867165494535584e-05,
"learning_rate": 0.0001999323894424192,
"loss": 46.0,
"step": 45
},
{
"epoch": 0.006281793042231402,
"grad_norm": 1.418732153979363e-05,
"learning_rate": 0.0001999284712569196,
"loss": 46.0,
"step": 46
},
{
"epoch": 0.00641835376054078,
"grad_norm": 5.592720117419958e-05,
"learning_rate": 0.00019992444275337114,
"loss": 46.0,
"step": 47
},
{
"epoch": 0.0065549144788501585,
"grad_norm": 7.95181404100731e-05,
"learning_rate": 0.0001999203039362211,
"loss": 46.0,
"step": 48
},
{
"epoch": 0.006691475197159537,
"grad_norm": 2.4339618903468363e-05,
"learning_rate": 0.00019991605481003866,
"loss": 46.0,
"step": 49
},
{
"epoch": 0.006828035915468916,
"grad_norm": 3.441837543505244e-05,
"learning_rate": 0.00019991169537951468,
"loss": 46.0,
"step": 50
},
{
"epoch": 0.006964596633778294,
"grad_norm": 1.7208472854690626e-05,
"learning_rate": 0.0001999072256494619,
"loss": 46.0,
"step": 51
},
{
"epoch": 0.007101157352087672,
"grad_norm": 1.2097309991077054e-05,
"learning_rate": 0.00019990264562481472,
"loss": 46.0,
"step": 52
},
{
"epoch": 0.00723771807039705,
"grad_norm": 1.3584556654677726e-05,
"learning_rate": 0.00019989795531062936,
"loss": 46.0,
"step": 53
},
{
"epoch": 0.007374278788706429,
"grad_norm": 2.912199306592811e-05,
"learning_rate": 0.00019989315471208378,
"loss": 46.0,
"step": 54
},
{
"epoch": 0.007510839507015807,
"grad_norm": 2.0452795070013963e-05,
"learning_rate": 0.00019988824383447776,
"loss": 46.0,
"step": 55
},
{
"epoch": 0.007647400225325185,
"grad_norm": 1.590143256180454e-05,
"learning_rate": 0.00019988322268323268,
"loss": 46.0,
"step": 56
},
{
"epoch": 0.007783960943634563,
"grad_norm": 1.094182789529441e-05,
"learning_rate": 0.00019987809126389177,
"loss": 46.0,
"step": 57
},
{
"epoch": 0.007920521661943942,
"grad_norm": 1.5657904441468418e-05,
"learning_rate": 0.00019987284958211996,
"loss": 46.0,
"step": 58
},
{
"epoch": 0.00805708238025332,
"grad_norm": 1.1172323866048828e-05,
"learning_rate": 0.00019986749764370392,
"loss": 46.0,
"step": 59
},
{
"epoch": 0.008193643098562698,
"grad_norm": 1.1931785593333188e-05,
"learning_rate": 0.00019986203545455203,
"loss": 46.0,
"step": 60
},
{
"epoch": 0.008330203816872076,
"grad_norm": 2.3814011001377366e-05,
"learning_rate": 0.0001998564630206944,
"loss": 46.0,
"step": 61
},
{
"epoch": 0.008466764535181455,
"grad_norm": 1.1059896678489167e-05,
"learning_rate": 0.0001998507803482828,
"loss": 46.0,
"step": 62
},
{
"epoch": 0.008603325253490833,
"grad_norm": 1.2055512343067676e-05,
"learning_rate": 0.00019984498744359075,
"loss": 46.0,
"step": 63
},
{
"epoch": 0.00873988597180021,
"grad_norm": 3.0810657335678115e-05,
"learning_rate": 0.00019983908431301343,
"loss": 46.0,
"step": 64
},
{
"epoch": 0.00887644669010959,
"grad_norm": 2.3847031116019934e-05,
"learning_rate": 0.0001998330709630677,
"loss": 46.0,
"step": 65
},
{
"epoch": 0.009013007408418969,
"grad_norm": 2.5676057703094557e-05,
"learning_rate": 0.0001998269474003922,
"loss": 46.0,
"step": 66
},
{
"epoch": 0.009149568126728347,
"grad_norm": 1.9099008568446152e-05,
"learning_rate": 0.0001998207136317471,
"loss": 46.0,
"step": 67
},
{
"epoch": 0.009286128845037725,
"grad_norm": 1.620809234736953e-05,
"learning_rate": 0.00019981436966401425,
"loss": 46.0,
"step": 68
},
{
"epoch": 0.009422689563347103,
"grad_norm": 2.771842628135346e-05,
"learning_rate": 0.00019980791550419728,
"loss": 46.0,
"step": 69
},
{
"epoch": 0.009559250281656481,
"grad_norm": 1.6155812772922218e-05,
"learning_rate": 0.00019980135115942136,
"loss": 46.0,
"step": 70
},
{
"epoch": 0.00969581099996586,
"grad_norm": 1.4296605513663962e-05,
"learning_rate": 0.00019979467663693332,
"loss": 46.0,
"step": 71
},
{
"epoch": 0.009832371718275237,
"grad_norm": 2.5244138669222593e-05,
"learning_rate": 0.00019978789194410168,
"loss": 46.0,
"step": 72
},
{
"epoch": 0.009968932436584617,
"grad_norm": 2.8768767151632346e-05,
"learning_rate": 0.00019978099708841646,
"loss": 46.0,
"step": 73
},
{
"epoch": 0.010105493154893995,
"grad_norm": 2.3254493498825468e-05,
"learning_rate": 0.00019977399207748943,
"loss": 46.0,
"step": 74
},
{
"epoch": 0.010242053873203373,
"grad_norm": 2.780866634566337e-05,
"learning_rate": 0.00019976687691905393,
"loss": 46.0,
"step": 75
},
{
"epoch": 0.010378614591512752,
"grad_norm": 9.848828085523564e-06,
"learning_rate": 0.00019975965162096485,
"loss": 46.0,
"step": 76
},
{
"epoch": 0.01051517530982213,
"grad_norm": 1.7771888451534323e-05,
"learning_rate": 0.00019975231619119867,
"loss": 46.0,
"step": 77
},
{
"epoch": 0.010651736028131508,
"grad_norm": 2.449357634759508e-05,
"learning_rate": 0.00019974487063785355,
"loss": 46.0,
"step": 78
},
{
"epoch": 0.010788296746440886,
"grad_norm": 2.3659646103624254e-05,
"learning_rate": 0.00019973731496914914,
"loss": 46.0,
"step": 79
},
{
"epoch": 0.010924857464750264,
"grad_norm": 4.919664206681773e-05,
"learning_rate": 0.00019972964919342663,
"loss": 46.0,
"step": 80
},
{
"epoch": 0.011061418183059642,
"grad_norm": 3.88891676266212e-05,
"learning_rate": 0.00019972187331914886,
"loss": 46.0,
"step": 81
},
{
"epoch": 0.011197978901369022,
"grad_norm": 5.824596883030608e-05,
"learning_rate": 0.00019971398735490014,
"loss": 46.0,
"step": 82
},
{
"epoch": 0.0113345396196784,
"grad_norm": 2.443828088871669e-05,
"learning_rate": 0.00019970599130938633,
"loss": 46.0,
"step": 83
},
{
"epoch": 0.011471100337987778,
"grad_norm": 1.806908221624326e-05,
"learning_rate": 0.0001996978851914349,
"loss": 46.0,
"step": 84
},
{
"epoch": 0.011607661056297156,
"grad_norm": 1.9875111320288852e-05,
"learning_rate": 0.00019968966900999464,
"loss": 46.0,
"step": 85
},
{
"epoch": 0.011744221774606534,
"grad_norm": 2.811396734614391e-05,
"learning_rate": 0.00019968134277413606,
"loss": 46.0,
"step": 86
},
{
"epoch": 0.011880782492915912,
"grad_norm": 2.5689738322398625e-05,
"learning_rate": 0.00019967290649305103,
"loss": 46.0,
"step": 87
},
{
"epoch": 0.01201734321122529,
"grad_norm": 4.2773946915986016e-05,
"learning_rate": 0.00019966436017605297,
"loss": 46.0,
"step": 88
},
{
"epoch": 0.012153903929534669,
"grad_norm": 0.00021493734675459564,
"learning_rate": 0.00019965570383257677,
"loss": 46.0,
"step": 89
},
{
"epoch": 0.012290464647844048,
"grad_norm": 0.00024037304683588445,
"learning_rate": 0.00019964693747217874,
"loss": 46.0,
"step": 90
},
{
"epoch": 0.012427025366153427,
"grad_norm": 4.650273695006035e-05,
"learning_rate": 0.00019963806110453672,
"loss": 46.0,
"step": 91
},
{
"epoch": 0.012563586084462805,
"grad_norm": 2.4313370886375196e-05,
"learning_rate": 0.00019962907473944995,
"loss": 46.0,
"step": 92
},
{
"epoch": 0.012700146802772183,
"grad_norm": 1.7518057575216517e-05,
"learning_rate": 0.00019961997838683905,
"loss": 46.0,
"step": 93
},
{
"epoch": 0.01283670752108156,
"grad_norm": 3.366146847838536e-05,
"learning_rate": 0.00019961077205674622,
"loss": 46.0,
"step": 94
},
{
"epoch": 0.012973268239390939,
"grad_norm": 4.310454460210167e-05,
"learning_rate": 0.00019960145575933486,
"loss": 46.0,
"step": 95
},
{
"epoch": 0.013109828957700317,
"grad_norm": 4.665861342800781e-05,
"learning_rate": 0.00019959202950489,
"loss": 46.0,
"step": 96
},
{
"epoch": 0.013246389676009695,
"grad_norm": 2.579794090706855e-05,
"learning_rate": 0.00019958249330381787,
"loss": 46.0,
"step": 97
},
{
"epoch": 0.013382950394319073,
"grad_norm": 0.00010568476136540994,
"learning_rate": 0.00019957284716664618,
"loss": 46.0,
"step": 98
},
{
"epoch": 0.013519511112628453,
"grad_norm": 3.9795751945348457e-05,
"learning_rate": 0.00019956309110402397,
"loss": 46.0,
"step": 99
},
{
"epoch": 0.013656071830937831,
"grad_norm": 2.9642118533956818e-05,
"learning_rate": 0.00019955322512672162,
"loss": 46.0,
"step": 100
},
{
"epoch": 0.01379263254924721,
"grad_norm": 1.9580385924200527e-05,
"learning_rate": 0.00019954324924563089,
"loss": 46.0,
"step": 101
},
{
"epoch": 0.013929193267556587,
"grad_norm": 3.410055433050729e-05,
"learning_rate": 0.00019953316347176488,
"loss": 46.0,
"step": 102
},
{
"epoch": 0.014065753985865966,
"grad_norm": 5.128211705596186e-05,
"learning_rate": 0.00019952296781625795,
"loss": 46.0,
"step": 103
},
{
"epoch": 0.014202314704175344,
"grad_norm": 3.538643795764074e-05,
"learning_rate": 0.0001995126622903658,
"loss": 46.0,
"step": 104
},
{
"epoch": 0.014338875422484722,
"grad_norm": 2.4323409888893366e-05,
"learning_rate": 0.00019950224690546545,
"loss": 46.0,
"step": 105
},
{
"epoch": 0.0144754361407941,
"grad_norm": 3.726944123627618e-05,
"learning_rate": 0.00019949172167305516,
"loss": 46.0,
"step": 106
},
{
"epoch": 0.01461199685910348,
"grad_norm": 3.0351222449098714e-05,
"learning_rate": 0.00019948108660475445,
"loss": 46.0,
"step": 107
},
{
"epoch": 0.014748557577412858,
"grad_norm": 3.8205947930691764e-05,
"learning_rate": 0.0001994703417123042,
"loss": 46.0,
"step": 108
},
{
"epoch": 0.014885118295722236,
"grad_norm": 3.468756040092558e-05,
"learning_rate": 0.00019945948700756633,
"loss": 46.0,
"step": 109
},
{
"epoch": 0.015021679014031614,
"grad_norm": 2.4972347091534175e-05,
"learning_rate": 0.00019944852250252418,
"loss": 46.0,
"step": 110
},
{
"epoch": 0.015158239732340992,
"grad_norm": 2.8265107175684534e-05,
"learning_rate": 0.00019943744820928222,
"loss": 46.0,
"step": 111
},
{
"epoch": 0.01529480045065037,
"grad_norm": 2.0429773940122686e-05,
"learning_rate": 0.00019942626414006615,
"loss": 46.0,
"step": 112
},
{
"epoch": 0.015431361168959748,
"grad_norm": 3.450764052104205e-05,
"learning_rate": 0.00019941497030722286,
"loss": 46.0,
"step": 113
},
{
"epoch": 0.015567921887269126,
"grad_norm": 4.498309499467723e-05,
"learning_rate": 0.00019940356672322037,
"loss": 46.0,
"step": 114
},
{
"epoch": 0.015704482605578506,
"grad_norm": 2.8786233087885194e-05,
"learning_rate": 0.00019939205340064792,
"loss": 46.0,
"step": 115
},
{
"epoch": 0.015841043323887884,
"grad_norm": 3.189581184415147e-05,
"learning_rate": 0.00019938043035221586,
"loss": 46.0,
"step": 116
},
{
"epoch": 0.015977604042197262,
"grad_norm": 4.335124685894698e-05,
"learning_rate": 0.0001993686975907557,
"loss": 46.0,
"step": 117
},
{
"epoch": 0.01611416476050664,
"grad_norm": 4.708675987785682e-05,
"learning_rate": 0.00019935685512922007,
"loss": 46.0,
"step": 118
},
{
"epoch": 0.01625072547881602,
"grad_norm": 3.432563244132325e-05,
"learning_rate": 0.00019934490298068264,
"loss": 46.0,
"step": 119
},
{
"epoch": 0.016387286197125397,
"grad_norm": 4.133255788474344e-05,
"learning_rate": 0.0001993328411583383,
"loss": 46.0,
"step": 120
},
{
"epoch": 0.016523846915434775,
"grad_norm": 2.959890480269678e-05,
"learning_rate": 0.00019932066967550289,
"loss": 46.0,
"step": 121
},
{
"epoch": 0.016660407633744153,
"grad_norm": 5.350433275452815e-05,
"learning_rate": 0.0001993083885456134,
"loss": 46.0,
"step": 122
},
{
"epoch": 0.01679696835205353,
"grad_norm": 2.8684948119916953e-05,
"learning_rate": 0.0001992959977822278,
"loss": 46.0,
"step": 123
},
{
"epoch": 0.01693352907036291,
"grad_norm": 4.703548984252848e-05,
"learning_rate": 0.0001992834973990251,
"loss": 46.0,
"step": 124
},
{
"epoch": 0.017070089788672287,
"grad_norm": 2.6726818759925663e-05,
"learning_rate": 0.0001992708874098054,
"loss": 46.0,
"step": 125
},
{
"epoch": 0.017206650506981665,
"grad_norm": 3.8927741115912795e-05,
"learning_rate": 0.00019925816782848975,
"loss": 46.0,
"step": 126
},
{
"epoch": 0.017343211225291044,
"grad_norm": 8.123937004711479e-05,
"learning_rate": 0.00019924533866912017,
"loss": 46.0,
"step": 127
},
{
"epoch": 0.01747977194360042,
"grad_norm": 4.269002965884283e-05,
"learning_rate": 0.00019923239994585967,
"loss": 46.0,
"step": 128
},
{
"epoch": 0.017616332661909803,
"grad_norm": 6.259434303501621e-05,
"learning_rate": 0.0001992193516729922,
"loss": 46.0,
"step": 129
},
{
"epoch": 0.01775289338021918,
"grad_norm": 5.1137911214027554e-05,
"learning_rate": 0.0001992061938649227,
"loss": 46.0,
"step": 130
},
{
"epoch": 0.01788945409852856,
"grad_norm": 8.54436366353184e-05,
"learning_rate": 0.00019919292653617694,
"loss": 46.0,
"step": 131
},
{
"epoch": 0.018026014816837937,
"grad_norm": 3.590865890146233e-05,
"learning_rate": 0.00019917954970140173,
"loss": 46.0,
"step": 132
},
{
"epoch": 0.018162575535147316,
"grad_norm": 3.2492869650013745e-05,
"learning_rate": 0.00019916606337536466,
"loss": 46.0,
"step": 133
},
{
"epoch": 0.018299136253456694,
"grad_norm": 8.628293289802969e-05,
"learning_rate": 0.00019915246757295417,
"loss": 46.0,
"step": 134
},
{
"epoch": 0.018435696971766072,
"grad_norm": 7.722469308646396e-05,
"learning_rate": 0.00019913876230917975,
"loss": 46.0,
"step": 135
},
{
"epoch": 0.01857225769007545,
"grad_norm": 4.240256384946406e-05,
"learning_rate": 0.00019912494759917148,
"loss": 46.0,
"step": 136
},
{
"epoch": 0.018708818408384828,
"grad_norm": 4.052065560244955e-05,
"learning_rate": 0.00019911102345818046,
"loss": 46.0,
"step": 137
},
{
"epoch": 0.018845379126694206,
"grad_norm": 7.86216405685991e-05,
"learning_rate": 0.00019909698990157852,
"loss": 46.0,
"step": 138
},
{
"epoch": 0.018981939845003584,
"grad_norm": 0.00010640006075846031,
"learning_rate": 0.00019908284694485827,
"loss": 46.0,
"step": 139
},
{
"epoch": 0.019118500563312962,
"grad_norm": 0.00015501640154980123,
"learning_rate": 0.00019906859460363307,
"loss": 46.0,
"step": 140
},
{
"epoch": 0.01925506128162234,
"grad_norm": 7.59594258852303e-05,
"learning_rate": 0.00019905423289363715,
"loss": 46.0,
"step": 141
},
{
"epoch": 0.01939162199993172,
"grad_norm": 5.662858529831283e-05,
"learning_rate": 0.0001990397618307254,
"loss": 46.0,
"step": 142
},
{
"epoch": 0.019528182718241097,
"grad_norm": 3.2056228519650176e-05,
"learning_rate": 0.00019902518143087342,
"loss": 46.0,
"step": 143
},
{
"epoch": 0.019664743436550475,
"grad_norm": 6.27958943368867e-05,
"learning_rate": 0.00019901049171017752,
"loss": 46.0,
"step": 144
},
{
"epoch": 0.019801304154859853,
"grad_norm": 8.007367432583123e-05,
"learning_rate": 0.00019899569268485472,
"loss": 46.0,
"step": 145
},
{
"epoch": 0.019937864873169234,
"grad_norm": 6.982972263358533e-05,
"learning_rate": 0.00019898078437124276,
"loss": 46.0,
"step": 146
},
{
"epoch": 0.020074425591478613,
"grad_norm": 4.9886282795341685e-05,
"learning_rate": 0.0001989657667857999,
"loss": 46.0,
"step": 147
},
{
"epoch": 0.02021098630978799,
"grad_norm": 9.474890248384327e-05,
"learning_rate": 0.0001989506399451051,
"loss": 46.0,
"step": 148
},
{
"epoch": 0.02034754702809737,
"grad_norm": 0.00012795208021998405,
"learning_rate": 0.00019893540386585804,
"loss": 46.0,
"step": 149
},
{
"epoch": 0.020484107746406747,
"grad_norm": 8.636638813186437e-05,
"learning_rate": 0.00019892005856487878,
"loss": 46.0,
"step": 150
},
{
"epoch": 0.020620668464716125,
"grad_norm": 5.324094672687352e-05,
"learning_rate": 0.00019890460405910815,
"loss": 46.0,
"step": 151
},
{
"epoch": 0.020757229183025503,
"grad_norm": 5.794732351205312e-05,
"learning_rate": 0.00019888904036560745,
"loss": 46.0,
"step": 152
},
{
"epoch": 0.02089378990133488,
"grad_norm": 6.407459295587614e-05,
"learning_rate": 0.0001988733675015585,
"loss": 46.0,
"step": 153
},
{
"epoch": 0.02103035061964426,
"grad_norm": 5.669236270477995e-05,
"learning_rate": 0.00019885758548426367,
"loss": 46.0,
"step": 154
},
{
"epoch": 0.021166911337953637,
"grad_norm": 8.20392815512605e-05,
"learning_rate": 0.0001988416943311459,
"loss": 46.0,
"step": 155
},
{
"epoch": 0.021303472056263015,
"grad_norm": 4.120526136830449e-05,
"learning_rate": 0.00019882569405974852,
"loss": 46.0,
"step": 156
},
{
"epoch": 0.021440032774572394,
"grad_norm": 6.0489343013614416e-05,
"learning_rate": 0.0001988095846877353,
"loss": 46.0,
"step": 157
},
{
"epoch": 0.02157659349288177,
"grad_norm": 5.2780691476073116e-05,
"learning_rate": 0.00019879336623289056,
"loss": 46.0,
"step": 158
},
{
"epoch": 0.02171315421119115,
"grad_norm": 7.062828808557242e-05,
"learning_rate": 0.00019877703871311903,
"loss": 46.0,
"step": 159
},
{
"epoch": 0.021849714929500528,
"grad_norm": 8.607962081441656e-05,
"learning_rate": 0.00019876060214644566,
"loss": 46.0,
"step": 160
},
{
"epoch": 0.021986275647809906,
"grad_norm": 5.568426786339842e-05,
"learning_rate": 0.0001987440565510161,
"loss": 46.0,
"step": 161
},
{
"epoch": 0.022122836366119284,
"grad_norm": 2.0496960132732056e-05,
"learning_rate": 0.00019872740194509607,
"loss": 46.0,
"step": 162
},
{
"epoch": 0.022259397084428666,
"grad_norm": 0.0001443786604795605,
"learning_rate": 0.0001987106383470718,
"loss": 46.0,
"step": 163
},
{
"epoch": 0.022395957802738044,
"grad_norm": 0.00022565714607480913,
"learning_rate": 0.00019869376577544984,
"loss": 46.0,
"step": 164
},
{
"epoch": 0.022532518521047422,
"grad_norm": 4.196175359538756e-05,
"learning_rate": 0.00019867678424885692,
"loss": 46.0,
"step": 165
},
{
"epoch": 0.0226690792393568,
"grad_norm": 5.022220284445211e-05,
"learning_rate": 0.0001986596937860402,
"loss": 46.0,
"step": 166
},
{
"epoch": 0.022805639957666178,
"grad_norm": 6.462670717155561e-05,
"learning_rate": 0.00019864249440586704,
"loss": 46.0,
"step": 167
},
{
"epoch": 0.022942200675975556,
"grad_norm": 5.396630149334669e-05,
"learning_rate": 0.00019862518612732502,
"loss": 46.0,
"step": 168
},
{
"epoch": 0.023078761394284934,
"grad_norm": 7.057916081976146e-05,
"learning_rate": 0.00019860776896952201,
"loss": 46.0,
"step": 169
},
{
"epoch": 0.023215322112594312,
"grad_norm": 2.83908757410245e-05,
"learning_rate": 0.00019859024295168593,
"loss": 46.0,
"step": 170
},
{
"epoch": 0.02335188283090369,
"grad_norm": 5.459811654873192e-05,
"learning_rate": 0.0001985726080931651,
"loss": 46.0,
"step": 171
},
{
"epoch": 0.02348844354921307,
"grad_norm": 9.622200013836846e-05,
"learning_rate": 0.0001985548644134278,
"loss": 46.0,
"step": 172
},
{
"epoch": 0.023625004267522447,
"grad_norm": 3.8583631976507604e-05,
"learning_rate": 0.00019853701193206256,
"loss": 46.0,
"step": 173
},
{
"epoch": 0.023761564985831825,
"grad_norm": 1.6933201550273225e-05,
"learning_rate": 0.00019851905066877796,
"loss": 46.0,
"step": 174
},
{
"epoch": 0.023898125704141203,
"grad_norm": 0.00014548096805810928,
"learning_rate": 0.0001985009806434027,
"loss": 46.0,
"step": 175
},
{
"epoch": 0.02403468642245058,
"grad_norm": 3.331439438625239e-05,
"learning_rate": 0.00019848280187588556,
"loss": 46.0,
"step": 176
},
{
"epoch": 0.02417124714075996,
"grad_norm": 0.00010656285303412005,
"learning_rate": 0.00019846451438629536,
"loss": 46.0,
"step": 177
},
{
"epoch": 0.024307807859069337,
"grad_norm": 5.0110294978367165e-05,
"learning_rate": 0.00019844611819482095,
"loss": 46.0,
"step": 178
},
{
"epoch": 0.024444368577378715,
"grad_norm": 8.202512981370091e-05,
"learning_rate": 0.00019842761332177115,
"loss": 46.0,
"step": 179
},
{
"epoch": 0.024580929295688097,
"grad_norm": 4.755376357934438e-05,
"learning_rate": 0.00019840899978757485,
"loss": 46.0,
"step": 180
},
{
"epoch": 0.024717490013997475,
"grad_norm": 7.185702270362526e-05,
"learning_rate": 0.0001983902776127807,
"loss": 46.0,
"step": 181
},
{
"epoch": 0.024854050732306853,
"grad_norm": 4.723266465589404e-05,
"learning_rate": 0.00019837144681805757,
"loss": 46.0,
"step": 182
},
{
"epoch": 0.02499061145061623,
"grad_norm": 0.00011025248386431485,
"learning_rate": 0.000198352507424194,
"loss": 46.0,
"step": 183
},
{
"epoch": 0.02512717216892561,
"grad_norm": 0.00010296511027263477,
"learning_rate": 0.00019833345945209857,
"loss": 46.0,
"step": 184
},
{
"epoch": 0.025263732887234987,
"grad_norm": 7.570455636596307e-05,
"learning_rate": 0.00019831430292279966,
"loss": 46.0,
"step": 185
},
{
"epoch": 0.025400293605544366,
"grad_norm": 8.105228334898129e-05,
"learning_rate": 0.0001982950378574455,
"loss": 46.0,
"step": 186
},
{
"epoch": 0.025536854323853744,
"grad_norm": 0.00013544574903789908,
"learning_rate": 0.00019827566427730412,
"loss": 46.0,
"step": 187
},
{
"epoch": 0.02567341504216312,
"grad_norm": 6.9964567956049e-05,
"learning_rate": 0.00019825618220376342,
"loss": 46.0,
"step": 188
},
{
"epoch": 0.0258099757604725,
"grad_norm": 9.811633208300918e-05,
"learning_rate": 0.00019823659165833102,
"loss": 46.0,
"step": 189
},
{
"epoch": 0.025946536478781878,
"grad_norm": 0.00023017756757326424,
"learning_rate": 0.00019821689266263427,
"loss": 46.0,
"step": 190
},
{
"epoch": 0.026083097197091256,
"grad_norm": 0.0003564099024515599,
"learning_rate": 0.0001981970852384203,
"loss": 46.0,
"step": 191
},
{
"epoch": 0.026219657915400634,
"grad_norm": 5.261121259536594e-05,
"learning_rate": 0.00019817716940755586,
"loss": 46.0,
"step": 192
},
{
"epoch": 0.026356218633710012,
"grad_norm": 0.00012212673027534038,
"learning_rate": 0.00019815714519202753,
"loss": 46.0,
"step": 193
},
{
"epoch": 0.02649277935201939,
"grad_norm": 7.185459253378212e-05,
"learning_rate": 0.00019813701261394136,
"loss": 46.0,
"step": 194
},
{
"epoch": 0.02662934007032877,
"grad_norm": 0.00010008271055994555,
"learning_rate": 0.00019811677169552313,
"loss": 46.0,
"step": 195
},
{
"epoch": 0.026765900788638147,
"grad_norm": 0.00010963875683955848,
"learning_rate": 0.0001980964224591183,
"loss": 46.0,
"step": 196
},
{
"epoch": 0.026902461506947528,
"grad_norm": 5.4636468121316284e-05,
"learning_rate": 0.00019807596492719167,
"loss": 46.0,
"step": 197
},
{
"epoch": 0.027039022225256906,
"grad_norm": 5.760273052146658e-05,
"learning_rate": 0.00019805539912232784,
"loss": 46.0,
"step": 198
},
{
"epoch": 0.027175582943566284,
"grad_norm": 0.0001271786750294268,
"learning_rate": 0.00019803472506723085,
"loss": 46.0,
"step": 199
},
{
"epoch": 0.027312143661875662,
"grad_norm": 0.00014666210336145014,
"learning_rate": 0.00019801394278472418,
"loss": 46.0,
"step": 200
},
{
"epoch": 0.02744870438018504,
"grad_norm": 3.408119664527476e-05,
"learning_rate": 0.0001979930522977509,
"loss": 46.0,
"step": 201
},
{
"epoch": 0.02758526509849442,
"grad_norm": 0.00010019735782407224,
"learning_rate": 0.00019797205362937347,
"loss": 46.0,
"step": 202
},
{
"epoch": 0.027721825816803797,
"grad_norm": 0.00011540239211171865,
"learning_rate": 0.00019795094680277378,
"loss": 46.0,
"step": 203
},
{
"epoch": 0.027858386535113175,
"grad_norm": 8.116603567032143e-05,
"learning_rate": 0.0001979297318412532,
"loss": 46.0,
"step": 204
},
{
"epoch": 0.027994947253422553,
"grad_norm": 0.0001576711074449122,
"learning_rate": 0.00019790840876823232,
"loss": 46.0,
"step": 205
},
{
"epoch": 0.02813150797173193,
"grad_norm": 8.809396240394562e-05,
"learning_rate": 0.0001978869776072512,
"loss": 46.0,
"step": 206
},
{
"epoch": 0.02826806869004131,
"grad_norm": 0.0001056291293934919,
"learning_rate": 0.00019786543838196924,
"loss": 46.0,
"step": 207
},
{
"epoch": 0.028404629408350687,
"grad_norm": 0.0001859702606452629,
"learning_rate": 0.00019784379111616507,
"loss": 46.0,
"step": 208
},
{
"epoch": 0.028541190126660065,
"grad_norm": 7.50935505493544e-05,
"learning_rate": 0.00019782203583373664,
"loss": 46.0,
"step": 209
},
{
"epoch": 0.028677750844969443,
"grad_norm": 4.688434273703024e-05,
"learning_rate": 0.00019780017255870114,
"loss": 46.0,
"step": 210
},
{
"epoch": 0.02881431156327882,
"grad_norm": 0.00032884004758670926,
"learning_rate": 0.00019777820131519495,
"loss": 46.0,
"step": 211
},
{
"epoch": 0.0289508722815882,
"grad_norm": 0.0001322666648775339,
"learning_rate": 0.0001977561221274737,
"loss": 46.0,
"step": 212
},
{
"epoch": 0.029087432999897578,
"grad_norm": 0.00014480279060080647,
"learning_rate": 0.00019773393501991212,
"loss": 46.0,
"step": 213
},
{
"epoch": 0.02922399371820696,
"grad_norm": 0.00011657484719762579,
"learning_rate": 0.0001977116400170041,
"loss": 46.0,
"step": 214
},
{
"epoch": 0.029360554436516337,
"grad_norm": 0.00011347379040671512,
"learning_rate": 0.00019768923714336272,
"loss": 46.0,
"step": 215
},
{
"epoch": 0.029497115154825716,
"grad_norm": 0.00017761997878551483,
"learning_rate": 0.00019766672642372002,
"loss": 46.0,
"step": 216
},
{
"epoch": 0.029633675873135094,
"grad_norm": 0.00014384661335498095,
"learning_rate": 0.00019764410788292722,
"loss": 46.0,
"step": 217
},
{
"epoch": 0.029770236591444472,
"grad_norm": 7.11917455191724e-05,
"learning_rate": 0.00019762138154595446,
"loss": 46.0,
"step": 218
},
{
"epoch": 0.02990679730975385,
"grad_norm": 0.00012330934987403452,
"learning_rate": 0.00019759854743789097,
"loss": 46.0,
"step": 219
},
{
"epoch": 0.030043358028063228,
"grad_norm": 8.032290497794747e-05,
"learning_rate": 0.00019757560558394493,
"loss": 46.0,
"step": 220
},
{
"epoch": 0.030179918746372606,
"grad_norm": 0.00021323734836187214,
"learning_rate": 0.0001975525560094434,
"loss": 46.0,
"step": 221
},
{
"epoch": 0.030316479464681984,
"grad_norm": 0.00014698925951961428,
"learning_rate": 0.00019752939873983255,
"loss": 46.0,
"step": 222
},
{
"epoch": 0.030453040182991362,
"grad_norm": 0.00018497723795007914,
"learning_rate": 0.00019750613380067718,
"loss": 46.0,
"step": 223
},
{
"epoch": 0.03058960090130074,
"grad_norm": 0.0001770486414898187,
"learning_rate": 0.00019748276121766116,
"loss": 46.0,
"step": 224
},
{
"epoch": 0.03072616161961012,
"grad_norm": 6.994479190325364e-05,
"learning_rate": 0.00019745928101658707,
"loss": 46.0,
"step": 225
},
{
"epoch": 0.030862722337919497,
"grad_norm": 0.0001687954500084743,
"learning_rate": 0.00019743569322337642,
"loss": 46.0,
"step": 226
},
{
"epoch": 0.030999283056228875,
"grad_norm": 0.00010247869795421138,
"learning_rate": 0.00019741199786406938,
"loss": 46.0,
"step": 227
},
{
"epoch": 0.031135843774538253,
"grad_norm": 0.00013670619227923453,
"learning_rate": 0.00019738819496482494,
"loss": 46.0,
"step": 228
},
{
"epoch": 0.031272404492847634,
"grad_norm": 0.00013827405928168446,
"learning_rate": 0.0001973642845519208,
"loss": 46.0,
"step": 229
},
{
"epoch": 0.03140896521115701,
"grad_norm": 0.00014026669668965042,
"learning_rate": 0.00019734026665175334,
"loss": 46.0,
"step": 230
},
{
"epoch": 0.03154552592946639,
"grad_norm": 0.00010120788647327572,
"learning_rate": 0.00019731614129083754,
"loss": 46.0,
"step": 231
},
{
"epoch": 0.03168208664777577,
"grad_norm": 0.00014757270400878042,
"learning_rate": 0.0001972919084958072,
"loss": 46.0,
"step": 232
},
{
"epoch": 0.03181864736608515,
"grad_norm": 8.340936619788408e-05,
"learning_rate": 0.00019726756829341446,
"loss": 46.0,
"step": 233
},
{
"epoch": 0.031955208084394525,
"grad_norm": 0.00013478368055075407,
"learning_rate": 0.0001972431207105303,
"loss": 46.0,
"step": 234
},
{
"epoch": 0.0320917688027039,
"grad_norm": 0.00013591728929895908,
"learning_rate": 0.00019721856577414407,
"loss": 46.0,
"step": 235
},
{
"epoch": 0.03222832952101328,
"grad_norm": 0.0002845980925485492,
"learning_rate": 0.00019719390351136365,
"loss": 46.0,
"step": 236
},
{
"epoch": 0.03236489023932266,
"grad_norm": 0.00021167262457311153,
"learning_rate": 0.0001971691339494155,
"loss": 46.0,
"step": 237
},
{
"epoch": 0.03250145095763204,
"grad_norm": 0.00015637895558029413,
"learning_rate": 0.00019714425711564446,
"loss": 46.0,
"step": 238
},
{
"epoch": 0.032638011675941415,
"grad_norm": 0.0004328020440880209,
"learning_rate": 0.00019711927303751382,
"loss": 46.0,
"step": 239
},
{
"epoch": 0.032774572394250794,
"grad_norm": 7.109026773832738e-05,
"learning_rate": 0.0001970941817426052,
"loss": 46.0,
"step": 240
},
{
"epoch": 0.03291113311256017,
"grad_norm": 0.00022207674919627607,
"learning_rate": 0.00019706898325861874,
"loss": 46.0,
"step": 241
},
{
"epoch": 0.03304769383086955,
"grad_norm": 7.223385910037905e-05,
"learning_rate": 0.0001970436776133727,
"loss": 46.0,
"step": 242
},
{
"epoch": 0.03318425454917893,
"grad_norm": 0.0002725286176428199,
"learning_rate": 0.00019701826483480388,
"loss": 46.0,
"step": 243
},
{
"epoch": 0.033320815267488306,
"grad_norm": 0.00011271587572991848,
"learning_rate": 0.00019699274495096712,
"loss": 46.0,
"step": 244
},
{
"epoch": 0.033457375985797684,
"grad_norm": 0.0001713872916297987,
"learning_rate": 0.0001969671179900357,
"loss": 46.0,
"step": 245
},
{
"epoch": 0.03359393670410706,
"grad_norm": 0.0001251544599654153,
"learning_rate": 0.00019694138398030094,
"loss": 46.0,
"step": 246
},
{
"epoch": 0.03373049742241644,
"grad_norm": 0.00030473063816316426,
"learning_rate": 0.00019691554295017246,
"loss": 46.0,
"step": 247
},
{
"epoch": 0.03386705814072582,
"grad_norm": 0.00020300064352340996,
"learning_rate": 0.000196889594928178,
"loss": 46.0,
"step": 248
},
{
"epoch": 0.034003618859035196,
"grad_norm": 0.0001950880396179855,
"learning_rate": 0.00019686353994296333,
"loss": 46.0,
"step": 249
},
{
"epoch": 0.034140179577344575,
"grad_norm": 0.00034574157325550914,
"learning_rate": 0.00019683737802329244,
"loss": 46.0,
"step": 250
},
{
"epoch": 0.03427674029565395,
"grad_norm": 0.0001567787694511935,
"learning_rate": 0.0001968111091980473,
"loss": 46.0,
"step": 251
},
{
"epoch": 0.03441330101396333,
"grad_norm": 0.00012498951400630176,
"learning_rate": 0.00019678473349622793,
"loss": 46.0,
"step": 252
},
{
"epoch": 0.03454986173227271,
"grad_norm": 0.0002493293723091483,
"learning_rate": 0.0001967582509469523,
"loss": 46.0,
"step": 253
},
{
"epoch": 0.03468642245058209,
"grad_norm": 0.0003348199534229934,
"learning_rate": 0.0001967316615794563,
"loss": 46.0,
"step": 254
},
{
"epoch": 0.034822983168891465,
"grad_norm": 0.00020134066289756447,
"learning_rate": 0.00019670496542309384,
"loss": 46.0,
"step": 255
},
{
"epoch": 0.03495954388720084,
"grad_norm": 8.074977085925639e-05,
"learning_rate": 0.0001966781625073367,
"loss": 46.0,
"step": 256
},
{
"epoch": 0.03509610460551023,
"grad_norm": 0.00018040316354017705,
"learning_rate": 0.00019665125286177449,
"loss": 46.0,
"step": 257
},
{
"epoch": 0.035232665323819606,
"grad_norm": 0.00016175376367755234,
"learning_rate": 0.00019662423651611464,
"loss": 46.0,
"step": 258
},
{
"epoch": 0.035369226042128984,
"grad_norm": 6.748022133251652e-05,
"learning_rate": 0.00019659711350018239,
"loss": 46.0,
"step": 259
},
{
"epoch": 0.03550578676043836,
"grad_norm": 0.00014227218343876302,
"learning_rate": 0.00019656988384392075,
"loss": 46.0,
"step": 260
},
{
"epoch": 0.03564234747874774,
"grad_norm": 0.0002607603382784873,
"learning_rate": 0.00019654254757739043,
"loss": 46.0,
"step": 261
},
{
"epoch": 0.03577890819705712,
"grad_norm": 0.00026646576588973403,
"learning_rate": 0.00019651510473076987,
"loss": 46.0,
"step": 262
},
{
"epoch": 0.0359154689153665,
"grad_norm": 0.00020209423382766545,
"learning_rate": 0.00019648755533435518,
"loss": 46.0,
"step": 263
},
{
"epoch": 0.036052029633675875,
"grad_norm": 0.00013208483869675547,
"learning_rate": 0.00019645989941855999,
"loss": 46.0,
"step": 264
},
{
"epoch": 0.03618859035198525,
"grad_norm": 0.0002046150912065059,
"learning_rate": 0.00019643213701391567,
"loss": 46.0,
"step": 265
},
{
"epoch": 0.03632515107029463,
"grad_norm": 0.00022820701997261494,
"learning_rate": 0.00019640426815107108,
"loss": 46.0,
"step": 266
},
{
"epoch": 0.03646171178860401,
"grad_norm": 0.00019359646830707788,
"learning_rate": 0.0001963762928607926,
"loss": 46.0,
"step": 267
},
{
"epoch": 0.03659827250691339,
"grad_norm": 0.00015012026415206492,
"learning_rate": 0.0001963482111739641,
"loss": 46.0,
"step": 268
},
{
"epoch": 0.036734833225222766,
"grad_norm": 0.00021516659762710333,
"learning_rate": 0.00019632002312158697,
"loss": 46.0,
"step": 269
},
{
"epoch": 0.036871393943532144,
"grad_norm": 0.00019021316256839782,
"learning_rate": 0.00019629172873477995,
"loss": 46.0,
"step": 270
},
{
"epoch": 0.03700795466184152,
"grad_norm": 0.00023805341334082186,
"learning_rate": 0.00019626332804477915,
"loss": 46.0,
"step": 271
},
{
"epoch": 0.0371445153801509,
"grad_norm": 0.0002716032904572785,
"learning_rate": 0.0001962348210829382,
"loss": 46.0,
"step": 272
},
{
"epoch": 0.03728107609846028,
"grad_norm": 0.0001799121528165415,
"learning_rate": 0.00019620620788072783,
"loss": 46.0,
"step": 273
},
{
"epoch": 0.037417636816769656,
"grad_norm": 0.00041423438233323395,
"learning_rate": 0.0001961774884697362,
"loss": 46.0,
"step": 274
},
{
"epoch": 0.037554197535079034,
"grad_norm": 0.0002172417298424989,
"learning_rate": 0.0001961486628816687,
"loss": 46.0,
"step": 275
},
{
"epoch": 0.03769075825338841,
"grad_norm": 0.0002554766833782196,
"learning_rate": 0.0001961197311483479,
"loss": 46.0,
"step": 276
},
{
"epoch": 0.03782731897169779,
"grad_norm": 0.0002692249254323542,
"learning_rate": 0.0001960906933017135,
"loss": 46.0,
"step": 277
},
{
"epoch": 0.03796387969000717,
"grad_norm": 0.00040551606798544526,
"learning_rate": 0.00019606154937382256,
"loss": 46.0,
"step": 278
},
{
"epoch": 0.03810044040831655,
"grad_norm": 0.0003460289444774389,
"learning_rate": 0.000196032299396849,
"loss": 46.0,
"step": 279
},
{
"epoch": 0.038237001126625925,
"grad_norm": 0.000246795651037246,
"learning_rate": 0.00019600294340308398,
"loss": 46.0,
"step": 280
},
{
"epoch": 0.0383735618449353,
"grad_norm": 0.0005187865463085473,
"learning_rate": 0.00019597348142493562,
"loss": 46.0,
"step": 281
},
{
"epoch": 0.03851012256324468,
"grad_norm": 0.0005177973653189838,
"learning_rate": 0.00019594391349492902,
"loss": 46.0,
"step": 282
},
{
"epoch": 0.03864668328155406,
"grad_norm": 0.00031362145091407,
"learning_rate": 0.00019591423964570632,
"loss": 46.0,
"step": 283
},
{
"epoch": 0.03878324399986344,
"grad_norm": 0.0003161428030580282,
"learning_rate": 0.0001958844599100266,
"loss": 46.0,
"step": 284
},
{
"epoch": 0.038919804718172815,
"grad_norm": 0.0002518398978281766,
"learning_rate": 0.00019585457432076578,
"loss": 46.0,
"step": 285
},
{
"epoch": 0.03905636543648219,
"grad_norm": 0.00017934896459337324,
"learning_rate": 0.00019582458291091663,
"loss": 46.0,
"step": 286
},
{
"epoch": 0.03919292615479157,
"grad_norm": 0.00032432653824798763,
"learning_rate": 0.0001957944857135888,
"loss": 46.0,
"step": 287
},
{
"epoch": 0.03932948687310095,
"grad_norm": 0.00021099011064507067,
"learning_rate": 0.00019576428276200868,
"loss": 46.0,
"step": 288
},
{
"epoch": 0.03946604759141033,
"grad_norm": 0.0005440306267701089,
"learning_rate": 0.00019573397408951943,
"loss": 46.0,
"step": 289
},
{
"epoch": 0.039602608309719706,
"grad_norm": 0.0005642606993205845,
"learning_rate": 0.00019570355972958097,
"loss": 46.0,
"step": 290
},
{
"epoch": 0.03973916902802909,
"grad_norm": 0.0007911530556157231,
"learning_rate": 0.00019567303971576976,
"loss": 46.0,
"step": 291
},
{
"epoch": 0.03987572974633847,
"grad_norm": 0.00017880380619317293,
"learning_rate": 0.000195642414081779,
"loss": 46.0,
"step": 292
},
{
"epoch": 0.04001229046464785,
"grad_norm": 0.00048157072160393,
"learning_rate": 0.00019561168286141856,
"loss": 46.0,
"step": 293
},
{
"epoch": 0.040148851182957225,
"grad_norm": 0.00010301580186933279,
"learning_rate": 0.00019558084608861472,
"loss": 46.0,
"step": 294
},
{
"epoch": 0.0402854119012666,
"grad_norm": 0.0003147012903355062,
"learning_rate": 0.00019554990379741033,
"loss": 46.0,
"step": 295
},
{
"epoch": 0.04042197261957598,
"grad_norm": 0.00039921182906255126,
"learning_rate": 0.0001955188560219648,
"loss": 46.0,
"step": 296
},
{
"epoch": 0.04055853333788536,
"grad_norm": 0.0004582660039886832,
"learning_rate": 0.00019548770279655397,
"loss": 46.0,
"step": 297
},
{
"epoch": 0.04069509405619474,
"grad_norm": 0.0001638657267903909,
"learning_rate": 0.00019545644415557,
"loss": 46.0,
"step": 298
},
{
"epoch": 0.040831654774504116,
"grad_norm": 0.00041633055661804974,
"learning_rate": 0.00019542508013352156,
"loss": 46.0,
"step": 299
},
{
"epoch": 0.040968215492813494,
"grad_norm": 0.0005134938983246684,
"learning_rate": 0.0001953936107650336,
"loss": 46.0,
"step": 300
},
{
"epoch": 0.040968215492813494,
"eval_loss": 11.5,
"eval_runtime": 20.4404,
"eval_samples_per_second": 150.878,
"eval_steps_per_second": 75.439,
"step": 300
},
{
"epoch": 0.04110477621112287,
"grad_norm": 0.00037575350143015385,
"learning_rate": 0.0001953620360848473,
"loss": 46.0,
"step": 301
},
{
"epoch": 0.04124133692943225,
"grad_norm": 0.00024376453075092286,
"learning_rate": 0.00019533035612782017,
"loss": 46.0,
"step": 302
},
{
"epoch": 0.04137789764774163,
"grad_norm": 0.00043027085484936833,
"learning_rate": 0.00019529857092892602,
"loss": 46.0,
"step": 303
},
{
"epoch": 0.041514458366051006,
"grad_norm": 0.00039237432065419853,
"learning_rate": 0.00019526668052325467,
"loss": 46.0,
"step": 304
},
{
"epoch": 0.041651019084360384,
"grad_norm": 0.0001755830307956785,
"learning_rate": 0.00019523468494601223,
"loss": 46.0,
"step": 305
},
{
"epoch": 0.04178757980266976,
"grad_norm": 0.0001863718789536506,
"learning_rate": 0.00019520258423252082,
"loss": 46.0,
"step": 306
},
{
"epoch": 0.04192414052097914,
"grad_norm": 0.0003355692024342716,
"learning_rate": 0.00019517037841821873,
"loss": 46.0,
"step": 307
},
{
"epoch": 0.04206070123928852,
"grad_norm": 0.0002629165828693658,
"learning_rate": 0.00019513806753866016,
"loss": 46.0,
"step": 308
},
{
"epoch": 0.0421972619575979,
"grad_norm": 0.0004353369877208024,
"learning_rate": 0.00019510565162951537,
"loss": 46.0,
"step": 309
},
{
"epoch": 0.042333822675907275,
"grad_norm": 0.0001725061738397926,
"learning_rate": 0.00019507313072657055,
"loss": 46.0,
"step": 310
},
{
"epoch": 0.04247038339421665,
"grad_norm": 0.00021753301552962512,
"learning_rate": 0.00019504050486572784,
"loss": 46.0,
"step": 311
},
{
"epoch": 0.04260694411252603,
"grad_norm": 0.00035614983062259853,
"learning_rate": 0.00019500777408300519,
"loss": 46.0,
"step": 312
},
{
"epoch": 0.04274350483083541,
"grad_norm": 0.00025182642275467515,
"learning_rate": 0.00019497493841453642,
"loss": 46.0,
"step": 313
},
{
"epoch": 0.04288006554914479,
"grad_norm": 0.0003710365854203701,
"learning_rate": 0.0001949419978965711,
"loss": 46.0,
"step": 314
},
{
"epoch": 0.043016626267454165,
"grad_norm": 0.00031021906761452556,
"learning_rate": 0.00019490895256547464,
"loss": 46.0,
"step": 315
},
{
"epoch": 0.04315318698576354,
"grad_norm": 0.0002598558203317225,
"learning_rate": 0.000194875802457728,
"loss": 46.0,
"step": 316
},
{
"epoch": 0.04328974770407292,
"grad_norm": 0.00025477109011262655,
"learning_rate": 0.000194842547609928,
"loss": 46.0,
"step": 317
},
{
"epoch": 0.0434263084223823,
"grad_norm": 0.0003966864896938205,
"learning_rate": 0.00019480918805878697,
"loss": 46.0,
"step": 318
},
{
"epoch": 0.04356286914069168,
"grad_norm": 0.0001289808569708839,
"learning_rate": 0.00019477572384113282,
"loss": 46.0,
"step": 319
},
{
"epoch": 0.043699429859001056,
"grad_norm": 0.00031020533060655,
"learning_rate": 0.00019474215499390912,
"loss": 46.0,
"step": 320
},
{
"epoch": 0.043835990577310434,
"grad_norm": 0.00045745153329335153,
"learning_rate": 0.0001947084815541748,
"loss": 46.0,
"step": 321
},
{
"epoch": 0.04397255129561981,
"grad_norm": 0.00031357730040326715,
"learning_rate": 0.00019467470355910438,
"loss": 46.0,
"step": 322
},
{
"epoch": 0.04410911201392919,
"grad_norm": 0.00021334455232135952,
"learning_rate": 0.00019464082104598776,
"loss": 46.0,
"step": 323
},
{
"epoch": 0.04424567273223857,
"grad_norm": 0.00033166687353514135,
"learning_rate": 0.0001946068340522302,
"loss": 46.0,
"step": 324
},
{
"epoch": 0.04438223345054795,
"grad_norm": 0.0004193445493001491,
"learning_rate": 0.00019457274261535236,
"loss": 46.0,
"step": 325
},
{
"epoch": 0.04451879416885733,
"grad_norm": 0.0005776687175966799,
"learning_rate": 0.0001945385467729901,
"loss": 46.0,
"step": 326
},
{
"epoch": 0.04465535488716671,
"grad_norm": 0.00021739969088230282,
"learning_rate": 0.00019450424656289466,
"loss": 46.0,
"step": 327
},
{
"epoch": 0.04479191560547609,
"grad_norm": 0.00025186152197420597,
"learning_rate": 0.00019446984202293246,
"loss": 46.0,
"step": 328
},
{
"epoch": 0.044928476323785466,
"grad_norm": 0.0004120633821003139,
"learning_rate": 0.00019443533319108504,
"loss": 46.0,
"step": 329
},
{
"epoch": 0.045065037042094844,
"grad_norm": 0.00028226643917150795,
"learning_rate": 0.00019440072010544918,
"loss": 46.0,
"step": 330
},
{
"epoch": 0.04520159776040422,
"grad_norm": 0.00048744879313744605,
"learning_rate": 0.00019436600280423665,
"loss": 46.0,
"step": 331
},
{
"epoch": 0.0453381584787136,
"grad_norm": 0.00019058110774494708,
"learning_rate": 0.0001943311813257743,
"loss": 46.0,
"step": 332
},
{
"epoch": 0.04547471919702298,
"grad_norm": 0.00047706879558973014,
"learning_rate": 0.00019429625570850404,
"loss": 46.0,
"step": 333
},
{
"epoch": 0.045611279915332356,
"grad_norm": 0.0005120193236507475,
"learning_rate": 0.0001942612259909827,
"loss": 46.0,
"step": 334
},
{
"epoch": 0.045747840633641734,
"grad_norm": 0.0002128657652065158,
"learning_rate": 0.00019422609221188207,
"loss": 46.0,
"step": 335
},
{
"epoch": 0.04588440135195111,
"grad_norm": 0.0005021935794502497,
"learning_rate": 0.00019419085440998873,
"loss": 46.0,
"step": 336
},
{
"epoch": 0.04602096207026049,
"grad_norm": 0.000529598502907902,
"learning_rate": 0.00019415551262420418,
"loss": 46.0,
"step": 337
},
{
"epoch": 0.04615752278856987,
"grad_norm": 0.000645966618321836,
"learning_rate": 0.0001941200668935447,
"loss": 46.0,
"step": 338
},
{
"epoch": 0.04629408350687925,
"grad_norm": 0.0012579227332025766,
"learning_rate": 0.00019408451725714136,
"loss": 46.0,
"step": 339
},
{
"epoch": 0.046430644225188625,
"grad_norm": 0.0003929708036594093,
"learning_rate": 0.00019404886375423984,
"loss": 46.0,
"step": 340
},
{
"epoch": 0.046567204943498,
"grad_norm": 0.0008281446644105017,
"learning_rate": 0.00019401310642420058,
"loss": 46.0,
"step": 341
},
{
"epoch": 0.04670376566180738,
"grad_norm": 0.0003685842384584248,
"learning_rate": 0.00019397724530649857,
"loss": 46.0,
"step": 342
},
{
"epoch": 0.04684032638011676,
"grad_norm": 0.00041180921834893525,
"learning_rate": 0.00019394128044072345,
"loss": 46.0,
"step": 343
},
{
"epoch": 0.04697688709842614,
"grad_norm": 0.00038018723716959357,
"learning_rate": 0.00019390521186657934,
"loss": 46.0,
"step": 344
},
{
"epoch": 0.047113447816735515,
"grad_norm": 0.0006593601428903639,
"learning_rate": 0.00019386903962388487,
"loss": 46.0,
"step": 345
},
{
"epoch": 0.04725000853504489,
"grad_norm": 0.0003346616867929697,
"learning_rate": 0.0001938327637525731,
"loss": 46.0,
"step": 346
},
{
"epoch": 0.04738656925335427,
"grad_norm": 0.0006303668487817049,
"learning_rate": 0.00019379638429269157,
"loss": 46.0,
"step": 347
},
{
"epoch": 0.04752312997166365,
"grad_norm": 0.0009288009605370462,
"learning_rate": 0.00019375990128440204,
"loss": 46.0,
"step": 348
},
{
"epoch": 0.04765969068997303,
"grad_norm": 0.0009077245485968888,
"learning_rate": 0.00019372331476798072,
"loss": 46.0,
"step": 349
},
{
"epoch": 0.047796251408282406,
"grad_norm": 0.0006878585554659367,
"learning_rate": 0.00019368662478381799,
"loss": 46.0,
"step": 350
},
{
"epoch": 0.047932812126591784,
"grad_norm": 0.0005428345175459981,
"learning_rate": 0.00019364983137241853,
"loss": 46.0,
"step": 351
},
{
"epoch": 0.04806937284490116,
"grad_norm": 0.0005264700739644468,
"learning_rate": 0.0001936129345744011,
"loss": 46.0,
"step": 352
},
{
"epoch": 0.04820593356321054,
"grad_norm": 0.00034603691892698407,
"learning_rate": 0.00019357593443049877,
"loss": 46.0,
"step": 353
},
{
"epoch": 0.04834249428151992,
"grad_norm": 0.0005551993381232023,
"learning_rate": 0.00019353883098155854,
"loss": 46.0,
"step": 354
},
{
"epoch": 0.048479054999829296,
"grad_norm": 0.0005989357596263289,
"learning_rate": 0.0001935016242685415,
"loss": 46.0,
"step": 355
},
{
"epoch": 0.048615615718138674,
"grad_norm": 0.0003730835160240531,
"learning_rate": 0.00019346431433252276,
"loss": 46.0,
"step": 356
},
{
"epoch": 0.04875217643644805,
"grad_norm": 0.0006811637431383133,
"learning_rate": 0.00019342690121469138,
"loss": 46.0,
"step": 357
},
{
"epoch": 0.04888873715475743,
"grad_norm": 0.000448873353889212,
"learning_rate": 0.0001933893849563503,
"loss": 46.0,
"step": 358
},
{
"epoch": 0.049025297873066816,
"grad_norm": 0.00046228739665821195,
"learning_rate": 0.0001933517655989164,
"loss": 46.0,
"step": 359
},
{
"epoch": 0.049161858591376194,
"grad_norm": 0.000341800187015906,
"learning_rate": 0.00019331404318392027,
"loss": 46.0,
"step": 360
},
{
"epoch": 0.04929841930968557,
"grad_norm": 0.0007170014432631433,
"learning_rate": 0.00019327621775300637,
"loss": 46.0,
"step": 361
},
{
"epoch": 0.04943498002799495,
"grad_norm": 0.0009279394871555269,
"learning_rate": 0.00019323828934793286,
"loss": 46.0,
"step": 362
},
{
"epoch": 0.04957154074630433,
"grad_norm": 0.0007022646022960544,
"learning_rate": 0.0001932002580105715,
"loss": 46.0,
"step": 363
},
{
"epoch": 0.049708101464613706,
"grad_norm": 0.0005995425744913518,
"learning_rate": 0.0001931621237829078,
"loss": 46.0,
"step": 364
},
{
"epoch": 0.049844662182923084,
"grad_norm": 0.0006767901941202581,
"learning_rate": 0.00019312388670704081,
"loss": 46.0,
"step": 365
},
{
"epoch": 0.04998122290123246,
"grad_norm": 0.00040845770854502916,
"learning_rate": 0.00019308554682518313,
"loss": 46.0,
"step": 366
},
{
"epoch": 0.05011778361954184,
"grad_norm": 0.00040534368599765003,
"learning_rate": 0.00019304710417966079,
"loss": 46.0,
"step": 367
},
{
"epoch": 0.05025434433785122,
"grad_norm": 0.00040592235745862126,
"learning_rate": 0.0001930085588129134,
"loss": 46.0,
"step": 368
},
{
"epoch": 0.0503909050561606,
"grad_norm": 0.00048625541967339814,
"learning_rate": 0.00019296991076749381,
"loss": 46.0,
"step": 369
},
{
"epoch": 0.050527465774469975,
"grad_norm": 0.0010227779857814312,
"learning_rate": 0.00019293116008606837,
"loss": 46.0,
"step": 370
},
{
"epoch": 0.05066402649277935,
"grad_norm": 0.0005206182249821723,
"learning_rate": 0.00019289230681141667,
"loss": 46.0,
"step": 371
},
{
"epoch": 0.05080058721108873,
"grad_norm": 0.0005789480055682361,
"learning_rate": 0.00019285335098643153,
"loss": 46.0,
"step": 372
},
{
"epoch": 0.05093714792939811,
"grad_norm": 0.00046148046385496855,
"learning_rate": 0.00019281429265411907,
"loss": 46.0,
"step": 373
},
{
"epoch": 0.05107370864770749,
"grad_norm": 0.0005185164045542479,
"learning_rate": 0.00019277513185759844,
"loss": 46.0,
"step": 374
},
{
"epoch": 0.051210269366016865,
"grad_norm": 0.0005086156306788325,
"learning_rate": 0.0001927358686401021,
"loss": 46.0,
"step": 375
},
{
"epoch": 0.05134683008432624,
"grad_norm": 0.0005038722883909941,
"learning_rate": 0.0001926965030449754,
"loss": 46.0,
"step": 376
},
{
"epoch": 0.05148339080263562,
"grad_norm": 0.001132496865466237,
"learning_rate": 0.00019265703511567677,
"loss": 46.0,
"step": 377
},
{
"epoch": 0.051619951520945,
"grad_norm": 0.0011954177170991898,
"learning_rate": 0.00019261746489577765,
"loss": 46.0,
"step": 378
},
{
"epoch": 0.05175651223925438,
"grad_norm": 0.0006181654753163457,
"learning_rate": 0.0001925777924289624,
"loss": 46.0,
"step": 379
},
{
"epoch": 0.051893072957563756,
"grad_norm": 0.0005807424895465374,
"learning_rate": 0.00019253801775902824,
"loss": 46.0,
"step": 380
},
{
"epoch": 0.052029633675873134,
"grad_norm": 0.0007045441307127476,
"learning_rate": 0.00019249814092988515,
"loss": 46.0,
"step": 381
},
{
"epoch": 0.05216619439418251,
"grad_norm": 0.0005537315737456083,
"learning_rate": 0.00019245816198555605,
"loss": 46.0,
"step": 382
},
{
"epoch": 0.05230275511249189,
"grad_norm": 0.0006181203643791378,
"learning_rate": 0.00019241808097017642,
"loss": 46.0,
"step": 383
},
{
"epoch": 0.05243931583080127,
"grad_norm": 0.0005752414581365883,
"learning_rate": 0.00019237789792799458,
"loss": 46.0,
"step": 384
},
{
"epoch": 0.052575876549110646,
"grad_norm": 0.0012608635006472468,
"learning_rate": 0.00019233761290337134,
"loss": 46.0,
"step": 385
},
{
"epoch": 0.052712437267420025,
"grad_norm": 0.001415454433299601,
"learning_rate": 0.0001922972259407802,
"loss": 46.0,
"step": 386
},
{
"epoch": 0.0528489979857294,
"grad_norm": 0.001285754144191742,
"learning_rate": 0.00019225673708480717,
"loss": 46.0,
"step": 387
},
{
"epoch": 0.05298555870403878,
"grad_norm": 0.0007671714411117136,
"learning_rate": 0.00019221614638015075,
"loss": 46.0,
"step": 388
},
{
"epoch": 0.05312211942234816,
"grad_norm": 0.0009842630242928863,
"learning_rate": 0.0001921754538716218,
"loss": 46.0,
"step": 389
},
{
"epoch": 0.05325868014065754,
"grad_norm": 0.0023981237318366766,
"learning_rate": 0.00019213465960414368,
"loss": 46.0,
"step": 390
},
{
"epoch": 0.053395240858966915,
"grad_norm": 0.0003951654944103211,
"learning_rate": 0.00019209376362275206,
"loss": 46.0,
"step": 391
},
{
"epoch": 0.05353180157727629,
"grad_norm": 0.0011894862400367856,
"learning_rate": 0.00019205276597259485,
"loss": 46.0,
"step": 392
},
{
"epoch": 0.05366836229558568,
"grad_norm": 0.0006607277318835258,
"learning_rate": 0.00019201166669893227,
"loss": 46.0,
"step": 393
},
{
"epoch": 0.053804923013895056,
"grad_norm": 0.0026427984703332186,
"learning_rate": 0.00019197046584713663,
"loss": 46.0,
"step": 394
},
{
"epoch": 0.053941483732204434,
"grad_norm": 0.0005352182779461145,
"learning_rate": 0.00019192916346269246,
"loss": 46.0,
"step": 395
},
{
"epoch": 0.05407804445051381,
"grad_norm": 0.0007665826706215739,
"learning_rate": 0.00019188775959119643,
"loss": 46.0,
"step": 396
},
{
"epoch": 0.05421460516882319,
"grad_norm": 0.0006750879692845047,
"learning_rate": 0.0001918462542783571,
"loss": 46.0,
"step": 397
},
{
"epoch": 0.05435116588713257,
"grad_norm": 0.0007010844419710338,
"learning_rate": 0.0001918046475699951,
"loss": 46.0,
"step": 398
},
{
"epoch": 0.05448772660544195,
"grad_norm": 0.0007815820863470435,
"learning_rate": 0.00019176293951204303,
"loss": 46.0,
"step": 399
},
{
"epoch": 0.054624287323751325,
"grad_norm": 0.0012023310409858823,
"learning_rate": 0.00019172113015054532,
"loss": 46.0,
"step": 400
},
{
"epoch": 0.0547608480420607,
"grad_norm": 0.0008429251029156148,
"learning_rate": 0.00019167921953165825,
"loss": 46.0,
"step": 401
},
{
"epoch": 0.05489740876037008,
"grad_norm": 0.0011086566373705864,
"learning_rate": 0.00019163720770164991,
"loss": 46.0,
"step": 402
},
{
"epoch": 0.05503396947867946,
"grad_norm": 0.0006057535065338016,
"learning_rate": 0.00019159509470690012,
"loss": 46.0,
"step": 403
},
{
"epoch": 0.05517053019698884,
"grad_norm": 0.0005650485400110483,
"learning_rate": 0.0001915528805939003,
"loss": 46.0,
"step": 404
},
{
"epoch": 0.055307090915298215,
"grad_norm": 0.00042634617420844734,
"learning_rate": 0.00019151056540925364,
"loss": 46.0,
"step": 405
},
{
"epoch": 0.055443651633607594,
"grad_norm": 0.0010889278491958976,
"learning_rate": 0.0001914681491996748,
"loss": 46.0,
"step": 406
},
{
"epoch": 0.05558021235191697,
"grad_norm": 0.0005564565653912723,
"learning_rate": 0.00019142563201199008,
"loss": 46.0,
"step": 407
},
{
"epoch": 0.05571677307022635,
"grad_norm": 0.0007656107773073018,
"learning_rate": 0.0001913830138931371,
"loss": 46.0,
"step": 408
},
{
"epoch": 0.05585333378853573,
"grad_norm": 0.0008188536739908159,
"learning_rate": 0.00019134029489016502,
"loss": 46.0,
"step": 409
},
{
"epoch": 0.055989894506845106,
"grad_norm": 0.0009277886711061001,
"learning_rate": 0.00019129747505023436,
"loss": 46.0,
"step": 410
},
{
"epoch": 0.056126455225154484,
"grad_norm": 0.0008007950964383781,
"learning_rate": 0.00019125455442061691,
"loss": 46.0,
"step": 411
},
{
"epoch": 0.05626301594346386,
"grad_norm": 0.000565837137401104,
"learning_rate": 0.00019121153304869584,
"loss": 46.0,
"step": 412
},
{
"epoch": 0.05639957666177324,
"grad_norm": 0.0009090682142414153,
"learning_rate": 0.00019116841098196536,
"loss": 46.0,
"step": 413
},
{
"epoch": 0.05653613738008262,
"grad_norm": 0.0010395282879471779,
"learning_rate": 0.000191125188268031,
"loss": 46.0,
"step": 414
},
{
"epoch": 0.056672698098391996,
"grad_norm": 0.0004872768186032772,
"learning_rate": 0.00019108186495460933,
"loss": 46.0,
"step": 415
},
{
"epoch": 0.056809258816701375,
"grad_norm": 0.0007016431773081422,
"learning_rate": 0.00019103844108952803,
"loss": 46.0,
"step": 416
},
{
"epoch": 0.05694581953501075,
"grad_norm": 0.0005011600442230701,
"learning_rate": 0.00019099491672072566,
"loss": 46.0,
"step": 417
},
{
"epoch": 0.05708238025332013,
"grad_norm": 0.0009304819977842271,
"learning_rate": 0.00019095129189625193,
"loss": 46.0,
"step": 418
},
{
"epoch": 0.05721894097162951,
"grad_norm": 0.0009160469635389745,
"learning_rate": 0.0001909075666642673,
"loss": 46.0,
"step": 419
},
{
"epoch": 0.05735550168993889,
"grad_norm": 0.001117922831326723,
"learning_rate": 0.00019086374107304312,
"loss": 46.0,
"step": 420
},
{
"epoch": 0.057492062408248265,
"grad_norm": 0.0009813562501221895,
"learning_rate": 0.00019081981517096152,
"loss": 46.0,
"step": 421
},
{
"epoch": 0.05762862312655764,
"grad_norm": 0.0006970268441364169,
"learning_rate": 0.00019077578900651544,
"loss": 46.0,
"step": 422
},
{
"epoch": 0.05776518384486702,
"grad_norm": 0.0004595222999341786,
"learning_rate": 0.00019073166262830834,
"loss": 46.0,
"step": 423
},
{
"epoch": 0.0579017445631764,
"grad_norm": 0.0009433355298824608,
"learning_rate": 0.00019068743608505455,
"loss": 46.0,
"step": 424
},
{
"epoch": 0.05803830528148578,
"grad_norm": 0.001090813777409494,
"learning_rate": 0.00019064310942557878,
"loss": 46.0,
"step": 425
},
{
"epoch": 0.058174865999795156,
"grad_norm": 0.00044983444968238473,
"learning_rate": 0.0001905986826988164,
"loss": 46.0,
"step": 426
},
{
"epoch": 0.05831142671810454,
"grad_norm": 0.0008178472053259611,
"learning_rate": 0.00019055415595381305,
"loss": 46.0,
"step": 427
},
{
"epoch": 0.05844798743641392,
"grad_norm": 0.0005727699608542025,
"learning_rate": 0.0001905095292397251,
"loss": 46.0,
"step": 428
},
{
"epoch": 0.0585845481547233,
"grad_norm": 0.0013587451539933681,
"learning_rate": 0.00019046480260581902,
"loss": 46.0,
"step": 429
},
{
"epoch": 0.058721108873032675,
"grad_norm": 0.0004678687546402216,
"learning_rate": 0.00019041997610147167,
"loss": 46.0,
"step": 430
},
{
"epoch": 0.05885766959134205,
"grad_norm": 0.00045540923019871116,
"learning_rate": 0.0001903750497761702,
"loss": 46.0,
"step": 431
},
{
"epoch": 0.05899423030965143,
"grad_norm": 0.0006064171902835369,
"learning_rate": 0.00019033002367951194,
"loss": 46.0,
"step": 432
},
{
"epoch": 0.05913079102796081,
"grad_norm": 0.0002765974495559931,
"learning_rate": 0.0001902848978612043,
"loss": 46.0,
"step": 433
},
{
"epoch": 0.05926735174627019,
"grad_norm": 0.000879188475664705,
"learning_rate": 0.00019023967237106492,
"loss": 46.0,
"step": 434
},
{
"epoch": 0.059403912464579565,
"grad_norm": 0.0005185411428101361,
"learning_rate": 0.00019019434725902137,
"loss": 46.0,
"step": 435
},
{
"epoch": 0.059540473182888944,
"grad_norm": 0.0008257310255430639,
"learning_rate": 0.00019014892257511118,
"loss": 46.0,
"step": 436
},
{
"epoch": 0.05967703390119832,
"grad_norm": 0.0009346020524390042,
"learning_rate": 0.00019010339836948186,
"loss": 46.0,
"step": 437
},
{
"epoch": 0.0598135946195077,
"grad_norm": 0.0010087540140375495,
"learning_rate": 0.00019005777469239076,
"loss": 46.0,
"step": 438
},
{
"epoch": 0.05995015533781708,
"grad_norm": 0.0015897548291832209,
"learning_rate": 0.00019001205159420513,
"loss": 46.0,
"step": 439
},
{
"epoch": 0.060086716056126456,
"grad_norm": 0.0007276605465449393,
"learning_rate": 0.0001899662291254018,
"loss": 46.0,
"step": 440
},
{
"epoch": 0.060223276774435834,
"grad_norm": 0.0019181488314643502,
"learning_rate": 0.00018992030733656746,
"loss": 46.0,
"step": 441
},
{
"epoch": 0.06035983749274521,
"grad_norm": 0.0009860562859103084,
"learning_rate": 0.00018987428627839843,
"loss": 46.0,
"step": 442
},
{
"epoch": 0.06049639821105459,
"grad_norm": 0.0009881850564852357,
"learning_rate": 0.0001898281660017005,
"loss": 46.0,
"step": 443
},
{
"epoch": 0.06063295892936397,
"grad_norm": 0.0016714398516342044,
"learning_rate": 0.00018978194655738917,
"loss": 46.0,
"step": 444
},
{
"epoch": 0.060769519647673347,
"grad_norm": 0.000832723337225616,
"learning_rate": 0.00018973562799648927,
"loss": 46.0,
"step": 445
},
{
"epoch": 0.060906080365982725,
"grad_norm": 0.0007889857515692711,
"learning_rate": 0.00018968921037013512,
"loss": 46.0,
"step": 446
},
{
"epoch": 0.0610426410842921,
"grad_norm": 0.001141861779615283,
"learning_rate": 0.00018964269372957038,
"loss": 46.0,
"step": 447
},
{
"epoch": 0.06117920180260148,
"grad_norm": 0.001163587556220591,
"learning_rate": 0.00018959607812614807,
"loss": 46.0,
"step": 448
},
{
"epoch": 0.06131576252091086,
"grad_norm": 0.0006048906943760812,
"learning_rate": 0.0001895493636113304,
"loss": 46.0,
"step": 449
},
{
"epoch": 0.06145232323922024,
"grad_norm": 0.0009424586896784604,
"learning_rate": 0.00018950255023668876,
"loss": 46.0,
"step": 450
},
{
"epoch": 0.061588883957529615,
"grad_norm": 0.000706047285348177,
"learning_rate": 0.00018945563805390381,
"loss": 46.0,
"step": 451
},
{
"epoch": 0.06172544467583899,
"grad_norm": 0.000651683600153774,
"learning_rate": 0.00018940862711476513,
"loss": 46.0,
"step": 452
},
{
"epoch": 0.06186200539414837,
"grad_norm": 0.001372483093291521,
"learning_rate": 0.00018936151747117141,
"loss": 46.0,
"step": 453
},
{
"epoch": 0.06199856611245775,
"grad_norm": 0.0011862257961183786,
"learning_rate": 0.0001893143091751303,
"loss": 46.0,
"step": 454
},
{
"epoch": 0.06213512683076713,
"grad_norm": 0.0015178897883743048,
"learning_rate": 0.00018926700227875833,
"loss": 46.0,
"step": 455
},
{
"epoch": 0.062271687549076506,
"grad_norm": 0.0008582820883020759,
"learning_rate": 0.0001892195968342809,
"loss": 46.0,
"step": 456
},
{
"epoch": 0.062408248267385884,
"grad_norm": 0.000795324333012104,
"learning_rate": 0.00018917209289403227,
"loss": 46.0,
"step": 457
},
{
"epoch": 0.06254480898569527,
"grad_norm": 0.001082447823137045,
"learning_rate": 0.00018912449051045527,
"loss": 46.0,
"step": 458
},
{
"epoch": 0.06268136970400465,
"grad_norm": 0.001177507103420794,
"learning_rate": 0.00018907678973610156,
"loss": 46.0,
"step": 459
},
{
"epoch": 0.06281793042231403,
"grad_norm": 0.0007584211998619139,
"learning_rate": 0.00018902899062363143,
"loss": 46.0,
"step": 460
},
{
"epoch": 0.0629544911406234,
"grad_norm": 0.0005909963510930538,
"learning_rate": 0.00018898109322581356,
"loss": 46.0,
"step": 461
},
{
"epoch": 0.06309105185893278,
"grad_norm": 0.0007048872066661716,
"learning_rate": 0.0001889330975955253,
"loss": 46.0,
"step": 462
},
{
"epoch": 0.06322761257724216,
"grad_norm": 0.0005063001881353557,
"learning_rate": 0.0001888850037857524,
"loss": 46.0,
"step": 463
},
{
"epoch": 0.06336417329555154,
"grad_norm": 0.000993978581391275,
"learning_rate": 0.000188836811849589,
"loss": 46.0,
"step": 464
},
{
"epoch": 0.06350073401386092,
"grad_norm": 0.0010828451486304402,
"learning_rate": 0.0001887885218402375,
"loss": 46.0,
"step": 465
},
{
"epoch": 0.0636372947321703,
"grad_norm": 0.0008432026370428503,
"learning_rate": 0.00018874013381100875,
"loss": 46.0,
"step": 466
},
{
"epoch": 0.06377385545047967,
"grad_norm": 0.0007918166811577976,
"learning_rate": 0.00018869164781532157,
"loss": 46.0,
"step": 467
},
{
"epoch": 0.06391041616878905,
"grad_norm": 0.0007451958954334259,
"learning_rate": 0.00018864306390670307,
"loss": 46.0,
"step": 468
},
{
"epoch": 0.06404697688709843,
"grad_norm": 0.0007615243084728718,
"learning_rate": 0.00018859438213878849,
"loss": 46.0,
"step": 469
},
{
"epoch": 0.0641835376054078,
"grad_norm": 0.00018180711776949465,
"learning_rate": 0.000188545602565321,
"loss": 46.0,
"step": 470
},
{
"epoch": 0.06432009832371718,
"grad_norm": 0.0009969191160053015,
"learning_rate": 0.0001884967252401518,
"loss": 46.0,
"step": 471
},
{
"epoch": 0.06445665904202656,
"grad_norm": 0.0008999738493002951,
"learning_rate": 0.00018844775021724004,
"loss": 46.0,
"step": 472
},
{
"epoch": 0.06459321976033594,
"grad_norm": 0.0006670115399174392,
"learning_rate": 0.00018839867755065265,
"loss": 46.0,
"step": 473
},
{
"epoch": 0.06472978047864532,
"grad_norm": 0.0011296794982627034,
"learning_rate": 0.00018834950729456433,
"loss": 46.0,
"step": 474
},
{
"epoch": 0.0648663411969547,
"grad_norm": 0.0004901388892903924,
"learning_rate": 0.0001883002395032577,
"loss": 46.0,
"step": 475
},
{
"epoch": 0.06500290191526407,
"grad_norm": 0.0017039328813552856,
"learning_rate": 0.00018825087423112282,
"loss": 46.0,
"step": 476
},
{
"epoch": 0.06513946263357345,
"grad_norm": 0.0009402755531482399,
"learning_rate": 0.00018820141153265754,
"loss": 46.0,
"step": 477
},
{
"epoch": 0.06527602335188283,
"grad_norm": 0.0008238424779847264,
"learning_rate": 0.00018815185146246716,
"loss": 46.0,
"step": 478
},
{
"epoch": 0.06541258407019221,
"grad_norm": 0.0007881290512159467,
"learning_rate": 0.00018810219407526456,
"loss": 46.0,
"step": 479
},
{
"epoch": 0.06554914478850159,
"grad_norm": 0.0010467070387676358,
"learning_rate": 0.00018805243942587,
"loss": 46.0,
"step": 480
},
{
"epoch": 0.06568570550681097,
"grad_norm": 0.0009283073595725,
"learning_rate": 0.0001880025875692111,
"loss": 46.0,
"step": 481
},
{
"epoch": 0.06582226622512034,
"grad_norm": 0.000698375515639782,
"learning_rate": 0.00018795263856032288,
"loss": 46.0,
"step": 482
},
{
"epoch": 0.06595882694342972,
"grad_norm": 0.0015646845567971468,
"learning_rate": 0.00018790259245434748,
"loss": 46.0,
"step": 483
},
{
"epoch": 0.0660953876617391,
"grad_norm": 0.0008225208730436862,
"learning_rate": 0.00018785244930653438,
"loss": 46.0,
"step": 484
},
{
"epoch": 0.06623194838004848,
"grad_norm": 0.0006926036439836025,
"learning_rate": 0.00018780220917224006,
"loss": 46.0,
"step": 485
},
{
"epoch": 0.06636850909835786,
"grad_norm": 0.000991306733340025,
"learning_rate": 0.00018775187210692815,
"loss": 46.0,
"step": 486
},
{
"epoch": 0.06650506981666723,
"grad_norm": 0.0006662954692728817,
"learning_rate": 0.00018770143816616928,
"loss": 46.0,
"step": 487
},
{
"epoch": 0.06664163053497661,
"grad_norm": 0.000875332101713866,
"learning_rate": 0.000187650907405641,
"loss": 46.0,
"step": 488
},
{
"epoch": 0.06677819125328599,
"grad_norm": 0.002434828784316778,
"learning_rate": 0.00018760027988112775,
"loss": 46.0,
"step": 489
},
{
"epoch": 0.06691475197159537,
"grad_norm": 0.002048447262495756,
"learning_rate": 0.0001875495556485208,
"loss": 46.0,
"step": 490
},
{
"epoch": 0.06705131268990475,
"grad_norm": 0.0005600312724709511,
"learning_rate": 0.00018749873476381828,
"loss": 46.0,
"step": 491
},
{
"epoch": 0.06718787340821412,
"grad_norm": 0.00041424104711040854,
"learning_rate": 0.00018744781728312479,
"loss": 46.0,
"step": 492
},
{
"epoch": 0.0673244341265235,
"grad_norm": 0.0009647294646129012,
"learning_rate": 0.0001873968032626518,
"loss": 46.0,
"step": 493
},
{
"epoch": 0.06746099484483288,
"grad_norm": 0.0007067355909384787,
"learning_rate": 0.00018734569275871726,
"loss": 46.0,
"step": 494
},
{
"epoch": 0.06759755556314226,
"grad_norm": 0.000527632946614176,
"learning_rate": 0.00018729448582774559,
"loss": 46.0,
"step": 495
},
{
"epoch": 0.06773411628145164,
"grad_norm": 0.000945943349506706,
"learning_rate": 0.00018724318252626775,
"loss": 46.0,
"step": 496
},
{
"epoch": 0.06787067699976101,
"grad_norm": 0.0016438892344012856,
"learning_rate": 0.00018719178291092106,
"loss": 46.0,
"step": 497
},
{
"epoch": 0.06800723771807039,
"grad_norm": 0.0018535932758823037,
"learning_rate": 0.00018714028703844914,
"loss": 46.0,
"step": 498
},
{
"epoch": 0.06814379843637977,
"grad_norm": 0.0012148134410381317,
"learning_rate": 0.00018708869496570192,
"loss": 46.0,
"step": 499
},
{
"epoch": 0.06828035915468915,
"grad_norm": 0.0008952637435868382,
"learning_rate": 0.00018703700674963547,
"loss": 46.0,
"step": 500
},
{
"epoch": 0.06841691987299853,
"grad_norm": 0.000657785392832011,
"learning_rate": 0.0001869852224473121,
"loss": 46.0,
"step": 501
},
{
"epoch": 0.0685534805913079,
"grad_norm": 0.0008411743910983205,
"learning_rate": 0.00018693334211590006,
"loss": 46.0,
"step": 502
},
{
"epoch": 0.06869004130961728,
"grad_norm": 0.0008810686995275319,
"learning_rate": 0.00018688136581267373,
"loss": 46.0,
"step": 503
},
{
"epoch": 0.06882660202792666,
"grad_norm": 0.0005853201728314161,
"learning_rate": 0.00018682929359501338,
"loss": 46.0,
"step": 504
},
{
"epoch": 0.06896316274623604,
"grad_norm": 0.0004718601703643799,
"learning_rate": 0.00018677712552040522,
"loss": 46.0,
"step": 505
},
{
"epoch": 0.06909972346454542,
"grad_norm": 0.0007315895054489374,
"learning_rate": 0.00018672486164644116,
"loss": 46.0,
"step": 506
},
{
"epoch": 0.0692362841828548,
"grad_norm": 0.0009641025681048632,
"learning_rate": 0.00018667250203081905,
"loss": 46.0,
"step": 507
},
{
"epoch": 0.06937284490116417,
"grad_norm": 0.0008396725752390921,
"learning_rate": 0.00018662004673134232,
"loss": 46.0,
"step": 508
},
{
"epoch": 0.06950940561947355,
"grad_norm": 0.000584542634896934,
"learning_rate": 0.00018656749580592003,
"loss": 46.0,
"step": 509
},
{
"epoch": 0.06964596633778293,
"grad_norm": 0.0005784454988315701,
"learning_rate": 0.00018651484931256685,
"loss": 46.0,
"step": 510
},
{
"epoch": 0.06978252705609231,
"grad_norm": 0.0003768317692447454,
"learning_rate": 0.00018646210730940293,
"loss": 46.0,
"step": 511
},
{
"epoch": 0.06991908777440169,
"grad_norm": 0.0012037859996780753,
"learning_rate": 0.00018640926985465388,
"loss": 46.0,
"step": 512
},
{
"epoch": 0.07005564849271108,
"grad_norm": 0.0007435141014866531,
"learning_rate": 0.00018635633700665063,
"loss": 46.0,
"step": 513
},
{
"epoch": 0.07019220921102046,
"grad_norm": 0.0009005676256492734,
"learning_rate": 0.00018630330882382951,
"loss": 46.0,
"step": 514
},
{
"epoch": 0.07032876992932983,
"grad_norm": 0.0005016371724195778,
"learning_rate": 0.00018625018536473206,
"loss": 46.0,
"step": 515
},
{
"epoch": 0.07046533064763921,
"grad_norm": 0.0012827449245378375,
"learning_rate": 0.00018619696668800492,
"loss": 46.0,
"step": 516
},
{
"epoch": 0.07060189136594859,
"grad_norm": 0.0004301304288674146,
"learning_rate": 0.0001861436528524,
"loss": 46.0,
"step": 517
},
{
"epoch": 0.07073845208425797,
"grad_norm": 0.00036730722058564425,
"learning_rate": 0.00018609024391677418,
"loss": 46.0,
"step": 518
},
{
"epoch": 0.07087501280256735,
"grad_norm": 0.0008029394666664302,
"learning_rate": 0.00018603673994008925,
"loss": 46.0,
"step": 519
},
{
"epoch": 0.07101157352087673,
"grad_norm": 0.0006513711996376514,
"learning_rate": 0.00018598314098141206,
"loss": 46.0,
"step": 520
},
{
"epoch": 0.0711481342391861,
"grad_norm": 0.00077268440509215,
"learning_rate": 0.00018592944709991426,
"loss": 46.0,
"step": 521
},
{
"epoch": 0.07128469495749548,
"grad_norm": 0.0004973417380824685,
"learning_rate": 0.00018587565835487233,
"loss": 46.0,
"step": 522
},
{
"epoch": 0.07142125567580486,
"grad_norm": 0.000682865793351084,
"learning_rate": 0.00018582177480566735,
"loss": 46.0,
"step": 523
},
{
"epoch": 0.07155781639411424,
"grad_norm": 0.0006797179230488837,
"learning_rate": 0.00018576779651178522,
"loss": 46.0,
"step": 524
},
{
"epoch": 0.07169437711242362,
"grad_norm": 0.000731765350792557,
"learning_rate": 0.00018571372353281632,
"loss": 46.0,
"step": 525
},
{
"epoch": 0.071830937830733,
"grad_norm": 0.00029318922315724194,
"learning_rate": 0.00018565955592845563,
"loss": 46.0,
"step": 526
},
{
"epoch": 0.07196749854904237,
"grad_norm": 0.0011708553647622466,
"learning_rate": 0.00018560529375850259,
"loss": 46.0,
"step": 527
},
{
"epoch": 0.07210405926735175,
"grad_norm": 0.00033764285035431385,
"learning_rate": 0.00018555093708286094,
"loss": 46.0,
"step": 528
},
{
"epoch": 0.07224061998566113,
"grad_norm": 0.0011742584174498916,
"learning_rate": 0.00018549648596153891,
"loss": 46.0,
"step": 529
},
{
"epoch": 0.0723771807039705,
"grad_norm": 0.0011071540648117661,
"learning_rate": 0.00018544194045464886,
"loss": 46.0,
"step": 530
},
{
"epoch": 0.07251374142227988,
"grad_norm": 0.0004732667875941843,
"learning_rate": 0.00018538730062240744,
"loss": 46.0,
"step": 531
},
{
"epoch": 0.07265030214058926,
"grad_norm": 0.0006753505440428853,
"learning_rate": 0.00018533256652513536,
"loss": 46.0,
"step": 532
},
{
"epoch": 0.07278686285889864,
"grad_norm": 0.0013869997346773744,
"learning_rate": 0.00018527773822325742,
"loss": 46.0,
"step": 533
},
{
"epoch": 0.07292342357720802,
"grad_norm": 0.0004685988824348897,
"learning_rate": 0.0001852228157773025,
"loss": 46.0,
"step": 534
},
{
"epoch": 0.0730599842955174,
"grad_norm": 0.0005608046194538474,
"learning_rate": 0.00018516779924790324,
"loss": 46.0,
"step": 535
},
{
"epoch": 0.07319654501382677,
"grad_norm": 0.0007237467798404396,
"learning_rate": 0.00018511268869579635,
"loss": 46.0,
"step": 536
},
{
"epoch": 0.07333310573213615,
"grad_norm": 0.0007371974061243236,
"learning_rate": 0.00018505748418182213,
"loss": 46.0,
"step": 537
},
{
"epoch": 0.07346966645044553,
"grad_norm": 0.0021626208908855915,
"learning_rate": 0.0001850021857669248,
"loss": 46.0,
"step": 538
},
{
"epoch": 0.07360622716875491,
"grad_norm": 0.0044371578842401505,
"learning_rate": 0.0001849467935121521,
"loss": 46.0,
"step": 539
},
{
"epoch": 0.07374278788706429,
"grad_norm": 0.001232149894349277,
"learning_rate": 0.00018489130747865548,
"loss": 46.0,
"step": 540
},
{
"epoch": 0.07387934860537367,
"grad_norm": 0.0007343983743339777,
"learning_rate": 0.00018483572772768982,
"loss": 46.0,
"step": 541
},
{
"epoch": 0.07401590932368304,
"grad_norm": 0.00047310179797932506,
"learning_rate": 0.00018478005432061352,
"loss": 46.0,
"step": 542
},
{
"epoch": 0.07415247004199242,
"grad_norm": 0.0008101825369521976,
"learning_rate": 0.00018472428731888837,
"loss": 46.0,
"step": 543
},
{
"epoch": 0.0742890307603018,
"grad_norm": 0.001507743145339191,
"learning_rate": 0.00018466842678407946,
"loss": 46.0,
"step": 544
},
{
"epoch": 0.07442559147861118,
"grad_norm": 0.0009909087093546987,
"learning_rate": 0.00018461247277785513,
"loss": 46.0,
"step": 545
},
{
"epoch": 0.07456215219692056,
"grad_norm": 0.0013652259949594736,
"learning_rate": 0.000184556425361987,
"loss": 46.0,
"step": 546
},
{
"epoch": 0.07469871291522993,
"grad_norm": 0.0008130900096148252,
"learning_rate": 0.0001845002845983497,
"loss": 46.0,
"step": 547
},
{
"epoch": 0.07483527363353931,
"grad_norm": 0.0012740385718643665,
"learning_rate": 0.00018444405054892092,
"loss": 46.0,
"step": 548
},
{
"epoch": 0.07497183435184869,
"grad_norm": 0.000746730831451714,
"learning_rate": 0.0001843877232757815,
"loss": 46.0,
"step": 549
},
{
"epoch": 0.07510839507015807,
"grad_norm": 0.0019112954614683986,
"learning_rate": 0.0001843313028411149,
"loss": 46.0,
"step": 550
},
{
"epoch": 0.07524495578846745,
"grad_norm": 0.0005015329807065427,
"learning_rate": 0.0001842747893072077,
"loss": 46.0,
"step": 551
},
{
"epoch": 0.07538151650677682,
"grad_norm": 0.0004962061066180468,
"learning_rate": 0.00018421818273644912,
"loss": 46.0,
"step": 552
},
{
"epoch": 0.0755180772250862,
"grad_norm": 0.00037836559931747615,
"learning_rate": 0.00018416148319133117,
"loss": 46.0,
"step": 553
},
{
"epoch": 0.07565463794339558,
"grad_norm": 0.0003992395068053156,
"learning_rate": 0.0001841046907344484,
"loss": 46.0,
"step": 554
},
{
"epoch": 0.07579119866170496,
"grad_norm": 0.0004277300031390041,
"learning_rate": 0.000184047805428498,
"loss": 46.0,
"step": 555
},
{
"epoch": 0.07592775938001434,
"grad_norm": 0.0005832565948367119,
"learning_rate": 0.00018399082733627965,
"loss": 46.0,
"step": 556
},
{
"epoch": 0.07606432009832371,
"grad_norm": 0.0006913339020684361,
"learning_rate": 0.00018393375652069548,
"loss": 46.0,
"step": 557
},
{
"epoch": 0.0762008808166331,
"grad_norm": 0.00086840835865587,
"learning_rate": 0.00018387659304474994,
"loss": 46.0,
"step": 558
},
{
"epoch": 0.07633744153494247,
"grad_norm": 0.0007002664497122169,
"learning_rate": 0.00018381933697154986,
"loss": 46.0,
"step": 559
},
{
"epoch": 0.07647400225325185,
"grad_norm": 0.0008870341116562486,
"learning_rate": 0.00018376198836430417,
"loss": 46.0,
"step": 560
},
{
"epoch": 0.07661056297156123,
"grad_norm": 0.0006294609629549086,
"learning_rate": 0.00018370454728632404,
"loss": 46.0,
"step": 561
},
{
"epoch": 0.0767471236898706,
"grad_norm": 0.0004686205938924104,
"learning_rate": 0.00018364701380102266,
"loss": 46.0,
"step": 562
},
{
"epoch": 0.07688368440817998,
"grad_norm": 0.0008394161704927683,
"learning_rate": 0.0001835893879719154,
"loss": 46.0,
"step": 563
},
{
"epoch": 0.07702024512648936,
"grad_norm": 0.0006610259879380465,
"learning_rate": 0.00018353166986261935,
"loss": 46.0,
"step": 564
},
{
"epoch": 0.07715680584479874,
"grad_norm": 0.000717335322406143,
"learning_rate": 0.0001834738595368536,
"loss": 46.0,
"step": 565
},
{
"epoch": 0.07729336656310812,
"grad_norm": 0.0005708184908144176,
"learning_rate": 0.00018341595705843907,
"loss": 46.0,
"step": 566
},
{
"epoch": 0.0774299272814175,
"grad_norm": 0.0009451278601773083,
"learning_rate": 0.00018335796249129832,
"loss": 46.0,
"step": 567
},
{
"epoch": 0.07756648799972687,
"grad_norm": 0.0005919419345445931,
"learning_rate": 0.0001832998758994556,
"loss": 46.0,
"step": 568
},
{
"epoch": 0.07770304871803625,
"grad_norm": 0.0006907092756591737,
"learning_rate": 0.00018324169734703683,
"loss": 46.0,
"step": 569
},
{
"epoch": 0.07783960943634563,
"grad_norm": 0.0008807244012132287,
"learning_rate": 0.00018318342689826938,
"loss": 46.0,
"step": 570
},
{
"epoch": 0.07797617015465501,
"grad_norm": 0.0006545766955241561,
"learning_rate": 0.00018312506461748207,
"loss": 46.0,
"step": 571
},
{
"epoch": 0.07811273087296439,
"grad_norm": 0.00029055686900392175,
"learning_rate": 0.0001830666105691051,
"loss": 46.0,
"step": 572
},
{
"epoch": 0.07824929159127376,
"grad_norm": 0.000587637594435364,
"learning_rate": 0.00018300806481767005,
"loss": 46.0,
"step": 573
},
{
"epoch": 0.07838585230958314,
"grad_norm": 0.0003420762368477881,
"learning_rate": 0.00018294942742780966,
"loss": 46.0,
"step": 574
},
{
"epoch": 0.07852241302789252,
"grad_norm": 0.00046790859778411686,
"learning_rate": 0.00018289069846425782,
"loss": 46.0,
"step": 575
},
{
"epoch": 0.0786589737462019,
"grad_norm": 0.00026647665072232485,
"learning_rate": 0.00018283187799184958,
"loss": 46.0,
"step": 576
},
{
"epoch": 0.07879553446451128,
"grad_norm": 0.0015470724320039153,
"learning_rate": 0.000182772966075521,
"loss": 46.0,
"step": 577
},
{
"epoch": 0.07893209518282066,
"grad_norm": 0.00032806419767439365,
"learning_rate": 0.00018271396278030905,
"loss": 46.0,
"step": 578
},
{
"epoch": 0.07906865590113003,
"grad_norm": 0.0004820665344595909,
"learning_rate": 0.0001826548681713516,
"loss": 46.0,
"step": 579
},
{
"epoch": 0.07920521661943941,
"grad_norm": 0.0006422748556360602,
"learning_rate": 0.00018259568231388738,
"loss": 46.0,
"step": 580
},
{
"epoch": 0.0793417773377488,
"grad_norm": 0.0002866037539206445,
"learning_rate": 0.00018253640527325578,
"loss": 46.0,
"step": 581
},
{
"epoch": 0.07947833805605818,
"grad_norm": 0.0005615049158222973,
"learning_rate": 0.00018247703711489686,
"loss": 46.0,
"step": 582
},
{
"epoch": 0.07961489877436756,
"grad_norm": 0.001207325840368867,
"learning_rate": 0.00018241757790435134,
"loss": 46.0,
"step": 583
},
{
"epoch": 0.07975145949267694,
"grad_norm": 0.0005774226738139987,
"learning_rate": 0.00018235802770726037,
"loss": 46.0,
"step": 584
},
{
"epoch": 0.07988802021098632,
"grad_norm": 0.0005512305651791394,
"learning_rate": 0.00018229838658936564,
"loss": 46.0,
"step": 585
},
{
"epoch": 0.0800245809292957,
"grad_norm": 0.0021268404088914394,
"learning_rate": 0.00018223865461650912,
"loss": 46.0,
"step": 586
},
{
"epoch": 0.08016114164760507,
"grad_norm": 0.0017917539225891232,
"learning_rate": 0.00018217883185463315,
"loss": 46.0,
"step": 587
},
{
"epoch": 0.08029770236591445,
"grad_norm": 0.004704636055976152,
"learning_rate": 0.00018211891836978028,
"loss": 46.0,
"step": 588
},
{
"epoch": 0.08043426308422383,
"grad_norm": 0.0016004083445295691,
"learning_rate": 0.00018205891422809316,
"loss": 46.0,
"step": 589
},
{
"epoch": 0.0805708238025332,
"grad_norm": 0.0008631657692603767,
"learning_rate": 0.0001819988194958146,
"loss": 46.0,
"step": 590
},
{
"epoch": 0.08070738452084258,
"grad_norm": 0.000875883677508682,
"learning_rate": 0.00018193863423928742,
"loss": 46.0,
"step": 591
},
{
"epoch": 0.08084394523915196,
"grad_norm": 0.0007616846705786884,
"learning_rate": 0.0001818783585249543,
"loss": 46.0,
"step": 592
},
{
"epoch": 0.08098050595746134,
"grad_norm": 0.0017507924931123853,
"learning_rate": 0.00018181799241935787,
"loss": 46.0,
"step": 593
},
{
"epoch": 0.08111706667577072,
"grad_norm": 0.0015062005259096622,
"learning_rate": 0.00018175753598914047,
"loss": 46.0,
"step": 594
},
{
"epoch": 0.0812536273940801,
"grad_norm": 0.0009979240130633116,
"learning_rate": 0.0001816969893010442,
"loss": 46.0,
"step": 595
},
{
"epoch": 0.08139018811238947,
"grad_norm": 0.0014799052150920033,
"learning_rate": 0.00018163635242191085,
"loss": 46.0,
"step": 596
},
{
"epoch": 0.08152674883069885,
"grad_norm": 0.002407307270914316,
"learning_rate": 0.00018157562541868164,
"loss": 46.0,
"step": 597
},
{
"epoch": 0.08166330954900823,
"grad_norm": 0.001063771778717637,
"learning_rate": 0.00018151480835839741,
"loss": 46.0,
"step": 598
},
{
"epoch": 0.08179987026731761,
"grad_norm": 0.00132046639919281,
"learning_rate": 0.0001814539013081984,
"loss": 46.0,
"step": 599
},
{
"epoch": 0.08193643098562699,
"grad_norm": 0.001031374093145132,
"learning_rate": 0.00018139290433532416,
"loss": 46.0,
"step": 600
},
{
"epoch": 0.08193643098562699,
"eval_loss": 11.5,
"eval_runtime": 20.7447,
"eval_samples_per_second": 148.665,
"eval_steps_per_second": 74.332,
"step": 600
},
{
"epoch": 0.08207299170393637,
"grad_norm": 0.0006209348211996257,
"learning_rate": 0.00018133181750711348,
"loss": 46.0,
"step": 601
},
{
"epoch": 0.08220955242224574,
"grad_norm": 0.0009648207924328744,
"learning_rate": 0.00018127064089100447,
"loss": 46.0,
"step": 602
},
{
"epoch": 0.08234611314055512,
"grad_norm": 0.0010155495256185532,
"learning_rate": 0.00018120937455453424,
"loss": 46.0,
"step": 603
},
{
"epoch": 0.0824826738588645,
"grad_norm": 0.0008265697979368269,
"learning_rate": 0.000181148018565339,
"loss": 46.0,
"step": 604
},
{
"epoch": 0.08261923457717388,
"grad_norm": 0.0006557560409419239,
"learning_rate": 0.00018108657299115396,
"loss": 46.0,
"step": 605
},
{
"epoch": 0.08275579529548326,
"grad_norm": 0.0003027521597687155,
"learning_rate": 0.0001810250378998132,
"loss": 46.0,
"step": 606
},
{
"epoch": 0.08289235601379263,
"grad_norm": 0.00038604583824053407,
"learning_rate": 0.00018096341335924955,
"loss": 46.0,
"step": 607
},
{
"epoch": 0.08302891673210201,
"grad_norm": 0.0011505828006193042,
"learning_rate": 0.00018090169943749476,
"loss": 46.0,
"step": 608
},
{
"epoch": 0.08316547745041139,
"grad_norm": 0.00021457420371007174,
"learning_rate": 0.00018083989620267907,
"loss": 46.0,
"step": 609
},
{
"epoch": 0.08330203816872077,
"grad_norm": 0.000474292115541175,
"learning_rate": 0.0001807780037230315,
"loss": 46.0,
"step": 610
},
{
"epoch": 0.08343859888703015,
"grad_norm": 0.00048646898358128965,
"learning_rate": 0.00018071602206687942,
"loss": 46.0,
"step": 611
},
{
"epoch": 0.08357515960533952,
"grad_norm": 0.0005354878958314657,
"learning_rate": 0.00018065395130264876,
"loss": 46.0,
"step": 612
},
{
"epoch": 0.0837117203236489,
"grad_norm": 0.00048499630065634847,
"learning_rate": 0.00018059179149886378,
"loss": 46.0,
"step": 613
},
{
"epoch": 0.08384828104195828,
"grad_norm": 0.0008654086268506944,
"learning_rate": 0.00018052954272414706,
"loss": 46.0,
"step": 614
},
{
"epoch": 0.08398484176026766,
"grad_norm": 0.0005253584822639823,
"learning_rate": 0.0001804672050472194,
"loss": 46.0,
"step": 615
},
{
"epoch": 0.08412140247857704,
"grad_norm": 0.0017661650199443102,
"learning_rate": 0.0001804047785368997,
"loss": 46.0,
"step": 616
},
{
"epoch": 0.08425796319688642,
"grad_norm": 0.0003864025929942727,
"learning_rate": 0.00018034226326210496,
"loss": 46.0,
"step": 617
},
{
"epoch": 0.0843945239151958,
"grad_norm": 0.000856466474942863,
"learning_rate": 0.00018027965929185024,
"loss": 46.0,
"step": 618
},
{
"epoch": 0.08453108463350517,
"grad_norm": 0.0004072580486536026,
"learning_rate": 0.00018021696669524842,
"loss": 46.0,
"step": 619
},
{
"epoch": 0.08466764535181455,
"grad_norm": 0.0010099663631990552,
"learning_rate": 0.0001801541855415102,
"loss": 46.0,
"step": 620
},
{
"epoch": 0.08480420607012393,
"grad_norm": 0.001016309019178152,
"learning_rate": 0.00018009131589994418,
"loss": 46.0,
"step": 621
},
{
"epoch": 0.0849407667884333,
"grad_norm": 0.001154162921011448,
"learning_rate": 0.00018002835783995652,
"loss": 46.0,
"step": 622
},
{
"epoch": 0.08507732750674268,
"grad_norm": 0.0006390147027559578,
"learning_rate": 0.00017996531143105106,
"loss": 46.0,
"step": 623
},
{
"epoch": 0.08521388822505206,
"grad_norm": 0.0012196438619866967,
"learning_rate": 0.00017990217674282915,
"loss": 46.0,
"step": 624
},
{
"epoch": 0.08535044894336144,
"grad_norm": 0.0003661528753582388,
"learning_rate": 0.0001798389538449896,
"loss": 46.0,
"step": 625
},
{
"epoch": 0.08548700966167082,
"grad_norm": 0.0006461184821091592,
"learning_rate": 0.0001797756428073286,
"loss": 46.0,
"step": 626
},
{
"epoch": 0.0856235703799802,
"grad_norm": 0.0010678149992600083,
"learning_rate": 0.00017971224369973964,
"loss": 46.0,
"step": 627
},
{
"epoch": 0.08576013109828957,
"grad_norm": 0.0006297577056102455,
"learning_rate": 0.00017964875659221344,
"loss": 46.0,
"step": 628
},
{
"epoch": 0.08589669181659895,
"grad_norm": 0.000396767514757812,
"learning_rate": 0.00017958518155483788,
"loss": 46.0,
"step": 629
},
{
"epoch": 0.08603325253490833,
"grad_norm": 0.0011105970479547977,
"learning_rate": 0.00017952151865779792,
"loss": 46.0,
"step": 630
},
{
"epoch": 0.08616981325321771,
"grad_norm": 0.0012717852368950844,
"learning_rate": 0.00017945776797137543,
"loss": 46.0,
"step": 631
},
{
"epoch": 0.08630637397152709,
"grad_norm": 0.0007417348097078502,
"learning_rate": 0.00017939392956594933,
"loss": 46.0,
"step": 632
},
{
"epoch": 0.08644293468983646,
"grad_norm": 0.00036522196023724973,
"learning_rate": 0.00017933000351199533,
"loss": 46.0,
"step": 633
},
{
"epoch": 0.08657949540814584,
"grad_norm": 0.0013521965593099594,
"learning_rate": 0.00017926598988008582,
"loss": 46.0,
"step": 634
},
{
"epoch": 0.08671605612645522,
"grad_norm": 0.0007052735309116542,
"learning_rate": 0.00017920188874089,
"loss": 46.0,
"step": 635
},
{
"epoch": 0.0868526168447646,
"grad_norm": 0.0010762631427496672,
"learning_rate": 0.00017913770016517354,
"loss": 46.0,
"step": 636
},
{
"epoch": 0.08698917756307398,
"grad_norm": 0.002226311946287751,
"learning_rate": 0.0001790734242237988,
"loss": 46.0,
"step": 637
},
{
"epoch": 0.08712573828138336,
"grad_norm": 0.004666191525757313,
"learning_rate": 0.00017900906098772444,
"loss": 46.0,
"step": 638
},
{
"epoch": 0.08726229899969273,
"grad_norm": 0.0010003356728702784,
"learning_rate": 0.00017894461052800557,
"loss": 46.0,
"step": 639
},
{
"epoch": 0.08739885971800211,
"grad_norm": 0.0011451984755694866,
"learning_rate": 0.00017888007291579357,
"loss": 46.0,
"step": 640
},
{
"epoch": 0.08753542043631149,
"grad_norm": 0.0011838976060971618,
"learning_rate": 0.000178815448222336,
"loss": 46.0,
"step": 641
},
{
"epoch": 0.08767198115462087,
"grad_norm": 0.0011408502468839288,
"learning_rate": 0.0001787507365189767,
"loss": 46.0,
"step": 642
},
{
"epoch": 0.08780854187293025,
"grad_norm": 0.0007302387966774404,
"learning_rate": 0.00017868593787715535,
"loss": 46.0,
"step": 643
},
{
"epoch": 0.08794510259123962,
"grad_norm": 0.0010645872680470347,
"learning_rate": 0.00017862105236840777,
"loss": 46.0,
"step": 644
},
{
"epoch": 0.088081663309549,
"grad_norm": 0.0014955222140997648,
"learning_rate": 0.00017855608006436558,
"loss": 46.0,
"step": 645
},
{
"epoch": 0.08821822402785838,
"grad_norm": 0.0011010526213794947,
"learning_rate": 0.0001784910210367563,
"loss": 46.0,
"step": 646
},
{
"epoch": 0.08835478474616776,
"grad_norm": 0.0005369166028685868,
"learning_rate": 0.00017842587535740314,
"loss": 46.0,
"step": 647
},
{
"epoch": 0.08849134546447714,
"grad_norm": 0.0007034659502096474,
"learning_rate": 0.00017836064309822503,
"loss": 46.0,
"step": 648
},
{
"epoch": 0.08862790618278653,
"grad_norm": 0.0008996776887215674,
"learning_rate": 0.00017829532433123635,
"loss": 46.0,
"step": 649
},
{
"epoch": 0.0887644669010959,
"grad_norm": 0.001349107245914638,
"learning_rate": 0.00017822991912854713,
"loss": 46.0,
"step": 650
},
{
"epoch": 0.08890102761940528,
"grad_norm": 0.00062900735065341,
"learning_rate": 0.00017816442756236276,
"loss": 46.0,
"step": 651
},
{
"epoch": 0.08903758833771466,
"grad_norm": 0.000961163139436394,
"learning_rate": 0.00017809884970498396,
"loss": 46.0,
"step": 652
},
{
"epoch": 0.08917414905602404,
"grad_norm": 0.0008951184572651982,
"learning_rate": 0.00017803318562880673,
"loss": 46.0,
"step": 653
},
{
"epoch": 0.08931070977433342,
"grad_norm": 0.000987289589829743,
"learning_rate": 0.00017796743540632223,
"loss": 46.0,
"step": 654
},
{
"epoch": 0.0894472704926428,
"grad_norm": 0.0005511721828952432,
"learning_rate": 0.0001779015991101168,
"loss": 46.0,
"step": 655
},
{
"epoch": 0.08958383121095218,
"grad_norm": 0.0008138107368722558,
"learning_rate": 0.00017783567681287166,
"loss": 46.0,
"step": 656
},
{
"epoch": 0.08972039192926155,
"grad_norm": 0.0005222151521593332,
"learning_rate": 0.00017776966858736314,
"loss": 46.0,
"step": 657
},
{
"epoch": 0.08985695264757093,
"grad_norm": 0.0005912959459237754,
"learning_rate": 0.00017770357450646232,
"loss": 46.0,
"step": 658
},
{
"epoch": 0.08999351336588031,
"grad_norm": 0.00042330275755375624,
"learning_rate": 0.00017763739464313506,
"loss": 46.0,
"step": 659
},
{
"epoch": 0.09013007408418969,
"grad_norm": 0.0006147885578684509,
"learning_rate": 0.000177571129070442,
"loss": 46.0,
"step": 660
},
{
"epoch": 0.09026663480249907,
"grad_norm": 0.0013564558466896415,
"learning_rate": 0.00017750477786153832,
"loss": 46.0,
"step": 661
},
{
"epoch": 0.09040319552080844,
"grad_norm": 0.0008140000863932073,
"learning_rate": 0.0001774383410896738,
"loss": 46.0,
"step": 662
},
{
"epoch": 0.09053975623911782,
"grad_norm": 0.0005956863751634955,
"learning_rate": 0.00017737181882819262,
"loss": 46.0,
"step": 663
},
{
"epoch": 0.0906763169574272,
"grad_norm": 0.0005575847462750971,
"learning_rate": 0.0001773052111505334,
"loss": 46.0,
"step": 664
},
{
"epoch": 0.09081287767573658,
"grad_norm": 0.00038412457797676325,
"learning_rate": 0.00017723851813022901,
"loss": 46.0,
"step": 665
},
{
"epoch": 0.09094943839404596,
"grad_norm": 0.0009578867466188967,
"learning_rate": 0.00017717173984090658,
"loss": 46.0,
"step": 666
},
{
"epoch": 0.09108599911235533,
"grad_norm": 0.0004775691486429423,
"learning_rate": 0.00017710487635628735,
"loss": 46.0,
"step": 667
},
{
"epoch": 0.09122255983066471,
"grad_norm": 0.0006319622043520212,
"learning_rate": 0.00017703792775018655,
"loss": 46.0,
"step": 668
},
{
"epoch": 0.09135912054897409,
"grad_norm": 0.0006505327764898539,
"learning_rate": 0.0001769708940965135,
"loss": 46.0,
"step": 669
},
{
"epoch": 0.09149568126728347,
"grad_norm": 0.00035890881554223597,
"learning_rate": 0.00017690377546927133,
"loss": 46.0,
"step": 670
},
{
"epoch": 0.09163224198559285,
"grad_norm": 0.0005791863077320158,
"learning_rate": 0.000176836571942557,
"loss": 46.0,
"step": 671
},
{
"epoch": 0.09176880270390222,
"grad_norm": 0.0005439603119157255,
"learning_rate": 0.00017676928359056123,
"loss": 46.0,
"step": 672
},
{
"epoch": 0.0919053634222116,
"grad_norm": 0.0006497172289527953,
"learning_rate": 0.0001767019104875683,
"loss": 46.0,
"step": 673
},
{
"epoch": 0.09204192414052098,
"grad_norm": 0.0004997221985831857,
"learning_rate": 0.00017663445270795611,
"loss": 46.0,
"step": 674
},
{
"epoch": 0.09217848485883036,
"grad_norm": 0.0010348627110943198,
"learning_rate": 0.0001765669103261961,
"loss": 46.0,
"step": 675
},
{
"epoch": 0.09231504557713974,
"grad_norm": 0.0006982755148783326,
"learning_rate": 0.00017649928341685297,
"loss": 46.0,
"step": 676
},
{
"epoch": 0.09245160629544912,
"grad_norm": 0.0008973072981461883,
"learning_rate": 0.00017643157205458483,
"loss": 46.0,
"step": 677
},
{
"epoch": 0.0925881670137585,
"grad_norm": 0.0009211792494170368,
"learning_rate": 0.00017636377631414303,
"loss": 46.0,
"step": 678
},
{
"epoch": 0.09272472773206787,
"grad_norm": 0.0013491098070517182,
"learning_rate": 0.00017629589627037203,
"loss": 46.0,
"step": 679
},
{
"epoch": 0.09286128845037725,
"grad_norm": 0.0011835723416879773,
"learning_rate": 0.00017622793199820934,
"loss": 46.0,
"step": 680
},
{
"epoch": 0.09299784916868663,
"grad_norm": 0.0003937036672141403,
"learning_rate": 0.0001761598835726855,
"loss": 46.0,
"step": 681
},
{
"epoch": 0.093134409886996,
"grad_norm": 0.00029390319832600653,
"learning_rate": 0.00017609175106892395,
"loss": 46.0,
"step": 682
},
{
"epoch": 0.09327097060530538,
"grad_norm": 0.0005982140428386629,
"learning_rate": 0.00017602353456214095,
"loss": 46.0,
"step": 683
},
{
"epoch": 0.09340753132361476,
"grad_norm": 0.0007088780985213816,
"learning_rate": 0.00017595523412764549,
"loss": 46.0,
"step": 684
},
{
"epoch": 0.09354409204192414,
"grad_norm": 0.000541395100299269,
"learning_rate": 0.0001758868498408392,
"loss": 46.0,
"step": 685
},
{
"epoch": 0.09368065276023352,
"grad_norm": 0.0010750001529231668,
"learning_rate": 0.0001758183817772163,
"loss": 46.0,
"step": 686
},
{
"epoch": 0.0938172134785429,
"grad_norm": 0.0009917879942804575,
"learning_rate": 0.00017574983001236345,
"loss": 46.0,
"step": 687
},
{
"epoch": 0.09395377419685227,
"grad_norm": 0.0007594460621476173,
"learning_rate": 0.00017568119462195978,
"loss": 46.0,
"step": 688
},
{
"epoch": 0.09409033491516165,
"grad_norm": 0.0013289398048073053,
"learning_rate": 0.00017561247568177672,
"loss": 46.0,
"step": 689
},
{
"epoch": 0.09422689563347103,
"grad_norm": 0.002443774603307247,
"learning_rate": 0.00017554367326767792,
"loss": 46.0,
"step": 690
},
{
"epoch": 0.09436345635178041,
"grad_norm": 0.000834045116789639,
"learning_rate": 0.0001754747874556192,
"loss": 46.0,
"step": 691
},
{
"epoch": 0.09450001707008979,
"grad_norm": 0.0008974446100182831,
"learning_rate": 0.0001754058183216484,
"loss": 46.0,
"step": 692
},
{
"epoch": 0.09463657778839916,
"grad_norm": 0.0006684563704766333,
"learning_rate": 0.00017533676594190544,
"loss": 46.0,
"step": 693
},
{
"epoch": 0.09477313850670854,
"grad_norm": 0.0005444415146484971,
"learning_rate": 0.00017526763039262206,
"loss": 46.0,
"step": 694
},
{
"epoch": 0.09490969922501792,
"grad_norm": 0.00041295934352092445,
"learning_rate": 0.00017519841175012184,
"loss": 46.0,
"step": 695
},
{
"epoch": 0.0950462599433273,
"grad_norm": 0.0014025474665686488,
"learning_rate": 0.0001751291100908201,
"loss": 46.0,
"step": 696
},
{
"epoch": 0.09518282066163668,
"grad_norm": 0.0016710846684873104,
"learning_rate": 0.0001750597254912238,
"loss": 46.0,
"step": 697
},
{
"epoch": 0.09531938137994606,
"grad_norm": 0.002275792183354497,
"learning_rate": 0.00017499025802793146,
"loss": 46.0,
"step": 698
},
{
"epoch": 0.09545594209825543,
"grad_norm": 0.001069681253284216,
"learning_rate": 0.0001749207077776331,
"loss": 46.0,
"step": 699
},
{
"epoch": 0.09559250281656481,
"grad_norm": 0.0005686444346792996,
"learning_rate": 0.00017485107481711012,
"loss": 46.0,
"step": 700
},
{
"epoch": 0.09572906353487419,
"grad_norm": 0.0004446564707905054,
"learning_rate": 0.00017478135922323522,
"loss": 46.0,
"step": 701
},
{
"epoch": 0.09586562425318357,
"grad_norm": 0.0005210356903262436,
"learning_rate": 0.00017471156107297232,
"loss": 46.0,
"step": 702
},
{
"epoch": 0.09600218497149295,
"grad_norm": 0.0006958620506338775,
"learning_rate": 0.00017464168044337654,
"loss": 46.0,
"step": 703
},
{
"epoch": 0.09613874568980232,
"grad_norm": 0.000465748249553144,
"learning_rate": 0.00017457171741159394,
"loss": 46.0,
"step": 704
},
{
"epoch": 0.0962753064081117,
"grad_norm": 0.0003542457125149667,
"learning_rate": 0.0001745016720548617,
"loss": 46.0,
"step": 705
},
{
"epoch": 0.09641186712642108,
"grad_norm": 0.0006130662513896823,
"learning_rate": 0.00017443154445050775,
"loss": 46.0,
"step": 706
},
{
"epoch": 0.09654842784473046,
"grad_norm": 0.0005357970949262381,
"learning_rate": 0.00017436133467595087,
"loss": 46.0,
"step": 707
},
{
"epoch": 0.09668498856303984,
"grad_norm": 0.0003605932288337499,
"learning_rate": 0.00017429104280870057,
"loss": 46.0,
"step": 708
},
{
"epoch": 0.09682154928134921,
"grad_norm": 0.0004822358023375273,
"learning_rate": 0.00017422066892635696,
"loss": 46.0,
"step": 709
},
{
"epoch": 0.09695810999965859,
"grad_norm": 0.000799324014224112,
"learning_rate": 0.0001741502131066107,
"loss": 46.0,
"step": 710
},
{
"epoch": 0.09709467071796797,
"grad_norm": 0.0005530801718123257,
"learning_rate": 0.00017407967542724297,
"loss": 46.0,
"step": 711
},
{
"epoch": 0.09723123143627735,
"grad_norm": 0.0008446394931524992,
"learning_rate": 0.0001740090559661252,
"loss": 46.0,
"step": 712
},
{
"epoch": 0.09736779215458673,
"grad_norm": 0.0004706636827904731,
"learning_rate": 0.0001739383548012192,
"loss": 46.0,
"step": 713
},
{
"epoch": 0.0975043528728961,
"grad_norm": 0.0003858576819766313,
"learning_rate": 0.00017386757201057692,
"loss": 46.0,
"step": 714
},
{
"epoch": 0.09764091359120548,
"grad_norm": 0.0006433392409235239,
"learning_rate": 0.00017379670767234045,
"loss": 46.0,
"step": 715
},
{
"epoch": 0.09777747430951486,
"grad_norm": 0.00046265096170827746,
"learning_rate": 0.0001737257618647419,
"loss": 46.0,
"step": 716
},
{
"epoch": 0.09791403502782425,
"grad_norm": 0.0004183817654848099,
"learning_rate": 0.00017365473466610337,
"loss": 46.0,
"step": 717
},
{
"epoch": 0.09805059574613363,
"grad_norm": 0.0005469456664286554,
"learning_rate": 0.0001735836261548367,
"loss": 46.0,
"step": 718
},
{
"epoch": 0.09818715646444301,
"grad_norm": 0.000383463193429634,
"learning_rate": 0.00017351243640944362,
"loss": 46.0,
"step": 719
},
{
"epoch": 0.09832371718275239,
"grad_norm": 0.0005470316973514855,
"learning_rate": 0.00017344116550851543,
"loss": 46.0,
"step": 720
},
{
"epoch": 0.09846027790106177,
"grad_norm": 0.00037759976112283766,
"learning_rate": 0.00017336981353073314,
"loss": 46.0,
"step": 721
},
{
"epoch": 0.09859683861937114,
"grad_norm": 0.0006073216791264713,
"learning_rate": 0.00017329838055486717,
"loss": 46.0,
"step": 722
},
{
"epoch": 0.09873339933768052,
"grad_norm": 0.0004972846945747733,
"learning_rate": 0.00017322686665977737,
"loss": 46.0,
"step": 723
},
{
"epoch": 0.0988699600559899,
"grad_norm": 0.0004591084725689143,
"learning_rate": 0.00017315527192441298,
"loss": 46.0,
"step": 724
},
{
"epoch": 0.09900652077429928,
"grad_norm": 0.0008169691427610815,
"learning_rate": 0.00017308359642781242,
"loss": 46.0,
"step": 725
},
{
"epoch": 0.09914308149260866,
"grad_norm": 0.0005359476199373603,
"learning_rate": 0.00017301184024910333,
"loss": 46.0,
"step": 726
},
{
"epoch": 0.09927964221091803,
"grad_norm": 0.0005213018739596009,
"learning_rate": 0.0001729400034675024,
"loss": 46.0,
"step": 727
},
{
"epoch": 0.09941620292922741,
"grad_norm": 0.0006647670525126159,
"learning_rate": 0.00017286808616231522,
"loss": 46.0,
"step": 728
},
{
"epoch": 0.09955276364753679,
"grad_norm": 0.0007033472065813839,
"learning_rate": 0.00017279608841293639,
"loss": 46.0,
"step": 729
},
{
"epoch": 0.09968932436584617,
"grad_norm": 0.0009473874815739691,
"learning_rate": 0.00017272401029884933,
"loss": 46.0,
"step": 730
},
{
"epoch": 0.09982588508415555,
"grad_norm": 0.0005871194880455732,
"learning_rate": 0.00017265185189962608,
"loss": 46.0,
"step": 731
},
{
"epoch": 0.09996244580246492,
"grad_norm": 0.00048681360203772783,
"learning_rate": 0.00017257961329492728,
"loss": 46.0,
"step": 732
},
{
"epoch": 0.1000990065207743,
"grad_norm": 0.0005745171220041811,
"learning_rate": 0.00017250729456450234,
"loss": 46.0,
"step": 733
},
{
"epoch": 0.10023556723908368,
"grad_norm": 0.00050855748122558,
"learning_rate": 0.0001724348957881889,
"loss": 46.0,
"step": 734
},
{
"epoch": 0.10037212795739306,
"grad_norm": 0.00042842066613957286,
"learning_rate": 0.00017236241704591304,
"loss": 46.0,
"step": 735
},
{
"epoch": 0.10050868867570244,
"grad_norm": 0.0010201798286288977,
"learning_rate": 0.00017228985841768915,
"loss": 46.0,
"step": 736
},
{
"epoch": 0.10064524939401182,
"grad_norm": 0.0005850894376635551,
"learning_rate": 0.00017221721998361976,
"loss": 46.0,
"step": 737
},
{
"epoch": 0.1007818101123212,
"grad_norm": 0.0009064356563612819,
"learning_rate": 0.00017214450182389559,
"loss": 46.0,
"step": 738
},
{
"epoch": 0.10091837083063057,
"grad_norm": 0.0015590882394462824,
"learning_rate": 0.00017207170401879526,
"loss": 46.0,
"step": 739
},
{
"epoch": 0.10105493154893995,
"grad_norm": 0.0011711895931512117,
"learning_rate": 0.00017199882664868538,
"loss": 46.0,
"step": 740
},
{
"epoch": 0.10119149226724933,
"grad_norm": 0.0029380624182522297,
"learning_rate": 0.00017192586979402044,
"loss": 46.0,
"step": 741
},
{
"epoch": 0.1013280529855587,
"grad_norm": 0.0007488722330890596,
"learning_rate": 0.0001718528335353426,
"loss": 46.0,
"step": 742
},
{
"epoch": 0.10146461370386808,
"grad_norm": 0.0006716151256114244,
"learning_rate": 0.00017177971795328167,
"loss": 46.0,
"step": 743
},
{
"epoch": 0.10160117442217746,
"grad_norm": 0.0008320304332301021,
"learning_rate": 0.00017170652312855514,
"loss": 46.0,
"step": 744
},
{
"epoch": 0.10173773514048684,
"grad_norm": 0.002169104292988777,
"learning_rate": 0.00017163324914196783,
"loss": 46.0,
"step": 745
},
{
"epoch": 0.10187429585879622,
"grad_norm": 0.0008557327091693878,
"learning_rate": 0.00017155989607441213,
"loss": 46.0,
"step": 746
},
{
"epoch": 0.1020108565771056,
"grad_norm": 0.0009351296466775239,
"learning_rate": 0.00017148646400686753,
"loss": 46.0,
"step": 747
},
{
"epoch": 0.10214741729541497,
"grad_norm": 0.0019438541494309902,
"learning_rate": 0.00017141295302040095,
"loss": 46.0,
"step": 748
},
{
"epoch": 0.10228397801372435,
"grad_norm": 0.0014289816608652472,
"learning_rate": 0.00017133936319616626,
"loss": 46.0,
"step": 749
},
{
"epoch": 0.10242053873203373,
"grad_norm": 0.003017352195456624,
"learning_rate": 0.00017126569461540443,
"loss": 46.0,
"step": 750
},
{
"epoch": 0.10255709945034311,
"grad_norm": 0.0006331245531328022,
"learning_rate": 0.00017119194735944337,
"loss": 46.0,
"step": 751
},
{
"epoch": 0.10269366016865249,
"grad_norm": 0.0009659952484071255,
"learning_rate": 0.0001711181215096979,
"loss": 46.0,
"step": 752
},
{
"epoch": 0.10283022088696187,
"grad_norm": 0.0003564142098184675,
"learning_rate": 0.00017104421714766947,
"loss": 46.0,
"step": 753
},
{
"epoch": 0.10296678160527124,
"grad_norm": 0.0004242552968207747,
"learning_rate": 0.00017097023435494636,
"loss": 46.0,
"step": 754
},
{
"epoch": 0.10310334232358062,
"grad_norm": 0.00037972754216752946,
"learning_rate": 0.00017089617321320335,
"loss": 46.0,
"step": 755
},
{
"epoch": 0.10323990304189,
"grad_norm": 0.00022272802016232163,
"learning_rate": 0.0001708220338042017,
"loss": 46.0,
"step": 756
},
{
"epoch": 0.10337646376019938,
"grad_norm": 0.0004986607236787677,
"learning_rate": 0.0001707478162097891,
"loss": 46.0,
"step": 757
},
{
"epoch": 0.10351302447850876,
"grad_norm": 0.0006160014308989048,
"learning_rate": 0.00017067352051189965,
"loss": 46.0,
"step": 758
},
{
"epoch": 0.10364958519681813,
"grad_norm": 0.0006992130074650049,
"learning_rate": 0.0001705991467925535,
"loss": 46.0,
"step": 759
},
{
"epoch": 0.10378614591512751,
"grad_norm": 0.0003626852994784713,
"learning_rate": 0.000170524695133857,
"loss": 46.0,
"step": 760
},
{
"epoch": 0.10392270663343689,
"grad_norm": 0.0008579789428040385,
"learning_rate": 0.0001704501656180026,
"loss": 46.0,
"step": 761
},
{
"epoch": 0.10405926735174627,
"grad_norm": 0.0009895421098917723,
"learning_rate": 0.00017037555832726865,
"loss": 46.0,
"step": 762
},
{
"epoch": 0.10419582807005565,
"grad_norm": 0.0005099592381156981,
"learning_rate": 0.00017030087334401936,
"loss": 46.0,
"step": 763
},
{
"epoch": 0.10433238878836502,
"grad_norm": 0.0006343593122437596,
"learning_rate": 0.00017022611075070474,
"loss": 46.0,
"step": 764
},
{
"epoch": 0.1044689495066744,
"grad_norm": 0.0002918439276982099,
"learning_rate": 0.00017015127062986043,
"loss": 46.0,
"step": 765
},
{
"epoch": 0.10460551022498378,
"grad_norm": 0.00021448293409775943,
"learning_rate": 0.00017007635306410775,
"loss": 46.0,
"step": 766
},
{
"epoch": 0.10474207094329316,
"grad_norm": 0.0003549446410033852,
"learning_rate": 0.00017000135813615338,
"loss": 46.0,
"step": 767
},
{
"epoch": 0.10487863166160254,
"grad_norm": 0.0008276899461634457,
"learning_rate": 0.00016992628592878956,
"loss": 46.0,
"step": 768
},
{
"epoch": 0.10501519237991191,
"grad_norm": 0.0006797234527766705,
"learning_rate": 0.00016985113652489374,
"loss": 46.0,
"step": 769
},
{
"epoch": 0.10515175309822129,
"grad_norm": 0.0012585432268679142,
"learning_rate": 0.00016977591000742854,
"loss": 46.0,
"step": 770
},
{
"epoch": 0.10528831381653067,
"grad_norm": 0.0007930306601338089,
"learning_rate": 0.0001697006064594419,
"loss": 46.0,
"step": 771
},
{
"epoch": 0.10542487453484005,
"grad_norm": 0.0008263712516054511,
"learning_rate": 0.00016962522596406662,
"loss": 46.0,
"step": 772
},
{
"epoch": 0.10556143525314943,
"grad_norm": 0.0004895622842013836,
"learning_rate": 0.00016954976860452054,
"loss": 46.0,
"step": 773
},
{
"epoch": 0.1056979959714588,
"grad_norm": 0.000630986993201077,
"learning_rate": 0.00016947423446410636,
"loss": 46.0,
"step": 774
},
{
"epoch": 0.10583455668976818,
"grad_norm": 0.0006404675077646971,
"learning_rate": 0.00016939862362621146,
"loss": 46.0,
"step": 775
},
{
"epoch": 0.10597111740807756,
"grad_norm": 0.0023967279121279716,
"learning_rate": 0.00016932293617430796,
"loss": 46.0,
"step": 776
},
{
"epoch": 0.10610767812638694,
"grad_norm": 0.0005658793961629272,
"learning_rate": 0.0001692471721919526,
"loss": 46.0,
"step": 777
},
{
"epoch": 0.10624423884469632,
"grad_norm": 0.0009352597990073264,
"learning_rate": 0.00016917133176278648,
"loss": 46.0,
"step": 778
},
{
"epoch": 0.1063807995630057,
"grad_norm": 0.0011438351357355714,
"learning_rate": 0.00016909541497053522,
"loss": 46.0,
"step": 779
},
{
"epoch": 0.10651736028131507,
"grad_norm": 0.0010559734655544162,
"learning_rate": 0.00016901942189900867,
"loss": 46.0,
"step": 780
},
{
"epoch": 0.10665392099962445,
"grad_norm": 0.0005015085334889591,
"learning_rate": 0.0001689433526321009,
"loss": 46.0,
"step": 781
},
{
"epoch": 0.10679048171793383,
"grad_norm": 0.0003511472314130515,
"learning_rate": 0.0001688672072537902,
"loss": 46.0,
"step": 782
},
{
"epoch": 0.10692704243624321,
"grad_norm": 0.000647745851892978,
"learning_rate": 0.00016879098584813865,
"loss": 46.0,
"step": 783
},
{
"epoch": 0.10706360315455259,
"grad_norm": 0.0014171921648085117,
"learning_rate": 0.00016871468849929253,
"loss": 46.0,
"step": 784
},
{
"epoch": 0.10720016387286196,
"grad_norm": 0.0005998522392474115,
"learning_rate": 0.00016863831529148176,
"loss": 46.0,
"step": 785
},
{
"epoch": 0.10733672459117136,
"grad_norm": 0.0008481833501718938,
"learning_rate": 0.00016856186630902014,
"loss": 46.0,
"step": 786
},
{
"epoch": 0.10747328530948073,
"grad_norm": 0.001625437056645751,
"learning_rate": 0.00016848534163630498,
"loss": 46.0,
"step": 787
},
{
"epoch": 0.10760984602779011,
"grad_norm": 0.004598119296133518,
"learning_rate": 0.0001684087413578173,
"loss": 46.0,
"step": 788
},
{
"epoch": 0.10774640674609949,
"grad_norm": 0.0032096696086227894,
"learning_rate": 0.00016833206555812153,
"loss": 46.0,
"step": 789
},
{
"epoch": 0.10788296746440887,
"grad_norm": 0.00042951159412041306,
"learning_rate": 0.00016825531432186543,
"loss": 46.0,
"step": 790
},
{
"epoch": 0.10801952818271825,
"grad_norm": 0.0017844205722212791,
"learning_rate": 0.00016817848773378007,
"loss": 46.0,
"step": 791
},
{
"epoch": 0.10815608890102762,
"grad_norm": 0.0006350985495373607,
"learning_rate": 0.00016810158587867973,
"loss": 46.0,
"step": 792
},
{
"epoch": 0.108292649619337,
"grad_norm": 0.0009360946132801473,
"learning_rate": 0.00016802460884146175,
"loss": 46.0,
"step": 793
},
{
"epoch": 0.10842921033764638,
"grad_norm": 0.00030124749173410237,
"learning_rate": 0.0001679475567071065,
"loss": 46.0,
"step": 794
},
{
"epoch": 0.10856577105595576,
"grad_norm": 0.00164910894818604,
"learning_rate": 0.0001678704295606772,
"loss": 46.0,
"step": 795
},
{
"epoch": 0.10870233177426514,
"grad_norm": 0.0012204207014292479,
"learning_rate": 0.00016779322748731995,
"loss": 46.0,
"step": 796
},
{
"epoch": 0.10883889249257452,
"grad_norm": 0.0007130260928533971,
"learning_rate": 0.0001677159505722635,
"loss": 46.0,
"step": 797
},
{
"epoch": 0.1089754532108839,
"grad_norm": 0.0006258345092646778,
"learning_rate": 0.0001676385989008193,
"loss": 46.0,
"step": 798
},
{
"epoch": 0.10911201392919327,
"grad_norm": 0.0013600183883681893,
"learning_rate": 0.00016756117255838128,
"loss": 46.0,
"step": 799
},
{
"epoch": 0.10924857464750265,
"grad_norm": 0.001245712861418724,
"learning_rate": 0.00016748367163042576,
"loss": 46.0,
"step": 800
},
{
"epoch": 0.10938513536581203,
"grad_norm": 0.00043424879550002515,
"learning_rate": 0.0001674060962025115,
"loss": 46.0,
"step": 801
},
{
"epoch": 0.1095216960841214,
"grad_norm": 0.00040041119791567326,
"learning_rate": 0.00016732844636027948,
"loss": 46.0,
"step": 802
},
{
"epoch": 0.10965825680243078,
"grad_norm": 0.00047825041110627353,
"learning_rate": 0.00016725072218945272,
"loss": 46.0,
"step": 803
},
{
"epoch": 0.10979481752074016,
"grad_norm": 0.0006943688495084643,
"learning_rate": 0.00016717292377583647,
"loss": 46.0,
"step": 804
},
{
"epoch": 0.10993137823904954,
"grad_norm": 0.00046805053716525435,
"learning_rate": 0.00016709505120531782,
"loss": 46.0,
"step": 805
},
{
"epoch": 0.11006793895735892,
"grad_norm": 0.001021806849166751,
"learning_rate": 0.00016701710456386572,
"loss": 46.0,
"step": 806
},
{
"epoch": 0.1102044996756683,
"grad_norm": 0.000839448010083288,
"learning_rate": 0.000166939083937531,
"loss": 46.0,
"step": 807
},
{
"epoch": 0.11034106039397767,
"grad_norm": 0.0007610208704136312,
"learning_rate": 0.0001668609894124461,
"loss": 46.0,
"step": 808
},
{
"epoch": 0.11047762111228705,
"grad_norm": 0.0008387729176320136,
"learning_rate": 0.00016678282107482502,
"loss": 46.0,
"step": 809
},
{
"epoch": 0.11061418183059643,
"grad_norm": 0.0005413633771240711,
"learning_rate": 0.00016670457901096328,
"loss": 46.0,
"step": 810
},
{
"epoch": 0.11075074254890581,
"grad_norm": 0.0008596319821663201,
"learning_rate": 0.0001666262633072378,
"loss": 46.0,
"step": 811
},
{
"epoch": 0.11088730326721519,
"grad_norm": 0.0005809023859910667,
"learning_rate": 0.0001665478740501067,
"loss": 46.0,
"step": 812
},
{
"epoch": 0.11102386398552457,
"grad_norm": 0.0005002027610316873,
"learning_rate": 0.00016646941132610947,
"loss": 46.0,
"step": 813
},
{
"epoch": 0.11116042470383394,
"grad_norm": 0.0005074172513559461,
"learning_rate": 0.0001663908752218666,
"loss": 46.0,
"step": 814
},
{
"epoch": 0.11129698542214332,
"grad_norm": 0.00038702471647411585,
"learning_rate": 0.00016631226582407952,
"loss": 46.0,
"step": 815
},
{
"epoch": 0.1114335461404527,
"grad_norm": 0.0005355363246053457,
"learning_rate": 0.00016623358321953078,
"loss": 46.0,
"step": 816
},
{
"epoch": 0.11157010685876208,
"grad_norm": 0.000400405318941921,
"learning_rate": 0.00016615482749508356,
"loss": 46.0,
"step": 817
},
{
"epoch": 0.11170666757707146,
"grad_norm": 0.0003890878870151937,
"learning_rate": 0.00016607599873768182,
"loss": 46.0,
"step": 818
},
{
"epoch": 0.11184322829538083,
"grad_norm": 0.0006321795517578721,
"learning_rate": 0.0001659970970343502,
"loss": 46.0,
"step": 819
},
{
"epoch": 0.11197978901369021,
"grad_norm": 0.0007353540859185159,
"learning_rate": 0.00016591812247219377,
"loss": 46.0,
"step": 820
},
{
"epoch": 0.11211634973199959,
"grad_norm": 0.0012658998603001237,
"learning_rate": 0.00016583907513839817,
"loss": 46.0,
"step": 821
},
{
"epoch": 0.11225291045030897,
"grad_norm": 0.0012359356041997671,
"learning_rate": 0.00016575995512022921,
"loss": 46.0,
"step": 822
},
{
"epoch": 0.11238947116861835,
"grad_norm": 0.00024977774592116475,
"learning_rate": 0.00016568076250503304,
"loss": 46.0,
"step": 823
},
{
"epoch": 0.11252603188692772,
"grad_norm": 0.0007921140058897436,
"learning_rate": 0.000165601497380236,
"loss": 46.0,
"step": 824
},
{
"epoch": 0.1126625926052371,
"grad_norm": 0.00028611160814762115,
"learning_rate": 0.00016552215983334437,
"loss": 46.0,
"step": 825
},
{
"epoch": 0.11279915332354648,
"grad_norm": 0.00045781530207023025,
"learning_rate": 0.00016544274995194448,
"loss": 46.0,
"step": 826
},
{
"epoch": 0.11293571404185586,
"grad_norm": 0.0010054416488856077,
"learning_rate": 0.0001653632678237024,
"loss": 46.0,
"step": 827
},
{
"epoch": 0.11307227476016524,
"grad_norm": 0.0003318900417070836,
"learning_rate": 0.00016528371353636406,
"loss": 46.0,
"step": 828
},
{
"epoch": 0.11320883547847461,
"grad_norm": 0.0006165113882161677,
"learning_rate": 0.00016520408717775507,
"loss": 46.0,
"step": 829
},
{
"epoch": 0.11334539619678399,
"grad_norm": 0.0005957252578809857,
"learning_rate": 0.00016512438883578044,
"loss": 46.0,
"step": 830
},
{
"epoch": 0.11348195691509337,
"grad_norm": 0.0002892489719670266,
"learning_rate": 0.00016504461859842486,
"loss": 46.0,
"step": 831
},
{
"epoch": 0.11361851763340275,
"grad_norm": 0.00041660640272311866,
"learning_rate": 0.00016496477655375227,
"loss": 46.0,
"step": 832
},
{
"epoch": 0.11375507835171213,
"grad_norm": 0.00023521836556028575,
"learning_rate": 0.00016488486278990586,
"loss": 46.0,
"step": 833
},
{
"epoch": 0.1138916390700215,
"grad_norm": 0.0009512827964499593,
"learning_rate": 0.00016480487739510807,
"loss": 46.0,
"step": 834
},
{
"epoch": 0.11402819978833088,
"grad_norm": 0.0008759453776292503,
"learning_rate": 0.00016472482045766043,
"loss": 46.0,
"step": 835
},
{
"epoch": 0.11416476050664026,
"grad_norm": 0.000294568162644282,
"learning_rate": 0.00016464469206594332,
"loss": 46.0,
"step": 836
},
{
"epoch": 0.11430132122494964,
"grad_norm": 0.0007791619864292443,
"learning_rate": 0.00016456449230841617,
"loss": 46.0,
"step": 837
},
{
"epoch": 0.11443788194325902,
"grad_norm": 0.0015861823922023177,
"learning_rate": 0.00016448422127361706,
"loss": 46.0,
"step": 838
},
{
"epoch": 0.1145744426615684,
"grad_norm": 0.0011900209356099367,
"learning_rate": 0.00016440387905016285,
"loss": 46.0,
"step": 839
},
{
"epoch": 0.11471100337987777,
"grad_norm": 0.0017578421393409371,
"learning_rate": 0.00016432346572674896,
"loss": 46.0,
"step": 840
},
{
"epoch": 0.11484756409818715,
"grad_norm": 0.0018410708289593458,
"learning_rate": 0.00016424298139214929,
"loss": 46.0,
"step": 841
},
{
"epoch": 0.11498412481649653,
"grad_norm": 0.00044194411020725965,
"learning_rate": 0.0001641624261352161,
"loss": 46.0,
"step": 842
},
{
"epoch": 0.11512068553480591,
"grad_norm": 0.0007466517854481936,
"learning_rate": 0.00016408180004488007,
"loss": 46.0,
"step": 843
},
{
"epoch": 0.11525724625311529,
"grad_norm": 0.0008741529891267419,
"learning_rate": 0.00016400110321014992,
"loss": 46.0,
"step": 844
},
{
"epoch": 0.11539380697142466,
"grad_norm": 0.0008493126952089369,
"learning_rate": 0.00016392033572011261,
"loss": 46.0,
"step": 845
},
{
"epoch": 0.11553036768973404,
"grad_norm": 0.0007634422508999705,
"learning_rate": 0.000163839497663933,
"loss": 46.0,
"step": 846
},
{
"epoch": 0.11566692840804342,
"grad_norm": 0.0005714827566407621,
"learning_rate": 0.0001637585891308539,
"loss": 46.0,
"step": 847
},
{
"epoch": 0.1158034891263528,
"grad_norm": 0.0008292211568914354,
"learning_rate": 0.0001636776102101959,
"loss": 46.0,
"step": 848
},
{
"epoch": 0.11594004984466218,
"grad_norm": 0.00044870973215438426,
"learning_rate": 0.00016359656099135733,
"loss": 46.0,
"step": 849
},
{
"epoch": 0.11607661056297155,
"grad_norm": 0.0012383662397041917,
"learning_rate": 0.00016351544156381414,
"loss": 46.0,
"step": 850
},
{
"epoch": 0.11621317128128093,
"grad_norm": 0.0004602587141562253,
"learning_rate": 0.00016343425201711966,
"loss": 46.0,
"step": 851
},
{
"epoch": 0.11634973199959031,
"grad_norm": 0.0005955328815616667,
"learning_rate": 0.00016335299244090478,
"loss": 46.0,
"step": 852
},
{
"epoch": 0.11648629271789969,
"grad_norm": 0.0005287445383146405,
"learning_rate": 0.0001632716629248777,
"loss": 46.0,
"step": 853
},
{
"epoch": 0.11662285343620908,
"grad_norm": 0.0004926318651996553,
"learning_rate": 0.0001631902635588237,
"loss": 46.0,
"step": 854
},
{
"epoch": 0.11675941415451846,
"grad_norm": 0.0010656327940523624,
"learning_rate": 0.00016310879443260528,
"loss": 46.0,
"step": 855
},
{
"epoch": 0.11689597487282784,
"grad_norm": 0.0005354030872695148,
"learning_rate": 0.00016302725563616192,
"loss": 46.0,
"step": 856
},
{
"epoch": 0.11703253559113722,
"grad_norm": 0.0005059898248873651,
"learning_rate": 0.00016294564725951002,
"loss": 46.0,
"step": 857
},
{
"epoch": 0.1171690963094466,
"grad_norm": 0.0004201128613203764,
"learning_rate": 0.0001628639693927428,
"loss": 46.0,
"step": 858
},
{
"epoch": 0.11730565702775597,
"grad_norm": 0.0003003796737175435,
"learning_rate": 0.00016278222212603018,
"loss": 46.0,
"step": 859
},
{
"epoch": 0.11744221774606535,
"grad_norm": 0.0005390554433688521,
"learning_rate": 0.00016270040554961868,
"loss": 46.0,
"step": 860
},
{
"epoch": 0.11757877846437473,
"grad_norm": 0.0015715021872892976,
"learning_rate": 0.00016261851975383137,
"loss": 46.0,
"step": 861
},
{
"epoch": 0.1177153391826841,
"grad_norm": 0.0010319905122742057,
"learning_rate": 0.00016253656482906776,
"loss": 46.0,
"step": 862
},
{
"epoch": 0.11785189990099348,
"grad_norm": 0.000347215129295364,
"learning_rate": 0.0001624545408658036,
"loss": 46.0,
"step": 863
},
{
"epoch": 0.11798846061930286,
"grad_norm": 0.0006791274063289165,
"learning_rate": 0.00016237244795459086,
"loss": 46.0,
"step": 864
},
{
"epoch": 0.11812502133761224,
"grad_norm": 0.00031977854087017477,
"learning_rate": 0.00016229028618605775,
"loss": 46.0,
"step": 865
},
{
"epoch": 0.11826158205592162,
"grad_norm": 0.0037054885178804398,
"learning_rate": 0.00016220805565090836,
"loss": 46.0,
"step": 866
},
{
"epoch": 0.118398142774231,
"grad_norm": 0.0005012017791159451,
"learning_rate": 0.00016212575643992277,
"loss": 46.0,
"step": 867
},
{
"epoch": 0.11853470349254037,
"grad_norm": 0.00028602650854736567,
"learning_rate": 0.00016204338864395684,
"loss": 46.0,
"step": 868
},
{
"epoch": 0.11867126421084975,
"grad_norm": 0.0006842486909590662,
"learning_rate": 0.00016196095235394207,
"loss": 46.0,
"step": 869
},
{
"epoch": 0.11880782492915913,
"grad_norm": 0.000561655790079385,
"learning_rate": 0.00016187844766088586,
"loss": 46.0,
"step": 870
},
{
"epoch": 0.11894438564746851,
"grad_norm": 0.002418296178802848,
"learning_rate": 0.00016179587465587077,
"loss": 46.0,
"step": 871
},
{
"epoch": 0.11908094636577789,
"grad_norm": 0.00027192741981707513,
"learning_rate": 0.00016171323343005498,
"loss": 46.0,
"step": 872
},
{
"epoch": 0.11921750708408727,
"grad_norm": 0.0004957255441695452,
"learning_rate": 0.0001616305240746719,
"loss": 46.0,
"step": 873
},
{
"epoch": 0.11935406780239664,
"grad_norm": 0.00048530122148804367,
"learning_rate": 0.00016154774668103027,
"loss": 46.0,
"step": 874
},
{
"epoch": 0.11949062852070602,
"grad_norm": 0.0009217667393386364,
"learning_rate": 0.0001614649013405138,
"loss": 46.0,
"step": 875
},
{
"epoch": 0.1196271892390154,
"grad_norm": 0.0015625512460246682,
"learning_rate": 0.0001613819881445813,
"loss": 46.0,
"step": 876
},
{
"epoch": 0.11976374995732478,
"grad_norm": 0.0009254494798369706,
"learning_rate": 0.00016129900718476637,
"loss": 46.0,
"step": 877
},
{
"epoch": 0.11990031067563416,
"grad_norm": 0.00046171454596333206,
"learning_rate": 0.00016121595855267767,
"loss": 46.0,
"step": 878
},
{
"epoch": 0.12003687139394353,
"grad_norm": 0.0003148307732772082,
"learning_rate": 0.0001611328423399983,
"loss": 46.0,
"step": 879
},
{
"epoch": 0.12017343211225291,
"grad_norm": 0.0004730523796752095,
"learning_rate": 0.00016104965863848617,
"loss": 46.0,
"step": 880
},
{
"epoch": 0.12030999283056229,
"grad_norm": 0.001140785519964993,
"learning_rate": 0.00016096640753997346,
"loss": 46.0,
"step": 881
},
{
"epoch": 0.12044655354887167,
"grad_norm": 0.0007769656367599964,
"learning_rate": 0.00016088308913636703,
"loss": 46.0,
"step": 882
},
{
"epoch": 0.12058311426718105,
"grad_norm": 0.000732703018002212,
"learning_rate": 0.00016079970351964783,
"loss": 46.0,
"step": 883
},
{
"epoch": 0.12071967498549042,
"grad_norm": 0.0007818607264198363,
"learning_rate": 0.00016071625078187114,
"loss": 46.0,
"step": 884
},
{
"epoch": 0.1208562357037998,
"grad_norm": 0.0006149871041998267,
"learning_rate": 0.00016063273101516625,
"loss": 46.0,
"step": 885
},
{
"epoch": 0.12099279642210918,
"grad_norm": 0.0008054501377046108,
"learning_rate": 0.00016054914431173654,
"loss": 46.0,
"step": 886
},
{
"epoch": 0.12112935714041856,
"grad_norm": 0.0008161257137544453,
"learning_rate": 0.0001604654907638592,
"loss": 46.0,
"step": 887
},
{
"epoch": 0.12126591785872794,
"grad_norm": 0.0030461640562862158,
"learning_rate": 0.00016038177046388523,
"loss": 46.0,
"step": 888
},
{
"epoch": 0.12140247857703731,
"grad_norm": 0.0007848728564567864,
"learning_rate": 0.0001602979835042394,
"loss": 46.0,
"step": 889
},
{
"epoch": 0.12153903929534669,
"grad_norm": 0.005675900261849165,
"learning_rate": 0.00016021412997741993,
"loss": 46.0,
"step": 890
},
{
"epoch": 0.12167560001365607,
"grad_norm": 0.00039349167491309345,
"learning_rate": 0.0001601302099759987,
"loss": 46.0,
"step": 891
},
{
"epoch": 0.12181216073196545,
"grad_norm": 0.0006075625424273312,
"learning_rate": 0.00016004622359262085,
"loss": 46.0,
"step": 892
},
{
"epoch": 0.12194872145027483,
"grad_norm": 0.0003772106138058007,
"learning_rate": 0.0001599621709200048,
"loss": 46.0,
"step": 893
},
{
"epoch": 0.1220852821685842,
"grad_norm": 0.0015227465191856027,
"learning_rate": 0.00015987805205094227,
"loss": 46.0,
"step": 894
},
{
"epoch": 0.12222184288689358,
"grad_norm": 0.00073139468440786,
"learning_rate": 0.00015979386707829792,
"loss": 46.0,
"step": 895
},
{
"epoch": 0.12235840360520296,
"grad_norm": 0.0011622250312939286,
"learning_rate": 0.00015970961609500944,
"loss": 46.0,
"step": 896
},
{
"epoch": 0.12249496432351234,
"grad_norm": 0.001449939445592463,
"learning_rate": 0.00015962529919408746,
"loss": 46.0,
"step": 897
},
{
"epoch": 0.12263152504182172,
"grad_norm": 0.0005385968834161758,
"learning_rate": 0.00015954091646861525,
"loss": 46.0,
"step": 898
},
{
"epoch": 0.1227680857601311,
"grad_norm": 0.0007052323780953884,
"learning_rate": 0.00015945646801174886,
"loss": 46.0,
"step": 899
},
{
"epoch": 0.12290464647844047,
"grad_norm": 0.001713512814603746,
"learning_rate": 0.0001593719539167169,
"loss": 46.0,
"step": 900
},
{
"epoch": 0.12290464647844047,
"eval_loss": 11.5,
"eval_runtime": 20.6643,
"eval_samples_per_second": 149.243,
"eval_steps_per_second": 74.621,
"step": 900
},
{
"epoch": 0.12304120719674985,
"grad_norm": 0.0009241271764039993,
"learning_rate": 0.00015928737427682032,
"loss": 46.0,
"step": 901
},
{
"epoch": 0.12317776791505923,
"grad_norm": 0.0003521353646647185,
"learning_rate": 0.00015920272918543257,
"loss": 46.0,
"step": 902
},
{
"epoch": 0.12331432863336861,
"grad_norm": 0.0004476907488424331,
"learning_rate": 0.00015911801873599933,
"loss": 46.0,
"step": 903
},
{
"epoch": 0.12345088935167799,
"grad_norm": 0.0008625802001915872,
"learning_rate": 0.00015903324302203836,
"loss": 46.0,
"step": 904
},
{
"epoch": 0.12358745006998736,
"grad_norm": 0.000347345310728997,
"learning_rate": 0.00015894840213713952,
"loss": 46.0,
"step": 905
},
{
"epoch": 0.12372401078829674,
"grad_norm": 0.00042961168219335377,
"learning_rate": 0.0001588634961749646,
"loss": 46.0,
"step": 906
},
{
"epoch": 0.12386057150660612,
"grad_norm": 0.0005479567334987223,
"learning_rate": 0.00015877852522924732,
"loss": 46.0,
"step": 907
},
{
"epoch": 0.1239971322249155,
"grad_norm": 0.000546832219697535,
"learning_rate": 0.00015869348939379302,
"loss": 46.0,
"step": 908
},
{
"epoch": 0.12413369294322488,
"grad_norm": 0.0005068538011983037,
"learning_rate": 0.0001586083887624787,
"loss": 46.0,
"step": 909
},
{
"epoch": 0.12427025366153426,
"grad_norm": 0.0005834728945046663,
"learning_rate": 0.00015852322342925295,
"loss": 46.0,
"step": 910
},
{
"epoch": 0.12440681437984363,
"grad_norm": 0.0004933670861646533,
"learning_rate": 0.00015843799348813574,
"loss": 46.0,
"step": 911
},
{
"epoch": 0.12454337509815301,
"grad_norm": 0.0011595729738473892,
"learning_rate": 0.0001583526990332184,
"loss": 46.0,
"step": 912
},
{
"epoch": 0.12467993581646239,
"grad_norm": 0.00029905018163844943,
"learning_rate": 0.00015826734015866344,
"loss": 46.0,
"step": 913
},
{
"epoch": 0.12481649653477177,
"grad_norm": 0.000434244517236948,
"learning_rate": 0.00015818191695870452,
"loss": 46.0,
"step": 914
},
{
"epoch": 0.12495305725308115,
"grad_norm": 0.0006040096050128341,
"learning_rate": 0.00015809642952764632,
"loss": 46.0,
"step": 915
},
{
"epoch": 0.12508961797139054,
"grad_norm": 0.0007016469608061016,
"learning_rate": 0.00015801087795986438,
"loss": 46.0,
"step": 916
},
{
"epoch": 0.1252261786896999,
"grad_norm": 0.0003513791016303003,
"learning_rate": 0.0001579252623498051,
"loss": 46.0,
"step": 917
},
{
"epoch": 0.1253627394080093,
"grad_norm": 0.0003278540389146656,
"learning_rate": 0.0001578395827919855,
"loss": 46.0,
"step": 918
},
{
"epoch": 0.12549930012631866,
"grad_norm": 0.000646027154289186,
"learning_rate": 0.00015775383938099332,
"loss": 46.0,
"step": 919
},
{
"epoch": 0.12563586084462805,
"grad_norm": 0.0005723676295019686,
"learning_rate": 0.00015766803221148673,
"loss": 46.0,
"step": 920
},
{
"epoch": 0.12577242156293741,
"grad_norm": 0.0015359300887212157,
"learning_rate": 0.00015758216137819422,
"loss": 46.0,
"step": 921
},
{
"epoch": 0.1259089822812468,
"grad_norm": 0.00043970157275907695,
"learning_rate": 0.0001574962269759147,
"loss": 46.0,
"step": 922
},
{
"epoch": 0.12604554299955617,
"grad_norm": 0.00028622214449569583,
"learning_rate": 0.00015741022909951716,
"loss": 46.0,
"step": 923
},
{
"epoch": 0.12618210371786556,
"grad_norm": 0.00038283158210106194,
"learning_rate": 0.00015732416784394065,
"loss": 46.0,
"step": 924
},
{
"epoch": 0.12631866443617493,
"grad_norm": 0.00039500248385593295,
"learning_rate": 0.00015723804330419422,
"loss": 46.0,
"step": 925
},
{
"epoch": 0.12645522515448432,
"grad_norm": 0.0004930765135213733,
"learning_rate": 0.00015715185557535689,
"loss": 46.0,
"step": 926
},
{
"epoch": 0.12659178587279368,
"grad_norm": 0.0005405626725405455,
"learning_rate": 0.00015706560475257727,
"loss": 46.0,
"step": 927
},
{
"epoch": 0.12672834659110307,
"grad_norm": 0.00043443331378512084,
"learning_rate": 0.00015697929093107365,
"loss": 46.0,
"step": 928
},
{
"epoch": 0.12686490730941244,
"grad_norm": 0.0005110527272336185,
"learning_rate": 0.000156892914206134,
"loss": 46.0,
"step": 929
},
{
"epoch": 0.12700146802772183,
"grad_norm": 0.00031187915010377765,
"learning_rate": 0.00015680647467311557,
"loss": 46.0,
"step": 930
},
{
"epoch": 0.1271380287460312,
"grad_norm": 0.0001981136156246066,
"learning_rate": 0.00015671997242744511,
"loss": 46.0,
"step": 931
},
{
"epoch": 0.1272745894643406,
"grad_norm": 0.0026429896242916584,
"learning_rate": 0.00015663340756461844,
"loss": 46.0,
"step": 932
},
{
"epoch": 0.12741115018264995,
"grad_norm": 0.0008942155982367694,
"learning_rate": 0.0001565467801802006,
"loss": 46.0,
"step": 933
},
{
"epoch": 0.12754771090095934,
"grad_norm": 0.0006840588175691664,
"learning_rate": 0.00015646009036982567,
"loss": 46.0,
"step": 934
},
{
"epoch": 0.1276842716192687,
"grad_norm": 0.0009611474233679473,
"learning_rate": 0.00015637333822919656,
"loss": 46.0,
"step": 935
},
{
"epoch": 0.1278208323375781,
"grad_norm": 0.000530791119672358,
"learning_rate": 0.00015628652385408508,
"loss": 46.0,
"step": 936
},
{
"epoch": 0.12795739305588746,
"grad_norm": 0.00041494445758871734,
"learning_rate": 0.00015619964734033172,
"loss": 46.0,
"step": 937
},
{
"epoch": 0.12809395377419686,
"grad_norm": 0.0008938443497754633,
"learning_rate": 0.00015611270878384552,
"loss": 46.0,
"step": 938
},
{
"epoch": 0.12823051449250622,
"grad_norm": 0.000739588460419327,
"learning_rate": 0.00015602570828060407,
"loss": 46.0,
"step": 939
},
{
"epoch": 0.1283670752108156,
"grad_norm": 0.0025961471255868673,
"learning_rate": 0.00015593864592665333,
"loss": 46.0,
"step": 940
},
{
"epoch": 0.12850363592912498,
"grad_norm": 0.00049801473505795,
"learning_rate": 0.00015585152181810753,
"loss": 46.0,
"step": 941
},
{
"epoch": 0.12864019664743437,
"grad_norm": 0.000621246756054461,
"learning_rate": 0.00015576433605114912,
"loss": 46.0,
"step": 942
},
{
"epoch": 0.12877675736574373,
"grad_norm": 0.0007328742649406195,
"learning_rate": 0.00015567708872202854,
"loss": 46.0,
"step": 943
},
{
"epoch": 0.12891331808405312,
"grad_norm": 0.005625641904771328,
"learning_rate": 0.00015558977992706426,
"loss": 46.0,
"step": 944
},
{
"epoch": 0.1290498788023625,
"grad_norm": 0.0005238280282355845,
"learning_rate": 0.00015550240976264253,
"loss": 46.0,
"step": 945
},
{
"epoch": 0.12918643952067188,
"grad_norm": 0.001820914214476943,
"learning_rate": 0.0001554149783252175,
"loss": 46.0,
"step": 946
},
{
"epoch": 0.12932300023898124,
"grad_norm": 0.0006223174277693033,
"learning_rate": 0.0001553274857113108,
"loss": 46.0,
"step": 947
},
{
"epoch": 0.12945956095729064,
"grad_norm": 0.0015029089991003275,
"learning_rate": 0.00015523993201751167,
"loss": 46.0,
"step": 948
},
{
"epoch": 0.1295961216756,
"grad_norm": 0.0012488181237131357,
"learning_rate": 0.00015515231734047677,
"loss": 46.0,
"step": 949
},
{
"epoch": 0.1297326823939094,
"grad_norm": 0.0007277615368366241,
"learning_rate": 0.0001550646417769301,
"loss": 46.0,
"step": 950
},
{
"epoch": 0.12986924311221876,
"grad_norm": 0.0024758039508014917,
"learning_rate": 0.0001549769054236629,
"loss": 46.0,
"step": 951
},
{
"epoch": 0.13000580383052815,
"grad_norm": 0.00043562057544477284,
"learning_rate": 0.00015488910837753342,
"loss": 46.0,
"step": 952
},
{
"epoch": 0.1301423645488375,
"grad_norm": 0.0006010486977174878,
"learning_rate": 0.00015480125073546704,
"loss": 46.0,
"step": 953
},
{
"epoch": 0.1302789252671469,
"grad_norm": 0.00039981160080060363,
"learning_rate": 0.0001547133325944559,
"loss": 46.0,
"step": 954
},
{
"epoch": 0.13041548598545627,
"grad_norm": 0.00056524045066908,
"learning_rate": 0.00015462535405155902,
"loss": 46.0,
"step": 955
},
{
"epoch": 0.13055204670376566,
"grad_norm": 0.00019664541468955576,
"learning_rate": 0.00015453731520390215,
"loss": 46.0,
"step": 956
},
{
"epoch": 0.13068860742207505,
"grad_norm": 0.000747493002563715,
"learning_rate": 0.0001544492161486775,
"loss": 46.0,
"step": 957
},
{
"epoch": 0.13082516814038442,
"grad_norm": 0.00021618347091134638,
"learning_rate": 0.00015436105698314384,
"loss": 46.0,
"step": 958
},
{
"epoch": 0.1309617288586938,
"grad_norm": 0.0005658991285599768,
"learning_rate": 0.0001542728378046262,
"loss": 46.0,
"step": 959
},
{
"epoch": 0.13109828957700317,
"grad_norm": 0.0010320099536329508,
"learning_rate": 0.00015418455871051592,
"loss": 46.0,
"step": 960
},
{
"epoch": 0.13123485029531257,
"grad_norm": 0.00037752182106487453,
"learning_rate": 0.00015409621979827048,
"loss": 46.0,
"step": 961
},
{
"epoch": 0.13137141101362193,
"grad_norm": 0.0003615982714109123,
"learning_rate": 0.0001540078211654135,
"loss": 46.0,
"step": 962
},
{
"epoch": 0.13150797173193132,
"grad_norm": 0.00041104850242845714,
"learning_rate": 0.0001539193629095343,
"loss": 46.0,
"step": 963
},
{
"epoch": 0.1316445324502407,
"grad_norm": 0.0005330987041816115,
"learning_rate": 0.00015383084512828824,
"loss": 46.0,
"step": 964
},
{
"epoch": 0.13178109316855008,
"grad_norm": 0.0003800500126089901,
"learning_rate": 0.00015374226791939628,
"loss": 46.0,
"step": 965
},
{
"epoch": 0.13191765388685944,
"grad_norm": 0.0006545698852278292,
"learning_rate": 0.000153653631380645,
"loss": 46.0,
"step": 966
},
{
"epoch": 0.13205421460516883,
"grad_norm": 0.0020852810703217983,
"learning_rate": 0.0001535649356098865,
"loss": 46.0,
"step": 967
},
{
"epoch": 0.1321907753234782,
"grad_norm": 0.00031325622694566846,
"learning_rate": 0.00015347618070503827,
"loss": 46.0,
"step": 968
},
{
"epoch": 0.1323273360417876,
"grad_norm": 0.00019234443607274443,
"learning_rate": 0.0001533873667640831,
"loss": 46.0,
"step": 969
},
{
"epoch": 0.13246389676009696,
"grad_norm": 0.00038069483707658947,
"learning_rate": 0.00015329849388506886,
"loss": 46.0,
"step": 970
},
{
"epoch": 0.13260045747840635,
"grad_norm": 0.00045511999633163214,
"learning_rate": 0.00015320956216610866,
"loss": 46.0,
"step": 971
},
{
"epoch": 0.1327370181967157,
"grad_norm": 0.0005886334110982716,
"learning_rate": 0.00015312057170538035,
"loss": 46.0,
"step": 972
},
{
"epoch": 0.1328735789150251,
"grad_norm": 0.0009905985789373517,
"learning_rate": 0.00015303152260112682,
"loss": 46.0,
"step": 973
},
{
"epoch": 0.13301013963333447,
"grad_norm": 0.0010108448332175612,
"learning_rate": 0.00015294241495165557,
"loss": 46.0,
"step": 974
},
{
"epoch": 0.13314670035164386,
"grad_norm": 0.0005389642901718616,
"learning_rate": 0.00015285324885533884,
"loss": 46.0,
"step": 975
},
{
"epoch": 0.13328326106995322,
"grad_norm": 0.000410493987146765,
"learning_rate": 0.0001527640244106133,
"loss": 46.0,
"step": 976
},
{
"epoch": 0.13341982178826262,
"grad_norm": 0.001756677869707346,
"learning_rate": 0.00015267474171598005,
"loss": 46.0,
"step": 977
},
{
"epoch": 0.13355638250657198,
"grad_norm": 0.0003254815237596631,
"learning_rate": 0.0001525854008700046,
"loss": 46.0,
"step": 978
},
{
"epoch": 0.13369294322488137,
"grad_norm": 0.0003471802920103073,
"learning_rate": 0.00015249600197131651,
"loss": 46.0,
"step": 979
},
{
"epoch": 0.13382950394319074,
"grad_norm": 0.0015930512454360723,
"learning_rate": 0.0001524065451186095,
"loss": 46.0,
"step": 980
},
{
"epoch": 0.13396606466150013,
"grad_norm": 0.00028746266616508365,
"learning_rate": 0.0001523170304106413,
"loss": 46.0,
"step": 981
},
{
"epoch": 0.1341026253798095,
"grad_norm": 0.0005899532698094845,
"learning_rate": 0.0001522274579462334,
"loss": 46.0,
"step": 982
},
{
"epoch": 0.13423918609811888,
"grad_norm": 0.0005316737224347889,
"learning_rate": 0.00015213782782427123,
"loss": 46.0,
"step": 983
},
{
"epoch": 0.13437574681642825,
"grad_norm": 0.00045934764784760773,
"learning_rate": 0.00015204814014370372,
"loss": 46.0,
"step": 984
},
{
"epoch": 0.13451230753473764,
"grad_norm": 0.001285345759242773,
"learning_rate": 0.00015195839500354335,
"loss": 46.0,
"step": 985
},
{
"epoch": 0.134648868253047,
"grad_norm": 0.0005873920163139701,
"learning_rate": 0.00015186859250286615,
"loss": 46.0,
"step": 986
},
{
"epoch": 0.1347854289713564,
"grad_norm": 0.0011875568889081478,
"learning_rate": 0.00015177873274081137,
"loss": 46.0,
"step": 987
},
{
"epoch": 0.13492198968966576,
"grad_norm": 0.001056193490512669,
"learning_rate": 0.00015168881581658147,
"loss": 46.0,
"step": 988
},
{
"epoch": 0.13505855040797515,
"grad_norm": 0.0009340514661744237,
"learning_rate": 0.00015159884182944211,
"loss": 46.0,
"step": 989
},
{
"epoch": 0.13519511112628452,
"grad_norm": 0.0011555576929822564,
"learning_rate": 0.00015150881087872185,
"loss": 46.0,
"step": 990
},
{
"epoch": 0.1353316718445939,
"grad_norm": 0.001084683695808053,
"learning_rate": 0.00015141872306381215,
"loss": 46.0,
"step": 991
},
{
"epoch": 0.13546823256290327,
"grad_norm": 0.000519106222782284,
"learning_rate": 0.00015132857848416733,
"loss": 46.0,
"step": 992
},
{
"epoch": 0.13560479328121267,
"grad_norm": 0.001280359923839569,
"learning_rate": 0.00015123837723930424,
"loss": 46.0,
"step": 993
},
{
"epoch": 0.13574135399952203,
"grad_norm": 0.0007028987165540457,
"learning_rate": 0.00015114811942880242,
"loss": 46.0,
"step": 994
},
{
"epoch": 0.13587791471783142,
"grad_norm": 0.0005714019644074142,
"learning_rate": 0.00015105780515230376,
"loss": 46.0,
"step": 995
},
{
"epoch": 0.13601447543614079,
"grad_norm": 0.0009716853965073824,
"learning_rate": 0.00015096743450951258,
"loss": 46.0,
"step": 996
},
{
"epoch": 0.13615103615445018,
"grad_norm": 0.0006497084395959973,
"learning_rate": 0.00015087700760019532,
"loss": 46.0,
"step": 997
},
{
"epoch": 0.13628759687275954,
"grad_norm": 0.0013521420769393444,
"learning_rate": 0.00015078652452418063,
"loss": 46.0,
"step": 998
},
{
"epoch": 0.13642415759106893,
"grad_norm": 0.0008505037403665483,
"learning_rate": 0.00015069598538135906,
"loss": 46.0,
"step": 999
},
{
"epoch": 0.1365607183093783,
"grad_norm": 0.0011916455114260316,
"learning_rate": 0.00015060539027168316,
"loss": 46.0,
"step": 1000
},
{
"epoch": 0.1366972790276877,
"grad_norm": 0.0005008620209991932,
"learning_rate": 0.00015051473929516722,
"loss": 46.0,
"step": 1001
},
{
"epoch": 0.13683383974599705,
"grad_norm": 0.0006068138754926622,
"learning_rate": 0.00015042403255188723,
"loss": 46.0,
"step": 1002
},
{
"epoch": 0.13697040046430645,
"grad_norm": 0.0003954765561502427,
"learning_rate": 0.00015033327014198075,
"loss": 46.0,
"step": 1003
},
{
"epoch": 0.1371069611826158,
"grad_norm": 0.0005776135949417949,
"learning_rate": 0.00015024245216564667,
"loss": 46.0,
"step": 1004
},
{
"epoch": 0.1372435219009252,
"grad_norm": 0.0008297267486341298,
"learning_rate": 0.00015015157872314542,
"loss": 46.0,
"step": 1005
},
{
"epoch": 0.13738008261923457,
"grad_norm": 0.000555426231585443,
"learning_rate": 0.00015006064991479853,
"loss": 46.0,
"step": 1006
},
{
"epoch": 0.13751664333754396,
"grad_norm": 0.0002457842347212136,
"learning_rate": 0.0001499696658409887,
"loss": 46.0,
"step": 1007
},
{
"epoch": 0.13765320405585332,
"grad_norm": 0.0005408066790550947,
"learning_rate": 0.00014987862660215966,
"loss": 46.0,
"step": 1008
},
{
"epoch": 0.13778976477416272,
"grad_norm": 0.0010539703071117401,
"learning_rate": 0.00014978753229881594,
"loss": 46.0,
"step": 1009
},
{
"epoch": 0.13792632549247208,
"grad_norm": 0.00045947683975100517,
"learning_rate": 0.00014969638303152295,
"loss": 46.0,
"step": 1010
},
{
"epoch": 0.13806288621078147,
"grad_norm": 0.0004952670424245298,
"learning_rate": 0.0001496051789009068,
"loss": 46.0,
"step": 1011
},
{
"epoch": 0.13819944692909084,
"grad_norm": 0.0006827415782026947,
"learning_rate": 0.00014951392000765411,
"loss": 46.0,
"step": 1012
},
{
"epoch": 0.13833600764740023,
"grad_norm": 0.000486463715787977,
"learning_rate": 0.000149422606452512,
"loss": 46.0,
"step": 1013
},
{
"epoch": 0.1384725683657096,
"grad_norm": 0.0002832711033988744,
"learning_rate": 0.00014933123833628785,
"loss": 46.0,
"step": 1014
},
{
"epoch": 0.13860912908401898,
"grad_norm": 0.0004940008511766791,
"learning_rate": 0.00014923981575984936,
"loss": 46.0,
"step": 1015
},
{
"epoch": 0.13874568980232835,
"grad_norm": 0.0008655837154947221,
"learning_rate": 0.00014914833882412435,
"loss": 46.0,
"step": 1016
},
{
"epoch": 0.13888225052063774,
"grad_norm": 0.0007375786663033068,
"learning_rate": 0.00014905680763010058,
"loss": 46.0,
"step": 1017
},
{
"epoch": 0.1390188112389471,
"grad_norm": 0.0006843106239102781,
"learning_rate": 0.00014896522227882578,
"loss": 46.0,
"step": 1018
},
{
"epoch": 0.1391553719572565,
"grad_norm": 0.001130104181356728,
"learning_rate": 0.00014887358287140744,
"loss": 46.0,
"step": 1019
},
{
"epoch": 0.13929193267556586,
"grad_norm": 0.0008909485186450183,
"learning_rate": 0.00014878188950901276,
"loss": 46.0,
"step": 1020
},
{
"epoch": 0.13942849339387525,
"grad_norm": 0.005642786156386137,
"learning_rate": 0.0001486901422928684,
"loss": 46.0,
"step": 1021
},
{
"epoch": 0.13956505411218462,
"grad_norm": 0.0005347135593183339,
"learning_rate": 0.0001485983413242606,
"loss": 46.0,
"step": 1022
},
{
"epoch": 0.139701614830494,
"grad_norm": 0.000356485164957121,
"learning_rate": 0.00014850648670453493,
"loss": 46.0,
"step": 1023
},
{
"epoch": 0.13983817554880337,
"grad_norm": 0.0006373700452968478,
"learning_rate": 0.00014841457853509606,
"loss": 46.0,
"step": 1024
},
{
"epoch": 0.13997473626711276,
"grad_norm": 0.00020091190526727587,
"learning_rate": 0.0001483226169174079,
"loss": 46.0,
"step": 1025
},
{
"epoch": 0.14011129698542216,
"grad_norm": 0.00042303564259782434,
"learning_rate": 0.00014823060195299337,
"loss": 46.0,
"step": 1026
},
{
"epoch": 0.14024785770373152,
"grad_norm": 0.0004543966497294605,
"learning_rate": 0.00014813853374343419,
"loss": 46.0,
"step": 1027
},
{
"epoch": 0.1403844184220409,
"grad_norm": 0.00042528280755504966,
"learning_rate": 0.00014804641239037097,
"loss": 46.0,
"step": 1028
},
{
"epoch": 0.14052097914035028,
"grad_norm": 0.0005864663980901241,
"learning_rate": 0.00014795423799550284,
"loss": 46.0,
"step": 1029
},
{
"epoch": 0.14065753985865967,
"grad_norm": 0.0005385400145314634,
"learning_rate": 0.00014786201066058766,
"loss": 46.0,
"step": 1030
},
{
"epoch": 0.14079410057696903,
"grad_norm": 0.0011653905967250466,
"learning_rate": 0.00014776973048744165,
"loss": 46.0,
"step": 1031
},
{
"epoch": 0.14093066129527843,
"grad_norm": 0.0004561395035125315,
"learning_rate": 0.0001476773975779393,
"loss": 46.0,
"step": 1032
},
{
"epoch": 0.1410672220135878,
"grad_norm": 0.000643297506030649,
"learning_rate": 0.00014758501203401348,
"loss": 46.0,
"step": 1033
},
{
"epoch": 0.14120378273189718,
"grad_norm": 0.0006550021353177726,
"learning_rate": 0.00014749257395765502,
"loss": 46.0,
"step": 1034
},
{
"epoch": 0.14134034345020655,
"grad_norm": 0.0007623559795320034,
"learning_rate": 0.0001474000834509128,
"loss": 46.0,
"step": 1035
},
{
"epoch": 0.14147690416851594,
"grad_norm": 0.000623580242972821,
"learning_rate": 0.00014730754061589355,
"loss": 46.0,
"step": 1036
},
{
"epoch": 0.1416134648868253,
"grad_norm": 0.0010572531027719378,
"learning_rate": 0.00014721494555476188,
"loss": 46.0,
"step": 1037
},
{
"epoch": 0.1417500256051347,
"grad_norm": 0.0011201991001144052,
"learning_rate": 0.00014712229836973988,
"loss": 46.0,
"step": 1038
},
{
"epoch": 0.14188658632344406,
"grad_norm": 0.001960804220288992,
"learning_rate": 0.00014702959916310736,
"loss": 46.0,
"step": 1039
},
{
"epoch": 0.14202314704175345,
"grad_norm": 0.0050703976303339005,
"learning_rate": 0.00014693684803720138,
"loss": 46.0,
"step": 1040
},
{
"epoch": 0.14215970776006281,
"grad_norm": 0.00040476518915966153,
"learning_rate": 0.0001468440450944165,
"loss": 46.0,
"step": 1041
},
{
"epoch": 0.1422962684783722,
"grad_norm": 0.0007858510361984372,
"learning_rate": 0.00014675119043720437,
"loss": 46.0,
"step": 1042
},
{
"epoch": 0.14243282919668157,
"grad_norm": 0.0007758094579912722,
"learning_rate": 0.0001466582841680737,
"loss": 46.0,
"step": 1043
},
{
"epoch": 0.14256938991499096,
"grad_norm": 0.0008653284166939557,
"learning_rate": 0.00014656532638959035,
"loss": 46.0,
"step": 1044
},
{
"epoch": 0.14270595063330033,
"grad_norm": 0.0004421341873239726,
"learning_rate": 0.00014647231720437686,
"loss": 46.0,
"step": 1045
},
{
"epoch": 0.14284251135160972,
"grad_norm": 0.0008486118167638779,
"learning_rate": 0.0001463792567151126,
"loss": 46.0,
"step": 1046
},
{
"epoch": 0.14297907206991908,
"grad_norm": 0.000525649928022176,
"learning_rate": 0.0001462861450245336,
"loss": 46.0,
"step": 1047
},
{
"epoch": 0.14311563278822848,
"grad_norm": 0.0017683632904663682,
"learning_rate": 0.00014619298223543235,
"loss": 46.0,
"step": 1048
},
{
"epoch": 0.14325219350653784,
"grad_norm": 0.0012217290932312608,
"learning_rate": 0.00014609976845065783,
"loss": 46.0,
"step": 1049
},
{
"epoch": 0.14338875422484723,
"grad_norm": 0.0011137262918055058,
"learning_rate": 0.00014600650377311522,
"loss": 46.0,
"step": 1050
},
{
"epoch": 0.1435253149431566,
"grad_norm": 0.0003707687428686768,
"learning_rate": 0.00014591318830576598,
"loss": 46.0,
"step": 1051
},
{
"epoch": 0.143661875661466,
"grad_norm": 0.000414914742577821,
"learning_rate": 0.0001458198221516276,
"loss": 46.0,
"step": 1052
},
{
"epoch": 0.14379843637977535,
"grad_norm": 0.0008973225485533476,
"learning_rate": 0.0001457264054137735,
"loss": 46.0,
"step": 1053
},
{
"epoch": 0.14393499709808474,
"grad_norm": 0.00040008637006394565,
"learning_rate": 0.000145632938195333,
"loss": 46.0,
"step": 1054
},
{
"epoch": 0.1440715578163941,
"grad_norm": 0.0005400644731707871,
"learning_rate": 0.0001455394205994911,
"loss": 46.0,
"step": 1055
},
{
"epoch": 0.1442081185347035,
"grad_norm": 0.00028061779448762536,
"learning_rate": 0.00014544585272948843,
"loss": 46.0,
"step": 1056
},
{
"epoch": 0.14434467925301286,
"grad_norm": 0.0007635858491994441,
"learning_rate": 0.00014535223468862114,
"loss": 46.0,
"step": 1057
},
{
"epoch": 0.14448123997132226,
"grad_norm": 0.0004812279948964715,
"learning_rate": 0.00014525856658024076,
"loss": 46.0,
"step": 1058
},
{
"epoch": 0.14461780068963162,
"grad_norm": 0.0004690811620093882,
"learning_rate": 0.00014516484850775406,
"loss": 46.0,
"step": 1059
},
{
"epoch": 0.144754361407941,
"grad_norm": 0.0008514091605320573,
"learning_rate": 0.00014507108057462296,
"loss": 46.0,
"step": 1060
},
{
"epoch": 0.14489092212625038,
"grad_norm": 0.00031304339063353837,
"learning_rate": 0.00014497726288436458,
"loss": 46.0,
"step": 1061
},
{
"epoch": 0.14502748284455977,
"grad_norm": 0.00015942241589073092,
"learning_rate": 0.00014488339554055073,
"loss": 46.0,
"step": 1062
},
{
"epoch": 0.14516404356286913,
"grad_norm": 0.0012250308645889163,
"learning_rate": 0.0001447894786468082,
"loss": 46.0,
"step": 1063
},
{
"epoch": 0.14530060428117852,
"grad_norm": 0.00036729895509779453,
"learning_rate": 0.00014469551230681844,
"loss": 46.0,
"step": 1064
},
{
"epoch": 0.1454371649994879,
"grad_norm": 0.000257661915384233,
"learning_rate": 0.00014460149662431747,
"loss": 46.0,
"step": 1065
},
{
"epoch": 0.14557372571779728,
"grad_norm": 0.000458430964499712,
"learning_rate": 0.00014450743170309584,
"loss": 46.0,
"step": 1066
},
{
"epoch": 0.14571028643610665,
"grad_norm": 0.0004429294203873724,
"learning_rate": 0.00014441331764699836,
"loss": 46.0,
"step": 1067
},
{
"epoch": 0.14584684715441604,
"grad_norm": 0.0005923425196669996,
"learning_rate": 0.00014431915455992414,
"loss": 46.0,
"step": 1068
},
{
"epoch": 0.1459834078727254,
"grad_norm": 0.0003316183283459395,
"learning_rate": 0.00014422494254582647,
"loss": 46.0,
"step": 1069
},
{
"epoch": 0.1461199685910348,
"grad_norm": 0.0004024969239253551,
"learning_rate": 0.0001441306817087125,
"loss": 46.0,
"step": 1070
},
{
"epoch": 0.14625652930934416,
"grad_norm": 0.00029238680144771934,
"learning_rate": 0.00014403637215264353,
"loss": 46.0,
"step": 1071
},
{
"epoch": 0.14639309002765355,
"grad_norm": 0.00044409476686269045,
"learning_rate": 0.00014394201398173437,
"loss": 46.0,
"step": 1072
},
{
"epoch": 0.1465296507459629,
"grad_norm": 0.000767083081882447,
"learning_rate": 0.00014384760730015364,
"loss": 46.0,
"step": 1073
},
{
"epoch": 0.1466662114642723,
"grad_norm": 0.00029706236091442406,
"learning_rate": 0.00014375315221212357,
"loss": 46.0,
"step": 1074
},
{
"epoch": 0.14680277218258167,
"grad_norm": 0.0004188843595329672,
"learning_rate": 0.00014365864882191968,
"loss": 46.0,
"step": 1075
},
{
"epoch": 0.14693933290089106,
"grad_norm": 0.0005889105377718806,
"learning_rate": 0.0001435640972338709,
"loss": 46.0,
"step": 1076
},
{
"epoch": 0.14707589361920043,
"grad_norm": 0.00072791165439412,
"learning_rate": 0.00014346949755235944,
"loss": 46.0,
"step": 1077
},
{
"epoch": 0.14721245433750982,
"grad_norm": 0.0006283425609581172,
"learning_rate": 0.00014337484988182042,
"loss": 46.0,
"step": 1078
},
{
"epoch": 0.14734901505581918,
"grad_norm": 0.000486049015307799,
"learning_rate": 0.00014328015432674214,
"loss": 46.0,
"step": 1079
},
{
"epoch": 0.14748557577412857,
"grad_norm": 0.0007576828938908875,
"learning_rate": 0.00014318541099166555,
"loss": 46.0,
"step": 1080
},
{
"epoch": 0.14762213649243794,
"grad_norm": 0.0010862492490559816,
"learning_rate": 0.00014309061998118454,
"loss": 46.0,
"step": 1081
},
{
"epoch": 0.14775869721074733,
"grad_norm": 0.0005167250637896359,
"learning_rate": 0.00014299578139994557,
"loss": 46.0,
"step": 1082
},
{
"epoch": 0.1478952579290567,
"grad_norm": 0.002369736786931753,
"learning_rate": 0.00014290089535264755,
"loss": 46.0,
"step": 1083
},
{
"epoch": 0.1480318186473661,
"grad_norm": 0.0003688117431011051,
"learning_rate": 0.0001428059619440419,
"loss": 46.0,
"step": 1084
},
{
"epoch": 0.14816837936567545,
"grad_norm": 0.0003458712890278548,
"learning_rate": 0.00014271098127893218,
"loss": 46.0,
"step": 1085
},
{
"epoch": 0.14830494008398484,
"grad_norm": 0.0013860361650586128,
"learning_rate": 0.0001426159534621743,
"loss": 46.0,
"step": 1086
},
{
"epoch": 0.1484415008022942,
"grad_norm": 0.001035936875268817,
"learning_rate": 0.00014252087859867608,
"loss": 46.0,
"step": 1087
},
{
"epoch": 0.1485780615206036,
"grad_norm": 0.00632870476692915,
"learning_rate": 0.00014242575679339738,
"loss": 46.0,
"step": 1088
},
{
"epoch": 0.14871462223891296,
"grad_norm": 0.0006592991994693875,
"learning_rate": 0.00014233058815134978,
"loss": 46.0,
"step": 1089
},
{
"epoch": 0.14885118295722236,
"grad_norm": 0.0028478745371103287,
"learning_rate": 0.00014223537277759666,
"loss": 46.0,
"step": 1090
},
{
"epoch": 0.14898774367553172,
"grad_norm": 0.0005855032941326499,
"learning_rate": 0.00014214011077725292,
"loss": 46.0,
"step": 1091
},
{
"epoch": 0.1491243043938411,
"grad_norm": 0.0023942850530147552,
"learning_rate": 0.00014204480225548494,
"loss": 46.0,
"step": 1092
},
{
"epoch": 0.1492608651121505,
"grad_norm": 0.0013539772480726242,
"learning_rate": 0.00014194944731751058,
"loss": 46.0,
"step": 1093
},
{
"epoch": 0.14939742583045987,
"grad_norm": 0.0009589577093720436,
"learning_rate": 0.00014185404606859877,
"loss": 46.0,
"step": 1094
},
{
"epoch": 0.14953398654876926,
"grad_norm": 0.0003180347557645291,
"learning_rate": 0.00014175859861406966,
"loss": 46.0,
"step": 1095
},
{
"epoch": 0.14967054726707862,
"grad_norm": 0.0014342650538310409,
"learning_rate": 0.00014166310505929434,
"loss": 46.0,
"step": 1096
},
{
"epoch": 0.14980710798538802,
"grad_norm": 0.0006643772940151393,
"learning_rate": 0.00014156756550969492,
"loss": 46.0,
"step": 1097
},
{
"epoch": 0.14994366870369738,
"grad_norm": 0.001409722724929452,
"learning_rate": 0.00014147198007074415,
"loss": 46.0,
"step": 1098
},
{
"epoch": 0.15008022942200677,
"grad_norm": 0.0008714981959201396,
"learning_rate": 0.00014137634884796557,
"loss": 46.0,
"step": 1099
},
{
"epoch": 0.15021679014031614,
"grad_norm": 0.00160513399168849,
"learning_rate": 0.00014128067194693316,
"loss": 46.0,
"step": 1100
},
{
"epoch": 0.15035335085862553,
"grad_norm": 0.0004049557028338313,
"learning_rate": 0.0001411849494732713,
"loss": 46.0,
"step": 1101
},
{
"epoch": 0.1504899115769349,
"grad_norm": 0.0005186764756217599,
"learning_rate": 0.00014108918153265485,
"loss": 46.0,
"step": 1102
},
{
"epoch": 0.15062647229524428,
"grad_norm": 0.002497048582881689,
"learning_rate": 0.00014099336823080865,
"loss": 46.0,
"step": 1103
},
{
"epoch": 0.15076303301355365,
"grad_norm": 0.00020665116608142853,
"learning_rate": 0.00014089750967350781,
"loss": 46.0,
"step": 1104
},
{
"epoch": 0.15089959373186304,
"grad_norm": 0.0005294004804454744,
"learning_rate": 0.0001408016059665773,
"loss": 46.0,
"step": 1105
},
{
"epoch": 0.1510361544501724,
"grad_norm": 0.00033093662932515144,
"learning_rate": 0.00014070565721589195,
"loss": 46.0,
"step": 1106
},
{
"epoch": 0.1511727151684818,
"grad_norm": 0.0003122551424894482,
"learning_rate": 0.00014060966352737628,
"loss": 46.0,
"step": 1107
},
{
"epoch": 0.15130927588679116,
"grad_norm": 0.0006798842805437744,
"learning_rate": 0.00014051362500700447,
"loss": 46.0,
"step": 1108
},
{
"epoch": 0.15144583660510055,
"grad_norm": 0.0005911933840252459,
"learning_rate": 0.00014041754176080017,
"loss": 46.0,
"step": 1109
},
{
"epoch": 0.15158239732340992,
"grad_norm": 0.0010168857406824827,
"learning_rate": 0.00014032141389483648,
"loss": 46.0,
"step": 1110
},
{
"epoch": 0.1517189580417193,
"grad_norm": 0.00033409081515856087,
"learning_rate": 0.00014022524151523563,
"loss": 46.0,
"step": 1111
},
{
"epoch": 0.15185551876002867,
"grad_norm": 0.0002926045854110271,
"learning_rate": 0.00014012902472816907,
"loss": 46.0,
"step": 1112
},
{
"epoch": 0.15199207947833807,
"grad_norm": 0.0013335029361769557,
"learning_rate": 0.00014003276363985727,
"loss": 46.0,
"step": 1113
},
{
"epoch": 0.15212864019664743,
"grad_norm": 0.000854438403621316,
"learning_rate": 0.00013993645835656953,
"loss": 46.0,
"step": 1114
},
{
"epoch": 0.15226520091495682,
"grad_norm": 0.0009656418114900589,
"learning_rate": 0.00013984010898462416,
"loss": 46.0,
"step": 1115
},
{
"epoch": 0.1524017616332662,
"grad_norm": 0.00042812732863239944,
"learning_rate": 0.00013974371563038785,
"loss": 46.0,
"step": 1116
},
{
"epoch": 0.15253832235157558,
"grad_norm": 0.0006366227171383798,
"learning_rate": 0.00013964727840027604,
"loss": 46.0,
"step": 1117
},
{
"epoch": 0.15267488306988494,
"grad_norm": 0.0011450116289779544,
"learning_rate": 0.00013955079740075256,
"loss": 46.0,
"step": 1118
},
{
"epoch": 0.15281144378819433,
"grad_norm": 0.00023903950932435691,
"learning_rate": 0.00013945427273832954,
"loss": 46.0,
"step": 1119
},
{
"epoch": 0.1529480045065037,
"grad_norm": 0.0011047361185774207,
"learning_rate": 0.0001393577045195673,
"loss": 46.0,
"step": 1120
},
{
"epoch": 0.1530845652248131,
"grad_norm": 0.0004785344353877008,
"learning_rate": 0.0001392610928510743,
"loss": 46.0,
"step": 1121
},
{
"epoch": 0.15322112594312245,
"grad_norm": 0.0008018110529519618,
"learning_rate": 0.00013916443783950694,
"loss": 46.0,
"step": 1122
},
{
"epoch": 0.15335768666143185,
"grad_norm": 0.0012497154530137777,
"learning_rate": 0.00013906773959156948,
"loss": 46.0,
"step": 1123
},
{
"epoch": 0.1534942473797412,
"grad_norm": 0.00042997964192181826,
"learning_rate": 0.00013897099821401384,
"loss": 46.0,
"step": 1124
},
{
"epoch": 0.1536308080980506,
"grad_norm": 0.0003085716161876917,
"learning_rate": 0.00013887421381363968,
"loss": 46.0,
"step": 1125
},
{
"epoch": 0.15376736881635997,
"grad_norm": 0.000396199116948992,
"learning_rate": 0.00013877738649729405,
"loss": 46.0,
"step": 1126
},
{
"epoch": 0.15390392953466936,
"grad_norm": 0.0006908946088515222,
"learning_rate": 0.00013868051637187144,
"loss": 46.0,
"step": 1127
},
{
"epoch": 0.15404049025297872,
"grad_norm": 0.0003736602666322142,
"learning_rate": 0.00013858360354431355,
"loss": 46.0,
"step": 1128
},
{
"epoch": 0.15417705097128812,
"grad_norm": 0.0006938680890016258,
"learning_rate": 0.00013848664812160925,
"loss": 46.0,
"step": 1129
},
{
"epoch": 0.15431361168959748,
"grad_norm": 0.0005900769028812647,
"learning_rate": 0.00013838965021079446,
"loss": 46.0,
"step": 1130
},
{
"epoch": 0.15445017240790687,
"grad_norm": 0.002096734009683132,
"learning_rate": 0.00013829260991895197,
"loss": 46.0,
"step": 1131
},
{
"epoch": 0.15458673312621624,
"grad_norm": 0.0011866495478898287,
"learning_rate": 0.00013819552735321134,
"loss": 46.0,
"step": 1132
},
{
"epoch": 0.15472329384452563,
"grad_norm": 0.00037106405943632126,
"learning_rate": 0.00013809840262074885,
"loss": 46.0,
"step": 1133
},
{
"epoch": 0.154859854562835,
"grad_norm": 0.0006705286214128137,
"learning_rate": 0.0001380012358287873,
"loss": 46.0,
"step": 1134
},
{
"epoch": 0.15499641528114438,
"grad_norm": 0.00040015694685280323,
"learning_rate": 0.0001379040270845959,
"loss": 46.0,
"step": 1135
},
{
"epoch": 0.15513297599945375,
"grad_norm": 0.0005712392157875001,
"learning_rate": 0.00013780677649549025,
"loss": 46.0,
"step": 1136
},
{
"epoch": 0.15526953671776314,
"grad_norm": 0.00047067005652934313,
"learning_rate": 0.00013770948416883205,
"loss": 46.0,
"step": 1137
},
{
"epoch": 0.1554060974360725,
"grad_norm": 0.0008378413622267544,
"learning_rate": 0.00013761215021202916,
"loss": 46.0,
"step": 1138
},
{
"epoch": 0.1555426581543819,
"grad_norm": 0.0007883374928496778,
"learning_rate": 0.00013751477473253533,
"loss": 46.0,
"step": 1139
},
{
"epoch": 0.15567921887269126,
"grad_norm": 0.0012233637971803546,
"learning_rate": 0.0001374173578378502,
"loss": 46.0,
"step": 1140
},
{
"epoch": 0.15581577959100065,
"grad_norm": 0.004130385350435972,
"learning_rate": 0.00013731989963551913,
"loss": 46.0,
"step": 1141
},
{
"epoch": 0.15595234030931002,
"grad_norm": 0.0029437027405947447,
"learning_rate": 0.00013722240023313306,
"loss": 46.0,
"step": 1142
},
{
"epoch": 0.1560889010276194,
"grad_norm": 0.0003660391375888139,
"learning_rate": 0.00013712485973832838,
"loss": 46.0,
"step": 1143
},
{
"epoch": 0.15622546174592877,
"grad_norm": 0.0020895125344395638,
"learning_rate": 0.00013702727825878693,
"loss": 46.0,
"step": 1144
},
{
"epoch": 0.15636202246423817,
"grad_norm": 0.0016986053669825196,
"learning_rate": 0.00013692965590223573,
"loss": 46.0,
"step": 1145
},
{
"epoch": 0.15649858318254753,
"grad_norm": 0.0005671007093042135,
"learning_rate": 0.00013683199277644693,
"loss": 46.0,
"step": 1146
},
{
"epoch": 0.15663514390085692,
"grad_norm": 0.0009818869875743985,
"learning_rate": 0.00013673428898923774,
"loss": 46.0,
"step": 1147
},
{
"epoch": 0.15677170461916629,
"grad_norm": 0.0004315339319873601,
"learning_rate": 0.00013663654464847022,
"loss": 46.0,
"step": 1148
},
{
"epoch": 0.15690826533747568,
"grad_norm": 0.0006189457490108907,
"learning_rate": 0.0001365387598620512,
"loss": 46.0,
"step": 1149
},
{
"epoch": 0.15704482605578504,
"grad_norm": 0.0007538103964179754,
"learning_rate": 0.00013644093473793215,
"loss": 46.0,
"step": 1150
},
{
"epoch": 0.15718138677409443,
"grad_norm": 0.0005718530155718327,
"learning_rate": 0.00013634306938410911,
"loss": 46.0,
"step": 1151
},
{
"epoch": 0.1573179474924038,
"grad_norm": 0.0006374249351210892,
"learning_rate": 0.00013624516390862244,
"loss": 46.0,
"step": 1152
},
{
"epoch": 0.1574545082107132,
"grad_norm": 0.0007302735466510057,
"learning_rate": 0.00013614721841955692,
"loss": 46.0,
"step": 1153
},
{
"epoch": 0.15759106892902255,
"grad_norm": 0.0005098398542031646,
"learning_rate": 0.00013604923302504147,
"loss": 46.0,
"step": 1154
},
{
"epoch": 0.15772762964733195,
"grad_norm": 0.0005700102774426341,
"learning_rate": 0.00013595120783324902,
"loss": 46.0,
"step": 1155
},
{
"epoch": 0.1578641903656413,
"grad_norm": 0.00040146647370420396,
"learning_rate": 0.00013585314295239644,
"loss": 46.0,
"step": 1156
},
{
"epoch": 0.1580007510839507,
"grad_norm": 0.0008773392182774842,
"learning_rate": 0.00013575503849074444,
"loss": 46.0,
"step": 1157
},
{
"epoch": 0.15813731180226007,
"grad_norm": 0.0003678193024825305,
"learning_rate": 0.0001356568945565974,
"loss": 46.0,
"step": 1158
},
{
"epoch": 0.15827387252056946,
"grad_norm": 0.0013553998433053493,
"learning_rate": 0.0001355587112583033,
"loss": 46.0,
"step": 1159
},
{
"epoch": 0.15841043323887882,
"grad_norm": 0.0009887435007840395,
"learning_rate": 0.00013546048870425356,
"loss": 46.0,
"step": 1160
},
{
"epoch": 0.15854699395718821,
"grad_norm": 0.0019033915596082807,
"learning_rate": 0.00013536222700288303,
"loss": 46.0,
"step": 1161
},
{
"epoch": 0.1586835546754976,
"grad_norm": 0.0006128349923528731,
"learning_rate": 0.00013526392626266956,
"loss": 46.0,
"step": 1162
},
{
"epoch": 0.15882011539380697,
"grad_norm": 0.0005847832653671503,
"learning_rate": 0.00013516558659213432,
"loss": 46.0,
"step": 1163
},
{
"epoch": 0.15895667611211636,
"grad_norm": 0.0005258521996438503,
"learning_rate": 0.00013506720809984137,
"loss": 46.0,
"step": 1164
},
{
"epoch": 0.15909323683042573,
"grad_norm": 0.00040099999750964344,
"learning_rate": 0.0001349687908943976,
"loss": 46.0,
"step": 1165
},
{
"epoch": 0.15922979754873512,
"grad_norm": 0.0007036002352833748,
"learning_rate": 0.0001348703350844527,
"loss": 46.0,
"step": 1166
},
{
"epoch": 0.15936635826704448,
"grad_norm": 0.00037679230445064604,
"learning_rate": 0.00013477184077869892,
"loss": 46.0,
"step": 1167
},
{
"epoch": 0.15950291898535388,
"grad_norm": 0.0005223838961683214,
"learning_rate": 0.000134673308085871,
"loss": 46.0,
"step": 1168
},
{
"epoch": 0.15963947970366324,
"grad_norm": 0.0008007617434486747,
"learning_rate": 0.0001345747371147461,
"loss": 46.0,
"step": 1169
},
{
"epoch": 0.15977604042197263,
"grad_norm": 0.000517090258654207,
"learning_rate": 0.0001344761279741437,
"loss": 46.0,
"step": 1170
},
{
"epoch": 0.159912601140282,
"grad_norm": 0.0008526128367520869,
"learning_rate": 0.0001343774807729253,
"loss": 46.0,
"step": 1171
},
{
"epoch": 0.1600491618585914,
"grad_norm": 0.0011307375971227884,
"learning_rate": 0.0001342787956199945,
"loss": 46.0,
"step": 1172
},
{
"epoch": 0.16018572257690075,
"grad_norm": 0.00033380292006768286,
"learning_rate": 0.00013418007262429668,
"loss": 46.0,
"step": 1173
},
{
"epoch": 0.16032228329521014,
"grad_norm": 0.000278162129689008,
"learning_rate": 0.00013408131189481911,
"loss": 46.0,
"step": 1174
},
{
"epoch": 0.1604588440135195,
"grad_norm": 0.00258398219011724,
"learning_rate": 0.00013398251354059077,
"loss": 46.0,
"step": 1175
},
{
"epoch": 0.1605954047318289,
"grad_norm": 0.0006168753025121987,
"learning_rate": 0.000133883677670682,
"loss": 46.0,
"step": 1176
},
{
"epoch": 0.16073196545013826,
"grad_norm": 0.00029901755624450743,
"learning_rate": 0.0001337848043942047,
"loss": 46.0,
"step": 1177
},
{
"epoch": 0.16086852616844766,
"grad_norm": 0.000997790601104498,
"learning_rate": 0.00013368589382031196,
"loss": 46.0,
"step": 1178
},
{
"epoch": 0.16100508688675702,
"grad_norm": 0.0006558905588462949,
"learning_rate": 0.00013358694605819814,
"loss": 46.0,
"step": 1179
},
{
"epoch": 0.1611416476050664,
"grad_norm": 0.0006217307527549565,
"learning_rate": 0.00013348796121709862,
"loss": 46.0,
"step": 1180
},
{
"epoch": 0.16127820832337578,
"grad_norm": 0.0004898210754618049,
"learning_rate": 0.00013338893940628973,
"loss": 46.0,
"step": 1181
},
{
"epoch": 0.16141476904168517,
"grad_norm": 0.0009382545249536633,
"learning_rate": 0.00013328988073508852,
"loss": 46.0,
"step": 1182
},
{
"epoch": 0.16155132975999453,
"grad_norm": 0.0006358098471537232,
"learning_rate": 0.00013319078531285285,
"loss": 46.0,
"step": 1183
},
{
"epoch": 0.16168789047830393,
"grad_norm": 0.0012355463113635778,
"learning_rate": 0.00013309165324898112,
"loss": 46.0,
"step": 1184
},
{
"epoch": 0.1618244511966133,
"grad_norm": 0.0005907994927838445,
"learning_rate": 0.00013299248465291214,
"loss": 46.0,
"step": 1185
},
{
"epoch": 0.16196101191492268,
"grad_norm": 0.002715210895985365,
"learning_rate": 0.00013289327963412513,
"loss": 46.0,
"step": 1186
},
{
"epoch": 0.16209757263323205,
"grad_norm": 0.001038241432979703,
"learning_rate": 0.00013279403830213942,
"loss": 46.0,
"step": 1187
},
{
"epoch": 0.16223413335154144,
"grad_norm": 0.0015468295896425843,
"learning_rate": 0.00013269476076651447,
"loss": 46.0,
"step": 1188
},
{
"epoch": 0.1623706940698508,
"grad_norm": 0.0017287740483880043,
"learning_rate": 0.00013259544713684974,
"loss": 46.0,
"step": 1189
},
{
"epoch": 0.1625072547881602,
"grad_norm": 0.0018615883309394121,
"learning_rate": 0.00013249609752278454,
"loss": 46.0,
"step": 1190
},
{
"epoch": 0.16264381550646956,
"grad_norm": 0.00047380104660987854,
"learning_rate": 0.0001323967120339978,
"loss": 46.0,
"step": 1191
},
{
"epoch": 0.16278037622477895,
"grad_norm": 0.001483297673985362,
"learning_rate": 0.00013229729078020823,
"loss": 46.0,
"step": 1192
},
{
"epoch": 0.16291693694308831,
"grad_norm": 0.0004559273656923324,
"learning_rate": 0.00013219783387117385,
"loss": 46.0,
"step": 1193
},
{
"epoch": 0.1630534976613977,
"grad_norm": 0.0008615512633696198,
"learning_rate": 0.00013209834141669213,
"loss": 46.0,
"step": 1194
},
{
"epoch": 0.16319005837970707,
"grad_norm": 0.0005908702732995152,
"learning_rate": 0.0001319988135265998,
"loss": 46.0,
"step": 1195
},
{
"epoch": 0.16332661909801646,
"grad_norm": 0.0008730573463253677,
"learning_rate": 0.00013189925031077267,
"loss": 46.0,
"step": 1196
},
{
"epoch": 0.16346317981632583,
"grad_norm": 0.0012465447653084993,
"learning_rate": 0.00013179965187912554,
"loss": 46.0,
"step": 1197
},
{
"epoch": 0.16359974053463522,
"grad_norm": 0.0013986461563035846,
"learning_rate": 0.00013170001834161209,
"loss": 46.0,
"step": 1198
},
{
"epoch": 0.16373630125294458,
"grad_norm": 0.0011632711393758655,
"learning_rate": 0.0001316003498082248,
"loss": 46.0,
"step": 1199
},
{
"epoch": 0.16387286197125397,
"grad_norm": 0.001200903090648353,
"learning_rate": 0.0001315006463889948,
"loss": 46.0,
"step": 1200
},
{
"epoch": 0.16387286197125397,
"eval_loss": 11.5,
"eval_runtime": 20.5706,
"eval_samples_per_second": 149.923,
"eval_steps_per_second": 74.961,
"step": 1200
}
],
"logging_steps": 1,
"max_steps": 3000,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 300,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 42923841650688.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}