{ "best_metric": 11.5, "best_model_checkpoint": "miner_id_24/checkpoint-1000", "epoch": 2.4009603841536613, "eval_steps": 1000, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006002400960384153, "grad_norm": 1.8054486645269208e-05, "learning_rate": 6.666666666666667e-06, "loss": 23.0, "step": 1 }, { "epoch": 0.0006002400960384153, "eval_loss": 11.5, "eval_runtime": 5.3215, "eval_samples_per_second": 263.649, "eval_steps_per_second": 33.074, "step": 1 }, { "epoch": 0.0012004801920768306, "grad_norm": 2.727241735556163e-05, "learning_rate": 1.3333333333333333e-05, "loss": 23.0, "step": 2 }, { "epoch": 0.001800720288115246, "grad_norm": 2.7739322831621394e-05, "learning_rate": 2e-05, "loss": 23.0, "step": 3 }, { "epoch": 0.0024009603841536613, "grad_norm": 2.7980300728813745e-05, "learning_rate": 2.6666666666666667e-05, "loss": 23.0, "step": 4 }, { "epoch": 0.003001200480192077, "grad_norm": 3.064045085920952e-05, "learning_rate": 3.3333333333333335e-05, "loss": 23.0, "step": 5 }, { "epoch": 0.003601440576230492, "grad_norm": 2.1872214347240515e-05, "learning_rate": 4e-05, "loss": 23.0, "step": 6 }, { "epoch": 0.004201680672268907, "grad_norm": 3.888764695147984e-05, "learning_rate": 4.666666666666667e-05, "loss": 23.0, "step": 7 }, { "epoch": 0.004801920768307323, "grad_norm": 2.100978235830553e-05, "learning_rate": 5.333333333333333e-05, "loss": 23.0, "step": 8 }, { "epoch": 0.005402160864345739, "grad_norm": 2.6072715627378784e-05, "learning_rate": 6e-05, "loss": 23.0, "step": 9 }, { "epoch": 0.006002400960384154, "grad_norm": 1.7852040400612168e-05, "learning_rate": 6.666666666666667e-05, "loss": 23.0, "step": 10 }, { "epoch": 0.006602641056422569, "grad_norm": 2.71389744739281e-05, "learning_rate": 7.333333333333333e-05, "loss": 23.0, "step": 11 }, { "epoch": 0.007202881152460984, "grad_norm": 2.6971414627041668e-05, "learning_rate": 8e-05, "loss": 23.0, "step": 12 }, { "epoch": 0.0078031212484993995, "grad_norm": 2.012933873629663e-05, "learning_rate": 8.666666666666667e-05, "loss": 23.0, "step": 13 }, { "epoch": 0.008403361344537815, "grad_norm": 2.6451836674823426e-05, "learning_rate": 9.333333333333334e-05, "loss": 23.0, "step": 14 }, { "epoch": 0.00900360144057623, "grad_norm": 3.4380853321636096e-05, "learning_rate": 0.0001, "loss": 23.0, "step": 15 }, { "epoch": 0.009603841536614645, "grad_norm": 2.7640449843602255e-05, "learning_rate": 0.00010666666666666667, "loss": 23.0, "step": 16 }, { "epoch": 0.01020408163265306, "grad_norm": 1.8410619304631837e-05, "learning_rate": 0.00011333333333333334, "loss": 23.0, "step": 17 }, { "epoch": 0.010804321728691477, "grad_norm": 2.8392196327331476e-05, "learning_rate": 0.00012, "loss": 23.0, "step": 18 }, { "epoch": 0.011404561824729893, "grad_norm": 2.826511081366334e-05, "learning_rate": 0.00012666666666666666, "loss": 23.0, "step": 19 }, { "epoch": 0.012004801920768308, "grad_norm": 3.356917659402825e-05, "learning_rate": 0.00013333333333333334, "loss": 23.0, "step": 20 }, { "epoch": 0.012605042016806723, "grad_norm": 2.246521216875408e-05, "learning_rate": 0.00014, "loss": 23.0, "step": 21 }, { "epoch": 0.013205282112845138, "grad_norm": 2.638386831677053e-05, "learning_rate": 0.00014666666666666666, "loss": 23.0, "step": 22 }, { "epoch": 0.013805522208883553, "grad_norm": 3.957346052629873e-05, "learning_rate": 0.00015333333333333334, "loss": 23.0, "step": 23 }, { "epoch": 0.014405762304921969, "grad_norm": 2.2368900317815132e-05, "learning_rate": 0.00016, "loss": 23.0, "step": 24 }, { "epoch": 0.015006002400960384, "grad_norm": 2.7279023925075307e-05, "learning_rate": 0.0001666666666666667, "loss": 23.0, "step": 25 }, { "epoch": 0.015606242496998799, "grad_norm": 2.1262459995341487e-05, "learning_rate": 0.00017333333333333334, "loss": 23.0, "step": 26 }, { "epoch": 0.016206482593037214, "grad_norm": 2.396126728854142e-05, "learning_rate": 0.00018, "loss": 23.0, "step": 27 }, { "epoch": 0.01680672268907563, "grad_norm": 1.7820493667386472e-05, "learning_rate": 0.0001866666666666667, "loss": 23.0, "step": 28 }, { "epoch": 0.017406962785114045, "grad_norm": 2.8694874345092103e-05, "learning_rate": 0.00019333333333333333, "loss": 23.0, "step": 29 }, { "epoch": 0.01800720288115246, "grad_norm": 2.7091318770544603e-05, "learning_rate": 0.0002, "loss": 23.0, "step": 30 }, { "epoch": 0.018607442977190875, "grad_norm": 3.307936276542023e-05, "learning_rate": 0.0001999999982156286, "loss": 23.0, "step": 31 }, { "epoch": 0.01920768307322929, "grad_norm": 3.513343108352274e-05, "learning_rate": 0.00019999999286251453, "loss": 23.0, "step": 32 }, { "epoch": 0.019807923169267706, "grad_norm": 1.9473052816465497e-05, "learning_rate": 0.00019999998394065792, "loss": 23.0, "step": 33 }, { "epoch": 0.02040816326530612, "grad_norm": 2.3083173800841905e-05, "learning_rate": 0.00019999997145005914, "loss": 23.0, "step": 34 }, { "epoch": 0.02100840336134454, "grad_norm": 3.38441641360987e-05, "learning_rate": 0.0001999999553907186, "loss": 23.0, "step": 35 }, { "epoch": 0.021608643457382955, "grad_norm": 3.0117924325168133e-05, "learning_rate": 0.00019999993576263686, "loss": 23.0, "step": 36 }, { "epoch": 0.02220888355342137, "grad_norm": 2.34516519412864e-05, "learning_rate": 0.00019999991256581466, "loss": 23.0, "step": 37 }, { "epoch": 0.022809123649459785, "grad_norm": 3.247776839998551e-05, "learning_rate": 0.0001999998858002528, "loss": 23.0, "step": 38 }, { "epoch": 0.0234093637454982, "grad_norm": 2.7047510229749605e-05, "learning_rate": 0.00019999985546595225, "loss": 23.0, "step": 39 }, { "epoch": 0.024009603841536616, "grad_norm": 2.9209651984274387e-05, "learning_rate": 0.00019999982156291407, "loss": 23.0, "step": 40 }, { "epoch": 0.02460984393757503, "grad_norm": 2.5179328076774254e-05, "learning_rate": 0.00019999978409113953, "loss": 23.0, "step": 41 }, { "epoch": 0.025210084033613446, "grad_norm": 2.729222433117684e-05, "learning_rate": 0.0001999997430506299, "loss": 23.0, "step": 42 }, { "epoch": 0.02581032412965186, "grad_norm": 3.7065288779558614e-05, "learning_rate": 0.0001999996984413867, "loss": 23.0, "step": 43 }, { "epoch": 0.026410564225690276, "grad_norm": 3.8142901757964864e-05, "learning_rate": 0.00019999965026341144, "loss": 23.0, "step": 44 }, { "epoch": 0.02701080432172869, "grad_norm": 3.7107929529156536e-05, "learning_rate": 0.00019999959851670594, "loss": 23.0, "step": 45 }, { "epoch": 0.027611044417767107, "grad_norm": 4.2656189179979265e-05, "learning_rate": 0.00019999954320127196, "loss": 23.0, "step": 46 }, { "epoch": 0.028211284513805522, "grad_norm": 3.613159424276091e-05, "learning_rate": 0.00019999948431711153, "loss": 23.0, "step": 47 }, { "epoch": 0.028811524609843937, "grad_norm": 4.232913488522172e-05, "learning_rate": 0.00019999942186422673, "loss": 23.0, "step": 48 }, { "epoch": 0.029411764705882353, "grad_norm": 3.183892840752378e-05, "learning_rate": 0.00019999935584261982, "loss": 23.0, "step": 49 }, { "epoch": 0.030012004801920768, "grad_norm": 3.41582708642818e-05, "learning_rate": 0.0001999992862522931, "loss": 23.0, "step": 50 }, { "epoch": 0.030612244897959183, "grad_norm": 4.701867146650329e-05, "learning_rate": 0.0001999992130932491, "loss": 23.0, "step": 51 }, { "epoch": 0.031212484993997598, "grad_norm": 8.062861888902262e-05, "learning_rate": 0.00019999913636549037, "loss": 23.0, "step": 52 }, { "epoch": 0.03181272509003601, "grad_norm": 4.4446765969041735e-05, "learning_rate": 0.00019999905606901973, "loss": 23.0, "step": 53 }, { "epoch": 0.03241296518607443, "grad_norm": 7.205520523712039e-05, "learning_rate": 0.00019999897220384, "loss": 23.0, "step": 54 }, { "epoch": 0.033013205282112844, "grad_norm": 2.287816460011527e-05, "learning_rate": 0.0001999988847699542, "loss": 23.0, "step": 55 }, { "epoch": 0.03361344537815126, "grad_norm": 5.946783130639233e-05, "learning_rate": 0.0001999987937673654, "loss": 23.0, "step": 56 }, { "epoch": 0.034213685474189674, "grad_norm": 2.981833131343592e-05, "learning_rate": 0.0001999986991960769, "loss": 23.0, "step": 57 }, { "epoch": 0.03481392557022809, "grad_norm": 3.543734783306718e-05, "learning_rate": 0.00019999860105609206, "loss": 23.0, "step": 58 }, { "epoch": 0.035414165666266505, "grad_norm": 4.3358595576137304e-05, "learning_rate": 0.00019999849934741433, "loss": 23.0, "step": 59 }, { "epoch": 0.03601440576230492, "grad_norm": 4.9726917495718226e-05, "learning_rate": 0.00019999839407004743, "loss": 23.0, "step": 60 }, { "epoch": 0.036614645858343335, "grad_norm": 4.8018726374721155e-05, "learning_rate": 0.00019999828522399504, "loss": 23.0, "step": 61 }, { "epoch": 0.03721488595438175, "grad_norm": 4.2448387830518186e-05, "learning_rate": 0.0001999981728092611, "loss": 23.0, "step": 62 }, { "epoch": 0.037815126050420166, "grad_norm": 6.509755621664226e-05, "learning_rate": 0.0001999980568258496, "loss": 23.0, "step": 63 }, { "epoch": 0.03841536614645858, "grad_norm": 0.00011501594417495653, "learning_rate": 0.00019999793727376467, "loss": 23.0, "step": 64 }, { "epoch": 0.039015606242496996, "grad_norm": 6.020090222591534e-05, "learning_rate": 0.00019999781415301057, "loss": 23.0, "step": 65 }, { "epoch": 0.03961584633853541, "grad_norm": 7.283796003321186e-05, "learning_rate": 0.00019999768746359174, "loss": 23.0, "step": 66 }, { "epoch": 0.040216086434573826, "grad_norm": 9.18004079721868e-05, "learning_rate": 0.00019999755720551262, "loss": 23.0, "step": 67 }, { "epoch": 0.04081632653061224, "grad_norm": 6.185177335282788e-05, "learning_rate": 0.00019999742337877793, "loss": 23.0, "step": 68 }, { "epoch": 0.04141656662665066, "grad_norm": 5.33986822119914e-05, "learning_rate": 0.00019999728598339247, "loss": 23.0, "step": 69 }, { "epoch": 0.04201680672268908, "grad_norm": 9.752002370078117e-05, "learning_rate": 0.00019999714501936104, "loss": 23.0, "step": 70 }, { "epoch": 0.042617046818727494, "grad_norm": 0.0001452133001293987, "learning_rate": 0.00019999700048668875, "loss": 23.0, "step": 71 }, { "epoch": 0.04321728691476591, "grad_norm": 6.744873826391995e-05, "learning_rate": 0.00019999685238538073, "loss": 23.0, "step": 72 }, { "epoch": 0.043817527010804325, "grad_norm": 9.654606401454657e-05, "learning_rate": 0.00019999670071544227, "loss": 23.0, "step": 73 }, { "epoch": 0.04441776710684274, "grad_norm": 7.846657535992563e-05, "learning_rate": 0.0001999965454768788, "loss": 23.0, "step": 74 }, { "epoch": 0.045018007202881155, "grad_norm": 8.308366523124278e-05, "learning_rate": 0.00019999638666969583, "loss": 23.0, "step": 75 }, { "epoch": 0.04561824729891957, "grad_norm": 9.814684744924307e-05, "learning_rate": 0.00019999622429389905, "loss": 23.0, "step": 76 }, { "epoch": 0.046218487394957986, "grad_norm": 9.901592420646921e-05, "learning_rate": 0.0001999960583494942, "loss": 23.0, "step": 77 }, { "epoch": 0.0468187274909964, "grad_norm": 5.98341939621605e-05, "learning_rate": 0.00019999588883648733, "loss": 23.0, "step": 78 }, { "epoch": 0.047418967587034816, "grad_norm": 7.498155173379928e-05, "learning_rate": 0.00019999571575488433, "loss": 23.0, "step": 79 }, { "epoch": 0.04801920768307323, "grad_norm": 0.00012995477300137281, "learning_rate": 0.00019999553910469148, "loss": 23.0, "step": 80 }, { "epoch": 0.048619447779111646, "grad_norm": 7.154367631301284e-05, "learning_rate": 0.00019999535888591508, "loss": 23.0, "step": 81 }, { "epoch": 0.04921968787515006, "grad_norm": 6.733864574925974e-05, "learning_rate": 0.00019999517509856153, "loss": 23.0, "step": 82 }, { "epoch": 0.04981992797118848, "grad_norm": 6.913344986969605e-05, "learning_rate": 0.00019999498774263738, "loss": 23.0, "step": 83 }, { "epoch": 0.05042016806722689, "grad_norm": 7.432701386278495e-05, "learning_rate": 0.00019999479681814935, "loss": 23.0, "step": 84 }, { "epoch": 0.05102040816326531, "grad_norm": 0.00011996757530141622, "learning_rate": 0.00019999460232510426, "loss": 23.0, "step": 85 }, { "epoch": 0.05162064825930372, "grad_norm": 5.151230652700178e-05, "learning_rate": 0.000199994404263509, "loss": 23.0, "step": 86 }, { "epoch": 0.05222088835534214, "grad_norm": 4.7111861931625754e-05, "learning_rate": 0.00019999420263337066, "loss": 23.0, "step": 87 }, { "epoch": 0.05282112845138055, "grad_norm": 0.00010227777238469571, "learning_rate": 0.00019999399743469648, "loss": 23.0, "step": 88 }, { "epoch": 0.05342136854741897, "grad_norm": 6.315651990007609e-05, "learning_rate": 0.00019999378866749373, "loss": 23.0, "step": 89 }, { "epoch": 0.05402160864345738, "grad_norm": 6.648873386438936e-05, "learning_rate": 0.00019999357633176986, "loss": 23.0, "step": 90 }, { "epoch": 0.0546218487394958, "grad_norm": 0.00017616510740481317, "learning_rate": 0.0001999933604275325, "loss": 23.0, "step": 91 }, { "epoch": 0.055222088835534214, "grad_norm": 0.00013492346624843776, "learning_rate": 0.0001999931409547893, "loss": 23.0, "step": 92 }, { "epoch": 0.05582232893157263, "grad_norm": 0.00010630957694957033, "learning_rate": 0.00019999291791354811, "loss": 23.0, "step": 93 }, { "epoch": 0.056422569027611044, "grad_norm": 0.00013615177886094898, "learning_rate": 0.0001999926913038169, "loss": 23.0, "step": 94 }, { "epoch": 0.05702280912364946, "grad_norm": 0.00013460556510835886, "learning_rate": 0.00019999246112560376, "loss": 23.0, "step": 95 }, { "epoch": 0.057623049219687875, "grad_norm": 0.00021354878845158964, "learning_rate": 0.00019999222737891685, "loss": 23.0, "step": 96 }, { "epoch": 0.05822328931572629, "grad_norm": 0.00010901458881562576, "learning_rate": 0.0001999919900637646, "loss": 23.0, "step": 97 }, { "epoch": 0.058823529411764705, "grad_norm": 0.00015199890185613185, "learning_rate": 0.00019999174918015542, "loss": 23.0, "step": 98 }, { "epoch": 0.05942376950780312, "grad_norm": 0.00010835780994966626, "learning_rate": 0.0001999915047280979, "loss": 23.0, "step": 99 }, { "epoch": 0.060024009603841535, "grad_norm": 0.0001574648340465501, "learning_rate": 0.00019999125670760082, "loss": 23.0, "step": 100 }, { "epoch": 0.06062424969987995, "grad_norm": 9.17216602829285e-05, "learning_rate": 0.00019999100511867296, "loss": 23.0, "step": 101 }, { "epoch": 0.061224489795918366, "grad_norm": 0.00016906972450669855, "learning_rate": 0.00019999074996132336, "loss": 23.0, "step": 102 }, { "epoch": 0.06182472989195678, "grad_norm": 0.00029384472873061895, "learning_rate": 0.00019999049123556111, "loss": 23.0, "step": 103 }, { "epoch": 0.062424969987995196, "grad_norm": 0.0001368739758618176, "learning_rate": 0.0001999902289413954, "loss": 23.0, "step": 104 }, { "epoch": 0.06302521008403361, "grad_norm": 6.585185474250466e-05, "learning_rate": 0.00019998996307883565, "loss": 23.0, "step": 105 }, { "epoch": 0.06362545018007203, "grad_norm": 0.0002683665370568633, "learning_rate": 0.00019998969364789132, "loss": 23.0, "step": 106 }, { "epoch": 0.06422569027611044, "grad_norm": 0.0001274021778954193, "learning_rate": 0.00019998942064857202, "loss": 23.0, "step": 107 }, { "epoch": 0.06482593037214886, "grad_norm": 0.00012011367653030902, "learning_rate": 0.00019998914408088752, "loss": 23.0, "step": 108 }, { "epoch": 0.06542617046818727, "grad_norm": 0.00017174589447677135, "learning_rate": 0.00019998886394484764, "loss": 23.0, "step": 109 }, { "epoch": 0.06602641056422569, "grad_norm": 0.00015978926967363805, "learning_rate": 0.00019998858024046243, "loss": 23.0, "step": 110 }, { "epoch": 0.0666266506602641, "grad_norm": 0.00011685083882184699, "learning_rate": 0.000199988292967742, "loss": 23.0, "step": 111 }, { "epoch": 0.06722689075630252, "grad_norm": 0.00012352668272797018, "learning_rate": 0.00019998800212669656, "loss": 23.0, "step": 112 }, { "epoch": 0.06782713085234093, "grad_norm": 0.00013859743194188923, "learning_rate": 0.00019998770771733658, "loss": 23.0, "step": 113 }, { "epoch": 0.06842737094837935, "grad_norm": 0.00013077691255602986, "learning_rate": 0.00019998740973967244, "loss": 23.0, "step": 114 }, { "epoch": 0.06902761104441776, "grad_norm": 0.00024535699049010873, "learning_rate": 0.0001999871081937149, "loss": 23.0, "step": 115 }, { "epoch": 0.06962785114045618, "grad_norm": 0.00018474037642590702, "learning_rate": 0.00019998680307947464, "loss": 23.0, "step": 116 }, { "epoch": 0.0702280912364946, "grad_norm": 0.0001755363482516259, "learning_rate": 0.00019998649439696258, "loss": 23.0, "step": 117 }, { "epoch": 0.07082833133253301, "grad_norm": 0.00021930545335635543, "learning_rate": 0.00019998618214618974, "loss": 23.0, "step": 118 }, { "epoch": 0.07142857142857142, "grad_norm": 0.00014808699779678136, "learning_rate": 0.00019998586632716725, "loss": 23.0, "step": 119 }, { "epoch": 0.07202881152460984, "grad_norm": 0.00011154917592648417, "learning_rate": 0.0001999855469399064, "loss": 23.0, "step": 120 }, { "epoch": 0.07262905162064826, "grad_norm": 0.00016951010911725461, "learning_rate": 0.00019998522398441851, "loss": 23.0, "step": 121 }, { "epoch": 0.07322929171668667, "grad_norm": 0.0003669381549116224, "learning_rate": 0.00019998489746071526, "loss": 23.0, "step": 122 }, { "epoch": 0.07382953181272509, "grad_norm": 0.00013156913337297738, "learning_rate": 0.00019998456736880816, "loss": 23.0, "step": 123 }, { "epoch": 0.0744297719087635, "grad_norm": 0.0001488238776801154, "learning_rate": 0.00019998423370870902, "loss": 23.0, "step": 124 }, { "epoch": 0.07503001200480192, "grad_norm": 0.00010793282854137942, "learning_rate": 0.00019998389648042978, "loss": 23.0, "step": 125 }, { "epoch": 0.07563025210084033, "grad_norm": 0.00041736027924343944, "learning_rate": 0.0001999835556839825, "loss": 23.0, "step": 126 }, { "epoch": 0.07623049219687875, "grad_norm": 0.00013802212197333574, "learning_rate": 0.0001999832113193793, "loss": 23.0, "step": 127 }, { "epoch": 0.07683073229291716, "grad_norm": 0.00014316567103378475, "learning_rate": 0.00019998286338663242, "loss": 23.0, "step": 128 }, { "epoch": 0.07743097238895558, "grad_norm": 0.0005471606855280697, "learning_rate": 0.0001999825118857544, "loss": 23.0, "step": 129 }, { "epoch": 0.07803121248499399, "grad_norm": 0.00032816806924529374, "learning_rate": 0.00019998215681675768, "loss": 23.0, "step": 130 }, { "epoch": 0.07863145258103241, "grad_norm": 0.0002986426989082247, "learning_rate": 0.000199981798179655, "loss": 23.0, "step": 131 }, { "epoch": 0.07923169267707082, "grad_norm": 0.0002443274424877018, "learning_rate": 0.0001999814359744591, "loss": 23.0, "step": 132 }, { "epoch": 0.07983193277310924, "grad_norm": 0.00016547096311114728, "learning_rate": 0.00019998107020118293, "loss": 23.0, "step": 133 }, { "epoch": 0.08043217286914765, "grad_norm": 0.0002686209627427161, "learning_rate": 0.00019998070085983956, "loss": 23.0, "step": 134 }, { "epoch": 0.08103241296518607, "grad_norm": 0.00021909335919190198, "learning_rate": 0.00019998032795044216, "loss": 23.0, "step": 135 }, { "epoch": 0.08163265306122448, "grad_norm": 0.00032937468495219946, "learning_rate": 0.00019997995147300404, "loss": 23.0, "step": 136 }, { "epoch": 0.0822328931572629, "grad_norm": 0.0002638402802404016, "learning_rate": 0.0001999795714275386, "loss": 23.0, "step": 137 }, { "epoch": 0.08283313325330131, "grad_norm": 0.0002468010934535414, "learning_rate": 0.00019997918781405947, "loss": 23.0, "step": 138 }, { "epoch": 0.08343337334933974, "grad_norm": 0.0004808467347174883, "learning_rate": 0.00019997880063258027, "loss": 23.0, "step": 139 }, { "epoch": 0.08403361344537816, "grad_norm": 0.0004602950648404658, "learning_rate": 0.00019997840988311488, "loss": 23.0, "step": 140 }, { "epoch": 0.08463385354141657, "grad_norm": 0.0004139028023928404, "learning_rate": 0.00019997801556567718, "loss": 23.0, "step": 141 }, { "epoch": 0.08523409363745499, "grad_norm": 0.0002881626132875681, "learning_rate": 0.00019997761768028133, "loss": 23.0, "step": 142 }, { "epoch": 0.0858343337334934, "grad_norm": 0.00026587379397824407, "learning_rate": 0.00019997721622694146, "loss": 23.0, "step": 143 }, { "epoch": 0.08643457382953182, "grad_norm": 0.00015115221322048455, "learning_rate": 0.0001999768112056719, "loss": 23.0, "step": 144 }, { "epoch": 0.08703481392557023, "grad_norm": 0.0003756193327717483, "learning_rate": 0.00019997640261648712, "loss": 23.0, "step": 145 }, { "epoch": 0.08763505402160865, "grad_norm": 0.0002508624747861177, "learning_rate": 0.0001999759904594017, "loss": 23.0, "step": 146 }, { "epoch": 0.08823529411764706, "grad_norm": 0.0003372096107341349, "learning_rate": 0.00019997557473443037, "loss": 23.0, "step": 147 }, { "epoch": 0.08883553421368548, "grad_norm": 0.00023483167751692235, "learning_rate": 0.0001999751554415879, "loss": 23.0, "step": 148 }, { "epoch": 0.0894357743097239, "grad_norm": 0.00021300539083313197, "learning_rate": 0.00019997473258088935, "loss": 23.0, "step": 149 }, { "epoch": 0.09003601440576231, "grad_norm": 0.0002520008129067719, "learning_rate": 0.00019997430615234976, "loss": 23.0, "step": 150 }, { "epoch": 0.09063625450180073, "grad_norm": 0.0002747656835708767, "learning_rate": 0.00019997387615598432, "loss": 23.0, "step": 151 }, { "epoch": 0.09123649459783914, "grad_norm": 0.00019271083874627948, "learning_rate": 0.0001999734425918084, "loss": 23.0, "step": 152 }, { "epoch": 0.09183673469387756, "grad_norm": 0.00039355031913146377, "learning_rate": 0.0001999730054598375, "loss": 23.0, "step": 153 }, { "epoch": 0.09243697478991597, "grad_norm": 0.000462890398921445, "learning_rate": 0.0001999725647600872, "loss": 23.0, "step": 154 }, { "epoch": 0.09303721488595439, "grad_norm": 0.0004298924177419394, "learning_rate": 0.00019997212049257317, "loss": 23.0, "step": 155 }, { "epoch": 0.0936374549819928, "grad_norm": 0.00041610770858824253, "learning_rate": 0.00019997167265731135, "loss": 23.0, "step": 156 }, { "epoch": 0.09423769507803122, "grad_norm": 0.00030013787909410894, "learning_rate": 0.0001999712212543177, "loss": 23.0, "step": 157 }, { "epoch": 0.09483793517406963, "grad_norm": 0.00035850016865879297, "learning_rate": 0.00019997076628360834, "loss": 23.0, "step": 158 }, { "epoch": 0.09543817527010805, "grad_norm": 0.00044739991426467896, "learning_rate": 0.00019997030774519944, "loss": 23.0, "step": 159 }, { "epoch": 0.09603841536614646, "grad_norm": 0.0003083112242165953, "learning_rate": 0.00019996984563910747, "loss": 23.0, "step": 160 }, { "epoch": 0.09663865546218488, "grad_norm": 0.0003727200091816485, "learning_rate": 0.00019996937996534883, "loss": 23.0, "step": 161 }, { "epoch": 0.09723889555822329, "grad_norm": 0.00040038779843598604, "learning_rate": 0.00019996891072394016, "loss": 23.0, "step": 162 }, { "epoch": 0.09783913565426171, "grad_norm": 0.000661612895783037, "learning_rate": 0.00019996843791489824, "loss": 23.0, "step": 163 }, { "epoch": 0.09843937575030012, "grad_norm": 0.00018554573762230575, "learning_rate": 0.00019996796153823988, "loss": 23.0, "step": 164 }, { "epoch": 0.09903961584633854, "grad_norm": 0.0003709488082677126, "learning_rate": 0.00019996748159398215, "loss": 23.0, "step": 165 }, { "epoch": 0.09963985594237695, "grad_norm": 0.00026602845173329115, "learning_rate": 0.00019996699808214215, "loss": 23.0, "step": 166 }, { "epoch": 0.10024009603841537, "grad_norm": 0.0003050805244129151, "learning_rate": 0.00019996651100273713, "loss": 23.0, "step": 167 }, { "epoch": 0.10084033613445378, "grad_norm": 0.0006026671035215259, "learning_rate": 0.00019996602035578448, "loss": 23.0, "step": 168 }, { "epoch": 0.1014405762304922, "grad_norm": 0.00023183878511190414, "learning_rate": 0.0001999655261413017, "loss": 23.0, "step": 169 }, { "epoch": 0.10204081632653061, "grad_norm": 0.00046876040869392455, "learning_rate": 0.00019996502835930644, "loss": 23.0, "step": 170 }, { "epoch": 0.10264105642256903, "grad_norm": 0.00031560304341837764, "learning_rate": 0.00019996452700981644, "loss": 23.0, "step": 171 }, { "epoch": 0.10324129651860744, "grad_norm": 0.0005790601135231555, "learning_rate": 0.00019996402209284964, "loss": 23.0, "step": 172 }, { "epoch": 0.10384153661464586, "grad_norm": 0.00031595080508850515, "learning_rate": 0.00019996351360842402, "loss": 23.0, "step": 173 }, { "epoch": 0.10444177671068428, "grad_norm": 0.0004917896003462374, "learning_rate": 0.00019996300155655775, "loss": 23.0, "step": 174 }, { "epoch": 0.10504201680672269, "grad_norm": 0.00028941556229256094, "learning_rate": 0.00019996248593726908, "loss": 23.0, "step": 175 }, { "epoch": 0.1056422569027611, "grad_norm": 0.0005954919033683836, "learning_rate": 0.0001999619667505764, "loss": 23.0, "step": 176 }, { "epoch": 0.10624249699879952, "grad_norm": 0.0004171891196165234, "learning_rate": 0.00019996144399649825, "loss": 23.0, "step": 177 }, { "epoch": 0.10684273709483794, "grad_norm": 0.00026635677204467356, "learning_rate": 0.00019996091767505333, "loss": 23.0, "step": 178 }, { "epoch": 0.10744297719087635, "grad_norm": 0.00038086227141320705, "learning_rate": 0.00019996038778626038, "loss": 23.0, "step": 179 }, { "epoch": 0.10804321728691477, "grad_norm": 0.0007933559827506542, "learning_rate": 0.00019995985433013833, "loss": 23.0, "step": 180 }, { "epoch": 0.10864345738295318, "grad_norm": 0.00028796150581911206, "learning_rate": 0.00019995931730670618, "loss": 23.0, "step": 181 }, { "epoch": 0.1092436974789916, "grad_norm": 0.0004604592395480722, "learning_rate": 0.00019995877671598314, "loss": 23.0, "step": 182 }, { "epoch": 0.10984393757503001, "grad_norm": 0.0002964911109302193, "learning_rate": 0.00019995823255798846, "loss": 23.0, "step": 183 }, { "epoch": 0.11044417767106843, "grad_norm": 0.00046794061199761927, "learning_rate": 0.00019995768483274163, "loss": 23.0, "step": 184 }, { "epoch": 0.11104441776710684, "grad_norm": 0.00039684621151536703, "learning_rate": 0.0001999571335402621, "loss": 23.0, "step": 185 }, { "epoch": 0.11164465786314526, "grad_norm": 0.0006988070090301335, "learning_rate": 0.00019995657868056963, "loss": 23.0, "step": 186 }, { "epoch": 0.11224489795918367, "grad_norm": 0.000538469001185149, "learning_rate": 0.00019995602025368395, "loss": 23.0, "step": 187 }, { "epoch": 0.11284513805522209, "grad_norm": 0.0002729691914282739, "learning_rate": 0.00019995545825962508, "loss": 23.0, "step": 188 }, { "epoch": 0.1134453781512605, "grad_norm": 0.00039077899418771267, "learning_rate": 0.00019995489269841296, "loss": 23.0, "step": 189 }, { "epoch": 0.11404561824729892, "grad_norm": 0.0004048171394970268, "learning_rate": 0.00019995432357006786, "loss": 23.0, "step": 190 }, { "epoch": 0.11464585834333733, "grad_norm": 0.0005215174751356244, "learning_rate": 0.00019995375087461005, "loss": 23.0, "step": 191 }, { "epoch": 0.11524609843937575, "grad_norm": 0.0004714977403637022, "learning_rate": 0.00019995317461206003, "loss": 23.0, "step": 192 }, { "epoch": 0.11584633853541416, "grad_norm": 0.0005683483323082328, "learning_rate": 0.0001999525947824383, "loss": 23.0, "step": 193 }, { "epoch": 0.11644657863145258, "grad_norm": 0.00026816027821041644, "learning_rate": 0.00019995201138576553, "loss": 23.0, "step": 194 }, { "epoch": 0.117046818727491, "grad_norm": 0.00048822627286426723, "learning_rate": 0.0001999514244220626, "loss": 23.0, "step": 195 }, { "epoch": 0.11764705882352941, "grad_norm": 0.0006117990124039352, "learning_rate": 0.00019995083389135046, "loss": 23.0, "step": 196 }, { "epoch": 0.11824729891956783, "grad_norm": 0.0009073524852283299, "learning_rate": 0.00019995023979365014, "loss": 23.0, "step": 197 }, { "epoch": 0.11884753901560624, "grad_norm": 0.0006450892542488873, "learning_rate": 0.00019994964212898287, "loss": 23.0, "step": 198 }, { "epoch": 0.11944777911164466, "grad_norm": 0.0005826326669193804, "learning_rate": 0.00019994904089737, "loss": 23.0, "step": 199 }, { "epoch": 0.12004801920768307, "grad_norm": 0.0007119604852050543, "learning_rate": 0.00019994843609883294, "loss": 23.0, "step": 200 }, { "epoch": 0.12064825930372149, "grad_norm": 0.0005299604963511229, "learning_rate": 0.0001999478277333933, "loss": 23.0, "step": 201 }, { "epoch": 0.1212484993997599, "grad_norm": 0.0006639006896875799, "learning_rate": 0.00019994721580107277, "loss": 23.0, "step": 202 }, { "epoch": 0.12184873949579832, "grad_norm": 0.0006667767884209752, "learning_rate": 0.0001999466003018932, "loss": 23.0, "step": 203 }, { "epoch": 0.12244897959183673, "grad_norm": 0.00042130585643462837, "learning_rate": 0.00019994598123587655, "loss": 23.0, "step": 204 }, { "epoch": 0.12304921968787515, "grad_norm": 0.0011373097077012062, "learning_rate": 0.00019994535860304495, "loss": 23.0, "step": 205 }, { "epoch": 0.12364945978391356, "grad_norm": 0.0016360898735001683, "learning_rate": 0.0001999447324034206, "loss": 23.0, "step": 206 }, { "epoch": 0.12424969987995198, "grad_norm": 0.0004542885872069746, "learning_rate": 0.0001999441026370258, "loss": 23.0, "step": 207 }, { "epoch": 0.12484993997599039, "grad_norm": 0.0004969018045812845, "learning_rate": 0.00019994346930388312, "loss": 23.0, "step": 208 }, { "epoch": 0.12545018007202882, "grad_norm": 0.000546945258975029, "learning_rate": 0.00019994283240401504, "loss": 23.0, "step": 209 }, { "epoch": 0.12605042016806722, "grad_norm": 0.0007595718489028513, "learning_rate": 0.0001999421919374444, "loss": 23.0, "step": 210 }, { "epoch": 0.12665066026410565, "grad_norm": 0.000410681328503415, "learning_rate": 0.000199941547904194, "loss": 23.0, "step": 211 }, { "epoch": 0.12725090036014405, "grad_norm": 0.0005949096521362662, "learning_rate": 0.00019994090030428684, "loss": 23.0, "step": 212 }, { "epoch": 0.12785114045618248, "grad_norm": 0.0006973805138841271, "learning_rate": 0.000199940249137746, "loss": 23.0, "step": 213 }, { "epoch": 0.12845138055222088, "grad_norm": 0.0003782823623623699, "learning_rate": 0.00019993959440459474, "loss": 23.0, "step": 214 }, { "epoch": 0.1290516206482593, "grad_norm": 0.0008317967294715345, "learning_rate": 0.00019993893610485648, "loss": 23.0, "step": 215 }, { "epoch": 0.12965186074429771, "grad_norm": 0.000514440587721765, "learning_rate": 0.00019993827423855464, "loss": 23.0, "step": 216 }, { "epoch": 0.13025210084033614, "grad_norm": 0.0004617723752744496, "learning_rate": 0.00019993760880571287, "loss": 23.0, "step": 217 }, { "epoch": 0.13085234093637454, "grad_norm": 0.0009777372470125556, "learning_rate": 0.0001999369398063549, "loss": 23.0, "step": 218 }, { "epoch": 0.13145258103241297, "grad_norm": 0.0005835364572703838, "learning_rate": 0.00019993626724050463, "loss": 23.0, "step": 219 }, { "epoch": 0.13205282112845138, "grad_norm": 0.0005018878728151321, "learning_rate": 0.00019993559110818602, "loss": 23.0, "step": 220 }, { "epoch": 0.1326530612244898, "grad_norm": 0.0006578268366865814, "learning_rate": 0.00019993491140942327, "loss": 23.0, "step": 221 }, { "epoch": 0.1332533013205282, "grad_norm": 0.0007726168259978294, "learning_rate": 0.0001999342281442406, "loss": 23.0, "step": 222 }, { "epoch": 0.13385354141656663, "grad_norm": 0.0008200584561564028, "learning_rate": 0.00019993354131266232, "loss": 23.0, "step": 223 }, { "epoch": 0.13445378151260504, "grad_norm": 0.0005691193509846926, "learning_rate": 0.00019993285091471306, "loss": 23.0, "step": 224 }, { "epoch": 0.13505402160864347, "grad_norm": 0.0006812798674218357, "learning_rate": 0.00019993215695041742, "loss": 23.0, "step": 225 }, { "epoch": 0.13565426170468187, "grad_norm": 0.0006009634817019105, "learning_rate": 0.00019993145941980012, "loss": 23.0, "step": 226 }, { "epoch": 0.1362545018007203, "grad_norm": 0.0005906272563152015, "learning_rate": 0.0001999307583228861, "loss": 23.0, "step": 227 }, { "epoch": 0.1368547418967587, "grad_norm": 0.0009002196020446718, "learning_rate": 0.0001999300536597004, "loss": 23.0, "step": 228 }, { "epoch": 0.13745498199279713, "grad_norm": 0.0007381000905297697, "learning_rate": 0.0001999293454302681, "loss": 23.0, "step": 229 }, { "epoch": 0.13805522208883553, "grad_norm": 0.0004923134110867977, "learning_rate": 0.00019992863363461455, "loss": 23.0, "step": 230 }, { "epoch": 0.13865546218487396, "grad_norm": 0.0006840071291662753, "learning_rate": 0.00019992791827276504, "loss": 23.0, "step": 231 }, { "epoch": 0.13925570228091236, "grad_norm": 0.0008459037635475397, "learning_rate": 0.00019992719934474522, "loss": 23.0, "step": 232 }, { "epoch": 0.1398559423769508, "grad_norm": 0.0010143484687432647, "learning_rate": 0.0001999264768505807, "loss": 23.0, "step": 233 }, { "epoch": 0.1404561824729892, "grad_norm": 0.00038011017022654414, "learning_rate": 0.00019992575079029728, "loss": 23.0, "step": 234 }, { "epoch": 0.14105642256902762, "grad_norm": 0.0005543521838262677, "learning_rate": 0.0001999250211639208, "loss": 23.0, "step": 235 }, { "epoch": 0.14165666266506602, "grad_norm": 0.00035856247995980084, "learning_rate": 0.00019992428797147737, "loss": 23.0, "step": 236 }, { "epoch": 0.14225690276110445, "grad_norm": 0.0007775566191412508, "learning_rate": 0.00019992355121299318, "loss": 23.0, "step": 237 }, { "epoch": 0.14285714285714285, "grad_norm": 0.0007267727633006871, "learning_rate": 0.00019992281088849445, "loss": 23.0, "step": 238 }, { "epoch": 0.14345738295318128, "grad_norm": 0.0004261432040948421, "learning_rate": 0.00019992206699800762, "loss": 23.0, "step": 239 }, { "epoch": 0.14405762304921968, "grad_norm": 0.0008677646401338279, "learning_rate": 0.0001999213195415593, "loss": 23.0, "step": 240 }, { "epoch": 0.1446578631452581, "grad_norm": 0.0005699426401406527, "learning_rate": 0.00019992056851917607, "loss": 23.0, "step": 241 }, { "epoch": 0.1452581032412965, "grad_norm": 0.0007082479423843324, "learning_rate": 0.00019991981393088475, "loss": 23.0, "step": 242 }, { "epoch": 0.14585834333733494, "grad_norm": 0.000806904979981482, "learning_rate": 0.00019991905577671233, "loss": 23.0, "step": 243 }, { "epoch": 0.14645858343337334, "grad_norm": 0.0006296361098065972, "learning_rate": 0.00019991829405668584, "loss": 23.0, "step": 244 }, { "epoch": 0.14705882352941177, "grad_norm": 0.0009300713427364826, "learning_rate": 0.00019991752877083246, "loss": 23.0, "step": 245 }, { "epoch": 0.14765906362545017, "grad_norm": 0.0010097339982166886, "learning_rate": 0.00019991675991917946, "loss": 23.0, "step": 246 }, { "epoch": 0.1482593037214886, "grad_norm": 0.0007776516722515225, "learning_rate": 0.00019991598750175436, "loss": 23.0, "step": 247 }, { "epoch": 0.148859543817527, "grad_norm": 0.00047628002357669175, "learning_rate": 0.00019991521151858463, "loss": 23.0, "step": 248 }, { "epoch": 0.14945978391356543, "grad_norm": 0.00046940191532485187, "learning_rate": 0.00019991443196969806, "loss": 23.0, "step": 249 }, { "epoch": 0.15006002400960383, "grad_norm": 0.0007664930308237672, "learning_rate": 0.0001999136488551224, "loss": 23.0, "step": 250 }, { "epoch": 0.15066026410564226, "grad_norm": 0.000685586070176214, "learning_rate": 0.00019991286217488564, "loss": 23.0, "step": 251 }, { "epoch": 0.15126050420168066, "grad_norm": 0.0010549027938395739, "learning_rate": 0.0001999120719290158, "loss": 23.0, "step": 252 }, { "epoch": 0.1518607442977191, "grad_norm": 0.0005077957175672054, "learning_rate": 0.00019991127811754114, "loss": 23.0, "step": 253 }, { "epoch": 0.1524609843937575, "grad_norm": 0.0005034700152464211, "learning_rate": 0.00019991048074048998, "loss": 23.0, "step": 254 }, { "epoch": 0.15306122448979592, "grad_norm": 0.0007512738811783493, "learning_rate": 0.00019990967979789074, "loss": 23.0, "step": 255 }, { "epoch": 0.15366146458583432, "grad_norm": 0.00046786750317551196, "learning_rate": 0.00019990887528977203, "loss": 23.0, "step": 256 }, { "epoch": 0.15426170468187275, "grad_norm": 0.001064613345079124, "learning_rate": 0.00019990806721616257, "loss": 23.0, "step": 257 }, { "epoch": 0.15486194477791115, "grad_norm": 0.00039759057108312845, "learning_rate": 0.00019990725557709118, "loss": 23.0, "step": 258 }, { "epoch": 0.15546218487394958, "grad_norm": 0.0007065651589073241, "learning_rate": 0.0001999064403725868, "loss": 23.0, "step": 259 }, { "epoch": 0.15606242496998798, "grad_norm": 0.00046774890506640077, "learning_rate": 0.0001999056216026786, "loss": 23.0, "step": 260 }, { "epoch": 0.1566626650660264, "grad_norm": 0.0005046619917266071, "learning_rate": 0.00019990479926739575, "loss": 23.0, "step": 261 }, { "epoch": 0.15726290516206481, "grad_norm": 0.0005047253216616809, "learning_rate": 0.00019990397336676759, "loss": 23.0, "step": 262 }, { "epoch": 0.15786314525810324, "grad_norm": 0.0007587158470414579, "learning_rate": 0.00019990314390082358, "loss": 23.0, "step": 263 }, { "epoch": 0.15846338535414164, "grad_norm": 0.0008176558185368776, "learning_rate": 0.00019990231086959337, "loss": 23.0, "step": 264 }, { "epoch": 0.15906362545018007, "grad_norm": 0.0005377682973630726, "learning_rate": 0.00019990147427310665, "loss": 23.0, "step": 265 }, { "epoch": 0.15966386554621848, "grad_norm": 0.0006355341174639761, "learning_rate": 0.0001999006341113933, "loss": 23.0, "step": 266 }, { "epoch": 0.1602641056422569, "grad_norm": 0.0007314942195080221, "learning_rate": 0.0001998997903844833, "loss": 23.0, "step": 267 }, { "epoch": 0.1608643457382953, "grad_norm": 0.0007350289961323142, "learning_rate": 0.00019989894309240674, "loss": 23.0, "step": 268 }, { "epoch": 0.16146458583433373, "grad_norm": 0.0008466751896776259, "learning_rate": 0.00019989809223519388, "loss": 23.0, "step": 269 }, { "epoch": 0.16206482593037214, "grad_norm": 0.0009016711846925318, "learning_rate": 0.0001998972378128751, "loss": 23.0, "step": 270 }, { "epoch": 0.16266506602641057, "grad_norm": 0.0005349046550691128, "learning_rate": 0.00019989637982548083, "loss": 23.0, "step": 271 }, { "epoch": 0.16326530612244897, "grad_norm": 0.0005445227725431323, "learning_rate": 0.0001998955182730417, "loss": 23.0, "step": 272 }, { "epoch": 0.1638655462184874, "grad_norm": 0.00042219756869599223, "learning_rate": 0.00019989465315558856, "loss": 23.0, "step": 273 }, { "epoch": 0.1644657863145258, "grad_norm": 0.0010625041322782636, "learning_rate": 0.00019989378447315216, "loss": 23.0, "step": 274 }, { "epoch": 0.16506602641056423, "grad_norm": 0.000675600953400135, "learning_rate": 0.00019989291222576356, "loss": 23.0, "step": 275 }, { "epoch": 0.16566626650660263, "grad_norm": 0.0005503667634911835, "learning_rate": 0.00019989203641345392, "loss": 23.0, "step": 276 }, { "epoch": 0.16626650660264106, "grad_norm": 0.0006727102445438504, "learning_rate": 0.00019989115703625442, "loss": 23.0, "step": 277 }, { "epoch": 0.16686674669867949, "grad_norm": 0.0004575340135488659, "learning_rate": 0.00019989027409419646, "loss": 23.0, "step": 278 }, { "epoch": 0.1674669867947179, "grad_norm": 0.0006102299084886909, "learning_rate": 0.00019988938758731158, "loss": 23.0, "step": 279 }, { "epoch": 0.16806722689075632, "grad_norm": 0.0005805498803965747, "learning_rate": 0.0001998884975156314, "loss": 23.0, "step": 280 }, { "epoch": 0.16866746698679472, "grad_norm": 0.0008877067011781037, "learning_rate": 0.00019988760387918767, "loss": 23.0, "step": 281 }, { "epoch": 0.16926770708283315, "grad_norm": 0.0008169253706000745, "learning_rate": 0.00019988670667801234, "loss": 23.0, "step": 282 }, { "epoch": 0.16986794717887155, "grad_norm": 0.0007667009485885501, "learning_rate": 0.00019988580591213736, "loss": 23.0, "step": 283 }, { "epoch": 0.17046818727490998, "grad_norm": 0.0006645764224231243, "learning_rate": 0.0001998849015815949, "loss": 23.0, "step": 284 }, { "epoch": 0.17106842737094838, "grad_norm": 0.0006818636902607977, "learning_rate": 0.00019988399368641723, "loss": 23.0, "step": 285 }, { "epoch": 0.1716686674669868, "grad_norm": 0.0002914422075264156, "learning_rate": 0.00019988308222663678, "loss": 23.0, "step": 286 }, { "epoch": 0.1722689075630252, "grad_norm": 0.0006489845691248775, "learning_rate": 0.00019988216720228606, "loss": 23.0, "step": 287 }, { "epoch": 0.17286914765906364, "grad_norm": 0.0005175631958991289, "learning_rate": 0.00019988124861339772, "loss": 23.0, "step": 288 }, { "epoch": 0.17346938775510204, "grad_norm": 0.000415219459682703, "learning_rate": 0.00019988032646000453, "loss": 23.0, "step": 289 }, { "epoch": 0.17406962785114047, "grad_norm": 0.0004089119902346283, "learning_rate": 0.0001998794007421394, "loss": 23.0, "step": 290 }, { "epoch": 0.17466986794717887, "grad_norm": 0.0007048040861263871, "learning_rate": 0.0001998784714598354, "loss": 23.0, "step": 291 }, { "epoch": 0.1752701080432173, "grad_norm": 0.00052332115592435, "learning_rate": 0.00019987753861312567, "loss": 23.0, "step": 292 }, { "epoch": 0.1758703481392557, "grad_norm": 0.000617658079136163, "learning_rate": 0.0001998766022020435, "loss": 23.0, "step": 293 }, { "epoch": 0.17647058823529413, "grad_norm": 0.0005852694157510996, "learning_rate": 0.0001998756622266223, "loss": 23.0, "step": 294 }, { "epoch": 0.17707082833133253, "grad_norm": 0.000565545866265893, "learning_rate": 0.00019987471868689562, "loss": 23.0, "step": 295 }, { "epoch": 0.17767106842737096, "grad_norm": 0.0007343909819610417, "learning_rate": 0.00019987377158289715, "loss": 23.0, "step": 296 }, { "epoch": 0.17827130852340936, "grad_norm": 0.0007629229803569615, "learning_rate": 0.0001998728209146607, "loss": 23.0, "step": 297 }, { "epoch": 0.1788715486194478, "grad_norm": 0.0007439067121595144, "learning_rate": 0.00019987186668222014, "loss": 23.0, "step": 298 }, { "epoch": 0.1794717887154862, "grad_norm": 0.0007367929792962968, "learning_rate": 0.00019987090888560956, "loss": 23.0, "step": 299 }, { "epoch": 0.18007202881152462, "grad_norm": 0.0017375691095367074, "learning_rate": 0.00019986994752486318, "loss": 23.0, "step": 300 }, { "epoch": 0.18067226890756302, "grad_norm": 0.0009452842641621828, "learning_rate": 0.00019986898260001526, "loss": 23.0, "step": 301 }, { "epoch": 0.18127250900360145, "grad_norm": 0.0005082595744170249, "learning_rate": 0.00019986801411110022, "loss": 23.0, "step": 302 }, { "epoch": 0.18187274909963985, "grad_norm": 0.0008125613094307482, "learning_rate": 0.00019986704205815264, "loss": 23.0, "step": 303 }, { "epoch": 0.18247298919567828, "grad_norm": 0.000651197973638773, "learning_rate": 0.00019986606644120723, "loss": 23.0, "step": 304 }, { "epoch": 0.18307322929171668, "grad_norm": 0.0007686960743740201, "learning_rate": 0.0001998650872602988, "loss": 23.0, "step": 305 }, { "epoch": 0.1836734693877551, "grad_norm": 0.00037045447970740497, "learning_rate": 0.0001998641045154623, "loss": 23.0, "step": 306 }, { "epoch": 0.1842737094837935, "grad_norm": 0.0011275415308773518, "learning_rate": 0.00019986311820673278, "loss": 23.0, "step": 307 }, { "epoch": 0.18487394957983194, "grad_norm": 0.000977221643552184, "learning_rate": 0.00019986212833414543, "loss": 23.0, "step": 308 }, { "epoch": 0.18547418967587034, "grad_norm": 0.0009870711946859956, "learning_rate": 0.00019986113489773563, "loss": 23.0, "step": 309 }, { "epoch": 0.18607442977190877, "grad_norm": 0.0010398519225418568, "learning_rate": 0.0001998601378975388, "loss": 23.0, "step": 310 }, { "epoch": 0.18667466986794717, "grad_norm": 0.0007000160403549671, "learning_rate": 0.00019985913733359048, "loss": 23.0, "step": 311 }, { "epoch": 0.1872749099639856, "grad_norm": 0.0006655632168985903, "learning_rate": 0.00019985813320592643, "loss": 23.0, "step": 312 }, { "epoch": 0.187875150060024, "grad_norm": 0.0005049473256804049, "learning_rate": 0.0001998571255145825, "loss": 23.0, "step": 313 }, { "epoch": 0.18847539015606243, "grad_norm": 0.0008667523507028818, "learning_rate": 0.00019985611425959462, "loss": 23.0, "step": 314 }, { "epoch": 0.18907563025210083, "grad_norm": 0.0008610632503405213, "learning_rate": 0.00019985509944099884, "loss": 23.0, "step": 315 }, { "epoch": 0.18967587034813926, "grad_norm": 0.0006444462342187762, "learning_rate": 0.00019985408105883146, "loss": 23.0, "step": 316 }, { "epoch": 0.19027611044417767, "grad_norm": 0.0011097770184278488, "learning_rate": 0.00019985305911312878, "loss": 23.0, "step": 317 }, { "epoch": 0.1908763505402161, "grad_norm": 0.000943410734180361, "learning_rate": 0.00019985203360392727, "loss": 23.0, "step": 318 }, { "epoch": 0.1914765906362545, "grad_norm": 0.000921803992241621, "learning_rate": 0.00019985100453126352, "loss": 23.0, "step": 319 }, { "epoch": 0.19207683073229292, "grad_norm": 0.000856006343383342, "learning_rate": 0.00019984997189517425, "loss": 23.0, "step": 320 }, { "epoch": 0.19267707082833133, "grad_norm": 0.0008737395983189344, "learning_rate": 0.00019984893569569633, "loss": 23.0, "step": 321 }, { "epoch": 0.19327731092436976, "grad_norm": 0.0007819237653166056, "learning_rate": 0.00019984789593286678, "loss": 23.0, "step": 322 }, { "epoch": 0.19387755102040816, "grad_norm": 0.0008402220555581152, "learning_rate": 0.00019984685260672263, "loss": 23.0, "step": 323 }, { "epoch": 0.19447779111644659, "grad_norm": 0.0003997522871941328, "learning_rate": 0.00019984580571730115, "loss": 23.0, "step": 324 }, { "epoch": 0.195078031212485, "grad_norm": 0.0004815506108570844, "learning_rate": 0.0001998447552646397, "loss": 23.0, "step": 325 }, { "epoch": 0.19567827130852342, "grad_norm": 0.00059786211932078, "learning_rate": 0.00019984370124877577, "loss": 23.0, "step": 326 }, { "epoch": 0.19627851140456182, "grad_norm": 0.0006378625403158367, "learning_rate": 0.00019984264366974697, "loss": 23.0, "step": 327 }, { "epoch": 0.19687875150060025, "grad_norm": 0.00038927397690713406, "learning_rate": 0.00019984158252759105, "loss": 23.0, "step": 328 }, { "epoch": 0.19747899159663865, "grad_norm": 0.0007686428143642843, "learning_rate": 0.00019984051782234589, "loss": 23.0, "step": 329 }, { "epoch": 0.19807923169267708, "grad_norm": 0.0006654906319454312, "learning_rate": 0.00019983944955404942, "loss": 23.0, "step": 330 }, { "epoch": 0.19867947178871548, "grad_norm": 0.00034374112146906555, "learning_rate": 0.00019983837772273984, "loss": 23.0, "step": 331 }, { "epoch": 0.1992797118847539, "grad_norm": 0.00040528926183469594, "learning_rate": 0.00019983730232845537, "loss": 23.0, "step": 332 }, { "epoch": 0.1998799519807923, "grad_norm": 0.0007493866141885519, "learning_rate": 0.0001998362233712344, "loss": 23.0, "step": 333 }, { "epoch": 0.20048019207683074, "grad_norm": 0.0008692885749042034, "learning_rate": 0.0001998351408511154, "loss": 23.0, "step": 334 }, { "epoch": 0.20108043217286914, "grad_norm": 0.0005017250659875572, "learning_rate": 0.00019983405476813707, "loss": 23.0, "step": 335 }, { "epoch": 0.20168067226890757, "grad_norm": 0.0005804816028103232, "learning_rate": 0.0001998329651223381, "loss": 23.0, "step": 336 }, { "epoch": 0.20228091236494597, "grad_norm": 0.0006715598865412176, "learning_rate": 0.00019983187191375743, "loss": 23.0, "step": 337 }, { "epoch": 0.2028811524609844, "grad_norm": 0.00039470859337598085, "learning_rate": 0.00019983077514243403, "loss": 23.0, "step": 338 }, { "epoch": 0.2034813925570228, "grad_norm": 0.0004575586353894323, "learning_rate": 0.00019982967480840707, "loss": 23.0, "step": 339 }, { "epoch": 0.20408163265306123, "grad_norm": 0.0005345721147023141, "learning_rate": 0.0001998285709117158, "loss": 23.0, "step": 340 }, { "epoch": 0.20468187274909963, "grad_norm": 0.0005476151127368212, "learning_rate": 0.0001998274634523996, "loss": 23.0, "step": 341 }, { "epoch": 0.20528211284513806, "grad_norm": 0.0011936496011912823, "learning_rate": 0.00019982635243049807, "loss": 23.0, "step": 342 }, { "epoch": 0.20588235294117646, "grad_norm": 0.0005647265934385359, "learning_rate": 0.0001998252378460508, "loss": 23.0, "step": 343 }, { "epoch": 0.2064825930372149, "grad_norm": 0.000607253925409168, "learning_rate": 0.00019982411969909753, "loss": 23.0, "step": 344 }, { "epoch": 0.2070828331332533, "grad_norm": 0.00046976387966424227, "learning_rate": 0.00019982299798967823, "loss": 23.0, "step": 345 }, { "epoch": 0.20768307322929172, "grad_norm": 0.00045033791684545577, "learning_rate": 0.00019982187271783293, "loss": 23.0, "step": 346 }, { "epoch": 0.20828331332533012, "grad_norm": 0.000813774939160794, "learning_rate": 0.0001998207438836017, "loss": 23.0, "step": 347 }, { "epoch": 0.20888355342136855, "grad_norm": 0.000533080892637372, "learning_rate": 0.00019981961148702495, "loss": 23.0, "step": 348 }, { "epoch": 0.20948379351740695, "grad_norm": 0.000772426079493016, "learning_rate": 0.000199818475528143, "loss": 23.0, "step": 349 }, { "epoch": 0.21008403361344538, "grad_norm": 0.0005271838745102286, "learning_rate": 0.00019981733600699645, "loss": 23.0, "step": 350 }, { "epoch": 0.21068427370948378, "grad_norm": 0.0005881227552890778, "learning_rate": 0.00019981619292362593, "loss": 23.0, "step": 351 }, { "epoch": 0.2112845138055222, "grad_norm": 0.0006613187142647803, "learning_rate": 0.00019981504627807227, "loss": 23.0, "step": 352 }, { "epoch": 0.2118847539015606, "grad_norm": 0.000673049536999315, "learning_rate": 0.0001998138960703763, "loss": 23.0, "step": 353 }, { "epoch": 0.21248499399759904, "grad_norm": 0.0004623003478627652, "learning_rate": 0.00019981274230057916, "loss": 23.0, "step": 354 }, { "epoch": 0.21308523409363744, "grad_norm": 0.0009141325135715306, "learning_rate": 0.00019981158496872202, "loss": 23.0, "step": 355 }, { "epoch": 0.21368547418967587, "grad_norm": 0.0004129754670429975, "learning_rate": 0.00019981042407484615, "loss": 23.0, "step": 356 }, { "epoch": 0.21428571428571427, "grad_norm": 0.0007131838356144726, "learning_rate": 0.00019980925961899295, "loss": 23.0, "step": 357 }, { "epoch": 0.2148859543817527, "grad_norm": 0.00048736046301200986, "learning_rate": 0.00019980809160120405, "loss": 23.0, "step": 358 }, { "epoch": 0.2154861944777911, "grad_norm": 0.0008709396352060139, "learning_rate": 0.0001998069200215211, "loss": 23.0, "step": 359 }, { "epoch": 0.21608643457382953, "grad_norm": 0.0008411543094553053, "learning_rate": 0.0001998057448799859, "loss": 23.0, "step": 360 }, { "epoch": 0.21668667466986793, "grad_norm": 0.00045339285861700773, "learning_rate": 0.0001998045661766404, "loss": 23.0, "step": 361 }, { "epoch": 0.21728691476590636, "grad_norm": 0.0008114994852803648, "learning_rate": 0.00019980338391152662, "loss": 23.0, "step": 362 }, { "epoch": 0.21788715486194477, "grad_norm": 0.0009748362936079502, "learning_rate": 0.00019980219808468682, "loss": 23.0, "step": 363 }, { "epoch": 0.2184873949579832, "grad_norm": 0.0005413390463218093, "learning_rate": 0.0001998010086961633, "loss": 23.0, "step": 364 }, { "epoch": 0.2190876350540216, "grad_norm": 0.0009059528820216656, "learning_rate": 0.00019979981574599852, "loss": 23.0, "step": 365 }, { "epoch": 0.21968787515006002, "grad_norm": 0.0004444699443411082, "learning_rate": 0.000199798619234235, "loss": 23.0, "step": 366 }, { "epoch": 0.22028811524609843, "grad_norm": 0.0008990266942419112, "learning_rate": 0.0001997974191609155, "loss": 23.0, "step": 367 }, { "epoch": 0.22088835534213686, "grad_norm": 0.0004023246292490512, "learning_rate": 0.00019979621552608276, "loss": 23.0, "step": 368 }, { "epoch": 0.22148859543817526, "grad_norm": 0.0007271401700563729, "learning_rate": 0.00019979500832977985, "loss": 23.0, "step": 369 }, { "epoch": 0.22208883553421369, "grad_norm": 0.0003374427615199238, "learning_rate": 0.00019979379757204978, "loss": 23.0, "step": 370 }, { "epoch": 0.22268907563025211, "grad_norm": 0.000754833163227886, "learning_rate": 0.0001997925832529358, "loss": 23.0, "step": 371 }, { "epoch": 0.22328931572629052, "grad_norm": 0.000961002369876951, "learning_rate": 0.00019979136537248118, "loss": 23.0, "step": 372 }, { "epoch": 0.22388955582232895, "grad_norm": 0.0005781974759884179, "learning_rate": 0.00019979014393072943, "loss": 23.0, "step": 373 }, { "epoch": 0.22448979591836735, "grad_norm": 0.00044546660501509905, "learning_rate": 0.00019978891892772415, "loss": 23.0, "step": 374 }, { "epoch": 0.22509003601440578, "grad_norm": 0.0005686086369678378, "learning_rate": 0.00019978769036350905, "loss": 23.0, "step": 375 }, { "epoch": 0.22569027611044418, "grad_norm": 0.000627652567345649, "learning_rate": 0.00019978645823812797, "loss": 23.0, "step": 376 }, { "epoch": 0.2262905162064826, "grad_norm": 0.0006272499449551105, "learning_rate": 0.00019978522255162485, "loss": 23.0, "step": 377 }, { "epoch": 0.226890756302521, "grad_norm": 0.0007022761856205761, "learning_rate": 0.0001997839833040438, "loss": 23.0, "step": 378 }, { "epoch": 0.22749099639855944, "grad_norm": 0.0006133838323876262, "learning_rate": 0.0001997827404954291, "loss": 23.0, "step": 379 }, { "epoch": 0.22809123649459784, "grad_norm": 0.0004374498676043004, "learning_rate": 0.00019978149412582506, "loss": 23.0, "step": 380 }, { "epoch": 0.22869147659063627, "grad_norm": 0.00046753635979257524, "learning_rate": 0.00019978024419527613, "loss": 23.0, "step": 381 }, { "epoch": 0.22929171668667467, "grad_norm": 0.0006269077421166003, "learning_rate": 0.00019977899070382698, "loss": 23.0, "step": 382 }, { "epoch": 0.2298919567827131, "grad_norm": 0.0008202579920180142, "learning_rate": 0.00019977773365152231, "loss": 23.0, "step": 383 }, { "epoch": 0.2304921968787515, "grad_norm": 0.0009668364655226469, "learning_rate": 0.00019977647303840698, "loss": 23.0, "step": 384 }, { "epoch": 0.23109243697478993, "grad_norm": 0.0004292426456231624, "learning_rate": 0.00019977520886452598, "loss": 23.0, "step": 385 }, { "epoch": 0.23169267707082833, "grad_norm": 0.00030157389119267464, "learning_rate": 0.0001997739411299244, "loss": 23.0, "step": 386 }, { "epoch": 0.23229291716686676, "grad_norm": 0.00035860264324583113, "learning_rate": 0.00019977266983464757, "loss": 23.0, "step": 387 }, { "epoch": 0.23289315726290516, "grad_norm": 0.00041682066512294114, "learning_rate": 0.00019977139497874078, "loss": 23.0, "step": 388 }, { "epoch": 0.2334933973589436, "grad_norm": 0.0006654221215285361, "learning_rate": 0.00019977011656224954, "loss": 23.0, "step": 389 }, { "epoch": 0.234093637454982, "grad_norm": 0.0003358942340128124, "learning_rate": 0.00019976883458521947, "loss": 23.0, "step": 390 }, { "epoch": 0.23469387755102042, "grad_norm": 0.0004877279279753566, "learning_rate": 0.00019976754904769633, "loss": 23.0, "step": 391 }, { "epoch": 0.23529411764705882, "grad_norm": 0.0004985998966731131, "learning_rate": 0.000199766259949726, "loss": 23.0, "step": 392 }, { "epoch": 0.23589435774309725, "grad_norm": 0.0006408619810827076, "learning_rate": 0.0001997649672913545, "loss": 23.0, "step": 393 }, { "epoch": 0.23649459783913565, "grad_norm": 0.00035928620491176844, "learning_rate": 0.00019976367107262793, "loss": 23.0, "step": 394 }, { "epoch": 0.23709483793517408, "grad_norm": 0.0007407786906696856, "learning_rate": 0.00019976237129359255, "loss": 23.0, "step": 395 }, { "epoch": 0.23769507803121248, "grad_norm": 0.0005418708897195756, "learning_rate": 0.00019976106795429477, "loss": 23.0, "step": 396 }, { "epoch": 0.2382953181272509, "grad_norm": 0.0006996035808697343, "learning_rate": 0.00019975976105478108, "loss": 23.0, "step": 397 }, { "epoch": 0.2388955582232893, "grad_norm": 0.0005328754195943475, "learning_rate": 0.00019975845059509815, "loss": 23.0, "step": 398 }, { "epoch": 0.23949579831932774, "grad_norm": 0.0009513835539110005, "learning_rate": 0.00019975713657529275, "loss": 23.0, "step": 399 }, { "epoch": 0.24009603841536614, "grad_norm": 0.0006411597714759409, "learning_rate": 0.00019975581899541172, "loss": 23.0, "step": 400 }, { "epoch": 0.24069627851140457, "grad_norm": 0.001117164152674377, "learning_rate": 0.00019975449785550212, "loss": 23.0, "step": 401 }, { "epoch": 0.24129651860744297, "grad_norm": 0.0010399962775409222, "learning_rate": 0.00019975317315561108, "loss": 23.0, "step": 402 }, { "epoch": 0.2418967587034814, "grad_norm": 0.0008498011156916618, "learning_rate": 0.00019975184489578587, "loss": 23.0, "step": 403 }, { "epoch": 0.2424969987995198, "grad_norm": 0.0005976161919534206, "learning_rate": 0.0001997505130760739, "loss": 23.0, "step": 404 }, { "epoch": 0.24309723889555823, "grad_norm": 0.00085114233661443, "learning_rate": 0.00019974917769652274, "loss": 23.0, "step": 405 }, { "epoch": 0.24369747899159663, "grad_norm": 0.0006555085419677198, "learning_rate": 0.00019974783875718, "loss": 23.0, "step": 406 }, { "epoch": 0.24429771908763506, "grad_norm": 0.00048095532110892236, "learning_rate": 0.0001997464962580935, "loss": 23.0, "step": 407 }, { "epoch": 0.24489795918367346, "grad_norm": 0.0006680062506347895, "learning_rate": 0.00019974515019931114, "loss": 23.0, "step": 408 }, { "epoch": 0.2454981992797119, "grad_norm": 0.0006157276802696288, "learning_rate": 0.0001997438005808809, "loss": 23.0, "step": 409 }, { "epoch": 0.2460984393757503, "grad_norm": 0.0004931309376843274, "learning_rate": 0.000199742447402851, "loss": 23.0, "step": 410 }, { "epoch": 0.24669867947178872, "grad_norm": 0.0005629705265164375, "learning_rate": 0.00019974109066526975, "loss": 23.0, "step": 411 }, { "epoch": 0.24729891956782712, "grad_norm": 0.0009498510626144707, "learning_rate": 0.00019973973036818552, "loss": 23.0, "step": 412 }, { "epoch": 0.24789915966386555, "grad_norm": 0.000444902601884678, "learning_rate": 0.00019973836651164683, "loss": 23.0, "step": 413 }, { "epoch": 0.24849939975990396, "grad_norm": 0.0004672042850870639, "learning_rate": 0.00019973699909570244, "loss": 23.0, "step": 414 }, { "epoch": 0.24909963985594238, "grad_norm": 0.0008003438706509769, "learning_rate": 0.0001997356281204011, "loss": 23.0, "step": 415 }, { "epoch": 0.24969987995198079, "grad_norm": 0.0003626558172982186, "learning_rate": 0.00019973425358579176, "loss": 23.0, "step": 416 }, { "epoch": 0.2503001200480192, "grad_norm": 0.0004940864164382219, "learning_rate": 0.00019973287549192344, "loss": 23.0, "step": 417 }, { "epoch": 0.25090036014405764, "grad_norm": 0.0007219372782856226, "learning_rate": 0.00019973149383884536, "loss": 23.0, "step": 418 }, { "epoch": 0.251500600240096, "grad_norm": 0.0007677687099203467, "learning_rate": 0.00019973010862660677, "loss": 23.0, "step": 419 }, { "epoch": 0.25210084033613445, "grad_norm": 0.0005680742906406522, "learning_rate": 0.00019972871985525717, "loss": 23.0, "step": 420 }, { "epoch": 0.2527010804321729, "grad_norm": 0.0012042596936225891, "learning_rate": 0.00019972732752484605, "loss": 23.0, "step": 421 }, { "epoch": 0.2533013205282113, "grad_norm": 0.0010296528926119208, "learning_rate": 0.0001997259316354232, "loss": 23.0, "step": 422 }, { "epoch": 0.2539015606242497, "grad_norm": 0.0006402980070561171, "learning_rate": 0.00019972453218703834, "loss": 23.0, "step": 423 }, { "epoch": 0.2545018007202881, "grad_norm": 0.0006200941861607134, "learning_rate": 0.00019972312917974143, "loss": 23.0, "step": 424 }, { "epoch": 0.25510204081632654, "grad_norm": 0.0005011442117393017, "learning_rate": 0.00019972172261358259, "loss": 23.0, "step": 425 }, { "epoch": 0.25570228091236497, "grad_norm": 0.0007742932066321373, "learning_rate": 0.00019972031248861196, "loss": 23.0, "step": 426 }, { "epoch": 0.25630252100840334, "grad_norm": 0.0013804651098325849, "learning_rate": 0.0001997188988048799, "loss": 23.0, "step": 427 }, { "epoch": 0.25690276110444177, "grad_norm": 0.000599585531745106, "learning_rate": 0.00019971748156243687, "loss": 23.0, "step": 428 }, { "epoch": 0.2575030012004802, "grad_norm": 0.00047438021283596754, "learning_rate": 0.0001997160607613334, "loss": 23.0, "step": 429 }, { "epoch": 0.2581032412965186, "grad_norm": 0.0008628160576336086, "learning_rate": 0.00019971463640162024, "loss": 23.0, "step": 430 }, { "epoch": 0.258703481392557, "grad_norm": 0.0007876844028942287, "learning_rate": 0.00019971320848334817, "loss": 23.0, "step": 431 }, { "epoch": 0.25930372148859543, "grad_norm": 0.000673728296533227, "learning_rate": 0.00019971177700656823, "loss": 23.0, "step": 432 }, { "epoch": 0.25990396158463386, "grad_norm": 0.0009601108031347394, "learning_rate": 0.0001997103419713314, "loss": 23.0, "step": 433 }, { "epoch": 0.2605042016806723, "grad_norm": 0.0006990964757278562, "learning_rate": 0.000199708903377689, "loss": 23.0, "step": 434 }, { "epoch": 0.26110444177671066, "grad_norm": 0.0005273604765534401, "learning_rate": 0.0001997074612256923, "loss": 23.0, "step": 435 }, { "epoch": 0.2617046818727491, "grad_norm": 0.0015036009717732668, "learning_rate": 0.00019970601551539278, "loss": 23.0, "step": 436 }, { "epoch": 0.2623049219687875, "grad_norm": 0.000941345700994134, "learning_rate": 0.00019970456624684207, "loss": 23.0, "step": 437 }, { "epoch": 0.26290516206482595, "grad_norm": 0.0005987265612930059, "learning_rate": 0.00019970311342009184, "loss": 23.0, "step": 438 }, { "epoch": 0.2635054021608643, "grad_norm": 0.00040275233914144337, "learning_rate": 0.00019970165703519396, "loss": 23.0, "step": 439 }, { "epoch": 0.26410564225690275, "grad_norm": 0.0006889562937431037, "learning_rate": 0.0001997001970922004, "loss": 23.0, "step": 440 }, { "epoch": 0.2647058823529412, "grad_norm": 0.0005851078312844038, "learning_rate": 0.00019969873359116326, "loss": 23.0, "step": 441 }, { "epoch": 0.2653061224489796, "grad_norm": 0.0005285036750137806, "learning_rate": 0.00019969726653213478, "loss": 23.0, "step": 442 }, { "epoch": 0.265906362545018, "grad_norm": 0.000706472375895828, "learning_rate": 0.00019969579591516733, "loss": 23.0, "step": 443 }, { "epoch": 0.2665066026410564, "grad_norm": 0.0005748828989453614, "learning_rate": 0.00019969432174031336, "loss": 23.0, "step": 444 }, { "epoch": 0.26710684273709484, "grad_norm": 0.0009791315533220768, "learning_rate": 0.0001996928440076255, "loss": 23.0, "step": 445 }, { "epoch": 0.26770708283313327, "grad_norm": 0.00048265812802128494, "learning_rate": 0.00019969136271715644, "loss": 23.0, "step": 446 }, { "epoch": 0.26830732292917164, "grad_norm": 0.0005793205345980823, "learning_rate": 0.00019968987786895913, "loss": 23.0, "step": 447 }, { "epoch": 0.2689075630252101, "grad_norm": 0.0006732153124175966, "learning_rate": 0.00019968838946308649, "loss": 23.0, "step": 448 }, { "epoch": 0.2695078031212485, "grad_norm": 0.0007057045004330575, "learning_rate": 0.00019968689749959165, "loss": 23.0, "step": 449 }, { "epoch": 0.27010804321728693, "grad_norm": 0.0005959949921816587, "learning_rate": 0.00019968540197852787, "loss": 23.0, "step": 450 }, { "epoch": 0.2707082833133253, "grad_norm": 0.0011223838664591312, "learning_rate": 0.00019968390289994852, "loss": 23.0, "step": 451 }, { "epoch": 0.27130852340936373, "grad_norm": 0.00040181170334108174, "learning_rate": 0.0001996824002639071, "loss": 23.0, "step": 452 }, { "epoch": 0.27190876350540216, "grad_norm": 0.0006953475531190634, "learning_rate": 0.0001996808940704572, "loss": 23.0, "step": 453 }, { "epoch": 0.2725090036014406, "grad_norm": 0.0008081067353487015, "learning_rate": 0.00019967938431965262, "loss": 23.0, "step": 454 }, { "epoch": 0.27310924369747897, "grad_norm": 0.0007403423660434783, "learning_rate": 0.00019967787101154722, "loss": 23.0, "step": 455 }, { "epoch": 0.2737094837935174, "grad_norm": 0.0007606777944602072, "learning_rate": 0.000199676354146195, "loss": 23.0, "step": 456 }, { "epoch": 0.2743097238895558, "grad_norm": 0.000691116729285568, "learning_rate": 0.00019967483372365012, "loss": 23.0, "step": 457 }, { "epoch": 0.27490996398559425, "grad_norm": 0.0007377572474069893, "learning_rate": 0.0001996733097439668, "loss": 23.0, "step": 458 }, { "epoch": 0.2755102040816326, "grad_norm": 0.000592804339248687, "learning_rate": 0.00019967178220719944, "loss": 23.0, "step": 459 }, { "epoch": 0.27611044417767105, "grad_norm": 0.0005102523718960583, "learning_rate": 0.00019967025111340255, "loss": 23.0, "step": 460 }, { "epoch": 0.2767106842737095, "grad_norm": 0.0010906271636486053, "learning_rate": 0.0001996687164626308, "loss": 23.0, "step": 461 }, { "epoch": 0.2773109243697479, "grad_norm": 0.0008513031643815339, "learning_rate": 0.00019966717825493894, "loss": 23.0, "step": 462 }, { "epoch": 0.27791116446578634, "grad_norm": 0.0007379608578048646, "learning_rate": 0.00019966563649038185, "loss": 23.0, "step": 463 }, { "epoch": 0.2785114045618247, "grad_norm": 0.0006293412297964096, "learning_rate": 0.0001996640911690146, "loss": 23.0, "step": 464 }, { "epoch": 0.27911164465786314, "grad_norm": 0.0005843341932632029, "learning_rate": 0.00019966254229089226, "loss": 23.0, "step": 465 }, { "epoch": 0.2797118847539016, "grad_norm": 0.00046797405229881406, "learning_rate": 0.00019966098985607015, "loss": 23.0, "step": 466 }, { "epoch": 0.28031212484994, "grad_norm": 0.0007364925113506615, "learning_rate": 0.0001996594338646037, "loss": 23.0, "step": 467 }, { "epoch": 0.2809123649459784, "grad_norm": 0.0011836322955787182, "learning_rate": 0.0001996578743165484, "loss": 23.0, "step": 468 }, { "epoch": 0.2815126050420168, "grad_norm": 0.0005399160436354578, "learning_rate": 0.00019965631121195992, "loss": 23.0, "step": 469 }, { "epoch": 0.28211284513805523, "grad_norm": 0.0007591730100102723, "learning_rate": 0.000199654744550894, "loss": 23.0, "step": 470 }, { "epoch": 0.28271308523409366, "grad_norm": 0.0005403325776569545, "learning_rate": 0.00019965317433340662, "loss": 23.0, "step": 471 }, { "epoch": 0.28331332533013204, "grad_norm": 0.0005854291375726461, "learning_rate": 0.00019965160055955382, "loss": 23.0, "step": 472 }, { "epoch": 0.28391356542617047, "grad_norm": 0.00045302778016775846, "learning_rate": 0.0001996500232293917, "loss": 23.0, "step": 473 }, { "epoch": 0.2845138055222089, "grad_norm": 0.0004636446828953922, "learning_rate": 0.00019964844234297656, "loss": 23.0, "step": 474 }, { "epoch": 0.2851140456182473, "grad_norm": 0.0004668672336265445, "learning_rate": 0.00019964685790036484, "loss": 23.0, "step": 475 }, { "epoch": 0.2857142857142857, "grad_norm": 0.0009069807711057365, "learning_rate": 0.0001996452699016131, "loss": 23.0, "step": 476 }, { "epoch": 0.2863145258103241, "grad_norm": 0.0005298946634866297, "learning_rate": 0.000199643678346778, "loss": 23.0, "step": 477 }, { "epoch": 0.28691476590636256, "grad_norm": 0.0009359425166621804, "learning_rate": 0.00019964208323591633, "loss": 23.0, "step": 478 }, { "epoch": 0.287515006002401, "grad_norm": 0.0008660585153847933, "learning_rate": 0.000199640484569085, "loss": 23.0, "step": 479 }, { "epoch": 0.28811524609843936, "grad_norm": 0.00040566539973951876, "learning_rate": 0.00019963888234634112, "loss": 23.0, "step": 480 }, { "epoch": 0.2887154861944778, "grad_norm": 0.0003369592595845461, "learning_rate": 0.0001996372765677418, "loss": 23.0, "step": 481 }, { "epoch": 0.2893157262905162, "grad_norm": 0.0015372679335996509, "learning_rate": 0.0001996356672333444, "loss": 23.0, "step": 482 }, { "epoch": 0.28991596638655465, "grad_norm": 0.000425403646659106, "learning_rate": 0.0001996340543432063, "loss": 23.0, "step": 483 }, { "epoch": 0.290516206482593, "grad_norm": 0.001078411820344627, "learning_rate": 0.00019963243789738512, "loss": 23.0, "step": 484 }, { "epoch": 0.29111644657863145, "grad_norm": 0.000449808721896261, "learning_rate": 0.00019963081789593851, "loss": 23.0, "step": 485 }, { "epoch": 0.2917166866746699, "grad_norm": 0.0013598357327282429, "learning_rate": 0.0001996291943389243, "loss": 23.0, "step": 486 }, { "epoch": 0.2923169267707083, "grad_norm": 0.0015207430114969611, "learning_rate": 0.0001996275672264004, "loss": 23.0, "step": 487 }, { "epoch": 0.2929171668667467, "grad_norm": 0.0006574721192009747, "learning_rate": 0.0001996259365584249, "loss": 23.0, "step": 488 }, { "epoch": 0.2935174069627851, "grad_norm": 0.000553544145077467, "learning_rate": 0.000199624302335056, "loss": 23.0, "step": 489 }, { "epoch": 0.29411764705882354, "grad_norm": 0.0004415796138346195, "learning_rate": 0.000199622664556352, "loss": 23.0, "step": 490 }, { "epoch": 0.29471788715486197, "grad_norm": 0.0005506374291144311, "learning_rate": 0.00019962102322237138, "loss": 23.0, "step": 491 }, { "epoch": 0.29531812725090034, "grad_norm": 0.0005820810911245644, "learning_rate": 0.0001996193783331727, "loss": 23.0, "step": 492 }, { "epoch": 0.29591836734693877, "grad_norm": 0.0007612020126543939, "learning_rate": 0.00019961772988881464, "loss": 23.0, "step": 493 }, { "epoch": 0.2965186074429772, "grad_norm": 0.0010789800435304642, "learning_rate": 0.00019961607788935605, "loss": 23.0, "step": 494 }, { "epoch": 0.29711884753901563, "grad_norm": 0.00042243878124281764, "learning_rate": 0.0001996144223348559, "loss": 23.0, "step": 495 }, { "epoch": 0.297719087635054, "grad_norm": 0.0007266091415658593, "learning_rate": 0.00019961276322537325, "loss": 23.0, "step": 496 }, { "epoch": 0.29831932773109243, "grad_norm": 0.0008625111076980829, "learning_rate": 0.0001996111005609673, "loss": 23.0, "step": 497 }, { "epoch": 0.29891956782713086, "grad_norm": 0.000400591641664505, "learning_rate": 0.00019960943434169743, "loss": 23.0, "step": 498 }, { "epoch": 0.2995198079231693, "grad_norm": 0.0005319428746588528, "learning_rate": 0.00019960776456762305, "loss": 23.0, "step": 499 }, { "epoch": 0.30012004801920766, "grad_norm": 0.0007574324845336378, "learning_rate": 0.00019960609123880377, "loss": 23.0, "step": 500 }, { "epoch": 0.3007202881152461, "grad_norm": 0.0008873497718013823, "learning_rate": 0.00019960441435529932, "loss": 23.0, "step": 501 }, { "epoch": 0.3013205282112845, "grad_norm": 0.0003605948877520859, "learning_rate": 0.00019960273391716955, "loss": 23.0, "step": 502 }, { "epoch": 0.30192076830732295, "grad_norm": 0.0008765537058934569, "learning_rate": 0.0001996010499244744, "loss": 23.0, "step": 503 }, { "epoch": 0.3025210084033613, "grad_norm": 0.0005614471738226712, "learning_rate": 0.00019959936237727398, "loss": 23.0, "step": 504 }, { "epoch": 0.30312124849939975, "grad_norm": 0.0006175639573484659, "learning_rate": 0.00019959767127562854, "loss": 23.0, "step": 505 }, { "epoch": 0.3037214885954382, "grad_norm": 0.0006772951455786824, "learning_rate": 0.00019959597661959837, "loss": 23.0, "step": 506 }, { "epoch": 0.3043217286914766, "grad_norm": 0.0007493697921745479, "learning_rate": 0.000199594278409244, "loss": 23.0, "step": 507 }, { "epoch": 0.304921968787515, "grad_norm": 0.0013073779409751296, "learning_rate": 0.00019959257664462605, "loss": 23.0, "step": 508 }, { "epoch": 0.3055222088835534, "grad_norm": 0.001169772120192647, "learning_rate": 0.00019959087132580519, "loss": 23.0, "step": 509 }, { "epoch": 0.30612244897959184, "grad_norm": 0.0010430820984765887, "learning_rate": 0.00019958916245284232, "loss": 23.0, "step": 510 }, { "epoch": 0.3067226890756303, "grad_norm": 0.0007382632466033101, "learning_rate": 0.0001995874500257984, "loss": 23.0, "step": 511 }, { "epoch": 0.30732292917166865, "grad_norm": 0.00046590864076279104, "learning_rate": 0.0001995857340447346, "loss": 23.0, "step": 512 }, { "epoch": 0.3079231692677071, "grad_norm": 0.0008095836383290589, "learning_rate": 0.00019958401450971208, "loss": 23.0, "step": 513 }, { "epoch": 0.3085234093637455, "grad_norm": 0.0004994541523046792, "learning_rate": 0.00019958229142079224, "loss": 23.0, "step": 514 }, { "epoch": 0.30912364945978393, "grad_norm": 0.00032915148767642677, "learning_rate": 0.0001995805647780366, "loss": 23.0, "step": 515 }, { "epoch": 0.3097238895558223, "grad_norm": 0.00047977123176679015, "learning_rate": 0.00019957883458150675, "loss": 23.0, "step": 516 }, { "epoch": 0.31032412965186074, "grad_norm": 0.0004798262962140143, "learning_rate": 0.00019957710083126444, "loss": 23.0, "step": 517 }, { "epoch": 0.31092436974789917, "grad_norm": 0.0012143745552748442, "learning_rate": 0.00019957536352737152, "loss": 23.0, "step": 518 }, { "epoch": 0.3115246098439376, "grad_norm": 0.0008633871912024915, "learning_rate": 0.00019957362266989005, "loss": 23.0, "step": 519 }, { "epoch": 0.31212484993997597, "grad_norm": 0.0005252179689705372, "learning_rate": 0.0001995718782588821, "loss": 23.0, "step": 520 }, { "epoch": 0.3127250900360144, "grad_norm": 0.0013168033910915256, "learning_rate": 0.00019957013029440993, "loss": 23.0, "step": 521 }, { "epoch": 0.3133253301320528, "grad_norm": 0.00026732790865935385, "learning_rate": 0.00019956837877653597, "loss": 23.0, "step": 522 }, { "epoch": 0.31392557022809126, "grad_norm": 0.0005732500576414168, "learning_rate": 0.0001995666237053227, "loss": 23.0, "step": 523 }, { "epoch": 0.31452581032412963, "grad_norm": 0.0003575915179681033, "learning_rate": 0.0001995648650808327, "loss": 23.0, "step": 524 }, { "epoch": 0.31512605042016806, "grad_norm": 0.0006335443467833102, "learning_rate": 0.0001995631029031288, "loss": 23.0, "step": 525 }, { "epoch": 0.3157262905162065, "grad_norm": 0.0018182954518124461, "learning_rate": 0.00019956133717227386, "loss": 23.0, "step": 526 }, { "epoch": 0.3163265306122449, "grad_norm": 0.0011957690585404634, "learning_rate": 0.0001995595678883309, "loss": 23.0, "step": 527 }, { "epoch": 0.3169267707082833, "grad_norm": 0.00040781046845950186, "learning_rate": 0.00019955779505136304, "loss": 23.0, "step": 528 }, { "epoch": 0.3175270108043217, "grad_norm": 0.0010567825520411134, "learning_rate": 0.00019955601866143358, "loss": 23.0, "step": 529 }, { "epoch": 0.31812725090036015, "grad_norm": 0.0007379227317869663, "learning_rate": 0.00019955423871860593, "loss": 23.0, "step": 530 }, { "epoch": 0.3187274909963986, "grad_norm": 0.0005250342655926943, "learning_rate": 0.00019955245522294357, "loss": 23.0, "step": 531 }, { "epoch": 0.31932773109243695, "grad_norm": 0.0020976525265723467, "learning_rate": 0.00019955066817451016, "loss": 23.0, "step": 532 }, { "epoch": 0.3199279711884754, "grad_norm": 0.0012484453618526459, "learning_rate": 0.00019954887757336946, "loss": 23.0, "step": 533 }, { "epoch": 0.3205282112845138, "grad_norm": 0.00038985523860901594, "learning_rate": 0.0001995470834195854, "loss": 23.0, "step": 534 }, { "epoch": 0.32112845138055224, "grad_norm": 0.0005392651073634624, "learning_rate": 0.00019954528571322202, "loss": 23.0, "step": 535 }, { "epoch": 0.3217286914765906, "grad_norm": 0.00044334621634334326, "learning_rate": 0.00019954348445434345, "loss": 23.0, "step": 536 }, { "epoch": 0.32232893157262904, "grad_norm": 0.001021339907310903, "learning_rate": 0.00019954167964301393, "loss": 23.0, "step": 537 }, { "epoch": 0.32292917166866747, "grad_norm": 0.0005136961117386818, "learning_rate": 0.00019953987127929796, "loss": 23.0, "step": 538 }, { "epoch": 0.3235294117647059, "grad_norm": 0.0010431839618831873, "learning_rate": 0.00019953805936326002, "loss": 23.0, "step": 539 }, { "epoch": 0.3241296518607443, "grad_norm": 0.0008680829196237028, "learning_rate": 0.0001995362438949648, "loss": 23.0, "step": 540 }, { "epoch": 0.3247298919567827, "grad_norm": 0.00047551849274896085, "learning_rate": 0.00019953442487447704, "loss": 23.0, "step": 541 }, { "epoch": 0.32533013205282113, "grad_norm": 0.000665904488414526, "learning_rate": 0.00019953260230186172, "loss": 23.0, "step": 542 }, { "epoch": 0.32593037214885956, "grad_norm": 0.0007536861230619252, "learning_rate": 0.00019953077617718384, "loss": 23.0, "step": 543 }, { "epoch": 0.32653061224489793, "grad_norm": 0.0005331055726855993, "learning_rate": 0.00019952894650050856, "loss": 23.0, "step": 544 }, { "epoch": 0.32713085234093636, "grad_norm": 0.0006826044409535825, "learning_rate": 0.00019952711327190123, "loss": 23.0, "step": 545 }, { "epoch": 0.3277310924369748, "grad_norm": 0.001005376921966672, "learning_rate": 0.00019952527649142723, "loss": 23.0, "step": 546 }, { "epoch": 0.3283313325330132, "grad_norm": 0.0010133546311408281, "learning_rate": 0.00019952343615915212, "loss": 23.0, "step": 547 }, { "epoch": 0.3289315726290516, "grad_norm": 0.0015964146004989743, "learning_rate": 0.00019952159227514159, "loss": 23.0, "step": 548 }, { "epoch": 0.32953181272509, "grad_norm": 0.0006953754345886409, "learning_rate": 0.0001995197448394614, "loss": 23.0, "step": 549 }, { "epoch": 0.33013205282112845, "grad_norm": 0.0009064920013770461, "learning_rate": 0.00019951789385217757, "loss": 23.0, "step": 550 }, { "epoch": 0.3307322929171669, "grad_norm": 0.0009463357273489237, "learning_rate": 0.00019951603931335602, "loss": 23.0, "step": 551 }, { "epoch": 0.33133253301320525, "grad_norm": 0.0007558033103123307, "learning_rate": 0.00019951418122306306, "loss": 23.0, "step": 552 }, { "epoch": 0.3319327731092437, "grad_norm": 0.0005100120906718075, "learning_rate": 0.00019951231958136498, "loss": 23.0, "step": 553 }, { "epoch": 0.3325330132052821, "grad_norm": 0.0008068382157944143, "learning_rate": 0.00019951045438832817, "loss": 23.0, "step": 554 }, { "epoch": 0.33313325330132054, "grad_norm": 0.0005732056451961398, "learning_rate": 0.00019950858564401916, "loss": 23.0, "step": 555 }, { "epoch": 0.33373349339735897, "grad_norm": 0.0005752798751927912, "learning_rate": 0.00019950671334850476, "loss": 23.0, "step": 556 }, { "epoch": 0.33433373349339734, "grad_norm": 0.0005713048158213496, "learning_rate": 0.0001995048375018517, "loss": 23.0, "step": 557 }, { "epoch": 0.3349339735894358, "grad_norm": 0.000498209788929671, "learning_rate": 0.00019950295810412696, "loss": 23.0, "step": 558 }, { "epoch": 0.3355342136854742, "grad_norm": 0.0008248278754763305, "learning_rate": 0.00019950107515539757, "loss": 23.0, "step": 559 }, { "epoch": 0.33613445378151263, "grad_norm": 0.00075948127778247, "learning_rate": 0.00019949918865573078, "loss": 23.0, "step": 560 }, { "epoch": 0.336734693877551, "grad_norm": 0.0006507814396172762, "learning_rate": 0.00019949729860519385, "loss": 23.0, "step": 561 }, { "epoch": 0.33733493397358943, "grad_norm": 0.0006093204719945788, "learning_rate": 0.00019949540500385432, "loss": 23.0, "step": 562 }, { "epoch": 0.33793517406962786, "grad_norm": 0.0009938915027305484, "learning_rate": 0.00019949350785177972, "loss": 23.0, "step": 563 }, { "epoch": 0.3385354141656663, "grad_norm": 0.0005058671813458204, "learning_rate": 0.00019949160714903772, "loss": 23.0, "step": 564 }, { "epoch": 0.33913565426170467, "grad_norm": 0.0005110659985803068, "learning_rate": 0.00019948970289569617, "loss": 23.0, "step": 565 }, { "epoch": 0.3397358943577431, "grad_norm": 0.000917077821213752, "learning_rate": 0.00019948779509182306, "loss": 23.0, "step": 566 }, { "epoch": 0.3403361344537815, "grad_norm": 0.0008047555456869304, "learning_rate": 0.0001994858837374865, "loss": 23.0, "step": 567 }, { "epoch": 0.34093637454981995, "grad_norm": 0.0007102153031155467, "learning_rate": 0.0001994839688327546, "loss": 23.0, "step": 568 }, { "epoch": 0.3415366146458583, "grad_norm": 0.0006414945237338543, "learning_rate": 0.0001994820503776958, "loss": 23.0, "step": 569 }, { "epoch": 0.34213685474189676, "grad_norm": 0.0003412929072510451, "learning_rate": 0.0001994801283723785, "loss": 23.0, "step": 570 }, { "epoch": 0.3427370948379352, "grad_norm": 0.0008194602560251951, "learning_rate": 0.0001994782028168713, "loss": 23.0, "step": 571 }, { "epoch": 0.3433373349339736, "grad_norm": 0.0007939741481095552, "learning_rate": 0.0001994762737112429, "loss": 23.0, "step": 572 }, { "epoch": 0.343937575030012, "grad_norm": 0.0010254220105707645, "learning_rate": 0.00019947434105556223, "loss": 23.0, "step": 573 }, { "epoch": 0.3445378151260504, "grad_norm": 0.0005377535708248615, "learning_rate": 0.0001994724048498982, "loss": 23.0, "step": 574 }, { "epoch": 0.34513805522208885, "grad_norm": 0.0005842957179993391, "learning_rate": 0.0001994704650943199, "loss": 23.0, "step": 575 }, { "epoch": 0.3457382953181273, "grad_norm": 0.0005292725982144475, "learning_rate": 0.00019946852178889658, "loss": 23.0, "step": 576 }, { "epoch": 0.34633853541416565, "grad_norm": 0.00041592164780013263, "learning_rate": 0.00019946657493369755, "loss": 23.0, "step": 577 }, { "epoch": 0.3469387755102041, "grad_norm": 0.0002317595499334857, "learning_rate": 0.00019946462452879237, "loss": 23.0, "step": 578 }, { "epoch": 0.3475390156062425, "grad_norm": 0.0005468165036290884, "learning_rate": 0.00019946267057425056, "loss": 23.0, "step": 579 }, { "epoch": 0.34813925570228094, "grad_norm": 0.0007788361399434507, "learning_rate": 0.00019946071307014188, "loss": 23.0, "step": 580 }, { "epoch": 0.3487394957983193, "grad_norm": 0.0006015602848492563, "learning_rate": 0.00019945875201653621, "loss": 23.0, "step": 581 }, { "epoch": 0.34933973589435774, "grad_norm": 0.0006226922268979251, "learning_rate": 0.00019945678741350352, "loss": 23.0, "step": 582 }, { "epoch": 0.34993997599039617, "grad_norm": 0.0010158922523260117, "learning_rate": 0.00019945481926111392, "loss": 23.0, "step": 583 }, { "epoch": 0.3505402160864346, "grad_norm": 0.0007838707533665001, "learning_rate": 0.00019945284755943765, "loss": 23.0, "step": 584 }, { "epoch": 0.35114045618247297, "grad_norm": 0.000780130154453218, "learning_rate": 0.00019945087230854505, "loss": 23.0, "step": 585 }, { "epoch": 0.3517406962785114, "grad_norm": 0.0010645820293575525, "learning_rate": 0.00019944889350850668, "loss": 23.0, "step": 586 }, { "epoch": 0.35234093637454983, "grad_norm": 0.0005978618864901364, "learning_rate": 0.00019944691115939307, "loss": 23.0, "step": 587 }, { "epoch": 0.35294117647058826, "grad_norm": 0.0006172691937536001, "learning_rate": 0.00019944492526127502, "loss": 23.0, "step": 588 }, { "epoch": 0.35354141656662663, "grad_norm": 0.0006004899623803794, "learning_rate": 0.00019944293581422343, "loss": 23.0, "step": 589 }, { "epoch": 0.35414165666266506, "grad_norm": 0.0004828960227314383, "learning_rate": 0.0001994409428183092, "loss": 23.0, "step": 590 }, { "epoch": 0.3547418967587035, "grad_norm": 0.00032999063841998577, "learning_rate": 0.00019943894627360355, "loss": 23.0, "step": 591 }, { "epoch": 0.3553421368547419, "grad_norm": 0.0004524400574155152, "learning_rate": 0.00019943694618017768, "loss": 23.0, "step": 592 }, { "epoch": 0.3559423769507803, "grad_norm": 0.0006170731503516436, "learning_rate": 0.000199434942538103, "loss": 23.0, "step": 593 }, { "epoch": 0.3565426170468187, "grad_norm": 0.0010504665551707149, "learning_rate": 0.00019943293534745098, "loss": 23.0, "step": 594 }, { "epoch": 0.35714285714285715, "grad_norm": 0.0006527103250846267, "learning_rate": 0.00019943092460829326, "loss": 23.0, "step": 595 }, { "epoch": 0.3577430972388956, "grad_norm": 0.0005603809840977192, "learning_rate": 0.00019942891032070163, "loss": 23.0, "step": 596 }, { "epoch": 0.35834333733493395, "grad_norm": 0.0008743616635911167, "learning_rate": 0.00019942689248474794, "loss": 23.0, "step": 597 }, { "epoch": 0.3589435774309724, "grad_norm": 0.0012338734231889248, "learning_rate": 0.00019942487110050422, "loss": 23.0, "step": 598 }, { "epoch": 0.3595438175270108, "grad_norm": 0.0007378676091320813, "learning_rate": 0.0001994228461680426, "loss": 23.0, "step": 599 }, { "epoch": 0.36014405762304924, "grad_norm": 0.0008557065157219768, "learning_rate": 0.00019942081768743535, "loss": 23.0, "step": 600 }, { "epoch": 0.3607442977190876, "grad_norm": 0.00044995357166044414, "learning_rate": 0.00019941878565875486, "loss": 23.0, "step": 601 }, { "epoch": 0.36134453781512604, "grad_norm": 0.0010641762055456638, "learning_rate": 0.00019941675008207366, "loss": 23.0, "step": 602 }, { "epoch": 0.3619447779111645, "grad_norm": 0.0004161446413490921, "learning_rate": 0.00019941471095746437, "loss": 23.0, "step": 603 }, { "epoch": 0.3625450180072029, "grad_norm": 0.0010861216578632593, "learning_rate": 0.00019941266828499973, "loss": 23.0, "step": 604 }, { "epoch": 0.3631452581032413, "grad_norm": 0.0004336018755566329, "learning_rate": 0.00019941062206475273, "loss": 23.0, "step": 605 }, { "epoch": 0.3637454981992797, "grad_norm": 0.0007256188546307385, "learning_rate": 0.0001994085722967963, "loss": 23.0, "step": 606 }, { "epoch": 0.36434573829531813, "grad_norm": 0.0005281938938423991, "learning_rate": 0.00019940651898120367, "loss": 23.0, "step": 607 }, { "epoch": 0.36494597839135656, "grad_norm": 0.00047858108882792294, "learning_rate": 0.00019940446211804807, "loss": 23.0, "step": 608 }, { "epoch": 0.36554621848739494, "grad_norm": 0.0003085435600951314, "learning_rate": 0.0001994024017074029, "loss": 23.0, "step": 609 }, { "epoch": 0.36614645858343337, "grad_norm": 0.00042341326479800045, "learning_rate": 0.00019940033774934173, "loss": 23.0, "step": 610 }, { "epoch": 0.3667466986794718, "grad_norm": 0.000549526244867593, "learning_rate": 0.00019939827024393817, "loss": 23.0, "step": 611 }, { "epoch": 0.3673469387755102, "grad_norm": 0.0006585200899280608, "learning_rate": 0.00019939619919126606, "loss": 23.0, "step": 612 }, { "epoch": 0.3679471788715486, "grad_norm": 0.000494246429298073, "learning_rate": 0.00019939412459139925, "loss": 23.0, "step": 613 }, { "epoch": 0.368547418967587, "grad_norm": 0.0006791771738789976, "learning_rate": 0.00019939204644441182, "loss": 23.0, "step": 614 }, { "epoch": 0.36914765906362546, "grad_norm": 0.0006217627669684589, "learning_rate": 0.00019938996475037793, "loss": 23.0, "step": 615 }, { "epoch": 0.3697478991596639, "grad_norm": 0.0007271217764355242, "learning_rate": 0.00019938787950937186, "loss": 23.0, "step": 616 }, { "epoch": 0.37034813925570226, "grad_norm": 0.000804447045084089, "learning_rate": 0.000199385790721468, "loss": 23.0, "step": 617 }, { "epoch": 0.3709483793517407, "grad_norm": 0.0016518638003617525, "learning_rate": 0.00019938369838674093, "loss": 23.0, "step": 618 }, { "epoch": 0.3715486194477791, "grad_norm": 0.0004141997778788209, "learning_rate": 0.00019938160250526532, "loss": 23.0, "step": 619 }, { "epoch": 0.37214885954381755, "grad_norm": 0.0006064656772650778, "learning_rate": 0.00019937950307711595, "loss": 23.0, "step": 620 }, { "epoch": 0.3727490996398559, "grad_norm": 0.0011940618278458714, "learning_rate": 0.00019937740010236773, "loss": 23.0, "step": 621 }, { "epoch": 0.37334933973589435, "grad_norm": 0.0008350508869625628, "learning_rate": 0.00019937529358109575, "loss": 23.0, "step": 622 }, { "epoch": 0.3739495798319328, "grad_norm": 0.0005108820623718202, "learning_rate": 0.00019937318351337515, "loss": 23.0, "step": 623 }, { "epoch": 0.3745498199279712, "grad_norm": 0.0004091697046533227, "learning_rate": 0.00019937106989928127, "loss": 23.0, "step": 624 }, { "epoch": 0.3751500600240096, "grad_norm": 0.0006039454601705074, "learning_rate": 0.00019936895273888952, "loss": 23.0, "step": 625 }, { "epoch": 0.375750300120048, "grad_norm": 0.0002572101657278836, "learning_rate": 0.00019936683203227544, "loss": 23.0, "step": 626 }, { "epoch": 0.37635054021608644, "grad_norm": 0.0002840797242242843, "learning_rate": 0.00019936470777951474, "loss": 23.0, "step": 627 }, { "epoch": 0.37695078031212487, "grad_norm": 0.0005371993756853044, "learning_rate": 0.0001993625799806832, "loss": 23.0, "step": 628 }, { "epoch": 0.37755102040816324, "grad_norm": 0.0005530430353246629, "learning_rate": 0.00019936044863585678, "loss": 23.0, "step": 629 }, { "epoch": 0.37815126050420167, "grad_norm": 0.0003712098114192486, "learning_rate": 0.00019935831374511153, "loss": 23.0, "step": 630 }, { "epoch": 0.3787515006002401, "grad_norm": 0.0006810548948124051, "learning_rate": 0.00019935617530852367, "loss": 23.0, "step": 631 }, { "epoch": 0.3793517406962785, "grad_norm": 0.0009463174501433969, "learning_rate": 0.00019935403332616944, "loss": 23.0, "step": 632 }, { "epoch": 0.3799519807923169, "grad_norm": 0.0003082946641370654, "learning_rate": 0.00019935188779812534, "loss": 23.0, "step": 633 }, { "epoch": 0.38055222088835533, "grad_norm": 0.0007540938095189631, "learning_rate": 0.00019934973872446797, "loss": 23.0, "step": 634 }, { "epoch": 0.38115246098439376, "grad_norm": 0.0006780321127735078, "learning_rate": 0.00019934758610527396, "loss": 23.0, "step": 635 }, { "epoch": 0.3817527010804322, "grad_norm": 0.0007656219531781971, "learning_rate": 0.00019934542994062013, "loss": 23.0, "step": 636 }, { "epoch": 0.38235294117647056, "grad_norm": 0.0005332346190698445, "learning_rate": 0.00019934327023058348, "loss": 23.0, "step": 637 }, { "epoch": 0.382953181272509, "grad_norm": 0.001408933661878109, "learning_rate": 0.00019934110697524104, "loss": 23.0, "step": 638 }, { "epoch": 0.3835534213685474, "grad_norm": 0.0006234024185687304, "learning_rate": 0.00019933894017467004, "loss": 23.0, "step": 639 }, { "epoch": 0.38415366146458585, "grad_norm": 0.0009115723660215735, "learning_rate": 0.0001993367698289478, "loss": 23.0, "step": 640 }, { "epoch": 0.3847539015606242, "grad_norm": 0.0003554839931894094, "learning_rate": 0.00019933459593815176, "loss": 23.0, "step": 641 }, { "epoch": 0.38535414165666265, "grad_norm": 0.0003548678068909794, "learning_rate": 0.0001993324185023595, "loss": 23.0, "step": 642 }, { "epoch": 0.3859543817527011, "grad_norm": 0.0009928151266649365, "learning_rate": 0.00019933023752164876, "loss": 23.0, "step": 643 }, { "epoch": 0.3865546218487395, "grad_norm": 0.0005392222083173692, "learning_rate": 0.00019932805299609734, "loss": 23.0, "step": 644 }, { "epoch": 0.3871548619447779, "grad_norm": 0.00045687207602895796, "learning_rate": 0.0001993258649257832, "loss": 23.0, "step": 645 }, { "epoch": 0.3877551020408163, "grad_norm": 0.0005536451353691518, "learning_rate": 0.00019932367331078444, "loss": 23.0, "step": 646 }, { "epoch": 0.38835534213685474, "grad_norm": 0.0006612985744141042, "learning_rate": 0.00019932147815117927, "loss": 23.0, "step": 647 }, { "epoch": 0.38895558223289317, "grad_norm": 0.0006857411353848875, "learning_rate": 0.00019931927944704604, "loss": 23.0, "step": 648 }, { "epoch": 0.3895558223289316, "grad_norm": 0.0006747473962605, "learning_rate": 0.0001993170771984632, "loss": 23.0, "step": 649 }, { "epoch": 0.39015606242497, "grad_norm": 0.0006530406535603106, "learning_rate": 0.00019931487140550935, "loss": 23.0, "step": 650 }, { "epoch": 0.3907563025210084, "grad_norm": 0.000865319452714175, "learning_rate": 0.00019931266206826321, "loss": 23.0, "step": 651 }, { "epoch": 0.39135654261704683, "grad_norm": 0.0008172982488758862, "learning_rate": 0.00019931044918680363, "loss": 23.0, "step": 652 }, { "epoch": 0.39195678271308526, "grad_norm": 0.0006401525461114943, "learning_rate": 0.00019930823276120957, "loss": 23.0, "step": 653 }, { "epoch": 0.39255702280912363, "grad_norm": 0.001018511364236474, "learning_rate": 0.00019930601279156013, "loss": 23.0, "step": 654 }, { "epoch": 0.39315726290516206, "grad_norm": 0.001182066509500146, "learning_rate": 0.00019930378927793453, "loss": 23.0, "step": 655 }, { "epoch": 0.3937575030012005, "grad_norm": 0.0002946962777059525, "learning_rate": 0.00019930156222041216, "loss": 23.0, "step": 656 }, { "epoch": 0.3943577430972389, "grad_norm": 0.0008485467988066375, "learning_rate": 0.00019929933161907246, "loss": 23.0, "step": 657 }, { "epoch": 0.3949579831932773, "grad_norm": 0.000641645397990942, "learning_rate": 0.00019929709747399502, "loss": 23.0, "step": 658 }, { "epoch": 0.3955582232893157, "grad_norm": 0.0013788726646453142, "learning_rate": 0.00019929485978525962, "loss": 23.0, "step": 659 }, { "epoch": 0.39615846338535415, "grad_norm": 0.0006469928775914013, "learning_rate": 0.0001992926185529461, "loss": 23.0, "step": 660 }, { "epoch": 0.3967587034813926, "grad_norm": 0.0006551746046170592, "learning_rate": 0.00019929037377713438, "loss": 23.0, "step": 661 }, { "epoch": 0.39735894357743096, "grad_norm": 0.0003588496765587479, "learning_rate": 0.00019928812545790464, "loss": 23.0, "step": 662 }, { "epoch": 0.3979591836734694, "grad_norm": 0.00034284719731658697, "learning_rate": 0.00019928587359533716, "loss": 23.0, "step": 663 }, { "epoch": 0.3985594237695078, "grad_norm": 0.0008505964069627225, "learning_rate": 0.0001992836181895122, "loss": 23.0, "step": 664 }, { "epoch": 0.39915966386554624, "grad_norm": 0.0005401921225711703, "learning_rate": 0.00019928135924051027, "loss": 23.0, "step": 665 }, { "epoch": 0.3997599039615846, "grad_norm": 0.0008277631714008749, "learning_rate": 0.00019927909674841208, "loss": 23.0, "step": 666 }, { "epoch": 0.40036014405762305, "grad_norm": 0.0004785002674907446, "learning_rate": 0.00019927683071329825, "loss": 23.0, "step": 667 }, { "epoch": 0.4009603841536615, "grad_norm": 0.0007532955496571958, "learning_rate": 0.00019927456113524972, "loss": 23.0, "step": 668 }, { "epoch": 0.4015606242496999, "grad_norm": 0.0003629917628131807, "learning_rate": 0.00019927228801434746, "loss": 23.0, "step": 669 }, { "epoch": 0.4021608643457383, "grad_norm": 0.0006129452376626432, "learning_rate": 0.0001992700113506726, "loss": 23.0, "step": 670 }, { "epoch": 0.4027611044417767, "grad_norm": 0.00048521102871745825, "learning_rate": 0.00019926773114430638, "loss": 23.0, "step": 671 }, { "epoch": 0.40336134453781514, "grad_norm": 0.0007425924413837492, "learning_rate": 0.0001992654473953302, "loss": 23.0, "step": 672 }, { "epoch": 0.40396158463385357, "grad_norm": 0.0003996037703473121, "learning_rate": 0.00019926316010382555, "loss": 23.0, "step": 673 }, { "epoch": 0.40456182472989194, "grad_norm": 0.0006881665321998298, "learning_rate": 0.00019926086926987403, "loss": 23.0, "step": 674 }, { "epoch": 0.40516206482593037, "grad_norm": 0.0005098696565255523, "learning_rate": 0.00019925857489355744, "loss": 23.0, "step": 675 }, { "epoch": 0.4057623049219688, "grad_norm": 0.000523019814863801, "learning_rate": 0.00019925627697495762, "loss": 23.0, "step": 676 }, { "epoch": 0.4063625450180072, "grad_norm": 0.0005873084883205593, "learning_rate": 0.0001992539755141566, "loss": 23.0, "step": 677 }, { "epoch": 0.4069627851140456, "grad_norm": 0.0009000878199003637, "learning_rate": 0.0001992516705112365, "loss": 23.0, "step": 678 }, { "epoch": 0.40756302521008403, "grad_norm": 0.00041991635225713253, "learning_rate": 0.0001992493619662796, "loss": 23.0, "step": 679 }, { "epoch": 0.40816326530612246, "grad_norm": 0.0004964503459632397, "learning_rate": 0.00019924704987936824, "loss": 23.0, "step": 680 }, { "epoch": 0.4087635054021609, "grad_norm": 0.0012351791374385357, "learning_rate": 0.00019924473425058498, "loss": 23.0, "step": 681 }, { "epoch": 0.40936374549819926, "grad_norm": 0.0009568947134539485, "learning_rate": 0.00019924241508001244, "loss": 23.0, "step": 682 }, { "epoch": 0.4099639855942377, "grad_norm": 0.0006957779405638576, "learning_rate": 0.00019924009236773339, "loss": 23.0, "step": 683 }, { "epoch": 0.4105642256902761, "grad_norm": 0.0007713912636972964, "learning_rate": 0.00019923776611383072, "loss": 23.0, "step": 684 }, { "epoch": 0.41116446578631455, "grad_norm": 0.0005927034071646631, "learning_rate": 0.00019923543631838743, "loss": 23.0, "step": 685 }, { "epoch": 0.4117647058823529, "grad_norm": 0.0012774481438100338, "learning_rate": 0.00019923310298148672, "loss": 23.0, "step": 686 }, { "epoch": 0.41236494597839135, "grad_norm": 0.0006231871666386724, "learning_rate": 0.0001992307661032118, "loss": 23.0, "step": 687 }, { "epoch": 0.4129651860744298, "grad_norm": 0.0008017731597647071, "learning_rate": 0.00019922842568364607, "loss": 23.0, "step": 688 }, { "epoch": 0.4135654261704682, "grad_norm": 0.0005019367672502995, "learning_rate": 0.0001992260817228731, "loss": 23.0, "step": 689 }, { "epoch": 0.4141656662665066, "grad_norm": 0.000567154842428863, "learning_rate": 0.0001992237342209765, "loss": 23.0, "step": 690 }, { "epoch": 0.414765906362545, "grad_norm": 0.00036567161441780627, "learning_rate": 0.00019922138317804007, "loss": 23.0, "step": 691 }, { "epoch": 0.41536614645858344, "grad_norm": 0.0007109650177881122, "learning_rate": 0.0001992190285941477, "loss": 23.0, "step": 692 }, { "epoch": 0.41596638655462187, "grad_norm": 0.000749673810787499, "learning_rate": 0.0001992166704693834, "loss": 23.0, "step": 693 }, { "epoch": 0.41656662665066024, "grad_norm": 0.0009893577080219984, "learning_rate": 0.00019921430880383138, "loss": 23.0, "step": 694 }, { "epoch": 0.4171668667466987, "grad_norm": 0.00036272848956286907, "learning_rate": 0.0001992119435975759, "loss": 23.0, "step": 695 }, { "epoch": 0.4177671068427371, "grad_norm": 0.0006051092641428113, "learning_rate": 0.00019920957485070132, "loss": 23.0, "step": 696 }, { "epoch": 0.41836734693877553, "grad_norm": 0.0004827500961255282, "learning_rate": 0.00019920720256329222, "loss": 23.0, "step": 697 }, { "epoch": 0.4189675870348139, "grad_norm": 0.001281979726627469, "learning_rate": 0.00019920482673543326, "loss": 23.0, "step": 698 }, { "epoch": 0.41956782713085233, "grad_norm": 0.0011693161213770509, "learning_rate": 0.00019920244736720922, "loss": 23.0, "step": 699 }, { "epoch": 0.42016806722689076, "grad_norm": 0.0006273601320572197, "learning_rate": 0.000199200064458705, "loss": 23.0, "step": 700 }, { "epoch": 0.4207683073229292, "grad_norm": 0.003010783577337861, "learning_rate": 0.00019919767801000564, "loss": 23.0, "step": 701 }, { "epoch": 0.42136854741896757, "grad_norm": 0.0008471604087390006, "learning_rate": 0.00019919528802119632, "loss": 23.0, "step": 702 }, { "epoch": 0.421968787515006, "grad_norm": 0.0008838615613058209, "learning_rate": 0.00019919289449236237, "loss": 23.0, "step": 703 }, { "epoch": 0.4225690276110444, "grad_norm": 0.0009554813150316477, "learning_rate": 0.00019919049742358916, "loss": 23.0, "step": 704 }, { "epoch": 0.42316926770708285, "grad_norm": 0.0005560766439884901, "learning_rate": 0.00019918809681496223, "loss": 23.0, "step": 705 }, { "epoch": 0.4237695078031212, "grad_norm": 0.0004707193002104759, "learning_rate": 0.00019918569266656725, "loss": 23.0, "step": 706 }, { "epoch": 0.42436974789915966, "grad_norm": 0.000530709105078131, "learning_rate": 0.00019918328497849007, "loss": 23.0, "step": 707 }, { "epoch": 0.4249699879951981, "grad_norm": 0.0006235828041099012, "learning_rate": 0.00019918087375081657, "loss": 23.0, "step": 708 }, { "epoch": 0.4255702280912365, "grad_norm": 0.000593838223721832, "learning_rate": 0.0001991784589836328, "loss": 23.0, "step": 709 }, { "epoch": 0.4261704681872749, "grad_norm": 0.0005683311028406024, "learning_rate": 0.00019917604067702495, "loss": 23.0, "step": 710 }, { "epoch": 0.4267707082833133, "grad_norm": 0.0009590789559297264, "learning_rate": 0.00019917361883107931, "loss": 23.0, "step": 711 }, { "epoch": 0.42737094837935174, "grad_norm": 0.0005070093320682645, "learning_rate": 0.00019917119344588235, "loss": 23.0, "step": 712 }, { "epoch": 0.4279711884753902, "grad_norm": 0.00040911248652264476, "learning_rate": 0.00019916876452152056, "loss": 23.0, "step": 713 }, { "epoch": 0.42857142857142855, "grad_norm": 0.00044645063462667167, "learning_rate": 0.00019916633205808068, "loss": 23.0, "step": 714 }, { "epoch": 0.429171668667467, "grad_norm": 0.0003959390742238611, "learning_rate": 0.0001991638960556495, "loss": 23.0, "step": 715 }, { "epoch": 0.4297719087635054, "grad_norm": 0.0006004471215419471, "learning_rate": 0.00019916145651431393, "loss": 23.0, "step": 716 }, { "epoch": 0.43037214885954383, "grad_norm": 0.00035068404395133257, "learning_rate": 0.00019915901343416103, "loss": 23.0, "step": 717 }, { "epoch": 0.4309723889555822, "grad_norm": 0.0003599017218220979, "learning_rate": 0.00019915656681527807, "loss": 23.0, "step": 718 }, { "epoch": 0.43157262905162064, "grad_norm": 0.0003325287252664566, "learning_rate": 0.00019915411665775225, "loss": 23.0, "step": 719 }, { "epoch": 0.43217286914765907, "grad_norm": 0.0007046267855912447, "learning_rate": 0.00019915166296167108, "loss": 23.0, "step": 720 }, { "epoch": 0.4327731092436975, "grad_norm": 0.0003630674909800291, "learning_rate": 0.0001991492057271221, "loss": 23.0, "step": 721 }, { "epoch": 0.43337334933973587, "grad_norm": 0.0003503916668705642, "learning_rate": 0.000199146744954193, "loss": 23.0, "step": 722 }, { "epoch": 0.4339735894357743, "grad_norm": 0.0007532882154919207, "learning_rate": 0.00019914428064297165, "loss": 23.0, "step": 723 }, { "epoch": 0.4345738295318127, "grad_norm": 0.0012118295999243855, "learning_rate": 0.0001991418127935459, "loss": 23.0, "step": 724 }, { "epoch": 0.43517406962785116, "grad_norm": 0.0005586919724009931, "learning_rate": 0.00019913934140600392, "loss": 23.0, "step": 725 }, { "epoch": 0.43577430972388953, "grad_norm": 0.0007068076520226896, "learning_rate": 0.00019913686648043383, "loss": 23.0, "step": 726 }, { "epoch": 0.43637454981992796, "grad_norm": 0.0005361078074201941, "learning_rate": 0.00019913438801692402, "loss": 23.0, "step": 727 }, { "epoch": 0.4369747899159664, "grad_norm": 0.0009473745012655854, "learning_rate": 0.00019913190601556288, "loss": 23.0, "step": 728 }, { "epoch": 0.4375750300120048, "grad_norm": 0.0005555340903811157, "learning_rate": 0.00019912942047643902, "loss": 23.0, "step": 729 }, { "epoch": 0.4381752701080432, "grad_norm": 0.0014016212662681937, "learning_rate": 0.0001991269313996411, "loss": 23.0, "step": 730 }, { "epoch": 0.4387755102040816, "grad_norm": 0.00027204706566408277, "learning_rate": 0.00019912443878525803, "loss": 23.0, "step": 731 }, { "epoch": 0.43937575030012005, "grad_norm": 0.0005901534459553659, "learning_rate": 0.0001991219426333787, "loss": 23.0, "step": 732 }, { "epoch": 0.4399759903961585, "grad_norm": 0.00031070367549546063, "learning_rate": 0.00019911944294409223, "loss": 23.0, "step": 733 }, { "epoch": 0.44057623049219685, "grad_norm": 0.0007562999962829053, "learning_rate": 0.00019911693971748777, "loss": 23.0, "step": 734 }, { "epoch": 0.4411764705882353, "grad_norm": 0.000757384579628706, "learning_rate": 0.00019911443295365472, "loss": 23.0, "step": 735 }, { "epoch": 0.4417767106842737, "grad_norm": 0.0009893305832520127, "learning_rate": 0.00019911192265268247, "loss": 23.0, "step": 736 }, { "epoch": 0.44237695078031214, "grad_norm": 0.0008873580954968929, "learning_rate": 0.0001991094088146607, "loss": 23.0, "step": 737 }, { "epoch": 0.4429771908763505, "grad_norm": 0.0006723484839312732, "learning_rate": 0.00019910689143967905, "loss": 23.0, "step": 738 }, { "epoch": 0.44357743097238894, "grad_norm": 0.0008835842600092292, "learning_rate": 0.00019910437052782736, "loss": 23.0, "step": 739 }, { "epoch": 0.44417767106842737, "grad_norm": 0.00031283125281333923, "learning_rate": 0.00019910184607919564, "loss": 23.0, "step": 740 }, { "epoch": 0.4447779111644658, "grad_norm": 0.0006608405965380371, "learning_rate": 0.00019909931809387394, "loss": 23.0, "step": 741 }, { "epoch": 0.44537815126050423, "grad_norm": 0.0005028719315305352, "learning_rate": 0.0001990967865719525, "loss": 23.0, "step": 742 }, { "epoch": 0.4459783913565426, "grad_norm": 0.000783328665420413, "learning_rate": 0.00019909425151352163, "loss": 23.0, "step": 743 }, { "epoch": 0.44657863145258103, "grad_norm": 0.0007136694621294737, "learning_rate": 0.00019909171291867183, "loss": 23.0, "step": 744 }, { "epoch": 0.44717887154861946, "grad_norm": 0.0006191918510012329, "learning_rate": 0.00019908917078749372, "loss": 23.0, "step": 745 }, { "epoch": 0.4477791116446579, "grad_norm": 0.0007277901167981327, "learning_rate": 0.00019908662512007797, "loss": 23.0, "step": 746 }, { "epoch": 0.44837935174069626, "grad_norm": 0.0006948673981241882, "learning_rate": 0.00019908407591651546, "loss": 23.0, "step": 747 }, { "epoch": 0.4489795918367347, "grad_norm": 0.0010942791122943163, "learning_rate": 0.00019908152317689714, "loss": 23.0, "step": 748 }, { "epoch": 0.4495798319327731, "grad_norm": 0.0005513243959285319, "learning_rate": 0.00019907896690131414, "loss": 23.0, "step": 749 }, { "epoch": 0.45018007202881155, "grad_norm": 0.0003519784368108958, "learning_rate": 0.00019907640708985766, "loss": 23.0, "step": 750 }, { "epoch": 0.4507803121248499, "grad_norm": 0.0008878299267962575, "learning_rate": 0.0001990738437426191, "loss": 23.0, "step": 751 }, { "epoch": 0.45138055222088835, "grad_norm": 0.0005296217277646065, "learning_rate": 0.00019907127685968984, "loss": 23.0, "step": 752 }, { "epoch": 0.4519807923169268, "grad_norm": 0.0009205266833305359, "learning_rate": 0.0001990687064411616, "loss": 23.0, "step": 753 }, { "epoch": 0.4525810324129652, "grad_norm": 0.0008753446745686233, "learning_rate": 0.00019906613248712606, "loss": 23.0, "step": 754 }, { "epoch": 0.4531812725090036, "grad_norm": 0.0008117556571960449, "learning_rate": 0.00019906355499767507, "loss": 23.0, "step": 755 }, { "epoch": 0.453781512605042, "grad_norm": 0.0009617914911359549, "learning_rate": 0.00019906097397290063, "loss": 23.0, "step": 756 }, { "epoch": 0.45438175270108044, "grad_norm": 0.0005935687222518027, "learning_rate": 0.00019905838941289484, "loss": 23.0, "step": 757 }, { "epoch": 0.4549819927971189, "grad_norm": 0.0007953408057801425, "learning_rate": 0.00019905580131774993, "loss": 23.0, "step": 758 }, { "epoch": 0.45558223289315725, "grad_norm": 0.0006210616556927562, "learning_rate": 0.00019905320968755828, "loss": 23.0, "step": 759 }, { "epoch": 0.4561824729891957, "grad_norm": 0.0006589915719814599, "learning_rate": 0.00019905061452241238, "loss": 23.0, "step": 760 }, { "epoch": 0.4567827130852341, "grad_norm": 0.0009153984137810767, "learning_rate": 0.00019904801582240484, "loss": 23.0, "step": 761 }, { "epoch": 0.45738295318127253, "grad_norm": 0.0017457930371165276, "learning_rate": 0.00019904541358762838, "loss": 23.0, "step": 762 }, { "epoch": 0.4579831932773109, "grad_norm": 0.0007837511948309839, "learning_rate": 0.0001990428078181759, "loss": 23.0, "step": 763 }, { "epoch": 0.45858343337334934, "grad_norm": 0.0005806999397464097, "learning_rate": 0.00019904019851414034, "loss": 23.0, "step": 764 }, { "epoch": 0.45918367346938777, "grad_norm": 0.0006374964723363519, "learning_rate": 0.0001990375856756149, "loss": 23.0, "step": 765 }, { "epoch": 0.4597839135654262, "grad_norm": 0.0009351090411655605, "learning_rate": 0.00019903496930269278, "loss": 23.0, "step": 766 }, { "epoch": 0.46038415366146457, "grad_norm": 0.0006809848127886653, "learning_rate": 0.00019903234939546734, "loss": 23.0, "step": 767 }, { "epoch": 0.460984393757503, "grad_norm": 0.000654295610729605, "learning_rate": 0.00019902972595403209, "loss": 23.0, "step": 768 }, { "epoch": 0.4615846338535414, "grad_norm": 0.0006075274432078004, "learning_rate": 0.0001990270989784807, "loss": 23.0, "step": 769 }, { "epoch": 0.46218487394957986, "grad_norm": 0.00044143499690108, "learning_rate": 0.0001990244684689068, "loss": 23.0, "step": 770 }, { "epoch": 0.46278511404561823, "grad_norm": 0.0013147240970283747, "learning_rate": 0.0001990218344254044, "loss": 23.0, "step": 771 }, { "epoch": 0.46338535414165666, "grad_norm": 0.00033725271350704134, "learning_rate": 0.0001990191968480674, "loss": 23.0, "step": 772 }, { "epoch": 0.4639855942376951, "grad_norm": 0.0009398331167176366, "learning_rate": 0.00019901655573698998, "loss": 23.0, "step": 773 }, { "epoch": 0.4645858343337335, "grad_norm": 0.0007297866977751255, "learning_rate": 0.0001990139110922664, "loss": 23.0, "step": 774 }, { "epoch": 0.4651860744297719, "grad_norm": 0.0007459200569428504, "learning_rate": 0.00019901126291399103, "loss": 23.0, "step": 775 }, { "epoch": 0.4657863145258103, "grad_norm": 0.0008847762364894152, "learning_rate": 0.0001990086112022584, "loss": 23.0, "step": 776 }, { "epoch": 0.46638655462184875, "grad_norm": 0.00059562484966591, "learning_rate": 0.0001990059559571631, "loss": 23.0, "step": 777 }, { "epoch": 0.4669867947178872, "grad_norm": 0.0007079718634486198, "learning_rate": 0.00019900329717879988, "loss": 23.0, "step": 778 }, { "epoch": 0.46758703481392555, "grad_norm": 0.0006528205703943968, "learning_rate": 0.00019900063486726364, "loss": 23.0, "step": 779 }, { "epoch": 0.468187274909964, "grad_norm": 0.0007662418647669256, "learning_rate": 0.00019899796902264944, "loss": 23.0, "step": 780 }, { "epoch": 0.4687875150060024, "grad_norm": 0.0009094589622691274, "learning_rate": 0.00019899529964505236, "loss": 23.0, "step": 781 }, { "epoch": 0.46938775510204084, "grad_norm": 0.0007676285458728671, "learning_rate": 0.0001989926267345677, "loss": 23.0, "step": 782 }, { "epoch": 0.4699879951980792, "grad_norm": 0.00048528422485105693, "learning_rate": 0.0001989899502912908, "loss": 23.0, "step": 783 }, { "epoch": 0.47058823529411764, "grad_norm": 0.0008444074192084372, "learning_rate": 0.00019898727031531722, "loss": 23.0, "step": 784 }, { "epoch": 0.47118847539015607, "grad_norm": 0.0017573406221345067, "learning_rate": 0.0001989845868067426, "loss": 23.0, "step": 785 }, { "epoch": 0.4717887154861945, "grad_norm": 0.0013762153685092926, "learning_rate": 0.0001989818997656627, "loss": 23.0, "step": 786 }, { "epoch": 0.4723889555822329, "grad_norm": 0.0007878051837906241, "learning_rate": 0.00019897920919217338, "loss": 23.0, "step": 787 }, { "epoch": 0.4729891956782713, "grad_norm": 0.0008741064229980111, "learning_rate": 0.0001989765150863707, "loss": 23.0, "step": 788 }, { "epoch": 0.47358943577430973, "grad_norm": 0.0005487430025823414, "learning_rate": 0.0001989738174483508, "loss": 23.0, "step": 789 }, { "epoch": 0.47418967587034816, "grad_norm": 0.0016626424621790648, "learning_rate": 0.00019897111627820998, "loss": 23.0, "step": 790 }, { "epoch": 0.47478991596638653, "grad_norm": 0.0006386625464074314, "learning_rate": 0.0001989684115760446, "loss": 23.0, "step": 791 }, { "epoch": 0.47539015606242496, "grad_norm": 0.0006513031548820436, "learning_rate": 0.00019896570334195112, "loss": 23.0, "step": 792 }, { "epoch": 0.4759903961584634, "grad_norm": 0.0005831916932947934, "learning_rate": 0.00019896299157602632, "loss": 23.0, "step": 793 }, { "epoch": 0.4765906362545018, "grad_norm": 0.0008199085714295506, "learning_rate": 0.0001989602762783669, "loss": 23.0, "step": 794 }, { "epoch": 0.4771908763505402, "grad_norm": 0.0009538012091070414, "learning_rate": 0.00019895755744906975, "loss": 23.0, "step": 795 }, { "epoch": 0.4777911164465786, "grad_norm": 0.0003274022601544857, "learning_rate": 0.00019895483508823193, "loss": 23.0, "step": 796 }, { "epoch": 0.47839135654261705, "grad_norm": 0.00060317013412714, "learning_rate": 0.0001989521091959506, "loss": 23.0, "step": 797 }, { "epoch": 0.4789915966386555, "grad_norm": 0.0006186945829540491, "learning_rate": 0.00019894937977232301, "loss": 23.0, "step": 798 }, { "epoch": 0.47959183673469385, "grad_norm": 0.0016297240508720279, "learning_rate": 0.0001989466468174466, "loss": 23.0, "step": 799 }, { "epoch": 0.4801920768307323, "grad_norm": 0.0004947784473188221, "learning_rate": 0.0001989439103314189, "loss": 23.0, "step": 800 }, { "epoch": 0.4807923169267707, "grad_norm": 0.0003158549952786416, "learning_rate": 0.00019894117031433756, "loss": 23.0, "step": 801 }, { "epoch": 0.48139255702280914, "grad_norm": 0.0005081993876956403, "learning_rate": 0.00019893842676630032, "loss": 23.0, "step": 802 }, { "epoch": 0.4819927971188475, "grad_norm": 0.0011078780516982079, "learning_rate": 0.00019893567968740517, "loss": 23.0, "step": 803 }, { "epoch": 0.48259303721488594, "grad_norm": 0.0006481404998339713, "learning_rate": 0.00019893292907775006, "loss": 23.0, "step": 804 }, { "epoch": 0.4831932773109244, "grad_norm": 0.001042915158905089, "learning_rate": 0.0001989301749374332, "loss": 23.0, "step": 805 }, { "epoch": 0.4837935174069628, "grad_norm": 0.0006100856116972864, "learning_rate": 0.00019892741726655294, "loss": 23.0, "step": 806 }, { "epoch": 0.4843937575030012, "grad_norm": 0.0006573795690201223, "learning_rate": 0.0001989246560652076, "loss": 23.0, "step": 807 }, { "epoch": 0.4849939975990396, "grad_norm": 0.0009064198238775134, "learning_rate": 0.00019892189133349572, "loss": 23.0, "step": 808 }, { "epoch": 0.48559423769507803, "grad_norm": 0.0012316376669332385, "learning_rate": 0.000198919123071516, "loss": 23.0, "step": 809 }, { "epoch": 0.48619447779111646, "grad_norm": 0.0006688730791211128, "learning_rate": 0.00019891635127936724, "loss": 23.0, "step": 810 }, { "epoch": 0.48679471788715484, "grad_norm": 0.0014101287815719843, "learning_rate": 0.00019891357595714835, "loss": 23.0, "step": 811 }, { "epoch": 0.48739495798319327, "grad_norm": 0.0005622003809548914, "learning_rate": 0.00019891079710495838, "loss": 23.0, "step": 812 }, { "epoch": 0.4879951980792317, "grad_norm": 0.000873049721121788, "learning_rate": 0.00019890801472289646, "loss": 23.0, "step": 813 }, { "epoch": 0.4885954381752701, "grad_norm": 0.0007714332896284759, "learning_rate": 0.00019890522881106193, "loss": 23.0, "step": 814 }, { "epoch": 0.4891956782713085, "grad_norm": 0.0008941619307734072, "learning_rate": 0.0001989024393695542, "loss": 23.0, "step": 815 }, { "epoch": 0.4897959183673469, "grad_norm": 0.0009259075159206986, "learning_rate": 0.00019889964639847283, "loss": 23.0, "step": 816 }, { "epoch": 0.49039615846338536, "grad_norm": 0.0004040625353809446, "learning_rate": 0.00019889684989791745, "loss": 23.0, "step": 817 }, { "epoch": 0.4909963985594238, "grad_norm": 0.0011171556543558836, "learning_rate": 0.0001988940498679879, "loss": 23.0, "step": 818 }, { "epoch": 0.49159663865546216, "grad_norm": 0.00037770162452943623, "learning_rate": 0.0001988912463087841, "loss": 23.0, "step": 819 }, { "epoch": 0.4921968787515006, "grad_norm": 0.00041134696220979095, "learning_rate": 0.00019888843922040612, "loss": 23.0, "step": 820 }, { "epoch": 0.492797118847539, "grad_norm": 0.0006298330845311284, "learning_rate": 0.00019888562860295408, "loss": 23.0, "step": 821 }, { "epoch": 0.49339735894357745, "grad_norm": 0.0006055657286196947, "learning_rate": 0.00019888281445652837, "loss": 23.0, "step": 822 }, { "epoch": 0.4939975990396158, "grad_norm": 0.001203834661282599, "learning_rate": 0.00019887999678122934, "loss": 23.0, "step": 823 }, { "epoch": 0.49459783913565425, "grad_norm": 0.0006844853051006794, "learning_rate": 0.00019887717557715755, "loss": 23.0, "step": 824 }, { "epoch": 0.4951980792316927, "grad_norm": 0.0004448948602657765, "learning_rate": 0.00019887435084441372, "loss": 23.0, "step": 825 }, { "epoch": 0.4957983193277311, "grad_norm": 0.0004217177047394216, "learning_rate": 0.00019887152258309863, "loss": 23.0, "step": 826 }, { "epoch": 0.4963985594237695, "grad_norm": 0.0006263357936404645, "learning_rate": 0.00019886869079331328, "loss": 23.0, "step": 827 }, { "epoch": 0.4969987995198079, "grad_norm": 0.0007113023893907666, "learning_rate": 0.00019886585547515862, "loss": 23.0, "step": 828 }, { "epoch": 0.49759903961584634, "grad_norm": 0.0014177537523210049, "learning_rate": 0.00019886301662873593, "loss": 23.0, "step": 829 }, { "epoch": 0.49819927971188477, "grad_norm": 0.0008082055719569325, "learning_rate": 0.00019886017425414643, "loss": 23.0, "step": 830 }, { "epoch": 0.49879951980792314, "grad_norm": 0.0005536688840948045, "learning_rate": 0.00019885732835149168, "loss": 23.0, "step": 831 }, { "epoch": 0.49939975990396157, "grad_norm": 0.0006400597630999982, "learning_rate": 0.00019885447892087314, "loss": 23.0, "step": 832 }, { "epoch": 0.5, "grad_norm": 0.0005208631628192961, "learning_rate": 0.00019885162596239254, "loss": 23.0, "step": 833 }, { "epoch": 0.5006002400960384, "grad_norm": 0.0007002478232607245, "learning_rate": 0.00019884876947615166, "loss": 23.0, "step": 834 }, { "epoch": 0.5012004801920769, "grad_norm": 0.0004122959799133241, "learning_rate": 0.00019884590946225248, "loss": 23.0, "step": 835 }, { "epoch": 0.5018007202881153, "grad_norm": 0.0006469865329563618, "learning_rate": 0.00019884304592079707, "loss": 23.0, "step": 836 }, { "epoch": 0.5024009603841537, "grad_norm": 0.000518444343470037, "learning_rate": 0.00019884017885188758, "loss": 23.0, "step": 837 }, { "epoch": 0.503001200480192, "grad_norm": 0.0012191816931590438, "learning_rate": 0.00019883730825562637, "loss": 23.0, "step": 838 }, { "epoch": 0.5036014405762305, "grad_norm": 0.0007649417966604233, "learning_rate": 0.00019883443413211586, "loss": 23.0, "step": 839 }, { "epoch": 0.5042016806722689, "grad_norm": 0.0008528571343049407, "learning_rate": 0.00019883155648145863, "loss": 23.0, "step": 840 }, { "epoch": 0.5048019207683073, "grad_norm": 0.0006200968637131155, "learning_rate": 0.00019882867530375737, "loss": 23.0, "step": 841 }, { "epoch": 0.5054021608643458, "grad_norm": 0.0011856872588396072, "learning_rate": 0.00019882579059911492, "loss": 23.0, "step": 842 }, { "epoch": 0.5060024009603842, "grad_norm": 0.0005892259650863707, "learning_rate": 0.0001988229023676342, "loss": 23.0, "step": 843 }, { "epoch": 0.5066026410564226, "grad_norm": 0.001340447342954576, "learning_rate": 0.00019882001060941832, "loss": 23.0, "step": 844 }, { "epoch": 0.507202881152461, "grad_norm": 0.0006858596461825073, "learning_rate": 0.00019881711532457043, "loss": 23.0, "step": 845 }, { "epoch": 0.5078031212484994, "grad_norm": 0.0013606062857434154, "learning_rate": 0.0001988142165131939, "loss": 23.0, "step": 846 }, { "epoch": 0.5084033613445378, "grad_norm": 0.0010817131260409951, "learning_rate": 0.00019881131417539215, "loss": 23.0, "step": 847 }, { "epoch": 0.5090036014405762, "grad_norm": 0.0007884291699156165, "learning_rate": 0.00019880840831126877, "loss": 23.0, "step": 848 }, { "epoch": 0.5096038415366146, "grad_norm": 0.0008213819819502532, "learning_rate": 0.00019880549892092747, "loss": 23.0, "step": 849 }, { "epoch": 0.5102040816326531, "grad_norm": 0.0007805051864124835, "learning_rate": 0.0001988025860044721, "loss": 23.0, "step": 850 }, { "epoch": 0.5108043217286915, "grad_norm": 0.001384717645123601, "learning_rate": 0.00019879966956200655, "loss": 23.0, "step": 851 }, { "epoch": 0.5114045618247299, "grad_norm": 0.0005536239477805793, "learning_rate": 0.0001987967495936349, "loss": 23.0, "step": 852 }, { "epoch": 0.5120048019207684, "grad_norm": 0.0010214439826086164, "learning_rate": 0.00019879382609946147, "loss": 23.0, "step": 853 }, { "epoch": 0.5126050420168067, "grad_norm": 0.0006562453927472234, "learning_rate": 0.00019879089907959047, "loss": 23.0, "step": 854 }, { "epoch": 0.5132052821128451, "grad_norm": 0.0006304988055489957, "learning_rate": 0.00019878796853412643, "loss": 23.0, "step": 855 }, { "epoch": 0.5138055222088835, "grad_norm": 0.0008602405432611704, "learning_rate": 0.00019878503446317387, "loss": 23.0, "step": 856 }, { "epoch": 0.514405762304922, "grad_norm": 0.00042852506157942116, "learning_rate": 0.00019878209686683754, "loss": 23.0, "step": 857 }, { "epoch": 0.5150060024009604, "grad_norm": 0.0005161712178960443, "learning_rate": 0.00019877915574522233, "loss": 23.0, "step": 858 }, { "epoch": 0.5156062424969988, "grad_norm": 0.000647779437713325, "learning_rate": 0.00019877621109843307, "loss": 23.0, "step": 859 }, { "epoch": 0.5162064825930373, "grad_norm": 0.0008087221649475396, "learning_rate": 0.00019877326292657498, "loss": 23.0, "step": 860 }, { "epoch": 0.5168067226890757, "grad_norm": 0.0024992094840854406, "learning_rate": 0.00019877031122975315, "loss": 23.0, "step": 861 }, { "epoch": 0.517406962785114, "grad_norm": 0.0006555477739311755, "learning_rate": 0.00019876735600807299, "loss": 23.0, "step": 862 }, { "epoch": 0.5180072028811524, "grad_norm": 0.00048210759996436536, "learning_rate": 0.00019876439726164, "loss": 23.0, "step": 863 }, { "epoch": 0.5186074429771909, "grad_norm": 0.0006051068194210529, "learning_rate": 0.00019876143499055968, "loss": 23.0, "step": 864 }, { "epoch": 0.5192076830732293, "grad_norm": 0.0005166485789231956, "learning_rate": 0.00019875846919493778, "loss": 23.0, "step": 865 }, { "epoch": 0.5198079231692677, "grad_norm": 0.00048096582759171724, "learning_rate": 0.0001987554998748802, "loss": 23.0, "step": 866 }, { "epoch": 0.5204081632653061, "grad_norm": 0.0007085447432473302, "learning_rate": 0.00019875252703049285, "loss": 23.0, "step": 867 }, { "epoch": 0.5210084033613446, "grad_norm": 0.0005757782491855323, "learning_rate": 0.0001987495506618818, "loss": 23.0, "step": 868 }, { "epoch": 0.521608643457383, "grad_norm": 0.0017492113402113318, "learning_rate": 0.00019874657076915332, "loss": 23.0, "step": 869 }, { "epoch": 0.5222088835534213, "grad_norm": 0.00044582822010852396, "learning_rate": 0.00019874358735241375, "loss": 23.0, "step": 870 }, { "epoch": 0.5228091236494598, "grad_norm": 0.0008416534401476383, "learning_rate": 0.00019874060041176952, "loss": 23.0, "step": 871 }, { "epoch": 0.5234093637454982, "grad_norm": 0.0010793217225000262, "learning_rate": 0.00019873760994732726, "loss": 23.0, "step": 872 }, { "epoch": 0.5240096038415366, "grad_norm": 0.000765673175919801, "learning_rate": 0.00019873461595919367, "loss": 23.0, "step": 873 }, { "epoch": 0.524609843937575, "grad_norm": 0.004173364955931902, "learning_rate": 0.00019873161844747564, "loss": 23.0, "step": 874 }, { "epoch": 0.5252100840336135, "grad_norm": 0.00026861258083954453, "learning_rate": 0.00019872861741228011, "loss": 23.0, "step": 875 }, { "epoch": 0.5258103241296519, "grad_norm": 0.0014661707682535052, "learning_rate": 0.0001987256128537142, "loss": 23.0, "step": 876 }, { "epoch": 0.5264105642256903, "grad_norm": 0.0008170696091838181, "learning_rate": 0.0001987226047718851, "loss": 23.0, "step": 877 }, { "epoch": 0.5270108043217286, "grad_norm": 0.00043921300675719976, "learning_rate": 0.00019871959316690015, "loss": 23.0, "step": 878 }, { "epoch": 0.5276110444177671, "grad_norm": 0.0004785111523233354, "learning_rate": 0.0001987165780388669, "loss": 23.0, "step": 879 }, { "epoch": 0.5282112845138055, "grad_norm": 0.0008355296449735761, "learning_rate": 0.00019871355938789288, "loss": 23.0, "step": 880 }, { "epoch": 0.5288115246098439, "grad_norm": 0.0006070987437851727, "learning_rate": 0.00019871053721408588, "loss": 23.0, "step": 881 }, { "epoch": 0.5294117647058824, "grad_norm": 0.000812305836006999, "learning_rate": 0.00019870751151755372, "loss": 23.0, "step": 882 }, { "epoch": 0.5300120048019208, "grad_norm": 0.0011360760545358062, "learning_rate": 0.00019870448229840435, "loss": 23.0, "step": 883 }, { "epoch": 0.5306122448979592, "grad_norm": 0.000924605003092438, "learning_rate": 0.0001987014495567459, "loss": 23.0, "step": 884 }, { "epoch": 0.5312124849939976, "grad_norm": 0.0007006872328929603, "learning_rate": 0.00019869841329268664, "loss": 23.0, "step": 885 }, { "epoch": 0.531812725090036, "grad_norm": 0.0006443238235078752, "learning_rate": 0.00019869537350633486, "loss": 23.0, "step": 886 }, { "epoch": 0.5324129651860744, "grad_norm": 0.0008577117114327848, "learning_rate": 0.0001986923301977991, "loss": 23.0, "step": 887 }, { "epoch": 0.5330132052821128, "grad_norm": 0.0007688856567256153, "learning_rate": 0.00019868928336718792, "loss": 23.0, "step": 888 }, { "epoch": 0.5336134453781513, "grad_norm": 0.0004415331350173801, "learning_rate": 0.0001986862330146101, "loss": 23.0, "step": 889 }, { "epoch": 0.5342136854741897, "grad_norm": 0.0004884483059868217, "learning_rate": 0.00019868317914017446, "loss": 23.0, "step": 890 }, { "epoch": 0.5348139255702281, "grad_norm": 0.0005003849510103464, "learning_rate": 0.00019868012174399, "loss": 23.0, "step": 891 }, { "epoch": 0.5354141656662665, "grad_norm": 0.0009174185106530786, "learning_rate": 0.00019867706082616585, "loss": 23.0, "step": 892 }, { "epoch": 0.536014405762305, "grad_norm": 0.0009697056957520545, "learning_rate": 0.0001986739963868112, "loss": 23.0, "step": 893 }, { "epoch": 0.5366146458583433, "grad_norm": 0.000463727192254737, "learning_rate": 0.00019867092842603548, "loss": 23.0, "step": 894 }, { "epoch": 0.5372148859543817, "grad_norm": 0.0004660410340875387, "learning_rate": 0.0001986678569439481, "loss": 23.0, "step": 895 }, { "epoch": 0.5378151260504201, "grad_norm": 0.0007628395687788725, "learning_rate": 0.00019866478194065871, "loss": 23.0, "step": 896 }, { "epoch": 0.5384153661464586, "grad_norm": 0.0005763001972809434, "learning_rate": 0.00019866170341627704, "loss": 23.0, "step": 897 }, { "epoch": 0.539015606242497, "grad_norm": 0.0007523773238062859, "learning_rate": 0.000198658621370913, "loss": 23.0, "step": 898 }, { "epoch": 0.5396158463385354, "grad_norm": 0.00048348799464292824, "learning_rate": 0.0001986555358046765, "loss": 23.0, "step": 899 }, { "epoch": 0.5402160864345739, "grad_norm": 0.0004331173258833587, "learning_rate": 0.00019865244671767772, "loss": 23.0, "step": 900 }, { "epoch": 0.5408163265306123, "grad_norm": 0.0005526962340809405, "learning_rate": 0.0001986493541100269, "loss": 23.0, "step": 901 }, { "epoch": 0.5414165666266506, "grad_norm": 0.0016207111766561866, "learning_rate": 0.00019864625798183435, "loss": 23.0, "step": 902 }, { "epoch": 0.542016806722689, "grad_norm": 0.0004141308309044689, "learning_rate": 0.00019864315833321064, "loss": 23.0, "step": 903 }, { "epoch": 0.5426170468187275, "grad_norm": 0.0006656160694546998, "learning_rate": 0.00019864005516426634, "loss": 23.0, "step": 904 }, { "epoch": 0.5432172869147659, "grad_norm": 0.0007548914290964603, "learning_rate": 0.00019863694847511217, "loss": 23.0, "step": 905 }, { "epoch": 0.5438175270108043, "grad_norm": 0.0011304601794108748, "learning_rate": 0.00019863383826585905, "loss": 23.0, "step": 906 }, { "epoch": 0.5444177671068428, "grad_norm": 0.0009428642224520445, "learning_rate": 0.000198630724536618, "loss": 23.0, "step": 907 }, { "epoch": 0.5450180072028812, "grad_norm": 0.0005785591201856732, "learning_rate": 0.00019862760728750004, "loss": 23.0, "step": 908 }, { "epoch": 0.5456182472989196, "grad_norm": 0.00038587188464589417, "learning_rate": 0.00019862448651861652, "loss": 23.0, "step": 909 }, { "epoch": 0.5462184873949579, "grad_norm": 0.00045680327457375824, "learning_rate": 0.00019862136223007876, "loss": 23.0, "step": 910 }, { "epoch": 0.5468187274909964, "grad_norm": 0.0005812095478177071, "learning_rate": 0.00019861823442199824, "loss": 23.0, "step": 911 }, { "epoch": 0.5474189675870348, "grad_norm": 0.0011624370235949755, "learning_rate": 0.00019861510309448664, "loss": 23.0, "step": 912 }, { "epoch": 0.5480192076830732, "grad_norm": 0.0005838541546836495, "learning_rate": 0.00019861196824765568, "loss": 23.0, "step": 913 }, { "epoch": 0.5486194477791116, "grad_norm": 0.0005749358097091317, "learning_rate": 0.0001986088298816172, "loss": 23.0, "step": 914 }, { "epoch": 0.5492196878751501, "grad_norm": 0.0007014405564405024, "learning_rate": 0.00019860568799648327, "loss": 23.0, "step": 915 }, { "epoch": 0.5498199279711885, "grad_norm": 0.0005809336435049772, "learning_rate": 0.00019860254259236596, "loss": 23.0, "step": 916 }, { "epoch": 0.5504201680672269, "grad_norm": 0.0006069474620744586, "learning_rate": 0.00019859939366937755, "loss": 23.0, "step": 917 }, { "epoch": 0.5510204081632653, "grad_norm": 0.0013985512778162956, "learning_rate": 0.0001985962412276304, "loss": 23.0, "step": 918 }, { "epoch": 0.5516206482593037, "grad_norm": 0.0009238467318937182, "learning_rate": 0.000198593085267237, "loss": 23.0, "step": 919 }, { "epoch": 0.5522208883553421, "grad_norm": 0.0011213804828003049, "learning_rate": 0.00019858992578831005, "loss": 23.0, "step": 920 }, { "epoch": 0.5528211284513805, "grad_norm": 0.0006154056172817945, "learning_rate": 0.0001985867627909622, "loss": 23.0, "step": 921 }, { "epoch": 0.553421368547419, "grad_norm": 0.000759779242798686, "learning_rate": 0.00019858359627530643, "loss": 23.0, "step": 922 }, { "epoch": 0.5540216086434574, "grad_norm": 0.0006431901128962636, "learning_rate": 0.00019858042624145566, "loss": 23.0, "step": 923 }, { "epoch": 0.5546218487394958, "grad_norm": 0.0006690253503620625, "learning_rate": 0.00019857725268952305, "loss": 23.0, "step": 924 }, { "epoch": 0.5552220888355343, "grad_norm": 0.0008324479567818344, "learning_rate": 0.0001985740756196219, "loss": 23.0, "step": 925 }, { "epoch": 0.5558223289315727, "grad_norm": 0.0006556141306646168, "learning_rate": 0.00019857089503186553, "loss": 23.0, "step": 926 }, { "epoch": 0.556422569027611, "grad_norm": 0.000579280371312052, "learning_rate": 0.00019856771092636747, "loss": 23.0, "step": 927 }, { "epoch": 0.5570228091236494, "grad_norm": 0.0006807451136410236, "learning_rate": 0.00019856452330324135, "loss": 23.0, "step": 928 }, { "epoch": 0.5576230492196879, "grad_norm": 0.0007975809858180583, "learning_rate": 0.00019856133216260096, "loss": 23.0, "step": 929 }, { "epoch": 0.5582232893157263, "grad_norm": 0.0006549794343300164, "learning_rate": 0.0001985581375045601, "loss": 23.0, "step": 930 }, { "epoch": 0.5588235294117647, "grad_norm": 0.0008131754002533853, "learning_rate": 0.00019855493932923287, "loss": 23.0, "step": 931 }, { "epoch": 0.5594237695078031, "grad_norm": 0.0004512158047873527, "learning_rate": 0.0001985517376367334, "loss": 23.0, "step": 932 }, { "epoch": 0.5600240096038416, "grad_norm": 0.0005497694946825504, "learning_rate": 0.00019854853242717589, "loss": 23.0, "step": 933 }, { "epoch": 0.56062424969988, "grad_norm": 0.0007320971344597638, "learning_rate": 0.00019854532370067472, "loss": 23.0, "step": 934 }, { "epoch": 0.5612244897959183, "grad_norm": 0.0007082552183419466, "learning_rate": 0.0001985421114573445, "loss": 23.0, "step": 935 }, { "epoch": 0.5618247298919568, "grad_norm": 0.0010231130290776491, "learning_rate": 0.00019853889569729978, "loss": 23.0, "step": 936 }, { "epoch": 0.5624249699879952, "grad_norm": 0.0009601555648259819, "learning_rate": 0.0001985356764206553, "loss": 23.0, "step": 937 }, { "epoch": 0.5630252100840336, "grad_norm": 0.0006255342741496861, "learning_rate": 0.00019853245362752605, "loss": 23.0, "step": 938 }, { "epoch": 0.563625450180072, "grad_norm": 0.0004980456433258951, "learning_rate": 0.00019852922731802695, "loss": 23.0, "step": 939 }, { "epoch": 0.5642256902761105, "grad_norm": 0.0007995213381946087, "learning_rate": 0.0001985259974922732, "loss": 23.0, "step": 940 }, { "epoch": 0.5648259303721489, "grad_norm": 0.00110067508649081, "learning_rate": 0.00019852276415038004, "loss": 23.0, "step": 941 }, { "epoch": 0.5654261704681873, "grad_norm": 0.001249002176336944, "learning_rate": 0.00019851952729246284, "loss": 23.0, "step": 942 }, { "epoch": 0.5660264105642256, "grad_norm": 0.0004925851826556027, "learning_rate": 0.00019851628691863713, "loss": 23.0, "step": 943 }, { "epoch": 0.5666266506602641, "grad_norm": 0.0005772880977019668, "learning_rate": 0.00019851304302901857, "loss": 23.0, "step": 944 }, { "epoch": 0.5672268907563025, "grad_norm": 0.0006800185656175017, "learning_rate": 0.0001985097956237229, "loss": 23.0, "step": 945 }, { "epoch": 0.5678271308523409, "grad_norm": 0.0006493727560155094, "learning_rate": 0.000198506544702866, "loss": 23.0, "step": 946 }, { "epoch": 0.5684273709483794, "grad_norm": 0.0012511987006291747, "learning_rate": 0.00019850329026656395, "loss": 23.0, "step": 947 }, { "epoch": 0.5690276110444178, "grad_norm": 0.0010138124926015735, "learning_rate": 0.00019850003231493285, "loss": 23.0, "step": 948 }, { "epoch": 0.5696278511404562, "grad_norm": 0.0007604805869050324, "learning_rate": 0.00019849677084808894, "loss": 23.0, "step": 949 }, { "epoch": 0.5702280912364946, "grad_norm": 0.0005830854061059654, "learning_rate": 0.00019849350586614866, "loss": 23.0, "step": 950 }, { "epoch": 0.570828331332533, "grad_norm": 0.0005887901061214507, "learning_rate": 0.00019849023736922853, "loss": 23.0, "step": 951 }, { "epoch": 0.5714285714285714, "grad_norm": 0.001255355542525649, "learning_rate": 0.00019848696535744513, "loss": 23.0, "step": 952 }, { "epoch": 0.5720288115246098, "grad_norm": 0.0004925208049826324, "learning_rate": 0.0001984836898309153, "loss": 23.0, "step": 953 }, { "epoch": 0.5726290516206483, "grad_norm": 0.0006673120078630745, "learning_rate": 0.0001984804107897559, "loss": 23.0, "step": 954 }, { "epoch": 0.5732292917166867, "grad_norm": 0.00044477067422121763, "learning_rate": 0.00019847712823408396, "loss": 23.0, "step": 955 }, { "epoch": 0.5738295318127251, "grad_norm": 0.0007815766730345786, "learning_rate": 0.00019847384216401665, "loss": 23.0, "step": 956 }, { "epoch": 0.5744297719087635, "grad_norm": 0.0005144941969774663, "learning_rate": 0.0001984705525796712, "loss": 23.0, "step": 957 }, { "epoch": 0.575030012004802, "grad_norm": 0.00047222990542650223, "learning_rate": 0.000198467259481165, "loss": 23.0, "step": 958 }, { "epoch": 0.5756302521008403, "grad_norm": 0.0008695271681062877, "learning_rate": 0.00019846396286861566, "loss": 23.0, "step": 959 }, { "epoch": 0.5762304921968787, "grad_norm": 0.0004319838772062212, "learning_rate": 0.00019846066274214073, "loss": 23.0, "step": 960 }, { "epoch": 0.5768307322929171, "grad_norm": 0.0007976375636644661, "learning_rate": 0.000198457359101858, "loss": 23.0, "step": 961 }, { "epoch": 0.5774309723889556, "grad_norm": 0.0004369649104773998, "learning_rate": 0.00019845405194788543, "loss": 23.0, "step": 962 }, { "epoch": 0.578031212484994, "grad_norm": 0.0010789295192807913, "learning_rate": 0.00019845074128034096, "loss": 23.0, "step": 963 }, { "epoch": 0.5786314525810324, "grad_norm": 0.00045877910451963544, "learning_rate": 0.0001984474270993428, "loss": 23.0, "step": 964 }, { "epoch": 0.5792316926770709, "grad_norm": 0.0005742406356148422, "learning_rate": 0.00019844410940500918, "loss": 23.0, "step": 965 }, { "epoch": 0.5798319327731093, "grad_norm": 0.0008709350368008018, "learning_rate": 0.00019844078819745856, "loss": 23.0, "step": 966 }, { "epoch": 0.5804321728691476, "grad_norm": 0.0005361419753171504, "learning_rate": 0.0001984374634768094, "loss": 23.0, "step": 967 }, { "epoch": 0.581032412965186, "grad_norm": 0.0003321192634757608, "learning_rate": 0.0001984341352431804, "loss": 23.0, "step": 968 }, { "epoch": 0.5816326530612245, "grad_norm": 0.0009687889833003283, "learning_rate": 0.0001984308034966903, "loss": 23.0, "step": 969 }, { "epoch": 0.5822328931572629, "grad_norm": 0.0008406118722632527, "learning_rate": 0.00019842746823745802, "loss": 23.0, "step": 970 }, { "epoch": 0.5828331332533013, "grad_norm": 0.0004772779066115618, "learning_rate": 0.00019842412946560257, "loss": 23.0, "step": 971 }, { "epoch": 0.5834333733493398, "grad_norm": 0.0014819619245827198, "learning_rate": 0.00019842078718124316, "loss": 23.0, "step": 972 }, { "epoch": 0.5840336134453782, "grad_norm": 0.001416862360201776, "learning_rate": 0.00019841744138449899, "loss": 23.0, "step": 973 }, { "epoch": 0.5846338535414166, "grad_norm": 0.0005799952778033912, "learning_rate": 0.00019841409207548951, "loss": 23.0, "step": 974 }, { "epoch": 0.5852340936374549, "grad_norm": 0.0013107365230098367, "learning_rate": 0.00019841073925433423, "loss": 23.0, "step": 975 }, { "epoch": 0.5858343337334934, "grad_norm": 0.0022494373843073845, "learning_rate": 0.0001984073829211528, "loss": 23.0, "step": 976 }, { "epoch": 0.5864345738295318, "grad_norm": 0.00047388195525854826, "learning_rate": 0.000198404023076065, "loss": 23.0, "step": 977 }, { "epoch": 0.5870348139255702, "grad_norm": 0.0004757082206197083, "learning_rate": 0.0001984006597191908, "loss": 23.0, "step": 978 }, { "epoch": 0.5876350540216086, "grad_norm": 0.0005135858664289117, "learning_rate": 0.00019839729285065012, "loss": 23.0, "step": 979 }, { "epoch": 0.5882352941176471, "grad_norm": 0.0009471832890994847, "learning_rate": 0.00019839392247056318, "loss": 23.0, "step": 980 }, { "epoch": 0.5888355342136855, "grad_norm": 0.0007709990604780614, "learning_rate": 0.00019839054857905022, "loss": 23.0, "step": 981 }, { "epoch": 0.5894357743097239, "grad_norm": 0.00041485187830403447, "learning_rate": 0.0001983871711762317, "loss": 23.0, "step": 982 }, { "epoch": 0.5900360144057623, "grad_norm": 0.0010062240762636065, "learning_rate": 0.0001983837902622281, "loss": 23.0, "step": 983 }, { "epoch": 0.5906362545018007, "grad_norm": 0.0022086831741034985, "learning_rate": 0.00019838040583716014, "loss": 23.0, "step": 984 }, { "epoch": 0.5912364945978391, "grad_norm": 0.000324814987834543, "learning_rate": 0.00019837701790114851, "loss": 23.0, "step": 985 }, { "epoch": 0.5918367346938775, "grad_norm": 0.0006208634586073458, "learning_rate": 0.0001983736264543142, "loss": 23.0, "step": 986 }, { "epoch": 0.592436974789916, "grad_norm": 0.000827543786726892, "learning_rate": 0.00019837023149677822, "loss": 23.0, "step": 987 }, { "epoch": 0.5930372148859544, "grad_norm": 0.0008125030435621738, "learning_rate": 0.0001983668330286617, "loss": 23.0, "step": 988 }, { "epoch": 0.5936374549819928, "grad_norm": 0.0006292484467849135, "learning_rate": 0.00019836343105008594, "loss": 23.0, "step": 989 }, { "epoch": 0.5942376950780313, "grad_norm": 0.0005044650752097368, "learning_rate": 0.00019836002556117234, "loss": 23.0, "step": 990 }, { "epoch": 0.5948379351740696, "grad_norm": 0.0005759747582487762, "learning_rate": 0.00019835661656204243, "loss": 23.0, "step": 991 }, { "epoch": 0.595438175270108, "grad_norm": 0.0010958920465782285, "learning_rate": 0.0001983532040528179, "loss": 23.0, "step": 992 }, { "epoch": 0.5960384153661464, "grad_norm": 0.0005467525334097445, "learning_rate": 0.0001983497880336205, "loss": 23.0, "step": 993 }, { "epoch": 0.5966386554621849, "grad_norm": 0.0010392843978479505, "learning_rate": 0.00019834636850457217, "loss": 23.0, "step": 994 }, { "epoch": 0.5972388955582233, "grad_norm": 0.0006499423179775476, "learning_rate": 0.00019834294546579488, "loss": 23.0, "step": 995 }, { "epoch": 0.5978391356542617, "grad_norm": 0.00035032519372180104, "learning_rate": 0.00019833951891741087, "loss": 23.0, "step": 996 }, { "epoch": 0.5984393757503002, "grad_norm": 0.0006970508839003742, "learning_rate": 0.00019833608885954238, "loss": 23.0, "step": 997 }, { "epoch": 0.5990396158463386, "grad_norm": 0.0005337274633347988, "learning_rate": 0.00019833265529231185, "loss": 23.0, "step": 998 }, { "epoch": 0.5996398559423769, "grad_norm": 0.0004186936712358147, "learning_rate": 0.0001983292182158418, "loss": 23.0, "step": 999 }, { "epoch": 0.6002400960384153, "grad_norm": 0.0005397711065597832, "learning_rate": 0.00019832577763025485, "loss": 23.0, "step": 1000 }, { "epoch": 0.6002400960384153, "eval_loss": 11.5, "eval_runtime": 5.4733, "eval_samples_per_second": 256.333, "eval_steps_per_second": 32.156, "step": 1000 }, { "epoch": 0.6008403361344538, "grad_norm": 0.0008261637412942946, "learning_rate": 0.00019832233353567381, "loss": 23.0, "step": 1001 }, { "epoch": 0.6014405762304922, "grad_norm": 0.00048322940710932016, "learning_rate": 0.00019831888593222163, "loss": 23.0, "step": 1002 }, { "epoch": 0.6020408163265306, "grad_norm": 0.0007284404709935188, "learning_rate": 0.0001983154348200213, "loss": 23.0, "step": 1003 }, { "epoch": 0.602641056422569, "grad_norm": 0.0010770062217488885, "learning_rate": 0.00019831198019919604, "loss": 23.0, "step": 1004 }, { "epoch": 0.6032412965186075, "grad_norm": 0.000601259816903621, "learning_rate": 0.00019830852206986907, "loss": 23.0, "step": 1005 }, { "epoch": 0.6038415366146459, "grad_norm": 0.0005424935952760279, "learning_rate": 0.00019830506043216382, "loss": 23.0, "step": 1006 }, { "epoch": 0.6044417767106842, "grad_norm": 0.0010248576290905476, "learning_rate": 0.00019830159528620383, "loss": 23.0, "step": 1007 }, { "epoch": 0.6050420168067226, "grad_norm": 0.0003622387012001127, "learning_rate": 0.00019829812663211275, "loss": 23.0, "step": 1008 }, { "epoch": 0.6056422569027611, "grad_norm": 0.0007084689568728209, "learning_rate": 0.00019829465447001443, "loss": 23.0, "step": 1009 }, { "epoch": 0.6062424969987995, "grad_norm": 0.0012313174083828926, "learning_rate": 0.00019829117880003268, "loss": 23.0, "step": 1010 }, { "epoch": 0.6068427370948379, "grad_norm": 0.0007114025647751987, "learning_rate": 0.00019828769962229164, "loss": 23.0, "step": 1011 }, { "epoch": 0.6074429771908764, "grad_norm": 0.0008022873662412167, "learning_rate": 0.0001982842169369154, "loss": 23.0, "step": 1012 }, { "epoch": 0.6080432172869148, "grad_norm": 0.00036245142109692097, "learning_rate": 0.0001982807307440283, "loss": 23.0, "step": 1013 }, { "epoch": 0.6086434573829532, "grad_norm": 0.0004387908265925944, "learning_rate": 0.00019827724104375474, "loss": 23.0, "step": 1014 }, { "epoch": 0.6092436974789915, "grad_norm": 0.0004411818808875978, "learning_rate": 0.0001982737478362192, "loss": 23.0, "step": 1015 }, { "epoch": 0.60984393757503, "grad_norm": 0.0005459621315822005, "learning_rate": 0.0001982702511215464, "loss": 23.0, "step": 1016 }, { "epoch": 0.6104441776710684, "grad_norm": 0.0004670721828006208, "learning_rate": 0.00019826675089986115, "loss": 23.0, "step": 1017 }, { "epoch": 0.6110444177671068, "grad_norm": 0.0008358248160220683, "learning_rate": 0.00019826324717128833, "loss": 23.0, "step": 1018 }, { "epoch": 0.6116446578631453, "grad_norm": 0.0003749076568055898, "learning_rate": 0.00019825973993595295, "loss": 23.0, "step": 1019 }, { "epoch": 0.6122448979591837, "grad_norm": 0.0014997563557699323, "learning_rate": 0.0001982562291939802, "loss": 23.0, "step": 1020 }, { "epoch": 0.6128451380552221, "grad_norm": 0.0007296651601791382, "learning_rate": 0.0001982527149454954, "loss": 23.0, "step": 1021 }, { "epoch": 0.6134453781512605, "grad_norm": 0.0010369520168751478, "learning_rate": 0.00019824919719062397, "loss": 23.0, "step": 1022 }, { "epoch": 0.614045618247299, "grad_norm": 0.0006567711825482547, "learning_rate": 0.00019824567592949136, "loss": 23.0, "step": 1023 }, { "epoch": 0.6146458583433373, "grad_norm": 0.0005550686037167907, "learning_rate": 0.0001982421511622233, "loss": 23.0, "step": 1024 }, { "epoch": 0.6152460984393757, "grad_norm": 0.0005852804169990122, "learning_rate": 0.0001982386228889456, "loss": 23.0, "step": 1025 }, { "epoch": 0.6158463385354142, "grad_norm": 0.0011896343203261495, "learning_rate": 0.00019823509110978416, "loss": 23.0, "step": 1026 }, { "epoch": 0.6164465786314526, "grad_norm": 0.00104072829708457, "learning_rate": 0.000198231555824865, "loss": 23.0, "step": 1027 }, { "epoch": 0.617046818727491, "grad_norm": 0.0006339827086776495, "learning_rate": 0.00019822801703431425, "loss": 23.0, "step": 1028 }, { "epoch": 0.6176470588235294, "grad_norm": 0.0007693166844546795, "learning_rate": 0.00019822447473825833, "loss": 23.0, "step": 1029 }, { "epoch": 0.6182472989195679, "grad_norm": 0.0009480812004767358, "learning_rate": 0.0001982209289368235, "loss": 23.0, "step": 1030 }, { "epoch": 0.6188475390156063, "grad_norm": 0.0013159072259441018, "learning_rate": 0.00019821737963013637, "loss": 23.0, "step": 1031 }, { "epoch": 0.6194477791116446, "grad_norm": 0.0008248360245488584, "learning_rate": 0.00019821382681832367, "loss": 23.0, "step": 1032 }, { "epoch": 0.620048019207683, "grad_norm": 0.0014143993612378836, "learning_rate": 0.00019821027050151205, "loss": 23.0, "step": 1033 }, { "epoch": 0.6206482593037215, "grad_norm": 0.0009804586879909039, "learning_rate": 0.00019820671067982855, "loss": 23.0, "step": 1034 }, { "epoch": 0.6212484993997599, "grad_norm": 0.0003080831083934754, "learning_rate": 0.00019820314735340017, "loss": 23.0, "step": 1035 }, { "epoch": 0.6218487394957983, "grad_norm": 0.000753725238610059, "learning_rate": 0.00019819958052235404, "loss": 23.0, "step": 1036 }, { "epoch": 0.6224489795918368, "grad_norm": 0.0003446524788159877, "learning_rate": 0.0001981960101868175, "loss": 23.0, "step": 1037 }, { "epoch": 0.6230492196878752, "grad_norm": 0.001190772163681686, "learning_rate": 0.0001981924363469179, "loss": 23.0, "step": 1038 }, { "epoch": 0.6236494597839136, "grad_norm": 0.0011959177209064364, "learning_rate": 0.00019818885900278288, "loss": 23.0, "step": 1039 }, { "epoch": 0.6242496998799519, "grad_norm": 0.0007084035314619541, "learning_rate": 0.00019818527815454004, "loss": 23.0, "step": 1040 }, { "epoch": 0.6248499399759904, "grad_norm": 0.0007333476096391678, "learning_rate": 0.00019818169380231718, "loss": 23.0, "step": 1041 }, { "epoch": 0.6254501800720288, "grad_norm": 0.0005077448440715671, "learning_rate": 0.00019817810594624223, "loss": 23.0, "step": 1042 }, { "epoch": 0.6260504201680672, "grad_norm": 0.001495958655141294, "learning_rate": 0.00019817451458644322, "loss": 23.0, "step": 1043 }, { "epoch": 0.6266506602641057, "grad_norm": 0.0008460984681732953, "learning_rate": 0.0001981709197230483, "loss": 23.0, "step": 1044 }, { "epoch": 0.6272509003601441, "grad_norm": 0.0012502010213211179, "learning_rate": 0.00019816732135618582, "loss": 23.0, "step": 1045 }, { "epoch": 0.6278511404561825, "grad_norm": 0.0005108893965370953, "learning_rate": 0.00019816371948598414, "loss": 23.0, "step": 1046 }, { "epoch": 0.6284513805522209, "grad_norm": 0.0008723620558157563, "learning_rate": 0.0001981601141125718, "loss": 23.0, "step": 1047 }, { "epoch": 0.6290516206482593, "grad_norm": 0.00039634722634218633, "learning_rate": 0.0001981565052360775, "loss": 23.0, "step": 1048 }, { "epoch": 0.6296518607442977, "grad_norm": 0.0007444315706379712, "learning_rate": 0.00019815289285663004, "loss": 23.0, "step": 1049 }, { "epoch": 0.6302521008403361, "grad_norm": 0.0008287147502414882, "learning_rate": 0.00019814927697435827, "loss": 23.0, "step": 1050 }, { "epoch": 0.6308523409363745, "grad_norm": 0.0009593195863999426, "learning_rate": 0.00019814565758939133, "loss": 23.0, "step": 1051 }, { "epoch": 0.631452581032413, "grad_norm": 0.0005941663403064013, "learning_rate": 0.00019814203470185828, "loss": 23.0, "step": 1052 }, { "epoch": 0.6320528211284514, "grad_norm": 0.0003965641662944108, "learning_rate": 0.00019813840831188847, "loss": 23.0, "step": 1053 }, { "epoch": 0.6326530612244898, "grad_norm": 0.0006090254755690694, "learning_rate": 0.00019813477841961133, "loss": 23.0, "step": 1054 }, { "epoch": 0.6332533013205283, "grad_norm": 0.0005143271991983056, "learning_rate": 0.00019813114502515634, "loss": 23.0, "step": 1055 }, { "epoch": 0.6338535414165666, "grad_norm": 0.0008223920594900846, "learning_rate": 0.00019812750812865326, "loss": 23.0, "step": 1056 }, { "epoch": 0.634453781512605, "grad_norm": 0.0006902985624037683, "learning_rate": 0.00019812386773023183, "loss": 23.0, "step": 1057 }, { "epoch": 0.6350540216086434, "grad_norm": 0.0007466610404662788, "learning_rate": 0.00019812022383002193, "loss": 23.0, "step": 1058 }, { "epoch": 0.6356542617046819, "grad_norm": 0.0008086318848654628, "learning_rate": 0.00019811657642815363, "loss": 23.0, "step": 1059 }, { "epoch": 0.6362545018007203, "grad_norm": 0.0014404834946617484, "learning_rate": 0.00019811292552475714, "loss": 23.0, "step": 1060 }, { "epoch": 0.6368547418967587, "grad_norm": 0.00042763081728480756, "learning_rate": 0.00019810927111996269, "loss": 23.0, "step": 1061 }, { "epoch": 0.6374549819927972, "grad_norm": 0.0004761995223816484, "learning_rate": 0.00019810561321390073, "loss": 23.0, "step": 1062 }, { "epoch": 0.6380552220888356, "grad_norm": 0.0008606476476415992, "learning_rate": 0.0001981019518067018, "loss": 23.0, "step": 1063 }, { "epoch": 0.6386554621848739, "grad_norm": 0.0010334079852327704, "learning_rate": 0.00019809828689849654, "loss": 23.0, "step": 1064 }, { "epoch": 0.6392557022809123, "grad_norm": 0.0007184183341450989, "learning_rate": 0.00019809461848941577, "loss": 23.0, "step": 1065 }, { "epoch": 0.6398559423769508, "grad_norm": 0.0006008398486301303, "learning_rate": 0.0001980909465795904, "loss": 23.0, "step": 1066 }, { "epoch": 0.6404561824729892, "grad_norm": 0.0009349811589345336, "learning_rate": 0.00019808727116915145, "loss": 23.0, "step": 1067 }, { "epoch": 0.6410564225690276, "grad_norm": 0.001079434179700911, "learning_rate": 0.0001980835922582301, "loss": 23.0, "step": 1068 }, { "epoch": 0.641656662665066, "grad_norm": 0.00046767835738137364, "learning_rate": 0.0001980799098469577, "loss": 23.0, "step": 1069 }, { "epoch": 0.6422569027611045, "grad_norm": 0.0007817531586624682, "learning_rate": 0.0001980762239354655, "loss": 23.0, "step": 1070 }, { "epoch": 0.6428571428571429, "grad_norm": 0.0009396449313499033, "learning_rate": 0.00019807253452388526, "loss": 23.0, "step": 1071 }, { "epoch": 0.6434573829531812, "grad_norm": 0.0006175727467052639, "learning_rate": 0.0001980688416123485, "loss": 23.0, "step": 1072 }, { "epoch": 0.6440576230492197, "grad_norm": 0.00031634268816560507, "learning_rate": 0.000198065145200987, "loss": 23.0, "step": 1073 }, { "epoch": 0.6446578631452581, "grad_norm": 0.00112092902418226, "learning_rate": 0.00019806144528993277, "loss": 23.0, "step": 1074 }, { "epoch": 0.6452581032412965, "grad_norm": 0.0004822269838768989, "learning_rate": 0.00019805774187931782, "loss": 23.0, "step": 1075 }, { "epoch": 0.6458583433373349, "grad_norm": 0.0007649666513316333, "learning_rate": 0.00019805403496927425, "loss": 23.0, "step": 1076 }, { "epoch": 0.6464585834333734, "grad_norm": 0.0004432103887666017, "learning_rate": 0.00019805032455993442, "loss": 23.0, "step": 1077 }, { "epoch": 0.6470588235294118, "grad_norm": 0.0005666416836902499, "learning_rate": 0.00019804661065143071, "loss": 23.0, "step": 1078 }, { "epoch": 0.6476590636254502, "grad_norm": 0.0007401452166959643, "learning_rate": 0.00019804289324389568, "loss": 23.0, "step": 1079 }, { "epoch": 0.6482593037214885, "grad_norm": 0.0005741413333453238, "learning_rate": 0.000198039172337462, "loss": 23.0, "step": 1080 }, { "epoch": 0.648859543817527, "grad_norm": 0.001217698329128325, "learning_rate": 0.0001980354479322624, "loss": 23.0, "step": 1081 }, { "epoch": 0.6494597839135654, "grad_norm": 0.0010777065763249993, "learning_rate": 0.00019803172002842988, "loss": 23.0, "step": 1082 }, { "epoch": 0.6500600240096038, "grad_norm": 0.0005868837470188737, "learning_rate": 0.00019802798862609744, "loss": 23.0, "step": 1083 }, { "epoch": 0.6506602641056423, "grad_norm": 0.0004865597002208233, "learning_rate": 0.00019802425372539824, "loss": 23.0, "step": 1084 }, { "epoch": 0.6512605042016807, "grad_norm": 0.0012064909096807241, "learning_rate": 0.00019802051532646556, "loss": 23.0, "step": 1085 }, { "epoch": 0.6518607442977191, "grad_norm": 0.000426503480412066, "learning_rate": 0.00019801677342943283, "loss": 23.0, "step": 1086 }, { "epoch": 0.6524609843937575, "grad_norm": 0.0006327254232019186, "learning_rate": 0.0001980130280344336, "loss": 23.0, "step": 1087 }, { "epoch": 0.6530612244897959, "grad_norm": 0.0007822368061169982, "learning_rate": 0.0001980092791416015, "loss": 23.0, "step": 1088 }, { "epoch": 0.6536614645858343, "grad_norm": 0.0005497146630659699, "learning_rate": 0.00019800552675107035, "loss": 23.0, "step": 1089 }, { "epoch": 0.6542617046818727, "grad_norm": 0.0010286032920703292, "learning_rate": 0.00019800177086297404, "loss": 23.0, "step": 1090 }, { "epoch": 0.6548619447779112, "grad_norm": 0.0011467989534139633, "learning_rate": 0.00019799801147744664, "loss": 23.0, "step": 1091 }, { "epoch": 0.6554621848739496, "grad_norm": 0.0007217766833491623, "learning_rate": 0.00019799424859462227, "loss": 23.0, "step": 1092 }, { "epoch": 0.656062424969988, "grad_norm": 0.0006906822673045099, "learning_rate": 0.00019799048221463524, "loss": 23.0, "step": 1093 }, { "epoch": 0.6566626650660264, "grad_norm": 0.0004381495527923107, "learning_rate": 0.00019798671233761997, "loss": 23.0, "step": 1094 }, { "epoch": 0.6572629051620649, "grad_norm": 0.000589704723097384, "learning_rate": 0.00019798293896371099, "loss": 23.0, "step": 1095 }, { "epoch": 0.6578631452581032, "grad_norm": 0.0009171977289952338, "learning_rate": 0.00019797916209304294, "loss": 23.0, "step": 1096 }, { "epoch": 0.6584633853541416, "grad_norm": 0.000955081544816494, "learning_rate": 0.00019797538172575067, "loss": 23.0, "step": 1097 }, { "epoch": 0.65906362545018, "grad_norm": 0.000602062267716974, "learning_rate": 0.00019797159786196902, "loss": 23.0, "step": 1098 }, { "epoch": 0.6596638655462185, "grad_norm": 0.0008331533172167838, "learning_rate": 0.00019796781050183306, "loss": 23.0, "step": 1099 }, { "epoch": 0.6602641056422569, "grad_norm": 0.0007971031591296196, "learning_rate": 0.00019796401964547794, "loss": 23.0, "step": 1100 }, { "epoch": 0.6608643457382953, "grad_norm": 0.0007683509029448032, "learning_rate": 0.00019796022529303895, "loss": 23.0, "step": 1101 }, { "epoch": 0.6614645858343338, "grad_norm": 0.0004775487759616226, "learning_rate": 0.00019795642744465153, "loss": 23.0, "step": 1102 }, { "epoch": 0.6620648259303722, "grad_norm": 0.0012434301897883415, "learning_rate": 0.00019795262610045118, "loss": 23.0, "step": 1103 }, { "epoch": 0.6626650660264105, "grad_norm": 0.0016068033874034882, "learning_rate": 0.00019794882126057353, "loss": 23.0, "step": 1104 }, { "epoch": 0.6632653061224489, "grad_norm": 0.0007158942753449082, "learning_rate": 0.00019794501292515443, "loss": 23.0, "step": 1105 }, { "epoch": 0.6638655462184874, "grad_norm": 0.0007577801588922739, "learning_rate": 0.00019794120109432978, "loss": 23.0, "step": 1106 }, { "epoch": 0.6644657863145258, "grad_norm": 0.0005971409264020622, "learning_rate": 0.00019793738576823558, "loss": 23.0, "step": 1107 }, { "epoch": 0.6650660264105642, "grad_norm": 0.0010119474027305841, "learning_rate": 0.00019793356694700802, "loss": 23.0, "step": 1108 }, { "epoch": 0.6656662665066027, "grad_norm": 0.0004232526698615402, "learning_rate": 0.00019792974463078337, "loss": 23.0, "step": 1109 }, { "epoch": 0.6662665066026411, "grad_norm": 0.0006399360718205571, "learning_rate": 0.00019792591881969803, "loss": 23.0, "step": 1110 }, { "epoch": 0.6668667466986795, "grad_norm": 0.0005233879201114178, "learning_rate": 0.0001979220895138885, "loss": 23.0, "step": 1111 }, { "epoch": 0.6674669867947179, "grad_norm": 0.0009274845360778272, "learning_rate": 0.00019791825671349153, "loss": 23.0, "step": 1112 }, { "epoch": 0.6680672268907563, "grad_norm": 0.00030104804318398237, "learning_rate": 0.00019791442041864387, "loss": 23.0, "step": 1113 }, { "epoch": 0.6686674669867947, "grad_norm": 0.0008154875249601901, "learning_rate": 0.0001979105806294824, "loss": 23.0, "step": 1114 }, { "epoch": 0.6692677070828331, "grad_norm": 0.0012951076496392488, "learning_rate": 0.00019790673734614413, "loss": 23.0, "step": 1115 }, { "epoch": 0.6698679471788715, "grad_norm": 0.0006664313841611147, "learning_rate": 0.00019790289056876626, "loss": 23.0, "step": 1116 }, { "epoch": 0.67046818727491, "grad_norm": 0.0006468769279308617, "learning_rate": 0.0001978990402974861, "loss": 23.0, "step": 1117 }, { "epoch": 0.6710684273709484, "grad_norm": 0.0006649359129369259, "learning_rate": 0.00019789518653244098, "loss": 23.0, "step": 1118 }, { "epoch": 0.6716686674669868, "grad_norm": 0.0008164861355908215, "learning_rate": 0.00019789132927376847, "loss": 23.0, "step": 1119 }, { "epoch": 0.6722689075630253, "grad_norm": 0.0004112444876227528, "learning_rate": 0.00019788746852160625, "loss": 23.0, "step": 1120 }, { "epoch": 0.6728691476590636, "grad_norm": 0.0005869813612662256, "learning_rate": 0.00019788360427609204, "loss": 23.0, "step": 1121 }, { "epoch": 0.673469387755102, "grad_norm": 0.0013158872025087476, "learning_rate": 0.00019787973653736382, "loss": 23.0, "step": 1122 }, { "epoch": 0.6740696278511404, "grad_norm": 0.0008600007859058678, "learning_rate": 0.00019787586530555957, "loss": 23.0, "step": 1123 }, { "epoch": 0.6746698679471789, "grad_norm": 0.0008138124831020832, "learning_rate": 0.00019787199058081745, "loss": 23.0, "step": 1124 }, { "epoch": 0.6752701080432173, "grad_norm": 0.0006357624079100788, "learning_rate": 0.00019786811236327574, "loss": 23.0, "step": 1125 }, { "epoch": 0.6758703481392557, "grad_norm": 0.0011706652585417032, "learning_rate": 0.00019786423065307283, "loss": 23.0, "step": 1126 }, { "epoch": 0.6764705882352942, "grad_norm": 0.0005258683231659234, "learning_rate": 0.0001978603454503473, "loss": 23.0, "step": 1127 }, { "epoch": 0.6770708283313326, "grad_norm": 0.0005510378978215158, "learning_rate": 0.00019785645675523774, "loss": 23.0, "step": 1128 }, { "epoch": 0.6776710684273709, "grad_norm": 0.000846723560243845, "learning_rate": 0.00019785256456788294, "loss": 23.0, "step": 1129 }, { "epoch": 0.6782713085234093, "grad_norm": 0.001075017498806119, "learning_rate": 0.00019784866888842187, "loss": 23.0, "step": 1130 }, { "epoch": 0.6788715486194478, "grad_norm": 0.0012707427376881242, "learning_rate": 0.00019784476971699346, "loss": 23.0, "step": 1131 }, { "epoch": 0.6794717887154862, "grad_norm": 0.0005327064427547157, "learning_rate": 0.00019784086705373693, "loss": 23.0, "step": 1132 }, { "epoch": 0.6800720288115246, "grad_norm": 0.0002214109554188326, "learning_rate": 0.0001978369608987915, "loss": 23.0, "step": 1133 }, { "epoch": 0.680672268907563, "grad_norm": 0.0012937859864905477, "learning_rate": 0.00019783305125229667, "loss": 23.0, "step": 1134 }, { "epoch": 0.6812725090036015, "grad_norm": 0.0005032792105339468, "learning_rate": 0.00019782913811439184, "loss": 23.0, "step": 1135 }, { "epoch": 0.6818727490996399, "grad_norm": 0.0009746074792928994, "learning_rate": 0.00019782522148521672, "loss": 23.0, "step": 1136 }, { "epoch": 0.6824729891956782, "grad_norm": 0.0006890950025990605, "learning_rate": 0.00019782130136491107, "loss": 23.0, "step": 1137 }, { "epoch": 0.6830732292917167, "grad_norm": 0.0012513356050476432, "learning_rate": 0.00019781737775361485, "loss": 23.0, "step": 1138 }, { "epoch": 0.6836734693877551, "grad_norm": 0.000591045303735882, "learning_rate": 0.000197813450651468, "loss": 23.0, "step": 1139 }, { "epoch": 0.6842737094837935, "grad_norm": 0.0003968993842136115, "learning_rate": 0.00019780952005861072, "loss": 23.0, "step": 1140 }, { "epoch": 0.6848739495798319, "grad_norm": 0.0014705831417813897, "learning_rate": 0.00019780558597518323, "loss": 23.0, "step": 1141 }, { "epoch": 0.6854741896758704, "grad_norm": 0.0006096931174397469, "learning_rate": 0.00019780164840132598, "loss": 23.0, "step": 1142 }, { "epoch": 0.6860744297719088, "grad_norm": 0.0011487213196232915, "learning_rate": 0.0001977977073371795, "loss": 23.0, "step": 1143 }, { "epoch": 0.6866746698679472, "grad_norm": 0.0006493202527053654, "learning_rate": 0.00019779376278288436, "loss": 23.0, "step": 1144 }, { "epoch": 0.6872749099639855, "grad_norm": 0.00033152487594634295, "learning_rate": 0.00019778981473858142, "loss": 23.0, "step": 1145 }, { "epoch": 0.687875150060024, "grad_norm": 0.0007980447262525558, "learning_rate": 0.00019778586320441153, "loss": 23.0, "step": 1146 }, { "epoch": 0.6884753901560624, "grad_norm": 0.000773175444919616, "learning_rate": 0.00019778190818051572, "loss": 23.0, "step": 1147 }, { "epoch": 0.6890756302521008, "grad_norm": 0.000656657968647778, "learning_rate": 0.00019777794966703514, "loss": 23.0, "step": 1148 }, { "epoch": 0.6896758703481393, "grad_norm": 0.0007515581091865897, "learning_rate": 0.00019777398766411105, "loss": 23.0, "step": 1149 }, { "epoch": 0.6902761104441777, "grad_norm": 0.000665062281768769, "learning_rate": 0.00019777002217188482, "loss": 23.0, "step": 1150 }, { "epoch": 0.6908763505402161, "grad_norm": 0.0005964938900433481, "learning_rate": 0.000197766053190498, "loss": 23.0, "step": 1151 }, { "epoch": 0.6914765906362546, "grad_norm": 0.0011970044579356909, "learning_rate": 0.00019776208072009227, "loss": 23.0, "step": 1152 }, { "epoch": 0.6920768307322929, "grad_norm": 0.0008634612313471735, "learning_rate": 0.0001977581047608093, "loss": 23.0, "step": 1153 }, { "epoch": 0.6926770708283313, "grad_norm": 0.0006760912365280092, "learning_rate": 0.00019775412531279106, "loss": 23.0, "step": 1154 }, { "epoch": 0.6932773109243697, "grad_norm": 0.000713443267159164, "learning_rate": 0.0001977501423761795, "loss": 23.0, "step": 1155 }, { "epoch": 0.6938775510204082, "grad_norm": 0.0007609796593897045, "learning_rate": 0.00019774615595111685, "loss": 23.0, "step": 1156 }, { "epoch": 0.6944777911164466, "grad_norm": 0.0005407094722613692, "learning_rate": 0.00019774216603774534, "loss": 23.0, "step": 1157 }, { "epoch": 0.695078031212485, "grad_norm": 0.0019383853068575263, "learning_rate": 0.00019773817263620731, "loss": 23.0, "step": 1158 }, { "epoch": 0.6956782713085234, "grad_norm": 0.0008223625482060015, "learning_rate": 0.00019773417574664532, "loss": 23.0, "step": 1159 }, { "epoch": 0.6962785114045619, "grad_norm": 0.00045506638707593083, "learning_rate": 0.000197730175369202, "loss": 23.0, "step": 1160 }, { "epoch": 0.6968787515006002, "grad_norm": 0.001282287179492414, "learning_rate": 0.00019772617150402012, "loss": 23.0, "step": 1161 }, { "epoch": 0.6974789915966386, "grad_norm": 0.0006550766411237419, "learning_rate": 0.00019772216415124257, "loss": 23.0, "step": 1162 }, { "epoch": 0.698079231692677, "grad_norm": 0.0007768716313876212, "learning_rate": 0.00019771815331101232, "loss": 23.0, "step": 1163 }, { "epoch": 0.6986794717887155, "grad_norm": 0.0004817126027774066, "learning_rate": 0.00019771413898347256, "loss": 23.0, "step": 1164 }, { "epoch": 0.6992797118847539, "grad_norm": 0.000473877094918862, "learning_rate": 0.0001977101211687665, "loss": 23.0, "step": 1165 }, { "epoch": 0.6998799519807923, "grad_norm": 0.0005641740863211453, "learning_rate": 0.00019770609986703762, "loss": 23.0, "step": 1166 }, { "epoch": 0.7004801920768308, "grad_norm": 0.0006742041441611946, "learning_rate": 0.0001977020750784293, "loss": 23.0, "step": 1167 }, { "epoch": 0.7010804321728692, "grad_norm": 0.0004474896704778075, "learning_rate": 0.0001976980468030853, "loss": 23.0, "step": 1168 }, { "epoch": 0.7016806722689075, "grad_norm": 0.0007633958011865616, "learning_rate": 0.00019769401504114926, "loss": 23.0, "step": 1169 }, { "epoch": 0.7022809123649459, "grad_norm": 0.0009618657059036195, "learning_rate": 0.00019768997979276515, "loss": 23.0, "step": 1170 }, { "epoch": 0.7028811524609844, "grad_norm": 0.0003534193674568087, "learning_rate": 0.00019768594105807697, "loss": 23.0, "step": 1171 }, { "epoch": 0.7034813925570228, "grad_norm": 0.0012746489373967052, "learning_rate": 0.0001976818988372288, "loss": 23.0, "step": 1172 }, { "epoch": 0.7040816326530612, "grad_norm": 0.0004915861063636839, "learning_rate": 0.00019767785313036496, "loss": 23.0, "step": 1173 }, { "epoch": 0.7046818727490997, "grad_norm": 0.001498072873800993, "learning_rate": 0.00019767380393762978, "loss": 23.0, "step": 1174 }, { "epoch": 0.7052821128451381, "grad_norm": 0.000703103025443852, "learning_rate": 0.00019766975125916778, "loss": 23.0, "step": 1175 }, { "epoch": 0.7058823529411765, "grad_norm": 0.0019263819558545947, "learning_rate": 0.00019766569509512358, "loss": 23.0, "step": 1176 }, { "epoch": 0.7064825930372148, "grad_norm": 0.0006543250638060272, "learning_rate": 0.00019766163544564198, "loss": 23.0, "step": 1177 }, { "epoch": 0.7070828331332533, "grad_norm": 0.0014025021810084581, "learning_rate": 0.00019765757231086784, "loss": 23.0, "step": 1178 }, { "epoch": 0.7076830732292917, "grad_norm": 0.0005559175042435527, "learning_rate": 0.0001976535056909461, "loss": 23.0, "step": 1179 }, { "epoch": 0.7082833133253301, "grad_norm": 0.0010201229015365243, "learning_rate": 0.00019764943558602197, "loss": 23.0, "step": 1180 }, { "epoch": 0.7088835534213686, "grad_norm": 0.0008555446984246373, "learning_rate": 0.00019764536199624066, "loss": 23.0, "step": 1181 }, { "epoch": 0.709483793517407, "grad_norm": 0.0009428702760487795, "learning_rate": 0.00019764128492174755, "loss": 23.0, "step": 1182 }, { "epoch": 0.7100840336134454, "grad_norm": 0.0007122241659089923, "learning_rate": 0.00019763720436268814, "loss": 23.0, "step": 1183 }, { "epoch": 0.7106842737094838, "grad_norm": 0.0006242154049687088, "learning_rate": 0.00019763312031920806, "loss": 23.0, "step": 1184 }, { "epoch": 0.7112845138055222, "grad_norm": 0.0005786659312434494, "learning_rate": 0.0001976290327914531, "loss": 23.0, "step": 1185 }, { "epoch": 0.7118847539015606, "grad_norm": 0.0006534986314363778, "learning_rate": 0.00019762494177956902, "loss": 23.0, "step": 1186 }, { "epoch": 0.712484993997599, "grad_norm": 0.0011145776370540261, "learning_rate": 0.00019762084728370193, "loss": 23.0, "step": 1187 }, { "epoch": 0.7130852340936374, "grad_norm": 0.0005813301540911198, "learning_rate": 0.0001976167493039979, "loss": 23.0, "step": 1188 }, { "epoch": 0.7136854741896759, "grad_norm": 0.0007640339899808168, "learning_rate": 0.00019761264784060322, "loss": 23.0, "step": 1189 }, { "epoch": 0.7142857142857143, "grad_norm": 0.0006791938212700188, "learning_rate": 0.00019760854289366418, "loss": 23.0, "step": 1190 }, { "epoch": 0.7148859543817527, "grad_norm": 0.0006645294488407671, "learning_rate": 0.00019760443446332734, "loss": 23.0, "step": 1191 }, { "epoch": 0.7154861944777912, "grad_norm": 0.000585117086302489, "learning_rate": 0.00019760032254973934, "loss": 23.0, "step": 1192 }, { "epoch": 0.7160864345738295, "grad_norm": 0.0005612806417047977, "learning_rate": 0.00019759620715304683, "loss": 23.0, "step": 1193 }, { "epoch": 0.7166866746698679, "grad_norm": 0.0006190498825162649, "learning_rate": 0.00019759208827339677, "loss": 23.0, "step": 1194 }, { "epoch": 0.7172869147659063, "grad_norm": 0.0007879780023358762, "learning_rate": 0.00019758796591093608, "loss": 23.0, "step": 1195 }, { "epoch": 0.7178871548619448, "grad_norm": 0.0006741132820025086, "learning_rate": 0.00019758384006581195, "loss": 23.0, "step": 1196 }, { "epoch": 0.7184873949579832, "grad_norm": 0.0004388502275105566, "learning_rate": 0.00019757971073817158, "loss": 23.0, "step": 1197 }, { "epoch": 0.7190876350540216, "grad_norm": 0.0004376680008135736, "learning_rate": 0.00019757557792816232, "loss": 23.0, "step": 1198 }, { "epoch": 0.71968787515006, "grad_norm": 0.0008469756576232612, "learning_rate": 0.0001975714416359317, "loss": 23.0, "step": 1199 }, { "epoch": 0.7202881152460985, "grad_norm": 0.0006075134151615202, "learning_rate": 0.0001975673018616273, "loss": 23.0, "step": 1200 }, { "epoch": 0.7208883553421368, "grad_norm": 0.0010701941791921854, "learning_rate": 0.00019756315860539687, "loss": 23.0, "step": 1201 }, { "epoch": 0.7214885954381752, "grad_norm": 0.0008353521698154509, "learning_rate": 0.00019755901186738827, "loss": 23.0, "step": 1202 }, { "epoch": 0.7220888355342137, "grad_norm": 0.00043628833373077214, "learning_rate": 0.00019755486164774947, "loss": 23.0, "step": 1203 }, { "epoch": 0.7226890756302521, "grad_norm": 0.0015184413641691208, "learning_rate": 0.00019755070794662865, "loss": 23.0, "step": 1204 }, { "epoch": 0.7232893157262905, "grad_norm": 0.0011512304190546274, "learning_rate": 0.00019754655076417396, "loss": 23.0, "step": 1205 }, { "epoch": 0.723889555822329, "grad_norm": 0.0009428293560631573, "learning_rate": 0.00019754239010053376, "loss": 23.0, "step": 1206 }, { "epoch": 0.7244897959183674, "grad_norm": 0.0006190779386088252, "learning_rate": 0.00019753822595585662, "loss": 23.0, "step": 1207 }, { "epoch": 0.7250900360144058, "grad_norm": 0.0004119576478842646, "learning_rate": 0.0001975340583302911, "loss": 23.0, "step": 1208 }, { "epoch": 0.7256902761104442, "grad_norm": 0.0005930104525759816, "learning_rate": 0.00019752988722398587, "loss": 23.0, "step": 1209 }, { "epoch": 0.7262905162064826, "grad_norm": 0.0004418524040374905, "learning_rate": 0.00019752571263708987, "loss": 23.0, "step": 1210 }, { "epoch": 0.726890756302521, "grad_norm": 0.0010499389609321952, "learning_rate": 0.00019752153456975208, "loss": 23.0, "step": 1211 }, { "epoch": 0.7274909963985594, "grad_norm": 0.0011717370944097638, "learning_rate": 0.00019751735302212154, "loss": 23.0, "step": 1212 }, { "epoch": 0.7280912364945978, "grad_norm": 0.00043891393579542637, "learning_rate": 0.0001975131679943475, "loss": 23.0, "step": 1213 }, { "epoch": 0.7286914765906363, "grad_norm": 0.0005633426480926573, "learning_rate": 0.00019750897948657938, "loss": 23.0, "step": 1214 }, { "epoch": 0.7292917166866747, "grad_norm": 0.0004441675846464932, "learning_rate": 0.00019750478749896657, "loss": 23.0, "step": 1215 }, { "epoch": 0.7298919567827131, "grad_norm": 0.0012788623571395874, "learning_rate": 0.00019750059203165873, "loss": 23.0, "step": 1216 }, { "epoch": 0.7304921968787516, "grad_norm": 0.0004025893867947161, "learning_rate": 0.00019749639308480553, "loss": 23.0, "step": 1217 }, { "epoch": 0.7310924369747899, "grad_norm": 0.000841877656057477, "learning_rate": 0.00019749219065855688, "loss": 23.0, "step": 1218 }, { "epoch": 0.7316926770708283, "grad_norm": 0.0007755985716357827, "learning_rate": 0.00019748798475306272, "loss": 23.0, "step": 1219 }, { "epoch": 0.7322929171668667, "grad_norm": 0.0006181802018545568, "learning_rate": 0.00019748377536847316, "loss": 23.0, "step": 1220 }, { "epoch": 0.7328931572629052, "grad_norm": 0.0007388837984763086, "learning_rate": 0.0001974795625049384, "loss": 23.0, "step": 1221 }, { "epoch": 0.7334933973589436, "grad_norm": 0.0009291544556617737, "learning_rate": 0.0001974753461626088, "loss": 23.0, "step": 1222 }, { "epoch": 0.734093637454982, "grad_norm": 0.00043915770947933197, "learning_rate": 0.00019747112634163486, "loss": 23.0, "step": 1223 }, { "epoch": 0.7346938775510204, "grad_norm": 0.002203888026997447, "learning_rate": 0.0001974669030421671, "loss": 23.0, "step": 1224 }, { "epoch": 0.7352941176470589, "grad_norm": 0.0006513252155855298, "learning_rate": 0.0001974626762643563, "loss": 23.0, "step": 1225 }, { "epoch": 0.7358943577430972, "grad_norm": 0.0007130668964236975, "learning_rate": 0.00019745844600835332, "loss": 23.0, "step": 1226 }, { "epoch": 0.7364945978391356, "grad_norm": 0.0014644530601799488, "learning_rate": 0.00019745421227430904, "loss": 23.0, "step": 1227 }, { "epoch": 0.737094837935174, "grad_norm": 0.0018409211188554764, "learning_rate": 0.00019744997506237466, "loss": 23.0, "step": 1228 }, { "epoch": 0.7376950780312125, "grad_norm": 0.0011605211766436696, "learning_rate": 0.00019744573437270133, "loss": 23.0, "step": 1229 }, { "epoch": 0.7382953181272509, "grad_norm": 0.0006024561589583755, "learning_rate": 0.00019744149020544036, "loss": 23.0, "step": 1230 }, { "epoch": 0.7388955582232893, "grad_norm": 0.00031817928538657725, "learning_rate": 0.0001974372425607433, "loss": 23.0, "step": 1231 }, { "epoch": 0.7394957983193278, "grad_norm": 0.0020129424519836903, "learning_rate": 0.00019743299143876167, "loss": 23.0, "step": 1232 }, { "epoch": 0.7400960384153662, "grad_norm": 0.0006386892055161297, "learning_rate": 0.0001974287368396472, "loss": 23.0, "step": 1233 }, { "epoch": 0.7406962785114045, "grad_norm": 0.000763615476898849, "learning_rate": 0.00019742447876355175, "loss": 23.0, "step": 1234 }, { "epoch": 0.741296518607443, "grad_norm": 0.0007875692099332809, "learning_rate": 0.00019742021721062724, "loss": 23.0, "step": 1235 }, { "epoch": 0.7418967587034814, "grad_norm": 0.0014706990914419293, "learning_rate": 0.0001974159521810258, "loss": 23.0, "step": 1236 }, { "epoch": 0.7424969987995198, "grad_norm": 0.0007930982974357903, "learning_rate": 0.00019741168367489958, "loss": 23.0, "step": 1237 }, { "epoch": 0.7430972388955582, "grad_norm": 0.0004145725688431412, "learning_rate": 0.00019740741169240096, "loss": 23.0, "step": 1238 }, { "epoch": 0.7436974789915967, "grad_norm": 0.0007471741992048919, "learning_rate": 0.00019740313623368234, "loss": 23.0, "step": 1239 }, { "epoch": 0.7442977190876351, "grad_norm": 0.001122286426834762, "learning_rate": 0.00019739885729889637, "loss": 23.0, "step": 1240 }, { "epoch": 0.7448979591836735, "grad_norm": 0.0006311690085567534, "learning_rate": 0.00019739457488819574, "loss": 23.0, "step": 1241 }, { "epoch": 0.7454981992797118, "grad_norm": 0.0005989365745335817, "learning_rate": 0.00019739028900173322, "loss": 23.0, "step": 1242 }, { "epoch": 0.7460984393757503, "grad_norm": 0.0006342297419905663, "learning_rate": 0.00019738599963966183, "loss": 23.0, "step": 1243 }, { "epoch": 0.7466986794717887, "grad_norm": 0.0006295668426901102, "learning_rate": 0.00019738170680213464, "loss": 23.0, "step": 1244 }, { "epoch": 0.7472989195678271, "grad_norm": 0.0005823764950037003, "learning_rate": 0.00019737741048930478, "loss": 23.0, "step": 1245 }, { "epoch": 0.7478991596638656, "grad_norm": 0.001335416454821825, "learning_rate": 0.00019737311070132564, "loss": 23.0, "step": 1246 }, { "epoch": 0.748499399759904, "grad_norm": 0.0008027239236980677, "learning_rate": 0.00019736880743835068, "loss": 23.0, "step": 1247 }, { "epoch": 0.7490996398559424, "grad_norm": 0.0004205139121040702, "learning_rate": 0.00019736450070053342, "loss": 23.0, "step": 1248 }, { "epoch": 0.7496998799519808, "grad_norm": 0.0005247509106993675, "learning_rate": 0.0001973601904880276, "loss": 23.0, "step": 1249 }, { "epoch": 0.7503001200480192, "grad_norm": 0.00047644192818552256, "learning_rate": 0.000197355876800987, "loss": 23.0, "step": 1250 }, { "epoch": 0.7509003601440576, "grad_norm": 0.0006163313519209623, "learning_rate": 0.0001973515596395656, "loss": 23.0, "step": 1251 }, { "epoch": 0.751500600240096, "grad_norm": 0.0009510073577985168, "learning_rate": 0.00019734723900391744, "loss": 23.0, "step": 1252 }, { "epoch": 0.7521008403361344, "grad_norm": 0.00038402751670219004, "learning_rate": 0.00019734291489419673, "loss": 23.0, "step": 1253 }, { "epoch": 0.7527010804321729, "grad_norm": 0.00068047852255404, "learning_rate": 0.0001973385873105578, "loss": 23.0, "step": 1254 }, { "epoch": 0.7533013205282113, "grad_norm": 0.00037462665932253003, "learning_rate": 0.00019733425625315508, "loss": 23.0, "step": 1255 }, { "epoch": 0.7539015606242497, "grad_norm": 0.0004069966671522707, "learning_rate": 0.0001973299217221431, "loss": 23.0, "step": 1256 }, { "epoch": 0.7545018007202882, "grad_norm": 0.001153744524344802, "learning_rate": 0.00019732558371767658, "loss": 23.0, "step": 1257 }, { "epoch": 0.7551020408163265, "grad_norm": 0.0005174583056941628, "learning_rate": 0.00019732124223991034, "loss": 23.0, "step": 1258 }, { "epoch": 0.7557022809123649, "grad_norm": 0.0006991420523263514, "learning_rate": 0.0001973168972889993, "loss": 23.0, "step": 1259 }, { "epoch": 0.7563025210084033, "grad_norm": 0.00046238224604167044, "learning_rate": 0.00019731254886509852, "loss": 23.0, "step": 1260 }, { "epoch": 0.7569027611044418, "grad_norm": 0.0011713410494849086, "learning_rate": 0.0001973081969683632, "loss": 23.0, "step": 1261 }, { "epoch": 0.7575030012004802, "grad_norm": 0.000421454751631245, "learning_rate": 0.00019730384159894864, "loss": 23.0, "step": 1262 }, { "epoch": 0.7581032412965186, "grad_norm": 0.0012959492160007358, "learning_rate": 0.00019729948275701027, "loss": 23.0, "step": 1263 }, { "epoch": 0.758703481392557, "grad_norm": 0.0007972400635480881, "learning_rate": 0.00019729512044270364, "loss": 23.0, "step": 1264 }, { "epoch": 0.7593037214885955, "grad_norm": 0.0007275702082552016, "learning_rate": 0.00019729075465618442, "loss": 23.0, "step": 1265 }, { "epoch": 0.7599039615846338, "grad_norm": 0.000372605980373919, "learning_rate": 0.00019728638539760843, "loss": 23.0, "step": 1266 }, { "epoch": 0.7605042016806722, "grad_norm": 0.0007662245770916343, "learning_rate": 0.0001972820126671316, "loss": 23.0, "step": 1267 }, { "epoch": 0.7611044417767107, "grad_norm": 0.0008609720971435308, "learning_rate": 0.00019727763646491, "loss": 23.0, "step": 1268 }, { "epoch": 0.7617046818727491, "grad_norm": 0.0013051877031102777, "learning_rate": 0.00019727325679109976, "loss": 23.0, "step": 1269 }, { "epoch": 0.7623049219687875, "grad_norm": 0.0011220271699130535, "learning_rate": 0.00019726887364585716, "loss": 23.0, "step": 1270 }, { "epoch": 0.762905162064826, "grad_norm": 0.0006207393016666174, "learning_rate": 0.00019726448702933876, "loss": 23.0, "step": 1271 }, { "epoch": 0.7635054021608644, "grad_norm": 0.0008961490821093321, "learning_rate": 0.00019726009694170094, "loss": 23.0, "step": 1272 }, { "epoch": 0.7641056422569028, "grad_norm": 0.0011103064753115177, "learning_rate": 0.00019725570338310046, "loss": 23.0, "step": 1273 }, { "epoch": 0.7647058823529411, "grad_norm": 0.0011056560324504972, "learning_rate": 0.0001972513063536941, "loss": 23.0, "step": 1274 }, { "epoch": 0.7653061224489796, "grad_norm": 0.0009583919891156256, "learning_rate": 0.00019724690585363878, "loss": 23.0, "step": 1275 }, { "epoch": 0.765906362545018, "grad_norm": 0.0007008077809587121, "learning_rate": 0.00019724250188309154, "loss": 23.0, "step": 1276 }, { "epoch": 0.7665066026410564, "grad_norm": 0.0014242903562262654, "learning_rate": 0.00019723809444220956, "loss": 23.0, "step": 1277 }, { "epoch": 0.7671068427370948, "grad_norm": 0.0008935600053519011, "learning_rate": 0.00019723368353115008, "loss": 23.0, "step": 1278 }, { "epoch": 0.7677070828331333, "grad_norm": 0.0012262960663065314, "learning_rate": 0.00019722926915007055, "loss": 23.0, "step": 1279 }, { "epoch": 0.7683073229291717, "grad_norm": 0.0005361196235753596, "learning_rate": 0.00019722485129912854, "loss": 23.0, "step": 1280 }, { "epoch": 0.7689075630252101, "grad_norm": 0.0010704381857067347, "learning_rate": 0.00019722042997848168, "loss": 23.0, "step": 1281 }, { "epoch": 0.7695078031212484, "grad_norm": 0.0009096910944208503, "learning_rate": 0.00019721600518828774, "loss": 23.0, "step": 1282 }, { "epoch": 0.7701080432172869, "grad_norm": 0.0007512019947171211, "learning_rate": 0.00019721157692870463, "loss": 23.0, "step": 1283 }, { "epoch": 0.7707082833133253, "grad_norm": 0.0009879972785711288, "learning_rate": 0.00019720714519989044, "loss": 23.0, "step": 1284 }, { "epoch": 0.7713085234093637, "grad_norm": 0.0007033760193735361, "learning_rate": 0.00019720271000200324, "loss": 23.0, "step": 1285 }, { "epoch": 0.7719087635054022, "grad_norm": 0.0007616998627781868, "learning_rate": 0.00019719827133520137, "loss": 23.0, "step": 1286 }, { "epoch": 0.7725090036014406, "grad_norm": 0.0008339902269653976, "learning_rate": 0.00019719382919964321, "loss": 23.0, "step": 1287 }, { "epoch": 0.773109243697479, "grad_norm": 0.0006231502629816532, "learning_rate": 0.0001971893835954873, "loss": 23.0, "step": 1288 }, { "epoch": 0.7737094837935174, "grad_norm": 0.0006061487947590649, "learning_rate": 0.0001971849345228923, "loss": 23.0, "step": 1289 }, { "epoch": 0.7743097238895558, "grad_norm": 0.0002856554929167032, "learning_rate": 0.00019718048198201697, "loss": 23.0, "step": 1290 }, { "epoch": 0.7749099639855942, "grad_norm": 0.0006068464717827737, "learning_rate": 0.00019717602597302025, "loss": 23.0, "step": 1291 }, { "epoch": 0.7755102040816326, "grad_norm": 0.00040136679308488965, "learning_rate": 0.00019717156649606108, "loss": 23.0, "step": 1292 }, { "epoch": 0.776110444177671, "grad_norm": 0.0006840699934400618, "learning_rate": 0.0001971671035512987, "loss": 23.0, "step": 1293 }, { "epoch": 0.7767106842737095, "grad_norm": 0.000576976512093097, "learning_rate": 0.00019716263713889228, "loss": 23.0, "step": 1294 }, { "epoch": 0.7773109243697479, "grad_norm": 0.000296157319098711, "learning_rate": 0.00019715816725900134, "loss": 23.0, "step": 1295 }, { "epoch": 0.7779111644657863, "grad_norm": 0.001230229390785098, "learning_rate": 0.00019715369391178528, "loss": 23.0, "step": 1296 }, { "epoch": 0.7785114045618248, "grad_norm": 0.00044690098729915917, "learning_rate": 0.0001971492170974038, "loss": 23.0, "step": 1297 }, { "epoch": 0.7791116446578632, "grad_norm": 0.0006561097106896341, "learning_rate": 0.0001971447368160167, "loss": 23.0, "step": 1298 }, { "epoch": 0.7797118847539015, "grad_norm": 0.0005576546536758542, "learning_rate": 0.0001971402530677838, "loss": 23.0, "step": 1299 }, { "epoch": 0.78031212484994, "grad_norm": 0.0013528415001928806, "learning_rate": 0.00019713576585286515, "loss": 23.0, "step": 1300 }, { "epoch": 0.7809123649459784, "grad_norm": 0.000892074138391763, "learning_rate": 0.00019713127517142088, "loss": 23.0, "step": 1301 }, { "epoch": 0.7815126050420168, "grad_norm": 0.001970496727153659, "learning_rate": 0.00019712678102361123, "loss": 23.0, "step": 1302 }, { "epoch": 0.7821128451380552, "grad_norm": 0.0010920679196715355, "learning_rate": 0.00019712228340959661, "loss": 23.0, "step": 1303 }, { "epoch": 0.7827130852340937, "grad_norm": 0.0005002216785214841, "learning_rate": 0.00019711778232953754, "loss": 23.0, "step": 1304 }, { "epoch": 0.7833133253301321, "grad_norm": 0.0010139576625078917, "learning_rate": 0.00019711327778359464, "loss": 23.0, "step": 1305 }, { "epoch": 0.7839135654261705, "grad_norm": 0.0016053385334089398, "learning_rate": 0.00019710876977192864, "loss": 23.0, "step": 1306 }, { "epoch": 0.7845138055222088, "grad_norm": 0.0008072099881246686, "learning_rate": 0.00019710425829470046, "loss": 23.0, "step": 1307 }, { "epoch": 0.7851140456182473, "grad_norm": 0.0008704765350557864, "learning_rate": 0.00019709974335207106, "loss": 23.0, "step": 1308 }, { "epoch": 0.7857142857142857, "grad_norm": 0.0007995392079465091, "learning_rate": 0.0001970952249442016, "loss": 23.0, "step": 1309 }, { "epoch": 0.7863145258103241, "grad_norm": 0.0007567350985482335, "learning_rate": 0.00019709070307125334, "loss": 23.0, "step": 1310 }, { "epoch": 0.7869147659063626, "grad_norm": 0.001349606434814632, "learning_rate": 0.00019708617773338762, "loss": 23.0, "step": 1311 }, { "epoch": 0.787515006002401, "grad_norm": 0.00046586417010985315, "learning_rate": 0.00019708164893076594, "loss": 23.0, "step": 1312 }, { "epoch": 0.7881152460984394, "grad_norm": 0.0007738059503026307, "learning_rate": 0.00019707711666354998, "loss": 23.0, "step": 1313 }, { "epoch": 0.7887154861944778, "grad_norm": 0.001490103080868721, "learning_rate": 0.0001970725809319014, "loss": 23.0, "step": 1314 }, { "epoch": 0.7893157262905162, "grad_norm": 0.0004968742723576725, "learning_rate": 0.00019706804173598208, "loss": 23.0, "step": 1315 }, { "epoch": 0.7899159663865546, "grad_norm": 0.0006271405727602541, "learning_rate": 0.00019706349907595407, "loss": 23.0, "step": 1316 }, { "epoch": 0.790516206482593, "grad_norm": 0.000878729042597115, "learning_rate": 0.00019705895295197946, "loss": 23.0, "step": 1317 }, { "epoch": 0.7911164465786314, "grad_norm": 0.000744846067391336, "learning_rate": 0.0001970544033642205, "loss": 23.0, "step": 1318 }, { "epoch": 0.7917166866746699, "grad_norm": 0.0011991016799584031, "learning_rate": 0.0001970498503128395, "loss": 23.0, "step": 1319 }, { "epoch": 0.7923169267707083, "grad_norm": 0.0007287758053280413, "learning_rate": 0.000197045293797999, "loss": 23.0, "step": 1320 }, { "epoch": 0.7929171668667467, "grad_norm": 0.00045763529487885535, "learning_rate": 0.00019704073381986162, "loss": 23.0, "step": 1321 }, { "epoch": 0.7935174069627852, "grad_norm": 0.00036194606218487024, "learning_rate": 0.00019703617037859002, "loss": 23.0, "step": 1322 }, { "epoch": 0.7941176470588235, "grad_norm": 0.0006920311716385186, "learning_rate": 0.00019703160347434712, "loss": 23.0, "step": 1323 }, { "epoch": 0.7947178871548619, "grad_norm": 0.0006445212638936937, "learning_rate": 0.0001970270331072959, "loss": 23.0, "step": 1324 }, { "epoch": 0.7953181272509003, "grad_norm": 0.001651501515880227, "learning_rate": 0.00019702245927759947, "loss": 23.0, "step": 1325 }, { "epoch": 0.7959183673469388, "grad_norm": 0.0009059353033080697, "learning_rate": 0.00019701788198542102, "loss": 23.0, "step": 1326 }, { "epoch": 0.7965186074429772, "grad_norm": 0.0016654802020639181, "learning_rate": 0.00019701330123092392, "loss": 23.0, "step": 1327 }, { "epoch": 0.7971188475390156, "grad_norm": 0.0012016391847282648, "learning_rate": 0.00019700871701427164, "loss": 23.0, "step": 1328 }, { "epoch": 0.7977190876350541, "grad_norm": 0.0009673786698840559, "learning_rate": 0.0001970041293356278, "loss": 23.0, "step": 1329 }, { "epoch": 0.7983193277310925, "grad_norm": 0.0007137698121368885, "learning_rate": 0.00019699953819515611, "loss": 23.0, "step": 1330 }, { "epoch": 0.7989195678271308, "grad_norm": 0.000576613936573267, "learning_rate": 0.00019699494359302037, "loss": 23.0, "step": 1331 }, { "epoch": 0.7995198079231692, "grad_norm": 0.00029468824504874647, "learning_rate": 0.00019699034552938467, "loss": 23.0, "step": 1332 }, { "epoch": 0.8001200480192077, "grad_norm": 0.0005525742890313268, "learning_rate": 0.00019698574400441298, "loss": 23.0, "step": 1333 }, { "epoch": 0.8007202881152461, "grad_norm": 0.0015778294764459133, "learning_rate": 0.00019698113901826957, "loss": 23.0, "step": 1334 }, { "epoch": 0.8013205282112845, "grad_norm": 0.0004467495309654623, "learning_rate": 0.00019697653057111877, "loss": 23.0, "step": 1335 }, { "epoch": 0.801920768307323, "grad_norm": 0.0006680064834654331, "learning_rate": 0.00019697191866312506, "loss": 23.0, "step": 1336 }, { "epoch": 0.8025210084033614, "grad_norm": 0.0011591626098379493, "learning_rate": 0.00019696730329445302, "loss": 23.0, "step": 1337 }, { "epoch": 0.8031212484993998, "grad_norm": 0.0004280972934793681, "learning_rate": 0.0001969626844652673, "loss": 23.0, "step": 1338 }, { "epoch": 0.8037214885954381, "grad_norm": 0.0008343479712493718, "learning_rate": 0.00019695806217573286, "loss": 23.0, "step": 1339 }, { "epoch": 0.8043217286914766, "grad_norm": 0.0013662086566910148, "learning_rate": 0.00019695343642601456, "loss": 23.0, "step": 1340 }, { "epoch": 0.804921968787515, "grad_norm": 0.0006845472962595522, "learning_rate": 0.00019694880721627748, "loss": 23.0, "step": 1341 }, { "epoch": 0.8055222088835534, "grad_norm": 0.0020524887368083, "learning_rate": 0.0001969441745466869, "loss": 23.0, "step": 1342 }, { "epoch": 0.8061224489795918, "grad_norm": 0.0011398709611967206, "learning_rate": 0.00019693953841740808, "loss": 23.0, "step": 1343 }, { "epoch": 0.8067226890756303, "grad_norm": 0.0005793363670818508, "learning_rate": 0.00019693489882860647, "loss": 23.0, "step": 1344 }, { "epoch": 0.8073229291716687, "grad_norm": 0.0007696032989770174, "learning_rate": 0.0001969302557804477, "loss": 23.0, "step": 1345 }, { "epoch": 0.8079231692677071, "grad_norm": 0.000903173116967082, "learning_rate": 0.00019692560927309742, "loss": 23.0, "step": 1346 }, { "epoch": 0.8085234093637454, "grad_norm": 0.0005450271419249475, "learning_rate": 0.00019692095930672147, "loss": 23.0, "step": 1347 }, { "epoch": 0.8091236494597839, "grad_norm": 0.0007734773098491132, "learning_rate": 0.0001969163058814858, "loss": 23.0, "step": 1348 }, { "epoch": 0.8097238895558223, "grad_norm": 0.0007161099347285926, "learning_rate": 0.0001969116489975565, "loss": 23.0, "step": 1349 }, { "epoch": 0.8103241296518607, "grad_norm": 0.00046699721133336425, "learning_rate": 0.00019690698865509966, "loss": 23.0, "step": 1350 }, { "epoch": 0.8109243697478992, "grad_norm": 0.0009711477323435247, "learning_rate": 0.0001969023248542817, "loss": 23.0, "step": 1351 }, { "epoch": 0.8115246098439376, "grad_norm": 0.001304179197177291, "learning_rate": 0.00019689765759526906, "loss": 23.0, "step": 1352 }, { "epoch": 0.812124849939976, "grad_norm": 0.0004178002418484539, "learning_rate": 0.00019689298687822822, "loss": 23.0, "step": 1353 }, { "epoch": 0.8127250900360145, "grad_norm": 0.00036371249007061124, "learning_rate": 0.00019688831270332595, "loss": 23.0, "step": 1354 }, { "epoch": 0.8133253301320528, "grad_norm": 0.0006969739333726466, "learning_rate": 0.00019688363507072904, "loss": 23.0, "step": 1355 }, { "epoch": 0.8139255702280912, "grad_norm": 0.001011022599413991, "learning_rate": 0.00019687895398060439, "loss": 23.0, "step": 1356 }, { "epoch": 0.8145258103241296, "grad_norm": 0.0005929060280323029, "learning_rate": 0.00019687426943311906, "loss": 23.0, "step": 1357 }, { "epoch": 0.8151260504201681, "grad_norm": 0.00029369836556725204, "learning_rate": 0.00019686958142844024, "loss": 23.0, "step": 1358 }, { "epoch": 0.8157262905162065, "grad_norm": 0.0005359333590604365, "learning_rate": 0.00019686488996673528, "loss": 23.0, "step": 1359 }, { "epoch": 0.8163265306122449, "grad_norm": 0.0005220797029323876, "learning_rate": 0.00019686019504817155, "loss": 23.0, "step": 1360 }, { "epoch": 0.8169267707082833, "grad_norm": 0.000851228425744921, "learning_rate": 0.0001968554966729166, "loss": 23.0, "step": 1361 }, { "epoch": 0.8175270108043218, "grad_norm": 0.0005501755513250828, "learning_rate": 0.0001968507948411381, "loss": 23.0, "step": 1362 }, { "epoch": 0.8181272509003601, "grad_norm": 0.0010599548695608974, "learning_rate": 0.0001968460895530039, "loss": 23.0, "step": 1363 }, { "epoch": 0.8187274909963985, "grad_norm": 0.0009824999142438173, "learning_rate": 0.00019684138080868185, "loss": 23.0, "step": 1364 }, { "epoch": 0.819327731092437, "grad_norm": 0.00047340954188257456, "learning_rate": 0.00019683666860834004, "loss": 23.0, "step": 1365 }, { "epoch": 0.8199279711884754, "grad_norm": 0.0014246002538129687, "learning_rate": 0.00019683195295214662, "loss": 23.0, "step": 1366 }, { "epoch": 0.8205282112845138, "grad_norm": 0.00044270779471844435, "learning_rate": 0.00019682723384026985, "loss": 23.0, "step": 1367 }, { "epoch": 0.8211284513805522, "grad_norm": 0.0005253563867881894, "learning_rate": 0.00019682251127287824, "loss": 23.0, "step": 1368 }, { "epoch": 0.8217286914765907, "grad_norm": 0.0022502955980598927, "learning_rate": 0.00019681778525014023, "loss": 23.0, "step": 1369 }, { "epoch": 0.8223289315726291, "grad_norm": 0.0013780895387753844, "learning_rate": 0.00019681305577222449, "loss": 23.0, "step": 1370 }, { "epoch": 0.8229291716686674, "grad_norm": 0.001034077606163919, "learning_rate": 0.00019680832283929984, "loss": 23.0, "step": 1371 }, { "epoch": 0.8235294117647058, "grad_norm": 0.000522664631716907, "learning_rate": 0.00019680358645153513, "loss": 23.0, "step": 1372 }, { "epoch": 0.8241296518607443, "grad_norm": 0.00048665585927665234, "learning_rate": 0.00019679884660909948, "loss": 23.0, "step": 1373 }, { "epoch": 0.8247298919567827, "grad_norm": 0.0013081665383651853, "learning_rate": 0.00019679410331216197, "loss": 23.0, "step": 1374 }, { "epoch": 0.8253301320528211, "grad_norm": 0.000508311262819916, "learning_rate": 0.0001967893565608919, "loss": 23.0, "step": 1375 }, { "epoch": 0.8259303721488596, "grad_norm": 0.0004545389674603939, "learning_rate": 0.0001967846063554587, "loss": 23.0, "step": 1376 }, { "epoch": 0.826530612244898, "grad_norm": 0.0004129679291509092, "learning_rate": 0.0001967798526960318, "loss": 23.0, "step": 1377 }, { "epoch": 0.8271308523409364, "grad_norm": 0.000469258549856022, "learning_rate": 0.00019677509558278092, "loss": 23.0, "step": 1378 }, { "epoch": 0.8277310924369747, "grad_norm": 0.0006527505465783179, "learning_rate": 0.00019677033501587583, "loss": 23.0, "step": 1379 }, { "epoch": 0.8283313325330132, "grad_norm": 0.0009122650953941047, "learning_rate": 0.0001967655709954864, "loss": 23.0, "step": 1380 }, { "epoch": 0.8289315726290516, "grad_norm": 0.000640625599771738, "learning_rate": 0.00019676080352178265, "loss": 23.0, "step": 1381 }, { "epoch": 0.82953181272509, "grad_norm": 0.0005764299421571195, "learning_rate": 0.00019675603259493472, "loss": 23.0, "step": 1382 }, { "epoch": 0.8301320528211285, "grad_norm": 0.0013179031666368246, "learning_rate": 0.00019675125821511287, "loss": 23.0, "step": 1383 }, { "epoch": 0.8307322929171669, "grad_norm": 0.0005996134132146835, "learning_rate": 0.00019674648038248748, "loss": 23.0, "step": 1384 }, { "epoch": 0.8313325330132053, "grad_norm": 0.0007975632906891406, "learning_rate": 0.00019674169909722911, "loss": 23.0, "step": 1385 }, { "epoch": 0.8319327731092437, "grad_norm": 0.0007109794532880187, "learning_rate": 0.00019673691435950828, "loss": 23.0, "step": 1386 }, { "epoch": 0.8325330132052821, "grad_norm": 0.0006750710308551788, "learning_rate": 0.00019673212616949588, "loss": 23.0, "step": 1387 }, { "epoch": 0.8331332533013205, "grad_norm": 0.00053409393876791, "learning_rate": 0.00019672733452736268, "loss": 23.0, "step": 1388 }, { "epoch": 0.8337334933973589, "grad_norm": 0.0008340748026967049, "learning_rate": 0.00019672253943327973, "loss": 23.0, "step": 1389 }, { "epoch": 0.8343337334933973, "grad_norm": 0.0005991861689835787, "learning_rate": 0.00019671774088741815, "loss": 23.0, "step": 1390 }, { "epoch": 0.8349339735894358, "grad_norm": 0.0009061013697646558, "learning_rate": 0.00019671293888994922, "loss": 23.0, "step": 1391 }, { "epoch": 0.8355342136854742, "grad_norm": 0.0011456649517640471, "learning_rate": 0.00019670813344104424, "loss": 23.0, "step": 1392 }, { "epoch": 0.8361344537815126, "grad_norm": 0.0008859516819939017, "learning_rate": 0.00019670332454087476, "loss": 23.0, "step": 1393 }, { "epoch": 0.8367346938775511, "grad_norm": 0.0008393028401769698, "learning_rate": 0.00019669851218961232, "loss": 23.0, "step": 1394 }, { "epoch": 0.8373349339735895, "grad_norm": 0.0008376188343390822, "learning_rate": 0.00019669369638742877, "loss": 23.0, "step": 1395 }, { "epoch": 0.8379351740696278, "grad_norm": 0.0004782997420988977, "learning_rate": 0.0001966888771344959, "loss": 23.0, "step": 1396 }, { "epoch": 0.8385354141656662, "grad_norm": 0.0008413901668973267, "learning_rate": 0.00019668405443098576, "loss": 23.0, "step": 1397 }, { "epoch": 0.8391356542617047, "grad_norm": 0.00035914263571612537, "learning_rate": 0.00019667922827707037, "loss": 23.0, "step": 1398 }, { "epoch": 0.8397358943577431, "grad_norm": 0.0005109699559397995, "learning_rate": 0.00019667439867292202, "loss": 23.0, "step": 1399 }, { "epoch": 0.8403361344537815, "grad_norm": 0.0008713309071026742, "learning_rate": 0.0001966695656187131, "loss": 23.0, "step": 1400 }, { "epoch": 0.84093637454982, "grad_norm": 0.0014358647167682648, "learning_rate": 0.00019666472911461603, "loss": 23.0, "step": 1401 }, { "epoch": 0.8415366146458584, "grad_norm": 0.0006467948551289737, "learning_rate": 0.0001966598891608034, "loss": 23.0, "step": 1402 }, { "epoch": 0.8421368547418968, "grad_norm": 0.0008227209909819067, "learning_rate": 0.000196655045757448, "loss": 23.0, "step": 1403 }, { "epoch": 0.8427370948379351, "grad_norm": 0.00041979513480328023, "learning_rate": 0.00019665019890472262, "loss": 23.0, "step": 1404 }, { "epoch": 0.8433373349339736, "grad_norm": 0.0006606088718399405, "learning_rate": 0.00019664534860280023, "loss": 23.0, "step": 1405 }, { "epoch": 0.843937575030012, "grad_norm": 0.00072106794686988, "learning_rate": 0.000196640494851854, "loss": 23.0, "step": 1406 }, { "epoch": 0.8445378151260504, "grad_norm": 0.0009728367440402508, "learning_rate": 0.00019663563765205707, "loss": 23.0, "step": 1407 }, { "epoch": 0.8451380552220888, "grad_norm": 0.001023156102746725, "learning_rate": 0.0001966307770035828, "loss": 23.0, "step": 1408 }, { "epoch": 0.8457382953181273, "grad_norm": 0.0005268081440590322, "learning_rate": 0.0001966259129066047, "loss": 23.0, "step": 1409 }, { "epoch": 0.8463385354141657, "grad_norm": 0.0008058066596277058, "learning_rate": 0.0001966210453612963, "loss": 23.0, "step": 1410 }, { "epoch": 0.8469387755102041, "grad_norm": 0.0006653359159827232, "learning_rate": 0.00019661617436783133, "loss": 23.0, "step": 1411 }, { "epoch": 0.8475390156062425, "grad_norm": 0.0004622488922905177, "learning_rate": 0.0001966112999263836, "loss": 23.0, "step": 1412 }, { "epoch": 0.8481392557022809, "grad_norm": 0.00042153761023655534, "learning_rate": 0.00019660642203712714, "loss": 23.0, "step": 1413 }, { "epoch": 0.8487394957983193, "grad_norm": 0.0008719123434275389, "learning_rate": 0.00019660154070023597, "loss": 23.0, "step": 1414 }, { "epoch": 0.8493397358943577, "grad_norm": 0.0012196196475997567, "learning_rate": 0.00019659665591588424, "loss": 23.0, "step": 1415 }, { "epoch": 0.8499399759903962, "grad_norm": 0.0006624732632189989, "learning_rate": 0.0001965917676842464, "loss": 23.0, "step": 1416 }, { "epoch": 0.8505402160864346, "grad_norm": 0.0004318116116337478, "learning_rate": 0.0001965868760054968, "loss": 23.0, "step": 1417 }, { "epoch": 0.851140456182473, "grad_norm": 0.0016290737548843026, "learning_rate": 0.00019658198087981008, "loss": 23.0, "step": 1418 }, { "epoch": 0.8517406962785115, "grad_norm": 0.0006357937818393111, "learning_rate": 0.00019657708230736085, "loss": 23.0, "step": 1419 }, { "epoch": 0.8523409363745498, "grad_norm": 0.0008247081423178315, "learning_rate": 0.00019657218028832404, "loss": 23.0, "step": 1420 }, { "epoch": 0.8529411764705882, "grad_norm": 0.0008558662375435233, "learning_rate": 0.0001965672748228745, "loss": 23.0, "step": 1421 }, { "epoch": 0.8535414165666266, "grad_norm": 0.0006650417344644666, "learning_rate": 0.00019656236591118732, "loss": 23.0, "step": 1422 }, { "epoch": 0.8541416566626651, "grad_norm": 0.0011802432127296925, "learning_rate": 0.00019655745355343765, "loss": 23.0, "step": 1423 }, { "epoch": 0.8547418967587035, "grad_norm": 0.0005101741407997906, "learning_rate": 0.0001965525377498009, "loss": 23.0, "step": 1424 }, { "epoch": 0.8553421368547419, "grad_norm": 0.0006552199483849108, "learning_rate": 0.0001965476185004524, "loss": 23.0, "step": 1425 }, { "epoch": 0.8559423769507803, "grad_norm": 0.0013779390137642622, "learning_rate": 0.0001965426958055678, "loss": 23.0, "step": 1426 }, { "epoch": 0.8565426170468188, "grad_norm": 0.0015933418180793524, "learning_rate": 0.00019653776966532265, "loss": 23.0, "step": 1427 }, { "epoch": 0.8571428571428571, "grad_norm": 0.0007609377498738468, "learning_rate": 0.00019653284007989288, "loss": 23.0, "step": 1428 }, { "epoch": 0.8577430972388955, "grad_norm": 0.0004458023759070784, "learning_rate": 0.00019652790704945434, "loss": 23.0, "step": 1429 }, { "epoch": 0.858343337334934, "grad_norm": 0.0006291582831181586, "learning_rate": 0.0001965229705741831, "loss": 23.0, "step": 1430 }, { "epoch": 0.8589435774309724, "grad_norm": 0.0006121039623394608, "learning_rate": 0.00019651803065425533, "loss": 23.0, "step": 1431 }, { "epoch": 0.8595438175270108, "grad_norm": 0.0012091121170669794, "learning_rate": 0.0001965130872898473, "loss": 23.0, "step": 1432 }, { "epoch": 0.8601440576230492, "grad_norm": 0.00046850915532559156, "learning_rate": 0.00019650814048113548, "loss": 23.0, "step": 1433 }, { "epoch": 0.8607442977190877, "grad_norm": 0.0009892003145068884, "learning_rate": 0.00019650319022829636, "loss": 23.0, "step": 1434 }, { "epoch": 0.8613445378151261, "grad_norm": 0.0005852010217495263, "learning_rate": 0.0001964982365315066, "loss": 23.0, "step": 1435 }, { "epoch": 0.8619447779111644, "grad_norm": 0.0006273521576076746, "learning_rate": 0.00019649327939094303, "loss": 23.0, "step": 1436 }, { "epoch": 0.8625450180072028, "grad_norm": 0.0006230876315385103, "learning_rate": 0.0001964883188067825, "loss": 23.0, "step": 1437 }, { "epoch": 0.8631452581032413, "grad_norm": 0.0003952436672989279, "learning_rate": 0.00019648335477920207, "loss": 23.0, "step": 1438 }, { "epoch": 0.8637454981992797, "grad_norm": 0.003755589248612523, "learning_rate": 0.0001964783873083789, "loss": 23.0, "step": 1439 }, { "epoch": 0.8643457382953181, "grad_norm": 0.0012677287450060248, "learning_rate": 0.00019647341639449027, "loss": 23.0, "step": 1440 }, { "epoch": 0.8649459783913566, "grad_norm": 0.0004233888175804168, "learning_rate": 0.00019646844203771358, "loss": 23.0, "step": 1441 }, { "epoch": 0.865546218487395, "grad_norm": 0.0010196702787652612, "learning_rate": 0.00019646346423822633, "loss": 23.0, "step": 1442 }, { "epoch": 0.8661464585834334, "grad_norm": 0.0009242582018487155, "learning_rate": 0.00019645848299620618, "loss": 23.0, "step": 1443 }, { "epoch": 0.8667466986794717, "grad_norm": 0.0009520480525679886, "learning_rate": 0.00019645349831183087, "loss": 23.0, "step": 1444 }, { "epoch": 0.8673469387755102, "grad_norm": 0.0017986897146329284, "learning_rate": 0.0001964485101852783, "loss": 23.0, "step": 1445 }, { "epoch": 0.8679471788715486, "grad_norm": 0.0005925934528931975, "learning_rate": 0.00019644351861672653, "loss": 23.0, "step": 1446 }, { "epoch": 0.868547418967587, "grad_norm": 0.0011633182875812054, "learning_rate": 0.00019643852360635365, "loss": 23.0, "step": 1447 }, { "epoch": 0.8691476590636255, "grad_norm": 0.0009133127168752253, "learning_rate": 0.00019643352515433794, "loss": 23.0, "step": 1448 }, { "epoch": 0.8697478991596639, "grad_norm": 0.001071081729605794, "learning_rate": 0.00019642852326085778, "loss": 23.0, "step": 1449 }, { "epoch": 0.8703481392557023, "grad_norm": 0.0005460986285470426, "learning_rate": 0.00019642351792609165, "loss": 23.0, "step": 1450 }, { "epoch": 0.8709483793517407, "grad_norm": 0.0007283551385626197, "learning_rate": 0.0001964185091502182, "loss": 23.0, "step": 1451 }, { "epoch": 0.8715486194477791, "grad_norm": 0.000346360175171867, "learning_rate": 0.0001964134969334162, "loss": 23.0, "step": 1452 }, { "epoch": 0.8721488595438175, "grad_norm": 0.0015917529817670584, "learning_rate": 0.00019640848127586445, "loss": 23.0, "step": 1453 }, { "epoch": 0.8727490996398559, "grad_norm": 0.0009000685531646013, "learning_rate": 0.00019640346217774204, "loss": 23.0, "step": 1454 }, { "epoch": 0.8733493397358943, "grad_norm": 0.00068512256257236, "learning_rate": 0.00019639843963922804, "loss": 23.0, "step": 1455 }, { "epoch": 0.8739495798319328, "grad_norm": 0.0012403937289491296, "learning_rate": 0.0001963934136605017, "loss": 23.0, "step": 1456 }, { "epoch": 0.8745498199279712, "grad_norm": 0.0012238931376487017, "learning_rate": 0.00019638838424174235, "loss": 23.0, "step": 1457 }, { "epoch": 0.8751500600240096, "grad_norm": 0.0005801822990179062, "learning_rate": 0.00019638335138312952, "loss": 23.0, "step": 1458 }, { "epoch": 0.8757503001200481, "grad_norm": 0.0006364347646012902, "learning_rate": 0.0001963783150848428, "loss": 23.0, "step": 1459 }, { "epoch": 0.8763505402160864, "grad_norm": 0.0006346119334921241, "learning_rate": 0.00019637327534706195, "loss": 23.0, "step": 1460 }, { "epoch": 0.8769507803121248, "grad_norm": 0.00040727152372710407, "learning_rate": 0.00019636823216996678, "loss": 23.0, "step": 1461 }, { "epoch": 0.8775510204081632, "grad_norm": 0.0008567664190195501, "learning_rate": 0.0001963631855537373, "loss": 23.0, "step": 1462 }, { "epoch": 0.8781512605042017, "grad_norm": 0.0006258795619942248, "learning_rate": 0.0001963581354985536, "loss": 23.0, "step": 1463 }, { "epoch": 0.8787515006002401, "grad_norm": 0.0005797443445771933, "learning_rate": 0.00019635308200459593, "loss": 23.0, "step": 1464 }, { "epoch": 0.8793517406962785, "grad_norm": 0.0007031543063931167, "learning_rate": 0.0001963480250720446, "loss": 23.0, "step": 1465 }, { "epoch": 0.879951980792317, "grad_norm": 0.0009526876965537667, "learning_rate": 0.0001963429647010801, "loss": 23.0, "step": 1466 }, { "epoch": 0.8805522208883554, "grad_norm": 0.000710951688233763, "learning_rate": 0.000196337900891883, "loss": 23.0, "step": 1467 }, { "epoch": 0.8811524609843937, "grad_norm": 0.0012169844703748822, "learning_rate": 0.00019633283364463403, "loss": 23.0, "step": 1468 }, { "epoch": 0.8817527010804321, "grad_norm": 0.0011517518432810903, "learning_rate": 0.00019632776295951403, "loss": 23.0, "step": 1469 }, { "epoch": 0.8823529411764706, "grad_norm": 0.0008190085063688457, "learning_rate": 0.00019632268883670393, "loss": 23.0, "step": 1470 }, { "epoch": 0.882953181272509, "grad_norm": 0.0016430607065558434, "learning_rate": 0.0001963176112763849, "loss": 23.0, "step": 1471 }, { "epoch": 0.8835534213685474, "grad_norm": 0.0006540712201967835, "learning_rate": 0.000196312530278738, "loss": 23.0, "step": 1472 }, { "epoch": 0.8841536614645858, "grad_norm": 0.0008266113582067192, "learning_rate": 0.00019630744584394467, "loss": 23.0, "step": 1473 }, { "epoch": 0.8847539015606243, "grad_norm": 0.000942007580306381, "learning_rate": 0.00019630235797218638, "loss": 23.0, "step": 1474 }, { "epoch": 0.8853541416566627, "grad_norm": 0.0007080623181536794, "learning_rate": 0.0001962972666636446, "loss": 23.0, "step": 1475 }, { "epoch": 0.885954381752701, "grad_norm": 0.0004341659077908844, "learning_rate": 0.0001962921719185011, "loss": 23.0, "step": 1476 }, { "epoch": 0.8865546218487395, "grad_norm": 0.0004911704454571009, "learning_rate": 0.00019628707373693763, "loss": 23.0, "step": 1477 }, { "epoch": 0.8871548619447779, "grad_norm": 0.00033043770235963166, "learning_rate": 0.00019628197211913625, "loss": 23.0, "step": 1478 }, { "epoch": 0.8877551020408163, "grad_norm": 0.0010875524021685123, "learning_rate": 0.0001962768670652789, "loss": 23.0, "step": 1479 }, { "epoch": 0.8883553421368547, "grad_norm": 0.0007014353759586811, "learning_rate": 0.00019627175857554784, "loss": 23.0, "step": 1480 }, { "epoch": 0.8889555822328932, "grad_norm": 0.0004790904640685767, "learning_rate": 0.00019626664665012535, "loss": 23.0, "step": 1481 }, { "epoch": 0.8895558223289316, "grad_norm": 0.00041154108475893736, "learning_rate": 0.00019626153128919386, "loss": 23.0, "step": 1482 }, { "epoch": 0.89015606242497, "grad_norm": 0.0009722684626467526, "learning_rate": 0.00019625641249293595, "loss": 23.0, "step": 1483 }, { "epoch": 0.8907563025210085, "grad_norm": 0.0011275166179984808, "learning_rate": 0.00019625129026153428, "loss": 23.0, "step": 1484 }, { "epoch": 0.8913565426170468, "grad_norm": 0.0010743034072220325, "learning_rate": 0.00019624616459517166, "loss": 23.0, "step": 1485 }, { "epoch": 0.8919567827130852, "grad_norm": 0.0004264815361239016, "learning_rate": 0.000196241035494031, "loss": 23.0, "step": 1486 }, { "epoch": 0.8925570228091236, "grad_norm": 0.0013276687823235989, "learning_rate": 0.0001962359029582953, "loss": 23.0, "step": 1487 }, { "epoch": 0.8931572629051621, "grad_norm": 0.000943381863180548, "learning_rate": 0.0001962307669881478, "loss": 23.0, "step": 1488 }, { "epoch": 0.8937575030012005, "grad_norm": 0.0006792533094994724, "learning_rate": 0.00019622562758377177, "loss": 23.0, "step": 1489 }, { "epoch": 0.8943577430972389, "grad_norm": 0.0022334514651447535, "learning_rate": 0.0001962204847453506, "loss": 23.0, "step": 1490 }, { "epoch": 0.8949579831932774, "grad_norm": 0.0005680921021848917, "learning_rate": 0.00019621533847306784, "loss": 23.0, "step": 1491 }, { "epoch": 0.8955582232893158, "grad_norm": 0.0007726846379227936, "learning_rate": 0.00019621018876710716, "loss": 23.0, "step": 1492 }, { "epoch": 0.8961584633853541, "grad_norm": 0.0007318484713323414, "learning_rate": 0.0001962050356276523, "loss": 23.0, "step": 1493 }, { "epoch": 0.8967587034813925, "grad_norm": 0.0006041128071956336, "learning_rate": 0.00019619987905488721, "loss": 23.0, "step": 1494 }, { "epoch": 0.897358943577431, "grad_norm": 0.0009723983239382505, "learning_rate": 0.00019619471904899587, "loss": 23.0, "step": 1495 }, { "epoch": 0.8979591836734694, "grad_norm": 0.0008362563094124198, "learning_rate": 0.00019618955561016247, "loss": 23.0, "step": 1496 }, { "epoch": 0.8985594237695078, "grad_norm": 0.0006792604108341038, "learning_rate": 0.00019618438873857124, "loss": 23.0, "step": 1497 }, { "epoch": 0.8991596638655462, "grad_norm": 0.0007895256276242435, "learning_rate": 0.0001961792184344066, "loss": 23.0, "step": 1498 }, { "epoch": 0.8997599039615847, "grad_norm": 0.0011160440044477582, "learning_rate": 0.00019617404469785305, "loss": 23.0, "step": 1499 }, { "epoch": 0.9003601440576231, "grad_norm": 0.0006315569626167417, "learning_rate": 0.00019616886752909526, "loss": 23.0, "step": 1500 }, { "epoch": 0.9009603841536614, "grad_norm": 0.0010441996855661273, "learning_rate": 0.00019616368692831795, "loss": 23.0, "step": 1501 }, { "epoch": 0.9015606242496998, "grad_norm": 0.001420605811290443, "learning_rate": 0.000196158502895706, "loss": 23.0, "step": 1502 }, { "epoch": 0.9021608643457383, "grad_norm": 0.0008463888661935925, "learning_rate": 0.00019615331543144446, "loss": 23.0, "step": 1503 }, { "epoch": 0.9027611044417767, "grad_norm": 0.0011006301501765847, "learning_rate": 0.0001961481245357184, "loss": 23.0, "step": 1504 }, { "epoch": 0.9033613445378151, "grad_norm": 0.000740750867407769, "learning_rate": 0.00019614293020871312, "loss": 23.0, "step": 1505 }, { "epoch": 0.9039615846338536, "grad_norm": 0.0004690020577982068, "learning_rate": 0.00019613773245061395, "loss": 23.0, "step": 1506 }, { "epoch": 0.904561824729892, "grad_norm": 0.0004144386912230402, "learning_rate": 0.00019613253126160641, "loss": 23.0, "step": 1507 }, { "epoch": 0.9051620648259304, "grad_norm": 0.0006933241384103894, "learning_rate": 0.00019612732664187613, "loss": 23.0, "step": 1508 }, { "epoch": 0.9057623049219687, "grad_norm": 0.00047544355038553476, "learning_rate": 0.0001961221185916088, "loss": 23.0, "step": 1509 }, { "epoch": 0.9063625450180072, "grad_norm": 0.0005907033337280154, "learning_rate": 0.00019611690711099033, "loss": 23.0, "step": 1510 }, { "epoch": 0.9069627851140456, "grad_norm": 0.000761785835493356, "learning_rate": 0.0001961116922002067, "loss": 23.0, "step": 1511 }, { "epoch": 0.907563025210084, "grad_norm": 0.000696085742674768, "learning_rate": 0.000196106473859444, "loss": 23.0, "step": 1512 }, { "epoch": 0.9081632653061225, "grad_norm": 0.0013306043110787868, "learning_rate": 0.00019610125208888843, "loss": 23.0, "step": 1513 }, { "epoch": 0.9087635054021609, "grad_norm": 0.0017198032001033425, "learning_rate": 0.00019609602688872642, "loss": 23.0, "step": 1514 }, { "epoch": 0.9093637454981993, "grad_norm": 0.0004617887025233358, "learning_rate": 0.00019609079825914436, "loss": 23.0, "step": 1515 }, { "epoch": 0.9099639855942377, "grad_norm": 0.0006450479850172997, "learning_rate": 0.00019608556620032892, "loss": 23.0, "step": 1516 }, { "epoch": 0.9105642256902761, "grad_norm": 0.000481103896163404, "learning_rate": 0.00019608033071246678, "loss": 23.0, "step": 1517 }, { "epoch": 0.9111644657863145, "grad_norm": 0.0014282638439908624, "learning_rate": 0.00019607509179574473, "loss": 23.0, "step": 1518 }, { "epoch": 0.9117647058823529, "grad_norm": 0.0011594934621825814, "learning_rate": 0.00019606984945034985, "loss": 23.0, "step": 1519 }, { "epoch": 0.9123649459783914, "grad_norm": 0.0007562115788459778, "learning_rate": 0.00019606460367646916, "loss": 23.0, "step": 1520 }, { "epoch": 0.9129651860744298, "grad_norm": 0.0005319854826666415, "learning_rate": 0.00019605935447428985, "loss": 23.0, "step": 1521 }, { "epoch": 0.9135654261704682, "grad_norm": 0.0007216419326141477, "learning_rate": 0.00019605410184399927, "loss": 23.0, "step": 1522 }, { "epoch": 0.9141656662665066, "grad_norm": 0.0007937728078104556, "learning_rate": 0.00019604884578578488, "loss": 23.0, "step": 1523 }, { "epoch": 0.9147659063625451, "grad_norm": 0.0007360963500104845, "learning_rate": 0.00019604358629983428, "loss": 23.0, "step": 1524 }, { "epoch": 0.9153661464585834, "grad_norm": 0.0009595039300620556, "learning_rate": 0.00019603832338633512, "loss": 23.0, "step": 1525 }, { "epoch": 0.9159663865546218, "grad_norm": 0.0013019670732319355, "learning_rate": 0.00019603305704547522, "loss": 23.0, "step": 1526 }, { "epoch": 0.9165666266506602, "grad_norm": 0.0011073811911046505, "learning_rate": 0.00019602778727744258, "loss": 23.0, "step": 1527 }, { "epoch": 0.9171668667466987, "grad_norm": 0.0009854474337771535, "learning_rate": 0.0001960225140824252, "loss": 23.0, "step": 1528 }, { "epoch": 0.9177671068427371, "grad_norm": 0.0018431590870022774, "learning_rate": 0.0001960172374606113, "loss": 23.0, "step": 1529 }, { "epoch": 0.9183673469387755, "grad_norm": 0.0007089927676133811, "learning_rate": 0.00019601195741218917, "loss": 23.0, "step": 1530 }, { "epoch": 0.918967587034814, "grad_norm": 0.0009421886061318219, "learning_rate": 0.0001960066739373472, "loss": 23.0, "step": 1531 }, { "epoch": 0.9195678271308524, "grad_norm": 0.0009645966929383576, "learning_rate": 0.00019600138703627408, "loss": 23.0, "step": 1532 }, { "epoch": 0.9201680672268907, "grad_norm": 0.00032688723877072334, "learning_rate": 0.00019599609670915838, "loss": 23.0, "step": 1533 }, { "epoch": 0.9207683073229291, "grad_norm": 0.0006491580861620605, "learning_rate": 0.0001959908029561889, "loss": 23.0, "step": 1534 }, { "epoch": 0.9213685474189676, "grad_norm": 0.0006321097607724369, "learning_rate": 0.00019598550577755458, "loss": 23.0, "step": 1535 }, { "epoch": 0.921968787515006, "grad_norm": 0.000790178484749049, "learning_rate": 0.0001959802051734445, "loss": 23.0, "step": 1536 }, { "epoch": 0.9225690276110444, "grad_norm": 0.0002988299529533833, "learning_rate": 0.00019597490114404775, "loss": 23.0, "step": 1537 }, { "epoch": 0.9231692677070829, "grad_norm": 0.00046298291999846697, "learning_rate": 0.00019596959368955368, "loss": 23.0, "step": 1538 }, { "epoch": 0.9237695078031213, "grad_norm": 0.0023733177222311497, "learning_rate": 0.00019596428281015168, "loss": 23.0, "step": 1539 }, { "epoch": 0.9243697478991597, "grad_norm": 0.001180400256998837, "learning_rate": 0.00019595896850603124, "loss": 23.0, "step": 1540 }, { "epoch": 0.924969987995198, "grad_norm": 0.0008325580274686217, "learning_rate": 0.00019595365077738208, "loss": 23.0, "step": 1541 }, { "epoch": 0.9255702280912365, "grad_norm": 0.000297403777949512, "learning_rate": 0.00019594832962439393, "loss": 23.0, "step": 1542 }, { "epoch": 0.9261704681872749, "grad_norm": 0.0009298890945501626, "learning_rate": 0.0001959430050472567, "loss": 23.0, "step": 1543 }, { "epoch": 0.9267707082833133, "grad_norm": 0.0006622796645388007, "learning_rate": 0.0001959376770461604, "loss": 23.0, "step": 1544 }, { "epoch": 0.9273709483793517, "grad_norm": 0.0008475258946418762, "learning_rate": 0.00019593234562129524, "loss": 23.0, "step": 1545 }, { "epoch": 0.9279711884753902, "grad_norm": 0.0005952999927103519, "learning_rate": 0.0001959270107728514, "loss": 23.0, "step": 1546 }, { "epoch": 0.9285714285714286, "grad_norm": 0.002795860404148698, "learning_rate": 0.0001959216725010193, "loss": 23.0, "step": 1547 }, { "epoch": 0.929171668667467, "grad_norm": 0.001094569219276309, "learning_rate": 0.00019591633080598948, "loss": 23.0, "step": 1548 }, { "epoch": 0.9297719087635054, "grad_norm": 0.0005409182049334049, "learning_rate": 0.0001959109856879525, "loss": 23.0, "step": 1549 }, { "epoch": 0.9303721488595438, "grad_norm": 0.00046397114056162536, "learning_rate": 0.00019590563714709918, "loss": 23.0, "step": 1550 }, { "epoch": 0.9309723889555822, "grad_norm": 0.001301168231293559, "learning_rate": 0.00019590028518362034, "loss": 23.0, "step": 1551 }, { "epoch": 0.9315726290516206, "grad_norm": 0.0005839786608703434, "learning_rate": 0.00019589492979770704, "loss": 23.0, "step": 1552 }, { "epoch": 0.9321728691476591, "grad_norm": 0.0005165533511899412, "learning_rate": 0.00019588957098955033, "loss": 23.0, "step": 1553 }, { "epoch": 0.9327731092436975, "grad_norm": 0.0005963983130641282, "learning_rate": 0.0001958842087593415, "loss": 23.0, "step": 1554 }, { "epoch": 0.9333733493397359, "grad_norm": 0.001547312829643488, "learning_rate": 0.0001958788431072719, "loss": 23.0, "step": 1555 }, { "epoch": 0.9339735894357744, "grad_norm": 0.000669624307192862, "learning_rate": 0.00019587347403353302, "loss": 23.0, "step": 1556 }, { "epoch": 0.9345738295318127, "grad_norm": 0.0019656869117170572, "learning_rate": 0.0001958681015383165, "loss": 23.0, "step": 1557 }, { "epoch": 0.9351740696278511, "grad_norm": 0.0012716340133920312, "learning_rate": 0.000195862725621814, "loss": 23.0, "step": 1558 }, { "epoch": 0.9357743097238895, "grad_norm": 0.0007798285805620253, "learning_rate": 0.00019585734628421744, "loss": 23.0, "step": 1559 }, { "epoch": 0.936374549819928, "grad_norm": 0.0011394252069294453, "learning_rate": 0.00019585196352571872, "loss": 23.0, "step": 1560 }, { "epoch": 0.9369747899159664, "grad_norm": 0.000518056214787066, "learning_rate": 0.00019584657734651005, "loss": 23.0, "step": 1561 }, { "epoch": 0.9375750300120048, "grad_norm": 0.0007476578466594219, "learning_rate": 0.00019584118774678352, "loss": 23.0, "step": 1562 }, { "epoch": 0.9381752701080432, "grad_norm": 0.0008059003739617765, "learning_rate": 0.00019583579472673158, "loss": 23.0, "step": 1563 }, { "epoch": 0.9387755102040817, "grad_norm": 0.0019434284185990691, "learning_rate": 0.00019583039828654662, "loss": 23.0, "step": 1564 }, { "epoch": 0.93937575030012, "grad_norm": 0.0008690199465490878, "learning_rate": 0.00019582499842642122, "loss": 23.0, "step": 1565 }, { "epoch": 0.9399759903961584, "grad_norm": 0.0008223998011089861, "learning_rate": 0.00019581959514654816, "loss": 23.0, "step": 1566 }, { "epoch": 0.9405762304921969, "grad_norm": 0.001912278588861227, "learning_rate": 0.00019581418844712023, "loss": 23.0, "step": 1567 }, { "epoch": 0.9411764705882353, "grad_norm": 0.0009676693007349968, "learning_rate": 0.00019580877832833037, "loss": 23.0, "step": 1568 }, { "epoch": 0.9417767106842737, "grad_norm": 0.0007918410701677203, "learning_rate": 0.00019580336479037164, "loss": 23.0, "step": 1569 }, { "epoch": 0.9423769507803121, "grad_norm": 0.0008247760706581175, "learning_rate": 0.00019579794783343728, "loss": 23.0, "step": 1570 }, { "epoch": 0.9429771908763506, "grad_norm": 0.0006936482386663556, "learning_rate": 0.0001957925274577206, "loss": 23.0, "step": 1571 }, { "epoch": 0.943577430972389, "grad_norm": 0.001130030956119299, "learning_rate": 0.00019578710366341498, "loss": 23.0, "step": 1572 }, { "epoch": 0.9441776710684273, "grad_norm": 0.0017003900138661265, "learning_rate": 0.00019578167645071403, "loss": 23.0, "step": 1573 }, { "epoch": 0.9447779111644657, "grad_norm": 0.00029980865656398237, "learning_rate": 0.00019577624581981144, "loss": 23.0, "step": 1574 }, { "epoch": 0.9453781512605042, "grad_norm": 0.0006558767054229975, "learning_rate": 0.000195770811770901, "loss": 23.0, "step": 1575 }, { "epoch": 0.9459783913565426, "grad_norm": 0.0006806926685385406, "learning_rate": 0.00019576537430417666, "loss": 23.0, "step": 1576 }, { "epoch": 0.946578631452581, "grad_norm": 0.000988876330666244, "learning_rate": 0.00019575993341983244, "loss": 23.0, "step": 1577 }, { "epoch": 0.9471788715486195, "grad_norm": 0.0013003622880205512, "learning_rate": 0.00019575448911806248, "loss": 23.0, "step": 1578 }, { "epoch": 0.9477791116446579, "grad_norm": 0.0005484142457135022, "learning_rate": 0.00019574904139906115, "loss": 23.0, "step": 1579 }, { "epoch": 0.9483793517406963, "grad_norm": 0.0021183365024626255, "learning_rate": 0.00019574359026302283, "loss": 23.0, "step": 1580 }, { "epoch": 0.9489795918367347, "grad_norm": 0.0008176713599823415, "learning_rate": 0.00019573813571014203, "loss": 23.0, "step": 1581 }, { "epoch": 0.9495798319327731, "grad_norm": 0.0007823684718459845, "learning_rate": 0.00019573267774061347, "loss": 23.0, "step": 1582 }, { "epoch": 0.9501800720288115, "grad_norm": 0.0012796282535418868, "learning_rate": 0.0001957272163546319, "loss": 23.0, "step": 1583 }, { "epoch": 0.9507803121248499, "grad_norm": 0.0009288830915465951, "learning_rate": 0.0001957217515523922, "loss": 23.0, "step": 1584 }, { "epoch": 0.9513805522208884, "grad_norm": 0.0009111070539802313, "learning_rate": 0.0001957162833340894, "loss": 23.0, "step": 1585 }, { "epoch": 0.9519807923169268, "grad_norm": 0.0007150248857215047, "learning_rate": 0.00019571081169991866, "loss": 23.0, "step": 1586 }, { "epoch": 0.9525810324129652, "grad_norm": 0.0008457883959636092, "learning_rate": 0.00019570533665007526, "loss": 23.0, "step": 1587 }, { "epoch": 0.9531812725090036, "grad_norm": 0.0007468207040801644, "learning_rate": 0.0001956998581847546, "loss": 23.0, "step": 1588 }, { "epoch": 0.9537815126050421, "grad_norm": 0.0007428768440149724, "learning_rate": 0.0001956943763041521, "loss": 23.0, "step": 1589 }, { "epoch": 0.9543817527010804, "grad_norm": 0.00031296900124289095, "learning_rate": 0.00019568889100846353, "loss": 23.0, "step": 1590 }, { "epoch": 0.9549819927971188, "grad_norm": 0.001030638930387795, "learning_rate": 0.00019568340229788456, "loss": 23.0, "step": 1591 }, { "epoch": 0.9555822328931572, "grad_norm": 0.0011427139397710562, "learning_rate": 0.00019567791017261108, "loss": 23.0, "step": 1592 }, { "epoch": 0.9561824729891957, "grad_norm": 0.0013832402182742953, "learning_rate": 0.0001956724146328391, "loss": 23.0, "step": 1593 }, { "epoch": 0.9567827130852341, "grad_norm": 0.001810853835195303, "learning_rate": 0.00019566691567876477, "loss": 23.0, "step": 1594 }, { "epoch": 0.9573829531812725, "grad_norm": 0.0009885139297693968, "learning_rate": 0.0001956614133105843, "loss": 23.0, "step": 1595 }, { "epoch": 0.957983193277311, "grad_norm": 0.0010269538033753633, "learning_rate": 0.00019565590752849404, "loss": 23.0, "step": 1596 }, { "epoch": 0.9585834333733494, "grad_norm": 0.0007400467875413597, "learning_rate": 0.0001956503983326905, "loss": 23.0, "step": 1597 }, { "epoch": 0.9591836734693877, "grad_norm": 0.0005352499429136515, "learning_rate": 0.00019564488572337027, "loss": 23.0, "step": 1598 }, { "epoch": 0.9597839135654261, "grad_norm": 0.0011886239517480135, "learning_rate": 0.0001956393697007301, "loss": 23.0, "step": 1599 }, { "epoch": 0.9603841536614646, "grad_norm": 0.0006831000209785998, "learning_rate": 0.00019563385026496688, "loss": 23.0, "step": 1600 }, { "epoch": 0.960984393757503, "grad_norm": 0.001316646346822381, "learning_rate": 0.0001956283274162775, "loss": 23.0, "step": 1601 }, { "epoch": 0.9615846338535414, "grad_norm": 0.0008040472748689353, "learning_rate": 0.00019562280115485915, "loss": 23.0, "step": 1602 }, { "epoch": 0.9621848739495799, "grad_norm": 0.0010138957295566797, "learning_rate": 0.00019561727148090894, "loss": 23.0, "step": 1603 }, { "epoch": 0.9627851140456183, "grad_norm": 0.0006073630647733808, "learning_rate": 0.00019561173839462427, "loss": 23.0, "step": 1604 }, { "epoch": 0.9633853541416567, "grad_norm": 0.000547137635294348, "learning_rate": 0.00019560620189620263, "loss": 23.0, "step": 1605 }, { "epoch": 0.963985594237695, "grad_norm": 0.000544386391993612, "learning_rate": 0.00019560066198584156, "loss": 23.0, "step": 1606 }, { "epoch": 0.9645858343337335, "grad_norm": 0.000897867837920785, "learning_rate": 0.00019559511866373882, "loss": 23.0, "step": 1607 }, { "epoch": 0.9651860744297719, "grad_norm": 0.0004369942471385002, "learning_rate": 0.00019558957193009213, "loss": 23.0, "step": 1608 }, { "epoch": 0.9657863145258103, "grad_norm": 0.0010517592309042811, "learning_rate": 0.00019558402178509952, "loss": 23.0, "step": 1609 }, { "epoch": 0.9663865546218487, "grad_norm": 0.00039016862865537405, "learning_rate": 0.00019557846822895907, "loss": 23.0, "step": 1610 }, { "epoch": 0.9669867947178872, "grad_norm": 0.0016062683425843716, "learning_rate": 0.00019557291126186895, "loss": 23.0, "step": 1611 }, { "epoch": 0.9675870348139256, "grad_norm": 0.0006835482781752944, "learning_rate": 0.00019556735088402743, "loss": 23.0, "step": 1612 }, { "epoch": 0.968187274909964, "grad_norm": 0.000899162725545466, "learning_rate": 0.000195561787095633, "loss": 23.0, "step": 1613 }, { "epoch": 0.9687875150060024, "grad_norm": 0.0005444439593702555, "learning_rate": 0.0001955562198968842, "loss": 23.0, "step": 1614 }, { "epoch": 0.9693877551020408, "grad_norm": 0.0008718172903172672, "learning_rate": 0.00019555064928797973, "loss": 23.0, "step": 1615 }, { "epoch": 0.9699879951980792, "grad_norm": 0.0003390471974853426, "learning_rate": 0.00019554507526911837, "loss": 23.0, "step": 1616 }, { "epoch": 0.9705882352941176, "grad_norm": 0.000555711449123919, "learning_rate": 0.00019553949784049902, "loss": 23.0, "step": 1617 }, { "epoch": 0.9711884753901561, "grad_norm": 0.0003842646547127515, "learning_rate": 0.00019553391700232077, "loss": 23.0, "step": 1618 }, { "epoch": 0.9717887154861945, "grad_norm": 0.0005270171677693725, "learning_rate": 0.00019552833275478278, "loss": 23.0, "step": 1619 }, { "epoch": 0.9723889555822329, "grad_norm": 0.0006583972717635334, "learning_rate": 0.00019552274509808428, "loss": 23.0, "step": 1620 }, { "epoch": 0.9729891956782714, "grad_norm": 0.0007377169677056372, "learning_rate": 0.00019551715403242476, "loss": 23.0, "step": 1621 }, { "epoch": 0.9735894357743097, "grad_norm": 0.0012161527993157506, "learning_rate": 0.0001955115595580037, "loss": 23.0, "step": 1622 }, { "epoch": 0.9741896758703481, "grad_norm": 0.00046388438204303384, "learning_rate": 0.00019550596167502076, "loss": 23.0, "step": 1623 }, { "epoch": 0.9747899159663865, "grad_norm": 0.0014881680253893137, "learning_rate": 0.0001955003603836757, "loss": 23.0, "step": 1624 }, { "epoch": 0.975390156062425, "grad_norm": 0.0010795058915391564, "learning_rate": 0.00019549475568416848, "loss": 23.0, "step": 1625 }, { "epoch": 0.9759903961584634, "grad_norm": 0.0007747529307380319, "learning_rate": 0.00019548914757669901, "loss": 23.0, "step": 1626 }, { "epoch": 0.9765906362545018, "grad_norm": 0.0010959148639813066, "learning_rate": 0.00019548353606146755, "loss": 23.0, "step": 1627 }, { "epoch": 0.9771908763505402, "grad_norm": 0.0006908348295837641, "learning_rate": 0.00019547792113867426, "loss": 23.0, "step": 1628 }, { "epoch": 0.9777911164465787, "grad_norm": 0.000946520478464663, "learning_rate": 0.0001954723028085196, "loss": 23.0, "step": 1629 }, { "epoch": 0.978391356542617, "grad_norm": 0.0008105000597424805, "learning_rate": 0.00019546668107120403, "loss": 23.0, "step": 1630 }, { "epoch": 0.9789915966386554, "grad_norm": 0.0005076247034594417, "learning_rate": 0.00019546105592692817, "loss": 23.0, "step": 1631 }, { "epoch": 0.9795918367346939, "grad_norm": 0.0004834830469917506, "learning_rate": 0.00019545542737589276, "loss": 23.0, "step": 1632 }, { "epoch": 0.9801920768307323, "grad_norm": 0.00032156877568922937, "learning_rate": 0.00019544979541829874, "loss": 23.0, "step": 1633 }, { "epoch": 0.9807923169267707, "grad_norm": 0.0005227511865086854, "learning_rate": 0.00019544416005434697, "loss": 23.0, "step": 1634 }, { "epoch": 0.9813925570228091, "grad_norm": 0.001222484279423952, "learning_rate": 0.00019543852128423868, "loss": 23.0, "step": 1635 }, { "epoch": 0.9819927971188476, "grad_norm": 0.0010409961687400937, "learning_rate": 0.00019543287910817506, "loss": 23.0, "step": 1636 }, { "epoch": 0.982593037214886, "grad_norm": 0.0007245879387483001, "learning_rate": 0.0001954272335263575, "loss": 23.0, "step": 1637 }, { "epoch": 0.9831932773109243, "grad_norm": 0.00042797072092071176, "learning_rate": 0.00019542158453898743, "loss": 23.0, "step": 1638 }, { "epoch": 0.9837935174069627, "grad_norm": 0.0005213615368120372, "learning_rate": 0.00019541593214626642, "loss": 23.0, "step": 1639 }, { "epoch": 0.9843937575030012, "grad_norm": 0.0005875686765648425, "learning_rate": 0.00019541027634839626, "loss": 23.0, "step": 1640 }, { "epoch": 0.9849939975990396, "grad_norm": 0.0010960171930491924, "learning_rate": 0.0001954046171455788, "loss": 23.0, "step": 1641 }, { "epoch": 0.985594237695078, "grad_norm": 0.002143792575225234, "learning_rate": 0.00019539895453801588, "loss": 23.0, "step": 1642 }, { "epoch": 0.9861944777911165, "grad_norm": 0.001718964776955545, "learning_rate": 0.0001953932885259097, "loss": 23.0, "step": 1643 }, { "epoch": 0.9867947178871549, "grad_norm": 0.0011779890628531575, "learning_rate": 0.00019538761910946247, "loss": 23.0, "step": 1644 }, { "epoch": 0.9873949579831933, "grad_norm": 0.0014262180775403976, "learning_rate": 0.00019538194628887645, "loss": 23.0, "step": 1645 }, { "epoch": 0.9879951980792316, "grad_norm": 0.0003844281018245965, "learning_rate": 0.00019537627006435413, "loss": 23.0, "step": 1646 }, { "epoch": 0.9885954381752701, "grad_norm": 0.0007693939842283726, "learning_rate": 0.00019537059043609807, "loss": 23.0, "step": 1647 }, { "epoch": 0.9891956782713085, "grad_norm": 0.0002765923854894936, "learning_rate": 0.00019536490740431096, "loss": 23.0, "step": 1648 }, { "epoch": 0.9897959183673469, "grad_norm": 0.00032293866388499737, "learning_rate": 0.0001953592209691956, "loss": 23.0, "step": 1649 }, { "epoch": 0.9903961584633854, "grad_norm": 0.0011859709629788995, "learning_rate": 0.00019535353113095494, "loss": 23.0, "step": 1650 }, { "epoch": 0.9909963985594238, "grad_norm": 0.0023132844362407923, "learning_rate": 0.00019534783788979204, "loss": 23.0, "step": 1651 }, { "epoch": 0.9915966386554622, "grad_norm": 0.0004042698710691184, "learning_rate": 0.00019534214124591006, "loss": 23.0, "step": 1652 }, { "epoch": 0.9921968787515006, "grad_norm": 0.0006533717969432473, "learning_rate": 0.0001953364411995123, "loss": 23.0, "step": 1653 }, { "epoch": 0.992797118847539, "grad_norm": 0.000819582084659487, "learning_rate": 0.00019533073775080222, "loss": 23.0, "step": 1654 }, { "epoch": 0.9933973589435774, "grad_norm": 0.00045290245907381177, "learning_rate": 0.0001953250308999833, "loss": 23.0, "step": 1655 }, { "epoch": 0.9939975990396158, "grad_norm": 0.0009432425140403211, "learning_rate": 0.00019531932064725924, "loss": 23.0, "step": 1656 }, { "epoch": 0.9945978391356542, "grad_norm": 0.0010298251872882247, "learning_rate": 0.0001953136069928338, "loss": 23.0, "step": 1657 }, { "epoch": 0.9951980792316927, "grad_norm": 0.00044269522186368704, "learning_rate": 0.00019530788993691095, "loss": 23.0, "step": 1658 }, { "epoch": 0.9957983193277311, "grad_norm": 0.0016579084331169724, "learning_rate": 0.00019530216947969464, "loss": 23.0, "step": 1659 }, { "epoch": 0.9963985594237695, "grad_norm": 0.0013545328984037042, "learning_rate": 0.00019529644562138904, "loss": 23.0, "step": 1660 }, { "epoch": 0.996998799519808, "grad_norm": 0.0006384035223163664, "learning_rate": 0.00019529071836219846, "loss": 23.0, "step": 1661 }, { "epoch": 0.9975990396158463, "grad_norm": 0.0019687407184392214, "learning_rate": 0.0001952849877023272, "loss": 23.0, "step": 1662 }, { "epoch": 0.9981992797118847, "grad_norm": 0.0010327474446967244, "learning_rate": 0.00019527925364197987, "loss": 23.0, "step": 1663 }, { "epoch": 0.9987995198079231, "grad_norm": 0.0003291741304565221, "learning_rate": 0.00019527351618136107, "loss": 23.0, "step": 1664 }, { "epoch": 0.9993997599039616, "grad_norm": 0.0008972626528702676, "learning_rate": 0.00019526777532067553, "loss": 23.0, "step": 1665 }, { "epoch": 1.0, "grad_norm": 0.0016940529458224773, "learning_rate": 0.00019526203106012816, "loss": 23.0, "step": 1666 }, { "epoch": 1.0006002400960383, "grad_norm": 0.00041232057265006006, "learning_rate": 0.00019525628339992393, "loss": 23.0, "step": 1667 }, { "epoch": 1.0012004801920769, "grad_norm": 0.0018668645061552525, "learning_rate": 0.00019525053234026803, "loss": 23.0, "step": 1668 }, { "epoch": 1.0018007202881152, "grad_norm": 0.0006821074057370424, "learning_rate": 0.00019524477788136557, "loss": 23.0, "step": 1669 }, { "epoch": 1.0024009603841537, "grad_norm": 0.0022220544051378965, "learning_rate": 0.000195239020023422, "loss": 23.0, "step": 1670 }, { "epoch": 1.003001200480192, "grad_norm": 0.0005048304446972907, "learning_rate": 0.00019523325876664283, "loss": 23.0, "step": 1671 }, { "epoch": 1.0036014405762306, "grad_norm": 0.0006994889699853957, "learning_rate": 0.0001952274941112336, "loss": 23.0, "step": 1672 }, { "epoch": 1.004201680672269, "grad_norm": 0.00142678152769804, "learning_rate": 0.00019522172605740008, "loss": 23.0, "step": 1673 }, { "epoch": 1.0048019207683074, "grad_norm": 0.0002718534669838846, "learning_rate": 0.00019521595460534808, "loss": 23.0, "step": 1674 }, { "epoch": 1.0054021608643458, "grad_norm": 0.0011870613088831306, "learning_rate": 0.00019521017975528357, "loss": 23.0, "step": 1675 }, { "epoch": 1.006002400960384, "grad_norm": 0.0005727344541810453, "learning_rate": 0.00019520440150741267, "loss": 23.0, "step": 1676 }, { "epoch": 1.0066026410564226, "grad_norm": 0.0013802223838865757, "learning_rate": 0.00019519861986194156, "loss": 23.0, "step": 1677 }, { "epoch": 1.007202881152461, "grad_norm": 0.0009242665255442262, "learning_rate": 0.0001951928348190766, "loss": 23.0, "step": 1678 }, { "epoch": 1.0078031212484995, "grad_norm": 0.000813992868643254, "learning_rate": 0.00019518704637902422, "loss": 23.0, "step": 1679 }, { "epoch": 1.0084033613445378, "grad_norm": 0.0006548427045345306, "learning_rate": 0.000195181254541991, "loss": 23.0, "step": 1680 }, { "epoch": 1.0090036014405763, "grad_norm": 0.00041556128417141736, "learning_rate": 0.00019517545930818368, "loss": 23.0, "step": 1681 }, { "epoch": 1.0096038415366146, "grad_norm": 0.0006826121243648231, "learning_rate": 0.00019516966067780898, "loss": 23.0, "step": 1682 }, { "epoch": 1.010204081632653, "grad_norm": 0.0007658922113478184, "learning_rate": 0.00019516385865107395, "loss": 23.0, "step": 1683 }, { "epoch": 1.0108043217286915, "grad_norm": 0.0010680507402867079, "learning_rate": 0.00019515805322818558, "loss": 23.0, "step": 1684 }, { "epoch": 1.0114045618247298, "grad_norm": 0.0007877147872932255, "learning_rate": 0.00019515224440935105, "loss": 23.0, "step": 1685 }, { "epoch": 1.0120048019207684, "grad_norm": 0.0010208765743300319, "learning_rate": 0.0001951464321947777, "loss": 23.0, "step": 1686 }, { "epoch": 1.0126050420168067, "grad_norm": 0.000991889159195125, "learning_rate": 0.00019514061658467292, "loss": 23.0, "step": 1687 }, { "epoch": 1.0132052821128452, "grad_norm": 0.0005913240602239966, "learning_rate": 0.00019513479757924426, "loss": 23.0, "step": 1688 }, { "epoch": 1.0138055222088835, "grad_norm": 0.0007095134351402521, "learning_rate": 0.00019512897517869938, "loss": 23.0, "step": 1689 }, { "epoch": 1.014405762304922, "grad_norm": 0.002312737051397562, "learning_rate": 0.0001951231493832461, "loss": 23.0, "step": 1690 }, { "epoch": 1.0150060024009604, "grad_norm": 0.0005010634195059538, "learning_rate": 0.0001951173201930923, "loss": 23.0, "step": 1691 }, { "epoch": 1.0156062424969987, "grad_norm": 0.000687493709847331, "learning_rate": 0.000195111487608446, "loss": 23.0, "step": 1692 }, { "epoch": 1.0162064825930373, "grad_norm": 0.0007613154011778533, "learning_rate": 0.00019510565162951537, "loss": 23.0, "step": 1693 }, { "epoch": 1.0168067226890756, "grad_norm": 0.0009327959851361811, "learning_rate": 0.00019509981225650867, "loss": 23.0, "step": 1694 }, { "epoch": 1.017406962785114, "grad_norm": 0.0010371070820838213, "learning_rate": 0.0001950939694896343, "loss": 23.0, "step": 1695 }, { "epoch": 1.0180072028811524, "grad_norm": 0.0004927398986183107, "learning_rate": 0.00019508812332910078, "loss": 23.0, "step": 1696 }, { "epoch": 1.018607442977191, "grad_norm": 0.001125394832342863, "learning_rate": 0.0001950822737751167, "loss": 23.0, "step": 1697 }, { "epoch": 1.0192076830732293, "grad_norm": 0.0010863045463338494, "learning_rate": 0.00019507642082789087, "loss": 23.0, "step": 1698 }, { "epoch": 1.0198079231692676, "grad_norm": 0.001297933398745954, "learning_rate": 0.0001950705644876322, "loss": 23.0, "step": 1699 }, { "epoch": 1.0204081632653061, "grad_norm": 0.0008561917347833514, "learning_rate": 0.00019506470475454956, "loss": 23.0, "step": 1700 }, { "epoch": 1.0210084033613445, "grad_norm": 0.000712038017809391, "learning_rate": 0.00019505884162885218, "loss": 23.0, "step": 1701 }, { "epoch": 1.021608643457383, "grad_norm": 0.0004501239163801074, "learning_rate": 0.00019505297511074926, "loss": 23.0, "step": 1702 }, { "epoch": 1.0222088835534213, "grad_norm": 0.0008231149986386299, "learning_rate": 0.00019504710520045016, "loss": 23.0, "step": 1703 }, { "epoch": 1.0228091236494599, "grad_norm": 0.0007951670559123158, "learning_rate": 0.00019504123189816435, "loss": 23.0, "step": 1704 }, { "epoch": 1.0234093637454982, "grad_norm": 0.0006479431758634746, "learning_rate": 0.00019503535520410146, "loss": 23.0, "step": 1705 }, { "epoch": 1.0240096038415367, "grad_norm": 0.0005281239282339811, "learning_rate": 0.0001950294751184712, "loss": 23.0, "step": 1706 }, { "epoch": 1.024609843937575, "grad_norm": 0.000501799862831831, "learning_rate": 0.0001950235916414834, "loss": 23.0, "step": 1707 }, { "epoch": 1.0252100840336134, "grad_norm": 0.0005454413476400077, "learning_rate": 0.0001950177047733481, "loss": 23.0, "step": 1708 }, { "epoch": 1.025810324129652, "grad_norm": 0.0004704859165940434, "learning_rate": 0.0001950118145142753, "loss": 23.0, "step": 1709 }, { "epoch": 1.0264105642256902, "grad_norm": 0.0006746204453520477, "learning_rate": 0.00019500592086447522, "loss": 23.0, "step": 1710 }, { "epoch": 1.0270108043217288, "grad_norm": 0.0018217653268948197, "learning_rate": 0.00019500002382415822, "loss": 23.0, "step": 1711 }, { "epoch": 1.027611044417767, "grad_norm": 0.0017384163802489638, "learning_rate": 0.00019499412339353477, "loss": 23.0, "step": 1712 }, { "epoch": 1.0282112845138056, "grad_norm": 0.001107073505409062, "learning_rate": 0.00019498821957281536, "loss": 23.0, "step": 1713 }, { "epoch": 1.028811524609844, "grad_norm": 0.001236962154507637, "learning_rate": 0.00019498231236221076, "loss": 23.0, "step": 1714 }, { "epoch": 1.0294117647058822, "grad_norm": 0.0008350252173841, "learning_rate": 0.00019497640176193175, "loss": 23.0, "step": 1715 }, { "epoch": 1.0300120048019208, "grad_norm": 0.0014757871394976974, "learning_rate": 0.00019497048777218927, "loss": 23.0, "step": 1716 }, { "epoch": 1.030612244897959, "grad_norm": 0.0012261768570169806, "learning_rate": 0.00019496457039319437, "loss": 23.0, "step": 1717 }, { "epoch": 1.0312124849939976, "grad_norm": 0.0005536626558750868, "learning_rate": 0.00019495864962515823, "loss": 23.0, "step": 1718 }, { "epoch": 1.031812725090036, "grad_norm": 0.000878203718457371, "learning_rate": 0.00019495272546829218, "loss": 23.0, "step": 1719 }, { "epoch": 1.0324129651860745, "grad_norm": 0.0019284028094261885, "learning_rate": 0.00019494679792280754, "loss": 23.0, "step": 1720 }, { "epoch": 1.0330132052821128, "grad_norm": 0.0012051918311044574, "learning_rate": 0.000194940866988916, "loss": 23.0, "step": 1721 }, { "epoch": 1.0336134453781514, "grad_norm": 0.0010601821122691035, "learning_rate": 0.00019493493266682906, "loss": 23.0, "step": 1722 }, { "epoch": 1.0342136854741897, "grad_norm": 0.00037417240673676133, "learning_rate": 0.0001949289949567586, "loss": 23.0, "step": 1723 }, { "epoch": 1.034813925570228, "grad_norm": 0.0007202907581813633, "learning_rate": 0.0001949230538589165, "loss": 23.0, "step": 1724 }, { "epoch": 1.0354141656662665, "grad_norm": 0.0011258620070293546, "learning_rate": 0.0001949171093735148, "loss": 23.0, "step": 1725 }, { "epoch": 1.0360144057623049, "grad_norm": 0.0006037678685970604, "learning_rate": 0.0001949111615007656, "loss": 23.0, "step": 1726 }, { "epoch": 1.0366146458583434, "grad_norm": 0.0015769102610647678, "learning_rate": 0.00019490521024088117, "loss": 23.0, "step": 1727 }, { "epoch": 1.0372148859543817, "grad_norm": 0.001167101669125259, "learning_rate": 0.00019489925559407393, "loss": 23.0, "step": 1728 }, { "epoch": 1.0378151260504203, "grad_norm": 0.0004130336456000805, "learning_rate": 0.00019489329756055637, "loss": 23.0, "step": 1729 }, { "epoch": 1.0384153661464586, "grad_norm": 0.0007463322835974395, "learning_rate": 0.0001948873361405411, "loss": 23.0, "step": 1730 }, { "epoch": 1.039015606242497, "grad_norm": 0.001030348939821124, "learning_rate": 0.0001948813713342409, "loss": 23.0, "step": 1731 }, { "epoch": 1.0396158463385354, "grad_norm": 0.0018405051669105887, "learning_rate": 0.00019487540314186864, "loss": 23.0, "step": 1732 }, { "epoch": 1.0402160864345738, "grad_norm": 0.0007769169751554728, "learning_rate": 0.00019486943156363725, "loss": 23.0, "step": 1733 }, { "epoch": 1.0408163265306123, "grad_norm": 0.0008031317847780883, "learning_rate": 0.00019486345659975988, "loss": 23.0, "step": 1734 }, { "epoch": 1.0414165666266506, "grad_norm": 0.0003373367653694004, "learning_rate": 0.0001948574782504498, "loss": 23.0, "step": 1735 }, { "epoch": 1.0420168067226891, "grad_norm": 0.001183771644718945, "learning_rate": 0.00019485149651592032, "loss": 23.0, "step": 1736 }, { "epoch": 1.0426170468187275, "grad_norm": 0.00049160688649863, "learning_rate": 0.00019484551139638487, "loss": 23.0, "step": 1737 }, { "epoch": 1.043217286914766, "grad_norm": 0.0005904583376832306, "learning_rate": 0.00019483952289205714, "loss": 23.0, "step": 1738 }, { "epoch": 1.0438175270108043, "grad_norm": 0.0012503239559009671, "learning_rate": 0.0001948335310031508, "loss": 23.0, "step": 1739 }, { "epoch": 1.0444177671068426, "grad_norm": 0.001702033099718392, "learning_rate": 0.00019482753572987963, "loss": 23.0, "step": 1740 }, { "epoch": 1.0450180072028812, "grad_norm": 0.0005251219845376909, "learning_rate": 0.00019482153707245766, "loss": 23.0, "step": 1741 }, { "epoch": 1.0456182472989195, "grad_norm": 0.0004746287304442376, "learning_rate": 0.00019481553503109892, "loss": 23.0, "step": 1742 }, { "epoch": 1.046218487394958, "grad_norm": 0.0006970618851482868, "learning_rate": 0.00019480952960601765, "loss": 23.0, "step": 1743 }, { "epoch": 1.0468187274909964, "grad_norm": 0.0005676356377080083, "learning_rate": 0.00019480352079742817, "loss": 23.0, "step": 1744 }, { "epoch": 1.047418967587035, "grad_norm": 0.000801927933935076, "learning_rate": 0.00019479750860554486, "loss": 23.0, "step": 1745 }, { "epoch": 1.0480192076830732, "grad_norm": 0.0004625970032066107, "learning_rate": 0.00019479149303058233, "loss": 23.0, "step": 1746 }, { "epoch": 1.0486194477791115, "grad_norm": 0.0007363795302808285, "learning_rate": 0.00019478547407275525, "loss": 23.0, "step": 1747 }, { "epoch": 1.04921968787515, "grad_norm": 0.0011515008518472314, "learning_rate": 0.00019477945173227844, "loss": 23.0, "step": 1748 }, { "epoch": 1.0498199279711884, "grad_norm": 0.0005580635624937713, "learning_rate": 0.00019477342600936675, "loss": 23.0, "step": 1749 }, { "epoch": 1.050420168067227, "grad_norm": 0.0010533389868214726, "learning_rate": 0.00019476739690423532, "loss": 23.0, "step": 1750 }, { "epoch": 1.0510204081632653, "grad_norm": 0.0008852750179357827, "learning_rate": 0.00019476136441709923, "loss": 23.0, "step": 1751 }, { "epoch": 1.0516206482593038, "grad_norm": 0.0007593745249323547, "learning_rate": 0.0001947553285481738, "loss": 23.0, "step": 1752 }, { "epoch": 1.052220888355342, "grad_norm": 0.00030658565810881555, "learning_rate": 0.00019474928929767444, "loss": 23.0, "step": 1753 }, { "epoch": 1.0528211284513807, "grad_norm": 0.0006529420497827232, "learning_rate": 0.0001947432466658167, "loss": 23.0, "step": 1754 }, { "epoch": 1.053421368547419, "grad_norm": 0.0011519229738041759, "learning_rate": 0.00019473720065281614, "loss": 23.0, "step": 1755 }, { "epoch": 1.0540216086434573, "grad_norm": 0.0003251166781410575, "learning_rate": 0.00019473115125888862, "loss": 23.0, "step": 1756 }, { "epoch": 1.0546218487394958, "grad_norm": 0.00099608872551471, "learning_rate": 0.00019472509848424997, "loss": 23.0, "step": 1757 }, { "epoch": 1.0552220888355341, "grad_norm": 0.0007561391685158014, "learning_rate": 0.00019471904232911622, "loss": 23.0, "step": 1758 }, { "epoch": 1.0558223289315727, "grad_norm": 0.0008905903669074178, "learning_rate": 0.00019471298279370353, "loss": 23.0, "step": 1759 }, { "epoch": 1.056422569027611, "grad_norm": 0.0009993897983804345, "learning_rate": 0.0001947069198782281, "loss": 23.0, "step": 1760 }, { "epoch": 1.0570228091236495, "grad_norm": 0.0006743833655491471, "learning_rate": 0.00019470085358290628, "loss": 23.0, "step": 1761 }, { "epoch": 1.0576230492196879, "grad_norm": 0.0009488465148024261, "learning_rate": 0.0001946947839079546, "loss": 23.0, "step": 1762 }, { "epoch": 1.0582232893157264, "grad_norm": 0.000561807188205421, "learning_rate": 0.0001946887108535897, "loss": 23.0, "step": 1763 }, { "epoch": 1.0588235294117647, "grad_norm": 0.0009906566701829433, "learning_rate": 0.0001946826344200283, "loss": 23.0, "step": 1764 }, { "epoch": 1.059423769507803, "grad_norm": 0.0008896515937522054, "learning_rate": 0.00019467655460748718, "loss": 23.0, "step": 1765 }, { "epoch": 1.0600240096038416, "grad_norm": 0.0005601485027000308, "learning_rate": 0.00019467047141618336, "loss": 23.0, "step": 1766 }, { "epoch": 1.06062424969988, "grad_norm": 0.0007029306143522263, "learning_rate": 0.00019466438484633396, "loss": 23.0, "step": 1767 }, { "epoch": 1.0612244897959184, "grad_norm": 0.0009415207314305007, "learning_rate": 0.00019465829489815614, "loss": 23.0, "step": 1768 }, { "epoch": 1.0618247298919568, "grad_norm": 0.0005582883604802191, "learning_rate": 0.0001946522015718673, "loss": 23.0, "step": 1769 }, { "epoch": 1.0624249699879953, "grad_norm": 0.0004074526659678668, "learning_rate": 0.00019464610486768485, "loss": 23.0, "step": 1770 }, { "epoch": 1.0630252100840336, "grad_norm": 0.0006803267169743776, "learning_rate": 0.00019464000478582638, "loss": 23.0, "step": 1771 }, { "epoch": 1.063625450180072, "grad_norm": 0.0007196204387582839, "learning_rate": 0.0001946339013265096, "loss": 23.0, "step": 1772 }, { "epoch": 1.0642256902761105, "grad_norm": 0.00023397163022309542, "learning_rate": 0.00019462779448995227, "loss": 23.0, "step": 1773 }, { "epoch": 1.0648259303721488, "grad_norm": 0.0004155267379246652, "learning_rate": 0.00019462168427637237, "loss": 23.0, "step": 1774 }, { "epoch": 1.0654261704681873, "grad_norm": 0.0004314890829846263, "learning_rate": 0.00019461557068598797, "loss": 23.0, "step": 1775 }, { "epoch": 1.0660264105642256, "grad_norm": 0.0005188965005800128, "learning_rate": 0.00019460945371901724, "loss": 23.0, "step": 1776 }, { "epoch": 1.0666266506602642, "grad_norm": 0.00037539261393249035, "learning_rate": 0.00019460333337567844, "loss": 23.0, "step": 1777 }, { "epoch": 1.0672268907563025, "grad_norm": 0.001011545886285603, "learning_rate": 0.00019459720965619005, "loss": 23.0, "step": 1778 }, { "epoch": 1.0678271308523408, "grad_norm": 0.0004321189771872014, "learning_rate": 0.0001945910825607706, "loss": 23.0, "step": 1779 }, { "epoch": 1.0684273709483794, "grad_norm": 0.0005004126578569412, "learning_rate": 0.0001945849520896387, "loss": 23.0, "step": 1780 }, { "epoch": 1.0690276110444177, "grad_norm": 0.0024395366199314594, "learning_rate": 0.00019457881824301313, "loss": 23.0, "step": 1781 }, { "epoch": 1.0696278511404562, "grad_norm": 0.0013693496584892273, "learning_rate": 0.00019457268102111288, "loss": 23.0, "step": 1782 }, { "epoch": 1.0702280912364945, "grad_norm": 0.0010166800348088145, "learning_rate": 0.00019456654042415686, "loss": 23.0, "step": 1783 }, { "epoch": 1.070828331332533, "grad_norm": 0.0006024075555615127, "learning_rate": 0.00019456039645236433, "loss": 23.0, "step": 1784 }, { "epoch": 1.0714285714285714, "grad_norm": 0.0007192663033492863, "learning_rate": 0.00019455424910595445, "loss": 23.0, "step": 1785 }, { "epoch": 1.07202881152461, "grad_norm": 0.0006474677356891334, "learning_rate": 0.00019454809838514664, "loss": 23.0, "step": 1786 }, { "epoch": 1.0726290516206483, "grad_norm": 0.0006114196730777621, "learning_rate": 0.00019454194429016042, "loss": 23.0, "step": 1787 }, { "epoch": 1.0732292917166866, "grad_norm": 0.0007615411304868758, "learning_rate": 0.00019453578682121538, "loss": 23.0, "step": 1788 }, { "epoch": 1.0738295318127251, "grad_norm": 0.0015807717572897673, "learning_rate": 0.00019452962597853133, "loss": 23.0, "step": 1789 }, { "epoch": 1.0744297719087634, "grad_norm": 0.000809942779596895, "learning_rate": 0.00019452346176232802, "loss": 23.0, "step": 1790 }, { "epoch": 1.075030012004802, "grad_norm": 0.0013856550212949514, "learning_rate": 0.00019451729417282557, "loss": 23.0, "step": 1791 }, { "epoch": 1.0756302521008403, "grad_norm": 0.0016725269379094243, "learning_rate": 0.00019451112321024398, "loss": 23.0, "step": 1792 }, { "epoch": 1.0762304921968788, "grad_norm": 0.0009614219306968153, "learning_rate": 0.0001945049488748035, "loss": 23.0, "step": 1793 }, { "epoch": 1.0768307322929171, "grad_norm": 0.0008319414919242263, "learning_rate": 0.00019449877116672453, "loss": 23.0, "step": 1794 }, { "epoch": 1.0774309723889557, "grad_norm": 0.0004205653676763177, "learning_rate": 0.00019449259008622746, "loss": 23.0, "step": 1795 }, { "epoch": 1.078031212484994, "grad_norm": 0.0007004413055256009, "learning_rate": 0.00019448640563353293, "loss": 23.0, "step": 1796 }, { "epoch": 1.0786314525810323, "grad_norm": 0.001981280744075775, "learning_rate": 0.00019448021780886165, "loss": 23.0, "step": 1797 }, { "epoch": 1.0792316926770709, "grad_norm": 0.0008109554764814675, "learning_rate": 0.0001944740266124344, "loss": 23.0, "step": 1798 }, { "epoch": 1.0798319327731092, "grad_norm": 0.0008627360220998526, "learning_rate": 0.00019446783204447217, "loss": 23.0, "step": 1799 }, { "epoch": 1.0804321728691477, "grad_norm": 0.00029498786898329854, "learning_rate": 0.00019446163410519603, "loss": 23.0, "step": 1800 }, { "epoch": 1.081032412965186, "grad_norm": 0.0010635429061949253, "learning_rate": 0.00019445543279482714, "loss": 23.0, "step": 1801 }, { "epoch": 1.0816326530612246, "grad_norm": 0.0012698029167950153, "learning_rate": 0.00019444922811358683, "loss": 23.0, "step": 1802 }, { "epoch": 1.082232893157263, "grad_norm": 0.0006329349125735462, "learning_rate": 0.0001944430200616965, "loss": 23.0, "step": 1803 }, { "epoch": 1.0828331332533012, "grad_norm": 0.0003994033904746175, "learning_rate": 0.0001944368086393777, "loss": 23.0, "step": 1804 }, { "epoch": 1.0834333733493398, "grad_norm": 0.0010559451766312122, "learning_rate": 0.00019443059384685217, "loss": 23.0, "step": 1805 }, { "epoch": 1.084033613445378, "grad_norm": 0.0007147497381083667, "learning_rate": 0.0001944243756843416, "loss": 23.0, "step": 1806 }, { "epoch": 1.0846338535414166, "grad_norm": 0.0006631044088862836, "learning_rate": 0.00019441815415206798, "loss": 23.0, "step": 1807 }, { "epoch": 1.085234093637455, "grad_norm": 0.0013852432603016496, "learning_rate": 0.00019441192925025334, "loss": 23.0, "step": 1808 }, { "epoch": 1.0858343337334935, "grad_norm": 0.0013341652229428291, "learning_rate": 0.00019440570097911975, "loss": 23.0, "step": 1809 }, { "epoch": 1.0864345738295318, "grad_norm": 0.000731373846065253, "learning_rate": 0.00019439946933888958, "loss": 23.0, "step": 1810 }, { "epoch": 1.0870348139255703, "grad_norm": 0.0006150943809188902, "learning_rate": 0.00019439323432978513, "loss": 23.0, "step": 1811 }, { "epoch": 1.0876350540216086, "grad_norm": 0.0007697598193772137, "learning_rate": 0.00019438699595202897, "loss": 23.0, "step": 1812 }, { "epoch": 1.088235294117647, "grad_norm": 0.000452876411145553, "learning_rate": 0.00019438075420584374, "loss": 23.0, "step": 1813 }, { "epoch": 1.0888355342136855, "grad_norm": 0.0010431616101413965, "learning_rate": 0.00019437450909145213, "loss": 23.0, "step": 1814 }, { "epoch": 1.0894357743097238, "grad_norm": 0.00121380016207695, "learning_rate": 0.0001943682606090771, "loss": 23.0, "step": 1815 }, { "epoch": 1.0900360144057624, "grad_norm": 0.0017278955783694983, "learning_rate": 0.00019436200875894155, "loss": 23.0, "step": 1816 }, { "epoch": 1.0906362545018007, "grad_norm": 0.0006101586041040719, "learning_rate": 0.00019435575354126864, "loss": 23.0, "step": 1817 }, { "epoch": 1.0912364945978392, "grad_norm": 0.000790750200394541, "learning_rate": 0.0001943494949562816, "loss": 23.0, "step": 1818 }, { "epoch": 1.0918367346938775, "grad_norm": 0.000308141577988863, "learning_rate": 0.00019434323300420377, "loss": 23.0, "step": 1819 }, { "epoch": 1.092436974789916, "grad_norm": 0.0013721545692533255, "learning_rate": 0.00019433696768525867, "loss": 23.0, "step": 1820 }, { "epoch": 1.0930372148859544, "grad_norm": 0.0009004747844301164, "learning_rate": 0.00019433069899966982, "loss": 23.0, "step": 1821 }, { "epoch": 1.0936374549819927, "grad_norm": 0.0007536576013080776, "learning_rate": 0.000194324426947661, "loss": 23.0, "step": 1822 }, { "epoch": 1.0942376950780313, "grad_norm": 0.0009083595359697938, "learning_rate": 0.000194318151529456, "loss": 23.0, "step": 1823 }, { "epoch": 1.0948379351740696, "grad_norm": 0.0006454298854805529, "learning_rate": 0.0001943118727452788, "loss": 23.0, "step": 1824 }, { "epoch": 1.0954381752701081, "grad_norm": 0.000571611279156059, "learning_rate": 0.00019430559059535344, "loss": 23.0, "step": 1825 }, { "epoch": 1.0960384153661464, "grad_norm": 0.000725729507394135, "learning_rate": 0.00019429930507990415, "loss": 23.0, "step": 1826 }, { "epoch": 1.096638655462185, "grad_norm": 0.0006706914282403886, "learning_rate": 0.00019429301619915522, "loss": 23.0, "step": 1827 }, { "epoch": 1.0972388955582233, "grad_norm": 0.0010118356440216303, "learning_rate": 0.00019428672395333112, "loss": 23.0, "step": 1828 }, { "epoch": 1.0978391356542616, "grad_norm": 0.0009120289469137788, "learning_rate": 0.00019428042834265634, "loss": 23.0, "step": 1829 }, { "epoch": 1.0984393757503002, "grad_norm": 0.0008227874641306698, "learning_rate": 0.00019427412936735562, "loss": 23.0, "step": 1830 }, { "epoch": 1.0990396158463385, "grad_norm": 0.00032032793387770653, "learning_rate": 0.00019426782702765366, "loss": 23.0, "step": 1831 }, { "epoch": 1.099639855942377, "grad_norm": 0.0006032648961991072, "learning_rate": 0.0001942615213237755, "loss": 23.0, "step": 1832 }, { "epoch": 1.1002400960384153, "grad_norm": 0.0007296554394997656, "learning_rate": 0.00019425521225594608, "loss": 23.0, "step": 1833 }, { "epoch": 1.1008403361344539, "grad_norm": 0.000931189744733274, "learning_rate": 0.00019424889982439062, "loss": 23.0, "step": 1834 }, { "epoch": 1.1014405762304922, "grad_norm": 0.001093988073989749, "learning_rate": 0.00019424258402933435, "loss": 23.0, "step": 1835 }, { "epoch": 1.1020408163265305, "grad_norm": 0.0012053363025188446, "learning_rate": 0.00019423626487100268, "loss": 23.0, "step": 1836 }, { "epoch": 1.102641056422569, "grad_norm": 0.0007283682352863252, "learning_rate": 0.00019422994234962113, "loss": 23.0, "step": 1837 }, { "epoch": 1.1032412965186074, "grad_norm": 0.0008196471026167274, "learning_rate": 0.00019422361646541532, "loss": 23.0, "step": 1838 }, { "epoch": 1.103841536614646, "grad_norm": 0.000663764018099755, "learning_rate": 0.00019421728721861098, "loss": 23.0, "step": 1839 }, { "epoch": 1.1044417767106842, "grad_norm": 0.00034105373197235167, "learning_rate": 0.00019421095460943406, "loss": 23.0, "step": 1840 }, { "epoch": 1.1050420168067228, "grad_norm": 0.00047400983748957515, "learning_rate": 0.00019420461863811048, "loss": 23.0, "step": 1841 }, { "epoch": 1.105642256902761, "grad_norm": 0.00045720263733528554, "learning_rate": 0.0001941982793048664, "loss": 23.0, "step": 1842 }, { "epoch": 1.1062424969987996, "grad_norm": 0.001074103289283812, "learning_rate": 0.00019419193660992801, "loss": 23.0, "step": 1843 }, { "epoch": 1.106842737094838, "grad_norm": 0.00114881107583642, "learning_rate": 0.00019418559055352176, "loss": 23.0, "step": 1844 }, { "epoch": 1.1074429771908763, "grad_norm": 0.00042918327380903065, "learning_rate": 0.000194179241135874, "loss": 23.0, "step": 1845 }, { "epoch": 1.1080432172869148, "grad_norm": 0.0016334144165739417, "learning_rate": 0.00019417288835721136, "loss": 23.0, "step": 1846 }, { "epoch": 1.1086434573829531, "grad_norm": 0.0006906944909133017, "learning_rate": 0.00019416653221776065, "loss": 23.0, "step": 1847 }, { "epoch": 1.1092436974789917, "grad_norm": 0.0012676967307925224, "learning_rate": 0.00019416017271774858, "loss": 23.0, "step": 1848 }, { "epoch": 1.10984393757503, "grad_norm": 0.0007337264833040535, "learning_rate": 0.00019415380985740214, "loss": 23.0, "step": 1849 }, { "epoch": 1.1104441776710685, "grad_norm": 0.0017202390590682626, "learning_rate": 0.00019414744363694845, "loss": 23.0, "step": 1850 }, { "epoch": 1.1110444177671068, "grad_norm": 0.0005216122954152524, "learning_rate": 0.00019414107405661468, "loss": 23.0, "step": 1851 }, { "epoch": 1.1116446578631454, "grad_norm": 0.0006443992024287581, "learning_rate": 0.00019413470111662813, "loss": 23.0, "step": 1852 }, { "epoch": 1.1122448979591837, "grad_norm": 0.0009258884820155799, "learning_rate": 0.00019412832481721624, "loss": 23.0, "step": 1853 }, { "epoch": 1.112845138055222, "grad_norm": 0.0015777413500472903, "learning_rate": 0.00019412194515860653, "loss": 23.0, "step": 1854 }, { "epoch": 1.1134453781512605, "grad_norm": 0.0007551502203568816, "learning_rate": 0.00019411556214102676, "loss": 23.0, "step": 1855 }, { "epoch": 1.1140456182472989, "grad_norm": 0.0004909979179501534, "learning_rate": 0.0001941091757647046, "loss": 23.0, "step": 1856 }, { "epoch": 1.1146458583433374, "grad_norm": 0.0002796545159071684, "learning_rate": 0.00019410278602986813, "loss": 23.0, "step": 1857 }, { "epoch": 1.1152460984393757, "grad_norm": 0.0010796081041917205, "learning_rate": 0.00019409639293674525, "loss": 23.0, "step": 1858 }, { "epoch": 1.1158463385354143, "grad_norm": 0.00046648201532661915, "learning_rate": 0.0001940899964855641, "loss": 23.0, "step": 1859 }, { "epoch": 1.1164465786314526, "grad_norm": 0.000706287391949445, "learning_rate": 0.00019408359667655306, "loss": 23.0, "step": 1860 }, { "epoch": 1.117046818727491, "grad_norm": 0.0010415187571197748, "learning_rate": 0.00019407719350994046, "loss": 23.0, "step": 1861 }, { "epoch": 1.1176470588235294, "grad_norm": 0.0007331138476729393, "learning_rate": 0.0001940707869859548, "loss": 23.0, "step": 1862 }, { "epoch": 1.1182472989195678, "grad_norm": 0.00044027238618582487, "learning_rate": 0.00019406437710482475, "loss": 23.0, "step": 1863 }, { "epoch": 1.1188475390156063, "grad_norm": 0.0006217750487849116, "learning_rate": 0.00019405796386677904, "loss": 23.0, "step": 1864 }, { "epoch": 1.1194477791116446, "grad_norm": 0.0009194968733936548, "learning_rate": 0.00019405154727204652, "loss": 23.0, "step": 1865 }, { "epoch": 1.1200480192076832, "grad_norm": 0.000699158466886729, "learning_rate": 0.00019404512732085625, "loss": 23.0, "step": 1866 }, { "epoch": 1.1206482593037215, "grad_norm": 0.0007176969083957374, "learning_rate": 0.00019403870401343726, "loss": 23.0, "step": 1867 }, { "epoch": 1.1212484993997598, "grad_norm": 0.0007284717867150903, "learning_rate": 0.00019403227735001886, "loss": 23.0, "step": 1868 }, { "epoch": 1.1218487394957983, "grad_norm": 0.0010808160295709968, "learning_rate": 0.00019402584733083032, "loss": 23.0, "step": 1869 }, { "epoch": 1.1224489795918366, "grad_norm": 0.0016216632211580873, "learning_rate": 0.00019401941395610117, "loss": 23.0, "step": 1870 }, { "epoch": 1.1230492196878752, "grad_norm": 0.0007159097003750503, "learning_rate": 0.00019401297722606098, "loss": 23.0, "step": 1871 }, { "epoch": 1.1236494597839135, "grad_norm": 0.0017748618265613914, "learning_rate": 0.00019400653714093946, "loss": 23.0, "step": 1872 }, { "epoch": 1.124249699879952, "grad_norm": 0.0015833675861358643, "learning_rate": 0.00019400009370096645, "loss": 23.0, "step": 1873 }, { "epoch": 1.1248499399759904, "grad_norm": 0.00064207857940346, "learning_rate": 0.0001939936469063719, "loss": 23.0, "step": 1874 }, { "epoch": 1.125450180072029, "grad_norm": 0.0004406536172609776, "learning_rate": 0.00019398719675738587, "loss": 23.0, "step": 1875 }, { "epoch": 1.1260504201680672, "grad_norm": 0.00028265893342904747, "learning_rate": 0.00019398074325423855, "loss": 23.0, "step": 1876 }, { "epoch": 1.1266506602641058, "grad_norm": 0.003184243803843856, "learning_rate": 0.00019397428639716022, "loss": 23.0, "step": 1877 }, { "epoch": 1.127250900360144, "grad_norm": 0.0008285728981718421, "learning_rate": 0.00019396782618638137, "loss": 23.0, "step": 1878 }, { "epoch": 1.1278511404561824, "grad_norm": 0.001333149615675211, "learning_rate": 0.00019396136262213253, "loss": 23.0, "step": 1879 }, { "epoch": 1.128451380552221, "grad_norm": 0.0005505660083144903, "learning_rate": 0.00019395489570464433, "loss": 23.0, "step": 1880 }, { "epoch": 1.1290516206482593, "grad_norm": 0.0003775360237341374, "learning_rate": 0.0001939484254341476, "loss": 23.0, "step": 1881 }, { "epoch": 1.1296518607442978, "grad_norm": 0.001226317137479782, "learning_rate": 0.0001939419518108732, "loss": 23.0, "step": 1882 }, { "epoch": 1.1302521008403361, "grad_norm": 0.0006789937033317983, "learning_rate": 0.00019393547483505222, "loss": 23.0, "step": 1883 }, { "epoch": 1.1308523409363747, "grad_norm": 0.001176654826849699, "learning_rate": 0.00019392899450691576, "loss": 23.0, "step": 1884 }, { "epoch": 1.131452581032413, "grad_norm": 0.0007635357324033976, "learning_rate": 0.00019392251082669512, "loss": 23.0, "step": 1885 }, { "epoch": 1.1320528211284513, "grad_norm": 0.001103962305933237, "learning_rate": 0.00019391602379462163, "loss": 23.0, "step": 1886 }, { "epoch": 1.1326530612244898, "grad_norm": 0.0004466601531021297, "learning_rate": 0.00019390953341092687, "loss": 23.0, "step": 1887 }, { "epoch": 1.1332533013205282, "grad_norm": 0.0006208834820427, "learning_rate": 0.0001939030396758424, "loss": 23.0, "step": 1888 }, { "epoch": 1.1338535414165667, "grad_norm": 0.0007072006119415164, "learning_rate": 0.0001938965425896, "loss": 23.0, "step": 1889 }, { "epoch": 1.134453781512605, "grad_norm": 0.0006387155153788626, "learning_rate": 0.00019389004215243153, "loss": 23.0, "step": 1890 }, { "epoch": 1.1350540216086435, "grad_norm": 0.0009332761983387172, "learning_rate": 0.00019388353836456896, "loss": 23.0, "step": 1891 }, { "epoch": 1.1356542617046819, "grad_norm": 0.0007608407759107649, "learning_rate": 0.00019387703122624442, "loss": 23.0, "step": 1892 }, { "epoch": 1.1362545018007202, "grad_norm": 0.00044079619692638516, "learning_rate": 0.0001938705207376901, "loss": 23.0, "step": 1893 }, { "epoch": 1.1368547418967587, "grad_norm": 0.0009360851254314184, "learning_rate": 0.00019386400689913838, "loss": 23.0, "step": 1894 }, { "epoch": 1.137454981992797, "grad_norm": 0.0009462125017307699, "learning_rate": 0.00019385748971082167, "loss": 23.0, "step": 1895 }, { "epoch": 1.1380552220888356, "grad_norm": 0.0005419541266746819, "learning_rate": 0.00019385096917297258, "loss": 23.0, "step": 1896 }, { "epoch": 1.138655462184874, "grad_norm": 0.0010163012193515897, "learning_rate": 0.00019384444528582385, "loss": 23.0, "step": 1897 }, { "epoch": 1.1392557022809124, "grad_norm": 0.0006951330578885972, "learning_rate": 0.00019383791804960824, "loss": 23.0, "step": 1898 }, { "epoch": 1.1398559423769508, "grad_norm": 0.0006850925274193287, "learning_rate": 0.00019383138746455872, "loss": 23.0, "step": 1899 }, { "epoch": 1.140456182472989, "grad_norm": 0.0007126698037609458, "learning_rate": 0.00019382485353090832, "loss": 23.0, "step": 1900 }, { "epoch": 1.1410564225690276, "grad_norm": 0.0005496939411386847, "learning_rate": 0.00019381831624889028, "loss": 23.0, "step": 1901 }, { "epoch": 1.141656662665066, "grad_norm": 0.0005483221611939371, "learning_rate": 0.00019381177561873786, "loss": 23.0, "step": 1902 }, { "epoch": 1.1422569027611045, "grad_norm": 0.000862023385707289, "learning_rate": 0.00019380523164068446, "loss": 23.0, "step": 1903 }, { "epoch": 1.1428571428571428, "grad_norm": 0.0019896167796105146, "learning_rate": 0.00019379868431496365, "loss": 23.0, "step": 1904 }, { "epoch": 1.1434573829531813, "grad_norm": 0.0011680411407724023, "learning_rate": 0.00019379213364180908, "loss": 23.0, "step": 1905 }, { "epoch": 1.1440576230492197, "grad_norm": 0.0008858213550411165, "learning_rate": 0.0001937855796214545, "loss": 23.0, "step": 1906 }, { "epoch": 1.1446578631452582, "grad_norm": 0.0010865547228604555, "learning_rate": 0.00019377902225413385, "loss": 23.0, "step": 1907 }, { "epoch": 1.1452581032412965, "grad_norm": 0.0010592964245006442, "learning_rate": 0.00019377246154008112, "loss": 23.0, "step": 1908 }, { "epoch": 1.145858343337335, "grad_norm": 0.000688974279910326, "learning_rate": 0.00019376589747953046, "loss": 23.0, "step": 1909 }, { "epoch": 1.1464585834333734, "grad_norm": 0.0005789738497696817, "learning_rate": 0.00019375933007271612, "loss": 23.0, "step": 1910 }, { "epoch": 1.1470588235294117, "grad_norm": 0.0003665656258817762, "learning_rate": 0.0001937527593198724, "loss": 23.0, "step": 1911 }, { "epoch": 1.1476590636254502, "grad_norm": 0.001227127038873732, "learning_rate": 0.00019374618522123395, "loss": 23.0, "step": 1912 }, { "epoch": 1.1482593037214885, "grad_norm": 0.0005941904382780194, "learning_rate": 0.00019373960777703525, "loss": 23.0, "step": 1913 }, { "epoch": 1.148859543817527, "grad_norm": 0.0007471052813343704, "learning_rate": 0.0001937330269875111, "loss": 23.0, "step": 1914 }, { "epoch": 1.1494597839135654, "grad_norm": 0.0004938453203067183, "learning_rate": 0.0001937264428528963, "loss": 23.0, "step": 1915 }, { "epoch": 1.150060024009604, "grad_norm": 0.0009274558979086578, "learning_rate": 0.00019371985537342586, "loss": 23.0, "step": 1916 }, { "epoch": 1.1506602641056423, "grad_norm": 0.0015852872747927904, "learning_rate": 0.00019371326454933484, "loss": 23.0, "step": 1917 }, { "epoch": 1.1512605042016806, "grad_norm": 0.0007189033203758299, "learning_rate": 0.00019370667038085847, "loss": 23.0, "step": 1918 }, { "epoch": 1.1518607442977191, "grad_norm": 0.0006547678494825959, "learning_rate": 0.0001937000728682321, "loss": 23.0, "step": 1919 }, { "epoch": 1.1524609843937574, "grad_norm": 0.0006517032161355019, "learning_rate": 0.00019369347201169114, "loss": 23.0, "step": 1920 }, { "epoch": 1.153061224489796, "grad_norm": 0.0009650959400460124, "learning_rate": 0.00019368686781147116, "loss": 23.0, "step": 1921 }, { "epoch": 1.1536614645858343, "grad_norm": 0.0007866573869250715, "learning_rate": 0.00019368026026780788, "loss": 23.0, "step": 1922 }, { "epoch": 1.1542617046818728, "grad_norm": 0.0005117384134791791, "learning_rate": 0.0001936736493809371, "loss": 23.0, "step": 1923 }, { "epoch": 1.1548619447779112, "grad_norm": 0.0005935249500907958, "learning_rate": 0.0001936670351510947, "loss": 23.0, "step": 1924 }, { "epoch": 1.1554621848739495, "grad_norm": 0.00046214647591114044, "learning_rate": 0.00019366041757851675, "loss": 23.0, "step": 1925 }, { "epoch": 1.156062424969988, "grad_norm": 0.0011201126035302877, "learning_rate": 0.00019365379666343944, "loss": 23.0, "step": 1926 }, { "epoch": 1.1566626650660263, "grad_norm": 0.000578420062083751, "learning_rate": 0.00019364717240609905, "loss": 23.0, "step": 1927 }, { "epoch": 1.1572629051620649, "grad_norm": 0.0009162966744042933, "learning_rate": 0.00019364054480673195, "loss": 23.0, "step": 1928 }, { "epoch": 1.1578631452581032, "grad_norm": 0.0007997804204933345, "learning_rate": 0.00019363391386557465, "loss": 23.0, "step": 1929 }, { "epoch": 1.1584633853541417, "grad_norm": 0.0005844663246534765, "learning_rate": 0.00019362727958286384, "loss": 23.0, "step": 1930 }, { "epoch": 1.15906362545018, "grad_norm": 0.0007434054859913886, "learning_rate": 0.00019362064195883627, "loss": 23.0, "step": 1931 }, { "epoch": 1.1596638655462184, "grad_norm": 0.001075836829841137, "learning_rate": 0.0001936140009937288, "loss": 23.0, "step": 1932 }, { "epoch": 1.160264105642257, "grad_norm": 0.0012545333011075854, "learning_rate": 0.00019360735668777843, "loss": 23.0, "step": 1933 }, { "epoch": 1.1608643457382952, "grad_norm": 0.000714300200343132, "learning_rate": 0.00019360070904122232, "loss": 23.0, "step": 1934 }, { "epoch": 1.1614645858343338, "grad_norm": 0.0012586622033268213, "learning_rate": 0.00019359405805429765, "loss": 23.0, "step": 1935 }, { "epoch": 1.162064825930372, "grad_norm": 0.0007662964635528624, "learning_rate": 0.0001935874037272418, "loss": 23.0, "step": 1936 }, { "epoch": 1.1626650660264106, "grad_norm": 0.0007426751544699073, "learning_rate": 0.00019358074606029227, "loss": 23.0, "step": 1937 }, { "epoch": 1.163265306122449, "grad_norm": 0.0007442353526130319, "learning_rate": 0.0001935740850536866, "loss": 23.0, "step": 1938 }, { "epoch": 1.1638655462184875, "grad_norm": 0.0003253078320994973, "learning_rate": 0.00019356742070766254, "loss": 23.0, "step": 1939 }, { "epoch": 1.1644657863145258, "grad_norm": 0.0004222916322760284, "learning_rate": 0.0001935607530224579, "loss": 23.0, "step": 1940 }, { "epoch": 1.1650660264105643, "grad_norm": 0.0008304398506879807, "learning_rate": 0.0001935540819983107, "loss": 23.0, "step": 1941 }, { "epoch": 1.1656662665066027, "grad_norm": 0.0008976273820735514, "learning_rate": 0.00019354740763545894, "loss": 23.0, "step": 1942 }, { "epoch": 1.166266506602641, "grad_norm": 0.0006581676425412297, "learning_rate": 0.00019354072993414082, "loss": 23.0, "step": 1943 }, { "epoch": 1.1668667466986795, "grad_norm": 0.0009220499778166413, "learning_rate": 0.0001935340488945947, "loss": 23.0, "step": 1944 }, { "epoch": 1.1674669867947178, "grad_norm": 0.0015496306587010622, "learning_rate": 0.00019352736451705896, "loss": 23.0, "step": 1945 }, { "epoch": 1.1680672268907564, "grad_norm": 0.001087090466171503, "learning_rate": 0.00019352067680177213, "loss": 23.0, "step": 1946 }, { "epoch": 1.1686674669867947, "grad_norm": 0.00046333111822605133, "learning_rate": 0.00019351398574897293, "loss": 23.0, "step": 1947 }, { "epoch": 1.1692677070828332, "grad_norm": 0.0005021764663979411, "learning_rate": 0.00019350729135890013, "loss": 23.0, "step": 1948 }, { "epoch": 1.1698679471788715, "grad_norm": 0.0011674378765746951, "learning_rate": 0.00019350059363179264, "loss": 23.0, "step": 1949 }, { "epoch": 1.1704681872749099, "grad_norm": 0.0008699194295331836, "learning_rate": 0.00019349389256788943, "loss": 23.0, "step": 1950 }, { "epoch": 1.1710684273709484, "grad_norm": 0.0007664512377232313, "learning_rate": 0.00019348718816742974, "loss": 23.0, "step": 1951 }, { "epoch": 1.1716686674669867, "grad_norm": 0.0016348707722499967, "learning_rate": 0.00019348048043065278, "loss": 23.0, "step": 1952 }, { "epoch": 1.1722689075630253, "grad_norm": 0.0011596891563385725, "learning_rate": 0.0001934737693577979, "loss": 23.0, "step": 1953 }, { "epoch": 1.1728691476590636, "grad_norm": 0.0011909161694347858, "learning_rate": 0.00019346705494910466, "loss": 23.0, "step": 1954 }, { "epoch": 1.1734693877551021, "grad_norm": 0.0013877099845558405, "learning_rate": 0.00019346033720481264, "loss": 23.0, "step": 1955 }, { "epoch": 1.1740696278511404, "grad_norm": 0.0008768134284764528, "learning_rate": 0.0001934536161251616, "loss": 23.0, "step": 1956 }, { "epoch": 1.1746698679471788, "grad_norm": 0.0006206629332154989, "learning_rate": 0.0001934468917103914, "loss": 23.0, "step": 1957 }, { "epoch": 1.1752701080432173, "grad_norm": 0.0009947314392775297, "learning_rate": 0.000193440163960742, "loss": 23.0, "step": 1958 }, { "epoch": 1.1758703481392556, "grad_norm": 0.0003389040066394955, "learning_rate": 0.0001934334328764535, "loss": 23.0, "step": 1959 }, { "epoch": 1.1764705882352942, "grad_norm": 0.0008335797465406358, "learning_rate": 0.00019342669845776614, "loss": 23.0, "step": 1960 }, { "epoch": 1.1770708283313325, "grad_norm": 0.0015772458864375949, "learning_rate": 0.00019341996070492022, "loss": 23.0, "step": 1961 }, { "epoch": 1.177671068427371, "grad_norm": 0.0009163774084299803, "learning_rate": 0.0001934132196181562, "loss": 23.0, "step": 1962 }, { "epoch": 1.1782713085234093, "grad_norm": 0.001706429524347186, "learning_rate": 0.00019340647519771467, "loss": 23.0, "step": 1963 }, { "epoch": 1.1788715486194479, "grad_norm": 0.0012178387260064483, "learning_rate": 0.0001933997274438363, "loss": 23.0, "step": 1964 }, { "epoch": 1.1794717887154862, "grad_norm": 0.00025741333956830204, "learning_rate": 0.00019339297635676195, "loss": 23.0, "step": 1965 }, { "epoch": 1.1800720288115247, "grad_norm": 0.002575822174549103, "learning_rate": 0.00019338622193673245, "loss": 23.0, "step": 1966 }, { "epoch": 1.180672268907563, "grad_norm": 0.0012099726591259241, "learning_rate": 0.00019337946418398897, "loss": 23.0, "step": 1967 }, { "epoch": 1.1812725090036014, "grad_norm": 0.000488203571876511, "learning_rate": 0.00019337270309877257, "loss": 23.0, "step": 1968 }, { "epoch": 1.18187274909964, "grad_norm": 0.0011559462873265147, "learning_rate": 0.00019336593868132457, "loss": 23.0, "step": 1969 }, { "epoch": 1.1824729891956782, "grad_norm": 0.001535662915557623, "learning_rate": 0.0001933591709318864, "loss": 23.0, "step": 1970 }, { "epoch": 1.1830732292917168, "grad_norm": 0.0009023676975630224, "learning_rate": 0.0001933523998506996, "loss": 23.0, "step": 1971 }, { "epoch": 1.183673469387755, "grad_norm": 0.0007497956976294518, "learning_rate": 0.00019334562543800573, "loss": 23.0, "step": 1972 }, { "epoch": 1.1842737094837936, "grad_norm": 0.0006753331399522722, "learning_rate": 0.00019333884769404661, "loss": 23.0, "step": 1973 }, { "epoch": 1.184873949579832, "grad_norm": 0.0019581643864512444, "learning_rate": 0.00019333206661906416, "loss": 23.0, "step": 1974 }, { "epoch": 1.1854741896758703, "grad_norm": 0.001169833354651928, "learning_rate": 0.00019332528221330028, "loss": 23.0, "step": 1975 }, { "epoch": 1.1860744297719088, "grad_norm": 0.0015272792661562562, "learning_rate": 0.00019331849447699716, "loss": 23.0, "step": 1976 }, { "epoch": 1.1866746698679471, "grad_norm": 0.0005761996726505458, "learning_rate": 0.000193311703410397, "loss": 23.0, "step": 1977 }, { "epoch": 1.1872749099639857, "grad_norm": 0.0010293567320331931, "learning_rate": 0.0001933049090137422, "loss": 23.0, "step": 1978 }, { "epoch": 1.187875150060024, "grad_norm": 0.0007244606385938823, "learning_rate": 0.0001932981112872752, "loss": 23.0, "step": 1979 }, { "epoch": 1.1884753901560625, "grad_norm": 0.00049425387987867, "learning_rate": 0.0001932913102312386, "loss": 23.0, "step": 1980 }, { "epoch": 1.1890756302521008, "grad_norm": 0.0006668847054243088, "learning_rate": 0.00019328450584587506, "loss": 23.0, "step": 1981 }, { "epoch": 1.1896758703481392, "grad_norm": 0.0004593547491822392, "learning_rate": 0.00019327769813142753, "loss": 23.0, "step": 1982 }, { "epoch": 1.1902761104441777, "grad_norm": 0.0003765747824218124, "learning_rate": 0.00019327088708813886, "loss": 23.0, "step": 1983 }, { "epoch": 1.190876350540216, "grad_norm": 0.0006263687973842025, "learning_rate": 0.00019326407271625217, "loss": 23.0, "step": 1984 }, { "epoch": 1.1914765906362546, "grad_norm": 0.0006472663953900337, "learning_rate": 0.0001932572550160106, "loss": 23.0, "step": 1985 }, { "epoch": 1.1920768307322929, "grad_norm": 0.0017092872876673937, "learning_rate": 0.0001932504339876575, "loss": 23.0, "step": 1986 }, { "epoch": 1.1926770708283314, "grad_norm": 0.000995015725493431, "learning_rate": 0.0001932436096314363, "loss": 23.0, "step": 1987 }, { "epoch": 1.1932773109243697, "grad_norm": 0.0012725737178698182, "learning_rate": 0.00019323678194759048, "loss": 23.0, "step": 1988 }, { "epoch": 1.193877551020408, "grad_norm": 0.0013512310106307268, "learning_rate": 0.00019322995093636375, "loss": 23.0, "step": 1989 }, { "epoch": 1.1944777911164466, "grad_norm": 0.0010033018188551068, "learning_rate": 0.0001932231165979999, "loss": 23.0, "step": 1990 }, { "epoch": 1.195078031212485, "grad_norm": 0.0009326778817921877, "learning_rate": 0.00019321627893274286, "loss": 23.0, "step": 1991 }, { "epoch": 1.1956782713085234, "grad_norm": 0.0008911388576962054, "learning_rate": 0.00019320943794083655, "loss": 23.0, "step": 1992 }, { "epoch": 1.1962785114045618, "grad_norm": 0.0008909364696592093, "learning_rate": 0.0001932025936225252, "loss": 23.0, "step": 1993 }, { "epoch": 1.1968787515006003, "grad_norm": 0.0005291649722494185, "learning_rate": 0.000193195745978053, "loss": 23.0, "step": 1994 }, { "epoch": 1.1974789915966386, "grad_norm": 0.0015328566078096628, "learning_rate": 0.00019318889500766437, "loss": 23.0, "step": 1995 }, { "epoch": 1.1980792316926772, "grad_norm": 0.0018019595881924033, "learning_rate": 0.0001931820407116038, "loss": 23.0, "step": 1996 }, { "epoch": 1.1986794717887155, "grad_norm": 0.0018812305061146617, "learning_rate": 0.00019317518309011587, "loss": 23.0, "step": 1997 }, { "epoch": 1.199279711884754, "grad_norm": 0.0010826725047081709, "learning_rate": 0.00019316832214344532, "loss": 23.0, "step": 1998 }, { "epoch": 1.1998799519807923, "grad_norm": 0.0004899448831565678, "learning_rate": 0.00019316145787183703, "loss": 23.0, "step": 1999 }, { "epoch": 1.2004801920768307, "grad_norm": 0.0009604006190784276, "learning_rate": 0.00019315459027553593, "loss": 23.0, "step": 2000 }, { "epoch": 1.2004801920768307, "eval_loss": 11.5, "eval_runtime": 5.481, "eval_samples_per_second": 255.976, "eval_steps_per_second": 32.111, "step": 2000 }, { "epoch": 1.2010804321728692, "grad_norm": 0.00026419636560603976, "learning_rate": 0.00019314771935478715, "loss": 23.0, "step": 2001 }, { "epoch": 1.2016806722689075, "grad_norm": 0.001335683511570096, "learning_rate": 0.00019314084510983586, "loss": 23.0, "step": 2002 }, { "epoch": 1.202280912364946, "grad_norm": 0.0013580817030742764, "learning_rate": 0.00019313396754092738, "loss": 23.0, "step": 2003 }, { "epoch": 1.2028811524609844, "grad_norm": 0.0006976149743422866, "learning_rate": 0.0001931270866483072, "loss": 23.0, "step": 2004 }, { "epoch": 1.203481392557023, "grad_norm": 0.0003821998252533376, "learning_rate": 0.00019312020243222083, "loss": 23.0, "step": 2005 }, { "epoch": 1.2040816326530612, "grad_norm": 0.0007348189828917384, "learning_rate": 0.00019311331489291398, "loss": 23.0, "step": 2006 }, { "epoch": 1.2046818727490995, "grad_norm": 0.0006483017932623625, "learning_rate": 0.00019310642403063243, "loss": 23.0, "step": 2007 }, { "epoch": 1.205282112845138, "grad_norm": 0.0008024135604500771, "learning_rate": 0.0001930995298456221, "loss": 23.0, "step": 2008 }, { "epoch": 1.2058823529411764, "grad_norm": 0.0016785354819148779, "learning_rate": 0.00019309263233812903, "loss": 23.0, "step": 2009 }, { "epoch": 1.206482593037215, "grad_norm": 0.0009075549314729869, "learning_rate": 0.00019308573150839936, "loss": 23.0, "step": 2010 }, { "epoch": 1.2070828331332533, "grad_norm": 0.0004748840001411736, "learning_rate": 0.00019307882735667938, "loss": 23.0, "step": 2011 }, { "epoch": 1.2076830732292918, "grad_norm": 0.0007885516388341784, "learning_rate": 0.0001930719198832155, "loss": 23.0, "step": 2012 }, { "epoch": 1.2082833133253301, "grad_norm": 0.0006227381527423859, "learning_rate": 0.0001930650090882542, "loss": 23.0, "step": 2013 }, { "epoch": 1.2088835534213684, "grad_norm": 0.0008232907857745886, "learning_rate": 0.00019305809497204216, "loss": 23.0, "step": 2014 }, { "epoch": 1.209483793517407, "grad_norm": 0.0010013766586780548, "learning_rate": 0.00019305117753482602, "loss": 23.0, "step": 2015 }, { "epoch": 1.2100840336134453, "grad_norm": 0.0007637909147888422, "learning_rate": 0.00019304425677685273, "loss": 23.0, "step": 2016 }, { "epoch": 1.2106842737094838, "grad_norm": 0.000717534392606467, "learning_rate": 0.00019303733269836925, "loss": 23.0, "step": 2017 }, { "epoch": 1.2112845138055222, "grad_norm": 0.0015243000816553831, "learning_rate": 0.0001930304052996227, "loss": 23.0, "step": 2018 }, { "epoch": 1.2118847539015607, "grad_norm": 0.0008644640329293907, "learning_rate": 0.0001930234745808603, "loss": 23.0, "step": 2019 }, { "epoch": 1.212484993997599, "grad_norm": 0.0006340988329611719, "learning_rate": 0.00019301654054232933, "loss": 23.0, "step": 2020 }, { "epoch": 1.2130852340936373, "grad_norm": 0.0020876338239759207, "learning_rate": 0.00019300960318427735, "loss": 23.0, "step": 2021 }, { "epoch": 1.2136854741896759, "grad_norm": 0.0013070470886304975, "learning_rate": 0.00019300266250695184, "loss": 23.0, "step": 2022 }, { "epoch": 1.2142857142857142, "grad_norm": 0.00044245924800634384, "learning_rate": 0.00019299571851060058, "loss": 23.0, "step": 2023 }, { "epoch": 1.2148859543817527, "grad_norm": 0.0024676304310560226, "learning_rate": 0.0001929887711954713, "loss": 23.0, "step": 2024 }, { "epoch": 1.215486194477791, "grad_norm": 0.0019125710241496563, "learning_rate": 0.00019298182056181197, "loss": 23.0, "step": 2025 }, { "epoch": 1.2160864345738296, "grad_norm": 0.001551152323372662, "learning_rate": 0.00019297486660987064, "loss": 23.0, "step": 2026 }, { "epoch": 1.216686674669868, "grad_norm": 0.0012455354444682598, "learning_rate": 0.00019296790933989554, "loss": 23.0, "step": 2027 }, { "epoch": 1.2172869147659064, "grad_norm": 0.0008667007205076516, "learning_rate": 0.00019296094875213485, "loss": 23.0, "step": 2028 }, { "epoch": 1.2178871548619448, "grad_norm": 0.0018421221757307649, "learning_rate": 0.000192953984846837, "loss": 23.0, "step": 2029 }, { "epoch": 1.2184873949579833, "grad_norm": 0.0005994903040118515, "learning_rate": 0.0001929470176242506, "loss": 23.0, "step": 2030 }, { "epoch": 1.2190876350540216, "grad_norm": 0.0008131373324431479, "learning_rate": 0.00019294004708462418, "loss": 23.0, "step": 2031 }, { "epoch": 1.21968787515006, "grad_norm": 0.0005514805088751018, "learning_rate": 0.00019293307322820654, "loss": 23.0, "step": 2032 }, { "epoch": 1.2202881152460985, "grad_norm": 0.002355314092710614, "learning_rate": 0.0001929260960552466, "loss": 23.0, "step": 2033 }, { "epoch": 1.2208883553421368, "grad_norm": 0.0011875891359522939, "learning_rate": 0.00019291911556599333, "loss": 23.0, "step": 2034 }, { "epoch": 1.2214885954381753, "grad_norm": 0.001237823162227869, "learning_rate": 0.00019291213176069586, "loss": 23.0, "step": 2035 }, { "epoch": 1.2220888355342137, "grad_norm": 0.0003888348874170333, "learning_rate": 0.00019290514463960335, "loss": 23.0, "step": 2036 }, { "epoch": 1.2226890756302522, "grad_norm": 0.0008330006385222077, "learning_rate": 0.00019289815420296522, "loss": 23.0, "step": 2037 }, { "epoch": 1.2232893157262905, "grad_norm": 0.0006106719956733286, "learning_rate": 0.00019289116045103098, "loss": 23.0, "step": 2038 }, { "epoch": 1.2238895558223288, "grad_norm": 0.0008156050462275743, "learning_rate": 0.0001928841633840501, "loss": 23.0, "step": 2039 }, { "epoch": 1.2244897959183674, "grad_norm": 0.0006298265652731061, "learning_rate": 0.00019287716300227242, "loss": 23.0, "step": 2040 }, { "epoch": 1.2250900360144057, "grad_norm": 0.0007098076166585088, "learning_rate": 0.00019287015930594765, "loss": 23.0, "step": 2041 }, { "epoch": 1.2256902761104442, "grad_norm": 0.001036522793583572, "learning_rate": 0.00019286315229532578, "loss": 23.0, "step": 2042 }, { "epoch": 1.2262905162064826, "grad_norm": 0.0007545838598161936, "learning_rate": 0.00019285614197065692, "loss": 23.0, "step": 2043 }, { "epoch": 1.226890756302521, "grad_norm": 0.0007426731754094362, "learning_rate": 0.0001928491283321912, "loss": 23.0, "step": 2044 }, { "epoch": 1.2274909963985594, "grad_norm": 0.0010281699942424893, "learning_rate": 0.0001928421113801789, "loss": 23.0, "step": 2045 }, { "epoch": 1.2280912364945977, "grad_norm": 0.002106086118146777, "learning_rate": 0.00019283509111487048, "loss": 23.0, "step": 2046 }, { "epoch": 1.2286914765906363, "grad_norm": 0.0012783302227035165, "learning_rate": 0.00019282806753651645, "loss": 23.0, "step": 2047 }, { "epoch": 1.2292917166866746, "grad_norm": 0.0009432644001208246, "learning_rate": 0.00019282104064536747, "loss": 23.0, "step": 2048 }, { "epoch": 1.2298919567827131, "grad_norm": 0.0011243056505918503, "learning_rate": 0.00019281401044167433, "loss": 23.0, "step": 2049 }, { "epoch": 1.2304921968787514, "grad_norm": 0.0022574688773602247, "learning_rate": 0.0001928069769256879, "loss": 23.0, "step": 2050 }, { "epoch": 1.23109243697479, "grad_norm": 0.0010988580761477351, "learning_rate": 0.00019279994009765918, "loss": 23.0, "step": 2051 }, { "epoch": 1.2316926770708283, "grad_norm": 0.0012815982336178422, "learning_rate": 0.00019279289995783932, "loss": 23.0, "step": 2052 }, { "epoch": 1.2322929171668668, "grad_norm": 0.0008450283785350621, "learning_rate": 0.00019278585650647957, "loss": 23.0, "step": 2053 }, { "epoch": 1.2328931572629052, "grad_norm": 0.0011646057246252894, "learning_rate": 0.00019277880974383124, "loss": 23.0, "step": 2054 }, { "epoch": 1.2334933973589437, "grad_norm": 0.00048065732698887587, "learning_rate": 0.00019277175967014586, "loss": 23.0, "step": 2055 }, { "epoch": 1.234093637454982, "grad_norm": 0.0006098438170738518, "learning_rate": 0.000192764706285675, "loss": 23.0, "step": 2056 }, { "epoch": 1.2346938775510203, "grad_norm": 0.0008195044356398284, "learning_rate": 0.00019275764959067042, "loss": 23.0, "step": 2057 }, { "epoch": 1.2352941176470589, "grad_norm": 0.000680581375490874, "learning_rate": 0.0001927505895853839, "loss": 23.0, "step": 2058 }, { "epoch": 1.2358943577430972, "grad_norm": 0.00044709868961945176, "learning_rate": 0.00019274352627006745, "loss": 23.0, "step": 2059 }, { "epoch": 1.2364945978391357, "grad_norm": 0.0006181981298141181, "learning_rate": 0.0001927364596449731, "loss": 23.0, "step": 2060 }, { "epoch": 1.237094837935174, "grad_norm": 0.0008112747455015779, "learning_rate": 0.00019272938971035304, "loss": 23.0, "step": 2061 }, { "epoch": 1.2376950780312126, "grad_norm": 0.001151968608610332, "learning_rate": 0.0001927223164664596, "loss": 23.0, "step": 2062 }, { "epoch": 1.238295318127251, "grad_norm": 0.000988475512713194, "learning_rate": 0.00019271523991354518, "loss": 23.0, "step": 2063 }, { "epoch": 1.2388955582232892, "grad_norm": 0.0005085468292236328, "learning_rate": 0.00019270816005186237, "loss": 23.0, "step": 2064 }, { "epoch": 1.2394957983193278, "grad_norm": 0.0015622277278453112, "learning_rate": 0.00019270107688166378, "loss": 23.0, "step": 2065 }, { "epoch": 1.240096038415366, "grad_norm": 0.00020089696045033634, "learning_rate": 0.0001926939904032022, "loss": 23.0, "step": 2066 }, { "epoch": 1.2406962785114046, "grad_norm": 0.0010697637917473912, "learning_rate": 0.00019268690061673057, "loss": 23.0, "step": 2067 }, { "epoch": 1.241296518607443, "grad_norm": 0.0005508419126272202, "learning_rate": 0.00019267980752250185, "loss": 23.0, "step": 2068 }, { "epoch": 1.2418967587034815, "grad_norm": 0.0016626521246507764, "learning_rate": 0.00019267271112076922, "loss": 23.0, "step": 2069 }, { "epoch": 1.2424969987995198, "grad_norm": 0.0008421835373155773, "learning_rate": 0.0001926656114117859, "loss": 23.0, "step": 2070 }, { "epoch": 1.2430972388955581, "grad_norm": 0.0008170657092705369, "learning_rate": 0.00019265850839580528, "loss": 23.0, "step": 2071 }, { "epoch": 1.2436974789915967, "grad_norm": 0.0004373532719910145, "learning_rate": 0.00019265140207308084, "loss": 23.0, "step": 2072 }, { "epoch": 1.244297719087635, "grad_norm": 0.0011121369898319244, "learning_rate": 0.0001926442924438662, "loss": 23.0, "step": 2073 }, { "epoch": 1.2448979591836735, "grad_norm": 0.002869250951334834, "learning_rate": 0.00019263717950841506, "loss": 23.0, "step": 2074 }, { "epoch": 1.2454981992797118, "grad_norm": 0.0016094837337732315, "learning_rate": 0.00019263006326698126, "loss": 23.0, "step": 2075 }, { "epoch": 1.2460984393757504, "grad_norm": 0.002325515728443861, "learning_rate": 0.00019262294371981878, "loss": 23.0, "step": 2076 }, { "epoch": 1.2466986794717887, "grad_norm": 0.0007525268592871726, "learning_rate": 0.0001926158208671817, "loss": 23.0, "step": 2077 }, { "epoch": 1.247298919567827, "grad_norm": 0.001686456729657948, "learning_rate": 0.0001926086947093242, "loss": 23.0, "step": 2078 }, { "epoch": 1.2478991596638656, "grad_norm": 0.0010857430752366781, "learning_rate": 0.00019260156524650063, "loss": 23.0, "step": 2079 }, { "epoch": 1.2484993997599039, "grad_norm": 0.002079520607367158, "learning_rate": 0.00019259443247896538, "loss": 23.0, "step": 2080 }, { "epoch": 1.2490996398559424, "grad_norm": 0.0006012477679178119, "learning_rate": 0.00019258729640697303, "loss": 23.0, "step": 2081 }, { "epoch": 1.2496998799519807, "grad_norm": 0.000783745723310858, "learning_rate": 0.0001925801570307782, "loss": 23.0, "step": 2082 }, { "epoch": 1.2503001200480193, "grad_norm": 0.0021253866143524647, "learning_rate": 0.00019257301435063573, "loss": 23.0, "step": 2083 }, { "epoch": 1.2509003601440576, "grad_norm": 0.0007235134835354984, "learning_rate": 0.00019256586836680054, "loss": 23.0, "step": 2084 }, { "epoch": 1.251500600240096, "grad_norm": 0.0009064192418009043, "learning_rate": 0.00019255871907952757, "loss": 23.0, "step": 2085 }, { "epoch": 1.2521008403361344, "grad_norm": 0.00042548036435618997, "learning_rate": 0.00019255156648907202, "loss": 23.0, "step": 2086 }, { "epoch": 1.252701080432173, "grad_norm": 0.0005528698093257844, "learning_rate": 0.0001925444105956891, "loss": 23.0, "step": 2087 }, { "epoch": 1.2533013205282113, "grad_norm": 0.00168812635820359, "learning_rate": 0.00019253725139963427, "loss": 23.0, "step": 2088 }, { "epoch": 1.2539015606242496, "grad_norm": 0.0014038488734513521, "learning_rate": 0.00019253008890116296, "loss": 23.0, "step": 2089 }, { "epoch": 1.2545018007202882, "grad_norm": 0.0008206113707274199, "learning_rate": 0.00019252292310053079, "loss": 23.0, "step": 2090 }, { "epoch": 1.2551020408163265, "grad_norm": 0.0004761457093991339, "learning_rate": 0.00019251575399799347, "loss": 23.0, "step": 2091 }, { "epoch": 1.255702280912365, "grad_norm": 0.001002299482934177, "learning_rate": 0.0001925085815938069, "loss": 23.0, "step": 2092 }, { "epoch": 1.2563025210084033, "grad_norm": 0.0009327147854492068, "learning_rate": 0.00019250140588822698, "loss": 23.0, "step": 2093 }, { "epoch": 1.2569027611044419, "grad_norm": 0.0012192872818559408, "learning_rate": 0.00019249422688150984, "loss": 23.0, "step": 2094 }, { "epoch": 1.2575030012004802, "grad_norm": 0.0009881231235340238, "learning_rate": 0.00019248704457391168, "loss": 23.0, "step": 2095 }, { "epoch": 1.2581032412965185, "grad_norm": 0.001701731001958251, "learning_rate": 0.0001924798589656888, "loss": 23.0, "step": 2096 }, { "epoch": 1.258703481392557, "grad_norm": 0.0018471120856702328, "learning_rate": 0.00019247267005709763, "loss": 23.0, "step": 2097 }, { "epoch": 1.2593037214885954, "grad_norm": 0.0005706116207875311, "learning_rate": 0.00019246547784839473, "loss": 23.0, "step": 2098 }, { "epoch": 1.259903961584634, "grad_norm": 0.002569433068856597, "learning_rate": 0.0001924582823398368, "loss": 23.0, "step": 2099 }, { "epoch": 1.2605042016806722, "grad_norm": 0.0016303047304973006, "learning_rate": 0.00019245108353168057, "loss": 23.0, "step": 2100 }, { "epoch": 1.2611044417767108, "grad_norm": 0.0008211960666812956, "learning_rate": 0.000192443881424183, "loss": 23.0, "step": 2101 }, { "epoch": 1.261704681872749, "grad_norm": 0.002193295396864414, "learning_rate": 0.0001924366760176011, "loss": 23.0, "step": 2102 }, { "epoch": 1.2623049219687874, "grad_norm": 0.0011547781759873033, "learning_rate": 0.000192429467312192, "loss": 23.0, "step": 2103 }, { "epoch": 1.262905162064826, "grad_norm": 0.0009378142422065139, "learning_rate": 0.00019242225530821297, "loss": 23.0, "step": 2104 }, { "epoch": 1.2635054021608643, "grad_norm": 0.0003709706070367247, "learning_rate": 0.00019241504000592138, "loss": 23.0, "step": 2105 }, { "epoch": 1.2641056422569028, "grad_norm": 0.0010277227265760303, "learning_rate": 0.00019240782140557476, "loss": 23.0, "step": 2106 }, { "epoch": 1.2647058823529411, "grad_norm": 0.001920362701639533, "learning_rate": 0.00019240059950743067, "loss": 23.0, "step": 2107 }, { "epoch": 1.2653061224489797, "grad_norm": 0.002360978629440069, "learning_rate": 0.00019239337431174687, "loss": 23.0, "step": 2108 }, { "epoch": 1.265906362545018, "grad_norm": 0.0010547456331551075, "learning_rate": 0.00019238614581878121, "loss": 23.0, "step": 2109 }, { "epoch": 1.2665066026410563, "grad_norm": 0.000743915035855025, "learning_rate": 0.0001923789140287916, "loss": 23.0, "step": 2110 }, { "epoch": 1.2671068427370948, "grad_norm": 0.001828330336138606, "learning_rate": 0.00019237167894203626, "loss": 23.0, "step": 2111 }, { "epoch": 1.2677070828331334, "grad_norm": 0.0018606131197884679, "learning_rate": 0.00019236444055877326, "loss": 23.0, "step": 2112 }, { "epoch": 1.2683073229291717, "grad_norm": 0.0015011877985671163, "learning_rate": 0.000192357198879261, "loss": 23.0, "step": 2113 }, { "epoch": 1.26890756302521, "grad_norm": 0.0007122366223484278, "learning_rate": 0.00019234995390375785, "loss": 23.0, "step": 2114 }, { "epoch": 1.2695078031212486, "grad_norm": 0.0007740426226519048, "learning_rate": 0.00019234270563252238, "loss": 23.0, "step": 2115 }, { "epoch": 1.2701080432172869, "grad_norm": 0.0010612767655402422, "learning_rate": 0.00019233545406581332, "loss": 23.0, "step": 2116 }, { "epoch": 1.2707082833133252, "grad_norm": 0.0016505690291523933, "learning_rate": 0.0001923281992038894, "loss": 23.0, "step": 2117 }, { "epoch": 1.2713085234093637, "grad_norm": 0.0023043276742100716, "learning_rate": 0.00019232094104700958, "loss": 23.0, "step": 2118 }, { "epoch": 1.2719087635054023, "grad_norm": 0.001076166401617229, "learning_rate": 0.00019231367959543283, "loss": 23.0, "step": 2119 }, { "epoch": 1.2725090036014406, "grad_norm": 0.0007836589938960969, "learning_rate": 0.0001923064148494183, "loss": 23.0, "step": 2120 }, { "epoch": 1.273109243697479, "grad_norm": 0.0007020245539024472, "learning_rate": 0.00019229914680922528, "loss": 23.0, "step": 2121 }, { "epoch": 1.2737094837935174, "grad_norm": 0.0008544962620362639, "learning_rate": 0.00019229187547511312, "loss": 23.0, "step": 2122 }, { "epoch": 1.2743097238895558, "grad_norm": 0.0005639759474433959, "learning_rate": 0.0001922846008473413, "loss": 23.0, "step": 2123 }, { "epoch": 1.2749099639855943, "grad_norm": 0.0004858477332163602, "learning_rate": 0.00019227732292616953, "loss": 23.0, "step": 2124 }, { "epoch": 1.2755102040816326, "grad_norm": 0.0009530950337648392, "learning_rate": 0.00019227004171185744, "loss": 23.0, "step": 2125 }, { "epoch": 1.2761104441776712, "grad_norm": 0.0008700921316631138, "learning_rate": 0.0001922627572046649, "loss": 23.0, "step": 2126 }, { "epoch": 1.2767106842737095, "grad_norm": 0.00051358831115067, "learning_rate": 0.00019225546940485186, "loss": 23.0, "step": 2127 }, { "epoch": 1.2773109243697478, "grad_norm": 0.0018087092321366072, "learning_rate": 0.00019224817831267849, "loss": 23.0, "step": 2128 }, { "epoch": 1.2779111644657863, "grad_norm": 0.0012789365136995912, "learning_rate": 0.00019224088392840488, "loss": 23.0, "step": 2129 }, { "epoch": 1.2785114045618247, "grad_norm": 0.0012055577244609594, "learning_rate": 0.0001922335862522914, "loss": 23.0, "step": 2130 }, { "epoch": 1.2791116446578632, "grad_norm": 0.0004859871114604175, "learning_rate": 0.00019222628528459847, "loss": 23.0, "step": 2131 }, { "epoch": 1.2797118847539015, "grad_norm": 0.002568140160292387, "learning_rate": 0.00019221898102558666, "loss": 23.0, "step": 2132 }, { "epoch": 1.28031212484994, "grad_norm": 0.0009791409829631448, "learning_rate": 0.00019221167347551664, "loss": 23.0, "step": 2133 }, { "epoch": 1.2809123649459784, "grad_norm": 0.0005456181825138628, "learning_rate": 0.00019220436263464922, "loss": 23.0, "step": 2134 }, { "epoch": 1.2815126050420167, "grad_norm": 0.000637693505268544, "learning_rate": 0.0001921970485032452, "loss": 23.0, "step": 2135 }, { "epoch": 1.2821128451380552, "grad_norm": 0.0009480795706622303, "learning_rate": 0.00019218973108156573, "loss": 23.0, "step": 2136 }, { "epoch": 1.2827130852340938, "grad_norm": 0.0003983963106293231, "learning_rate": 0.0001921824103698719, "loss": 23.0, "step": 2137 }, { "epoch": 1.283313325330132, "grad_norm": 0.0008890287135727704, "learning_rate": 0.00019217508636842494, "loss": 23.0, "step": 2138 }, { "epoch": 1.2839135654261704, "grad_norm": 0.0007761778542771935, "learning_rate": 0.00019216775907748628, "loss": 23.0, "step": 2139 }, { "epoch": 1.284513805522209, "grad_norm": 0.0007144435658119619, "learning_rate": 0.00019216042849731736, "loss": 23.0, "step": 2140 }, { "epoch": 1.2851140456182473, "grad_norm": 0.001242082566022873, "learning_rate": 0.00019215309462817983, "loss": 23.0, "step": 2141 }, { "epoch": 1.2857142857142856, "grad_norm": 0.0006997886812314391, "learning_rate": 0.00019214575747033538, "loss": 23.0, "step": 2142 }, { "epoch": 1.2863145258103241, "grad_norm": 0.0010159621015191078, "learning_rate": 0.0001921384170240459, "loss": 23.0, "step": 2143 }, { "epoch": 1.2869147659063627, "grad_norm": 0.0007167985895648599, "learning_rate": 0.00019213107328957328, "loss": 23.0, "step": 2144 }, { "epoch": 1.287515006002401, "grad_norm": 0.0008636672864668071, "learning_rate": 0.0001921237262671797, "loss": 23.0, "step": 2145 }, { "epoch": 1.2881152460984393, "grad_norm": 0.0005083580617792904, "learning_rate": 0.00019211637595712726, "loss": 23.0, "step": 2146 }, { "epoch": 1.2887154861944778, "grad_norm": 0.0004971363814547658, "learning_rate": 0.0001921090223596783, "loss": 23.0, "step": 2147 }, { "epoch": 1.2893157262905162, "grad_norm": 0.0008241168106906116, "learning_rate": 0.0001921016654750953, "loss": 23.0, "step": 2148 }, { "epoch": 1.2899159663865547, "grad_norm": 0.001977398758754134, "learning_rate": 0.00019209430530364076, "loss": 23.0, "step": 2149 }, { "epoch": 1.290516206482593, "grad_norm": 0.0014348250115290284, "learning_rate": 0.00019208694184557736, "loss": 23.0, "step": 2150 }, { "epoch": 1.2911164465786316, "grad_norm": 0.0009256337652914226, "learning_rate": 0.00019207957510116786, "loss": 23.0, "step": 2151 }, { "epoch": 1.2917166866746699, "grad_norm": 0.0022123123053461313, "learning_rate": 0.00019207220507067523, "loss": 23.0, "step": 2152 }, { "epoch": 1.2923169267707082, "grad_norm": 0.0014656918356195092, "learning_rate": 0.00019206483175436242, "loss": 23.0, "step": 2153 }, { "epoch": 1.2929171668667467, "grad_norm": 0.0016659768298268318, "learning_rate": 0.00019205745515249255, "loss": 23.0, "step": 2154 }, { "epoch": 1.293517406962785, "grad_norm": 0.000796042731963098, "learning_rate": 0.00019205007526532894, "loss": 23.0, "step": 2155 }, { "epoch": 1.2941176470588236, "grad_norm": 0.000784591306000948, "learning_rate": 0.00019204269209313493, "loss": 23.0, "step": 2156 }, { "epoch": 1.294717887154862, "grad_norm": 0.0017622907180339098, "learning_rate": 0.000192035305636174, "loss": 23.0, "step": 2157 }, { "epoch": 1.2953181272509005, "grad_norm": 0.0008178095449693501, "learning_rate": 0.0001920279158947097, "loss": 23.0, "step": 2158 }, { "epoch": 1.2959183673469388, "grad_norm": 0.0005034728674218059, "learning_rate": 0.00019202052286900587, "loss": 23.0, "step": 2159 }, { "epoch": 1.296518607442977, "grad_norm": 0.00046888680662959814, "learning_rate": 0.00019201312655932627, "loss": 23.0, "step": 2160 }, { "epoch": 1.2971188475390156, "grad_norm": 0.0005256042932160199, "learning_rate": 0.00019200572696593485, "loss": 23.0, "step": 2161 }, { "epoch": 1.297719087635054, "grad_norm": 0.0023607781622558832, "learning_rate": 0.00019199832408909573, "loss": 23.0, "step": 2162 }, { "epoch": 1.2983193277310925, "grad_norm": 0.0007398081361316144, "learning_rate": 0.00019199091792907304, "loss": 23.0, "step": 2163 }, { "epoch": 1.2989195678271308, "grad_norm": 0.0015953012043610215, "learning_rate": 0.00019198350848613114, "loss": 23.0, "step": 2164 }, { "epoch": 1.2995198079231693, "grad_norm": 0.000722983677405864, "learning_rate": 0.0001919760957605344, "loss": 23.0, "step": 2165 }, { "epoch": 1.3001200480192077, "grad_norm": 0.0008493372588418424, "learning_rate": 0.00019196867975254742, "loss": 23.0, "step": 2166 }, { "epoch": 1.300720288115246, "grad_norm": 0.0012202919460833073, "learning_rate": 0.00019196126046243484, "loss": 23.0, "step": 2167 }, { "epoch": 1.3013205282112845, "grad_norm": 0.0005322351935319602, "learning_rate": 0.0001919538378904614, "loss": 23.0, "step": 2168 }, { "epoch": 1.301920768307323, "grad_norm": 0.0007932167500257492, "learning_rate": 0.00019194641203689204, "loss": 23.0, "step": 2169 }, { "epoch": 1.3025210084033614, "grad_norm": 0.0009172199061140418, "learning_rate": 0.00019193898290199174, "loss": 23.0, "step": 2170 }, { "epoch": 1.3031212484993997, "grad_norm": 0.0008713841089047492, "learning_rate": 0.00019193155048602564, "loss": 23.0, "step": 2171 }, { "epoch": 1.3037214885954382, "grad_norm": 0.0030673607252538204, "learning_rate": 0.000191924114789259, "loss": 23.0, "step": 2172 }, { "epoch": 1.3043217286914766, "grad_norm": 0.0025866650976240635, "learning_rate": 0.00019191667581195713, "loss": 23.0, "step": 2173 }, { "epoch": 1.3049219687875149, "grad_norm": 0.000421585253207013, "learning_rate": 0.00019190923355438556, "loss": 23.0, "step": 2174 }, { "epoch": 1.3055222088835534, "grad_norm": 0.00038466870319098234, "learning_rate": 0.00019190178801680984, "loss": 23.0, "step": 2175 }, { "epoch": 1.306122448979592, "grad_norm": 0.001396143576130271, "learning_rate": 0.0001918943391994957, "loss": 23.0, "step": 2176 }, { "epoch": 1.3067226890756303, "grad_norm": 0.0006378819234669209, "learning_rate": 0.000191886887102709, "loss": 23.0, "step": 2177 }, { "epoch": 1.3073229291716686, "grad_norm": 0.0013305838219821453, "learning_rate": 0.00019187943172671563, "loss": 23.0, "step": 2178 }, { "epoch": 1.3079231692677071, "grad_norm": 0.0008714368450455368, "learning_rate": 0.00019187197307178171, "loss": 23.0, "step": 2179 }, { "epoch": 1.3085234093637454, "grad_norm": 0.0007764401962049305, "learning_rate": 0.00019186451113817338, "loss": 23.0, "step": 2180 }, { "epoch": 1.309123649459784, "grad_norm": 0.0008344615343958139, "learning_rate": 0.00019185704592615698, "loss": 23.0, "step": 2181 }, { "epoch": 1.3097238895558223, "grad_norm": 0.001748396665789187, "learning_rate": 0.00019184957743599888, "loss": 23.0, "step": 2182 }, { "epoch": 1.3103241296518608, "grad_norm": 0.0007260398706421256, "learning_rate": 0.0001918421056679656, "loss": 23.0, "step": 2183 }, { "epoch": 1.3109243697478992, "grad_norm": 0.0010761874727904797, "learning_rate": 0.00019183463062232386, "loss": 23.0, "step": 2184 }, { "epoch": 1.3115246098439375, "grad_norm": 0.0006633952725678682, "learning_rate": 0.0001918271522993403, "loss": 23.0, "step": 2185 }, { "epoch": 1.312124849939976, "grad_norm": 0.00033936891122721136, "learning_rate": 0.00019181967069928198, "loss": 23.0, "step": 2186 }, { "epoch": 1.3127250900360143, "grad_norm": 0.0006078967708162963, "learning_rate": 0.00019181218582241572, "loss": 23.0, "step": 2187 }, { "epoch": 1.3133253301320529, "grad_norm": 0.0008051611948758364, "learning_rate": 0.00019180469766900878, "loss": 23.0, "step": 2188 }, { "epoch": 1.3139255702280912, "grad_norm": 0.0008598083513788879, "learning_rate": 0.0001917972062393283, "loss": 23.0, "step": 2189 }, { "epoch": 1.3145258103241297, "grad_norm": 0.0005701205227524042, "learning_rate": 0.00019178971153364167, "loss": 23.0, "step": 2190 }, { "epoch": 1.315126050420168, "grad_norm": 0.0019546288531273603, "learning_rate": 0.00019178221355221634, "loss": 23.0, "step": 2191 }, { "epoch": 1.3157262905162064, "grad_norm": 0.0011223703622817993, "learning_rate": 0.00019177471229531992, "loss": 23.0, "step": 2192 }, { "epoch": 1.316326530612245, "grad_norm": 0.0010649703908711672, "learning_rate": 0.00019176720776322006, "loss": 23.0, "step": 2193 }, { "epoch": 1.3169267707082832, "grad_norm": 0.0003854198439512402, "learning_rate": 0.00019175969995618463, "loss": 23.0, "step": 2194 }, { "epoch": 1.3175270108043218, "grad_norm": 0.0007395111606456339, "learning_rate": 0.00019175218887448154, "loss": 23.0, "step": 2195 }, { "epoch": 1.31812725090036, "grad_norm": 0.000572826131246984, "learning_rate": 0.00019174467451837883, "loss": 23.0, "step": 2196 }, { "epoch": 1.3187274909963986, "grad_norm": 0.0016554270405322313, "learning_rate": 0.00019173715688814468, "loss": 23.0, "step": 2197 }, { "epoch": 1.319327731092437, "grad_norm": 0.001633728388696909, "learning_rate": 0.00019172963598404737, "loss": 23.0, "step": 2198 }, { "epoch": 1.3199279711884753, "grad_norm": 0.0005962157738395035, "learning_rate": 0.00019172211180635534, "loss": 23.0, "step": 2199 }, { "epoch": 1.3205282112845138, "grad_norm": 0.0007311934023164213, "learning_rate": 0.00019171458435533707, "loss": 23.0, "step": 2200 }, { "epoch": 1.3211284513805523, "grad_norm": 0.0023586212191730738, "learning_rate": 0.00019170705363126122, "loss": 23.0, "step": 2201 }, { "epoch": 1.3217286914765907, "grad_norm": 0.0025315319653600454, "learning_rate": 0.00019169951963439647, "loss": 23.0, "step": 2202 }, { "epoch": 1.322328931572629, "grad_norm": 0.0017554480582475662, "learning_rate": 0.00019169198236501178, "loss": 23.0, "step": 2203 }, { "epoch": 1.3229291716686675, "grad_norm": 0.0008356471662409604, "learning_rate": 0.00019168444182337608, "loss": 23.0, "step": 2204 }, { "epoch": 1.3235294117647058, "grad_norm": 0.0005913945497013628, "learning_rate": 0.0001916768980097585, "loss": 23.0, "step": 2205 }, { "epoch": 1.3241296518607442, "grad_norm": 0.0014620408182963729, "learning_rate": 0.0001916693509244283, "loss": 23.0, "step": 2206 }, { "epoch": 1.3247298919567827, "grad_norm": 0.000512272643391043, "learning_rate": 0.0001916618005676547, "loss": 23.0, "step": 2207 }, { "epoch": 1.3253301320528212, "grad_norm": 0.0008890106109902263, "learning_rate": 0.00019165424693970724, "loss": 23.0, "step": 2208 }, { "epoch": 1.3259303721488596, "grad_norm": 0.0010873951250687242, "learning_rate": 0.00019164669004085547, "loss": 23.0, "step": 2209 }, { "epoch": 1.3265306122448979, "grad_norm": 0.0007970140432007611, "learning_rate": 0.00019163912987136907, "loss": 23.0, "step": 2210 }, { "epoch": 1.3271308523409364, "grad_norm": 0.00045438751112669706, "learning_rate": 0.00019163156643151787, "loss": 23.0, "step": 2211 }, { "epoch": 1.3277310924369747, "grad_norm": 0.001197776640765369, "learning_rate": 0.00019162399972157178, "loss": 23.0, "step": 2212 }, { "epoch": 1.3283313325330133, "grad_norm": 0.0003519585879985243, "learning_rate": 0.0001916164297418008, "loss": 23.0, "step": 2213 }, { "epoch": 1.3289315726290516, "grad_norm": 0.0005266814259812236, "learning_rate": 0.00019160885649247507, "loss": 23.0, "step": 2214 }, { "epoch": 1.3295318127250901, "grad_norm": 0.0012946182396262884, "learning_rate": 0.00019160127997386496, "loss": 23.0, "step": 2215 }, { "epoch": 1.3301320528211285, "grad_norm": 0.001070786383934319, "learning_rate": 0.00019159370018624077, "loss": 23.0, "step": 2216 }, { "epoch": 1.3307322929171668, "grad_norm": 0.0010140641825273633, "learning_rate": 0.000191586117129873, "loss": 23.0, "step": 2217 }, { "epoch": 1.3313325330132053, "grad_norm": 0.0005159311112947762, "learning_rate": 0.00019157853080503233, "loss": 23.0, "step": 2218 }, { "epoch": 1.3319327731092436, "grad_norm": 0.00089635094627738, "learning_rate": 0.0001915709412119895, "loss": 23.0, "step": 2219 }, { "epoch": 1.3325330132052822, "grad_norm": 0.0012715085176751018, "learning_rate": 0.00019156334835101525, "loss": 23.0, "step": 2220 }, { "epoch": 1.3331332533013205, "grad_norm": 0.0008847500430420041, "learning_rate": 0.00019155575222238068, "loss": 23.0, "step": 2221 }, { "epoch": 1.333733493397359, "grad_norm": 0.0015832451172173023, "learning_rate": 0.00019154815282635678, "loss": 23.0, "step": 2222 }, { "epoch": 1.3343337334933973, "grad_norm": 0.0008942346321418881, "learning_rate": 0.00019154055016321484, "loss": 23.0, "step": 2223 }, { "epoch": 1.3349339735894357, "grad_norm": 0.0011502058478072286, "learning_rate": 0.00019153294423322612, "loss": 23.0, "step": 2224 }, { "epoch": 1.3355342136854742, "grad_norm": 0.0013056400930508971, "learning_rate": 0.00019152533503666205, "loss": 23.0, "step": 2225 }, { "epoch": 1.3361344537815127, "grad_norm": 0.0015896232798695564, "learning_rate": 0.00019151772257379424, "loss": 23.0, "step": 2226 }, { "epoch": 1.336734693877551, "grad_norm": 0.0015102779725566506, "learning_rate": 0.0001915101068448943, "loss": 23.0, "step": 2227 }, { "epoch": 1.3373349339735894, "grad_norm": 0.001040091272443533, "learning_rate": 0.00019150248785023406, "loss": 23.0, "step": 2228 }, { "epoch": 1.337935174069628, "grad_norm": 0.0003573865396901965, "learning_rate": 0.00019149486559008535, "loss": 23.0, "step": 2229 }, { "epoch": 1.3385354141656662, "grad_norm": 0.00047850568080320954, "learning_rate": 0.0001914872400647203, "loss": 23.0, "step": 2230 }, { "epoch": 1.3391356542617046, "grad_norm": 0.0013392203254625201, "learning_rate": 0.00019147961127441093, "loss": 23.0, "step": 2231 }, { "epoch": 1.339735894357743, "grad_norm": 0.001735447091050446, "learning_rate": 0.00019147197921942958, "loss": 23.0, "step": 2232 }, { "epoch": 1.3403361344537816, "grad_norm": 0.001040574279613793, "learning_rate": 0.0001914643439000486, "loss": 23.0, "step": 2233 }, { "epoch": 1.34093637454982, "grad_norm": 0.0008239164599217474, "learning_rate": 0.00019145670531654045, "loss": 23.0, "step": 2234 }, { "epoch": 1.3415366146458583, "grad_norm": 0.0007725712493993342, "learning_rate": 0.00019144906346917772, "loss": 23.0, "step": 2235 }, { "epoch": 1.3421368547418968, "grad_norm": 0.001513920957222581, "learning_rate": 0.00019144141835823316, "loss": 23.0, "step": 2236 }, { "epoch": 1.3427370948379351, "grad_norm": 0.0009453982929699123, "learning_rate": 0.00019143376998397962, "loss": 23.0, "step": 2237 }, { "epoch": 1.3433373349339737, "grad_norm": 0.000519758730661124, "learning_rate": 0.00019142611834668997, "loss": 23.0, "step": 2238 }, { "epoch": 1.343937575030012, "grad_norm": 0.0008691302500665188, "learning_rate": 0.00019141846344663738, "loss": 23.0, "step": 2239 }, { "epoch": 1.3445378151260505, "grad_norm": 0.00117309985216707, "learning_rate": 0.00019141080528409495, "loss": 23.0, "step": 2240 }, { "epoch": 1.3451380552220888, "grad_norm": 0.0016505197854712605, "learning_rate": 0.00019140314385933608, "loss": 23.0, "step": 2241 }, { "epoch": 1.3457382953181272, "grad_norm": 0.001228606328368187, "learning_rate": 0.00019139547917263405, "loss": 23.0, "step": 2242 }, { "epoch": 1.3463385354141657, "grad_norm": 0.0015320234233513474, "learning_rate": 0.0001913878112242625, "loss": 23.0, "step": 2243 }, { "epoch": 1.346938775510204, "grad_norm": 0.0024812333285808563, "learning_rate": 0.00019138014001449503, "loss": 23.0, "step": 2244 }, { "epoch": 1.3475390156062426, "grad_norm": 0.001015107030980289, "learning_rate": 0.00019137246554360547, "loss": 23.0, "step": 2245 }, { "epoch": 1.3481392557022809, "grad_norm": 0.0019902249332517385, "learning_rate": 0.0001913647878118676, "loss": 23.0, "step": 2246 }, { "epoch": 1.3487394957983194, "grad_norm": 0.0008530864142812788, "learning_rate": 0.00019135710681955547, "loss": 23.0, "step": 2247 }, { "epoch": 1.3493397358943577, "grad_norm": 0.0009168292162939906, "learning_rate": 0.00019134942256694322, "loss": 23.0, "step": 2248 }, { "epoch": 1.349939975990396, "grad_norm": 0.0006323324050754309, "learning_rate": 0.00019134173505430506, "loss": 23.0, "step": 2249 }, { "epoch": 1.3505402160864346, "grad_norm": 0.002496844856068492, "learning_rate": 0.00019133404428191533, "loss": 23.0, "step": 2250 }, { "epoch": 1.351140456182473, "grad_norm": 0.0013751224614679813, "learning_rate": 0.0001913263502500485, "loss": 23.0, "step": 2251 }, { "epoch": 1.3517406962785115, "grad_norm": 0.0009248317801393569, "learning_rate": 0.00019131865295897912, "loss": 23.0, "step": 2252 }, { "epoch": 1.3523409363745498, "grad_norm": 0.0009484643815085292, "learning_rate": 0.00019131095240898196, "loss": 23.0, "step": 2253 }, { "epoch": 1.3529411764705883, "grad_norm": 0.00116802251432091, "learning_rate": 0.00019130324860033176, "loss": 23.0, "step": 2254 }, { "epoch": 1.3535414165666266, "grad_norm": 0.000703223398886621, "learning_rate": 0.0001912955415333035, "loss": 23.0, "step": 2255 }, { "epoch": 1.354141656662665, "grad_norm": 0.0020953151397407055, "learning_rate": 0.00019128783120817214, "loss": 23.0, "step": 2256 }, { "epoch": 1.3547418967587035, "grad_norm": 0.0014630835503339767, "learning_rate": 0.00019128011762521298, "loss": 23.0, "step": 2257 }, { "epoch": 1.355342136854742, "grad_norm": 0.002706082770600915, "learning_rate": 0.0001912724007847012, "loss": 23.0, "step": 2258 }, { "epoch": 1.3559423769507803, "grad_norm": 0.0006396888638846576, "learning_rate": 0.00019126468068691218, "loss": 23.0, "step": 2259 }, { "epoch": 1.3565426170468187, "grad_norm": 0.0011111192870885134, "learning_rate": 0.0001912569573321215, "loss": 23.0, "step": 2260 }, { "epoch": 1.3571428571428572, "grad_norm": 0.001541930716484785, "learning_rate": 0.00019124923072060476, "loss": 23.0, "step": 2261 }, { "epoch": 1.3577430972388955, "grad_norm": 0.0007236685487441719, "learning_rate": 0.0001912415008526377, "loss": 23.0, "step": 2262 }, { "epoch": 1.3583433373349338, "grad_norm": 0.0009044542675837874, "learning_rate": 0.00019123376772849614, "loss": 23.0, "step": 2263 }, { "epoch": 1.3589435774309724, "grad_norm": 0.0010049508418887854, "learning_rate": 0.00019122603134845613, "loss": 23.0, "step": 2264 }, { "epoch": 1.359543817527011, "grad_norm": 0.000888896465767175, "learning_rate": 0.00019121829171279368, "loss": 23.0, "step": 2265 }, { "epoch": 1.3601440576230492, "grad_norm": 0.0020322557538747787, "learning_rate": 0.0001912105488217851, "loss": 23.0, "step": 2266 }, { "epoch": 1.3607442977190876, "grad_norm": 0.0015879786806181073, "learning_rate": 0.0001912028026757066, "loss": 23.0, "step": 2267 }, { "epoch": 1.361344537815126, "grad_norm": 0.001246255007572472, "learning_rate": 0.0001911950532748347, "loss": 23.0, "step": 2268 }, { "epoch": 1.3619447779111644, "grad_norm": 0.0009220580104738474, "learning_rate": 0.00019118730061944592, "loss": 23.0, "step": 2269 }, { "epoch": 1.362545018007203, "grad_norm": 0.0006278387154452503, "learning_rate": 0.00019117954470981697, "loss": 23.0, "step": 2270 }, { "epoch": 1.3631452581032413, "grad_norm": 0.001516466960310936, "learning_rate": 0.0001911717855462246, "loss": 23.0, "step": 2271 }, { "epoch": 1.3637454981992798, "grad_norm": 0.000532143865711987, "learning_rate": 0.0001911640231289457, "loss": 23.0, "step": 2272 }, { "epoch": 1.3643457382953181, "grad_norm": 0.0007709377678111196, "learning_rate": 0.00019115625745825732, "loss": 23.0, "step": 2273 }, { "epoch": 1.3649459783913565, "grad_norm": 0.0011233460390940309, "learning_rate": 0.0001911484885344366, "loss": 23.0, "step": 2274 }, { "epoch": 1.365546218487395, "grad_norm": 0.001685342169366777, "learning_rate": 0.0001911407163577608, "loss": 23.0, "step": 2275 }, { "epoch": 1.3661464585834333, "grad_norm": 0.0012172020506113768, "learning_rate": 0.00019113294092850725, "loss": 23.0, "step": 2276 }, { "epoch": 1.3667466986794718, "grad_norm": 0.0018125694477930665, "learning_rate": 0.00019112516224695347, "loss": 23.0, "step": 2277 }, { "epoch": 1.3673469387755102, "grad_norm": 0.0003331191255711019, "learning_rate": 0.0001911173803133771, "loss": 23.0, "step": 2278 }, { "epoch": 1.3679471788715487, "grad_norm": 0.0006346300942823291, "learning_rate": 0.00019110959512805574, "loss": 23.0, "step": 2279 }, { "epoch": 1.368547418967587, "grad_norm": 0.002104330575093627, "learning_rate": 0.0001911018066912673, "loss": 23.0, "step": 2280 }, { "epoch": 1.3691476590636253, "grad_norm": 0.0028025060892105103, "learning_rate": 0.00019109401500328973, "loss": 23.0, "step": 2281 }, { "epoch": 1.3697478991596639, "grad_norm": 0.0009631137363612652, "learning_rate": 0.00019108622006440112, "loss": 23.0, "step": 2282 }, { "epoch": 1.3703481392557022, "grad_norm": 0.0011770789278671145, "learning_rate": 0.0001910784218748796, "loss": 23.0, "step": 2283 }, { "epoch": 1.3709483793517407, "grad_norm": 0.0006025804905220866, "learning_rate": 0.00019107062043500345, "loss": 23.0, "step": 2284 }, { "epoch": 1.371548619447779, "grad_norm": 0.0008317616302520037, "learning_rate": 0.00019106281574505116, "loss": 23.0, "step": 2285 }, { "epoch": 1.3721488595438176, "grad_norm": 0.0026311827823519707, "learning_rate": 0.00019105500780530123, "loss": 23.0, "step": 2286 }, { "epoch": 1.372749099639856, "grad_norm": 0.0010507861152291298, "learning_rate": 0.00019104719661603223, "loss": 23.0, "step": 2287 }, { "epoch": 1.3733493397358942, "grad_norm": 0.0007436316227540374, "learning_rate": 0.00019103938217752303, "loss": 23.0, "step": 2288 }, { "epoch": 1.3739495798319328, "grad_norm": 0.000857638951856643, "learning_rate": 0.00019103156449005245, "loss": 23.0, "step": 2289 }, { "epoch": 1.3745498199279713, "grad_norm": 0.0014579903800040483, "learning_rate": 0.0001910237435538995, "loss": 23.0, "step": 2290 }, { "epoch": 1.3751500600240096, "grad_norm": 0.0011235218262299895, "learning_rate": 0.00019101591936934327, "loss": 23.0, "step": 2291 }, { "epoch": 1.375750300120048, "grad_norm": 0.0005367260309867561, "learning_rate": 0.00019100809193666303, "loss": 23.0, "step": 2292 }, { "epoch": 1.3763505402160865, "grad_norm": 0.0010245360899716616, "learning_rate": 0.00019100026125613804, "loss": 23.0, "step": 2293 }, { "epoch": 1.3769507803121248, "grad_norm": 0.0010933050652965903, "learning_rate": 0.00019099242732804786, "loss": 23.0, "step": 2294 }, { "epoch": 1.3775510204081631, "grad_norm": 0.0009408457553945482, "learning_rate": 0.00019098459015267198, "loss": 23.0, "step": 2295 }, { "epoch": 1.3781512605042017, "grad_norm": 0.0014276335714384913, "learning_rate": 0.00019097674973029012, "loss": 23.0, "step": 2296 }, { "epoch": 1.3787515006002402, "grad_norm": 0.0010626583825796843, "learning_rate": 0.00019096890606118207, "loss": 23.0, "step": 2297 }, { "epoch": 1.3793517406962785, "grad_norm": 0.003225801046937704, "learning_rate": 0.00019096105914562776, "loss": 23.0, "step": 2298 }, { "epoch": 1.3799519807923168, "grad_norm": 0.0023619416169822216, "learning_rate": 0.00019095320898390727, "loss": 23.0, "step": 2299 }, { "epoch": 1.3805522208883554, "grad_norm": 0.0009626576793380082, "learning_rate": 0.0001909453555763007, "loss": 23.0, "step": 2300 }, { "epoch": 1.3811524609843937, "grad_norm": 0.0005572536610998213, "learning_rate": 0.0001909374989230883, "loss": 23.0, "step": 2301 }, { "epoch": 1.3817527010804322, "grad_norm": 0.0014027627184987068, "learning_rate": 0.00019092963902455048, "loss": 23.0, "step": 2302 }, { "epoch": 1.3823529411764706, "grad_norm": 0.0008851447491906583, "learning_rate": 0.00019092177588096777, "loss": 23.0, "step": 2303 }, { "epoch": 1.382953181272509, "grad_norm": 0.0006001814617775381, "learning_rate": 0.00019091390949262075, "loss": 23.0, "step": 2304 }, { "epoch": 1.3835534213685474, "grad_norm": 0.0013794521801173687, "learning_rate": 0.00019090603985979013, "loss": 23.0, "step": 2305 }, { "epoch": 1.3841536614645857, "grad_norm": 0.0014090888435021043, "learning_rate": 0.00019089816698275683, "loss": 23.0, "step": 2306 }, { "epoch": 1.3847539015606243, "grad_norm": 0.0011951072374358773, "learning_rate": 0.00019089029086180178, "loss": 23.0, "step": 2307 }, { "epoch": 1.3853541416566626, "grad_norm": 0.000998032046481967, "learning_rate": 0.00019088241149720599, "loss": 23.0, "step": 2308 }, { "epoch": 1.3859543817527011, "grad_norm": 0.0004453418077901006, "learning_rate": 0.00019087452888925076, "loss": 23.0, "step": 2309 }, { "epoch": 1.3865546218487395, "grad_norm": 0.0009691806044429541, "learning_rate": 0.00019086664303821728, "loss": 23.0, "step": 2310 }, { "epoch": 1.387154861944778, "grad_norm": 0.0017116405069828033, "learning_rate": 0.00019085875394438711, "loss": 23.0, "step": 2311 }, { "epoch": 1.3877551020408163, "grad_norm": 0.0006677840137854218, "learning_rate": 0.00019085086160804173, "loss": 23.0, "step": 2312 }, { "epoch": 1.3883553421368546, "grad_norm": 0.0009493288234807551, "learning_rate": 0.00019084296602946276, "loss": 23.0, "step": 2313 }, { "epoch": 1.3889555822328932, "grad_norm": 0.00211167661473155, "learning_rate": 0.00019083506720893204, "loss": 23.0, "step": 2314 }, { "epoch": 1.3895558223289317, "grad_norm": 0.001380943926051259, "learning_rate": 0.0001908271651467314, "loss": 23.0, "step": 2315 }, { "epoch": 1.39015606242497, "grad_norm": 0.0011355826864019036, "learning_rate": 0.0001908192598431429, "loss": 23.0, "step": 2316 }, { "epoch": 1.3907563025210083, "grad_norm": 0.001229851390235126, "learning_rate": 0.00019081135129844862, "loss": 23.0, "step": 2317 }, { "epoch": 1.3913565426170469, "grad_norm": 0.0016962314257398248, "learning_rate": 0.0001908034395129308, "loss": 23.0, "step": 2318 }, { "epoch": 1.3919567827130852, "grad_norm": 0.0005502055282704532, "learning_rate": 0.00019079552448687183, "loss": 23.0, "step": 2319 }, { "epoch": 1.3925570228091235, "grad_norm": 0.0024172854609787464, "learning_rate": 0.00019078760622055408, "loss": 23.0, "step": 2320 }, { "epoch": 1.393157262905162, "grad_norm": 0.0010021852795034647, "learning_rate": 0.00019077968471426025, "loss": 23.0, "step": 2321 }, { "epoch": 1.3937575030012006, "grad_norm": 0.002354429569095373, "learning_rate": 0.00019077175996827297, "loss": 23.0, "step": 2322 }, { "epoch": 1.394357743097239, "grad_norm": 0.0012778837699443102, "learning_rate": 0.00019076383198287505, "loss": 23.0, "step": 2323 }, { "epoch": 1.3949579831932772, "grad_norm": 0.0016614958876743913, "learning_rate": 0.00019075590075834947, "loss": 23.0, "step": 2324 }, { "epoch": 1.3955582232893158, "grad_norm": 0.0007704552845098078, "learning_rate": 0.00019074796629497923, "loss": 23.0, "step": 2325 }, { "epoch": 1.396158463385354, "grad_norm": 0.002393638016656041, "learning_rate": 0.0001907400285930475, "loss": 23.0, "step": 2326 }, { "epoch": 1.3967587034813926, "grad_norm": 0.001964516704902053, "learning_rate": 0.00019073208765283756, "loss": 23.0, "step": 2327 }, { "epoch": 1.397358943577431, "grad_norm": 0.0004503531090449542, "learning_rate": 0.00019072414347463284, "loss": 23.0, "step": 2328 }, { "epoch": 1.3979591836734695, "grad_norm": 0.0006227733101695776, "learning_rate": 0.00019071619605871677, "loss": 23.0, "step": 2329 }, { "epoch": 1.3985594237695078, "grad_norm": 0.0005024244310334325, "learning_rate": 0.00019070824540537302, "loss": 23.0, "step": 2330 }, { "epoch": 1.3991596638655461, "grad_norm": 0.0006691348971799016, "learning_rate": 0.0001907002915148853, "loss": 23.0, "step": 2331 }, { "epoch": 1.3997599039615847, "grad_norm": 0.001576990936882794, "learning_rate": 0.00019069233438753753, "loss": 23.0, "step": 2332 }, { "epoch": 1.400360144057623, "grad_norm": 0.0007380506140179932, "learning_rate": 0.00019068437402361364, "loss": 23.0, "step": 2333 }, { "epoch": 1.4009603841536615, "grad_norm": 0.001869592466391623, "learning_rate": 0.00019067641042339766, "loss": 23.0, "step": 2334 }, { "epoch": 1.4015606242496998, "grad_norm": 0.0005642715259455144, "learning_rate": 0.00019066844358717387, "loss": 23.0, "step": 2335 }, { "epoch": 1.4021608643457384, "grad_norm": 0.0023026717826724052, "learning_rate": 0.00019066047351522656, "loss": 23.0, "step": 2336 }, { "epoch": 1.4027611044417767, "grad_norm": 0.0009191056014969945, "learning_rate": 0.00019065250020784013, "loss": 23.0, "step": 2337 }, { "epoch": 1.403361344537815, "grad_norm": 0.0015248807612806559, "learning_rate": 0.00019064452366529918, "loss": 23.0, "step": 2338 }, { "epoch": 1.4039615846338536, "grad_norm": 0.0011346536921337247, "learning_rate": 0.0001906365438878883, "loss": 23.0, "step": 2339 }, { "epoch": 1.4045618247298919, "grad_norm": 0.0008668985683470964, "learning_rate": 0.00019062856087589237, "loss": 23.0, "step": 2340 }, { "epoch": 1.4051620648259304, "grad_norm": 0.0007433283608406782, "learning_rate": 0.0001906205746295962, "loss": 23.0, "step": 2341 }, { "epoch": 1.4057623049219687, "grad_norm": 0.001420574146322906, "learning_rate": 0.00019061258514928482, "loss": 23.0, "step": 2342 }, { "epoch": 1.4063625450180073, "grad_norm": 0.0016676129307597876, "learning_rate": 0.00019060459243524337, "loss": 23.0, "step": 2343 }, { "epoch": 1.4069627851140456, "grad_norm": 0.001237909309566021, "learning_rate": 0.0001905965964877571, "loss": 23.0, "step": 2344 }, { "epoch": 1.407563025210084, "grad_norm": 0.0010071388678625226, "learning_rate": 0.00019058859730711132, "loss": 23.0, "step": 2345 }, { "epoch": 1.4081632653061225, "grad_norm": 0.00047392971464432776, "learning_rate": 0.00019058059489359154, "loss": 23.0, "step": 2346 }, { "epoch": 1.408763505402161, "grad_norm": 0.001075922162272036, "learning_rate": 0.00019057258924748326, "loss": 23.0, "step": 2347 }, { "epoch": 1.4093637454981993, "grad_norm": 0.00468486687168479, "learning_rate": 0.00019056458036907233, "loss": 23.0, "step": 2348 }, { "epoch": 1.4099639855942376, "grad_norm": 0.0004578742664307356, "learning_rate": 0.00019055656825864444, "loss": 23.0, "step": 2349 }, { "epoch": 1.4105642256902762, "grad_norm": 0.0006120658363215625, "learning_rate": 0.00019054855291648562, "loss": 23.0, "step": 2350 }, { "epoch": 1.4111644657863145, "grad_norm": 0.0007080400246195495, "learning_rate": 0.00019054053434288182, "loss": 23.0, "step": 2351 }, { "epoch": 1.4117647058823528, "grad_norm": 0.0007749073556624353, "learning_rate": 0.00019053251253811924, "loss": 23.0, "step": 2352 }, { "epoch": 1.4123649459783914, "grad_norm": 0.0013292570365592837, "learning_rate": 0.00019052448750248423, "loss": 23.0, "step": 2353 }, { "epoch": 1.41296518607443, "grad_norm": 0.0019230906618759036, "learning_rate": 0.00019051645923626306, "loss": 23.0, "step": 2354 }, { "epoch": 1.4135654261704682, "grad_norm": 0.004612374119460583, "learning_rate": 0.0001905084277397423, "loss": 23.0, "step": 2355 }, { "epoch": 1.4141656662665065, "grad_norm": 0.002344324951991439, "learning_rate": 0.0001905003930132086, "loss": 23.0, "step": 2356 }, { "epoch": 1.414765906362545, "grad_norm": 0.0015184414805844426, "learning_rate": 0.00019049235505694864, "loss": 23.0, "step": 2357 }, { "epoch": 1.4153661464585834, "grad_norm": 0.0006440380238927901, "learning_rate": 0.00019048431387124932, "loss": 23.0, "step": 2358 }, { "epoch": 1.415966386554622, "grad_norm": 0.0011973325163125992, "learning_rate": 0.00019047626945639757, "loss": 23.0, "step": 2359 }, { "epoch": 1.4165666266506602, "grad_norm": 0.0014662436442449689, "learning_rate": 0.00019046822181268048, "loss": 23.0, "step": 2360 }, { "epoch": 1.4171668667466988, "grad_norm": 0.0008078878745436668, "learning_rate": 0.00019046017094038532, "loss": 23.0, "step": 2361 }, { "epoch": 1.417767106842737, "grad_norm": 0.0005584595492109656, "learning_rate": 0.00019045211683979934, "loss": 23.0, "step": 2362 }, { "epoch": 1.4183673469387754, "grad_norm": 0.0008244646014645696, "learning_rate": 0.00019044405951120993, "loss": 23.0, "step": 2363 }, { "epoch": 1.418967587034814, "grad_norm": 0.0017755191074684262, "learning_rate": 0.00019043599895490475, "loss": 23.0, "step": 2364 }, { "epoch": 1.4195678271308523, "grad_norm": 0.0010308343917131424, "learning_rate": 0.00019042793517117133, "loss": 23.0, "step": 2365 }, { "epoch": 1.4201680672268908, "grad_norm": 0.0005713171558454633, "learning_rate": 0.00019041986816029755, "loss": 23.0, "step": 2366 }, { "epoch": 1.4207683073229291, "grad_norm": 0.0006858983542770147, "learning_rate": 0.00019041179792257128, "loss": 23.0, "step": 2367 }, { "epoch": 1.4213685474189677, "grad_norm": 0.0008377997437492013, "learning_rate": 0.0001904037244582805, "loss": 23.0, "step": 2368 }, { "epoch": 1.421968787515006, "grad_norm": 0.0011340173659846187, "learning_rate": 0.00019039564776771333, "loss": 23.0, "step": 2369 }, { "epoch": 1.4225690276110443, "grad_norm": 0.002067905617877841, "learning_rate": 0.000190387567851158, "loss": 23.0, "step": 2370 }, { "epoch": 1.4231692677070829, "grad_norm": 0.0007086987025104463, "learning_rate": 0.0001903794847089029, "loss": 23.0, "step": 2371 }, { "epoch": 1.4237695078031212, "grad_norm": 0.0014819285133853555, "learning_rate": 0.00019037139834123649, "loss": 23.0, "step": 2372 }, { "epoch": 1.4243697478991597, "grad_norm": 0.0010032330173999071, "learning_rate": 0.0001903633087484473, "loss": 23.0, "step": 2373 }, { "epoch": 1.424969987995198, "grad_norm": 0.00293930247426033, "learning_rate": 0.00019035521593082407, "loss": 23.0, "step": 2374 }, { "epoch": 1.4255702280912366, "grad_norm": 0.0007761257002130151, "learning_rate": 0.00019034711988865564, "loss": 23.0, "step": 2375 }, { "epoch": 1.4261704681872749, "grad_norm": 0.001004933612421155, "learning_rate": 0.00019033902062223085, "loss": 23.0, "step": 2376 }, { "epoch": 1.4267707082833132, "grad_norm": 0.0011718858731910586, "learning_rate": 0.00019033091813183884, "loss": 23.0, "step": 2377 }, { "epoch": 1.4273709483793517, "grad_norm": 0.0009318561642430723, "learning_rate": 0.00019032281241776868, "loss": 23.0, "step": 2378 }, { "epoch": 1.4279711884753903, "grad_norm": 0.001223216182552278, "learning_rate": 0.0001903147034803097, "loss": 23.0, "step": 2379 }, { "epoch": 1.4285714285714286, "grad_norm": 0.0014702687039971352, "learning_rate": 0.00019030659131975128, "loss": 23.0, "step": 2380 }, { "epoch": 1.429171668667467, "grad_norm": 0.0015447470359504223, "learning_rate": 0.0001902984759363829, "loss": 23.0, "step": 2381 }, { "epoch": 1.4297719087635055, "grad_norm": 0.0005654869601130486, "learning_rate": 0.0001902903573304942, "loss": 23.0, "step": 2382 }, { "epoch": 1.4303721488595438, "grad_norm": 0.0011179533321410418, "learning_rate": 0.0001902822355023749, "loss": 23.0, "step": 2383 }, { "epoch": 1.430972388955582, "grad_norm": 0.001682326546870172, "learning_rate": 0.00019027411045231482, "loss": 23.0, "step": 2384 }, { "epoch": 1.4315726290516206, "grad_norm": 0.0007868417887948453, "learning_rate": 0.000190265982180604, "loss": 23.0, "step": 2385 }, { "epoch": 1.4321728691476592, "grad_norm": 0.0008736375602893531, "learning_rate": 0.00019025785068753245, "loss": 23.0, "step": 2386 }, { "epoch": 1.4327731092436975, "grad_norm": 0.0012772384798154235, "learning_rate": 0.00019024971597339036, "loss": 23.0, "step": 2387 }, { "epoch": 1.4333733493397358, "grad_norm": 0.0024452575016766787, "learning_rate": 0.00019024157803846807, "loss": 23.0, "step": 2388 }, { "epoch": 1.4339735894357744, "grad_norm": 0.000944132509175688, "learning_rate": 0.00019023343688305598, "loss": 23.0, "step": 2389 }, { "epoch": 1.4345738295318127, "grad_norm": 0.0018963133916258812, "learning_rate": 0.00019022529250744465, "loss": 23.0, "step": 2390 }, { "epoch": 1.4351740696278512, "grad_norm": 0.0011875929776579142, "learning_rate": 0.00019021714491192473, "loss": 23.0, "step": 2391 }, { "epoch": 1.4357743097238895, "grad_norm": 0.0014995178207755089, "learning_rate": 0.00019020899409678694, "loss": 23.0, "step": 2392 }, { "epoch": 1.436374549819928, "grad_norm": 0.0005268016830086708, "learning_rate": 0.0001902008400623222, "loss": 23.0, "step": 2393 }, { "epoch": 1.4369747899159664, "grad_norm": 0.001868417952209711, "learning_rate": 0.00019019268280882153, "loss": 23.0, "step": 2394 }, { "epoch": 1.4375750300120047, "grad_norm": 0.0018561460310593247, "learning_rate": 0.000190184522336576, "loss": 23.0, "step": 2395 }, { "epoch": 1.4381752701080432, "grad_norm": 0.0015242897206917405, "learning_rate": 0.00019017635864587685, "loss": 23.0, "step": 2396 }, { "epoch": 1.4387755102040816, "grad_norm": 0.0020303060300648212, "learning_rate": 0.00019016819173701542, "loss": 23.0, "step": 2397 }, { "epoch": 1.43937575030012, "grad_norm": 0.0019102046499028802, "learning_rate": 0.00019016002161028315, "loss": 23.0, "step": 2398 }, { "epoch": 1.4399759903961584, "grad_norm": 0.0020126604940742254, "learning_rate": 0.00019015184826597165, "loss": 23.0, "step": 2399 }, { "epoch": 1.440576230492197, "grad_norm": 0.0019320209976285696, "learning_rate": 0.00019014367170437258, "loss": 23.0, "step": 2400 }, { "epoch": 1.4411764705882353, "grad_norm": 0.0005183393368497491, "learning_rate": 0.00019013549192577775, "loss": 23.0, "step": 2401 }, { "epoch": 1.4417767106842736, "grad_norm": 0.0018166369991376996, "learning_rate": 0.00019012730893047905, "loss": 23.0, "step": 2402 }, { "epoch": 1.4423769507803121, "grad_norm": 0.001152240438386798, "learning_rate": 0.00019011912271876854, "loss": 23.0, "step": 2403 }, { "epoch": 1.4429771908763505, "grad_norm": 0.0012412526411935687, "learning_rate": 0.00019011093329093835, "loss": 23.0, "step": 2404 }, { "epoch": 1.443577430972389, "grad_norm": 0.0005008511943742633, "learning_rate": 0.00019010274064728075, "loss": 23.0, "step": 2405 }, { "epoch": 1.4441776710684273, "grad_norm": 0.0009910052176564932, "learning_rate": 0.0001900945447880881, "loss": 23.0, "step": 2406 }, { "epoch": 1.4447779111644659, "grad_norm": 0.000912656367290765, "learning_rate": 0.00019008634571365292, "loss": 23.0, "step": 2407 }, { "epoch": 1.4453781512605042, "grad_norm": 0.0028049952816218138, "learning_rate": 0.00019007814342426776, "loss": 23.0, "step": 2408 }, { "epoch": 1.4459783913565425, "grad_norm": 0.0006766768638044596, "learning_rate": 0.0001900699379202254, "loss": 23.0, "step": 2409 }, { "epoch": 1.446578631452581, "grad_norm": 0.0007151064346544445, "learning_rate": 0.0001900617292018186, "loss": 23.0, "step": 2410 }, { "epoch": 1.4471788715486196, "grad_norm": 0.0006462062592618167, "learning_rate": 0.00019005351726934043, "loss": 23.0, "step": 2411 }, { "epoch": 1.447779111644658, "grad_norm": 0.0017846194095909595, "learning_rate": 0.0001900453021230838, "loss": 23.0, "step": 2412 }, { "epoch": 1.4483793517406962, "grad_norm": 0.0005367895937524736, "learning_rate": 0.00019003708376334196, "loss": 23.0, "step": 2413 }, { "epoch": 1.4489795918367347, "grad_norm": 0.0016319414135068655, "learning_rate": 0.00019002886219040825, "loss": 23.0, "step": 2414 }, { "epoch": 1.449579831932773, "grad_norm": 0.0011456223437562585, "learning_rate": 0.000190020637404576, "loss": 23.0, "step": 2415 }, { "epoch": 1.4501800720288116, "grad_norm": 0.0025063680950552225, "learning_rate": 0.00019001240940613877, "loss": 23.0, "step": 2416 }, { "epoch": 1.45078031212485, "grad_norm": 0.0013597876532003284, "learning_rate": 0.0001900041781953902, "loss": 23.0, "step": 2417 }, { "epoch": 1.4513805522208885, "grad_norm": 0.0005757425678893924, "learning_rate": 0.00018999594377262403, "loss": 23.0, "step": 2418 }, { "epoch": 1.4519807923169268, "grad_norm": 0.000643598148599267, "learning_rate": 0.0001899877061381341, "loss": 23.0, "step": 2419 }, { "epoch": 1.452581032412965, "grad_norm": 0.0012208611005917192, "learning_rate": 0.00018997946529221443, "loss": 23.0, "step": 2420 }, { "epoch": 1.4531812725090036, "grad_norm": 0.000552306417375803, "learning_rate": 0.00018997122123515909, "loss": 23.0, "step": 2421 }, { "epoch": 1.453781512605042, "grad_norm": 0.0013741564471274614, "learning_rate": 0.0001899629739672623, "loss": 23.0, "step": 2422 }, { "epoch": 1.4543817527010805, "grad_norm": 0.0015043517341837287, "learning_rate": 0.00018995472348881838, "loss": 23.0, "step": 2423 }, { "epoch": 1.4549819927971188, "grad_norm": 0.0006686780834570527, "learning_rate": 0.0001899464698001218, "loss": 23.0, "step": 2424 }, { "epoch": 1.4555822328931574, "grad_norm": 0.0020055018831044436, "learning_rate": 0.00018993821290146703, "loss": 23.0, "step": 2425 }, { "epoch": 1.4561824729891957, "grad_norm": 0.000782818766310811, "learning_rate": 0.00018992995279314883, "loss": 23.0, "step": 2426 }, { "epoch": 1.456782713085234, "grad_norm": 0.000715934787876904, "learning_rate": 0.00018992168947546193, "loss": 23.0, "step": 2427 }, { "epoch": 1.4573829531812725, "grad_norm": 0.002988057676702738, "learning_rate": 0.00018991342294870123, "loss": 23.0, "step": 2428 }, { "epoch": 1.4579831932773109, "grad_norm": 0.0012659791391342878, "learning_rate": 0.00018990515321316175, "loss": 23.0, "step": 2429 }, { "epoch": 1.4585834333733494, "grad_norm": 0.002724630059674382, "learning_rate": 0.00018989688026913864, "loss": 23.0, "step": 2430 }, { "epoch": 1.4591836734693877, "grad_norm": 0.0005784712266176939, "learning_rate": 0.0001898886041169271, "loss": 23.0, "step": 2431 }, { "epoch": 1.4597839135654262, "grad_norm": 0.0010768299689516425, "learning_rate": 0.00018988032475682247, "loss": 23.0, "step": 2432 }, { "epoch": 1.4603841536614646, "grad_norm": 0.0011558542028069496, "learning_rate": 0.00018987204218912028, "loss": 23.0, "step": 2433 }, { "epoch": 1.4609843937575029, "grad_norm": 0.0020028555300086737, "learning_rate": 0.0001898637564141161, "loss": 23.0, "step": 2434 }, { "epoch": 1.4615846338535414, "grad_norm": 0.0011177972191944718, "learning_rate": 0.00018985546743210559, "loss": 23.0, "step": 2435 }, { "epoch": 1.46218487394958, "grad_norm": 0.0016774655086919665, "learning_rate": 0.00018984717524338457, "loss": 23.0, "step": 2436 }, { "epoch": 1.4627851140456183, "grad_norm": 0.001039148191921413, "learning_rate": 0.00018983887984824898, "loss": 23.0, "step": 2437 }, { "epoch": 1.4633853541416566, "grad_norm": 0.0009601426427252591, "learning_rate": 0.0001898305812469949, "loss": 23.0, "step": 2438 }, { "epoch": 1.4639855942376951, "grad_norm": 0.00051392603199929, "learning_rate": 0.0001898222794399184, "loss": 23.0, "step": 2439 }, { "epoch": 1.4645858343337335, "grad_norm": 0.0011603968450799584, "learning_rate": 0.00018981397442731582, "loss": 23.0, "step": 2440 }, { "epoch": 1.4651860744297718, "grad_norm": 0.0007191347540356219, "learning_rate": 0.00018980566620948355, "loss": 23.0, "step": 2441 }, { "epoch": 1.4657863145258103, "grad_norm": 0.002068307949230075, "learning_rate": 0.000189797354786718, "loss": 23.0, "step": 2442 }, { "epoch": 1.4663865546218489, "grad_norm": 0.0010287434561178088, "learning_rate": 0.0001897890401593159, "loss": 23.0, "step": 2443 }, { "epoch": 1.4669867947178872, "grad_norm": 0.000688788655679673, "learning_rate": 0.00018978072232757388, "loss": 23.0, "step": 2444 }, { "epoch": 1.4675870348139255, "grad_norm": 0.0016002441989257932, "learning_rate": 0.00018977240129178883, "loss": 23.0, "step": 2445 }, { "epoch": 1.468187274909964, "grad_norm": 0.0014760290505364537, "learning_rate": 0.00018976407705225773, "loss": 23.0, "step": 2446 }, { "epoch": 1.4687875150060024, "grad_norm": 0.0016490549314767122, "learning_rate": 0.0001897557496092776, "loss": 23.0, "step": 2447 }, { "epoch": 1.469387755102041, "grad_norm": 0.0013884782092645764, "learning_rate": 0.00018974741896314565, "loss": 23.0, "step": 2448 }, { "epoch": 1.4699879951980792, "grad_norm": 0.0020489206071943045, "learning_rate": 0.00018973908511415918, "loss": 23.0, "step": 2449 }, { "epoch": 1.4705882352941178, "grad_norm": 0.0006343182176351547, "learning_rate": 0.00018973074806261558, "loss": 23.0, "step": 2450 }, { "epoch": 1.471188475390156, "grad_norm": 0.002966964850202203, "learning_rate": 0.0001897224078088124, "loss": 23.0, "step": 2451 }, { "epoch": 1.4717887154861944, "grad_norm": 0.0014962563291192055, "learning_rate": 0.0001897140643530473, "loss": 23.0, "step": 2452 }, { "epoch": 1.472388955582233, "grad_norm": 0.001116880332119763, "learning_rate": 0.000189705717695618, "loss": 23.0, "step": 2453 }, { "epoch": 1.4729891956782712, "grad_norm": 0.001895223744213581, "learning_rate": 0.0001896973678368224, "loss": 23.0, "step": 2454 }, { "epoch": 1.4735894357743098, "grad_norm": 0.0010942331282421947, "learning_rate": 0.00018968901477695846, "loss": 23.0, "step": 2455 }, { "epoch": 1.474189675870348, "grad_norm": 0.0024703119415789843, "learning_rate": 0.00018968065851632427, "loss": 23.0, "step": 2456 }, { "epoch": 1.4747899159663866, "grad_norm": 0.0033121961168944836, "learning_rate": 0.0001896722990552181, "loss": 23.0, "step": 2457 }, { "epoch": 1.475390156062425, "grad_norm": 0.0023775578010827303, "learning_rate": 0.00018966393639393824, "loss": 23.0, "step": 2458 }, { "epoch": 1.4759903961584633, "grad_norm": 0.0013918614713475108, "learning_rate": 0.0001896555705327831, "loss": 23.0, "step": 2459 }, { "epoch": 1.4765906362545018, "grad_norm": 0.0009230630821548402, "learning_rate": 0.00018964720147205132, "loss": 23.0, "step": 2460 }, { "epoch": 1.4771908763505401, "grad_norm": 0.0010851762490347028, "learning_rate": 0.00018963882921204145, "loss": 23.0, "step": 2461 }, { "epoch": 1.4777911164465787, "grad_norm": 0.0005464069545269012, "learning_rate": 0.0001896304537530524, "loss": 23.0, "step": 2462 }, { "epoch": 1.478391356542617, "grad_norm": 0.0014860890805721283, "learning_rate": 0.00018962207509538298, "loss": 23.0, "step": 2463 }, { "epoch": 1.4789915966386555, "grad_norm": 0.0019383092876523733, "learning_rate": 0.00018961369323933225, "loss": 23.0, "step": 2464 }, { "epoch": 1.4795918367346939, "grad_norm": 0.002337732119485736, "learning_rate": 0.00018960530818519934, "loss": 23.0, "step": 2465 }, { "epoch": 1.4801920768307322, "grad_norm": 0.0012965687783434987, "learning_rate": 0.00018959691993328343, "loss": 23.0, "step": 2466 }, { "epoch": 1.4807923169267707, "grad_norm": 0.0022145984694361687, "learning_rate": 0.00018958852848388396, "loss": 23.0, "step": 2467 }, { "epoch": 1.4813925570228093, "grad_norm": 0.0013813393888995051, "learning_rate": 0.00018958013383730036, "loss": 23.0, "step": 2468 }, { "epoch": 1.4819927971188476, "grad_norm": 0.0012145678047090769, "learning_rate": 0.00018957173599383218, "loss": 23.0, "step": 2469 }, { "epoch": 1.482593037214886, "grad_norm": 0.0006841164431534708, "learning_rate": 0.00018956333495377914, "loss": 23.0, "step": 2470 }, { "epoch": 1.4831932773109244, "grad_norm": 0.002038002945482731, "learning_rate": 0.00018955493071744109, "loss": 23.0, "step": 2471 }, { "epoch": 1.4837935174069627, "grad_norm": 0.0006899077561683953, "learning_rate": 0.00018954652328511792, "loss": 23.0, "step": 2472 }, { "epoch": 1.484393757503001, "grad_norm": 0.0032006518449634314, "learning_rate": 0.0001895381126571097, "loss": 23.0, "step": 2473 }, { "epoch": 1.4849939975990396, "grad_norm": 0.0013399770250543952, "learning_rate": 0.0001895296988337165, "loss": 23.0, "step": 2474 }, { "epoch": 1.4855942376950781, "grad_norm": 0.001352188060991466, "learning_rate": 0.00018952128181523868, "loss": 23.0, "step": 2475 }, { "epoch": 1.4861944777911165, "grad_norm": 0.0017339931800961494, "learning_rate": 0.00018951286160197656, "loss": 23.0, "step": 2476 }, { "epoch": 1.4867947178871548, "grad_norm": 0.0018044788157567382, "learning_rate": 0.00018950443819423072, "loss": 23.0, "step": 2477 }, { "epoch": 1.4873949579831933, "grad_norm": 0.002194973872974515, "learning_rate": 0.00018949601159230168, "loss": 23.0, "step": 2478 }, { "epoch": 1.4879951980792316, "grad_norm": 0.0009345563594251871, "learning_rate": 0.0001894875817964902, "loss": 23.0, "step": 2479 }, { "epoch": 1.4885954381752702, "grad_norm": 0.0019275409867987037, "learning_rate": 0.0001894791488070971, "loss": 23.0, "step": 2480 }, { "epoch": 1.4891956782713085, "grad_norm": 0.0015987679362297058, "learning_rate": 0.00018947071262442338, "loss": 23.0, "step": 2481 }, { "epoch": 1.489795918367347, "grad_norm": 0.0010282109724357724, "learning_rate": 0.00018946227324877004, "loss": 23.0, "step": 2482 }, { "epoch": 1.4903961584633854, "grad_norm": 0.0021582935005426407, "learning_rate": 0.00018945383068043832, "loss": 23.0, "step": 2483 }, { "epoch": 1.4909963985594237, "grad_norm": 0.0014552563661709428, "learning_rate": 0.00018944538491972947, "loss": 23.0, "step": 2484 }, { "epoch": 1.4915966386554622, "grad_norm": 0.003442388027906418, "learning_rate": 0.00018943693596694492, "loss": 23.0, "step": 2485 }, { "epoch": 1.4921968787515005, "grad_norm": 0.0013504648813977838, "learning_rate": 0.00018942848382238618, "loss": 23.0, "step": 2486 }, { "epoch": 1.492797118847539, "grad_norm": 0.0007023458019830287, "learning_rate": 0.0001894200284863549, "loss": 23.0, "step": 2487 }, { "epoch": 1.4933973589435774, "grad_norm": 0.0015389977488666773, "learning_rate": 0.0001894115699591528, "loss": 23.0, "step": 2488 }, { "epoch": 1.493997599039616, "grad_norm": 0.0009908635402098298, "learning_rate": 0.00018940310824108175, "loss": 23.0, "step": 2489 }, { "epoch": 1.4945978391356542, "grad_norm": 0.0017589518101885915, "learning_rate": 0.00018939464333244378, "loss": 23.0, "step": 2490 }, { "epoch": 1.4951980792316926, "grad_norm": 0.0017695450223982334, "learning_rate": 0.00018938617523354089, "loss": 23.0, "step": 2491 }, { "epoch": 1.495798319327731, "grad_norm": 0.0020960380788892508, "learning_rate": 0.0001893777039446754, "loss": 23.0, "step": 2492 }, { "epoch": 1.4963985594237694, "grad_norm": 0.0016196799697354436, "learning_rate": 0.0001893692294661495, "loss": 23.0, "step": 2493 }, { "epoch": 1.496998799519808, "grad_norm": 0.0021666022948920727, "learning_rate": 0.00018936075179826572, "loss": 23.0, "step": 2494 }, { "epoch": 1.4975990396158463, "grad_norm": 0.0011034191120415926, "learning_rate": 0.00018935227094132658, "loss": 23.0, "step": 2495 }, { "epoch": 1.4981992797118848, "grad_norm": 0.0014189224457368255, "learning_rate": 0.0001893437868956347, "loss": 23.0, "step": 2496 }, { "epoch": 1.4987995198079231, "grad_norm": 0.0015656520845368505, "learning_rate": 0.00018933529966149291, "loss": 23.0, "step": 2497 }, { "epoch": 1.4993997599039615, "grad_norm": 0.002596880542114377, "learning_rate": 0.00018932680923920407, "loss": 23.0, "step": 2498 }, { "epoch": 1.5, "grad_norm": 0.001045036013238132, "learning_rate": 0.00018931831562907117, "loss": 23.0, "step": 2499 }, { "epoch": 1.5006002400960385, "grad_norm": 0.0023872291203588247, "learning_rate": 0.00018930981883139735, "loss": 23.0, "step": 2500 }, { "epoch": 1.5012004801920769, "grad_norm": 0.0013668040046468377, "learning_rate": 0.00018930131884648584, "loss": 23.0, "step": 2501 }, { "epoch": 1.5018007202881152, "grad_norm": 0.002747413469478488, "learning_rate": 0.00018929281567463995, "loss": 23.0, "step": 2502 }, { "epoch": 1.5024009603841537, "grad_norm": 0.001867507235147059, "learning_rate": 0.00018928430931616316, "loss": 23.0, "step": 2503 }, { "epoch": 1.503001200480192, "grad_norm": 0.0007478939369320869, "learning_rate": 0.00018927579977135906, "loss": 23.0, "step": 2504 }, { "epoch": 1.5036014405762304, "grad_norm": 0.0014916093787178397, "learning_rate": 0.0001892672870405313, "loss": 23.0, "step": 2505 }, { "epoch": 1.504201680672269, "grad_norm": 0.0011776410974562168, "learning_rate": 0.00018925877112398366, "loss": 23.0, "step": 2506 }, { "epoch": 1.5048019207683074, "grad_norm": 0.0019988473504781723, "learning_rate": 0.0001892502520220201, "loss": 23.0, "step": 2507 }, { "epoch": 1.5054021608643458, "grad_norm": 0.0010818892624229193, "learning_rate": 0.00018924172973494463, "loss": 23.0, "step": 2508 }, { "epoch": 1.506002400960384, "grad_norm": 0.0022928828839212656, "learning_rate": 0.0001892332042630614, "loss": 23.0, "step": 2509 }, { "epoch": 1.5066026410564226, "grad_norm": 0.001313739106990397, "learning_rate": 0.00018922467560667457, "loss": 23.0, "step": 2510 }, { "epoch": 1.5072028811524611, "grad_norm": 0.0003109007375314832, "learning_rate": 0.00018921614376608866, "loss": 23.0, "step": 2511 }, { "epoch": 1.5078031212484992, "grad_norm": 0.0010564455296844244, "learning_rate": 0.00018920760874160806, "loss": 23.0, "step": 2512 }, { "epoch": 1.5084033613445378, "grad_norm": 0.0011524189030751586, "learning_rate": 0.00018919907053353735, "loss": 23.0, "step": 2513 }, { "epoch": 1.5090036014405763, "grad_norm": 0.0017336489399895072, "learning_rate": 0.00018919052914218124, "loss": 23.0, "step": 2514 }, { "epoch": 1.5096038415366146, "grad_norm": 0.0009461884619668126, "learning_rate": 0.00018918198456784464, "loss": 23.0, "step": 2515 }, { "epoch": 1.510204081632653, "grad_norm": 0.0022306712344288826, "learning_rate": 0.00018917343681083235, "loss": 23.0, "step": 2516 }, { "epoch": 1.5108043217286915, "grad_norm": 0.0007099023787304759, "learning_rate": 0.0001891648858714495, "loss": 23.0, "step": 2517 }, { "epoch": 1.51140456182473, "grad_norm": 0.0007101186783984303, "learning_rate": 0.00018915633175000125, "loss": 23.0, "step": 2518 }, { "epoch": 1.5120048019207684, "grad_norm": 0.000460692128399387, "learning_rate": 0.00018914777444679282, "loss": 23.0, "step": 2519 }, { "epoch": 1.5126050420168067, "grad_norm": 0.0009436405962333083, "learning_rate": 0.00018913921396212968, "loss": 23.0, "step": 2520 }, { "epoch": 1.5132052821128452, "grad_norm": 0.0034887429792433977, "learning_rate": 0.00018913065029631726, "loss": 23.0, "step": 2521 }, { "epoch": 1.5138055222088835, "grad_norm": 0.001372122555039823, "learning_rate": 0.0001891220834496612, "loss": 23.0, "step": 2522 }, { "epoch": 1.5144057623049219, "grad_norm": 0.0013257608516141772, "learning_rate": 0.00018911351342246725, "loss": 23.0, "step": 2523 }, { "epoch": 1.5150060024009604, "grad_norm": 0.0008941548294387758, "learning_rate": 0.00018910494021504124, "loss": 23.0, "step": 2524 }, { "epoch": 1.515606242496999, "grad_norm": 0.0010371002135798335, "learning_rate": 0.0001890963638276891, "loss": 23.0, "step": 2525 }, { "epoch": 1.5162064825930373, "grad_norm": 0.002510013757273555, "learning_rate": 0.00018908778426071692, "loss": 23.0, "step": 2526 }, { "epoch": 1.5168067226890756, "grad_norm": 0.0019762797746807337, "learning_rate": 0.00018907920151443088, "loss": 23.0, "step": 2527 }, { "epoch": 1.517406962785114, "grad_norm": 0.0006245961994864047, "learning_rate": 0.00018907061558913726, "loss": 23.0, "step": 2528 }, { "epoch": 1.5180072028811524, "grad_norm": 0.001987320836633444, "learning_rate": 0.00018906202648514252, "loss": 23.0, "step": 2529 }, { "epoch": 1.5186074429771907, "grad_norm": 0.0013297735713422298, "learning_rate": 0.00018905343420275313, "loss": 23.0, "step": 2530 }, { "epoch": 1.5192076830732293, "grad_norm": 0.0017429828876629472, "learning_rate": 0.00018904483874227578, "loss": 23.0, "step": 2531 }, { "epoch": 1.5198079231692678, "grad_norm": 0.0008359710918739438, "learning_rate": 0.00018903624010401714, "loss": 23.0, "step": 2532 }, { "epoch": 1.5204081632653061, "grad_norm": 0.000509335775859654, "learning_rate": 0.0001890276382882841, "loss": 23.0, "step": 2533 }, { "epoch": 1.5210084033613445, "grad_norm": 0.0008946338784880936, "learning_rate": 0.00018901903329538372, "loss": 23.0, "step": 2534 }, { "epoch": 1.521608643457383, "grad_norm": 0.0009605719242244959, "learning_rate": 0.000189010425125623, "loss": 23.0, "step": 2535 }, { "epoch": 1.5222088835534213, "grad_norm": 0.0011078940005972981, "learning_rate": 0.00018900181377930916, "loss": 23.0, "step": 2536 }, { "epoch": 1.5228091236494596, "grad_norm": 0.0016285383608192205, "learning_rate": 0.0001889931992567495, "loss": 23.0, "step": 2537 }, { "epoch": 1.5234093637454982, "grad_norm": 0.0011555464006960392, "learning_rate": 0.00018898458155825154, "loss": 23.0, "step": 2538 }, { "epoch": 1.5240096038415367, "grad_norm": 0.0008873185142874718, "learning_rate": 0.00018897596068412272, "loss": 23.0, "step": 2539 }, { "epoch": 1.524609843937575, "grad_norm": 0.000611953844781965, "learning_rate": 0.00018896733663467075, "loss": 23.0, "step": 2540 }, { "epoch": 1.5252100840336134, "grad_norm": 0.001338244299404323, "learning_rate": 0.00018895870941020338, "loss": 23.0, "step": 2541 }, { "epoch": 1.525810324129652, "grad_norm": 0.0021093355026096106, "learning_rate": 0.00018895007901102853, "loss": 23.0, "step": 2542 }, { "epoch": 1.5264105642256904, "grad_norm": 0.0007958555361256003, "learning_rate": 0.0001889414454374541, "loss": 23.0, "step": 2543 }, { "epoch": 1.5270108043217285, "grad_norm": 0.001077108783647418, "learning_rate": 0.00018893280868978834, "loss": 23.0, "step": 2544 }, { "epoch": 1.527611044417767, "grad_norm": 0.003100859234109521, "learning_rate": 0.00018892416876833937, "loss": 23.0, "step": 2545 }, { "epoch": 1.5282112845138056, "grad_norm": 0.0009456438128836453, "learning_rate": 0.00018891552567341555, "loss": 23.0, "step": 2546 }, { "epoch": 1.528811524609844, "grad_norm": 0.0015903826570138335, "learning_rate": 0.00018890687940532533, "loss": 23.0, "step": 2547 }, { "epoch": 1.5294117647058822, "grad_norm": 0.0013777603162452579, "learning_rate": 0.0001888982299643773, "loss": 23.0, "step": 2548 }, { "epoch": 1.5300120048019208, "grad_norm": 0.002740132389590144, "learning_rate": 0.00018888957735088008, "loss": 23.0, "step": 2549 }, { "epoch": 1.5306122448979593, "grad_norm": 0.003908548038452864, "learning_rate": 0.00018888092156514255, "loss": 23.0, "step": 2550 }, { "epoch": 1.5312124849939976, "grad_norm": 0.0005045512807555497, "learning_rate": 0.00018887226260747353, "loss": 23.0, "step": 2551 }, { "epoch": 1.531812725090036, "grad_norm": 0.0009278635843656957, "learning_rate": 0.00018886360047818207, "loss": 23.0, "step": 2552 }, { "epoch": 1.5324129651860745, "grad_norm": 0.0005404682597145438, "learning_rate": 0.0001888549351775773, "loss": 23.0, "step": 2553 }, { "epoch": 1.5330132052821128, "grad_norm": 0.0007404520874843001, "learning_rate": 0.0001888462667059684, "loss": 23.0, "step": 2554 }, { "epoch": 1.5336134453781511, "grad_norm": 0.0019501621136441827, "learning_rate": 0.00018883759506366484, "loss": 23.0, "step": 2555 }, { "epoch": 1.5342136854741897, "grad_norm": 0.0011888896115124226, "learning_rate": 0.00018882892025097602, "loss": 23.0, "step": 2556 }, { "epoch": 1.5348139255702282, "grad_norm": 0.0018972912803292274, "learning_rate": 0.00018882024226821154, "loss": 23.0, "step": 2557 }, { "epoch": 1.5354141656662665, "grad_norm": 0.0017216479172930121, "learning_rate": 0.00018881156111568106, "loss": 23.0, "step": 2558 }, { "epoch": 1.5360144057623049, "grad_norm": 0.0018141475738957524, "learning_rate": 0.00018880287679369444, "loss": 23.0, "step": 2559 }, { "epoch": 1.5366146458583434, "grad_norm": 0.000642350350972265, "learning_rate": 0.00018879418930256155, "loss": 23.0, "step": 2560 }, { "epoch": 1.5372148859543817, "grad_norm": 0.0018314807675778866, "learning_rate": 0.00018878549864259248, "loss": 23.0, "step": 2561 }, { "epoch": 1.53781512605042, "grad_norm": 0.0006086727953515947, "learning_rate": 0.00018877680481409732, "loss": 23.0, "step": 2562 }, { "epoch": 1.5384153661464586, "grad_norm": 0.0024697300978004932, "learning_rate": 0.00018876810781738636, "loss": 23.0, "step": 2563 }, { "epoch": 1.5390156062424971, "grad_norm": 0.00328907766379416, "learning_rate": 0.00018875940765276998, "loss": 23.0, "step": 2564 }, { "epoch": 1.5396158463385354, "grad_norm": 0.0006023477180860937, "learning_rate": 0.00018875070432055867, "loss": 23.0, "step": 2565 }, { "epoch": 1.5402160864345738, "grad_norm": 0.001448491937480867, "learning_rate": 0.000188741997821063, "loss": 23.0, "step": 2566 }, { "epoch": 1.5408163265306123, "grad_norm": 0.005367580335587263, "learning_rate": 0.0001887332881545937, "loss": 23.0, "step": 2567 }, { "epoch": 1.5414165666266506, "grad_norm": 0.0038878999184817076, "learning_rate": 0.0001887245753214616, "loss": 23.0, "step": 2568 }, { "epoch": 1.542016806722689, "grad_norm": 0.0016524168895557523, "learning_rate": 0.00018871585932197764, "loss": 23.0, "step": 2569 }, { "epoch": 1.5426170468187275, "grad_norm": 0.0018641971983015537, "learning_rate": 0.00018870714015645286, "loss": 23.0, "step": 2570 }, { "epoch": 1.543217286914766, "grad_norm": 0.0016603025142103434, "learning_rate": 0.0001886984178251984, "loss": 23.0, "step": 2571 }, { "epoch": 1.5438175270108043, "grad_norm": 0.001292040222324431, "learning_rate": 0.0001886896923285256, "loss": 23.0, "step": 2572 }, { "epoch": 1.5444177671068426, "grad_norm": 0.0008465956198051572, "learning_rate": 0.0001886809636667458, "loss": 23.0, "step": 2573 }, { "epoch": 1.5450180072028812, "grad_norm": 0.000892527517862618, "learning_rate": 0.00018867223184017054, "loss": 23.0, "step": 2574 }, { "epoch": 1.5456182472989197, "grad_norm": 0.0008575652609579265, "learning_rate": 0.0001886634968491114, "loss": 23.0, "step": 2575 }, { "epoch": 1.5462184873949578, "grad_norm": 0.0013137779897078872, "learning_rate": 0.00018865475869388014, "loss": 23.0, "step": 2576 }, { "epoch": 1.5468187274909964, "grad_norm": 0.0005612425738945603, "learning_rate": 0.0001886460173747886, "loss": 23.0, "step": 2577 }, { "epoch": 1.547418967587035, "grad_norm": 0.0016055781161412597, "learning_rate": 0.0001886372728921487, "loss": 23.0, "step": 2578 }, { "epoch": 1.5480192076830732, "grad_norm": 0.0006941373576410115, "learning_rate": 0.00018862852524627254, "loss": 23.0, "step": 2579 }, { "epoch": 1.5486194477791115, "grad_norm": 0.0018642544746398926, "learning_rate": 0.0001886197744374723, "loss": 23.0, "step": 2580 }, { "epoch": 1.54921968787515, "grad_norm": 0.0013856175355613232, "learning_rate": 0.00018861102046606028, "loss": 23.0, "step": 2581 }, { "epoch": 1.5498199279711886, "grad_norm": 0.0014988569309934974, "learning_rate": 0.00018860226333234885, "loss": 23.0, "step": 2582 }, { "epoch": 1.550420168067227, "grad_norm": 0.0015163436764851213, "learning_rate": 0.00018859350303665058, "loss": 23.0, "step": 2583 }, { "epoch": 1.5510204081632653, "grad_norm": 0.002808369230479002, "learning_rate": 0.00018858473957927805, "loss": 23.0, "step": 2584 }, { "epoch": 1.5516206482593038, "grad_norm": 0.0011914572678506374, "learning_rate": 0.00018857597296054404, "loss": 23.0, "step": 2585 }, { "epoch": 1.552220888355342, "grad_norm": 0.0012025490868836641, "learning_rate": 0.00018856720318076139, "loss": 23.0, "step": 2586 }, { "epoch": 1.5528211284513804, "grad_norm": 0.001126796007156372, "learning_rate": 0.00018855843024024308, "loss": 23.0, "step": 2587 }, { "epoch": 1.553421368547419, "grad_norm": 0.0008894942584447563, "learning_rate": 0.00018854965413930223, "loss": 23.0, "step": 2588 }, { "epoch": 1.5540216086434575, "grad_norm": 0.0018061741720885038, "learning_rate": 0.00018854087487825196, "loss": 23.0, "step": 2589 }, { "epoch": 1.5546218487394958, "grad_norm": 0.0035426064860075712, "learning_rate": 0.00018853209245740565, "loss": 23.0, "step": 2590 }, { "epoch": 1.5552220888355341, "grad_norm": 0.0011812059674412012, "learning_rate": 0.00018852330687707669, "loss": 23.0, "step": 2591 }, { "epoch": 1.5558223289315727, "grad_norm": 0.001240815268829465, "learning_rate": 0.0001885145181375786, "loss": 23.0, "step": 2592 }, { "epoch": 1.556422569027611, "grad_norm": 0.0007779524312354624, "learning_rate": 0.00018850572623922503, "loss": 23.0, "step": 2593 }, { "epoch": 1.5570228091236493, "grad_norm": 0.0011602307204157114, "learning_rate": 0.00018849693118232978, "loss": 23.0, "step": 2594 }, { "epoch": 1.5576230492196879, "grad_norm": 0.002238318556919694, "learning_rate": 0.00018848813296720672, "loss": 23.0, "step": 2595 }, { "epoch": 1.5582232893157264, "grad_norm": 0.0019287511240690947, "learning_rate": 0.00018847933159416976, "loss": 23.0, "step": 2596 }, { "epoch": 1.5588235294117647, "grad_norm": 0.0006660337676294148, "learning_rate": 0.00018847052706353308, "loss": 23.0, "step": 2597 }, { "epoch": 1.559423769507803, "grad_norm": 0.0008473030175082386, "learning_rate": 0.00018846171937561085, "loss": 23.0, "step": 2598 }, { "epoch": 1.5600240096038416, "grad_norm": 0.001743235276080668, "learning_rate": 0.00018845290853071743, "loss": 23.0, "step": 2599 }, { "epoch": 1.5606242496998801, "grad_norm": 0.000990196131169796, "learning_rate": 0.0001884440945291672, "loss": 23.0, "step": 2600 }, { "epoch": 1.5612244897959182, "grad_norm": 0.0017959270626306534, "learning_rate": 0.00018843527737127475, "loss": 23.0, "step": 2601 }, { "epoch": 1.5618247298919568, "grad_norm": 0.0012067595962435007, "learning_rate": 0.00018842645705735475, "loss": 23.0, "step": 2602 }, { "epoch": 1.5624249699879953, "grad_norm": 0.0005739233456552029, "learning_rate": 0.00018841763358772194, "loss": 23.0, "step": 2603 }, { "epoch": 1.5630252100840336, "grad_norm": 0.0009168532560579479, "learning_rate": 0.00018840880696269127, "loss": 23.0, "step": 2604 }, { "epoch": 1.563625450180072, "grad_norm": 0.0008688515517860651, "learning_rate": 0.00018839997718257765, "loss": 23.0, "step": 2605 }, { "epoch": 1.5642256902761105, "grad_norm": 0.0036098710261285305, "learning_rate": 0.00018839114424769625, "loss": 23.0, "step": 2606 }, { "epoch": 1.564825930372149, "grad_norm": 0.0009449098724871874, "learning_rate": 0.00018838230815836226, "loss": 23.0, "step": 2607 }, { "epoch": 1.5654261704681873, "grad_norm": 0.0010306858457624912, "learning_rate": 0.00018837346891489105, "loss": 23.0, "step": 2608 }, { "epoch": 1.5660264105642256, "grad_norm": 0.0003999176260549575, "learning_rate": 0.00018836462651759806, "loss": 23.0, "step": 2609 }, { "epoch": 1.5666266506602642, "grad_norm": 0.0008212673128582537, "learning_rate": 0.00018835578096679885, "loss": 23.0, "step": 2610 }, { "epoch": 1.5672268907563025, "grad_norm": 0.0017521216068416834, "learning_rate": 0.0001883469322628091, "loss": 23.0, "step": 2611 }, { "epoch": 1.5678271308523408, "grad_norm": 0.0013428755337372422, "learning_rate": 0.0001883380804059446, "loss": 23.0, "step": 2612 }, { "epoch": 1.5684273709483794, "grad_norm": 0.0019537501502782106, "learning_rate": 0.00018832922539652124, "loss": 23.0, "step": 2613 }, { "epoch": 1.569027611044418, "grad_norm": 0.0013494847808033228, "learning_rate": 0.00018832036723485503, "loss": 23.0, "step": 2614 }, { "epoch": 1.5696278511404562, "grad_norm": 0.0013262125430628657, "learning_rate": 0.00018831150592126208, "loss": 23.0, "step": 2615 }, { "epoch": 1.5702280912364945, "grad_norm": 0.0012583464849740267, "learning_rate": 0.00018830264145605868, "loss": 23.0, "step": 2616 }, { "epoch": 1.570828331332533, "grad_norm": 0.0018956343410536647, "learning_rate": 0.00018829377383956114, "loss": 23.0, "step": 2617 }, { "epoch": 1.5714285714285714, "grad_norm": 0.0012951820390298963, "learning_rate": 0.0001882849030720859, "loss": 23.0, "step": 2618 }, { "epoch": 1.5720288115246097, "grad_norm": 0.0012970389798283577, "learning_rate": 0.0001882760291539496, "loss": 23.0, "step": 2619 }, { "epoch": 1.5726290516206483, "grad_norm": 0.001099150162190199, "learning_rate": 0.0001882671520854689, "loss": 23.0, "step": 2620 }, { "epoch": 1.5732292917166868, "grad_norm": 0.002676828298717737, "learning_rate": 0.00018825827186696057, "loss": 23.0, "step": 2621 }, { "epoch": 1.5738295318127251, "grad_norm": 0.001391171826981008, "learning_rate": 0.00018824938849874157, "loss": 23.0, "step": 2622 }, { "epoch": 1.5744297719087634, "grad_norm": 0.0007992578903213143, "learning_rate": 0.00018824050198112886, "loss": 23.0, "step": 2623 }, { "epoch": 1.575030012004802, "grad_norm": 0.0015663999365642667, "learning_rate": 0.00018823161231443965, "loss": 23.0, "step": 2624 }, { "epoch": 1.5756302521008403, "grad_norm": 0.00034730014158412814, "learning_rate": 0.00018822271949899114, "loss": 23.0, "step": 2625 }, { "epoch": 1.5762304921968786, "grad_norm": 0.0012912418460473418, "learning_rate": 0.00018821382353510072, "loss": 23.0, "step": 2626 }, { "epoch": 1.5768307322929171, "grad_norm": 0.001319359173066914, "learning_rate": 0.00018820492442308583, "loss": 23.0, "step": 2627 }, { "epoch": 1.5774309723889557, "grad_norm": 0.0020128735341131687, "learning_rate": 0.00018819602216326407, "loss": 23.0, "step": 2628 }, { "epoch": 1.578031212484994, "grad_norm": 0.0011375844478607178, "learning_rate": 0.00018818711675595316, "loss": 23.0, "step": 2629 }, { "epoch": 1.5786314525810323, "grad_norm": 0.001151563599705696, "learning_rate": 0.0001881782082014709, "loss": 23.0, "step": 2630 }, { "epoch": 1.5792316926770709, "grad_norm": 0.0022804574109613895, "learning_rate": 0.0001881692965001352, "loss": 23.0, "step": 2631 }, { "epoch": 1.5798319327731094, "grad_norm": 0.0008478496456518769, "learning_rate": 0.00018816038165226414, "loss": 23.0, "step": 2632 }, { "epoch": 1.5804321728691475, "grad_norm": 0.001113467151299119, "learning_rate": 0.0001881514636581758, "loss": 23.0, "step": 2633 }, { "epoch": 1.581032412965186, "grad_norm": 0.0009239333448931575, "learning_rate": 0.00018814254251818847, "loss": 23.0, "step": 2634 }, { "epoch": 1.5816326530612246, "grad_norm": 0.001530701294541359, "learning_rate": 0.00018813361823262056, "loss": 23.0, "step": 2635 }, { "epoch": 1.582232893157263, "grad_norm": 0.0008521588752046227, "learning_rate": 0.0001881246908017905, "loss": 23.0, "step": 2636 }, { "epoch": 1.5828331332533012, "grad_norm": 0.001877720351330936, "learning_rate": 0.00018811576022601691, "loss": 23.0, "step": 2637 }, { "epoch": 1.5834333733493398, "grad_norm": 0.0016532663721591234, "learning_rate": 0.0001881068265056185, "loss": 23.0, "step": 2638 }, { "epoch": 1.5840336134453783, "grad_norm": 0.0017709649400785565, "learning_rate": 0.0001880978896409141, "loss": 23.0, "step": 2639 }, { "epoch": 1.5846338535414166, "grad_norm": 0.0004166093422099948, "learning_rate": 0.00018808894963222262, "loss": 23.0, "step": 2640 }, { "epoch": 1.585234093637455, "grad_norm": 0.0013971725711598992, "learning_rate": 0.00018808000647986312, "loss": 23.0, "step": 2641 }, { "epoch": 1.5858343337334935, "grad_norm": 0.0002562387671787292, "learning_rate": 0.00018807106018415477, "loss": 23.0, "step": 2642 }, { "epoch": 1.5864345738295318, "grad_norm": 0.0017581944121047854, "learning_rate": 0.00018806211074541681, "loss": 23.0, "step": 2643 }, { "epoch": 1.58703481392557, "grad_norm": 0.0027277956251055002, "learning_rate": 0.00018805315816396867, "loss": 23.0, "step": 2644 }, { "epoch": 1.5876350540216086, "grad_norm": 0.0008474817150272429, "learning_rate": 0.0001880442024401298, "loss": 23.0, "step": 2645 }, { "epoch": 1.5882352941176472, "grad_norm": 0.0010701556457206607, "learning_rate": 0.00018803524357421982, "loss": 23.0, "step": 2646 }, { "epoch": 1.5888355342136855, "grad_norm": 0.001056216424331069, "learning_rate": 0.00018802628156655844, "loss": 23.0, "step": 2647 }, { "epoch": 1.5894357743097238, "grad_norm": 0.0007937379996292293, "learning_rate": 0.00018801731641746548, "loss": 23.0, "step": 2648 }, { "epoch": 1.5900360144057624, "grad_norm": 0.0021392793860286474, "learning_rate": 0.00018800834812726097, "loss": 23.0, "step": 2649 }, { "epoch": 1.5906362545018007, "grad_norm": 0.0005592108354903758, "learning_rate": 0.00018799937669626484, "loss": 23.0, "step": 2650 }, { "epoch": 1.591236494597839, "grad_norm": 0.0013419737806543708, "learning_rate": 0.00018799040212479735, "loss": 23.0, "step": 2651 }, { "epoch": 1.5918367346938775, "grad_norm": 0.0014628679491579533, "learning_rate": 0.00018798142441317874, "loss": 23.0, "step": 2652 }, { "epoch": 1.592436974789916, "grad_norm": 0.002074423711746931, "learning_rate": 0.0001879724435617294, "loss": 23.0, "step": 2653 }, { "epoch": 1.5930372148859544, "grad_norm": 0.001169572351500392, "learning_rate": 0.00018796345957076987, "loss": 23.0, "step": 2654 }, { "epoch": 1.5936374549819927, "grad_norm": 0.0019296786049380898, "learning_rate": 0.0001879544724406207, "loss": 23.0, "step": 2655 }, { "epoch": 1.5942376950780313, "grad_norm": 0.002079583238810301, "learning_rate": 0.00018794548217160268, "loss": 23.0, "step": 2656 }, { "epoch": 1.5948379351740696, "grad_norm": 0.0011784069938585162, "learning_rate": 0.0001879364887640366, "loss": 23.0, "step": 2657 }, { "epoch": 1.595438175270108, "grad_norm": 0.0019995667971670628, "learning_rate": 0.00018792749221824346, "loss": 23.0, "step": 2658 }, { "epoch": 1.5960384153661464, "grad_norm": 0.0017403881065547466, "learning_rate": 0.0001879184925345443, "loss": 23.0, "step": 2659 }, { "epoch": 1.596638655462185, "grad_norm": 0.002742258831858635, "learning_rate": 0.00018790948971326025, "loss": 23.0, "step": 2660 }, { "epoch": 1.5972388955582233, "grad_norm": 0.001577183953486383, "learning_rate": 0.0001879004837547127, "loss": 23.0, "step": 2661 }, { "epoch": 1.5978391356542616, "grad_norm": 0.003025846555829048, "learning_rate": 0.00018789147465922293, "loss": 23.0, "step": 2662 }, { "epoch": 1.5984393757503002, "grad_norm": 0.0008754048030823469, "learning_rate": 0.00018788246242711258, "loss": 23.0, "step": 2663 }, { "epoch": 1.5990396158463387, "grad_norm": 0.0012841924326494336, "learning_rate": 0.00018787344705870318, "loss": 23.0, "step": 2664 }, { "epoch": 1.5996398559423768, "grad_norm": 0.0024323989637196064, "learning_rate": 0.00018786442855431648, "loss": 23.0, "step": 2665 }, { "epoch": 1.6002400960384153, "grad_norm": 0.0006733940681442618, "learning_rate": 0.00018785540691427434, "loss": 23.0, "step": 2666 }, { "epoch": 1.6008403361344539, "grad_norm": 0.0016225153813138604, "learning_rate": 0.0001878463821388987, "loss": 23.0, "step": 2667 }, { "epoch": 1.6014405762304922, "grad_norm": 0.0007324088364839554, "learning_rate": 0.0001878373542285117, "loss": 23.0, "step": 2668 }, { "epoch": 1.6020408163265305, "grad_norm": 0.0010630915639922023, "learning_rate": 0.00018782832318343546, "loss": 23.0, "step": 2669 }, { "epoch": 1.602641056422569, "grad_norm": 0.0016140867955982685, "learning_rate": 0.00018781928900399227, "loss": 23.0, "step": 2670 }, { "epoch": 1.6032412965186076, "grad_norm": 0.0014620976289734244, "learning_rate": 0.00018781025169050454, "loss": 23.0, "step": 2671 }, { "epoch": 1.603841536614646, "grad_norm": 0.0013933505397289991, "learning_rate": 0.00018780121124329483, "loss": 23.0, "step": 2672 }, { "epoch": 1.6044417767106842, "grad_norm": 0.0016851526452228427, "learning_rate": 0.00018779216766268576, "loss": 23.0, "step": 2673 }, { "epoch": 1.6050420168067228, "grad_norm": 0.0022437949664890766, "learning_rate": 0.00018778312094900002, "loss": 23.0, "step": 2674 }, { "epoch": 1.605642256902761, "grad_norm": 0.0025705392472445965, "learning_rate": 0.00018777407110256048, "loss": 23.0, "step": 2675 }, { "epoch": 1.6062424969987994, "grad_norm": 0.0014031381579115987, "learning_rate": 0.00018776501812369016, "loss": 23.0, "step": 2676 }, { "epoch": 1.606842737094838, "grad_norm": 0.001516643795184791, "learning_rate": 0.0001877559620127121, "loss": 23.0, "step": 2677 }, { "epoch": 1.6074429771908765, "grad_norm": 0.0029964365530759096, "learning_rate": 0.00018774690276994948, "loss": 23.0, "step": 2678 }, { "epoch": 1.6080432172869148, "grad_norm": 0.0010419205063953996, "learning_rate": 0.0001877378403957256, "loss": 23.0, "step": 2679 }, { "epoch": 1.6086434573829531, "grad_norm": 0.0006046218913979828, "learning_rate": 0.0001877287748903639, "loss": 23.0, "step": 2680 }, { "epoch": 1.6092436974789917, "grad_norm": 0.0015536684077233076, "learning_rate": 0.0001877197062541879, "loss": 23.0, "step": 2681 }, { "epoch": 1.60984393757503, "grad_norm": 0.0012977086007595062, "learning_rate": 0.0001877106344875212, "loss": 23.0, "step": 2682 }, { "epoch": 1.6104441776710683, "grad_norm": 0.0015513448743149638, "learning_rate": 0.00018770155959068761, "loss": 23.0, "step": 2683 }, { "epoch": 1.6110444177671068, "grad_norm": 0.0021471292711794376, "learning_rate": 0.0001876924815640109, "loss": 23.0, "step": 2684 }, { "epoch": 1.6116446578631454, "grad_norm": 0.0021347152069211006, "learning_rate": 0.00018768340040781512, "loss": 23.0, "step": 2685 }, { "epoch": 1.6122448979591837, "grad_norm": 0.0017109934706240892, "learning_rate": 0.00018767431612242436, "loss": 23.0, "step": 2686 }, { "epoch": 1.612845138055222, "grad_norm": 0.0010965627152472734, "learning_rate": 0.00018766522870816273, "loss": 23.0, "step": 2687 }, { "epoch": 1.6134453781512605, "grad_norm": 0.0008786679827608168, "learning_rate": 0.00018765613816535464, "loss": 23.0, "step": 2688 }, { "epoch": 1.614045618247299, "grad_norm": 0.0014945195289328694, "learning_rate": 0.00018764704449432444, "loss": 23.0, "step": 2689 }, { "epoch": 1.6146458583433372, "grad_norm": 0.0020942450501024723, "learning_rate": 0.00018763794769539668, "loss": 23.0, "step": 2690 }, { "epoch": 1.6152460984393757, "grad_norm": 0.0010354813421145082, "learning_rate": 0.00018762884776889598, "loss": 23.0, "step": 2691 }, { "epoch": 1.6158463385354143, "grad_norm": 0.0007208411116153002, "learning_rate": 0.00018761974471514714, "loss": 23.0, "step": 2692 }, { "epoch": 1.6164465786314526, "grad_norm": 0.0014213203685358167, "learning_rate": 0.00018761063853447497, "loss": 23.0, "step": 2693 }, { "epoch": 1.617046818727491, "grad_norm": 0.0028033480048179626, "learning_rate": 0.00018760152922720445, "loss": 23.0, "step": 2694 }, { "epoch": 1.6176470588235294, "grad_norm": 0.0010669436305761337, "learning_rate": 0.00018759241679366072, "loss": 23.0, "step": 2695 }, { "epoch": 1.618247298919568, "grad_norm": 0.002347268396988511, "learning_rate": 0.00018758330123416897, "loss": 23.0, "step": 2696 }, { "epoch": 1.6188475390156063, "grad_norm": 0.00081385433441028, "learning_rate": 0.0001875741825490545, "loss": 23.0, "step": 2697 }, { "epoch": 1.6194477791116446, "grad_norm": 0.0014208892825990915, "learning_rate": 0.00018756506073864266, "loss": 23.0, "step": 2698 }, { "epoch": 1.6200480192076832, "grad_norm": 0.002604991663247347, "learning_rate": 0.00018755593580325908, "loss": 23.0, "step": 2699 }, { "epoch": 1.6206482593037215, "grad_norm": 0.0017933185445144773, "learning_rate": 0.00018754680774322937, "loss": 23.0, "step": 2700 }, { "epoch": 1.6212484993997598, "grad_norm": 0.001248373999260366, "learning_rate": 0.0001875376765588793, "loss": 23.0, "step": 2701 }, { "epoch": 1.6218487394957983, "grad_norm": 0.0012778202071785927, "learning_rate": 0.00018752854225053471, "loss": 23.0, "step": 2702 }, { "epoch": 1.6224489795918369, "grad_norm": 0.0027483454905450344, "learning_rate": 0.0001875194048185216, "loss": 23.0, "step": 2703 }, { "epoch": 1.6230492196878752, "grad_norm": 0.002060843864455819, "learning_rate": 0.00018751026426316604, "loss": 23.0, "step": 2704 }, { "epoch": 1.6236494597839135, "grad_norm": 0.0008097304962575436, "learning_rate": 0.00018750112058479429, "loss": 23.0, "step": 2705 }, { "epoch": 1.624249699879952, "grad_norm": 0.0013644036371260881, "learning_rate": 0.00018749197378373263, "loss": 23.0, "step": 2706 }, { "epoch": 1.6248499399759904, "grad_norm": 0.002363650593906641, "learning_rate": 0.00018748282386030747, "loss": 23.0, "step": 2707 }, { "epoch": 1.6254501800720287, "grad_norm": 0.001418265514075756, "learning_rate": 0.00018747367081484533, "loss": 23.0, "step": 2708 }, { "epoch": 1.6260504201680672, "grad_norm": 0.001415880979038775, "learning_rate": 0.00018746451464767293, "loss": 23.0, "step": 2709 }, { "epoch": 1.6266506602641058, "grad_norm": 0.0013812303077429533, "learning_rate": 0.00018745535535911697, "loss": 23.0, "step": 2710 }, { "epoch": 1.627250900360144, "grad_norm": 0.0014266427606344223, "learning_rate": 0.0001874461929495043, "loss": 23.0, "step": 2711 }, { "epoch": 1.6278511404561824, "grad_norm": 0.0015677702613174915, "learning_rate": 0.000187437027419162, "loss": 23.0, "step": 2712 }, { "epoch": 1.628451380552221, "grad_norm": 0.001671108417212963, "learning_rate": 0.00018742785876841708, "loss": 23.0, "step": 2713 }, { "epoch": 1.6290516206482593, "grad_norm": 0.002867297735065222, "learning_rate": 0.00018741868699759676, "loss": 23.0, "step": 2714 }, { "epoch": 1.6296518607442976, "grad_norm": 0.0007195291109383106, "learning_rate": 0.0001874095121070284, "loss": 23.0, "step": 2715 }, { "epoch": 1.6302521008403361, "grad_norm": 0.0005874624475836754, "learning_rate": 0.0001874003340970394, "loss": 23.0, "step": 2716 }, { "epoch": 1.6308523409363747, "grad_norm": 0.0006918995641171932, "learning_rate": 0.00018739115296795725, "loss": 23.0, "step": 2717 }, { "epoch": 1.631452581032413, "grad_norm": 0.0035936187487095594, "learning_rate": 0.0001873819687201097, "loss": 23.0, "step": 2718 }, { "epoch": 1.6320528211284513, "grad_norm": 0.0008181266020983458, "learning_rate": 0.0001873727813538244, "loss": 23.0, "step": 2719 }, { "epoch": 1.6326530612244898, "grad_norm": 0.0016008074162527919, "learning_rate": 0.00018736359086942933, "loss": 23.0, "step": 2720 }, { "epoch": 1.6332533013205284, "grad_norm": 0.00043132936116307974, "learning_rate": 0.00018735439726725242, "loss": 23.0, "step": 2721 }, { "epoch": 1.6338535414165665, "grad_norm": 0.001108235097490251, "learning_rate": 0.00018734520054762178, "loss": 23.0, "step": 2722 }, { "epoch": 1.634453781512605, "grad_norm": 0.0010154407937079668, "learning_rate": 0.00018733600071086558, "loss": 23.0, "step": 2723 }, { "epoch": 1.6350540216086435, "grad_norm": 0.0015051518566906452, "learning_rate": 0.00018732679775731222, "loss": 23.0, "step": 2724 }, { "epoch": 1.6356542617046819, "grad_norm": 0.0019470944534987211, "learning_rate": 0.00018731759168729003, "loss": 23.0, "step": 2725 }, { "epoch": 1.6362545018007202, "grad_norm": 0.0026996361557394266, "learning_rate": 0.00018730838250112763, "loss": 23.0, "step": 2726 }, { "epoch": 1.6368547418967587, "grad_norm": 0.0007813902338966727, "learning_rate": 0.00018729917019915364, "loss": 23.0, "step": 2727 }, { "epoch": 1.6374549819927973, "grad_norm": 0.0006077332654967904, "learning_rate": 0.0001872899547816968, "loss": 23.0, "step": 2728 }, { "epoch": 1.6380552220888356, "grad_norm": 0.001340494374744594, "learning_rate": 0.00018728073624908604, "loss": 23.0, "step": 2729 }, { "epoch": 1.638655462184874, "grad_norm": 0.001249544438906014, "learning_rate": 0.0001872715146016503, "loss": 23.0, "step": 2730 }, { "epoch": 1.6392557022809124, "grad_norm": 0.002360557671636343, "learning_rate": 0.0001872622898397187, "loss": 23.0, "step": 2731 }, { "epoch": 1.6398559423769508, "grad_norm": 0.0006911809323355556, "learning_rate": 0.00018725306196362045, "loss": 23.0, "step": 2732 }, { "epoch": 1.640456182472989, "grad_norm": 0.0007208830211311579, "learning_rate": 0.00018724383097368485, "loss": 23.0, "step": 2733 }, { "epoch": 1.6410564225690276, "grad_norm": 0.002218914683908224, "learning_rate": 0.00018723459687024135, "loss": 23.0, "step": 2734 }, { "epoch": 1.6416566626650662, "grad_norm": 0.00204529264010489, "learning_rate": 0.00018722535965361948, "loss": 23.0, "step": 2735 }, { "epoch": 1.6422569027611045, "grad_norm": 0.0008679120219312608, "learning_rate": 0.00018721611932414887, "loss": 23.0, "step": 2736 }, { "epoch": 1.6428571428571428, "grad_norm": 0.0013889900874346495, "learning_rate": 0.00018720687588215932, "loss": 23.0, "step": 2737 }, { "epoch": 1.6434573829531813, "grad_norm": 0.0008765452657826245, "learning_rate": 0.00018719762932798068, "loss": 23.0, "step": 2738 }, { "epoch": 1.6440576230492197, "grad_norm": 0.0008362770895473659, "learning_rate": 0.000187188379661943, "loss": 23.0, "step": 2739 }, { "epoch": 1.644657863145258, "grad_norm": 0.002404067199677229, "learning_rate": 0.0001871791268843763, "loss": 23.0, "step": 2740 }, { "epoch": 1.6452581032412965, "grad_norm": 0.0017013377510011196, "learning_rate": 0.00018716987099561078, "loss": 23.0, "step": 2741 }, { "epoch": 1.645858343337335, "grad_norm": 0.0016773076495155692, "learning_rate": 0.00018716061199597684, "loss": 23.0, "step": 2742 }, { "epoch": 1.6464585834333734, "grad_norm": 0.0012956123100593686, "learning_rate": 0.0001871513498858048, "loss": 23.0, "step": 2743 }, { "epoch": 1.6470588235294117, "grad_norm": 0.0014770814450457692, "learning_rate": 0.0001871420846654253, "loss": 23.0, "step": 2744 }, { "epoch": 1.6476590636254502, "grad_norm": 0.0028329326305538416, "learning_rate": 0.00018713281633516896, "loss": 23.0, "step": 2745 }, { "epoch": 1.6482593037214885, "grad_norm": 0.002407442545518279, "learning_rate": 0.00018712354489536654, "loss": 23.0, "step": 2746 }, { "epoch": 1.6488595438175269, "grad_norm": 0.0009911904344335198, "learning_rate": 0.00018711427034634893, "loss": 23.0, "step": 2747 }, { "epoch": 1.6494597839135654, "grad_norm": 0.0009113244013860822, "learning_rate": 0.00018710499268844704, "loss": 23.0, "step": 2748 }, { "epoch": 1.650060024009604, "grad_norm": 0.0010095303878188133, "learning_rate": 0.00018709571192199206, "loss": 23.0, "step": 2749 }, { "epoch": 1.6506602641056423, "grad_norm": 0.0011850446462631226, "learning_rate": 0.00018708642804731517, "loss": 23.0, "step": 2750 }, { "epoch": 1.6512605042016806, "grad_norm": 0.0017300506588071585, "learning_rate": 0.00018707714106474762, "loss": 23.0, "step": 2751 }, { "epoch": 1.6518607442977191, "grad_norm": 0.0003194924211129546, "learning_rate": 0.00018706785097462095, "loss": 23.0, "step": 2752 }, { "epoch": 1.6524609843937577, "grad_norm": 0.0020195262040942907, "learning_rate": 0.00018705855777726663, "loss": 23.0, "step": 2753 }, { "epoch": 1.6530612244897958, "grad_norm": 0.0017491821199655533, "learning_rate": 0.00018704926147301628, "loss": 23.0, "step": 2754 }, { "epoch": 1.6536614645858343, "grad_norm": 0.0022025639191269875, "learning_rate": 0.00018703996206220178, "loss": 23.0, "step": 2755 }, { "epoch": 1.6542617046818728, "grad_norm": 0.0026374238077551126, "learning_rate": 0.00018703065954515488, "loss": 23.0, "step": 2756 }, { "epoch": 1.6548619447779112, "grad_norm": 0.0011183744063600898, "learning_rate": 0.00018702135392220762, "loss": 23.0, "step": 2757 }, { "epoch": 1.6554621848739495, "grad_norm": 0.0015539663145318627, "learning_rate": 0.0001870120451936921, "loss": 23.0, "step": 2758 }, { "epoch": 1.656062424969988, "grad_norm": 0.0021634141448885202, "learning_rate": 0.00018700273335994048, "loss": 23.0, "step": 2759 }, { "epoch": 1.6566626650660266, "grad_norm": 0.0017279998864978552, "learning_rate": 0.0001869934184212851, "loss": 23.0, "step": 2760 }, { "epoch": 1.6572629051620649, "grad_norm": 0.0007326691993512213, "learning_rate": 0.00018698410037805842, "loss": 23.0, "step": 2761 }, { "epoch": 1.6578631452581032, "grad_norm": 0.0009592922870069742, "learning_rate": 0.00018697477923059294, "loss": 23.0, "step": 2762 }, { "epoch": 1.6584633853541417, "grad_norm": 0.001174344215542078, "learning_rate": 0.00018696545497922133, "loss": 23.0, "step": 2763 }, { "epoch": 1.65906362545018, "grad_norm": 0.0005253737326711416, "learning_rate": 0.0001869561276242763, "loss": 23.0, "step": 2764 }, { "epoch": 1.6596638655462184, "grad_norm": 0.002293418627232313, "learning_rate": 0.00018694679716609077, "loss": 23.0, "step": 2765 }, { "epoch": 1.660264105642257, "grad_norm": 0.001944358111359179, "learning_rate": 0.00018693746360499768, "loss": 23.0, "step": 2766 }, { "epoch": 1.6608643457382954, "grad_norm": 0.0005258253659121692, "learning_rate": 0.0001869281269413302, "loss": 23.0, "step": 2767 }, { "epoch": 1.6614645858343338, "grad_norm": 0.0025244082789868116, "learning_rate": 0.00018691878717542145, "loss": 23.0, "step": 2768 }, { "epoch": 1.662064825930372, "grad_norm": 0.003507652087137103, "learning_rate": 0.00018690944430760473, "loss": 23.0, "step": 2769 }, { "epoch": 1.6626650660264106, "grad_norm": 0.0008565374300815165, "learning_rate": 0.0001869000983382135, "loss": 23.0, "step": 2770 }, { "epoch": 1.663265306122449, "grad_norm": 0.0023754527792334557, "learning_rate": 0.00018689074926758138, "loss": 23.0, "step": 2771 }, { "epoch": 1.6638655462184873, "grad_norm": 0.0007233821670524776, "learning_rate": 0.00018688139709604186, "loss": 23.0, "step": 2772 }, { "epoch": 1.6644657863145258, "grad_norm": 0.0016606366261839867, "learning_rate": 0.00018687204182392874, "loss": 23.0, "step": 2773 }, { "epoch": 1.6650660264105643, "grad_norm": 0.000989391002804041, "learning_rate": 0.00018686268345157595, "loss": 23.0, "step": 2774 }, { "epoch": 1.6656662665066027, "grad_norm": 0.001483695930801332, "learning_rate": 0.00018685332197931743, "loss": 23.0, "step": 2775 }, { "epoch": 1.666266506602641, "grad_norm": 0.0011198918800801039, "learning_rate": 0.00018684395740748726, "loss": 23.0, "step": 2776 }, { "epoch": 1.6668667466986795, "grad_norm": 0.0007456332677975297, "learning_rate": 0.00018683458973641962, "loss": 23.0, "step": 2777 }, { "epoch": 1.667466986794718, "grad_norm": 0.001662615453824401, "learning_rate": 0.00018682521896644882, "loss": 23.0, "step": 2778 }, { "epoch": 1.6680672268907561, "grad_norm": 0.001044997712597251, "learning_rate": 0.00018681584509790936, "loss": 23.0, "step": 2779 }, { "epoch": 1.6686674669867947, "grad_norm": 0.0005202651955187321, "learning_rate": 0.00018680646813113564, "loss": 23.0, "step": 2780 }, { "epoch": 1.6692677070828332, "grad_norm": 0.0014490924077108502, "learning_rate": 0.0001867970880664624, "loss": 23.0, "step": 2781 }, { "epoch": 1.6698679471788715, "grad_norm": 0.0013336183037608862, "learning_rate": 0.00018678770490422437, "loss": 23.0, "step": 2782 }, { "epoch": 1.6704681872749099, "grad_norm": 0.0011176721891388297, "learning_rate": 0.00018677831864475635, "loss": 23.0, "step": 2783 }, { "epoch": 1.6710684273709484, "grad_norm": 0.0011959094554185867, "learning_rate": 0.0001867689292883934, "loss": 23.0, "step": 2784 }, { "epoch": 1.671668667466987, "grad_norm": 0.0013386125210672617, "learning_rate": 0.00018675953683547055, "loss": 23.0, "step": 2785 }, { "epoch": 1.6722689075630253, "grad_norm": 0.0008990365895442665, "learning_rate": 0.00018675014128632299, "loss": 23.0, "step": 2786 }, { "epoch": 1.6728691476590636, "grad_norm": 0.001523659797385335, "learning_rate": 0.00018674074264128602, "loss": 23.0, "step": 2787 }, { "epoch": 1.6734693877551021, "grad_norm": 0.0012871394865214825, "learning_rate": 0.0001867313409006951, "loss": 23.0, "step": 2788 }, { "epoch": 1.6740696278511404, "grad_norm": 0.0008513812208548188, "learning_rate": 0.0001867219360648857, "loss": 23.0, "step": 2789 }, { "epoch": 1.6746698679471788, "grad_norm": 0.0009285034029744565, "learning_rate": 0.0001867125281341935, "loss": 23.0, "step": 2790 }, { "epoch": 1.6752701080432173, "grad_norm": 0.0003783182764891535, "learning_rate": 0.0001867031171089542, "loss": 23.0, "step": 2791 }, { "epoch": 1.6758703481392558, "grad_norm": 0.0021908339112997055, "learning_rate": 0.00018669370298950372, "loss": 23.0, "step": 2792 }, { "epoch": 1.6764705882352942, "grad_norm": 0.002253185026347637, "learning_rate": 0.00018668428577617795, "loss": 23.0, "step": 2793 }, { "epoch": 1.6770708283313325, "grad_norm": 0.0012638723710551858, "learning_rate": 0.00018667486546931298, "loss": 23.0, "step": 2794 }, { "epoch": 1.677671068427371, "grad_norm": 0.0018435172969475389, "learning_rate": 0.00018666544206924506, "loss": 23.0, "step": 2795 }, { "epoch": 1.6782713085234093, "grad_norm": 0.0008160858415067196, "learning_rate": 0.0001866560155763104, "loss": 23.0, "step": 2796 }, { "epoch": 1.6788715486194477, "grad_norm": 0.001816743635572493, "learning_rate": 0.0001866465859908455, "loss": 23.0, "step": 2797 }, { "epoch": 1.6794717887154862, "grad_norm": 0.001861235941760242, "learning_rate": 0.00018663715331318682, "loss": 23.0, "step": 2798 }, { "epoch": 1.6800720288115247, "grad_norm": 0.0009585749357938766, "learning_rate": 0.00018662771754367094, "loss": 23.0, "step": 2799 }, { "epoch": 1.680672268907563, "grad_norm": 0.0016571884043514729, "learning_rate": 0.00018661827868263472, "loss": 23.0, "step": 2800 }, { "epoch": 1.6812725090036014, "grad_norm": 0.001067697536200285, "learning_rate": 0.00018660883673041488, "loss": 23.0, "step": 2801 }, { "epoch": 1.68187274909964, "grad_norm": 0.0011590105714276433, "learning_rate": 0.00018659939168734848, "loss": 23.0, "step": 2802 }, { "epoch": 1.6824729891956782, "grad_norm": 0.0012792943743988872, "learning_rate": 0.00018658994355377252, "loss": 23.0, "step": 2803 }, { "epoch": 1.6830732292917165, "grad_norm": 0.0009999220492318273, "learning_rate": 0.00018658049233002426, "loss": 23.0, "step": 2804 }, { "epoch": 1.683673469387755, "grad_norm": 0.0029290425591170788, "learning_rate": 0.00018657103801644092, "loss": 23.0, "step": 2805 }, { "epoch": 1.6842737094837936, "grad_norm": 0.0036934264935553074, "learning_rate": 0.0001865615806133599, "loss": 23.0, "step": 2806 }, { "epoch": 1.684873949579832, "grad_norm": 0.0015194531297311187, "learning_rate": 0.00018655212012111877, "loss": 23.0, "step": 2807 }, { "epoch": 1.6854741896758703, "grad_norm": 0.0013169290032237768, "learning_rate": 0.00018654265654005506, "loss": 23.0, "step": 2808 }, { "epoch": 1.6860744297719088, "grad_norm": 0.0026581338606774807, "learning_rate": 0.0001865331898705066, "loss": 23.0, "step": 2809 }, { "epoch": 1.6866746698679473, "grad_norm": 0.0007871690904721618, "learning_rate": 0.00018652372011281117, "loss": 23.0, "step": 2810 }, { "epoch": 1.6872749099639854, "grad_norm": 0.0032649554777890444, "learning_rate": 0.0001865142472673067, "loss": 23.0, "step": 2811 }, { "epoch": 1.687875150060024, "grad_norm": 0.0015880510909482837, "learning_rate": 0.0001865047713343313, "loss": 23.0, "step": 2812 }, { "epoch": 1.6884753901560625, "grad_norm": 0.002896341495215893, "learning_rate": 0.00018649529231422316, "loss": 23.0, "step": 2813 }, { "epoch": 1.6890756302521008, "grad_norm": 0.0030981532763689756, "learning_rate": 0.0001864858102073205, "loss": 23.0, "step": 2814 }, { "epoch": 1.6896758703481392, "grad_norm": 0.0014712786069139838, "learning_rate": 0.00018647632501396177, "loss": 23.0, "step": 2815 }, { "epoch": 1.6902761104441777, "grad_norm": 0.0037119940388947725, "learning_rate": 0.00018646683673448542, "loss": 23.0, "step": 2816 }, { "epoch": 1.6908763505402162, "grad_norm": 0.0017612777883186936, "learning_rate": 0.0001864573453692301, "loss": 23.0, "step": 2817 }, { "epoch": 1.6914765906362546, "grad_norm": 0.001388410571962595, "learning_rate": 0.0001864478509185345, "loss": 23.0, "step": 2818 }, { "epoch": 1.6920768307322929, "grad_norm": 0.0010633680503815413, "learning_rate": 0.0001864383533827375, "loss": 23.0, "step": 2819 }, { "epoch": 1.6926770708283314, "grad_norm": 0.004036045633256435, "learning_rate": 0.000186428852762178, "loss": 23.0, "step": 2820 }, { "epoch": 1.6932773109243697, "grad_norm": 0.0016166233690455556, "learning_rate": 0.00018641934905719505, "loss": 23.0, "step": 2821 }, { "epoch": 1.693877551020408, "grad_norm": 0.001802432001568377, "learning_rate": 0.00018640984226812787, "loss": 23.0, "step": 2822 }, { "epoch": 1.6944777911164466, "grad_norm": 0.0005948299658484757, "learning_rate": 0.00018640033239531565, "loss": 23.0, "step": 2823 }, { "epoch": 1.6950780312124851, "grad_norm": 0.0018249761778861284, "learning_rate": 0.00018639081943909783, "loss": 23.0, "step": 2824 }, { "epoch": 1.6956782713085234, "grad_norm": 0.001116413390263915, "learning_rate": 0.00018638130339981387, "loss": 23.0, "step": 2825 }, { "epoch": 1.6962785114045618, "grad_norm": 0.0015681873774155974, "learning_rate": 0.00018637178427780343, "loss": 23.0, "step": 2826 }, { "epoch": 1.6968787515006003, "grad_norm": 0.0022008116357028484, "learning_rate": 0.00018636226207340614, "loss": 23.0, "step": 2827 }, { "epoch": 1.6974789915966386, "grad_norm": 0.0006623781519010663, "learning_rate": 0.0001863527367869619, "loss": 23.0, "step": 2828 }, { "epoch": 1.698079231692677, "grad_norm": 0.002556073712185025, "learning_rate": 0.00018634320841881059, "loss": 23.0, "step": 2829 }, { "epoch": 1.6986794717887155, "grad_norm": 0.0012070855591446161, "learning_rate": 0.0001863336769692923, "loss": 23.0, "step": 2830 }, { "epoch": 1.699279711884754, "grad_norm": 0.0018783850828185678, "learning_rate": 0.00018632414243874713, "loss": 23.0, "step": 2831 }, { "epoch": 1.6998799519807923, "grad_norm": 0.0010801813332363963, "learning_rate": 0.00018631460482751536, "loss": 23.0, "step": 2832 }, { "epoch": 1.7004801920768307, "grad_norm": 0.0010861080372706056, "learning_rate": 0.0001863050641359374, "loss": 23.0, "step": 2833 }, { "epoch": 1.7010804321728692, "grad_norm": 0.0012606565142050385, "learning_rate": 0.00018629552036435372, "loss": 23.0, "step": 2834 }, { "epoch": 1.7016806722689075, "grad_norm": 0.002117091091349721, "learning_rate": 0.00018628597351310483, "loss": 23.0, "step": 2835 }, { "epoch": 1.7022809123649458, "grad_norm": 0.0013018603203818202, "learning_rate": 0.00018627642358253158, "loss": 23.0, "step": 2836 }, { "epoch": 1.7028811524609844, "grad_norm": 0.0006756331422366202, "learning_rate": 0.00018626687057297464, "loss": 23.0, "step": 2837 }, { "epoch": 1.703481392557023, "grad_norm": 0.0012766298605129123, "learning_rate": 0.000186257314484775, "loss": 23.0, "step": 2838 }, { "epoch": 1.7040816326530612, "grad_norm": 0.0006693665054626763, "learning_rate": 0.00018624775531827373, "loss": 23.0, "step": 2839 }, { "epoch": 1.7046818727490995, "grad_norm": 0.002290289616212249, "learning_rate": 0.00018623819307381188, "loss": 23.0, "step": 2840 }, { "epoch": 1.705282112845138, "grad_norm": 0.0008764312951825559, "learning_rate": 0.00018622862775173076, "loss": 23.0, "step": 2841 }, { "epoch": 1.7058823529411766, "grad_norm": 0.0008235902641899884, "learning_rate": 0.00018621905935237173, "loss": 23.0, "step": 2842 }, { "epoch": 1.7064825930372147, "grad_norm": 0.0006964323110878468, "learning_rate": 0.00018620948787607627, "loss": 23.0, "step": 2843 }, { "epoch": 1.7070828331332533, "grad_norm": 0.0005906512960791588, "learning_rate": 0.00018619991332318594, "loss": 23.0, "step": 2844 }, { "epoch": 1.7076830732292918, "grad_norm": 0.002924280008301139, "learning_rate": 0.0001861903356940424, "loss": 23.0, "step": 2845 }, { "epoch": 1.7082833133253301, "grad_norm": 0.000374676805222407, "learning_rate": 0.00018618075498898752, "loss": 23.0, "step": 2846 }, { "epoch": 1.7088835534213684, "grad_norm": 0.0012125116772949696, "learning_rate": 0.0001861711712083632, "loss": 23.0, "step": 2847 }, { "epoch": 1.709483793517407, "grad_norm": 0.00048197314026765525, "learning_rate": 0.00018616158435251137, "loss": 23.0, "step": 2848 }, { "epoch": 1.7100840336134455, "grad_norm": 0.0022257508244365454, "learning_rate": 0.00018615199442177427, "loss": 23.0, "step": 2849 }, { "epoch": 1.7106842737094838, "grad_norm": 0.0027254424057900906, "learning_rate": 0.0001861424014164941, "loss": 23.0, "step": 2850 }, { "epoch": 1.7112845138055222, "grad_norm": 0.002013017423450947, "learning_rate": 0.0001861328053370132, "loss": 23.0, "step": 2851 }, { "epoch": 1.7118847539015607, "grad_norm": 0.000818777596578002, "learning_rate": 0.00018612320618367404, "loss": 23.0, "step": 2852 }, { "epoch": 1.712484993997599, "grad_norm": 0.002587414812296629, "learning_rate": 0.0001861136039568192, "loss": 23.0, "step": 2853 }, { "epoch": 1.7130852340936373, "grad_norm": 0.001048891805112362, "learning_rate": 0.00018610399865679134, "loss": 23.0, "step": 2854 }, { "epoch": 1.7136854741896759, "grad_norm": 0.004490484017878771, "learning_rate": 0.00018609439028393325, "loss": 23.0, "step": 2855 }, { "epoch": 1.7142857142857144, "grad_norm": 0.0020678737200796604, "learning_rate": 0.00018608477883858782, "loss": 23.0, "step": 2856 }, { "epoch": 1.7148859543817527, "grad_norm": 0.0016980924410745502, "learning_rate": 0.00018607516432109805, "loss": 23.0, "step": 2857 }, { "epoch": 1.715486194477791, "grad_norm": 0.000979147502221167, "learning_rate": 0.00018606554673180714, "loss": 23.0, "step": 2858 }, { "epoch": 1.7160864345738296, "grad_norm": 0.0006189555278979242, "learning_rate": 0.00018605592607105818, "loss": 23.0, "step": 2859 }, { "epoch": 1.716686674669868, "grad_norm": 0.0011676882859319448, "learning_rate": 0.00018604630233919466, "loss": 23.0, "step": 2860 }, { "epoch": 1.7172869147659062, "grad_norm": 0.002290237694978714, "learning_rate": 0.0001860366755365599, "loss": 23.0, "step": 2861 }, { "epoch": 1.7178871548619448, "grad_norm": 0.0006044538458809257, "learning_rate": 0.00018602704566349752, "loss": 23.0, "step": 2862 }, { "epoch": 1.7184873949579833, "grad_norm": 0.0009377237292937934, "learning_rate": 0.00018601741272035116, "loss": 23.0, "step": 2863 }, { "epoch": 1.7190876350540216, "grad_norm": 0.0015994718996807933, "learning_rate": 0.00018600777670746462, "loss": 23.0, "step": 2864 }, { "epoch": 1.71968787515006, "grad_norm": 0.001292977947741747, "learning_rate": 0.00018599813762518172, "loss": 23.0, "step": 2865 }, { "epoch": 1.7202881152460985, "grad_norm": 0.002400642493739724, "learning_rate": 0.00018598849547384654, "loss": 23.0, "step": 2866 }, { "epoch": 1.7208883553421368, "grad_norm": 0.0013523305533453822, "learning_rate": 0.00018597885025380313, "loss": 23.0, "step": 2867 }, { "epoch": 1.7214885954381751, "grad_norm": 0.0012043274473398924, "learning_rate": 0.00018596920196539574, "loss": 23.0, "step": 2868 }, { "epoch": 1.7220888355342137, "grad_norm": 0.0021065007895231247, "learning_rate": 0.00018595955060896863, "loss": 23.0, "step": 2869 }, { "epoch": 1.7226890756302522, "grad_norm": 0.0005539829144254327, "learning_rate": 0.0001859498961848663, "loss": 23.0, "step": 2870 }, { "epoch": 1.7232893157262905, "grad_norm": 0.000724462850484997, "learning_rate": 0.0001859402386934333, "loss": 23.0, "step": 2871 }, { "epoch": 1.7238895558223288, "grad_norm": 0.001463893218897283, "learning_rate": 0.0001859305781350142, "loss": 23.0, "step": 2872 }, { "epoch": 1.7244897959183674, "grad_norm": 0.001887418096885085, "learning_rate": 0.0001859209145099538, "loss": 23.0, "step": 2873 }, { "epoch": 1.725090036014406, "grad_norm": 0.001024300348944962, "learning_rate": 0.000185911247818597, "loss": 23.0, "step": 2874 }, { "epoch": 1.7256902761104442, "grad_norm": 0.0024245274253189564, "learning_rate": 0.00018590157806128876, "loss": 23.0, "step": 2875 }, { "epoch": 1.7262905162064826, "grad_norm": 0.0017824191600084305, "learning_rate": 0.00018589190523837416, "loss": 23.0, "step": 2876 }, { "epoch": 1.726890756302521, "grad_norm": 0.0017186804907396436, "learning_rate": 0.00018588222935019837, "loss": 23.0, "step": 2877 }, { "epoch": 1.7274909963985594, "grad_norm": 0.001040443079546094, "learning_rate": 0.00018587255039710676, "loss": 23.0, "step": 2878 }, { "epoch": 1.7280912364945977, "grad_norm": 0.0011490770848467946, "learning_rate": 0.0001858628683794447, "loss": 23.0, "step": 2879 }, { "epoch": 1.7286914765906363, "grad_norm": 0.003117022570222616, "learning_rate": 0.00018585318329755775, "loss": 23.0, "step": 2880 }, { "epoch": 1.7292917166866748, "grad_norm": 0.0019710492342710495, "learning_rate": 0.00018584349515179152, "loss": 23.0, "step": 2881 }, { "epoch": 1.7298919567827131, "grad_norm": 0.0033849983010441065, "learning_rate": 0.00018583380394249178, "loss": 23.0, "step": 2882 }, { "epoch": 1.7304921968787514, "grad_norm": 0.0011864303378388286, "learning_rate": 0.00018582410967000438, "loss": 23.0, "step": 2883 }, { "epoch": 1.73109243697479, "grad_norm": 0.0011380382347851992, "learning_rate": 0.0001858144123346752, "loss": 23.0, "step": 2884 }, { "epoch": 1.7316926770708283, "grad_norm": 0.0026864493265748024, "learning_rate": 0.00018580471193685045, "loss": 23.0, "step": 2885 }, { "epoch": 1.7322929171668666, "grad_norm": 0.001236348645761609, "learning_rate": 0.00018579500847687623, "loss": 23.0, "step": 2886 }, { "epoch": 1.7328931572629052, "grad_norm": 0.0026629730127751827, "learning_rate": 0.00018578530195509883, "loss": 23.0, "step": 2887 }, { "epoch": 1.7334933973589437, "grad_norm": 0.003911031875759363, "learning_rate": 0.0001857755923718647, "loss": 23.0, "step": 2888 }, { "epoch": 1.734093637454982, "grad_norm": 0.0026813908480107784, "learning_rate": 0.0001857658797275203, "loss": 23.0, "step": 2889 }, { "epoch": 1.7346938775510203, "grad_norm": 0.001345346332527697, "learning_rate": 0.0001857561640224123, "loss": 23.0, "step": 2890 }, { "epoch": 1.7352941176470589, "grad_norm": 0.0010961355874314904, "learning_rate": 0.00018574644525688738, "loss": 23.0, "step": 2891 }, { "epoch": 1.7358943577430972, "grad_norm": 0.0021427536848932505, "learning_rate": 0.00018573672343129237, "loss": 23.0, "step": 2892 }, { "epoch": 1.7364945978391355, "grad_norm": 0.0031297134701162577, "learning_rate": 0.00018572699854597425, "loss": 23.0, "step": 2893 }, { "epoch": 1.737094837935174, "grad_norm": 0.0006432196823880076, "learning_rate": 0.0001857172706012801, "loss": 23.0, "step": 2894 }, { "epoch": 1.7376950780312126, "grad_norm": 0.0024129992816597223, "learning_rate": 0.00018570753959755703, "loss": 23.0, "step": 2895 }, { "epoch": 1.738295318127251, "grad_norm": 0.0017073117196559906, "learning_rate": 0.00018569780553515233, "loss": 23.0, "step": 2896 }, { "epoch": 1.7388955582232892, "grad_norm": 0.0020329838152974844, "learning_rate": 0.00018568806841441342, "loss": 23.0, "step": 2897 }, { "epoch": 1.7394957983193278, "grad_norm": 0.0009188112453557551, "learning_rate": 0.00018567832823568773, "loss": 23.0, "step": 2898 }, { "epoch": 1.7400960384153663, "grad_norm": 0.001637371489778161, "learning_rate": 0.00018566858499932288, "loss": 23.0, "step": 2899 }, { "epoch": 1.7406962785114044, "grad_norm": 0.0016789011424407363, "learning_rate": 0.00018565883870566666, "loss": 23.0, "step": 2900 }, { "epoch": 1.741296518607443, "grad_norm": 0.0010591614991426468, "learning_rate": 0.0001856490893550668, "loss": 23.0, "step": 2901 }, { "epoch": 1.7418967587034815, "grad_norm": 0.004590094089508057, "learning_rate": 0.00018563933694787125, "loss": 23.0, "step": 2902 }, { "epoch": 1.7424969987995198, "grad_norm": 0.0015041956212371588, "learning_rate": 0.00018562958148442803, "loss": 23.0, "step": 2903 }, { "epoch": 1.7430972388955581, "grad_norm": 0.00048696741578169167, "learning_rate": 0.00018561982296508536, "loss": 23.0, "step": 2904 }, { "epoch": 1.7436974789915967, "grad_norm": 0.0018151112599298358, "learning_rate": 0.00018561006139019144, "loss": 23.0, "step": 2905 }, { "epoch": 1.7442977190876352, "grad_norm": 0.00167025753762573, "learning_rate": 0.00018560029676009462, "loss": 23.0, "step": 2906 }, { "epoch": 1.7448979591836735, "grad_norm": 0.0013312215451151133, "learning_rate": 0.00018559052907514341, "loss": 23.0, "step": 2907 }, { "epoch": 1.7454981992797118, "grad_norm": 0.002203074749559164, "learning_rate": 0.00018558075833568637, "loss": 23.0, "step": 2908 }, { "epoch": 1.7460984393757504, "grad_norm": 0.0014002182288095355, "learning_rate": 0.00018557098454207225, "loss": 23.0, "step": 2909 }, { "epoch": 1.7466986794717887, "grad_norm": 0.000825204944703728, "learning_rate": 0.00018556120769464975, "loss": 23.0, "step": 2910 }, { "epoch": 1.747298919567827, "grad_norm": 0.001794505980797112, "learning_rate": 0.00018555142779376786, "loss": 23.0, "step": 2911 }, { "epoch": 1.7478991596638656, "grad_norm": 0.0010396147845312953, "learning_rate": 0.00018554164483977558, "loss": 23.0, "step": 2912 }, { "epoch": 1.748499399759904, "grad_norm": 0.0007051727152429521, "learning_rate": 0.00018553185883302207, "loss": 23.0, "step": 2913 }, { "epoch": 1.7490996398559424, "grad_norm": 0.001316645066253841, "learning_rate": 0.00018552206977385648, "loss": 23.0, "step": 2914 }, { "epoch": 1.7496998799519807, "grad_norm": 0.0025363380555063486, "learning_rate": 0.00018551227766262822, "loss": 23.0, "step": 2915 }, { "epoch": 1.7503001200480193, "grad_norm": 0.0016197512159124017, "learning_rate": 0.00018550248249968673, "loss": 23.0, "step": 2916 }, { "epoch": 1.7509003601440576, "grad_norm": 0.0016571108717471361, "learning_rate": 0.0001854926842853816, "loss": 23.0, "step": 2917 }, { "epoch": 1.751500600240096, "grad_norm": 0.002475859597325325, "learning_rate": 0.00018548288302006248, "loss": 23.0, "step": 2918 }, { "epoch": 1.7521008403361344, "grad_norm": 0.0017440817318856716, "learning_rate": 0.00018547307870407913, "loss": 23.0, "step": 2919 }, { "epoch": 1.752701080432173, "grad_norm": 0.0030700908973813057, "learning_rate": 0.00018546327133778148, "loss": 23.0, "step": 2920 }, { "epoch": 1.7533013205282113, "grad_norm": 0.0023228423669934273, "learning_rate": 0.00018545346092151949, "loss": 23.0, "step": 2921 }, { "epoch": 1.7539015606242496, "grad_norm": 0.0006581857451237738, "learning_rate": 0.00018544364745564333, "loss": 23.0, "step": 2922 }, { "epoch": 1.7545018007202882, "grad_norm": 0.0017514914507046342, "learning_rate": 0.00018543383094050318, "loss": 23.0, "step": 2923 }, { "epoch": 1.7551020408163265, "grad_norm": 0.0010599003871902823, "learning_rate": 0.00018542401137644935, "loss": 23.0, "step": 2924 }, { "epoch": 1.7557022809123648, "grad_norm": 0.0017716344445943832, "learning_rate": 0.00018541418876383227, "loss": 23.0, "step": 2925 }, { "epoch": 1.7563025210084033, "grad_norm": 0.0004864973889198154, "learning_rate": 0.00018540436310300252, "loss": 23.0, "step": 2926 }, { "epoch": 1.7569027611044419, "grad_norm": 0.001132338191382587, "learning_rate": 0.00018539453439431072, "loss": 23.0, "step": 2927 }, { "epoch": 1.7575030012004802, "grad_norm": 0.0017699715681374073, "learning_rate": 0.0001853847026381077, "loss": 23.0, "step": 2928 }, { "epoch": 1.7581032412965185, "grad_norm": 0.0012988667003810406, "learning_rate": 0.00018537486783474423, "loss": 23.0, "step": 2929 }, { "epoch": 1.758703481392557, "grad_norm": 0.0019157581264153123, "learning_rate": 0.00018536502998457138, "loss": 23.0, "step": 2930 }, { "epoch": 1.7593037214885956, "grad_norm": 0.0022223247215151787, "learning_rate": 0.00018535518908794013, "loss": 23.0, "step": 2931 }, { "epoch": 1.7599039615846337, "grad_norm": 0.002175229834392667, "learning_rate": 0.00018534534514520182, "loss": 23.0, "step": 2932 }, { "epoch": 1.7605042016806722, "grad_norm": 0.0016925784293562174, "learning_rate": 0.0001853354981567076, "loss": 23.0, "step": 2933 }, { "epoch": 1.7611044417767108, "grad_norm": 0.001816912554204464, "learning_rate": 0.00018532564812280905, "loss": 23.0, "step": 2934 }, { "epoch": 1.761704681872749, "grad_norm": 0.0013755745021626353, "learning_rate": 0.00018531579504385751, "loss": 23.0, "step": 2935 }, { "epoch": 1.7623049219687874, "grad_norm": 0.0008345983806066215, "learning_rate": 0.00018530593892020477, "loss": 23.0, "step": 2936 }, { "epoch": 1.762905162064826, "grad_norm": 0.0022048347163945436, "learning_rate": 0.00018529607975220245, "loss": 23.0, "step": 2937 }, { "epoch": 1.7635054021608645, "grad_norm": 0.001479812664911151, "learning_rate": 0.0001852862175402025, "loss": 23.0, "step": 2938 }, { "epoch": 1.7641056422569028, "grad_norm": 0.000645735184662044, "learning_rate": 0.00018527635228455678, "loss": 23.0, "step": 2939 }, { "epoch": 1.7647058823529411, "grad_norm": 0.0014726044610142708, "learning_rate": 0.00018526648398561744, "loss": 23.0, "step": 2940 }, { "epoch": 1.7653061224489797, "grad_norm": 0.00045085468445904553, "learning_rate": 0.0001852566126437366, "loss": 23.0, "step": 2941 }, { "epoch": 1.765906362545018, "grad_norm": 0.0018929915968328714, "learning_rate": 0.00018524673825926657, "loss": 23.0, "step": 2942 }, { "epoch": 1.7665066026410563, "grad_norm": 0.0010176091454923153, "learning_rate": 0.00018523686083255972, "loss": 23.0, "step": 2943 }, { "epoch": 1.7671068427370948, "grad_norm": 0.0013840744504705071, "learning_rate": 0.00018522698036396856, "loss": 23.0, "step": 2944 }, { "epoch": 1.7677070828331334, "grad_norm": 0.001135845435783267, "learning_rate": 0.00018521709685384572, "loss": 23.0, "step": 2945 }, { "epoch": 1.7683073229291717, "grad_norm": 0.0012144126230850816, "learning_rate": 0.0001852072103025439, "loss": 23.0, "step": 2946 }, { "epoch": 1.76890756302521, "grad_norm": 0.0015819761902093887, "learning_rate": 0.00018519732071041594, "loss": 23.0, "step": 2947 }, { "epoch": 1.7695078031212486, "grad_norm": 0.0015256191836670041, "learning_rate": 0.00018518742807781472, "loss": 23.0, "step": 2948 }, { "epoch": 1.7701080432172869, "grad_norm": 0.0010233953362330794, "learning_rate": 0.0001851775324050933, "loss": 23.0, "step": 2949 }, { "epoch": 1.7707082833133252, "grad_norm": 0.0009597190655767918, "learning_rate": 0.00018516763369260493, "loss": 23.0, "step": 2950 }, { "epoch": 1.7713085234093637, "grad_norm": 0.0007810408133082092, "learning_rate": 0.0001851577319407027, "loss": 23.0, "step": 2951 }, { "epoch": 1.7719087635054023, "grad_norm": 0.0017629596404731274, "learning_rate": 0.00018514782714974012, "loss": 23.0, "step": 2952 }, { "epoch": 1.7725090036014406, "grad_norm": 0.0036176845896989107, "learning_rate": 0.00018513791932007063, "loss": 23.0, "step": 2953 }, { "epoch": 1.773109243697479, "grad_norm": 0.0021507435012608767, "learning_rate": 0.0001851280084520478, "loss": 23.0, "step": 2954 }, { "epoch": 1.7737094837935174, "grad_norm": 0.0048975893296301365, "learning_rate": 0.00018511809454602526, "loss": 23.0, "step": 2955 }, { "epoch": 1.7743097238895558, "grad_norm": 0.0016850210959091783, "learning_rate": 0.00018510817760235694, "loss": 23.0, "step": 2956 }, { "epoch": 1.774909963985594, "grad_norm": 0.0007171302568167448, "learning_rate": 0.00018509825762139665, "loss": 23.0, "step": 2957 }, { "epoch": 1.7755102040816326, "grad_norm": 0.0011437806533649564, "learning_rate": 0.00018508833460349847, "loss": 23.0, "step": 2958 }, { "epoch": 1.7761104441776712, "grad_norm": 0.003833747934550047, "learning_rate": 0.00018507840854901648, "loss": 23.0, "step": 2959 }, { "epoch": 1.7767106842737095, "grad_norm": 0.0019063903018832207, "learning_rate": 0.00018506847945830494, "loss": 23.0, "step": 2960 }, { "epoch": 1.7773109243697478, "grad_norm": 0.0016151851741597056, "learning_rate": 0.0001850585473317182, "loss": 23.0, "step": 2961 }, { "epoch": 1.7779111644657863, "grad_norm": 0.004216872155666351, "learning_rate": 0.0001850486121696107, "loss": 23.0, "step": 2962 }, { "epoch": 1.7785114045618249, "grad_norm": 0.0011847265996038914, "learning_rate": 0.000185038673972337, "loss": 23.0, "step": 2963 }, { "epoch": 1.7791116446578632, "grad_norm": 0.0010568160796537995, "learning_rate": 0.00018502873274025178, "loss": 23.0, "step": 2964 }, { "epoch": 1.7797118847539015, "grad_norm": 0.001491167233325541, "learning_rate": 0.00018501878847370978, "loss": 23.0, "step": 2965 }, { "epoch": 1.78031212484994, "grad_norm": 0.000517088919878006, "learning_rate": 0.00018500884117306595, "loss": 23.0, "step": 2966 }, { "epoch": 1.7809123649459784, "grad_norm": 0.0025163828395307064, "learning_rate": 0.00018499889083867523, "loss": 23.0, "step": 2967 }, { "epoch": 1.7815126050420167, "grad_norm": 0.0018170212861150503, "learning_rate": 0.00018498893747089277, "loss": 23.0, "step": 2968 }, { "epoch": 1.7821128451380552, "grad_norm": 0.0015163187636062503, "learning_rate": 0.0001849789810700737, "loss": 23.0, "step": 2969 }, { "epoch": 1.7827130852340938, "grad_norm": 0.003132780082523823, "learning_rate": 0.00018496902163657342, "loss": 23.0, "step": 2970 }, { "epoch": 1.783313325330132, "grad_norm": 0.001669717370532453, "learning_rate": 0.00018495905917074732, "loss": 23.0, "step": 2971 }, { "epoch": 1.7839135654261704, "grad_norm": 0.0005561247817240655, "learning_rate": 0.00018494909367295091, "loss": 23.0, "step": 2972 }, { "epoch": 1.784513805522209, "grad_norm": 0.0017999939154833555, "learning_rate": 0.0001849391251435399, "loss": 23.0, "step": 2973 }, { "epoch": 1.7851140456182473, "grad_norm": 0.0021132882684469223, "learning_rate": 0.00018492915358286996, "loss": 23.0, "step": 2974 }, { "epoch": 1.7857142857142856, "grad_norm": 0.003159108804538846, "learning_rate": 0.00018491917899129704, "loss": 23.0, "step": 2975 }, { "epoch": 1.7863145258103241, "grad_norm": 0.0003376493405085057, "learning_rate": 0.00018490920136917706, "loss": 23.0, "step": 2976 }, { "epoch": 1.7869147659063627, "grad_norm": 0.001667432370595634, "learning_rate": 0.00018489922071686604, "loss": 23.0, "step": 2977 }, { "epoch": 1.787515006002401, "grad_norm": 0.0015298868529498577, "learning_rate": 0.0001848892370347203, "loss": 23.0, "step": 2978 }, { "epoch": 1.7881152460984393, "grad_norm": 0.0008003988186828792, "learning_rate": 0.00018487925032309598, "loss": 23.0, "step": 2979 }, { "epoch": 1.7887154861944778, "grad_norm": 0.00343932188116014, "learning_rate": 0.00018486926058234956, "loss": 23.0, "step": 2980 }, { "epoch": 1.7893157262905162, "grad_norm": 0.0029429318383336067, "learning_rate": 0.00018485926781283758, "loss": 23.0, "step": 2981 }, { "epoch": 1.7899159663865545, "grad_norm": 0.00047122538671828806, "learning_rate": 0.0001848492720149166, "loss": 23.0, "step": 2982 }, { "epoch": 1.790516206482593, "grad_norm": 0.0006141350604593754, "learning_rate": 0.00018483927318894337, "loss": 23.0, "step": 2983 }, { "epoch": 1.7911164465786316, "grad_norm": 0.0007275057141669095, "learning_rate": 0.00018482927133527469, "loss": 23.0, "step": 2984 }, { "epoch": 1.7917166866746699, "grad_norm": 0.0007060296484269202, "learning_rate": 0.00018481926645426752, "loss": 23.0, "step": 2985 }, { "epoch": 1.7923169267707082, "grad_norm": 0.0014618011191487312, "learning_rate": 0.00018480925854627892, "loss": 23.0, "step": 2986 }, { "epoch": 1.7929171668667467, "grad_norm": 0.0011972852516919374, "learning_rate": 0.00018479924761166604, "loss": 23.0, "step": 2987 }, { "epoch": 1.7935174069627853, "grad_norm": 0.002459679963067174, "learning_rate": 0.00018478923365078612, "loss": 23.0, "step": 2988 }, { "epoch": 1.7941176470588234, "grad_norm": 0.0012211776338517666, "learning_rate": 0.0001847792166639966, "loss": 23.0, "step": 2989 }, { "epoch": 1.794717887154862, "grad_norm": 0.000426774233346805, "learning_rate": 0.00018476919665165489, "loss": 23.0, "step": 2990 }, { "epoch": 1.7953181272509005, "grad_norm": 0.001523893210105598, "learning_rate": 0.00018475917361411858, "loss": 23.0, "step": 2991 }, { "epoch": 1.7959183673469388, "grad_norm": 0.001041338429786265, "learning_rate": 0.00018474914755174543, "loss": 23.0, "step": 2992 }, { "epoch": 1.796518607442977, "grad_norm": 0.0010472668800503016, "learning_rate": 0.00018473911846489318, "loss": 23.0, "step": 2993 }, { "epoch": 1.7971188475390156, "grad_norm": 0.0013296904508024454, "learning_rate": 0.0001847290863539198, "loss": 23.0, "step": 2994 }, { "epoch": 1.7977190876350542, "grad_norm": 0.001468316069804132, "learning_rate": 0.00018471905121918324, "loss": 23.0, "step": 2995 }, { "epoch": 1.7983193277310925, "grad_norm": 0.003528536995872855, "learning_rate": 0.0001847090130610417, "loss": 23.0, "step": 2996 }, { "epoch": 1.7989195678271308, "grad_norm": 0.0017783723305910826, "learning_rate": 0.00018469897187985336, "loss": 23.0, "step": 2997 }, { "epoch": 1.7995198079231693, "grad_norm": 0.005629006773233414, "learning_rate": 0.00018468892767597658, "loss": 23.0, "step": 2998 }, { "epoch": 1.8001200480192077, "grad_norm": 0.0033181149046868086, "learning_rate": 0.0001846788804497698, "loss": 23.0, "step": 2999 }, { "epoch": 1.800720288115246, "grad_norm": 0.001119964406825602, "learning_rate": 0.00018466883020159164, "loss": 23.0, "step": 3000 }, { "epoch": 1.800720288115246, "eval_loss": 11.5, "eval_runtime": 5.3647, "eval_samples_per_second": 261.522, "eval_steps_per_second": 32.807, "step": 3000 }, { "epoch": 1.8013205282112845, "grad_norm": 0.0011961712734773755, "learning_rate": 0.00018465877693180072, "loss": 23.0, "step": 3001 }, { "epoch": 1.801920768307323, "grad_norm": 0.0015885218745097518, "learning_rate": 0.00018464872064075577, "loss": 23.0, "step": 3002 }, { "epoch": 1.8025210084033614, "grad_norm": 0.0019562295638024807, "learning_rate": 0.00018463866132881578, "loss": 23.0, "step": 3003 }, { "epoch": 1.8031212484993997, "grad_norm": 0.003253971692174673, "learning_rate": 0.00018462859899633967, "loss": 23.0, "step": 3004 }, { "epoch": 1.8037214885954382, "grad_norm": 0.000821275869384408, "learning_rate": 0.00018461853364368653, "loss": 23.0, "step": 3005 }, { "epoch": 1.8043217286914766, "grad_norm": 0.0012803277932107449, "learning_rate": 0.0001846084652712156, "loss": 23.0, "step": 3006 }, { "epoch": 1.8049219687875149, "grad_norm": 0.0007911855354905128, "learning_rate": 0.00018459839387928619, "loss": 23.0, "step": 3007 }, { "epoch": 1.8055222088835534, "grad_norm": 0.0007396016153506935, "learning_rate": 0.0001845883194682577, "loss": 23.0, "step": 3008 }, { "epoch": 1.806122448979592, "grad_norm": 0.0014897441724315286, "learning_rate": 0.0001845782420384897, "loss": 23.0, "step": 3009 }, { "epoch": 1.8067226890756303, "grad_norm": 0.0004990187590010464, "learning_rate": 0.0001845681615903418, "loss": 23.0, "step": 3010 }, { "epoch": 1.8073229291716686, "grad_norm": 0.0008598102140240371, "learning_rate": 0.00018455807812417372, "loss": 23.0, "step": 3011 }, { "epoch": 1.8079231692677071, "grad_norm": 0.0011542581487447023, "learning_rate": 0.00018454799164034538, "loss": 23.0, "step": 3012 }, { "epoch": 1.8085234093637454, "grad_norm": 0.0013645098078995943, "learning_rate": 0.00018453790213921665, "loss": 23.0, "step": 3013 }, { "epoch": 1.8091236494597838, "grad_norm": 0.0008868533186614513, "learning_rate": 0.00018452780962114772, "loss": 23.0, "step": 3014 }, { "epoch": 1.8097238895558223, "grad_norm": 0.0018087849020957947, "learning_rate": 0.00018451771408649865, "loss": 23.0, "step": 3015 }, { "epoch": 1.8103241296518608, "grad_norm": 0.00035064207622781396, "learning_rate": 0.00018450761553562975, "loss": 23.0, "step": 3016 }, { "epoch": 1.8109243697478992, "grad_norm": 0.0014904963318258524, "learning_rate": 0.00018449751396890148, "loss": 23.0, "step": 3017 }, { "epoch": 1.8115246098439375, "grad_norm": 0.0019730026833713055, "learning_rate": 0.00018448740938667427, "loss": 23.0, "step": 3018 }, { "epoch": 1.812124849939976, "grad_norm": 0.0017877138452604413, "learning_rate": 0.00018447730178930874, "loss": 23.0, "step": 3019 }, { "epoch": 1.8127250900360146, "grad_norm": 0.0008809540886431932, "learning_rate": 0.0001844671911771656, "loss": 23.0, "step": 3020 }, { "epoch": 1.8133253301320527, "grad_norm": 0.0015870918286964297, "learning_rate": 0.0001844570775506057, "loss": 23.0, "step": 3021 }, { "epoch": 1.8139255702280912, "grad_norm": 0.001148207113146782, "learning_rate": 0.00018444696090998995, "loss": 23.0, "step": 3022 }, { "epoch": 1.8145258103241297, "grad_norm": 0.0025478298775851727, "learning_rate": 0.00018443684125567938, "loss": 23.0, "step": 3023 }, { "epoch": 1.815126050420168, "grad_norm": 0.0015544770285487175, "learning_rate": 0.00018442671858803514, "loss": 23.0, "step": 3024 }, { "epoch": 1.8157262905162064, "grad_norm": 0.001434922218322754, "learning_rate": 0.00018441659290741852, "loss": 23.0, "step": 3025 }, { "epoch": 1.816326530612245, "grad_norm": 0.0009723508264869452, "learning_rate": 0.00018440646421419085, "loss": 23.0, "step": 3026 }, { "epoch": 1.8169267707082835, "grad_norm": 0.0012221380602568388, "learning_rate": 0.00018439633250871354, "loss": 23.0, "step": 3027 }, { "epoch": 1.8175270108043218, "grad_norm": 0.0012381580891087651, "learning_rate": 0.00018438619779134822, "loss": 23.0, "step": 3028 }, { "epoch": 1.81812725090036, "grad_norm": 0.0011028312146663666, "learning_rate": 0.0001843760600624566, "loss": 23.0, "step": 3029 }, { "epoch": 1.8187274909963986, "grad_norm": 0.0011208378709852695, "learning_rate": 0.0001843659193224004, "loss": 23.0, "step": 3030 }, { "epoch": 1.819327731092437, "grad_norm": 0.001310601131990552, "learning_rate": 0.0001843557755715416, "loss": 23.0, "step": 3031 }, { "epoch": 1.8199279711884753, "grad_norm": 0.0017411686712875962, "learning_rate": 0.00018434562881024214, "loss": 23.0, "step": 3032 }, { "epoch": 1.8205282112845138, "grad_norm": 0.0012776413932442665, "learning_rate": 0.00018433547903886415, "loss": 23.0, "step": 3033 }, { "epoch": 1.8211284513805523, "grad_norm": 0.00228596362285316, "learning_rate": 0.00018432532625776987, "loss": 23.0, "step": 3034 }, { "epoch": 1.8217286914765907, "grad_norm": 0.0009363253484480083, "learning_rate": 0.00018431517046732157, "loss": 23.0, "step": 3035 }, { "epoch": 1.822328931572629, "grad_norm": 0.0019296659156680107, "learning_rate": 0.00018430501166788173, "loss": 23.0, "step": 3036 }, { "epoch": 1.8229291716686675, "grad_norm": 0.0037253268528729677, "learning_rate": 0.0001842948498598129, "loss": 23.0, "step": 3037 }, { "epoch": 1.8235294117647058, "grad_norm": 0.0003991881385445595, "learning_rate": 0.00018428468504347767, "loss": 23.0, "step": 3038 }, { "epoch": 1.8241296518607442, "grad_norm": 0.004006813745945692, "learning_rate": 0.00018427451721923888, "loss": 23.0, "step": 3039 }, { "epoch": 1.8247298919567827, "grad_norm": 0.0013116990448907018, "learning_rate": 0.00018426434638745933, "loss": 23.0, "step": 3040 }, { "epoch": 1.8253301320528212, "grad_norm": 0.0044224620796740055, "learning_rate": 0.000184254172548502, "loss": 23.0, "step": 3041 }, { "epoch": 1.8259303721488596, "grad_norm": 0.0010624263668432832, "learning_rate": 0.00018424399570273002, "loss": 23.0, "step": 3042 }, { "epoch": 1.8265306122448979, "grad_norm": 0.00032893690513446927, "learning_rate": 0.0001842338158505065, "loss": 23.0, "step": 3043 }, { "epoch": 1.8271308523409364, "grad_norm": 0.0007519045611843467, "learning_rate": 0.00018422363299219478, "loss": 23.0, "step": 3044 }, { "epoch": 1.8277310924369747, "grad_norm": 0.0011156450491398573, "learning_rate": 0.00018421344712815825, "loss": 23.0, "step": 3045 }, { "epoch": 1.828331332533013, "grad_norm": 0.0012068160576745868, "learning_rate": 0.0001842032582587604, "loss": 23.0, "step": 3046 }, { "epoch": 1.8289315726290516, "grad_norm": 0.00150686118286103, "learning_rate": 0.00018419306638436486, "loss": 23.0, "step": 3047 }, { "epoch": 1.8295318127250901, "grad_norm": 0.0015496856067329645, "learning_rate": 0.00018418287150533537, "loss": 23.0, "step": 3048 }, { "epoch": 1.8301320528211285, "grad_norm": 0.0019011717522516847, "learning_rate": 0.00018417267362203575, "loss": 23.0, "step": 3049 }, { "epoch": 1.8307322929171668, "grad_norm": 0.001324790413491428, "learning_rate": 0.00018416247273482988, "loss": 23.0, "step": 3050 }, { "epoch": 1.8313325330132053, "grad_norm": 0.0008508468163199723, "learning_rate": 0.0001841522688440819, "loss": 23.0, "step": 3051 }, { "epoch": 1.8319327731092439, "grad_norm": 0.0004634323122445494, "learning_rate": 0.00018414206195015587, "loss": 23.0, "step": 3052 }, { "epoch": 1.832533013205282, "grad_norm": 0.003283916972577572, "learning_rate": 0.0001841318520534161, "loss": 23.0, "step": 3053 }, { "epoch": 1.8331332533013205, "grad_norm": 0.0021354639902710915, "learning_rate": 0.00018412163915422695, "loss": 23.0, "step": 3054 }, { "epoch": 1.833733493397359, "grad_norm": 0.0010375322308391333, "learning_rate": 0.0001841114232529529, "loss": 23.0, "step": 3055 }, { "epoch": 1.8343337334933973, "grad_norm": 0.0026274758856743574, "learning_rate": 0.0001841012043499585, "loss": 23.0, "step": 3056 }, { "epoch": 1.8349339735894357, "grad_norm": 0.0008885942515917122, "learning_rate": 0.00018409098244560844, "loss": 23.0, "step": 3057 }, { "epoch": 1.8355342136854742, "grad_norm": 0.0019456876907497644, "learning_rate": 0.00018408075754026753, "loss": 23.0, "step": 3058 }, { "epoch": 1.8361344537815127, "grad_norm": 0.002114507369697094, "learning_rate": 0.00018407052963430068, "loss": 23.0, "step": 3059 }, { "epoch": 1.836734693877551, "grad_norm": 0.0008166294428519905, "learning_rate": 0.00018406029872807287, "loss": 23.0, "step": 3060 }, { "epoch": 1.8373349339735894, "grad_norm": 0.0016927891410887241, "learning_rate": 0.00018405006482194921, "loss": 23.0, "step": 3061 }, { "epoch": 1.837935174069628, "grad_norm": 0.0014067868469282985, "learning_rate": 0.00018403982791629497, "loss": 23.0, "step": 3062 }, { "epoch": 1.8385354141656662, "grad_norm": 0.0017834990285336971, "learning_rate": 0.00018402958801147542, "loss": 23.0, "step": 3063 }, { "epoch": 1.8391356542617046, "grad_norm": 0.0011745449155569077, "learning_rate": 0.00018401934510785606, "loss": 23.0, "step": 3064 }, { "epoch": 1.839735894357743, "grad_norm": 0.0009545880602672696, "learning_rate": 0.00018400909920580238, "loss": 23.0, "step": 3065 }, { "epoch": 1.8403361344537816, "grad_norm": 0.003715930739417672, "learning_rate": 0.00018399885030568003, "loss": 23.0, "step": 3066 }, { "epoch": 1.84093637454982, "grad_norm": 0.0006399541744031012, "learning_rate": 0.0001839885984078548, "loss": 23.0, "step": 3067 }, { "epoch": 1.8415366146458583, "grad_norm": 0.001763959531672299, "learning_rate": 0.00018397834351269255, "loss": 23.0, "step": 3068 }, { "epoch": 1.8421368547418968, "grad_norm": 0.0012634745799005032, "learning_rate": 0.0001839680856205592, "loss": 23.0, "step": 3069 }, { "epoch": 1.8427370948379351, "grad_norm": 0.0015287197893485427, "learning_rate": 0.0001839578247318209, "loss": 23.0, "step": 3070 }, { "epoch": 1.8433373349339734, "grad_norm": 0.0009792933706194162, "learning_rate": 0.00018394756084684377, "loss": 23.0, "step": 3071 }, { "epoch": 1.843937575030012, "grad_norm": 0.0009839729173108935, "learning_rate": 0.00018393729396599416, "loss": 23.0, "step": 3072 }, { "epoch": 1.8445378151260505, "grad_norm": 0.0008178153657354414, "learning_rate": 0.00018392702408963842, "loss": 23.0, "step": 3073 }, { "epoch": 1.8451380552220888, "grad_norm": 0.0018251874716952443, "learning_rate": 0.0001839167512181431, "loss": 23.0, "step": 3074 }, { "epoch": 1.8457382953181272, "grad_norm": 0.002089198911562562, "learning_rate": 0.00018390647535187476, "loss": 23.0, "step": 3075 }, { "epoch": 1.8463385354141657, "grad_norm": 0.0021562918554991484, "learning_rate": 0.00018389619649120017, "loss": 23.0, "step": 3076 }, { "epoch": 1.8469387755102042, "grad_norm": 0.0017833629390224814, "learning_rate": 0.00018388591463648616, "loss": 23.0, "step": 3077 }, { "epoch": 1.8475390156062423, "grad_norm": 0.000439735857071355, "learning_rate": 0.00018387562978809958, "loss": 23.0, "step": 3078 }, { "epoch": 1.8481392557022809, "grad_norm": 0.001050430117174983, "learning_rate": 0.00018386534194640754, "loss": 23.0, "step": 3079 }, { "epoch": 1.8487394957983194, "grad_norm": 0.001189610455185175, "learning_rate": 0.00018385505111177719, "loss": 23.0, "step": 3080 }, { "epoch": 1.8493397358943577, "grad_norm": 0.0010707725305110216, "learning_rate": 0.00018384475728457574, "loss": 23.0, "step": 3081 }, { "epoch": 1.849939975990396, "grad_norm": 0.002398673677816987, "learning_rate": 0.00018383446046517062, "loss": 23.0, "step": 3082 }, { "epoch": 1.8505402160864346, "grad_norm": 0.0018957408610731363, "learning_rate": 0.00018382416065392923, "loss": 23.0, "step": 3083 }, { "epoch": 1.8511404561824731, "grad_norm": 0.0022743563167750835, "learning_rate": 0.0001838138578512192, "loss": 23.0, "step": 3084 }, { "epoch": 1.8517406962785115, "grad_norm": 0.0014944793656468391, "learning_rate": 0.00018380355205740813, "loss": 23.0, "step": 3085 }, { "epoch": 1.8523409363745498, "grad_norm": 0.0011577775003388524, "learning_rate": 0.00018379324327286387, "loss": 23.0, "step": 3086 }, { "epoch": 1.8529411764705883, "grad_norm": 0.0023493997287005186, "learning_rate": 0.0001837829314979543, "loss": 23.0, "step": 3087 }, { "epoch": 1.8535414165666266, "grad_norm": 0.0017023790860548615, "learning_rate": 0.00018377261673304743, "loss": 23.0, "step": 3088 }, { "epoch": 1.854141656662665, "grad_norm": 0.001627809484489262, "learning_rate": 0.00018376229897851135, "loss": 23.0, "step": 3089 }, { "epoch": 1.8547418967587035, "grad_norm": 0.0016393931582570076, "learning_rate": 0.00018375197823471427, "loss": 23.0, "step": 3090 }, { "epoch": 1.855342136854742, "grad_norm": 0.0018370094476267695, "learning_rate": 0.00018374165450202456, "loss": 23.0, "step": 3091 }, { "epoch": 1.8559423769507803, "grad_norm": 0.0014342045178636909, "learning_rate": 0.0001837313277808106, "loss": 23.0, "step": 3092 }, { "epoch": 1.8565426170468187, "grad_norm": 0.0029582458082586527, "learning_rate": 0.0001837209980714409, "loss": 23.0, "step": 3093 }, { "epoch": 1.8571428571428572, "grad_norm": 0.0012816624948754907, "learning_rate": 0.00018371066537428417, "loss": 23.0, "step": 3094 }, { "epoch": 1.8577430972388955, "grad_norm": 0.002697428921237588, "learning_rate": 0.0001837003296897091, "loss": 23.0, "step": 3095 }, { "epoch": 1.8583433373349338, "grad_norm": 0.0037919096648693085, "learning_rate": 0.00018368999101808458, "loss": 23.0, "step": 3096 }, { "epoch": 1.8589435774309724, "grad_norm": 0.003335389541462064, "learning_rate": 0.00018367964935977953, "loss": 23.0, "step": 3097 }, { "epoch": 1.859543817527011, "grad_norm": 0.0011625085026025772, "learning_rate": 0.00018366930471516306, "loss": 23.0, "step": 3098 }, { "epoch": 1.8601440576230492, "grad_norm": 0.001312834327109158, "learning_rate": 0.00018365895708460434, "loss": 23.0, "step": 3099 }, { "epoch": 1.8607442977190876, "grad_norm": 0.0009217692422680557, "learning_rate": 0.00018364860646847265, "loss": 23.0, "step": 3100 }, { "epoch": 1.861344537815126, "grad_norm": 0.001049682148732245, "learning_rate": 0.00018363825286713735, "loss": 23.0, "step": 3101 }, { "epoch": 1.8619447779111644, "grad_norm": 0.0009720840607769787, "learning_rate": 0.00018362789628096794, "loss": 23.0, "step": 3102 }, { "epoch": 1.8625450180072027, "grad_norm": 0.0013249212643131614, "learning_rate": 0.000183617536710334, "loss": 23.0, "step": 3103 }, { "epoch": 1.8631452581032413, "grad_norm": 0.0006475619738921523, "learning_rate": 0.00018360717415560532, "loss": 23.0, "step": 3104 }, { "epoch": 1.8637454981992798, "grad_norm": 0.0009137241286225617, "learning_rate": 0.00018359680861715163, "loss": 23.0, "step": 3105 }, { "epoch": 1.8643457382953181, "grad_norm": 0.001901013427414, "learning_rate": 0.00018358644009534286, "loss": 23.0, "step": 3106 }, { "epoch": 1.8649459783913565, "grad_norm": 0.0014280707109719515, "learning_rate": 0.0001835760685905491, "loss": 23.0, "step": 3107 }, { "epoch": 1.865546218487395, "grad_norm": 0.0019309140043333173, "learning_rate": 0.00018356569410314042, "loss": 23.0, "step": 3108 }, { "epoch": 1.8661464585834335, "grad_norm": 0.0008051837212406099, "learning_rate": 0.00018355531663348705, "loss": 23.0, "step": 3109 }, { "epoch": 1.8667466986794716, "grad_norm": 0.0013398687588050961, "learning_rate": 0.00018354493618195935, "loss": 23.0, "step": 3110 }, { "epoch": 1.8673469387755102, "grad_norm": 0.000537229236215353, "learning_rate": 0.00018353455274892783, "loss": 23.0, "step": 3111 }, { "epoch": 1.8679471788715487, "grad_norm": 0.00452477065846324, "learning_rate": 0.00018352416633476298, "loss": 23.0, "step": 3112 }, { "epoch": 1.868547418967587, "grad_norm": 0.002200663322582841, "learning_rate": 0.0001835137769398355, "loss": 23.0, "step": 3113 }, { "epoch": 1.8691476590636253, "grad_norm": 0.0013940661447122693, "learning_rate": 0.0001835033845645161, "loss": 23.0, "step": 3114 }, { "epoch": 1.8697478991596639, "grad_norm": 0.002113936236128211, "learning_rate": 0.00018349298920917572, "loss": 23.0, "step": 3115 }, { "epoch": 1.8703481392557024, "grad_norm": 0.0012045325711369514, "learning_rate": 0.00018348259087418533, "loss": 23.0, "step": 3116 }, { "epoch": 1.8709483793517407, "grad_norm": 0.0033185603097081184, "learning_rate": 0.000183472189559916, "loss": 23.0, "step": 3117 }, { "epoch": 1.871548619447779, "grad_norm": 0.0011681788600981236, "learning_rate": 0.00018346178526673897, "loss": 23.0, "step": 3118 }, { "epoch": 1.8721488595438176, "grad_norm": 0.0022086468525230885, "learning_rate": 0.0001834513779950255, "loss": 23.0, "step": 3119 }, { "epoch": 1.872749099639856, "grad_norm": 0.0005794914322905242, "learning_rate": 0.000183440967745147, "loss": 23.0, "step": 3120 }, { "epoch": 1.8733493397358942, "grad_norm": 0.0011907683219760656, "learning_rate": 0.00018343055451747498, "loss": 23.0, "step": 3121 }, { "epoch": 1.8739495798319328, "grad_norm": 0.0005777979386039078, "learning_rate": 0.0001834201383123811, "loss": 23.0, "step": 3122 }, { "epoch": 1.8745498199279713, "grad_norm": 0.004292360506951809, "learning_rate": 0.00018340971913023707, "loss": 23.0, "step": 3123 }, { "epoch": 1.8751500600240096, "grad_norm": 0.0035806214436888695, "learning_rate": 0.00018339929697141474, "loss": 23.0, "step": 3124 }, { "epoch": 1.875750300120048, "grad_norm": 0.0015884626191109419, "learning_rate": 0.00018338887183628599, "loss": 23.0, "step": 3125 }, { "epoch": 1.8763505402160865, "grad_norm": 0.0011369887506589293, "learning_rate": 0.00018337844372522292, "loss": 23.0, "step": 3126 }, { "epoch": 1.8769507803121248, "grad_norm": 0.005162140820175409, "learning_rate": 0.00018336801263859768, "loss": 23.0, "step": 3127 }, { "epoch": 1.8775510204081631, "grad_norm": 0.0024860259145498276, "learning_rate": 0.00018335757857678253, "loss": 23.0, "step": 3128 }, { "epoch": 1.8781512605042017, "grad_norm": 0.0012651464203372598, "learning_rate": 0.0001833471415401498, "loss": 23.0, "step": 3129 }, { "epoch": 1.8787515006002402, "grad_norm": 0.00254456396214664, "learning_rate": 0.00018333670152907198, "loss": 23.0, "step": 3130 }, { "epoch": 1.8793517406962785, "grad_norm": 0.0015262291999533772, "learning_rate": 0.00018332625854392164, "loss": 23.0, "step": 3131 }, { "epoch": 1.8799519807923168, "grad_norm": 0.002018503611907363, "learning_rate": 0.0001833158125850715, "loss": 23.0, "step": 3132 }, { "epoch": 1.8805522208883554, "grad_norm": 0.00178581103682518, "learning_rate": 0.0001833053636528943, "loss": 23.0, "step": 3133 }, { "epoch": 1.8811524609843937, "grad_norm": 0.002688503125682473, "learning_rate": 0.00018329491174776295, "loss": 23.0, "step": 3134 }, { "epoch": 1.881752701080432, "grad_norm": 0.0027273113373667, "learning_rate": 0.0001832844568700505, "loss": 23.0, "step": 3135 }, { "epoch": 1.8823529411764706, "grad_norm": 0.0010961064836010337, "learning_rate": 0.00018327399902013, "loss": 23.0, "step": 3136 }, { "epoch": 1.882953181272509, "grad_norm": 0.0035878592170774937, "learning_rate": 0.00018326353819837467, "loss": 23.0, "step": 3137 }, { "epoch": 1.8835534213685474, "grad_norm": 0.0022766878828406334, "learning_rate": 0.00018325307440515785, "loss": 23.0, "step": 3138 }, { "epoch": 1.8841536614645857, "grad_norm": 0.0014452520990744233, "learning_rate": 0.00018324260764085294, "loss": 23.0, "step": 3139 }, { "epoch": 1.8847539015606243, "grad_norm": 0.001565837417729199, "learning_rate": 0.00018323213790583353, "loss": 23.0, "step": 3140 }, { "epoch": 1.8853541416566628, "grad_norm": 0.002488331636413932, "learning_rate": 0.0001832216652004732, "loss": 23.0, "step": 3141 }, { "epoch": 1.885954381752701, "grad_norm": 0.0023180756252259016, "learning_rate": 0.00018321118952514568, "loss": 23.0, "step": 3142 }, { "epoch": 1.8865546218487395, "grad_norm": 0.0013001611223444343, "learning_rate": 0.0001832007108802249, "loss": 23.0, "step": 3143 }, { "epoch": 1.887154861944778, "grad_norm": 0.00461295573040843, "learning_rate": 0.0001831902292660847, "loss": 23.0, "step": 3144 }, { "epoch": 1.8877551020408163, "grad_norm": 0.0016194535419344902, "learning_rate": 0.00018317974468309926, "loss": 23.0, "step": 3145 }, { "epoch": 1.8883553421368546, "grad_norm": 0.002495100488886237, "learning_rate": 0.00018316925713164267, "loss": 23.0, "step": 3146 }, { "epoch": 1.8889555822328932, "grad_norm": 0.0009624058147892356, "learning_rate": 0.00018315876661208926, "loss": 23.0, "step": 3147 }, { "epoch": 1.8895558223289317, "grad_norm": 0.002280160551890731, "learning_rate": 0.00018314827312481335, "loss": 23.0, "step": 3148 }, { "epoch": 1.89015606242497, "grad_norm": 0.001309713814407587, "learning_rate": 0.00018313777667018947, "loss": 23.0, "step": 3149 }, { "epoch": 1.8907563025210083, "grad_norm": 0.00454369792714715, "learning_rate": 0.0001831272772485922, "loss": 23.0, "step": 3150 }, { "epoch": 1.8913565426170469, "grad_norm": 0.0026175181847065687, "learning_rate": 0.0001831167748603962, "loss": 23.0, "step": 3151 }, { "epoch": 1.8919567827130852, "grad_norm": 0.0012701862724497914, "learning_rate": 0.00018310626950597637, "loss": 23.0, "step": 3152 }, { "epoch": 1.8925570228091235, "grad_norm": 0.0033725642133504152, "learning_rate": 0.0001830957611857075, "loss": 23.0, "step": 3153 }, { "epoch": 1.893157262905162, "grad_norm": 0.005127619951963425, "learning_rate": 0.0001830852498999647, "loss": 23.0, "step": 3154 }, { "epoch": 1.8937575030012006, "grad_norm": 0.0004975322517566383, "learning_rate": 0.00018307473564912303, "loss": 23.0, "step": 3155 }, { "epoch": 1.894357743097239, "grad_norm": 0.002670707879588008, "learning_rate": 0.00018306421843355773, "loss": 23.0, "step": 3156 }, { "epoch": 1.8949579831932772, "grad_norm": 0.003962348680943251, "learning_rate": 0.00018305369825364417, "loss": 23.0, "step": 3157 }, { "epoch": 1.8955582232893158, "grad_norm": 0.000557166466023773, "learning_rate": 0.00018304317510975775, "loss": 23.0, "step": 3158 }, { "epoch": 1.896158463385354, "grad_norm": 0.0021320811938494444, "learning_rate": 0.00018303264900227398, "loss": 23.0, "step": 3159 }, { "epoch": 1.8967587034813924, "grad_norm": 0.003195348661392927, "learning_rate": 0.0001830221199315686, "loss": 23.0, "step": 3160 }, { "epoch": 1.897358943577431, "grad_norm": 0.0013635127106681466, "learning_rate": 0.0001830115878980173, "loss": 23.0, "step": 3161 }, { "epoch": 1.8979591836734695, "grad_norm": 0.002584997797384858, "learning_rate": 0.00018300105290199595, "loss": 23.0, "step": 3162 }, { "epoch": 1.8985594237695078, "grad_norm": 0.0012520054588094354, "learning_rate": 0.00018299051494388054, "loss": 23.0, "step": 3163 }, { "epoch": 1.8991596638655461, "grad_norm": 0.0018940556328743696, "learning_rate": 0.00018297997402404715, "loss": 23.0, "step": 3164 }, { "epoch": 1.8997599039615847, "grad_norm": 0.0019348510541021824, "learning_rate": 0.00018296943014287193, "loss": 23.0, "step": 3165 }, { "epoch": 1.9003601440576232, "grad_norm": 0.0018156596925109625, "learning_rate": 0.00018295888330073113, "loss": 23.0, "step": 3166 }, { "epoch": 1.9009603841536613, "grad_norm": 0.0035436146426945925, "learning_rate": 0.0001829483334980012, "loss": 23.0, "step": 3167 }, { "epoch": 1.9015606242496998, "grad_norm": 0.0012572517152875662, "learning_rate": 0.00018293778073505865, "loss": 23.0, "step": 3168 }, { "epoch": 1.9021608643457384, "grad_norm": 0.0008448131266050041, "learning_rate": 0.00018292722501228004, "loss": 23.0, "step": 3169 }, { "epoch": 1.9027611044417767, "grad_norm": 0.0037472823169082403, "learning_rate": 0.00018291666633004204, "loss": 23.0, "step": 3170 }, { "epoch": 1.903361344537815, "grad_norm": 0.0018287114799022675, "learning_rate": 0.00018290610468872155, "loss": 23.0, "step": 3171 }, { "epoch": 1.9039615846338536, "grad_norm": 0.0022742743603885174, "learning_rate": 0.0001828955400886954, "loss": 23.0, "step": 3172 }, { "epoch": 1.904561824729892, "grad_norm": 0.0018581197364255786, "learning_rate": 0.0001828849725303407, "loss": 23.0, "step": 3173 }, { "epoch": 1.9051620648259304, "grad_norm": 0.001101133180782199, "learning_rate": 0.0001828744020140345, "loss": 23.0, "step": 3174 }, { "epoch": 1.9057623049219687, "grad_norm": 0.0020204149186611176, "learning_rate": 0.0001828638285401541, "loss": 23.0, "step": 3175 }, { "epoch": 1.9063625450180073, "grad_norm": 0.0028773958329111338, "learning_rate": 0.0001828532521090768, "loss": 23.0, "step": 3176 }, { "epoch": 1.9069627851140456, "grad_norm": 0.0018713538302108645, "learning_rate": 0.00018284267272118004, "loss": 23.0, "step": 3177 }, { "epoch": 1.907563025210084, "grad_norm": 0.0018324162811040878, "learning_rate": 0.00018283209037684141, "loss": 23.0, "step": 3178 }, { "epoch": 1.9081632653061225, "grad_norm": 0.002509085228666663, "learning_rate": 0.00018282150507643855, "loss": 23.0, "step": 3179 }, { "epoch": 1.908763505402161, "grad_norm": 0.002151481108739972, "learning_rate": 0.0001828109168203492, "loss": 23.0, "step": 3180 }, { "epoch": 1.9093637454981993, "grad_norm": 0.0014471776084974408, "learning_rate": 0.00018280032560895123, "loss": 23.0, "step": 3181 }, { "epoch": 1.9099639855942376, "grad_norm": 0.0023641546722501516, "learning_rate": 0.00018278973144262266, "loss": 23.0, "step": 3182 }, { "epoch": 1.9105642256902762, "grad_norm": 0.003392763901501894, "learning_rate": 0.0001827791343217415, "loss": 23.0, "step": 3183 }, { "epoch": 1.9111644657863145, "grad_norm": 0.003529018722474575, "learning_rate": 0.00018276853424668602, "loss": 23.0, "step": 3184 }, { "epoch": 1.9117647058823528, "grad_norm": 0.003656035289168358, "learning_rate": 0.0001827579312178344, "loss": 23.0, "step": 3185 }, { "epoch": 1.9123649459783914, "grad_norm": 0.0014189134817570448, "learning_rate": 0.00018274732523556513, "loss": 23.0, "step": 3186 }, { "epoch": 1.91296518607443, "grad_norm": 0.0013264564331620932, "learning_rate": 0.00018273671630025665, "loss": 23.0, "step": 3187 }, { "epoch": 1.9135654261704682, "grad_norm": 0.004117329139262438, "learning_rate": 0.00018272610441228763, "loss": 23.0, "step": 3188 }, { "epoch": 1.9141656662665065, "grad_norm": 0.0024092942476272583, "learning_rate": 0.0001827154895720367, "loss": 23.0, "step": 3189 }, { "epoch": 1.914765906362545, "grad_norm": 0.001247992506250739, "learning_rate": 0.00018270487177988272, "loss": 23.0, "step": 3190 }, { "epoch": 1.9153661464585834, "grad_norm": 0.006278992164880037, "learning_rate": 0.00018269425103620464, "loss": 23.0, "step": 3191 }, { "epoch": 1.9159663865546217, "grad_norm": 0.001128924312070012, "learning_rate": 0.00018268362734138142, "loss": 23.0, "step": 3192 }, { "epoch": 1.9165666266506602, "grad_norm": 0.004643549211323261, "learning_rate": 0.00018267300069579222, "loss": 23.0, "step": 3193 }, { "epoch": 1.9171668667466988, "grad_norm": 0.002494530752301216, "learning_rate": 0.00018266237109981632, "loss": 23.0, "step": 3194 }, { "epoch": 1.917767106842737, "grad_norm": 0.00441815285012126, "learning_rate": 0.000182651738553833, "loss": 23.0, "step": 3195 }, { "epoch": 1.9183673469387754, "grad_norm": 0.0028012660332024097, "learning_rate": 0.00018264110305822178, "loss": 23.0, "step": 3196 }, { "epoch": 1.918967587034814, "grad_norm": 0.0017156752292066813, "learning_rate": 0.00018263046461336214, "loss": 23.0, "step": 3197 }, { "epoch": 1.9195678271308525, "grad_norm": 0.002733529545366764, "learning_rate": 0.00018261982321963379, "loss": 23.0, "step": 3198 }, { "epoch": 1.9201680672268906, "grad_norm": 0.0011137155815958977, "learning_rate": 0.00018260917887741643, "loss": 23.0, "step": 3199 }, { "epoch": 1.9207683073229291, "grad_norm": 0.0014791876310482621, "learning_rate": 0.00018259853158708998, "loss": 23.0, "step": 3200 }, { "epoch": 1.9213685474189677, "grad_norm": 0.0009261185768991709, "learning_rate": 0.00018258788134903445, "loss": 23.0, "step": 3201 }, { "epoch": 1.921968787515006, "grad_norm": 0.0030771689489483833, "learning_rate": 0.00018257722816362985, "loss": 23.0, "step": 3202 }, { "epoch": 1.9225690276110443, "grad_norm": 0.003100763075053692, "learning_rate": 0.00018256657203125637, "loss": 23.0, "step": 3203 }, { "epoch": 1.9231692677070829, "grad_norm": 0.002674756571650505, "learning_rate": 0.00018255591295229437, "loss": 23.0, "step": 3204 }, { "epoch": 1.9237695078031214, "grad_norm": 0.0022848814260214567, "learning_rate": 0.00018254525092712415, "loss": 23.0, "step": 3205 }, { "epoch": 1.9243697478991597, "grad_norm": 0.0018523391336202621, "learning_rate": 0.00018253458595612627, "loss": 23.0, "step": 3206 }, { "epoch": 1.924969987995198, "grad_norm": 0.0009723806870169938, "learning_rate": 0.00018252391803968133, "loss": 23.0, "step": 3207 }, { "epoch": 1.9255702280912366, "grad_norm": 0.001458384795114398, "learning_rate": 0.00018251324717817005, "loss": 23.0, "step": 3208 }, { "epoch": 1.9261704681872749, "grad_norm": 0.0013265210436657071, "learning_rate": 0.0001825025733719732, "loss": 23.0, "step": 3209 }, { "epoch": 1.9267707082833132, "grad_norm": 0.0010744367027655244, "learning_rate": 0.00018249189662147171, "loss": 23.0, "step": 3210 }, { "epoch": 1.9273709483793517, "grad_norm": 0.0026470967568457127, "learning_rate": 0.00018248121692704668, "loss": 23.0, "step": 3211 }, { "epoch": 1.9279711884753903, "grad_norm": 0.0016062292270362377, "learning_rate": 0.00018247053428907913, "loss": 23.0, "step": 3212 }, { "epoch": 1.9285714285714286, "grad_norm": 0.000824375543743372, "learning_rate": 0.00018245984870795038, "loss": 23.0, "step": 3213 }, { "epoch": 1.929171668667467, "grad_norm": 0.002928749890998006, "learning_rate": 0.00018244916018404173, "loss": 23.0, "step": 3214 }, { "epoch": 1.9297719087635055, "grad_norm": 0.002207791665568948, "learning_rate": 0.00018243846871773465, "loss": 23.0, "step": 3215 }, { "epoch": 1.9303721488595438, "grad_norm": 0.0028601568192243576, "learning_rate": 0.00018242777430941068, "loss": 23.0, "step": 3216 }, { "epoch": 1.930972388955582, "grad_norm": 0.0021290185395628214, "learning_rate": 0.00018241707695945144, "loss": 23.0, "step": 3217 }, { "epoch": 1.9315726290516206, "grad_norm": 0.000839258951600641, "learning_rate": 0.00018240637666823876, "loss": 23.0, "step": 3218 }, { "epoch": 1.9321728691476592, "grad_norm": 0.002701072720810771, "learning_rate": 0.00018239567343615443, "loss": 23.0, "step": 3219 }, { "epoch": 1.9327731092436975, "grad_norm": 0.0020356529857963324, "learning_rate": 0.00018238496726358052, "loss": 23.0, "step": 3220 }, { "epoch": 1.9333733493397358, "grad_norm": 0.0014873469481244683, "learning_rate": 0.00018237425815089903, "loss": 23.0, "step": 3221 }, { "epoch": 1.9339735894357744, "grad_norm": 0.002182344440370798, "learning_rate": 0.00018236354609849214, "loss": 23.0, "step": 3222 }, { "epoch": 1.9345738295318127, "grad_norm": 0.0016528373816981912, "learning_rate": 0.00018235283110674218, "loss": 23.0, "step": 3223 }, { "epoch": 1.935174069627851, "grad_norm": 0.0012866542674601078, "learning_rate": 0.0001823421131760315, "loss": 23.0, "step": 3224 }, { "epoch": 1.9357743097238895, "grad_norm": 0.0019892966374754906, "learning_rate": 0.00018233139230674258, "loss": 23.0, "step": 3225 }, { "epoch": 1.936374549819928, "grad_norm": 0.002182098338380456, "learning_rate": 0.0001823206684992581, "loss": 23.0, "step": 3226 }, { "epoch": 1.9369747899159664, "grad_norm": 0.0021431762725114822, "learning_rate": 0.0001823099417539607, "loss": 23.0, "step": 3227 }, { "epoch": 1.9375750300120047, "grad_norm": 0.0024950047954916954, "learning_rate": 0.00018229921207123318, "loss": 23.0, "step": 3228 }, { "epoch": 1.9381752701080432, "grad_norm": 0.0017660352168604732, "learning_rate": 0.0001822884794514585, "loss": 23.0, "step": 3229 }, { "epoch": 1.9387755102040818, "grad_norm": 0.0065404558554291725, "learning_rate": 0.00018227774389501967, "loss": 23.0, "step": 3230 }, { "epoch": 1.9393757503001199, "grad_norm": 0.0011474026832729578, "learning_rate": 0.00018226700540229976, "loss": 23.0, "step": 3231 }, { "epoch": 1.9399759903961584, "grad_norm": 0.00047521217493340373, "learning_rate": 0.0001822562639736821, "loss": 23.0, "step": 3232 }, { "epoch": 1.940576230492197, "grad_norm": 0.0019478441681712866, "learning_rate": 0.00018224551960954995, "loss": 23.0, "step": 3233 }, { "epoch": 1.9411764705882353, "grad_norm": 0.0010433909483253956, "learning_rate": 0.00018223477231028677, "loss": 23.0, "step": 3234 }, { "epoch": 1.9417767106842736, "grad_norm": 0.0018290705047547817, "learning_rate": 0.00018222402207627606, "loss": 23.0, "step": 3235 }, { "epoch": 1.9423769507803121, "grad_norm": 0.0017392985755577683, "learning_rate": 0.00018221326890790153, "loss": 23.0, "step": 3236 }, { "epoch": 1.9429771908763507, "grad_norm": 0.002610771218314767, "learning_rate": 0.00018220251280554696, "loss": 23.0, "step": 3237 }, { "epoch": 1.943577430972389, "grad_norm": 0.0016300787683576345, "learning_rate": 0.00018219175376959606, "loss": 23.0, "step": 3238 }, { "epoch": 1.9441776710684273, "grad_norm": 0.0022878015879541636, "learning_rate": 0.00018218099180043295, "loss": 23.0, "step": 3239 }, { "epoch": 1.9447779111644659, "grad_norm": 0.0036289305426180363, "learning_rate": 0.00018217022689844163, "loss": 23.0, "step": 3240 }, { "epoch": 1.9453781512605042, "grad_norm": 0.0009557732264511287, "learning_rate": 0.00018215945906400628, "loss": 23.0, "step": 3241 }, { "epoch": 1.9459783913565425, "grad_norm": 0.0023918780498206615, "learning_rate": 0.0001821486882975112, "loss": 23.0, "step": 3242 }, { "epoch": 1.946578631452581, "grad_norm": 0.001407388597726822, "learning_rate": 0.0001821379145993407, "loss": 23.0, "step": 3243 }, { "epoch": 1.9471788715486196, "grad_norm": 0.0011877256911247969, "learning_rate": 0.00018212713796987936, "loss": 23.0, "step": 3244 }, { "epoch": 1.947779111644658, "grad_norm": 0.001021384377963841, "learning_rate": 0.0001821163584095117, "loss": 23.0, "step": 3245 }, { "epoch": 1.9483793517406962, "grad_norm": 0.0026115647051483393, "learning_rate": 0.0001821055759186224, "loss": 23.0, "step": 3246 }, { "epoch": 1.9489795918367347, "grad_norm": 0.001517315162345767, "learning_rate": 0.00018209479049759636, "loss": 23.0, "step": 3247 }, { "epoch": 1.949579831932773, "grad_norm": 0.003172236494719982, "learning_rate": 0.00018208400214681837, "loss": 23.0, "step": 3248 }, { "epoch": 1.9501800720288114, "grad_norm": 0.001906992169097066, "learning_rate": 0.00018207321086667353, "loss": 23.0, "step": 3249 }, { "epoch": 1.95078031212485, "grad_norm": 0.003104976611211896, "learning_rate": 0.00018206241665754688, "loss": 23.0, "step": 3250 }, { "epoch": 1.9513805522208885, "grad_norm": 0.0015054944669827819, "learning_rate": 0.00018205161951982367, "loss": 23.0, "step": 3251 }, { "epoch": 1.9519807923169268, "grad_norm": 0.0020006780978292227, "learning_rate": 0.00018204081945388922, "loss": 23.0, "step": 3252 }, { "epoch": 1.952581032412965, "grad_norm": 0.0018055624095723033, "learning_rate": 0.00018203001646012899, "loss": 23.0, "step": 3253 }, { "epoch": 1.9531812725090036, "grad_norm": 0.0017827142728492618, "learning_rate": 0.00018201921053892845, "loss": 23.0, "step": 3254 }, { "epoch": 1.9537815126050422, "grad_norm": 0.0020725359208881855, "learning_rate": 0.0001820084016906733, "loss": 23.0, "step": 3255 }, { "epoch": 1.9543817527010803, "grad_norm": 0.001384815899655223, "learning_rate": 0.0001819975899157492, "loss": 23.0, "step": 3256 }, { "epoch": 1.9549819927971188, "grad_norm": 0.002811410464346409, "learning_rate": 0.00018198677521454206, "loss": 23.0, "step": 3257 }, { "epoch": 1.9555822328931574, "grad_norm": 0.0008216790156438947, "learning_rate": 0.0001819759575874378, "loss": 23.0, "step": 3258 }, { "epoch": 1.9561824729891957, "grad_norm": 0.0014283070340752602, "learning_rate": 0.00018196513703482248, "loss": 23.0, "step": 3259 }, { "epoch": 1.956782713085234, "grad_norm": 0.0016851375112310052, "learning_rate": 0.00018195431355708225, "loss": 23.0, "step": 3260 }, { "epoch": 1.9573829531812725, "grad_norm": 0.0019182129763066769, "learning_rate": 0.0001819434871546034, "loss": 23.0, "step": 3261 }, { "epoch": 1.957983193277311, "grad_norm": 0.0008913344354368746, "learning_rate": 0.00018193265782777228, "loss": 23.0, "step": 3262 }, { "epoch": 1.9585834333733494, "grad_norm": 0.0013015334261581302, "learning_rate": 0.00018192182557697534, "loss": 23.0, "step": 3263 }, { "epoch": 1.9591836734693877, "grad_norm": 0.0009710061131045222, "learning_rate": 0.00018191099040259918, "loss": 23.0, "step": 3264 }, { "epoch": 1.9597839135654262, "grad_norm": 0.0013321270234882832, "learning_rate": 0.0001819001523050305, "loss": 23.0, "step": 3265 }, { "epoch": 1.9603841536614646, "grad_norm": 0.004230269230902195, "learning_rate": 0.00018188931128465603, "loss": 23.0, "step": 3266 }, { "epoch": 1.9609843937575029, "grad_norm": 0.0016738398699089885, "learning_rate": 0.00018187846734186267, "loss": 23.0, "step": 3267 }, { "epoch": 1.9615846338535414, "grad_norm": 0.0009718580404296517, "learning_rate": 0.00018186762047703745, "loss": 23.0, "step": 3268 }, { "epoch": 1.96218487394958, "grad_norm": 0.0022842460311949253, "learning_rate": 0.00018185677069056742, "loss": 23.0, "step": 3269 }, { "epoch": 1.9627851140456183, "grad_norm": 0.0014329022960737348, "learning_rate": 0.00018184591798283984, "loss": 23.0, "step": 3270 }, { "epoch": 1.9633853541416566, "grad_norm": 0.0033265934325754642, "learning_rate": 0.00018183506235424198, "loss": 23.0, "step": 3271 }, { "epoch": 1.9639855942376951, "grad_norm": 0.0007821978651918471, "learning_rate": 0.0001818242038051612, "loss": 23.0, "step": 3272 }, { "epoch": 1.9645858343337335, "grad_norm": 0.0015688884304836392, "learning_rate": 0.00018181334233598507, "loss": 23.0, "step": 3273 }, { "epoch": 1.9651860744297718, "grad_norm": 0.0008193565299734473, "learning_rate": 0.00018180247794710125, "loss": 23.0, "step": 3274 }, { "epoch": 1.9657863145258103, "grad_norm": 0.002213628264144063, "learning_rate": 0.0001817916106388974, "loss": 23.0, "step": 3275 }, { "epoch": 1.9663865546218489, "grad_norm": 0.0014024798292666674, "learning_rate": 0.00018178074041176135, "loss": 23.0, "step": 3276 }, { "epoch": 1.9669867947178872, "grad_norm": 0.0037618144415318966, "learning_rate": 0.00018176986726608103, "loss": 23.0, "step": 3277 }, { "epoch": 1.9675870348139255, "grad_norm": 0.0024038751143962145, "learning_rate": 0.00018175899120224449, "loss": 23.0, "step": 3278 }, { "epoch": 1.968187274909964, "grad_norm": 0.0033458671532571316, "learning_rate": 0.00018174811222063987, "loss": 23.0, "step": 3279 }, { "epoch": 1.9687875150060024, "grad_norm": 0.0019863531924784184, "learning_rate": 0.0001817372303216554, "loss": 23.0, "step": 3280 }, { "epoch": 1.9693877551020407, "grad_norm": 0.0015641896752640605, "learning_rate": 0.0001817263455056794, "loss": 23.0, "step": 3281 }, { "epoch": 1.9699879951980792, "grad_norm": 0.0012357387458905578, "learning_rate": 0.00018171545777310042, "loss": 23.0, "step": 3282 }, { "epoch": 1.9705882352941178, "grad_norm": 0.002113505732268095, "learning_rate": 0.00018170456712430691, "loss": 23.0, "step": 3283 }, { "epoch": 1.971188475390156, "grad_norm": 0.002042030915617943, "learning_rate": 0.0001816936735596876, "loss": 23.0, "step": 3284 }, { "epoch": 1.9717887154861944, "grad_norm": 0.0012755439383909106, "learning_rate": 0.00018168277707963118, "loss": 23.0, "step": 3285 }, { "epoch": 1.972388955582233, "grad_norm": 0.0019094778690487146, "learning_rate": 0.0001816718776845266, "loss": 23.0, "step": 3286 }, { "epoch": 1.9729891956782715, "grad_norm": 0.00413705175742507, "learning_rate": 0.00018166097537476277, "loss": 23.0, "step": 3287 }, { "epoch": 1.9735894357743096, "grad_norm": 0.0020690206438302994, "learning_rate": 0.00018165007015072876, "loss": 23.0, "step": 3288 }, { "epoch": 1.974189675870348, "grad_norm": 0.00169268692843616, "learning_rate": 0.00018163916201281382, "loss": 23.0, "step": 3289 }, { "epoch": 1.9747899159663866, "grad_norm": 0.001092193415388465, "learning_rate": 0.00018162825096140718, "loss": 23.0, "step": 3290 }, { "epoch": 1.975390156062425, "grad_norm": 0.0015564619097858667, "learning_rate": 0.0001816173369968982, "loss": 23.0, "step": 3291 }, { "epoch": 1.9759903961584633, "grad_norm": 0.0038381817284971476, "learning_rate": 0.00018160642011967644, "loss": 23.0, "step": 3292 }, { "epoch": 1.9765906362545018, "grad_norm": 0.0013099567731842399, "learning_rate": 0.00018159550033013144, "loss": 23.0, "step": 3293 }, { "epoch": 1.9771908763505404, "grad_norm": 0.00048246965161524713, "learning_rate": 0.00018158457762865296, "loss": 23.0, "step": 3294 }, { "epoch": 1.9777911164465787, "grad_norm": 0.0024292506277561188, "learning_rate": 0.00018157365201563073, "loss": 23.0, "step": 3295 }, { "epoch": 1.978391356542617, "grad_norm": 0.0020980066619813442, "learning_rate": 0.0001815627234914547, "loss": 23.0, "step": 3296 }, { "epoch": 1.9789915966386555, "grad_norm": 0.0009206788381561637, "learning_rate": 0.00018155179205651487, "loss": 23.0, "step": 3297 }, { "epoch": 1.9795918367346939, "grad_norm": 0.0014182684244588017, "learning_rate": 0.00018154085771120138, "loss": 23.0, "step": 3298 }, { "epoch": 1.9801920768307322, "grad_norm": 0.0010655620135366917, "learning_rate": 0.0001815299204559044, "loss": 23.0, "step": 3299 }, { "epoch": 1.9807923169267707, "grad_norm": 0.0018564078491181135, "learning_rate": 0.0001815189802910143, "loss": 23.0, "step": 3300 }, { "epoch": 1.9813925570228093, "grad_norm": 0.0016347630880773067, "learning_rate": 0.00018150803721692148, "loss": 23.0, "step": 3301 }, { "epoch": 1.9819927971188476, "grad_norm": 0.0021132517140358686, "learning_rate": 0.00018149709123401646, "loss": 23.0, "step": 3302 }, { "epoch": 1.982593037214886, "grad_norm": 0.002489116508513689, "learning_rate": 0.0001814861423426899, "loss": 23.0, "step": 3303 }, { "epoch": 1.9831932773109244, "grad_norm": 0.0014195957919582725, "learning_rate": 0.0001814751905433325, "loss": 23.0, "step": 3304 }, { "epoch": 1.9837935174069627, "grad_norm": 0.0011736804153770208, "learning_rate": 0.00018146423583633518, "loss": 23.0, "step": 3305 }, { "epoch": 1.984393757503001, "grad_norm": 0.002241331385448575, "learning_rate": 0.00018145327822208883, "loss": 23.0, "step": 3306 }, { "epoch": 1.9849939975990396, "grad_norm": 0.0017535320948809385, "learning_rate": 0.00018144231770098447, "loss": 23.0, "step": 3307 }, { "epoch": 1.9855942376950781, "grad_norm": 0.0021708435378968716, "learning_rate": 0.00018143135427341328, "loss": 23.0, "step": 3308 }, { "epoch": 1.9861944777911165, "grad_norm": 0.0023300815373659134, "learning_rate": 0.00018142038793976655, "loss": 23.0, "step": 3309 }, { "epoch": 1.9867947178871548, "grad_norm": 0.0014577904948964715, "learning_rate": 0.0001814094187004356, "loss": 23.0, "step": 3310 }, { "epoch": 1.9873949579831933, "grad_norm": 0.0024585998617112637, "learning_rate": 0.00018139844655581193, "loss": 23.0, "step": 3311 }, { "epoch": 1.9879951980792316, "grad_norm": 0.0038247143384069204, "learning_rate": 0.00018138747150628706, "loss": 23.0, "step": 3312 }, { "epoch": 1.98859543817527, "grad_norm": 0.0011696047149598598, "learning_rate": 0.0001813764935522527, "loss": 23.0, "step": 3313 }, { "epoch": 1.9891956782713085, "grad_norm": 0.0028556687757372856, "learning_rate": 0.00018136551269410062, "loss": 23.0, "step": 3314 }, { "epoch": 1.989795918367347, "grad_norm": 0.0023186022881418467, "learning_rate": 0.00018135452893222262, "loss": 23.0, "step": 3315 }, { "epoch": 1.9903961584633854, "grad_norm": 0.005861006677150726, "learning_rate": 0.0001813435422670108, "loss": 23.0, "step": 3316 }, { "epoch": 1.9909963985594237, "grad_norm": 0.0006705807754769921, "learning_rate": 0.00018133255269885723, "loss": 23.0, "step": 3317 }, { "epoch": 1.9915966386554622, "grad_norm": 0.0016647104639559984, "learning_rate": 0.00018132156022815404, "loss": 23.0, "step": 3318 }, { "epoch": 1.9921968787515008, "grad_norm": 0.0013882304774597287, "learning_rate": 0.00018131056485529352, "loss": 23.0, "step": 3319 }, { "epoch": 1.9927971188475389, "grad_norm": 0.0024001754354685545, "learning_rate": 0.00018129956658066812, "loss": 23.0, "step": 3320 }, { "epoch": 1.9933973589435774, "grad_norm": 0.0018285515252500772, "learning_rate": 0.0001812885654046703, "loss": 23.0, "step": 3321 }, { "epoch": 1.993997599039616, "grad_norm": 0.0015243416419252753, "learning_rate": 0.00018127756132769272, "loss": 23.0, "step": 3322 }, { "epoch": 1.9945978391356542, "grad_norm": 0.005187567323446274, "learning_rate": 0.00018126655435012803, "loss": 23.0, "step": 3323 }, { "epoch": 1.9951980792316926, "grad_norm": 0.0017248830990865827, "learning_rate": 0.00018125554447236906, "loss": 23.0, "step": 3324 }, { "epoch": 1.995798319327731, "grad_norm": 0.0024234713055193424, "learning_rate": 0.00018124453169480868, "loss": 23.0, "step": 3325 }, { "epoch": 1.9963985594237696, "grad_norm": 0.0025638325605541468, "learning_rate": 0.00018123351601784, "loss": 23.0, "step": 3326 }, { "epoch": 1.996998799519808, "grad_norm": 0.002679846016690135, "learning_rate": 0.00018122249744185605, "loss": 23.0, "step": 3327 }, { "epoch": 1.9975990396158463, "grad_norm": 0.003047887934371829, "learning_rate": 0.00018121147596725015, "loss": 23.0, "step": 3328 }, { "epoch": 1.9981992797118848, "grad_norm": 0.0017722995253279805, "learning_rate": 0.00018120045159441555, "loss": 23.0, "step": 3329 }, { "epoch": 1.9987995198079231, "grad_norm": 0.0012083728797733784, "learning_rate": 0.00018118942432374569, "loss": 23.0, "step": 3330 }, { "epoch": 1.9993997599039615, "grad_norm": 0.0022785505279898643, "learning_rate": 0.00018117839415563412, "loss": 23.0, "step": 3331 }, { "epoch": 2.0, "grad_norm": 0.0014194271061569452, "learning_rate": 0.0001811673610904745, "loss": 23.0, "step": 3332 }, { "epoch": 2.0006002400960385, "grad_norm": 0.0032136687077581882, "learning_rate": 0.0001811563251286605, "loss": 23.0, "step": 3333 }, { "epoch": 2.0012004801920766, "grad_norm": 0.00217724172398448, "learning_rate": 0.00018114528627058605, "loss": 23.0, "step": 3334 }, { "epoch": 2.001800720288115, "grad_norm": 0.0017727838130667806, "learning_rate": 0.00018113424451664508, "loss": 23.0, "step": 3335 }, { "epoch": 2.0024009603841537, "grad_norm": 0.00423310836777091, "learning_rate": 0.00018112319986723162, "loss": 23.0, "step": 3336 }, { "epoch": 2.0030012004801923, "grad_norm": 0.0028058600146323442, "learning_rate": 0.00018111215232273983, "loss": 23.0, "step": 3337 }, { "epoch": 2.0036014405762304, "grad_norm": 0.0015503312461078167, "learning_rate": 0.00018110110188356395, "loss": 23.0, "step": 3338 }, { "epoch": 2.004201680672269, "grad_norm": 0.0017939833924174309, "learning_rate": 0.00018109004855009837, "loss": 23.0, "step": 3339 }, { "epoch": 2.0048019207683074, "grad_norm": 0.0033577047288417816, "learning_rate": 0.00018107899232273756, "loss": 23.0, "step": 3340 }, { "epoch": 2.0054021608643455, "grad_norm": 0.0008726062951609492, "learning_rate": 0.00018106793320187605, "loss": 23.0, "step": 3341 }, { "epoch": 2.006002400960384, "grad_norm": 0.0011513312347233295, "learning_rate": 0.00018105687118790854, "loss": 23.0, "step": 3342 }, { "epoch": 2.0066026410564226, "grad_norm": 0.001895427005365491, "learning_rate": 0.00018104580628122984, "loss": 23.0, "step": 3343 }, { "epoch": 2.007202881152461, "grad_norm": 0.0009158776374533772, "learning_rate": 0.0001810347384822347, "loss": 23.0, "step": 3344 }, { "epoch": 2.0078031212484992, "grad_norm": 0.0008634175173938274, "learning_rate": 0.00018102366779131827, "loss": 23.0, "step": 3345 }, { "epoch": 2.008403361344538, "grad_norm": 0.0019585357513278723, "learning_rate": 0.00018101259420887553, "loss": 23.0, "step": 3346 }, { "epoch": 2.0090036014405763, "grad_norm": 0.005161345470696688, "learning_rate": 0.00018100151773530168, "loss": 23.0, "step": 3347 }, { "epoch": 2.009603841536615, "grad_norm": 0.003942587878555059, "learning_rate": 0.00018099043837099203, "loss": 23.0, "step": 3348 }, { "epoch": 2.010204081632653, "grad_norm": 0.0013700701529160142, "learning_rate": 0.00018097935611634198, "loss": 23.0, "step": 3349 }, { "epoch": 2.0108043217286915, "grad_norm": 0.0007138294167816639, "learning_rate": 0.000180968270971747, "loss": 23.0, "step": 3350 }, { "epoch": 2.01140456182473, "grad_norm": 0.005530905444175005, "learning_rate": 0.0001809571829376027, "loss": 23.0, "step": 3351 }, { "epoch": 2.012004801920768, "grad_norm": 0.0032923435792326927, "learning_rate": 0.0001809460920143048, "loss": 23.0, "step": 3352 }, { "epoch": 2.0126050420168067, "grad_norm": 0.0005104127340018749, "learning_rate": 0.0001809349982022491, "loss": 23.0, "step": 3353 }, { "epoch": 2.013205282112845, "grad_norm": 0.001574002206325531, "learning_rate": 0.0001809239015018315, "loss": 23.0, "step": 3354 }, { "epoch": 2.0138055222088838, "grad_norm": 0.002212009159848094, "learning_rate": 0.000180912801913448, "loss": 23.0, "step": 3355 }, { "epoch": 2.014405762304922, "grad_norm": 0.0011559105478227139, "learning_rate": 0.00018090169943749476, "loss": 23.0, "step": 3356 }, { "epoch": 2.0150060024009604, "grad_norm": 0.0033464916050434113, "learning_rate": 0.00018089059407436794, "loss": 23.0, "step": 3357 }, { "epoch": 2.015606242496999, "grad_norm": 0.0011934455251321197, "learning_rate": 0.0001808794858244639, "loss": 23.0, "step": 3358 }, { "epoch": 2.016206482593037, "grad_norm": 0.0006701741367578506, "learning_rate": 0.00018086837468817907, "loss": 23.0, "step": 3359 }, { "epoch": 2.0168067226890756, "grad_norm": 0.0014794671442359686, "learning_rate": 0.00018085726066590998, "loss": 23.0, "step": 3360 }, { "epoch": 2.017406962785114, "grad_norm": 0.00047398850438185036, "learning_rate": 0.00018084614375805323, "loss": 23.0, "step": 3361 }, { "epoch": 2.0180072028811527, "grad_norm": 0.0023361060302704573, "learning_rate": 0.00018083502396500556, "loss": 23.0, "step": 3362 }, { "epoch": 2.0186074429771907, "grad_norm": 0.0012610971461981535, "learning_rate": 0.0001808239012871638, "loss": 23.0, "step": 3363 }, { "epoch": 2.0192076830732293, "grad_norm": 0.004134600982069969, "learning_rate": 0.00018081277572492494, "loss": 23.0, "step": 3364 }, { "epoch": 2.019807923169268, "grad_norm": 0.000841438421048224, "learning_rate": 0.000180801647278686, "loss": 23.0, "step": 3365 }, { "epoch": 2.020408163265306, "grad_norm": 0.002190615748986602, "learning_rate": 0.0001807905159488441, "loss": 23.0, "step": 3366 }, { "epoch": 2.0210084033613445, "grad_norm": 0.0028436193242669106, "learning_rate": 0.00018077938173579648, "loss": 23.0, "step": 3367 }, { "epoch": 2.021608643457383, "grad_norm": 0.003500116989016533, "learning_rate": 0.00018076824463994056, "loss": 23.0, "step": 3368 }, { "epoch": 2.0222088835534215, "grad_norm": 0.0026991087943315506, "learning_rate": 0.0001807571046616737, "loss": 23.0, "step": 3369 }, { "epoch": 2.0228091236494596, "grad_norm": 0.0010631488403305411, "learning_rate": 0.00018074596180139353, "loss": 23.0, "step": 3370 }, { "epoch": 2.023409363745498, "grad_norm": 0.0020518836099654436, "learning_rate": 0.00018073481605949772, "loss": 23.0, "step": 3371 }, { "epoch": 2.0240096038415367, "grad_norm": 0.006052622105926275, "learning_rate": 0.00018072366743638395, "loss": 23.0, "step": 3372 }, { "epoch": 2.024609843937575, "grad_norm": 0.0021911882795393467, "learning_rate": 0.00018071251593245016, "loss": 23.0, "step": 3373 }, { "epoch": 2.0252100840336134, "grad_norm": 0.0032586664892733097, "learning_rate": 0.00018070136154809427, "loss": 23.0, "step": 3374 }, { "epoch": 2.025810324129652, "grad_norm": 0.000785050040576607, "learning_rate": 0.0001806902042837144, "loss": 23.0, "step": 3375 }, { "epoch": 2.0264105642256904, "grad_norm": 0.0007825555512681603, "learning_rate": 0.00018067904413970868, "loss": 23.0, "step": 3376 }, { "epoch": 2.0270108043217285, "grad_norm": 0.003356239991262555, "learning_rate": 0.00018066788111647543, "loss": 23.0, "step": 3377 }, { "epoch": 2.027611044417767, "grad_norm": 0.0014766575768589973, "learning_rate": 0.00018065671521441302, "loss": 23.0, "step": 3378 }, { "epoch": 2.0282112845138056, "grad_norm": 0.0009158303728327155, "learning_rate": 0.00018064554643391987, "loss": 23.0, "step": 3379 }, { "epoch": 2.028811524609844, "grad_norm": 0.0009348996100015938, "learning_rate": 0.00018063437477539466, "loss": 23.0, "step": 3380 }, { "epoch": 2.0294117647058822, "grad_norm": 0.0008702090126462281, "learning_rate": 0.00018062320023923598, "loss": 23.0, "step": 3381 }, { "epoch": 2.030012004801921, "grad_norm": 0.002282114699482918, "learning_rate": 0.0001806120228258427, "loss": 23.0, "step": 3382 }, { "epoch": 2.0306122448979593, "grad_norm": 0.005010796710848808, "learning_rate": 0.00018060084253561368, "loss": 23.0, "step": 3383 }, { "epoch": 2.0312124849939974, "grad_norm": 0.0017042094841599464, "learning_rate": 0.0001805896593689479, "loss": 23.0, "step": 3384 }, { "epoch": 2.031812725090036, "grad_norm": 0.0037521664053201675, "learning_rate": 0.00018057847332624452, "loss": 23.0, "step": 3385 }, { "epoch": 2.0324129651860745, "grad_norm": 0.0015214249724522233, "learning_rate": 0.00018056728440790266, "loss": 23.0, "step": 3386 }, { "epoch": 2.033013205282113, "grad_norm": 0.0004935787292197347, "learning_rate": 0.00018055609261432169, "loss": 23.0, "step": 3387 }, { "epoch": 2.033613445378151, "grad_norm": 0.003463397966697812, "learning_rate": 0.00018054489794590097, "loss": 23.0, "step": 3388 }, { "epoch": 2.0342136854741897, "grad_norm": 0.002584519563242793, "learning_rate": 0.00018053370040304004, "loss": 23.0, "step": 3389 }, { "epoch": 2.034813925570228, "grad_norm": 0.0012157384771853685, "learning_rate": 0.0001805224999861385, "loss": 23.0, "step": 3390 }, { "epoch": 2.0354141656662663, "grad_norm": 0.0029177102260291576, "learning_rate": 0.00018051129669559605, "loss": 23.0, "step": 3391 }, { "epoch": 2.036014405762305, "grad_norm": 0.0015428938204422593, "learning_rate": 0.00018050009053181257, "loss": 23.0, "step": 3392 }, { "epoch": 2.0366146458583434, "grad_norm": 0.0007340401643887162, "learning_rate": 0.00018048888149518788, "loss": 23.0, "step": 3393 }, { "epoch": 2.037214885954382, "grad_norm": 0.0011957536917179823, "learning_rate": 0.00018047766958612208, "loss": 23.0, "step": 3394 }, { "epoch": 2.03781512605042, "grad_norm": 0.0017125644953921437, "learning_rate": 0.00018046645480501524, "loss": 23.0, "step": 3395 }, { "epoch": 2.0384153661464586, "grad_norm": 0.0023938282392919064, "learning_rate": 0.0001804552371522676, "loss": 23.0, "step": 3396 }, { "epoch": 2.039015606242497, "grad_norm": 0.0033557454589754343, "learning_rate": 0.00018044401662827955, "loss": 23.0, "step": 3397 }, { "epoch": 2.039615846338535, "grad_norm": 0.0025200413074344397, "learning_rate": 0.00018043279323345143, "loss": 23.0, "step": 3398 }, { "epoch": 2.0402160864345738, "grad_norm": 0.0025761276483535767, "learning_rate": 0.00018042156696818385, "loss": 23.0, "step": 3399 }, { "epoch": 2.0408163265306123, "grad_norm": 0.0009423838928341866, "learning_rate": 0.00018041033783287738, "loss": 23.0, "step": 3400 }, { "epoch": 2.041416566626651, "grad_norm": 0.004442538134753704, "learning_rate": 0.00018039910582793282, "loss": 23.0, "step": 3401 }, { "epoch": 2.042016806722689, "grad_norm": 0.002732986817136407, "learning_rate": 0.000180387870953751, "loss": 23.0, "step": 3402 }, { "epoch": 2.0426170468187275, "grad_norm": 0.003854157403111458, "learning_rate": 0.0001803766332107328, "loss": 23.0, "step": 3403 }, { "epoch": 2.043217286914766, "grad_norm": 0.00363258458673954, "learning_rate": 0.00018036539259927932, "loss": 23.0, "step": 3404 }, { "epoch": 2.0438175270108045, "grad_norm": 0.004020926542580128, "learning_rate": 0.00018035414911979177, "loss": 23.0, "step": 3405 }, { "epoch": 2.0444177671068426, "grad_norm": 0.0015982519835233688, "learning_rate": 0.00018034290277267126, "loss": 23.0, "step": 3406 }, { "epoch": 2.045018007202881, "grad_norm": 0.0038941625971347094, "learning_rate": 0.00018033165355831924, "loss": 23.0, "step": 3407 }, { "epoch": 2.0456182472989197, "grad_norm": 0.0026739637833088636, "learning_rate": 0.00018032040147713716, "loss": 23.0, "step": 3408 }, { "epoch": 2.046218487394958, "grad_norm": 0.0016816748538985848, "learning_rate": 0.00018030914652952652, "loss": 23.0, "step": 3409 }, { "epoch": 2.0468187274909964, "grad_norm": 0.001945959753356874, "learning_rate": 0.00018029788871588905, "loss": 23.0, "step": 3410 }, { "epoch": 2.047418967587035, "grad_norm": 0.002473194617778063, "learning_rate": 0.0001802866280366265, "loss": 23.0, "step": 3411 }, { "epoch": 2.0480192076830734, "grad_norm": 0.003011916531249881, "learning_rate": 0.00018027536449214066, "loss": 23.0, "step": 3412 }, { "epoch": 2.0486194477791115, "grad_norm": 0.0023058606311678886, "learning_rate": 0.0001802640980828336, "loss": 23.0, "step": 3413 }, { "epoch": 2.04921968787515, "grad_norm": 0.0021154494024813175, "learning_rate": 0.00018025282880910733, "loss": 23.0, "step": 3414 }, { "epoch": 2.0498199279711886, "grad_norm": 0.000943707418628037, "learning_rate": 0.00018024155667136405, "loss": 23.0, "step": 3415 }, { "epoch": 2.0504201680672267, "grad_norm": 0.0005733675789088011, "learning_rate": 0.000180230281670006, "loss": 23.0, "step": 3416 }, { "epoch": 2.0510204081632653, "grad_norm": 0.0012774687493219972, "learning_rate": 0.00018021900380543558, "loss": 23.0, "step": 3417 }, { "epoch": 2.051620648259304, "grad_norm": 0.0015681550139561296, "learning_rate": 0.00018020772307805524, "loss": 23.0, "step": 3418 }, { "epoch": 2.0522208883553423, "grad_norm": 0.002922586165368557, "learning_rate": 0.00018019643948826762, "loss": 23.0, "step": 3419 }, { "epoch": 2.0528211284513804, "grad_norm": 0.000941049656830728, "learning_rate": 0.00018018515303647533, "loss": 23.0, "step": 3420 }, { "epoch": 2.053421368547419, "grad_norm": 0.0013102737721055746, "learning_rate": 0.0001801738637230812, "loss": 23.0, "step": 3421 }, { "epoch": 2.0540216086434575, "grad_norm": 0.0010620380053296685, "learning_rate": 0.00018016257154848813, "loss": 23.0, "step": 3422 }, { "epoch": 2.0546218487394956, "grad_norm": 0.001468815840780735, "learning_rate": 0.00018015127651309907, "loss": 23.0, "step": 3423 }, { "epoch": 2.055222088835534, "grad_norm": 0.002112776506692171, "learning_rate": 0.0001801399786173171, "loss": 23.0, "step": 3424 }, { "epoch": 2.0558223289315727, "grad_norm": 0.0043845935724675655, "learning_rate": 0.00018012867786154545, "loss": 23.0, "step": 3425 }, { "epoch": 2.0564225690276112, "grad_norm": 0.0023064163979142904, "learning_rate": 0.0001801173742461874, "loss": 23.0, "step": 3426 }, { "epoch": 2.0570228091236493, "grad_norm": 0.0015078387223184109, "learning_rate": 0.0001801060677716464, "loss": 23.0, "step": 3427 }, { "epoch": 2.057623049219688, "grad_norm": 0.0014458864461630583, "learning_rate": 0.00018009475843832587, "loss": 23.0, "step": 3428 }, { "epoch": 2.0582232893157264, "grad_norm": 0.0032603242434561253, "learning_rate": 0.00018008344624662943, "loss": 23.0, "step": 3429 }, { "epoch": 2.0588235294117645, "grad_norm": 0.0012629261473193765, "learning_rate": 0.00018007213119696077, "loss": 23.0, "step": 3430 }, { "epoch": 2.059423769507803, "grad_norm": 0.0016698999097570777, "learning_rate": 0.00018006081328972374, "loss": 23.0, "step": 3431 }, { "epoch": 2.0600240096038416, "grad_norm": 0.001395948464050889, "learning_rate": 0.00018004949252532223, "loss": 23.0, "step": 3432 }, { "epoch": 2.06062424969988, "grad_norm": 0.004593064077198505, "learning_rate": 0.00018003816890416023, "loss": 23.0, "step": 3433 }, { "epoch": 2.061224489795918, "grad_norm": 0.0021219372283667326, "learning_rate": 0.00018002684242664186, "loss": 23.0, "step": 3434 }, { "epoch": 2.0618247298919568, "grad_norm": 0.0006292082252912223, "learning_rate": 0.00018001551309317136, "loss": 23.0, "step": 3435 }, { "epoch": 2.0624249699879953, "grad_norm": 0.0024436218664050102, "learning_rate": 0.000180004180904153, "loss": 23.0, "step": 3436 }, { "epoch": 2.0630252100840334, "grad_norm": 0.0022101947106420994, "learning_rate": 0.00017999284585999126, "loss": 23.0, "step": 3437 }, { "epoch": 2.063625450180072, "grad_norm": 0.0024126351345330477, "learning_rate": 0.00017998150796109057, "loss": 23.0, "step": 3438 }, { "epoch": 2.0642256902761105, "grad_norm": 0.00167564966250211, "learning_rate": 0.00017997016720785563, "loss": 23.0, "step": 3439 }, { "epoch": 2.064825930372149, "grad_norm": 0.001377441338263452, "learning_rate": 0.0001799588236006911, "loss": 23.0, "step": 3440 }, { "epoch": 2.065426170468187, "grad_norm": 0.002041054656729102, "learning_rate": 0.00017994747714000187, "loss": 23.0, "step": 3441 }, { "epoch": 2.0660264105642256, "grad_norm": 0.003891473403200507, "learning_rate": 0.0001799361278261928, "loss": 23.0, "step": 3442 }, { "epoch": 2.066626650660264, "grad_norm": 0.0010193546768277884, "learning_rate": 0.00017992477565966896, "loss": 23.0, "step": 3443 }, { "epoch": 2.0672268907563027, "grad_norm": 0.0011679233284667134, "learning_rate": 0.00017991342064083547, "loss": 23.0, "step": 3444 }, { "epoch": 2.067827130852341, "grad_norm": 0.0009892381494864821, "learning_rate": 0.00017990206277009756, "loss": 23.0, "step": 3445 }, { "epoch": 2.0684273709483794, "grad_norm": 0.0028641563840210438, "learning_rate": 0.00017989070204786053, "loss": 23.0, "step": 3446 }, { "epoch": 2.069027611044418, "grad_norm": 0.002391669899225235, "learning_rate": 0.0001798793384745299, "loss": 23.0, "step": 3447 }, { "epoch": 2.069627851140456, "grad_norm": 0.0008025117567740381, "learning_rate": 0.0001798679720505111, "loss": 23.0, "step": 3448 }, { "epoch": 2.0702280912364945, "grad_norm": 0.0011154050007462502, "learning_rate": 0.00017985660277620985, "loss": 23.0, "step": 3449 }, { "epoch": 2.070828331332533, "grad_norm": 0.001934365602210164, "learning_rate": 0.00017984523065203188, "loss": 23.0, "step": 3450 }, { "epoch": 2.0714285714285716, "grad_norm": 0.0007151182508096099, "learning_rate": 0.000179833855678383, "loss": 23.0, "step": 3451 }, { "epoch": 2.0720288115246097, "grad_norm": 0.001716459752060473, "learning_rate": 0.00017982247785566918, "loss": 23.0, "step": 3452 }, { "epoch": 2.0726290516206483, "grad_norm": 0.0010225954465568066, "learning_rate": 0.00017981109718429643, "loss": 23.0, "step": 3453 }, { "epoch": 2.073229291716687, "grad_norm": 0.0027839874383062124, "learning_rate": 0.00017979971366467094, "loss": 23.0, "step": 3454 }, { "epoch": 2.073829531812725, "grad_norm": 0.0011828261194750667, "learning_rate": 0.00017978832729719893, "loss": 23.0, "step": 3455 }, { "epoch": 2.0744297719087634, "grad_norm": 0.00159098906442523, "learning_rate": 0.00017977693808228677, "loss": 23.0, "step": 3456 }, { "epoch": 2.075030012004802, "grad_norm": 0.0014060731045901775, "learning_rate": 0.0001797655460203409, "loss": 23.0, "step": 3457 }, { "epoch": 2.0756302521008405, "grad_norm": 0.001484134467318654, "learning_rate": 0.00017975415111176783, "loss": 23.0, "step": 3458 }, { "epoch": 2.0762304921968786, "grad_norm": 0.0017137357499450445, "learning_rate": 0.0001797427533569743, "loss": 23.0, "step": 3459 }, { "epoch": 2.076830732292917, "grad_norm": 0.0030945942271500826, "learning_rate": 0.000179731352756367, "loss": 23.0, "step": 3460 }, { "epoch": 2.0774309723889557, "grad_norm": 0.0011171615915372968, "learning_rate": 0.00017971994931035283, "loss": 23.0, "step": 3461 }, { "epoch": 2.078031212484994, "grad_norm": 0.001267662737518549, "learning_rate": 0.00017970854301933874, "loss": 23.0, "step": 3462 }, { "epoch": 2.0786314525810323, "grad_norm": 0.0021587491501122713, "learning_rate": 0.0001796971338837318, "loss": 23.0, "step": 3463 }, { "epoch": 2.079231692677071, "grad_norm": 0.002123309997841716, "learning_rate": 0.00017968572190393912, "loss": 23.0, "step": 3464 }, { "epoch": 2.0798319327731094, "grad_norm": 0.0012843715958297253, "learning_rate": 0.000179674307080368, "loss": 23.0, "step": 3465 }, { "epoch": 2.0804321728691475, "grad_norm": 0.0021665177773684263, "learning_rate": 0.00017966288941342583, "loss": 23.0, "step": 3466 }, { "epoch": 2.081032412965186, "grad_norm": 0.0019804276525974274, "learning_rate": 0.00017965146890352006, "loss": 23.0, "step": 3467 }, { "epoch": 2.0816326530612246, "grad_norm": 0.0025594120379537344, "learning_rate": 0.00017964004555105824, "loss": 23.0, "step": 3468 }, { "epoch": 2.082232893157263, "grad_norm": 0.002886942122131586, "learning_rate": 0.00017962861935644805, "loss": 23.0, "step": 3469 }, { "epoch": 2.082833133253301, "grad_norm": 0.001857664086855948, "learning_rate": 0.0001796171903200973, "loss": 23.0, "step": 3470 }, { "epoch": 2.0834333733493398, "grad_norm": 0.002572208410128951, "learning_rate": 0.0001796057584424138, "loss": 23.0, "step": 3471 }, { "epoch": 2.0840336134453783, "grad_norm": 0.002026068978011608, "learning_rate": 0.00017959432372380554, "loss": 23.0, "step": 3472 }, { "epoch": 2.0846338535414164, "grad_norm": 0.0016905996017158031, "learning_rate": 0.0001795828861646806, "loss": 23.0, "step": 3473 }, { "epoch": 2.085234093637455, "grad_norm": 0.0011786259710788727, "learning_rate": 0.00017957144576544718, "loss": 23.0, "step": 3474 }, { "epoch": 2.0858343337334935, "grad_norm": 0.0007971758022904396, "learning_rate": 0.00017956000252651353, "loss": 23.0, "step": 3475 }, { "epoch": 2.086434573829532, "grad_norm": 0.0009448099881410599, "learning_rate": 0.00017954855644828808, "loss": 23.0, "step": 3476 }, { "epoch": 2.08703481392557, "grad_norm": 0.002794134197756648, "learning_rate": 0.00017953710753117923, "loss": 23.0, "step": 3477 }, { "epoch": 2.0876350540216086, "grad_norm": 0.0017918311059474945, "learning_rate": 0.00017952565577559562, "loss": 23.0, "step": 3478 }, { "epoch": 2.088235294117647, "grad_norm": 0.0032223749440163374, "learning_rate": 0.00017951420118194594, "loss": 23.0, "step": 3479 }, { "epoch": 2.0888355342136853, "grad_norm": 0.0009358105598948896, "learning_rate": 0.00017950274375063893, "loss": 23.0, "step": 3480 }, { "epoch": 2.089435774309724, "grad_norm": 0.004243810195475817, "learning_rate": 0.00017949128348208353, "loss": 23.0, "step": 3481 }, { "epoch": 2.0900360144057624, "grad_norm": 0.002074790420010686, "learning_rate": 0.00017947982037668867, "loss": 23.0, "step": 3482 }, { "epoch": 2.090636254501801, "grad_norm": 0.0004455752205103636, "learning_rate": 0.00017946835443486347, "loss": 23.0, "step": 3483 }, { "epoch": 2.091236494597839, "grad_norm": 0.005418194457888603, "learning_rate": 0.00017945688565701714, "loss": 23.0, "step": 3484 }, { "epoch": 2.0918367346938775, "grad_norm": 0.0012525448109954596, "learning_rate": 0.0001794454140435589, "loss": 23.0, "step": 3485 }, { "epoch": 2.092436974789916, "grad_norm": 0.0017173114465549588, "learning_rate": 0.00017943393959489825, "loss": 23.0, "step": 3486 }, { "epoch": 2.093037214885954, "grad_norm": 0.0008452086476609111, "learning_rate": 0.00017942246231144457, "loss": 23.0, "step": 3487 }, { "epoch": 2.0936374549819927, "grad_norm": 0.0011205211048945785, "learning_rate": 0.00017941098219360755, "loss": 23.0, "step": 3488 }, { "epoch": 2.0942376950780313, "grad_norm": 0.0008696285076439381, "learning_rate": 0.00017939949924179684, "loss": 23.0, "step": 3489 }, { "epoch": 2.09483793517407, "grad_norm": 0.0013399510644376278, "learning_rate": 0.00017938801345642223, "loss": 23.0, "step": 3490 }, { "epoch": 2.095438175270108, "grad_norm": 0.0006139983888715506, "learning_rate": 0.00017937652483789363, "loss": 23.0, "step": 3491 }, { "epoch": 2.0960384153661464, "grad_norm": 0.0014349283883348107, "learning_rate": 0.00017936503338662104, "loss": 23.0, "step": 3492 }, { "epoch": 2.096638655462185, "grad_norm": 0.0010579722002148628, "learning_rate": 0.00017935353910301455, "loss": 23.0, "step": 3493 }, { "epoch": 2.097238895558223, "grad_norm": 0.0010757379932329059, "learning_rate": 0.0001793420419874844, "loss": 23.0, "step": 3494 }, { "epoch": 2.0978391356542616, "grad_norm": 0.0016692588105797768, "learning_rate": 0.00017933054204044083, "loss": 23.0, "step": 3495 }, { "epoch": 2.0984393757503, "grad_norm": 0.002716292394325137, "learning_rate": 0.00017931903926229428, "loss": 23.0, "step": 3496 }, { "epoch": 2.0990396158463387, "grad_norm": 0.0011049581225961447, "learning_rate": 0.00017930753365345527, "loss": 23.0, "step": 3497 }, { "epoch": 2.099639855942377, "grad_norm": 0.001968407304957509, "learning_rate": 0.0001792960252143344, "loss": 23.0, "step": 3498 }, { "epoch": 2.1002400960384153, "grad_norm": 0.0006715644267387688, "learning_rate": 0.00017928451394534233, "loss": 23.0, "step": 3499 }, { "epoch": 2.100840336134454, "grad_norm": 0.0011009502923116088, "learning_rate": 0.00017927299984688992, "loss": 23.0, "step": 3500 }, { "epoch": 2.1014405762304924, "grad_norm": 0.002423146739602089, "learning_rate": 0.00017926148291938808, "loss": 23.0, "step": 3501 }, { "epoch": 2.1020408163265305, "grad_norm": 0.004653971176594496, "learning_rate": 0.00017924996316324778, "loss": 23.0, "step": 3502 }, { "epoch": 2.102641056422569, "grad_norm": 0.002340496750548482, "learning_rate": 0.00017923844057888012, "loss": 23.0, "step": 3503 }, { "epoch": 2.1032412965186076, "grad_norm": 0.0018698208732530475, "learning_rate": 0.00017922691516669635, "loss": 23.0, "step": 3504 }, { "epoch": 2.1038415366146457, "grad_norm": 0.0017265928909182549, "learning_rate": 0.00017921538692710782, "loss": 23.0, "step": 3505 }, { "epoch": 2.104441776710684, "grad_norm": 0.0035198975820094347, "learning_rate": 0.00017920385586052588, "loss": 23.0, "step": 3506 }, { "epoch": 2.1050420168067228, "grad_norm": 0.005279972683638334, "learning_rate": 0.00017919232196736203, "loss": 23.0, "step": 3507 }, { "epoch": 2.1056422569027613, "grad_norm": 0.0019218252273276448, "learning_rate": 0.00017918078524802795, "loss": 23.0, "step": 3508 }, { "epoch": 2.1062424969987994, "grad_norm": 0.0037660326343029737, "learning_rate": 0.00017916924570293528, "loss": 23.0, "step": 3509 }, { "epoch": 2.106842737094838, "grad_norm": 0.0016380378510802984, "learning_rate": 0.00017915770333249594, "loss": 23.0, "step": 3510 }, { "epoch": 2.1074429771908765, "grad_norm": 0.0015107912477105856, "learning_rate": 0.00017914615813712173, "loss": 23.0, "step": 3511 }, { "epoch": 2.1080432172869146, "grad_norm": 0.0029202804435044527, "learning_rate": 0.00017913461011722474, "loss": 23.0, "step": 3512 }, { "epoch": 2.108643457382953, "grad_norm": 0.0013868259266018867, "learning_rate": 0.0001791230592732171, "loss": 23.0, "step": 3513 }, { "epoch": 2.1092436974789917, "grad_norm": 0.0014497030060738325, "learning_rate": 0.000179111505605511, "loss": 23.0, "step": 3514 }, { "epoch": 2.10984393757503, "grad_norm": 0.0015206874813884497, "learning_rate": 0.00017909994911451872, "loss": 23.0, "step": 3515 }, { "epoch": 2.1104441776710683, "grad_norm": 0.003641763236373663, "learning_rate": 0.00017908838980065274, "loss": 23.0, "step": 3516 }, { "epoch": 2.111044417767107, "grad_norm": 0.0021101627498865128, "learning_rate": 0.0001790768276643256, "loss": 23.0, "step": 3517 }, { "epoch": 2.1116446578631454, "grad_norm": 0.0026833494193851948, "learning_rate": 0.00017906526270594988, "loss": 23.0, "step": 3518 }, { "epoch": 2.1122448979591835, "grad_norm": 0.0010312756057828665, "learning_rate": 0.0001790536949259383, "loss": 23.0, "step": 3519 }, { "epoch": 2.112845138055222, "grad_norm": 0.0033930472563952208, "learning_rate": 0.00017904212432470371, "loss": 23.0, "step": 3520 }, { "epoch": 2.1134453781512605, "grad_norm": 0.0020478384103626013, "learning_rate": 0.00017903055090265902, "loss": 23.0, "step": 3521 }, { "epoch": 2.114045618247299, "grad_norm": 0.0017036633798852563, "learning_rate": 0.00017901897466021725, "loss": 23.0, "step": 3522 }, { "epoch": 2.114645858343337, "grad_norm": 0.0015527663053944707, "learning_rate": 0.00017900739559779155, "loss": 23.0, "step": 3523 }, { "epoch": 2.1152460984393757, "grad_norm": 0.001124212285503745, "learning_rate": 0.00017899581371579512, "loss": 23.0, "step": 3524 }, { "epoch": 2.1158463385354143, "grad_norm": 0.0014042783295735717, "learning_rate": 0.0001789842290146413, "loss": 23.0, "step": 3525 }, { "epoch": 2.116446578631453, "grad_norm": 0.003444352652877569, "learning_rate": 0.0001789726414947435, "loss": 23.0, "step": 3526 }, { "epoch": 2.117046818727491, "grad_norm": 0.003073784988373518, "learning_rate": 0.00017896105115651533, "loss": 23.0, "step": 3527 }, { "epoch": 2.1176470588235294, "grad_norm": 0.0012429555645212531, "learning_rate": 0.0001789494580003703, "loss": 23.0, "step": 3528 }, { "epoch": 2.118247298919568, "grad_norm": 0.004864856600761414, "learning_rate": 0.0001789378620267222, "loss": 23.0, "step": 3529 }, { "epoch": 2.118847539015606, "grad_norm": 0.0022995832841843367, "learning_rate": 0.00017892626323598486, "loss": 23.0, "step": 3530 }, { "epoch": 2.1194477791116446, "grad_norm": 0.00140533153899014, "learning_rate": 0.00017891466162857223, "loss": 23.0, "step": 3531 }, { "epoch": 2.120048019207683, "grad_norm": 0.0009306234424002469, "learning_rate": 0.0001789030572048983, "loss": 23.0, "step": 3532 }, { "epoch": 2.1206482593037217, "grad_norm": 0.003682507202029228, "learning_rate": 0.00017889144996537725, "loss": 23.0, "step": 3533 }, { "epoch": 2.12124849939976, "grad_norm": 0.0009380745468661189, "learning_rate": 0.00017887983991042323, "loss": 23.0, "step": 3534 }, { "epoch": 2.1218487394957983, "grad_norm": 0.0013799188891425729, "learning_rate": 0.00017886822704045064, "loss": 23.0, "step": 3535 }, { "epoch": 2.122448979591837, "grad_norm": 0.0009243764216080308, "learning_rate": 0.00017885661135587393, "loss": 23.0, "step": 3536 }, { "epoch": 2.123049219687875, "grad_norm": 0.0008515262743458152, "learning_rate": 0.00017884499285710762, "loss": 23.0, "step": 3537 }, { "epoch": 2.1236494597839135, "grad_norm": 0.0015884919557720423, "learning_rate": 0.00017883337154456629, "loss": 23.0, "step": 3538 }, { "epoch": 2.124249699879952, "grad_norm": 0.0032588981557637453, "learning_rate": 0.0001788217474186647, "loss": 23.0, "step": 3539 }, { "epoch": 2.1248499399759906, "grad_norm": 0.0024941442534327507, "learning_rate": 0.00017881012047981772, "loss": 23.0, "step": 3540 }, { "epoch": 2.1254501800720287, "grad_norm": 0.0017060820246115327, "learning_rate": 0.00017879849072844028, "loss": 23.0, "step": 3541 }, { "epoch": 2.1260504201680672, "grad_norm": 0.003131990320980549, "learning_rate": 0.0001787868581649474, "loss": 23.0, "step": 3542 }, { "epoch": 2.1266506602641058, "grad_norm": 0.0024005863815546036, "learning_rate": 0.00017877522278975417, "loss": 23.0, "step": 3543 }, { "epoch": 2.127250900360144, "grad_norm": 0.0006045355694368482, "learning_rate": 0.0001787635846032759, "loss": 23.0, "step": 3544 }, { "epoch": 2.1278511404561824, "grad_norm": 0.0035146684385836124, "learning_rate": 0.0001787519436059279, "loss": 23.0, "step": 3545 }, { "epoch": 2.128451380552221, "grad_norm": 0.0014111412456259131, "learning_rate": 0.00017874029979812563, "loss": 23.0, "step": 3546 }, { "epoch": 2.1290516206482595, "grad_norm": 0.0018674216698855162, "learning_rate": 0.00017872865318028459, "loss": 23.0, "step": 3547 }, { "epoch": 2.1296518607442976, "grad_norm": 0.0012715421617031097, "learning_rate": 0.00017871700375282043, "loss": 23.0, "step": 3548 }, { "epoch": 2.130252100840336, "grad_norm": 0.003209817223250866, "learning_rate": 0.0001787053515161489, "loss": 23.0, "step": 3549 }, { "epoch": 2.1308523409363747, "grad_norm": 0.001961090601980686, "learning_rate": 0.0001786936964706858, "loss": 23.0, "step": 3550 }, { "epoch": 2.1314525810324128, "grad_norm": 0.0013860109029337764, "learning_rate": 0.0001786820386168471, "loss": 23.0, "step": 3551 }, { "epoch": 2.1320528211284513, "grad_norm": 0.00047536773490719497, "learning_rate": 0.00017867037795504887, "loss": 23.0, "step": 3552 }, { "epoch": 2.13265306122449, "grad_norm": 0.0019673663191497326, "learning_rate": 0.00017865871448570717, "loss": 23.0, "step": 3553 }, { "epoch": 2.1332533013205284, "grad_norm": 0.0021188133396208286, "learning_rate": 0.00017864704820923832, "loss": 23.0, "step": 3554 }, { "epoch": 2.1338535414165665, "grad_norm": 0.0027213292196393013, "learning_rate": 0.0001786353791260586, "loss": 23.0, "step": 3555 }, { "epoch": 2.134453781512605, "grad_norm": 0.0027741962112486362, "learning_rate": 0.0001786237072365845, "loss": 23.0, "step": 3556 }, { "epoch": 2.1350540216086435, "grad_norm": 0.002745026256889105, "learning_rate": 0.00017861203254123252, "loss": 23.0, "step": 3557 }, { "epoch": 2.1356542617046816, "grad_norm": 0.0020450023002922535, "learning_rate": 0.00017860035504041932, "loss": 23.0, "step": 3558 }, { "epoch": 2.13625450180072, "grad_norm": 0.0010032516438513994, "learning_rate": 0.00017858867473456163, "loss": 23.0, "step": 3559 }, { "epoch": 2.1368547418967587, "grad_norm": 0.0014476818032562733, "learning_rate": 0.00017857699162407626, "loss": 23.0, "step": 3560 }, { "epoch": 2.1374549819927973, "grad_norm": 0.001177955069579184, "learning_rate": 0.00017856530570938022, "loss": 23.0, "step": 3561 }, { "epoch": 2.1380552220888354, "grad_norm": 0.002862164517864585, "learning_rate": 0.0001785536169908905, "loss": 23.0, "step": 3562 }, { "epoch": 2.138655462184874, "grad_norm": 0.0010038964683189988, "learning_rate": 0.00017854192546902427, "loss": 23.0, "step": 3563 }, { "epoch": 2.1392557022809124, "grad_norm": 0.0017746913945302367, "learning_rate": 0.00017853023114419877, "loss": 23.0, "step": 3564 }, { "epoch": 2.139855942376951, "grad_norm": 0.0028854040428996086, "learning_rate": 0.00017851853401683126, "loss": 23.0, "step": 3565 }, { "epoch": 2.140456182472989, "grad_norm": 0.001586511847563088, "learning_rate": 0.00017850683408733928, "loss": 23.0, "step": 3566 }, { "epoch": 2.1410564225690276, "grad_norm": 0.003254214534536004, "learning_rate": 0.00017849513135614035, "loss": 23.0, "step": 3567 }, { "epoch": 2.141656662665066, "grad_norm": 0.0012385440059006214, "learning_rate": 0.00017848342582365206, "loss": 23.0, "step": 3568 }, { "epoch": 2.1422569027611043, "grad_norm": 0.0018727433634921908, "learning_rate": 0.0001784717174902922, "loss": 23.0, "step": 3569 }, { "epoch": 2.142857142857143, "grad_norm": 0.004589486867189407, "learning_rate": 0.00017846000635647863, "loss": 23.0, "step": 3570 }, { "epoch": 2.1434573829531813, "grad_norm": 0.0021497050765901804, "learning_rate": 0.0001784482924226292, "loss": 23.0, "step": 3571 }, { "epoch": 2.14405762304922, "grad_norm": 0.0012627036776393652, "learning_rate": 0.00017843657568916206, "loss": 23.0, "step": 3572 }, { "epoch": 2.144657863145258, "grad_norm": 0.001609223079867661, "learning_rate": 0.00017842485615649528, "loss": 23.0, "step": 3573 }, { "epoch": 2.1452581032412965, "grad_norm": 0.001351055921986699, "learning_rate": 0.00017841313382504711, "loss": 23.0, "step": 3574 }, { "epoch": 2.145858343337335, "grad_norm": 0.004179363604635, "learning_rate": 0.00017840140869523587, "loss": 23.0, "step": 3575 }, { "epoch": 2.146458583433373, "grad_norm": 0.0016952658770605922, "learning_rate": 0.00017838968076748008, "loss": 23.0, "step": 3576 }, { "epoch": 2.1470588235294117, "grad_norm": 0.0015167961828410625, "learning_rate": 0.00017837795004219817, "loss": 23.0, "step": 3577 }, { "epoch": 2.1476590636254502, "grad_norm": 0.003133446676656604, "learning_rate": 0.00017836621651980887, "loss": 23.0, "step": 3578 }, { "epoch": 2.1482593037214888, "grad_norm": 0.00226042908616364, "learning_rate": 0.00017835448020073088, "loss": 23.0, "step": 3579 }, { "epoch": 2.148859543817527, "grad_norm": 0.0013810255331918597, "learning_rate": 0.00017834274108538304, "loss": 23.0, "step": 3580 }, { "epoch": 2.1494597839135654, "grad_norm": 0.001641986076720059, "learning_rate": 0.0001783309991741843, "loss": 23.0, "step": 3581 }, { "epoch": 2.150060024009604, "grad_norm": 0.002287675393745303, "learning_rate": 0.0001783192544675537, "loss": 23.0, "step": 3582 }, { "epoch": 2.1506602641056425, "grad_norm": 0.0009976879227906466, "learning_rate": 0.00017830750696591033, "loss": 23.0, "step": 3583 }, { "epoch": 2.1512605042016806, "grad_norm": 0.0018353014020249248, "learning_rate": 0.0001782957566696735, "loss": 23.0, "step": 3584 }, { "epoch": 2.151860744297719, "grad_norm": 0.0019918165635317564, "learning_rate": 0.00017828400357926253, "loss": 23.0, "step": 3585 }, { "epoch": 2.1524609843937577, "grad_norm": 0.0026910703163594007, "learning_rate": 0.0001782722476950968, "loss": 23.0, "step": 3586 }, { "epoch": 2.1530612244897958, "grad_norm": 0.002473343163728714, "learning_rate": 0.00017826048901759588, "loss": 23.0, "step": 3587 }, { "epoch": 2.1536614645858343, "grad_norm": 0.002732550958171487, "learning_rate": 0.00017824872754717945, "loss": 23.0, "step": 3588 }, { "epoch": 2.154261704681873, "grad_norm": 0.0010042079957202077, "learning_rate": 0.00017823696328426723, "loss": 23.0, "step": 3589 }, { "epoch": 2.1548619447779114, "grad_norm": 0.0027955600526183844, "learning_rate": 0.000178225196229279, "loss": 23.0, "step": 3590 }, { "epoch": 2.1554621848739495, "grad_norm": 0.004130184184759855, "learning_rate": 0.00017821342638263478, "loss": 23.0, "step": 3591 }, { "epoch": 2.156062424969988, "grad_norm": 0.003868487663567066, "learning_rate": 0.00017820165374475452, "loss": 23.0, "step": 3592 }, { "epoch": 2.1566626650660266, "grad_norm": 0.0011650120140984654, "learning_rate": 0.00017818987831605843, "loss": 23.0, "step": 3593 }, { "epoch": 2.1572629051620646, "grad_norm": 0.002393965143710375, "learning_rate": 0.00017817810009696666, "loss": 23.0, "step": 3594 }, { "epoch": 2.157863145258103, "grad_norm": 0.0035790205001831055, "learning_rate": 0.00017816631908789966, "loss": 23.0, "step": 3595 }, { "epoch": 2.1584633853541417, "grad_norm": 0.003235345706343651, "learning_rate": 0.00017815453528927778, "loss": 23.0, "step": 3596 }, { "epoch": 2.1590636254501803, "grad_norm": 0.0013717797119170427, "learning_rate": 0.00017814274870152155, "loss": 23.0, "step": 3597 }, { "epoch": 2.1596638655462184, "grad_norm": 0.0011841889936476946, "learning_rate": 0.0001781309593250516, "loss": 23.0, "step": 3598 }, { "epoch": 2.160264105642257, "grad_norm": 0.0035572301130741835, "learning_rate": 0.00017811916716028872, "loss": 23.0, "step": 3599 }, { "epoch": 2.1608643457382954, "grad_norm": 0.0009041174198500812, "learning_rate": 0.00017810737220765374, "loss": 23.0, "step": 3600 }, { "epoch": 2.1614645858343335, "grad_norm": 0.0011678171576932073, "learning_rate": 0.00017809557446756756, "loss": 23.0, "step": 3601 }, { "epoch": 2.162064825930372, "grad_norm": 0.0014870171435177326, "learning_rate": 0.0001780837739404512, "loss": 23.0, "step": 3602 }, { "epoch": 2.1626650660264106, "grad_norm": 0.0015179289039224386, "learning_rate": 0.00017807197062672577, "loss": 23.0, "step": 3603 }, { "epoch": 2.163265306122449, "grad_norm": 0.0020413794554769993, "learning_rate": 0.00017806016452681254, "loss": 23.0, "step": 3604 }, { "epoch": 2.1638655462184873, "grad_norm": 0.0032616599928587675, "learning_rate": 0.00017804835564113288, "loss": 23.0, "step": 3605 }, { "epoch": 2.164465786314526, "grad_norm": 0.0011516850208863616, "learning_rate": 0.00017803654397010815, "loss": 23.0, "step": 3606 }, { "epoch": 2.1650660264105643, "grad_norm": 0.003489278955385089, "learning_rate": 0.0001780247295141599, "loss": 23.0, "step": 3607 }, { "epoch": 2.1656662665066024, "grad_norm": 0.003842744044959545, "learning_rate": 0.00017801291227370974, "loss": 23.0, "step": 3608 }, { "epoch": 2.166266506602641, "grad_norm": 0.001340657938271761, "learning_rate": 0.0001780010922491794, "loss": 23.0, "step": 3609 }, { "epoch": 2.1668667466986795, "grad_norm": 0.0008930175099521875, "learning_rate": 0.00017798926944099077, "loss": 23.0, "step": 3610 }, { "epoch": 2.167466986794718, "grad_norm": 0.0036189949605613947, "learning_rate": 0.0001779774438495657, "loss": 23.0, "step": 3611 }, { "epoch": 2.168067226890756, "grad_norm": 0.003450029995292425, "learning_rate": 0.00017796561547532623, "loss": 23.0, "step": 3612 }, { "epoch": 2.1686674669867947, "grad_norm": 0.0013816222781315446, "learning_rate": 0.00017795378431869453, "loss": 23.0, "step": 3613 }, { "epoch": 2.1692677070828332, "grad_norm": 0.003153825644403696, "learning_rate": 0.00017794195038009275, "loss": 23.0, "step": 3614 }, { "epoch": 2.1698679471788713, "grad_norm": 0.001024477300234139, "learning_rate": 0.00017793011365994325, "loss": 23.0, "step": 3615 }, { "epoch": 2.17046818727491, "grad_norm": 0.003891037544235587, "learning_rate": 0.00017791827415866848, "loss": 23.0, "step": 3616 }, { "epoch": 2.1710684273709484, "grad_norm": 0.002924687694758177, "learning_rate": 0.00017790643187669093, "loss": 23.0, "step": 3617 }, { "epoch": 2.171668667466987, "grad_norm": 0.009289652109146118, "learning_rate": 0.00017789458681443323, "loss": 23.0, "step": 3618 }, { "epoch": 2.172268907563025, "grad_norm": 0.0021330954041332006, "learning_rate": 0.00017788273897231807, "loss": 23.0, "step": 3619 }, { "epoch": 2.1728691476590636, "grad_norm": 0.0019930656999349594, "learning_rate": 0.0001778708883507683, "loss": 23.0, "step": 3620 }, { "epoch": 2.173469387755102, "grad_norm": 0.00517197186127305, "learning_rate": 0.00017785903495020686, "loss": 23.0, "step": 3621 }, { "epoch": 2.1740696278511407, "grad_norm": 0.0015679982025176287, "learning_rate": 0.00017784717877105673, "loss": 23.0, "step": 3622 }, { "epoch": 2.1746698679471788, "grad_norm": 0.004141742363572121, "learning_rate": 0.00017783531981374102, "loss": 23.0, "step": 3623 }, { "epoch": 2.1752701080432173, "grad_norm": 0.00065791723318398, "learning_rate": 0.00017782345807868297, "loss": 23.0, "step": 3624 }, { "epoch": 2.175870348139256, "grad_norm": 0.004961501341313124, "learning_rate": 0.0001778115935663059, "loss": 23.0, "step": 3625 }, { "epoch": 2.176470588235294, "grad_norm": 0.0023005243856459856, "learning_rate": 0.0001777997262770332, "loss": 23.0, "step": 3626 }, { "epoch": 2.1770708283313325, "grad_norm": 0.00899723544716835, "learning_rate": 0.00017778785621128836, "loss": 23.0, "step": 3627 }, { "epoch": 2.177671068427371, "grad_norm": 0.0029707015492022038, "learning_rate": 0.0001777759833694951, "loss": 23.0, "step": 3628 }, { "epoch": 2.1782713085234096, "grad_norm": 0.0018353236373513937, "learning_rate": 0.000177764107752077, "loss": 23.0, "step": 3629 }, { "epoch": 2.1788715486194477, "grad_norm": 0.0010108323767781258, "learning_rate": 0.00017775222935945796, "loss": 23.0, "step": 3630 }, { "epoch": 2.179471788715486, "grad_norm": 0.0008860941743478179, "learning_rate": 0.00017774034819206181, "loss": 23.0, "step": 3631 }, { "epoch": 2.1800720288115247, "grad_norm": 0.003606445388868451, "learning_rate": 0.00017772846425031266, "loss": 23.0, "step": 3632 }, { "epoch": 2.180672268907563, "grad_norm": 0.0016308276681229472, "learning_rate": 0.00017771657753463453, "loss": 23.0, "step": 3633 }, { "epoch": 2.1812725090036014, "grad_norm": 0.0007045452948659658, "learning_rate": 0.00017770468804545169, "loss": 23.0, "step": 3634 }, { "epoch": 2.18187274909964, "grad_norm": 0.0013048743130639195, "learning_rate": 0.0001776927957831884, "loss": 23.0, "step": 3635 }, { "epoch": 2.1824729891956784, "grad_norm": 0.0012544161872938275, "learning_rate": 0.00017768090074826904, "loss": 23.0, "step": 3636 }, { "epoch": 2.1830732292917165, "grad_norm": 0.00237638084217906, "learning_rate": 0.0001776690029411182, "loss": 23.0, "step": 3637 }, { "epoch": 2.183673469387755, "grad_norm": 0.0048915608786046505, "learning_rate": 0.0001776571023621604, "loss": 23.0, "step": 3638 }, { "epoch": 2.1842737094837936, "grad_norm": 0.000844002643134445, "learning_rate": 0.0001776451990118204, "loss": 23.0, "step": 3639 }, { "epoch": 2.184873949579832, "grad_norm": 0.00372367724776268, "learning_rate": 0.00017763329289052296, "loss": 23.0, "step": 3640 }, { "epoch": 2.1854741896758703, "grad_norm": 0.0023307260125875473, "learning_rate": 0.000177621383998693, "loss": 23.0, "step": 3641 }, { "epoch": 2.186074429771909, "grad_norm": 0.0026479533407837152, "learning_rate": 0.0001776094723367555, "loss": 23.0, "step": 3642 }, { "epoch": 2.1866746698679473, "grad_norm": 0.00475787278264761, "learning_rate": 0.00017759755790513557, "loss": 23.0, "step": 3643 }, { "epoch": 2.1872749099639854, "grad_norm": 0.0008646330097690225, "learning_rate": 0.00017758564070425843, "loss": 23.0, "step": 3644 }, { "epoch": 2.187875150060024, "grad_norm": 0.000730241765268147, "learning_rate": 0.00017757372073454931, "loss": 23.0, "step": 3645 }, { "epoch": 2.1884753901560625, "grad_norm": 0.0020450761076062918, "learning_rate": 0.00017756179799643368, "loss": 23.0, "step": 3646 }, { "epoch": 2.189075630252101, "grad_norm": 0.001998509978875518, "learning_rate": 0.00017754987249033694, "loss": 23.0, "step": 3647 }, { "epoch": 2.189675870348139, "grad_norm": 0.0017022277461364865, "learning_rate": 0.00017753794421668478, "loss": 23.0, "step": 3648 }, { "epoch": 2.1902761104441777, "grad_norm": 0.0010228764731436968, "learning_rate": 0.0001775260131759028, "loss": 23.0, "step": 3649 }, { "epoch": 2.1908763505402162, "grad_norm": 0.0021690605208277702, "learning_rate": 0.00017751407936841688, "loss": 23.0, "step": 3650 }, { "epoch": 2.1914765906362543, "grad_norm": 0.0016400478780269623, "learning_rate": 0.00017750214279465282, "loss": 23.0, "step": 3651 }, { "epoch": 2.192076830732293, "grad_norm": 0.002070733578875661, "learning_rate": 0.00017749020345503666, "loss": 23.0, "step": 3652 }, { "epoch": 2.1926770708283314, "grad_norm": 0.0014095326187089086, "learning_rate": 0.00017747826134999445, "loss": 23.0, "step": 3653 }, { "epoch": 2.19327731092437, "grad_norm": 0.0019122892990708351, "learning_rate": 0.00017746631647995242, "loss": 23.0, "step": 3654 }, { "epoch": 2.193877551020408, "grad_norm": 0.0029271540697664022, "learning_rate": 0.00017745436884533683, "loss": 23.0, "step": 3655 }, { "epoch": 2.1944777911164466, "grad_norm": 0.002827174263074994, "learning_rate": 0.00017744241844657398, "loss": 23.0, "step": 3656 }, { "epoch": 2.195078031212485, "grad_norm": 0.0012498522410169244, "learning_rate": 0.00017743046528409049, "loss": 23.0, "step": 3657 }, { "epoch": 2.195678271308523, "grad_norm": 0.003829399822279811, "learning_rate": 0.00017741850935831287, "loss": 23.0, "step": 3658 }, { "epoch": 2.1962785114045618, "grad_norm": 0.0021660744678229094, "learning_rate": 0.00017740655066966773, "loss": 23.0, "step": 3659 }, { "epoch": 2.1968787515006003, "grad_norm": 0.002248313743621111, "learning_rate": 0.00017739458921858198, "loss": 23.0, "step": 3660 }, { "epoch": 2.197478991596639, "grad_norm": 0.0018800124526023865, "learning_rate": 0.00017738262500548236, "loss": 23.0, "step": 3661 }, { "epoch": 2.198079231692677, "grad_norm": 0.0030018959660083055, "learning_rate": 0.00017737065803079597, "loss": 23.0, "step": 3662 }, { "epoch": 2.1986794717887155, "grad_norm": 0.0009485651389695704, "learning_rate": 0.00017735868829494978, "loss": 23.0, "step": 3663 }, { "epoch": 2.199279711884754, "grad_norm": 0.002609929535537958, "learning_rate": 0.000177346715798371, "loss": 23.0, "step": 3664 }, { "epoch": 2.199879951980792, "grad_norm": 0.0013200175017118454, "learning_rate": 0.0001773347405414869, "loss": 23.0, "step": 3665 }, { "epoch": 2.2004801920768307, "grad_norm": 0.0007020602351985872, "learning_rate": 0.00017732276252472483, "loss": 23.0, "step": 3666 }, { "epoch": 2.201080432172869, "grad_norm": 0.0026193074882030487, "learning_rate": 0.00017731078174851227, "loss": 23.0, "step": 3667 }, { "epoch": 2.2016806722689077, "grad_norm": 0.001026253798045218, "learning_rate": 0.00017729879821327678, "loss": 23.0, "step": 3668 }, { "epoch": 2.202280912364946, "grad_norm": 0.0017351701389998198, "learning_rate": 0.000177286811919446, "loss": 23.0, "step": 3669 }, { "epoch": 2.2028811524609844, "grad_norm": 0.0014646050985902548, "learning_rate": 0.0001772748228674477, "loss": 23.0, "step": 3670 }, { "epoch": 2.203481392557023, "grad_norm": 0.0030125817283988, "learning_rate": 0.0001772628310577098, "loss": 23.0, "step": 3671 }, { "epoch": 2.204081632653061, "grad_norm": 0.004487418103963137, "learning_rate": 0.00017725083649066016, "loss": 23.0, "step": 3672 }, { "epoch": 2.2046818727490995, "grad_norm": 0.0021111650858074427, "learning_rate": 0.0001772388391667269, "loss": 23.0, "step": 3673 }, { "epoch": 2.205282112845138, "grad_norm": 0.00048810808220878243, "learning_rate": 0.00017722683908633812, "loss": 23.0, "step": 3674 }, { "epoch": 2.2058823529411766, "grad_norm": 0.003283157479017973, "learning_rate": 0.00017721483624992215, "loss": 23.0, "step": 3675 }, { "epoch": 2.2064825930372147, "grad_norm": 0.0016707901377230883, "learning_rate": 0.00017720283065790726, "loss": 23.0, "step": 3676 }, { "epoch": 2.2070828331332533, "grad_norm": 0.001358469482511282, "learning_rate": 0.00017719082231072193, "loss": 23.0, "step": 3677 }, { "epoch": 2.207683073229292, "grad_norm": 0.0013346460182219744, "learning_rate": 0.00017717881120879475, "loss": 23.0, "step": 3678 }, { "epoch": 2.20828331332533, "grad_norm": 0.001013239729218185, "learning_rate": 0.0001771667973525543, "loss": 23.0, "step": 3679 }, { "epoch": 2.2088835534213684, "grad_norm": 0.001471264404244721, "learning_rate": 0.00017715478074242932, "loss": 23.0, "step": 3680 }, { "epoch": 2.209483793517407, "grad_norm": 0.0015335743082687259, "learning_rate": 0.00017714276137884873, "loss": 23.0, "step": 3681 }, { "epoch": 2.2100840336134455, "grad_norm": 0.005060260649770498, "learning_rate": 0.0001771307392622414, "loss": 23.0, "step": 3682 }, { "epoch": 2.2106842737094836, "grad_norm": 0.0011684778146445751, "learning_rate": 0.00017711871439303638, "loss": 23.0, "step": 3683 }, { "epoch": 2.211284513805522, "grad_norm": 0.0035212927032262087, "learning_rate": 0.00017710668677166282, "loss": 23.0, "step": 3684 }, { "epoch": 2.2118847539015607, "grad_norm": 0.0023291881661862135, "learning_rate": 0.00017709465639854997, "loss": 23.0, "step": 3685 }, { "epoch": 2.2124849939975992, "grad_norm": 0.0015391027554869652, "learning_rate": 0.00017708262327412712, "loss": 23.0, "step": 3686 }, { "epoch": 2.2130852340936373, "grad_norm": 0.001002484350465238, "learning_rate": 0.00017707058739882374, "loss": 23.0, "step": 3687 }, { "epoch": 2.213685474189676, "grad_norm": 0.0014180615544319153, "learning_rate": 0.00017705854877306934, "loss": 23.0, "step": 3688 }, { "epoch": 2.2142857142857144, "grad_norm": 0.0008763477671891451, "learning_rate": 0.00017704650739729357, "loss": 23.0, "step": 3689 }, { "epoch": 2.2148859543817525, "grad_norm": 0.0022361702285706997, "learning_rate": 0.00017703446327192612, "loss": 23.0, "step": 3690 }, { "epoch": 2.215486194477791, "grad_norm": 0.0015827770112082362, "learning_rate": 0.0001770224163973968, "loss": 23.0, "step": 3691 }, { "epoch": 2.2160864345738296, "grad_norm": 0.001904232893139124, "learning_rate": 0.00017701036677413563, "loss": 23.0, "step": 3692 }, { "epoch": 2.216686674669868, "grad_norm": 0.0013081013457849622, "learning_rate": 0.00017699831440257251, "loss": 23.0, "step": 3693 }, { "epoch": 2.2172869147659062, "grad_norm": 0.004817170090973377, "learning_rate": 0.00017698625928313765, "loss": 23.0, "step": 3694 }, { "epoch": 2.2178871548619448, "grad_norm": 0.004000441636890173, "learning_rate": 0.0001769742014162612, "loss": 23.0, "step": 3695 }, { "epoch": 2.2184873949579833, "grad_norm": 0.0027444609440863132, "learning_rate": 0.00017696214080237352, "loss": 23.0, "step": 3696 }, { "epoch": 2.2190876350540214, "grad_norm": 0.002896189922466874, "learning_rate": 0.000176950077441905, "loss": 23.0, "step": 3697 }, { "epoch": 2.21968787515006, "grad_norm": 0.002128873486071825, "learning_rate": 0.00017693801133528616, "loss": 23.0, "step": 3698 }, { "epoch": 2.2202881152460985, "grad_norm": 0.0011179738212376833, "learning_rate": 0.0001769259424829476, "loss": 23.0, "step": 3699 }, { "epoch": 2.220888355342137, "grad_norm": 0.002058587037026882, "learning_rate": 0.00017691387088532003, "loss": 23.0, "step": 3700 }, { "epoch": 2.221488595438175, "grad_norm": 0.0022081804927438498, "learning_rate": 0.00017690179654283425, "loss": 23.0, "step": 3701 }, { "epoch": 2.2220888355342137, "grad_norm": 0.0023553965147584677, "learning_rate": 0.0001768897194559212, "loss": 23.0, "step": 3702 }, { "epoch": 2.222689075630252, "grad_norm": 0.001018383540213108, "learning_rate": 0.00017687763962501182, "loss": 23.0, "step": 3703 }, { "epoch": 2.2232893157262907, "grad_norm": 0.0007036729366518557, "learning_rate": 0.0001768655570505372, "loss": 23.0, "step": 3704 }, { "epoch": 2.223889555822329, "grad_norm": 0.0008182197925634682, "learning_rate": 0.0001768534717329286, "loss": 23.0, "step": 3705 }, { "epoch": 2.2244897959183674, "grad_norm": 0.0010957723716273904, "learning_rate": 0.00017684138367261731, "loss": 23.0, "step": 3706 }, { "epoch": 2.225090036014406, "grad_norm": 0.0017919333186000586, "learning_rate": 0.00017682929287003466, "loss": 23.0, "step": 3707 }, { "epoch": 2.225690276110444, "grad_norm": 0.003724980168044567, "learning_rate": 0.0001768171993256122, "loss": 23.0, "step": 3708 }, { "epoch": 2.2262905162064826, "grad_norm": 0.0013054338051006198, "learning_rate": 0.00017680510303978147, "loss": 23.0, "step": 3709 }, { "epoch": 2.226890756302521, "grad_norm": 0.0018630026606842875, "learning_rate": 0.0001767930040129742, "loss": 23.0, "step": 3710 }, { "epoch": 2.2274909963985596, "grad_norm": 0.0014727768721058965, "learning_rate": 0.00017678090224562213, "loss": 23.0, "step": 3711 }, { "epoch": 2.2280912364945977, "grad_norm": 0.0010810722596943378, "learning_rate": 0.0001767687977381572, "loss": 23.0, "step": 3712 }, { "epoch": 2.2286914765906363, "grad_norm": 0.001895120949484408, "learning_rate": 0.00017675669049101132, "loss": 23.0, "step": 3713 }, { "epoch": 2.229291716686675, "grad_norm": 0.0021297503262758255, "learning_rate": 0.0001767445805046166, "loss": 23.0, "step": 3714 }, { "epoch": 2.229891956782713, "grad_norm": 0.0009311452740803361, "learning_rate": 0.00017673246777940522, "loss": 23.0, "step": 3715 }, { "epoch": 2.2304921968787514, "grad_norm": 0.0026380294002592564, "learning_rate": 0.00017672035231580942, "loss": 23.0, "step": 3716 }, { "epoch": 2.23109243697479, "grad_norm": 0.0012547700898721814, "learning_rate": 0.00017670823411426162, "loss": 23.0, "step": 3717 }, { "epoch": 2.2316926770708285, "grad_norm": 0.0012008714256808162, "learning_rate": 0.0001766961131751943, "loss": 23.0, "step": 3718 }, { "epoch": 2.2322929171668666, "grad_norm": 0.0007726665935479105, "learning_rate": 0.00017668398949903993, "loss": 23.0, "step": 3719 }, { "epoch": 2.232893157262905, "grad_norm": 0.003182122251018882, "learning_rate": 0.00017667186308623122, "loss": 23.0, "step": 3720 }, { "epoch": 2.2334933973589437, "grad_norm": 0.0025740358978509903, "learning_rate": 0.000176659733937201, "loss": 23.0, "step": 3721 }, { "epoch": 2.234093637454982, "grad_norm": 0.0025537547189742327, "learning_rate": 0.00017664760205238203, "loss": 23.0, "step": 3722 }, { "epoch": 2.2346938775510203, "grad_norm": 0.0030393393244594336, "learning_rate": 0.00017663546743220733, "loss": 23.0, "step": 3723 }, { "epoch": 2.235294117647059, "grad_norm": 0.0024942343588918447, "learning_rate": 0.00017662333007710987, "loss": 23.0, "step": 3724 }, { "epoch": 2.2358943577430974, "grad_norm": 0.0016120598884299397, "learning_rate": 0.0001766111899875229, "loss": 23.0, "step": 3725 }, { "epoch": 2.2364945978391355, "grad_norm": 0.00033892018836922944, "learning_rate": 0.0001765990471638796, "loss": 23.0, "step": 3726 }, { "epoch": 2.237094837935174, "grad_norm": 0.002562824171036482, "learning_rate": 0.00017658690160661338, "loss": 23.0, "step": 3727 }, { "epoch": 2.2376950780312126, "grad_norm": 0.0022175584454089403, "learning_rate": 0.00017657475331615762, "loss": 23.0, "step": 3728 }, { "epoch": 2.2382953181272507, "grad_norm": 0.001756917918100953, "learning_rate": 0.0001765626022929459, "loss": 23.0, "step": 3729 }, { "epoch": 2.2388955582232892, "grad_norm": 0.003884568577632308, "learning_rate": 0.00017655044853741184, "loss": 23.0, "step": 3730 }, { "epoch": 2.2394957983193278, "grad_norm": 0.001167623675428331, "learning_rate": 0.00017653829204998918, "loss": 23.0, "step": 3731 }, { "epoch": 2.2400960384153663, "grad_norm": 0.0010216180235147476, "learning_rate": 0.00017652613283111176, "loss": 23.0, "step": 3732 }, { "epoch": 2.2406962785114044, "grad_norm": 0.003365274053066969, "learning_rate": 0.00017651397088121353, "loss": 23.0, "step": 3733 }, { "epoch": 2.241296518607443, "grad_norm": 0.0018816734664142132, "learning_rate": 0.00017650180620072846, "loss": 23.0, "step": 3734 }, { "epoch": 2.2418967587034815, "grad_norm": 0.0029328542295843363, "learning_rate": 0.00017648963879009075, "loss": 23.0, "step": 3735 }, { "epoch": 2.2424969987995196, "grad_norm": 0.0007874789298512042, "learning_rate": 0.00017647746864973458, "loss": 23.0, "step": 3736 }, { "epoch": 2.243097238895558, "grad_norm": 0.0030753707978874445, "learning_rate": 0.00017646529578009426, "loss": 23.0, "step": 3737 }, { "epoch": 2.2436974789915967, "grad_norm": 0.0010473758447915316, "learning_rate": 0.00017645312018160422, "loss": 23.0, "step": 3738 }, { "epoch": 2.244297719087635, "grad_norm": 0.0018478642450645566, "learning_rate": 0.00017644094185469898, "loss": 23.0, "step": 3739 }, { "epoch": 2.2448979591836733, "grad_norm": 0.0013350150547921658, "learning_rate": 0.0001764287607998132, "loss": 23.0, "step": 3740 }, { "epoch": 2.245498199279712, "grad_norm": 0.0012342286063358188, "learning_rate": 0.00017641657701738148, "loss": 23.0, "step": 3741 }, { "epoch": 2.2460984393757504, "grad_norm": 0.0013594189658761024, "learning_rate": 0.00017640439050783875, "loss": 23.0, "step": 3742 }, { "epoch": 2.246698679471789, "grad_norm": 0.0033450396731495857, "learning_rate": 0.00017639220127161983, "loss": 23.0, "step": 3743 }, { "epoch": 2.247298919567827, "grad_norm": 0.002889930969104171, "learning_rate": 0.00017638000930915974, "loss": 23.0, "step": 3744 }, { "epoch": 2.2478991596638656, "grad_norm": 0.0019997726194560528, "learning_rate": 0.00017636781462089364, "loss": 23.0, "step": 3745 }, { "epoch": 2.248499399759904, "grad_norm": 0.0012661672662943602, "learning_rate": 0.0001763556172072566, "loss": 23.0, "step": 3746 }, { "epoch": 2.249099639855942, "grad_norm": 0.007048753555864096, "learning_rate": 0.00017634341706868403, "loss": 23.0, "step": 3747 }, { "epoch": 2.2496998799519807, "grad_norm": 0.0015775463543832302, "learning_rate": 0.00017633121420561131, "loss": 23.0, "step": 3748 }, { "epoch": 2.2503001200480193, "grad_norm": 0.0020231371745467186, "learning_rate": 0.00017631900861847389, "loss": 23.0, "step": 3749 }, { "epoch": 2.250900360144058, "grad_norm": 0.0012863476295024157, "learning_rate": 0.00017630680030770735, "loss": 23.0, "step": 3750 }, { "epoch": 2.251500600240096, "grad_norm": 0.0016621587565168738, "learning_rate": 0.0001762945892737474, "loss": 23.0, "step": 3751 }, { "epoch": 2.2521008403361344, "grad_norm": 0.00226738303899765, "learning_rate": 0.00017628237551702982, "loss": 23.0, "step": 3752 }, { "epoch": 2.252701080432173, "grad_norm": 0.0014520992990583181, "learning_rate": 0.00017627015903799048, "loss": 23.0, "step": 3753 }, { "epoch": 2.2533013205282115, "grad_norm": 0.0013434399152174592, "learning_rate": 0.00017625793983706533, "loss": 23.0, "step": 3754 }, { "epoch": 2.2539015606242496, "grad_norm": 0.0007136189378798008, "learning_rate": 0.00017624571791469048, "loss": 23.0, "step": 3755 }, { "epoch": 2.254501800720288, "grad_norm": 0.002455267123878002, "learning_rate": 0.00017623349327130206, "loss": 23.0, "step": 3756 }, { "epoch": 2.2551020408163267, "grad_norm": 0.0009659494389779866, "learning_rate": 0.00017622126590733644, "loss": 23.0, "step": 3757 }, { "epoch": 2.255702280912365, "grad_norm": 0.002808001358062029, "learning_rate": 0.00017620903582322983, "loss": 23.0, "step": 3758 }, { "epoch": 2.2563025210084033, "grad_norm": 0.0018349566962569952, "learning_rate": 0.0001761968030194188, "loss": 23.0, "step": 3759 }, { "epoch": 2.256902761104442, "grad_norm": 0.002876591170206666, "learning_rate": 0.00017618456749633987, "loss": 23.0, "step": 3760 }, { "epoch": 2.2575030012004804, "grad_norm": 0.0012843102449551225, "learning_rate": 0.00017617232925442968, "loss": 23.0, "step": 3761 }, { "epoch": 2.2581032412965185, "grad_norm": 0.0011905563296750188, "learning_rate": 0.00017616008829412502, "loss": 23.0, "step": 3762 }, { "epoch": 2.258703481392557, "grad_norm": 0.000979992444626987, "learning_rate": 0.00017614784461586266, "loss": 23.0, "step": 3763 }, { "epoch": 2.2593037214885956, "grad_norm": 0.002167720813304186, "learning_rate": 0.00017613559822007963, "loss": 23.0, "step": 3764 }, { "epoch": 2.2599039615846337, "grad_norm": 0.0008691643597558141, "learning_rate": 0.00017612334910721298, "loss": 23.0, "step": 3765 }, { "epoch": 2.2605042016806722, "grad_norm": 0.0019571988377720118, "learning_rate": 0.00017611109727769977, "loss": 23.0, "step": 3766 }, { "epoch": 2.2611044417767108, "grad_norm": 0.0012904416071251035, "learning_rate": 0.00017609884273197732, "loss": 23.0, "step": 3767 }, { "epoch": 2.2617046818727493, "grad_norm": 0.0017439661314710975, "learning_rate": 0.00017608658547048288, "loss": 23.0, "step": 3768 }, { "epoch": 2.2623049219687874, "grad_norm": 0.003602192969992757, "learning_rate": 0.00017607432549365394, "loss": 23.0, "step": 3769 }, { "epoch": 2.262905162064826, "grad_norm": 0.002050132257863879, "learning_rate": 0.00017606206280192801, "loss": 23.0, "step": 3770 }, { "epoch": 2.2635054021608645, "grad_norm": 0.0030350079759955406, "learning_rate": 0.00017604979739574273, "loss": 23.0, "step": 3771 }, { "epoch": 2.2641056422569026, "grad_norm": 0.0011385931866243482, "learning_rate": 0.0001760375292755358, "loss": 23.0, "step": 3772 }, { "epoch": 2.264705882352941, "grad_norm": 0.0015834002988412976, "learning_rate": 0.000176025258441745, "loss": 23.0, "step": 3773 }, { "epoch": 2.2653061224489797, "grad_norm": 0.002169420477002859, "learning_rate": 0.00017601298489480834, "loss": 23.0, "step": 3774 }, { "epoch": 2.265906362545018, "grad_norm": 0.0020891001913696527, "learning_rate": 0.00017600070863516373, "loss": 23.0, "step": 3775 }, { "epoch": 2.2665066026410563, "grad_norm": 0.0015407392056658864, "learning_rate": 0.00017598842966324937, "loss": 23.0, "step": 3776 }, { "epoch": 2.267106842737095, "grad_norm": 0.0012298728106543422, "learning_rate": 0.0001759761479795034, "loss": 23.0, "step": 3777 }, { "epoch": 2.2677070828331334, "grad_norm": 0.0022670759353786707, "learning_rate": 0.00017596386358436413, "loss": 23.0, "step": 3778 }, { "epoch": 2.2683073229291715, "grad_norm": 0.0009981478797271848, "learning_rate": 0.00017595157647827, "loss": 23.0, "step": 3779 }, { "epoch": 2.26890756302521, "grad_norm": 0.0004938762285746634, "learning_rate": 0.00017593928666165945, "loss": 23.0, "step": 3780 }, { "epoch": 2.2695078031212486, "grad_norm": 0.0011169624049216509, "learning_rate": 0.00017592699413497114, "loss": 23.0, "step": 3781 }, { "epoch": 2.270108043217287, "grad_norm": 0.003080305876210332, "learning_rate": 0.00017591469889864368, "loss": 23.0, "step": 3782 }, { "epoch": 2.270708283313325, "grad_norm": 0.004471518564969301, "learning_rate": 0.00017590240095311587, "loss": 23.0, "step": 3783 }, { "epoch": 2.2713085234093637, "grad_norm": 0.0011711474508047104, "learning_rate": 0.00017589010029882664, "loss": 23.0, "step": 3784 }, { "epoch": 2.2719087635054023, "grad_norm": 0.0011285408399999142, "learning_rate": 0.00017587779693621495, "loss": 23.0, "step": 3785 }, { "epoch": 2.2725090036014404, "grad_norm": 0.0020412777084857225, "learning_rate": 0.00017586549086571988, "loss": 23.0, "step": 3786 }, { "epoch": 2.273109243697479, "grad_norm": 0.001386416843160987, "learning_rate": 0.00017585318208778058, "loss": 23.0, "step": 3787 }, { "epoch": 2.2737094837935174, "grad_norm": 0.0021522578317672014, "learning_rate": 0.00017584087060283632, "loss": 23.0, "step": 3788 }, { "epoch": 2.274309723889556, "grad_norm": 0.0017842737725004554, "learning_rate": 0.00017582855641132646, "loss": 23.0, "step": 3789 }, { "epoch": 2.274909963985594, "grad_norm": 0.0008319435291923583, "learning_rate": 0.0001758162395136905, "loss": 23.0, "step": 3790 }, { "epoch": 2.2755102040816326, "grad_norm": 0.0022286258172243834, "learning_rate": 0.000175803919910368, "loss": 23.0, "step": 3791 }, { "epoch": 2.276110444177671, "grad_norm": 0.0015450834762305021, "learning_rate": 0.00017579159760179854, "loss": 23.0, "step": 3792 }, { "epoch": 2.2767106842737093, "grad_norm": 0.0017403220990672708, "learning_rate": 0.00017577927258842193, "loss": 23.0, "step": 3793 }, { "epoch": 2.277310924369748, "grad_norm": 0.0021773509215563536, "learning_rate": 0.000175766944870678, "loss": 23.0, "step": 3794 }, { "epoch": 2.2779111644657863, "grad_norm": 0.0007699204725213349, "learning_rate": 0.00017575461444900676, "loss": 23.0, "step": 3795 }, { "epoch": 2.278511404561825, "grad_norm": 0.001452354365028441, "learning_rate": 0.00017574228132384814, "loss": 23.0, "step": 3796 }, { "epoch": 2.279111644657863, "grad_norm": 0.0027013334911316633, "learning_rate": 0.00017572994549564235, "loss": 23.0, "step": 3797 }, { "epoch": 2.2797118847539015, "grad_norm": 0.0016655053477734327, "learning_rate": 0.0001757176069648296, "loss": 23.0, "step": 3798 }, { "epoch": 2.28031212484994, "grad_norm": 0.0017022525426000357, "learning_rate": 0.00017570526573185022, "loss": 23.0, "step": 3799 }, { "epoch": 2.280912364945978, "grad_norm": 0.0011267677182331681, "learning_rate": 0.00017569292179714466, "loss": 23.0, "step": 3800 }, { "epoch": 2.2815126050420167, "grad_norm": 0.0012624160153791308, "learning_rate": 0.00017568057516115343, "loss": 23.0, "step": 3801 }, { "epoch": 2.2821128451380552, "grad_norm": 0.0012623511720448732, "learning_rate": 0.00017566822582431714, "loss": 23.0, "step": 3802 }, { "epoch": 2.2827130852340938, "grad_norm": 0.002993621863424778, "learning_rate": 0.0001756558737870765, "loss": 23.0, "step": 3803 }, { "epoch": 2.283313325330132, "grad_norm": 0.001764954999089241, "learning_rate": 0.00017564351904987232, "loss": 23.0, "step": 3804 }, { "epoch": 2.2839135654261704, "grad_norm": 0.0033577526919543743, "learning_rate": 0.00017563116161314557, "loss": 23.0, "step": 3805 }, { "epoch": 2.284513805522209, "grad_norm": 0.0030459025874733925, "learning_rate": 0.0001756188014773372, "loss": 23.0, "step": 3806 }, { "epoch": 2.2851140456182475, "grad_norm": 0.0020036788191646338, "learning_rate": 0.00017560643864288831, "loss": 23.0, "step": 3807 }, { "epoch": 2.2857142857142856, "grad_norm": 0.003118950640782714, "learning_rate": 0.00017559407311024008, "loss": 23.0, "step": 3808 }, { "epoch": 2.286314525810324, "grad_norm": 0.0023817415349185467, "learning_rate": 0.00017558170487983388, "loss": 23.0, "step": 3809 }, { "epoch": 2.2869147659063627, "grad_norm": 0.0020011558663100004, "learning_rate": 0.00017556933395211099, "loss": 23.0, "step": 3810 }, { "epoch": 2.287515006002401, "grad_norm": 0.0019172454485669732, "learning_rate": 0.000175556960327513, "loss": 23.0, "step": 3811 }, { "epoch": 2.2881152460984393, "grad_norm": 0.005320819094777107, "learning_rate": 0.00017554458400648146, "loss": 23.0, "step": 3812 }, { "epoch": 2.288715486194478, "grad_norm": 0.0011504385620355606, "learning_rate": 0.00017553220498945802, "loss": 23.0, "step": 3813 }, { "epoch": 2.2893157262905164, "grad_norm": 0.004282000940293074, "learning_rate": 0.00017551982327688447, "loss": 23.0, "step": 3814 }, { "epoch": 2.2899159663865545, "grad_norm": 0.0016033421270549297, "learning_rate": 0.0001755074388692027, "loss": 23.0, "step": 3815 }, { "epoch": 2.290516206482593, "grad_norm": 0.001392611302435398, "learning_rate": 0.00017549505176685468, "loss": 23.0, "step": 3816 }, { "epoch": 2.2911164465786316, "grad_norm": 0.0024148111697286367, "learning_rate": 0.00017548266197028243, "loss": 23.0, "step": 3817 }, { "epoch": 2.29171668667467, "grad_norm": 0.0022317797411233187, "learning_rate": 0.00017547026947992817, "loss": 23.0, "step": 3818 }, { "epoch": 2.292316926770708, "grad_norm": 0.00245701358653605, "learning_rate": 0.0001754578742962341, "loss": 23.0, "step": 3819 }, { "epoch": 2.2929171668667467, "grad_norm": 0.0017128176987171173, "learning_rate": 0.00017544547641964263, "loss": 23.0, "step": 3820 }, { "epoch": 2.2935174069627853, "grad_norm": 0.0031775077804923058, "learning_rate": 0.00017543307585059613, "loss": 23.0, "step": 3821 }, { "epoch": 2.2941176470588234, "grad_norm": 0.002050775336101651, "learning_rate": 0.00017542067258953723, "loss": 23.0, "step": 3822 }, { "epoch": 2.294717887154862, "grad_norm": 0.0014890647726133466, "learning_rate": 0.00017540826663690851, "loss": 23.0, "step": 3823 }, { "epoch": 2.2953181272509005, "grad_norm": 0.004480647388845682, "learning_rate": 0.00017539585799315272, "loss": 23.0, "step": 3824 }, { "epoch": 2.295918367346939, "grad_norm": 0.0015886913752183318, "learning_rate": 0.0001753834466587127, "loss": 23.0, "step": 3825 }, { "epoch": 2.296518607442977, "grad_norm": 0.0019695230294018984, "learning_rate": 0.0001753710326340314, "loss": 23.0, "step": 3826 }, { "epoch": 2.2971188475390156, "grad_norm": 0.0018076825654134154, "learning_rate": 0.0001753586159195518, "loss": 23.0, "step": 3827 }, { "epoch": 2.297719087635054, "grad_norm": 0.002728229621425271, "learning_rate": 0.00017534619651571705, "loss": 23.0, "step": 3828 }, { "epoch": 2.2983193277310923, "grad_norm": 0.0017432133900001645, "learning_rate": 0.0001753337744229704, "loss": 23.0, "step": 3829 }, { "epoch": 2.298919567827131, "grad_norm": 0.0018603568896651268, "learning_rate": 0.00017532134964175504, "loss": 23.0, "step": 3830 }, { "epoch": 2.2995198079231693, "grad_norm": 0.002048965310677886, "learning_rate": 0.0001753089221725145, "loss": 23.0, "step": 3831 }, { "epoch": 2.300120048019208, "grad_norm": 0.001226543914526701, "learning_rate": 0.00017529649201569226, "loss": 23.0, "step": 3832 }, { "epoch": 2.300720288115246, "grad_norm": 0.004013392608612776, "learning_rate": 0.0001752840591717319, "loss": 23.0, "step": 3833 }, { "epoch": 2.3013205282112845, "grad_norm": 0.0008342837681993842, "learning_rate": 0.00017527162364107713, "loss": 23.0, "step": 3834 }, { "epoch": 2.301920768307323, "grad_norm": 0.0007289492059499025, "learning_rate": 0.00017525918542417171, "loss": 23.0, "step": 3835 }, { "epoch": 2.302521008403361, "grad_norm": 0.0029447299893945456, "learning_rate": 0.00017524674452145956, "loss": 23.0, "step": 3836 }, { "epoch": 2.3031212484993997, "grad_norm": 0.002568695694208145, "learning_rate": 0.00017523430093338468, "loss": 23.0, "step": 3837 }, { "epoch": 2.3037214885954382, "grad_norm": 0.0012608342804014683, "learning_rate": 0.0001752218546603911, "loss": 23.0, "step": 3838 }, { "epoch": 2.304321728691477, "grad_norm": 0.00195410312153399, "learning_rate": 0.00017520940570292303, "loss": 23.0, "step": 3839 }, { "epoch": 2.304921968787515, "grad_norm": 0.003413090016692877, "learning_rate": 0.00017519695406142472, "loss": 23.0, "step": 3840 }, { "epoch": 2.3055222088835534, "grad_norm": 0.002465988975018263, "learning_rate": 0.00017518449973634055, "loss": 23.0, "step": 3841 }, { "epoch": 2.306122448979592, "grad_norm": 0.00117593037430197, "learning_rate": 0.00017517204272811501, "loss": 23.0, "step": 3842 }, { "epoch": 2.30672268907563, "grad_norm": 0.00045529394992627203, "learning_rate": 0.0001751595830371926, "loss": 23.0, "step": 3843 }, { "epoch": 2.3073229291716686, "grad_norm": 0.0013716558460146189, "learning_rate": 0.000175147120664018, "loss": 23.0, "step": 3844 }, { "epoch": 2.307923169267707, "grad_norm": 0.0009694201871752739, "learning_rate": 0.000175134655609036, "loss": 23.0, "step": 3845 }, { "epoch": 2.3085234093637457, "grad_norm": 0.002793167717754841, "learning_rate": 0.00017512218787269138, "loss": 23.0, "step": 3846 }, { "epoch": 2.3091236494597838, "grad_norm": 0.0012160228798165917, "learning_rate": 0.0001751097174554291, "loss": 23.0, "step": 3847 }, { "epoch": 2.3097238895558223, "grad_norm": 0.0041273715905845165, "learning_rate": 0.00017509724435769427, "loss": 23.0, "step": 3848 }, { "epoch": 2.310324129651861, "grad_norm": 0.0019413263071328402, "learning_rate": 0.00017508476857993192, "loss": 23.0, "step": 3849 }, { "epoch": 2.310924369747899, "grad_norm": 0.000679299992043525, "learning_rate": 0.00017507229012258732, "loss": 23.0, "step": 3850 }, { "epoch": 2.3115246098439375, "grad_norm": 0.0014612323138862848, "learning_rate": 0.0001750598089861058, "loss": 23.0, "step": 3851 }, { "epoch": 2.312124849939976, "grad_norm": 0.002256182488054037, "learning_rate": 0.00017504732517093278, "loss": 23.0, "step": 3852 }, { "epoch": 2.3127250900360146, "grad_norm": 0.0009005808969959617, "learning_rate": 0.00017503483867751377, "loss": 23.0, "step": 3853 }, { "epoch": 2.3133253301320527, "grad_norm": 0.0015893650706857443, "learning_rate": 0.00017502234950629437, "loss": 23.0, "step": 3854 }, { "epoch": 2.313925570228091, "grad_norm": 0.001747640548273921, "learning_rate": 0.0001750098576577203, "loss": 23.0, "step": 3855 }, { "epoch": 2.3145258103241297, "grad_norm": 0.0005708754761144519, "learning_rate": 0.00017499736313223737, "loss": 23.0, "step": 3856 }, { "epoch": 2.315126050420168, "grad_norm": 0.0028467399533838034, "learning_rate": 0.00017498486593029144, "loss": 23.0, "step": 3857 }, { "epoch": 2.3157262905162064, "grad_norm": 0.0014178903074935079, "learning_rate": 0.00017497236605232855, "loss": 23.0, "step": 3858 }, { "epoch": 2.316326530612245, "grad_norm": 0.005164925009012222, "learning_rate": 0.00017495986349879477, "loss": 23.0, "step": 3859 }, { "epoch": 2.3169267707082835, "grad_norm": 0.0027939407154917717, "learning_rate": 0.00017494735827013627, "loss": 23.0, "step": 3860 }, { "epoch": 2.3175270108043216, "grad_norm": 0.002403461141511798, "learning_rate": 0.00017493485036679935, "loss": 23.0, "step": 3861 }, { "epoch": 2.31812725090036, "grad_norm": 0.003365919226780534, "learning_rate": 0.00017492233978923035, "loss": 23.0, "step": 3862 }, { "epoch": 2.3187274909963986, "grad_norm": 0.001767502399161458, "learning_rate": 0.00017490982653787577, "loss": 23.0, "step": 3863 }, { "epoch": 2.3193277310924367, "grad_norm": 0.0037461065221577883, "learning_rate": 0.0001748973106131822, "loss": 23.0, "step": 3864 }, { "epoch": 2.3199279711884753, "grad_norm": 0.0054776170291006565, "learning_rate": 0.00017488479201559627, "loss": 23.0, "step": 3865 }, { "epoch": 2.320528211284514, "grad_norm": 0.0020852498710155487, "learning_rate": 0.00017487227074556474, "loss": 23.0, "step": 3866 }, { "epoch": 2.3211284513805523, "grad_norm": 0.001896170899271965, "learning_rate": 0.00017485974680353446, "loss": 23.0, "step": 3867 }, { "epoch": 2.3217286914765904, "grad_norm": 0.0022559871431440115, "learning_rate": 0.00017484722018995235, "loss": 23.0, "step": 3868 }, { "epoch": 2.322328931572629, "grad_norm": 0.0008233110420405865, "learning_rate": 0.00017483469090526552, "loss": 23.0, "step": 3869 }, { "epoch": 2.3229291716686675, "grad_norm": 0.003072496736422181, "learning_rate": 0.00017482215894992106, "loss": 23.0, "step": 3870 }, { "epoch": 2.323529411764706, "grad_norm": 0.0034991884604096413, "learning_rate": 0.00017480962432436618, "loss": 23.0, "step": 3871 }, { "epoch": 2.324129651860744, "grad_norm": 0.002439623698592186, "learning_rate": 0.00017479708702904827, "loss": 23.0, "step": 3872 }, { "epoch": 2.3247298919567827, "grad_norm": 0.0018866292666643858, "learning_rate": 0.00017478454706441472, "loss": 23.0, "step": 3873 }, { "epoch": 2.3253301320528212, "grad_norm": 0.00198563514277339, "learning_rate": 0.00017477200443091306, "loss": 23.0, "step": 3874 }, { "epoch": 2.32593037214886, "grad_norm": 0.005812112707644701, "learning_rate": 0.0001747594591289909, "loss": 23.0, "step": 3875 }, { "epoch": 2.326530612244898, "grad_norm": 0.0013709774939343333, "learning_rate": 0.00017474691115909594, "loss": 23.0, "step": 3876 }, { "epoch": 2.3271308523409364, "grad_norm": 0.0018185654189437628, "learning_rate": 0.00017473436052167597, "loss": 23.0, "step": 3877 }, { "epoch": 2.327731092436975, "grad_norm": 0.002282187808305025, "learning_rate": 0.00017472180721717895, "loss": 23.0, "step": 3878 }, { "epoch": 2.328331332533013, "grad_norm": 0.0013887138338759542, "learning_rate": 0.00017470925124605282, "loss": 23.0, "step": 3879 }, { "epoch": 2.3289315726290516, "grad_norm": 0.00287288217805326, "learning_rate": 0.0001746966926087457, "loss": 23.0, "step": 3880 }, { "epoch": 2.32953181272509, "grad_norm": 0.0033778510987758636, "learning_rate": 0.00017468413130570577, "loss": 23.0, "step": 3881 }, { "epoch": 2.3301320528211287, "grad_norm": 0.0016036947490647435, "learning_rate": 0.00017467156733738128, "loss": 23.0, "step": 3882 }, { "epoch": 2.3307322929171668, "grad_norm": 0.0013769504148513079, "learning_rate": 0.00017465900070422064, "loss": 23.0, "step": 3883 }, { "epoch": 2.3313325330132053, "grad_norm": 0.00230945716612041, "learning_rate": 0.0001746464314066723, "loss": 23.0, "step": 3884 }, { "epoch": 2.331932773109244, "grad_norm": 0.0031504861544817686, "learning_rate": 0.00017463385944518485, "loss": 23.0, "step": 3885 }, { "epoch": 2.332533013205282, "grad_norm": 0.0008833253523334861, "learning_rate": 0.00017462128482020694, "loss": 23.0, "step": 3886 }, { "epoch": 2.3331332533013205, "grad_norm": 0.002419440308585763, "learning_rate": 0.00017460870753218733, "loss": 23.0, "step": 3887 }, { "epoch": 2.333733493397359, "grad_norm": 0.00045076129026710987, "learning_rate": 0.00017459612758157484, "loss": 23.0, "step": 3888 }, { "epoch": 2.3343337334933976, "grad_norm": 0.0007832158589735627, "learning_rate": 0.00017458354496881846, "loss": 23.0, "step": 3889 }, { "epoch": 2.3349339735894357, "grad_norm": 0.0010271031642332673, "learning_rate": 0.00017457095969436722, "loss": 23.0, "step": 3890 }, { "epoch": 2.335534213685474, "grad_norm": 0.002968541579321027, "learning_rate": 0.0001745583717586702, "loss": 23.0, "step": 3891 }, { "epoch": 2.3361344537815127, "grad_norm": 0.005150226876139641, "learning_rate": 0.00017454578116217674, "loss": 23.0, "step": 3892 }, { "epoch": 2.336734693877551, "grad_norm": 0.0014614604879170656, "learning_rate": 0.00017453318790533606, "loss": 23.0, "step": 3893 }, { "epoch": 2.3373349339735894, "grad_norm": 0.003210582537576556, "learning_rate": 0.00017452059198859766, "loss": 23.0, "step": 3894 }, { "epoch": 2.337935174069628, "grad_norm": 0.0043360283598303795, "learning_rate": 0.000174507993412411, "loss": 23.0, "step": 3895 }, { "epoch": 2.3385354141656665, "grad_norm": 0.0008345422684215009, "learning_rate": 0.00017449539217722571, "loss": 23.0, "step": 3896 }, { "epoch": 2.3391356542617046, "grad_norm": 0.003543426748365164, "learning_rate": 0.00017448278828349152, "loss": 23.0, "step": 3897 }, { "epoch": 2.339735894357743, "grad_norm": 0.0006024944595992565, "learning_rate": 0.00017447018173165818, "loss": 23.0, "step": 3898 }, { "epoch": 2.3403361344537816, "grad_norm": 0.002525531454011798, "learning_rate": 0.00017445757252217562, "loss": 23.0, "step": 3899 }, { "epoch": 2.3409363745498197, "grad_norm": 0.0018678716151043773, "learning_rate": 0.00017444496065549385, "loss": 23.0, "step": 3900 }, { "epoch": 2.3415366146458583, "grad_norm": 0.002660698490217328, "learning_rate": 0.00017443234613206293, "loss": 23.0, "step": 3901 }, { "epoch": 2.342136854741897, "grad_norm": 0.0027779710944741964, "learning_rate": 0.000174419728952333, "loss": 23.0, "step": 3902 }, { "epoch": 2.3427370948379354, "grad_norm": 0.0036743993405252695, "learning_rate": 0.0001744071091167544, "loss": 23.0, "step": 3903 }, { "epoch": 2.3433373349339734, "grad_norm": 0.002380775986239314, "learning_rate": 0.00017439448662577749, "loss": 23.0, "step": 3904 }, { "epoch": 2.343937575030012, "grad_norm": 0.0030045825988054276, "learning_rate": 0.0001743818614798527, "loss": 23.0, "step": 3905 }, { "epoch": 2.3445378151260505, "grad_norm": 0.0010804989142343402, "learning_rate": 0.00017436923367943058, "loss": 23.0, "step": 3906 }, { "epoch": 2.3451380552220886, "grad_norm": 0.0031437580473721027, "learning_rate": 0.00017435660322496187, "loss": 23.0, "step": 3907 }, { "epoch": 2.345738295318127, "grad_norm": 0.002179528819397092, "learning_rate": 0.0001743439701168972, "loss": 23.0, "step": 3908 }, { "epoch": 2.3463385354141657, "grad_norm": 0.0047058421187102795, "learning_rate": 0.0001743313343556875, "loss": 23.0, "step": 3909 }, { "epoch": 2.3469387755102042, "grad_norm": 0.0017768297111615539, "learning_rate": 0.00017431869594178366, "loss": 23.0, "step": 3910 }, { "epoch": 2.3475390156062423, "grad_norm": 0.0016148180002346635, "learning_rate": 0.00017430605487563677, "loss": 23.0, "step": 3911 }, { "epoch": 2.348139255702281, "grad_norm": 0.001844774466007948, "learning_rate": 0.00017429341115769787, "loss": 23.0, "step": 3912 }, { "epoch": 2.3487394957983194, "grad_norm": 0.001695389044471085, "learning_rate": 0.0001742807647884183, "loss": 23.0, "step": 3913 }, { "epoch": 2.3493397358943575, "grad_norm": 0.0021679711062461138, "learning_rate": 0.00017426811576824923, "loss": 23.0, "step": 3914 }, { "epoch": 2.349939975990396, "grad_norm": 0.0033461309503763914, "learning_rate": 0.00017425546409764218, "loss": 23.0, "step": 3915 }, { "epoch": 2.3505402160864346, "grad_norm": 0.0019258579704910517, "learning_rate": 0.00017424280977704863, "loss": 23.0, "step": 3916 }, { "epoch": 2.351140456182473, "grad_norm": 0.0012793492060154676, "learning_rate": 0.0001742301528069202, "loss": 23.0, "step": 3917 }, { "epoch": 2.3517406962785112, "grad_norm": 0.0014299890026450157, "learning_rate": 0.00017421749318770853, "loss": 23.0, "step": 3918 }, { "epoch": 2.3523409363745498, "grad_norm": 0.0012088280636817217, "learning_rate": 0.00017420483091986543, "loss": 23.0, "step": 3919 }, { "epoch": 2.3529411764705883, "grad_norm": 0.0010071590077131987, "learning_rate": 0.0001741921660038428, "loss": 23.0, "step": 3920 }, { "epoch": 2.3535414165666264, "grad_norm": 0.0018569778185337782, "learning_rate": 0.0001741794984400926, "loss": 23.0, "step": 3921 }, { "epoch": 2.354141656662665, "grad_norm": 0.0018122204346582294, "learning_rate": 0.00017416682822906694, "loss": 23.0, "step": 3922 }, { "epoch": 2.3547418967587035, "grad_norm": 0.0012169646797701716, "learning_rate": 0.00017415415537121797, "loss": 23.0, "step": 3923 }, { "epoch": 2.355342136854742, "grad_norm": 0.0032757974695414305, "learning_rate": 0.00017414147986699792, "loss": 23.0, "step": 3924 }, { "epoch": 2.35594237695078, "grad_norm": 0.0015983044868335128, "learning_rate": 0.00017412880171685914, "loss": 23.0, "step": 3925 }, { "epoch": 2.3565426170468187, "grad_norm": 0.000568926683627069, "learning_rate": 0.00017411612092125417, "loss": 23.0, "step": 3926 }, { "epoch": 2.357142857142857, "grad_norm": 0.0018514172406867146, "learning_rate": 0.00017410343748063545, "loss": 23.0, "step": 3927 }, { "epoch": 2.3577430972388957, "grad_norm": 0.0015337765216827393, "learning_rate": 0.00017409075139545568, "loss": 23.0, "step": 3928 }, { "epoch": 2.358343337334934, "grad_norm": 0.0009689840371720493, "learning_rate": 0.00017407806266616754, "loss": 23.0, "step": 3929 }, { "epoch": 2.3589435774309724, "grad_norm": 0.002194127067923546, "learning_rate": 0.00017406537129322394, "loss": 23.0, "step": 3930 }, { "epoch": 2.359543817527011, "grad_norm": 0.0015414162771776319, "learning_rate": 0.00017405267727707774, "loss": 23.0, "step": 3931 }, { "epoch": 2.3601440576230495, "grad_norm": 0.00167924037668854, "learning_rate": 0.000174039980618182, "loss": 23.0, "step": 3932 }, { "epoch": 2.3607442977190876, "grad_norm": 0.001501681748777628, "learning_rate": 0.00017402728131698977, "loss": 23.0, "step": 3933 }, { "epoch": 2.361344537815126, "grad_norm": 0.0010487270774319768, "learning_rate": 0.0001740145793739543, "loss": 23.0, "step": 3934 }, { "epoch": 2.3619447779111646, "grad_norm": 0.0017754529835656285, "learning_rate": 0.0001740018747895289, "loss": 23.0, "step": 3935 }, { "epoch": 2.3625450180072027, "grad_norm": 0.00277283089235425, "learning_rate": 0.0001739891675641669, "loss": 23.0, "step": 3936 }, { "epoch": 2.3631452581032413, "grad_norm": 0.0016696704551577568, "learning_rate": 0.00017397645769832187, "loss": 23.0, "step": 3937 }, { "epoch": 2.36374549819928, "grad_norm": 0.0018485120963305235, "learning_rate": 0.00017396374519244737, "loss": 23.0, "step": 3938 }, { "epoch": 2.3643457382953184, "grad_norm": 0.0054781329818069935, "learning_rate": 0.000173951030046997, "loss": 23.0, "step": 3939 }, { "epoch": 2.3649459783913565, "grad_norm": 0.0019205682910978794, "learning_rate": 0.00017393831226242466, "loss": 23.0, "step": 3940 }, { "epoch": 2.365546218487395, "grad_norm": 0.003620882984250784, "learning_rate": 0.0001739255918391841, "loss": 23.0, "step": 3941 }, { "epoch": 2.3661464585834335, "grad_norm": 0.0012869867496192455, "learning_rate": 0.00017391286877772933, "loss": 23.0, "step": 3942 }, { "epoch": 2.3667466986794716, "grad_norm": 0.002010358963161707, "learning_rate": 0.00017390014307851442, "loss": 23.0, "step": 3943 }, { "epoch": 2.36734693877551, "grad_norm": 0.0021779662929475307, "learning_rate": 0.0001738874147419935, "loss": 23.0, "step": 3944 }, { "epoch": 2.3679471788715487, "grad_norm": 0.0021605216898024082, "learning_rate": 0.0001738746837686208, "loss": 23.0, "step": 3945 }, { "epoch": 2.3685474189675872, "grad_norm": 0.0006803186261095107, "learning_rate": 0.00017386195015885064, "loss": 23.0, "step": 3946 }, { "epoch": 2.3691476590636253, "grad_norm": 0.0011268210364505649, "learning_rate": 0.0001738492139131375, "loss": 23.0, "step": 3947 }, { "epoch": 2.369747899159664, "grad_norm": 0.0007069764542393386, "learning_rate": 0.0001738364750319359, "loss": 23.0, "step": 3948 }, { "epoch": 2.3703481392557024, "grad_norm": 0.007430940866470337, "learning_rate": 0.0001738237335157004, "loss": 23.0, "step": 3949 }, { "epoch": 2.3709483793517405, "grad_norm": 0.003919681068509817, "learning_rate": 0.00017381098936488574, "loss": 23.0, "step": 3950 }, { "epoch": 2.371548619447779, "grad_norm": 0.0008578869164921343, "learning_rate": 0.0001737982425799468, "loss": 23.0, "step": 3951 }, { "epoch": 2.3721488595438176, "grad_norm": 0.0018030635546892881, "learning_rate": 0.00017378549316133835, "loss": 23.0, "step": 3952 }, { "epoch": 2.372749099639856, "grad_norm": 0.0018941910238936543, "learning_rate": 0.00017377274110951543, "loss": 23.0, "step": 3953 }, { "epoch": 2.3733493397358942, "grad_norm": 0.0007153527112677693, "learning_rate": 0.00017375998642493322, "loss": 23.0, "step": 3954 }, { "epoch": 2.3739495798319328, "grad_norm": 0.0009323756094090641, "learning_rate": 0.00017374722910804677, "loss": 23.0, "step": 3955 }, { "epoch": 2.3745498199279713, "grad_norm": 0.0036982784513384104, "learning_rate": 0.00017373446915931143, "loss": 23.0, "step": 3956 }, { "epoch": 2.3751500600240094, "grad_norm": 0.0014185566687956452, "learning_rate": 0.00017372170657918256, "loss": 23.0, "step": 3957 }, { "epoch": 2.375750300120048, "grad_norm": 0.002080840989947319, "learning_rate": 0.00017370894136811563, "loss": 23.0, "step": 3958 }, { "epoch": 2.3763505402160865, "grad_norm": 0.0006996280862949789, "learning_rate": 0.00017369617352656618, "loss": 23.0, "step": 3959 }, { "epoch": 2.376950780312125, "grad_norm": 0.0018821045523509383, "learning_rate": 0.00017368340305498985, "loss": 23.0, "step": 3960 }, { "epoch": 2.377551020408163, "grad_norm": 0.0013187207514420152, "learning_rate": 0.0001736706299538424, "loss": 23.0, "step": 3961 }, { "epoch": 2.3781512605042017, "grad_norm": 0.001449449686333537, "learning_rate": 0.0001736578542235797, "loss": 23.0, "step": 3962 }, { "epoch": 2.37875150060024, "grad_norm": 0.002266083611175418, "learning_rate": 0.00017364507586465762, "loss": 23.0, "step": 3963 }, { "epoch": 2.3793517406962783, "grad_norm": 0.002582366345450282, "learning_rate": 0.00017363229487753223, "loss": 23.0, "step": 3964 }, { "epoch": 2.379951980792317, "grad_norm": 0.0003937427536584437, "learning_rate": 0.00017361951126265964, "loss": 23.0, "step": 3965 }, { "epoch": 2.3805522208883554, "grad_norm": 0.0017639965517446399, "learning_rate": 0.00017360672502049606, "loss": 23.0, "step": 3966 }, { "epoch": 2.381152460984394, "grad_norm": 0.004503778647631407, "learning_rate": 0.0001735939361514978, "loss": 23.0, "step": 3967 }, { "epoch": 2.381752701080432, "grad_norm": 0.0036358425859361887, "learning_rate": 0.0001735811446561213, "loss": 23.0, "step": 3968 }, { "epoch": 2.3823529411764706, "grad_norm": 0.0023689253721386194, "learning_rate": 0.00017356835053482299, "loss": 23.0, "step": 3969 }, { "epoch": 2.382953181272509, "grad_norm": 0.002466338686645031, "learning_rate": 0.0001735555537880595, "loss": 23.0, "step": 3970 }, { "epoch": 2.383553421368547, "grad_norm": 0.0021039212588220835, "learning_rate": 0.0001735427544162875, "loss": 23.0, "step": 3971 }, { "epoch": 2.3841536614645857, "grad_norm": 0.0027853043284267187, "learning_rate": 0.00017352995241996374, "loss": 23.0, "step": 3972 }, { "epoch": 2.3847539015606243, "grad_norm": 0.0033636244479566813, "learning_rate": 0.00017351714779954516, "loss": 23.0, "step": 3973 }, { "epoch": 2.385354141656663, "grad_norm": 0.002105874475091696, "learning_rate": 0.00017350434055548867, "loss": 23.0, "step": 3974 }, { "epoch": 2.385954381752701, "grad_norm": 0.0009028427302837372, "learning_rate": 0.00017349153068825133, "loss": 23.0, "step": 3975 }, { "epoch": 2.3865546218487395, "grad_norm": 0.0030058857519179583, "learning_rate": 0.00017347871819829033, "loss": 23.0, "step": 3976 }, { "epoch": 2.387154861944778, "grad_norm": 0.0017001151572912931, "learning_rate": 0.00017346590308606286, "loss": 23.0, "step": 3977 }, { "epoch": 2.387755102040816, "grad_norm": 0.0008253180421888828, "learning_rate": 0.0001734530853520263, "loss": 23.0, "step": 3978 }, { "epoch": 2.3883553421368546, "grad_norm": 0.0019861324690282345, "learning_rate": 0.00017344026499663807, "loss": 23.0, "step": 3979 }, { "epoch": 2.388955582232893, "grad_norm": 0.002510624472051859, "learning_rate": 0.0001734274420203557, "loss": 23.0, "step": 3980 }, { "epoch": 2.3895558223289317, "grad_norm": 0.0011581958970054984, "learning_rate": 0.00017341461642363676, "loss": 23.0, "step": 3981 }, { "epoch": 2.39015606242497, "grad_norm": 0.002012391574680805, "learning_rate": 0.00017340178820693906, "loss": 23.0, "step": 3982 }, { "epoch": 2.3907563025210083, "grad_norm": 0.002154824323952198, "learning_rate": 0.0001733889573707203, "loss": 23.0, "step": 3983 }, { "epoch": 2.391356542617047, "grad_norm": 0.0009000212885439396, "learning_rate": 0.00017337612391543847, "loss": 23.0, "step": 3984 }, { "epoch": 2.3919567827130854, "grad_norm": 0.003189866431057453, "learning_rate": 0.0001733632878415515, "loss": 23.0, "step": 3985 }, { "epoch": 2.3925570228091235, "grad_norm": 0.0023710643872618675, "learning_rate": 0.00017335044914951753, "loss": 23.0, "step": 3986 }, { "epoch": 2.393157262905162, "grad_norm": 0.0008908773888833821, "learning_rate": 0.00017333760783979466, "loss": 23.0, "step": 3987 }, { "epoch": 2.3937575030012006, "grad_norm": 0.0027625104412436485, "learning_rate": 0.00017332476391284128, "loss": 23.0, "step": 3988 }, { "epoch": 2.394357743097239, "grad_norm": 0.002077084966003895, "learning_rate": 0.00017331191736911564, "loss": 23.0, "step": 3989 }, { "epoch": 2.3949579831932772, "grad_norm": 0.0020401370711624622, "learning_rate": 0.00017329906820907627, "loss": 23.0, "step": 3990 }, { "epoch": 2.395558223289316, "grad_norm": 0.00204063905403018, "learning_rate": 0.0001732862164331817, "loss": 23.0, "step": 3991 }, { "epoch": 2.3961584633853543, "grad_norm": 0.0018513710238039494, "learning_rate": 0.00017327336204189055, "loss": 23.0, "step": 3992 }, { "epoch": 2.3967587034813924, "grad_norm": 0.0022755179088562727, "learning_rate": 0.00017326050503566162, "loss": 23.0, "step": 3993 }, { "epoch": 2.397358943577431, "grad_norm": 0.0027569227386265993, "learning_rate": 0.00017324764541495373, "loss": 23.0, "step": 3994 }, { "epoch": 2.3979591836734695, "grad_norm": 0.0031000536400824785, "learning_rate": 0.00017323478318022578, "loss": 23.0, "step": 3995 }, { "epoch": 2.398559423769508, "grad_norm": 0.0016642792616039515, "learning_rate": 0.00017322191833193674, "loss": 23.0, "step": 3996 }, { "epoch": 2.399159663865546, "grad_norm": 0.0015426806639879942, "learning_rate": 0.00017320905087054585, "loss": 23.0, "step": 3997 }, { "epoch": 2.3997599039615847, "grad_norm": 0.0016276773530989885, "learning_rate": 0.00017319618079651226, "loss": 23.0, "step": 3998 }, { "epoch": 2.400360144057623, "grad_norm": 0.00199799588881433, "learning_rate": 0.0001731833081102952, "loss": 23.0, "step": 3999 }, { "epoch": 2.4009603841536613, "grad_norm": 0.001323958276771009, "learning_rate": 0.00017317043281235418, "loss": 23.0, "step": 4000 }, { "epoch": 2.4009603841536613, "eval_loss": 11.5, "eval_runtime": 5.4819, "eval_samples_per_second": 255.931, "eval_steps_per_second": 32.105, "step": 4000 } ], "logging_steps": 1, "max_steps": 16660, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 1000, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 80021001510912.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }