{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 21768, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 1.1896751676677808, "learning_rate": 3.0581039755351686e-08, "loss": 0.3301, "step": 1 }, { "epoch": 0.0, "grad_norm": 1.0029278066787826, "learning_rate": 6.116207951070337e-08, "loss": 0.3915, "step": 2 }, { "epoch": 0.0, "grad_norm": 2.1703349053558765, "learning_rate": 9.174311926605506e-08, "loss": 0.6309, "step": 3 }, { "epoch": 0.0, "grad_norm": 0.9131179113708626, "learning_rate": 1.2232415902140674e-07, "loss": 0.275, "step": 4 }, { "epoch": 0.0, "grad_norm": 0.7030740475915367, "learning_rate": 1.5290519877675842e-07, "loss": 0.254, "step": 5 }, { "epoch": 0.0, "grad_norm": 4.848491169484364, "learning_rate": 1.8348623853211012e-07, "loss": 0.7207, "step": 6 }, { "epoch": 0.0, "grad_norm": 0.9297694531189934, "learning_rate": 2.140672782874618e-07, "loss": 0.2531, "step": 7 }, { "epoch": 0.0, "grad_norm": 1.4877978876141211, "learning_rate": 2.446483180428135e-07, "loss": 0.5763, "step": 8 }, { "epoch": 0.0, "grad_norm": 1.1114322130048153, "learning_rate": 2.752293577981652e-07, "loss": 0.4025, "step": 9 }, { "epoch": 0.0, "grad_norm": 0.9746775937464117, "learning_rate": 3.0581039755351683e-07, "loss": 0.2694, "step": 10 }, { "epoch": 0.0, "grad_norm": 0.9793302921045483, "learning_rate": 3.363914373088685e-07, "loss": 0.2089, "step": 11 }, { "epoch": 0.0, "grad_norm": 1.852204792652111, "learning_rate": 3.6697247706422023e-07, "loss": 0.5606, "step": 12 }, { "epoch": 0.0, "grad_norm": 0.9117889839097142, "learning_rate": 3.975535168195719e-07, "loss": 0.3266, "step": 13 }, { "epoch": 0.0, "grad_norm": 0.9864663310271774, "learning_rate": 4.281345565749236e-07, "loss": 0.3865, "step": 14 }, { "epoch": 0.0, "grad_norm": 2.506889830882708, "learning_rate": 4.587155963302753e-07, "loss": 0.7021, "step": 15 }, { "epoch": 0.0, "grad_norm": 1.7764826003955219, "learning_rate": 4.89296636085627e-07, "loss": 0.3314, "step": 16 }, { "epoch": 0.0, "grad_norm": 0.6380223564776178, "learning_rate": 5.198776758409786e-07, "loss": 0.2747, "step": 17 }, { "epoch": 0.0, "grad_norm": 1.12564089310686, "learning_rate": 5.504587155963304e-07, "loss": 0.3476, "step": 18 }, { "epoch": 0.0, "grad_norm": 1.3906186159580616, "learning_rate": 5.81039755351682e-07, "loss": 0.3971, "step": 19 }, { "epoch": 0.0, "grad_norm": 1.0055363674251718, "learning_rate": 6.116207951070337e-07, "loss": 0.3663, "step": 20 }, { "epoch": 0.0, "grad_norm": 1.1763220205271772, "learning_rate": 6.422018348623854e-07, "loss": 0.3956, "step": 21 }, { "epoch": 0.0, "grad_norm": 1.1771358477951004, "learning_rate": 6.72782874617737e-07, "loss": 0.3343, "step": 22 }, { "epoch": 0.0, "grad_norm": 0.8682733905020673, "learning_rate": 7.033639143730888e-07, "loss": 0.2151, "step": 23 }, { "epoch": 0.0, "grad_norm": 1.806191955924715, "learning_rate": 7.339449541284405e-07, "loss": 0.5262, "step": 24 }, { "epoch": 0.0, "grad_norm": 0.7920947235713969, "learning_rate": 7.645259938837921e-07, "loss": 0.3274, "step": 25 }, { "epoch": 0.0, "grad_norm": 1.3212618760626385, "learning_rate": 7.951070336391438e-07, "loss": 0.3825, "step": 26 }, { "epoch": 0.0, "grad_norm": 2.735046553729485, "learning_rate": 8.256880733944956e-07, "loss": 0.5674, "step": 27 }, { "epoch": 0.0, "grad_norm": 1.1243660525291819, "learning_rate": 8.562691131498472e-07, "loss": 0.3288, "step": 28 }, { "epoch": 0.0, "grad_norm": 1.0530811537369742, "learning_rate": 8.868501529051989e-07, "loss": 0.3487, "step": 29 }, { "epoch": 0.0, "grad_norm": 0.7831702324371507, "learning_rate": 9.174311926605506e-07, "loss": 0.2744, "step": 30 }, { "epoch": 0.0, "grad_norm": 1.054113376915378, "learning_rate": 9.480122324159022e-07, "loss": 0.3711, "step": 31 }, { "epoch": 0.0, "grad_norm": 1.7387637511827139, "learning_rate": 9.78593272171254e-07, "loss": 0.4615, "step": 32 }, { "epoch": 0.0, "grad_norm": 0.9137121198649181, "learning_rate": 1.0091743119266057e-06, "loss": 0.369, "step": 33 }, { "epoch": 0.0, "grad_norm": 1.9576881450474133, "learning_rate": 1.0397553516819571e-06, "loss": 0.3786, "step": 34 }, { "epoch": 0.0, "grad_norm": 1.332010783690538, "learning_rate": 1.070336391437309e-06, "loss": 0.4082, "step": 35 }, { "epoch": 0.0, "grad_norm": 0.7729452322569007, "learning_rate": 1.1009174311926608e-06, "loss": 0.3462, "step": 36 }, { "epoch": 0.0, "grad_norm": 1.3710484414254225, "learning_rate": 1.1314984709480122e-06, "loss": 0.3393, "step": 37 }, { "epoch": 0.0, "grad_norm": 0.8633454596814635, "learning_rate": 1.162079510703364e-06, "loss": 0.2741, "step": 38 }, { "epoch": 0.0, "grad_norm": 3.3227585736572496, "learning_rate": 1.1926605504587159e-06, "loss": 0.7708, "step": 39 }, { "epoch": 0.0, "grad_norm": 1.2254278901256441, "learning_rate": 1.2232415902140673e-06, "loss": 0.1408, "step": 40 }, { "epoch": 0.0, "grad_norm": 1.2658216734261158, "learning_rate": 1.253822629969419e-06, "loss": 0.3555, "step": 41 }, { "epoch": 0.0, "grad_norm": 1.8492790091748121, "learning_rate": 1.2844036697247707e-06, "loss": 0.5538, "step": 42 }, { "epoch": 0.0, "grad_norm": 0.6521772305476505, "learning_rate": 1.3149847094801224e-06, "loss": 0.189, "step": 43 }, { "epoch": 0.0, "grad_norm": 2.6694535297067086, "learning_rate": 1.345565749235474e-06, "loss": 0.434, "step": 44 }, { "epoch": 0.0, "grad_norm": 0.9831137254699878, "learning_rate": 1.3761467889908258e-06, "loss": 0.3866, "step": 45 }, { "epoch": 0.0, "grad_norm": 0.8810512708255911, "learning_rate": 1.4067278287461775e-06, "loss": 0.2705, "step": 46 }, { "epoch": 0.0, "grad_norm": 1.4724167975436921, "learning_rate": 1.437308868501529e-06, "loss": 0.4621, "step": 47 }, { "epoch": 0.0, "grad_norm": 1.7970235943355533, "learning_rate": 1.467889908256881e-06, "loss": 0.5871, "step": 48 }, { "epoch": 0.0, "grad_norm": 0.7397532216141224, "learning_rate": 1.4984709480122326e-06, "loss": 0.2425, "step": 49 }, { "epoch": 0.0, "grad_norm": 1.3531752520765654, "learning_rate": 1.5290519877675841e-06, "loss": 0.2837, "step": 50 }, { "epoch": 0.0, "grad_norm": 2.7721355044000253, "learning_rate": 1.559633027522936e-06, "loss": 0.7699, "step": 51 }, { "epoch": 0.0, "grad_norm": 2.6317311369397967, "learning_rate": 1.5902140672782875e-06, "loss": 0.6162, "step": 52 }, { "epoch": 0.0, "grad_norm": 0.7515064151592153, "learning_rate": 1.6207951070336392e-06, "loss": 0.2914, "step": 53 }, { "epoch": 0.0, "grad_norm": 1.7002464468128908, "learning_rate": 1.6513761467889911e-06, "loss": 0.5804, "step": 54 }, { "epoch": 0.0, "grad_norm": 0.9310827779153261, "learning_rate": 1.6819571865443426e-06, "loss": 0.187, "step": 55 }, { "epoch": 0.0, "grad_norm": 0.9453208569685909, "learning_rate": 1.7125382262996943e-06, "loss": 0.313, "step": 56 }, { "epoch": 0.0, "grad_norm": 1.3517635854510925, "learning_rate": 1.743119266055046e-06, "loss": 0.3995, "step": 57 }, { "epoch": 0.0, "grad_norm": 1.8537884159510714, "learning_rate": 1.7737003058103977e-06, "loss": 0.4677, "step": 58 }, { "epoch": 0.0, "grad_norm": 1.0516230279352063, "learning_rate": 1.8042813455657492e-06, "loss": 0.3637, "step": 59 }, { "epoch": 0.0, "grad_norm": 4.991851047334885, "learning_rate": 1.8348623853211011e-06, "loss": 0.776, "step": 60 }, { "epoch": 0.0, "grad_norm": 0.8244058625346203, "learning_rate": 1.8654434250764528e-06, "loss": 0.2856, "step": 61 }, { "epoch": 0.0, "grad_norm": 1.0513707917964694, "learning_rate": 1.8960244648318043e-06, "loss": 0.2224, "step": 62 }, { "epoch": 0.0, "grad_norm": 3.733463571230695, "learning_rate": 1.9266055045871564e-06, "loss": 0.7926, "step": 63 }, { "epoch": 0.0, "grad_norm": 1.4331701359581341, "learning_rate": 1.957186544342508e-06, "loss": 0.436, "step": 64 }, { "epoch": 0.0, "grad_norm": 0.968076559901853, "learning_rate": 1.9877675840978594e-06, "loss": 0.3406, "step": 65 }, { "epoch": 0.0, "grad_norm": 1.7069546427476723, "learning_rate": 2.0183486238532113e-06, "loss": 0.4375, "step": 66 }, { "epoch": 0.0, "grad_norm": 1.0824662002687286, "learning_rate": 2.048929663608563e-06, "loss": 0.1711, "step": 67 }, { "epoch": 0.0, "grad_norm": 1.287811817097283, "learning_rate": 2.0795107033639143e-06, "loss": 0.2932, "step": 68 }, { "epoch": 0.0, "grad_norm": 1.2058019894798737, "learning_rate": 2.110091743119266e-06, "loss": 0.3628, "step": 69 }, { "epoch": 0.0, "grad_norm": 1.9273038528574367, "learning_rate": 2.140672782874618e-06, "loss": 0.5324, "step": 70 }, { "epoch": 0.0, "grad_norm": 1.1423535036310135, "learning_rate": 2.1712538226299696e-06, "loss": 0.3543, "step": 71 }, { "epoch": 0.0, "grad_norm": 1.0949424822461358, "learning_rate": 2.2018348623853215e-06, "loss": 0.3636, "step": 72 }, { "epoch": 0.0, "grad_norm": 1.4549594179096172, "learning_rate": 2.232415902140673e-06, "loss": 0.2468, "step": 73 }, { "epoch": 0.0, "grad_norm": 0.8198094235029172, "learning_rate": 2.2629969418960245e-06, "loss": 0.2413, "step": 74 }, { "epoch": 0.0, "grad_norm": 1.6675227493932148, "learning_rate": 2.2935779816513764e-06, "loss": 0.4978, "step": 75 }, { "epoch": 0.0, "grad_norm": 1.446692978731366, "learning_rate": 2.324159021406728e-06, "loss": 0.4012, "step": 76 }, { "epoch": 0.0, "grad_norm": 1.4390957501457091, "learning_rate": 2.35474006116208e-06, "loss": 0.3624, "step": 77 }, { "epoch": 0.0, "grad_norm": 2.8151804511762015, "learning_rate": 2.3853211009174317e-06, "loss": 0.5645, "step": 78 }, { "epoch": 0.0, "grad_norm": 1.3426830160741372, "learning_rate": 2.415902140672783e-06, "loss": 0.2479, "step": 79 }, { "epoch": 0.0, "grad_norm": 1.2681889178622874, "learning_rate": 2.4464831804281347e-06, "loss": 0.3634, "step": 80 }, { "epoch": 0.0, "grad_norm": 1.1029696697831592, "learning_rate": 2.4770642201834866e-06, "loss": 0.3844, "step": 81 }, { "epoch": 0.0, "grad_norm": 2.1777977953320735, "learning_rate": 2.507645259938838e-06, "loss": 0.4662, "step": 82 }, { "epoch": 0.0, "grad_norm": 2.0930817836691333, "learning_rate": 2.5382262996941896e-06, "loss": 0.3283, "step": 83 }, { "epoch": 0.0, "grad_norm": 0.9618578596429419, "learning_rate": 2.5688073394495415e-06, "loss": 0.2871, "step": 84 }, { "epoch": 0.0, "grad_norm": 1.491372591620853, "learning_rate": 2.599388379204893e-06, "loss": 0.2788, "step": 85 }, { "epoch": 0.0, "grad_norm": 1.033756853768047, "learning_rate": 2.629969418960245e-06, "loss": 0.2904, "step": 86 }, { "epoch": 0.0, "grad_norm": 2.436778514618774, "learning_rate": 2.6605504587155968e-06, "loss": 0.5784, "step": 87 }, { "epoch": 0.0, "grad_norm": 1.093113945903888, "learning_rate": 2.691131498470948e-06, "loss": 0.3698, "step": 88 }, { "epoch": 0.0, "grad_norm": 1.0664203102086078, "learning_rate": 2.7217125382262998e-06, "loss": 0.3395, "step": 89 }, { "epoch": 0.0, "grad_norm": 1.4632709792867227, "learning_rate": 2.7522935779816517e-06, "loss": 0.3909, "step": 90 }, { "epoch": 0.0, "grad_norm": 3.7309198364057177, "learning_rate": 2.782874617737003e-06, "loss": 0.6458, "step": 91 }, { "epoch": 0.0, "grad_norm": 1.1422162518395853, "learning_rate": 2.813455657492355e-06, "loss": 0.3022, "step": 92 }, { "epoch": 0.0, "grad_norm": 1.6304582732152335, "learning_rate": 2.844036697247707e-06, "loss": 0.4352, "step": 93 }, { "epoch": 0.0, "grad_norm": 3.1863798722019894, "learning_rate": 2.874617737003058e-06, "loss": 0.6853, "step": 94 }, { "epoch": 0.0, "grad_norm": 0.9285198255375843, "learning_rate": 2.90519877675841e-06, "loss": 0.1771, "step": 95 }, { "epoch": 0.0, "grad_norm": 8.895422932528957, "learning_rate": 2.935779816513762e-06, "loss": 0.7249, "step": 96 }, { "epoch": 0.0, "grad_norm": 1.8827546621354185, "learning_rate": 2.9663608562691134e-06, "loss": 0.3843, "step": 97 }, { "epoch": 0.0, "grad_norm": 1.4098972750111525, "learning_rate": 2.9969418960244653e-06, "loss": 0.3115, "step": 98 }, { "epoch": 0.0, "grad_norm": 7.913707114335056, "learning_rate": 3.0275229357798168e-06, "loss": 0.6868, "step": 99 }, { "epoch": 0.0, "grad_norm": 1.204575966433304, "learning_rate": 3.0581039755351682e-06, "loss": 0.4232, "step": 100 }, { "epoch": 0.0, "grad_norm": 1.2922951097458462, "learning_rate": 3.08868501529052e-06, "loss": 0.2423, "step": 101 }, { "epoch": 0.0, "grad_norm": 1.1754783996270737, "learning_rate": 3.119266055045872e-06, "loss": 0.2179, "step": 102 }, { "epoch": 0.0, "grad_norm": 4.33751709349973, "learning_rate": 3.149847094801223e-06, "loss": 0.5415, "step": 103 }, { "epoch": 0.0, "grad_norm": 1.8223977037973127, "learning_rate": 3.180428134556575e-06, "loss": 0.3081, "step": 104 }, { "epoch": 0.0, "grad_norm": 1.6163955424306307, "learning_rate": 3.211009174311927e-06, "loss": 0.3782, "step": 105 }, { "epoch": 0.0, "grad_norm": 2.9741963944177927, "learning_rate": 3.2415902140672784e-06, "loss": 0.8341, "step": 106 }, { "epoch": 0.0, "grad_norm": 1.4032479374425009, "learning_rate": 3.2721712538226303e-06, "loss": 0.3134, "step": 107 }, { "epoch": 0.0, "grad_norm": 0.6730033777364434, "learning_rate": 3.3027522935779823e-06, "loss": 0.2241, "step": 108 }, { "epoch": 0.01, "grad_norm": 3.8738976822835736, "learning_rate": 3.3333333333333333e-06, "loss": 0.624, "step": 109 }, { "epoch": 0.01, "grad_norm": 1.399574507841844, "learning_rate": 3.3639143730886852e-06, "loss": 0.3419, "step": 110 }, { "epoch": 0.01, "grad_norm": 2.374032456694889, "learning_rate": 3.394495412844037e-06, "loss": 0.5837, "step": 111 }, { "epoch": 0.01, "grad_norm": 1.920566353832276, "learning_rate": 3.4250764525993886e-06, "loss": 0.3732, "step": 112 }, { "epoch": 0.01, "grad_norm": 1.5364781648366226, "learning_rate": 3.4556574923547405e-06, "loss": 0.3387, "step": 113 }, { "epoch": 0.01, "grad_norm": 0.6583827360487446, "learning_rate": 3.486238532110092e-06, "loss": 0.1138, "step": 114 }, { "epoch": 0.01, "grad_norm": 1.982102901622778, "learning_rate": 3.5168195718654435e-06, "loss": 0.5624, "step": 115 }, { "epoch": 0.01, "grad_norm": 0.7784898839165266, "learning_rate": 3.5474006116207954e-06, "loss": 0.302, "step": 116 }, { "epoch": 0.01, "grad_norm": 1.4422037410571615, "learning_rate": 3.5779816513761473e-06, "loss": 0.3872, "step": 117 }, { "epoch": 0.01, "grad_norm": 2.4858937811033655, "learning_rate": 3.6085626911314984e-06, "loss": 0.6512, "step": 118 }, { "epoch": 0.01, "grad_norm": 1.1312079116364202, "learning_rate": 3.6391437308868503e-06, "loss": 0.2889, "step": 119 }, { "epoch": 0.01, "grad_norm": 0.7188341571305118, "learning_rate": 3.6697247706422022e-06, "loss": 0.2514, "step": 120 }, { "epoch": 0.01, "grad_norm": 1.8181595950088452, "learning_rate": 3.7003058103975537e-06, "loss": 0.4464, "step": 121 }, { "epoch": 0.01, "grad_norm": 0.8359553931637378, "learning_rate": 3.7308868501529056e-06, "loss": 0.2763, "step": 122 }, { "epoch": 0.01, "grad_norm": 1.7071941676273454, "learning_rate": 3.7614678899082575e-06, "loss": 0.497, "step": 123 }, { "epoch": 0.01, "grad_norm": 0.91288758078914, "learning_rate": 3.7920489296636086e-06, "loss": 0.3321, "step": 124 }, { "epoch": 0.01, "grad_norm": 1.0036249295277695, "learning_rate": 3.8226299694189605e-06, "loss": 0.3437, "step": 125 }, { "epoch": 0.01, "grad_norm": 0.8191724463982983, "learning_rate": 3.853211009174313e-06, "loss": 0.2408, "step": 126 }, { "epoch": 0.01, "grad_norm": 2.194134844467503, "learning_rate": 3.8837920489296635e-06, "loss": 0.5882, "step": 127 }, { "epoch": 0.01, "grad_norm": 0.7138060615993572, "learning_rate": 3.914373088685016e-06, "loss": 0.2701, "step": 128 }, { "epoch": 0.01, "grad_norm": 1.2306809170322444, "learning_rate": 3.944954128440367e-06, "loss": 0.4799, "step": 129 }, { "epoch": 0.01, "grad_norm": 2.734030366671196, "learning_rate": 3.975535168195719e-06, "loss": 0.8143, "step": 130 }, { "epoch": 0.01, "grad_norm": 0.813907380650915, "learning_rate": 4.00611620795107e-06, "loss": 0.2476, "step": 131 }, { "epoch": 0.01, "grad_norm": 0.9397904598149287, "learning_rate": 4.036697247706423e-06, "loss": 0.426, "step": 132 }, { "epoch": 0.01, "grad_norm": 1.3889043403649437, "learning_rate": 4.067278287461774e-06, "loss": 0.4055, "step": 133 }, { "epoch": 0.01, "grad_norm": 0.6853913872092066, "learning_rate": 4.097859327217126e-06, "loss": 0.214, "step": 134 }, { "epoch": 0.01, "grad_norm": 2.608634645749338, "learning_rate": 4.128440366972478e-06, "loss": 0.7945, "step": 135 }, { "epoch": 0.01, "grad_norm": 0.8627309147561726, "learning_rate": 4.1590214067278286e-06, "loss": 0.3871, "step": 136 }, { "epoch": 0.01, "grad_norm": 1.003891431102375, "learning_rate": 4.189602446483181e-06, "loss": 0.0966, "step": 137 }, { "epoch": 0.01, "grad_norm": 0.9150538390537358, "learning_rate": 4.220183486238532e-06, "loss": 0.4021, "step": 138 }, { "epoch": 0.01, "grad_norm": 0.7674051837465868, "learning_rate": 4.250764525993884e-06, "loss": 0.3073, "step": 139 }, { "epoch": 0.01, "grad_norm": 0.9959065873870295, "learning_rate": 4.281345565749236e-06, "loss": 0.3697, "step": 140 }, { "epoch": 0.01, "grad_norm": 0.8373783098933085, "learning_rate": 4.311926605504588e-06, "loss": 0.3278, "step": 141 }, { "epoch": 0.01, "grad_norm": 4.486915958162343, "learning_rate": 4.342507645259939e-06, "loss": 0.7064, "step": 142 }, { "epoch": 0.01, "grad_norm": 1.1654048996396578, "learning_rate": 4.373088685015291e-06, "loss": 0.4024, "step": 143 }, { "epoch": 0.01, "grad_norm": 0.7067746746186605, "learning_rate": 4.403669724770643e-06, "loss": 0.3566, "step": 144 }, { "epoch": 0.01, "grad_norm": 1.84563082625386, "learning_rate": 4.4342507645259945e-06, "loss": 0.6108, "step": 145 }, { "epoch": 0.01, "grad_norm": 0.6681476122690627, "learning_rate": 4.464831804281346e-06, "loss": 0.1448, "step": 146 }, { "epoch": 0.01, "grad_norm": 1.0405232062853644, "learning_rate": 4.4954128440366975e-06, "loss": 0.3461, "step": 147 }, { "epoch": 0.01, "grad_norm": 0.7949934652837498, "learning_rate": 4.525993883792049e-06, "loss": 0.364, "step": 148 }, { "epoch": 0.01, "grad_norm": 1.0848711116616896, "learning_rate": 4.556574923547401e-06, "loss": 0.3632, "step": 149 }, { "epoch": 0.01, "grad_norm": 1.8959000993872266, "learning_rate": 4.587155963302753e-06, "loss": 0.4375, "step": 150 }, { "epoch": 0.01, "grad_norm": 1.0965700036512873, "learning_rate": 4.617737003058104e-06, "loss": 0.2331, "step": 151 }, { "epoch": 0.01, "grad_norm": 0.6315554425204258, "learning_rate": 4.648318042813456e-06, "loss": 0.2915, "step": 152 }, { "epoch": 0.01, "grad_norm": 1.8937871002407352, "learning_rate": 4.678899082568808e-06, "loss": 0.4949, "step": 153 }, { "epoch": 0.01, "grad_norm": 2.7295858132289914, "learning_rate": 4.70948012232416e-06, "loss": 0.5493, "step": 154 }, { "epoch": 0.01, "grad_norm": 1.4863363038682678, "learning_rate": 4.740061162079511e-06, "loss": 0.428, "step": 155 }, { "epoch": 0.01, "grad_norm": 0.7309226519349551, "learning_rate": 4.770642201834863e-06, "loss": 0.3548, "step": 156 }, { "epoch": 0.01, "grad_norm": 1.7476121950751375, "learning_rate": 4.801223241590214e-06, "loss": 0.2879, "step": 157 }, { "epoch": 0.01, "grad_norm": 0.8427509373166522, "learning_rate": 4.831804281345566e-06, "loss": 0.2142, "step": 158 }, { "epoch": 0.01, "grad_norm": 1.1557806766790273, "learning_rate": 4.862385321100918e-06, "loss": 0.3448, "step": 159 }, { "epoch": 0.01, "grad_norm": 1.9430914493078817, "learning_rate": 4.892966360856269e-06, "loss": 0.3876, "step": 160 }, { "epoch": 0.01, "grad_norm": 1.6970393626889206, "learning_rate": 4.923547400611622e-06, "loss": 0.4626, "step": 161 }, { "epoch": 0.01, "grad_norm": 0.9646979458527443, "learning_rate": 4.954128440366973e-06, "loss": 0.4257, "step": 162 }, { "epoch": 0.01, "grad_norm": 6.0685845081564675, "learning_rate": 4.984709480122325e-06, "loss": 0.4528, "step": 163 }, { "epoch": 0.01, "grad_norm": 0.6263312419143187, "learning_rate": 5.015290519877676e-06, "loss": 0.2578, "step": 164 }, { "epoch": 0.01, "grad_norm": 1.2354709901553151, "learning_rate": 5.045871559633028e-06, "loss": 0.4148, "step": 165 }, { "epoch": 0.01, "grad_norm": 2.1384651348668022, "learning_rate": 5.076452599388379e-06, "loss": 0.6045, "step": 166 }, { "epoch": 0.01, "grad_norm": 1.7593991316187028, "learning_rate": 5.1070336391437315e-06, "loss": 0.2988, "step": 167 }, { "epoch": 0.01, "grad_norm": 1.1070426511637235, "learning_rate": 5.137614678899083e-06, "loss": 0.3412, "step": 168 }, { "epoch": 0.01, "grad_norm": 16.755042729934523, "learning_rate": 5.168195718654435e-06, "loss": 0.834, "step": 169 }, { "epoch": 0.01, "grad_norm": 0.595942287487927, "learning_rate": 5.198776758409786e-06, "loss": 0.1784, "step": 170 }, { "epoch": 0.01, "grad_norm": 2.9781977459238878, "learning_rate": 5.229357798165137e-06, "loss": 0.4856, "step": 171 }, { "epoch": 0.01, "grad_norm": 1.743707761854706, "learning_rate": 5.25993883792049e-06, "loss": 0.4108, "step": 172 }, { "epoch": 0.01, "grad_norm": 1.4335585377217397, "learning_rate": 5.290519877675841e-06, "loss": 0.2801, "step": 173 }, { "epoch": 0.01, "grad_norm": 3.268448706873278, "learning_rate": 5.3211009174311936e-06, "loss": 0.4064, "step": 174 }, { "epoch": 0.01, "grad_norm": 6.046064006825953, "learning_rate": 5.351681957186545e-06, "loss": 0.5894, "step": 175 }, { "epoch": 0.01, "grad_norm": 2.0907423816342123, "learning_rate": 5.382262996941896e-06, "loss": 0.3124, "step": 176 }, { "epoch": 0.01, "grad_norm": 1.0392059683210848, "learning_rate": 5.412844036697248e-06, "loss": 0.3503, "step": 177 }, { "epoch": 0.01, "grad_norm": 1.748876170924891, "learning_rate": 5.4434250764525995e-06, "loss": 0.5963, "step": 178 }, { "epoch": 0.01, "grad_norm": 0.7539196564526174, "learning_rate": 5.474006116207952e-06, "loss": 0.2244, "step": 179 }, { "epoch": 0.01, "grad_norm": 0.9629743745178759, "learning_rate": 5.504587155963303e-06, "loss": 0.2872, "step": 180 }, { "epoch": 0.01, "grad_norm": 4.06741027922091, "learning_rate": 5.535168195718656e-06, "loss": 0.6977, "step": 181 }, { "epoch": 0.01, "grad_norm": 1.9051407200342025, "learning_rate": 5.565749235474006e-06, "loss": 0.6034, "step": 182 }, { "epoch": 0.01, "grad_norm": 0.8719893024341318, "learning_rate": 5.596330275229358e-06, "loss": 0.329, "step": 183 }, { "epoch": 0.01, "grad_norm": 1.981439202429174, "learning_rate": 5.62691131498471e-06, "loss": 0.4728, "step": 184 }, { "epoch": 0.01, "grad_norm": 0.7483912688496023, "learning_rate": 5.657492354740062e-06, "loss": 0.2022, "step": 185 }, { "epoch": 0.01, "grad_norm": 1.9298684647192363, "learning_rate": 5.688073394495414e-06, "loss": 0.294, "step": 186 }, { "epoch": 0.01, "grad_norm": 1.1019510638621983, "learning_rate": 5.7186544342507654e-06, "loss": 0.4441, "step": 187 }, { "epoch": 0.01, "grad_norm": 1.0229267963551927, "learning_rate": 5.749235474006116e-06, "loss": 0.4189, "step": 188 }, { "epoch": 0.01, "grad_norm": 0.9956188357743009, "learning_rate": 5.7798165137614684e-06, "loss": 0.3087, "step": 189 }, { "epoch": 0.01, "grad_norm": 2.5129588509120038, "learning_rate": 5.81039755351682e-06, "loss": 0.6393, "step": 190 }, { "epoch": 0.01, "grad_norm": 0.6523042917995521, "learning_rate": 5.840978593272172e-06, "loss": 0.2511, "step": 191 }, { "epoch": 0.01, "grad_norm": 0.8102556333991116, "learning_rate": 5.871559633027524e-06, "loss": 0.4086, "step": 192 }, { "epoch": 0.01, "grad_norm": 1.0311729709467465, "learning_rate": 5.902140672782875e-06, "loss": 0.1756, "step": 193 }, { "epoch": 0.01, "grad_norm": 2.235756450108399, "learning_rate": 5.932721712538227e-06, "loss": 0.6144, "step": 194 }, { "epoch": 0.01, "grad_norm": 0.9232365776140682, "learning_rate": 5.963302752293578e-06, "loss": 0.3716, "step": 195 }, { "epoch": 0.01, "grad_norm": 0.8667312621772022, "learning_rate": 5.9938837920489305e-06, "loss": 0.3824, "step": 196 }, { "epoch": 0.01, "grad_norm": 0.8949942763725209, "learning_rate": 6.024464831804282e-06, "loss": 0.2913, "step": 197 }, { "epoch": 0.01, "grad_norm": 0.6167732198797761, "learning_rate": 6.0550458715596335e-06, "loss": 0.2619, "step": 198 }, { "epoch": 0.01, "grad_norm": 0.9634660569871618, "learning_rate": 6.085626911314986e-06, "loss": 0.3756, "step": 199 }, { "epoch": 0.01, "grad_norm": 1.5629625117733297, "learning_rate": 6.1162079510703365e-06, "loss": 0.5047, "step": 200 }, { "epoch": 0.01, "grad_norm": 0.7809240701431114, "learning_rate": 6.146788990825688e-06, "loss": 0.3506, "step": 201 }, { "epoch": 0.01, "grad_norm": 2.1375791584853383, "learning_rate": 6.17737003058104e-06, "loss": 0.6487, "step": 202 }, { "epoch": 0.01, "grad_norm": 0.7827517797926076, "learning_rate": 6.207951070336392e-06, "loss": 0.3578, "step": 203 }, { "epoch": 0.01, "grad_norm": 0.7016954304377935, "learning_rate": 6.238532110091744e-06, "loss": 0.2803, "step": 204 }, { "epoch": 0.01, "grad_norm": 1.0272677012145377, "learning_rate": 6.269113149847096e-06, "loss": 0.2799, "step": 205 }, { "epoch": 0.01, "grad_norm": 1.4383931929682603, "learning_rate": 6.299694189602446e-06, "loss": 0.4794, "step": 206 }, { "epoch": 0.01, "grad_norm": 0.8459267391845401, "learning_rate": 6.330275229357799e-06, "loss": 0.3339, "step": 207 }, { "epoch": 0.01, "grad_norm": 0.78025624675871, "learning_rate": 6.36085626911315e-06, "loss": 0.4105, "step": 208 }, { "epoch": 0.01, "grad_norm": 0.6623810180029133, "learning_rate": 6.391437308868502e-06, "loss": 0.1782, "step": 209 }, { "epoch": 0.01, "grad_norm": 0.7778848534162461, "learning_rate": 6.422018348623854e-06, "loss": 0.3507, "step": 210 }, { "epoch": 0.01, "grad_norm": 0.6560569098697357, "learning_rate": 6.452599388379206e-06, "loss": 0.3087, "step": 211 }, { "epoch": 0.01, "grad_norm": 1.7114425698835818, "learning_rate": 6.483180428134557e-06, "loss": 0.5154, "step": 212 }, { "epoch": 0.01, "grad_norm": 0.6889659100785178, "learning_rate": 6.513761467889908e-06, "loss": 0.2943, "step": 213 }, { "epoch": 0.01, "grad_norm": 2.2908756798050636, "learning_rate": 6.544342507645261e-06, "loss": 0.8062, "step": 214 }, { "epoch": 0.01, "grad_norm": 0.7967143432356965, "learning_rate": 6.574923547400612e-06, "loss": 0.3369, "step": 215 }, { "epoch": 0.01, "grad_norm": 0.5703746818280078, "learning_rate": 6.6055045871559645e-06, "loss": 0.2597, "step": 216 }, { "epoch": 0.01, "grad_norm": 1.1364167821993945, "learning_rate": 6.636085626911316e-06, "loss": 0.3605, "step": 217 }, { "epoch": 0.01, "grad_norm": 2.0648778888584127, "learning_rate": 6.666666666666667e-06, "loss": 0.6531, "step": 218 }, { "epoch": 0.01, "grad_norm": 0.6038783969023067, "learning_rate": 6.697247706422019e-06, "loss": 0.2848, "step": 219 }, { "epoch": 0.01, "grad_norm": 1.03834383898345, "learning_rate": 6.7278287461773705e-06, "loss": 0.4219, "step": 220 }, { "epoch": 0.01, "grad_norm": 1.9149126612284773, "learning_rate": 6.758409785932723e-06, "loss": 0.7638, "step": 221 }, { "epoch": 0.01, "grad_norm": 0.5476725930632746, "learning_rate": 6.788990825688074e-06, "loss": 0.2069, "step": 222 }, { "epoch": 0.01, "grad_norm": 0.6949295487001947, "learning_rate": 6.819571865443425e-06, "loss": 0.3383, "step": 223 }, { "epoch": 0.01, "grad_norm": 2.839944387833604, "learning_rate": 6.850152905198777e-06, "loss": 0.606, "step": 224 }, { "epoch": 0.01, "grad_norm": 0.7736955057458493, "learning_rate": 6.880733944954129e-06, "loss": 0.2625, "step": 225 }, { "epoch": 0.01, "grad_norm": 2.4174870123849326, "learning_rate": 6.911314984709481e-06, "loss": 0.8062, "step": 226 }, { "epoch": 0.01, "grad_norm": 0.8721822096067412, "learning_rate": 6.941896024464833e-06, "loss": 0.3815, "step": 227 }, { "epoch": 0.01, "grad_norm": 0.7188439344849527, "learning_rate": 6.972477064220184e-06, "loss": 0.2521, "step": 228 }, { "epoch": 0.01, "grad_norm": 2.377125921548495, "learning_rate": 7.0030581039755356e-06, "loss": 0.6094, "step": 229 }, { "epoch": 0.01, "grad_norm": 0.6777293145930288, "learning_rate": 7.033639143730887e-06, "loss": 0.2064, "step": 230 }, { "epoch": 0.01, "grad_norm": 0.7274148101358805, "learning_rate": 7.0642201834862385e-06, "loss": 0.3427, "step": 231 }, { "epoch": 0.01, "grad_norm": 1.228664566153047, "learning_rate": 7.094801223241591e-06, "loss": 0.3745, "step": 232 }, { "epoch": 0.01, "grad_norm": 2.3010104651391137, "learning_rate": 7.125382262996942e-06, "loss": 0.6975, "step": 233 }, { "epoch": 0.01, "grad_norm": 0.8441680062439891, "learning_rate": 7.155963302752295e-06, "loss": 0.3513, "step": 234 }, { "epoch": 0.01, "grad_norm": 1.0555790027411698, "learning_rate": 7.186544342507645e-06, "loss": 0.4059, "step": 235 }, { "epoch": 0.01, "grad_norm": 0.6459776115670207, "learning_rate": 7.217125382262997e-06, "loss": 0.1799, "step": 236 }, { "epoch": 0.01, "grad_norm": 0.7663421135004944, "learning_rate": 7.247706422018349e-06, "loss": 0.3065, "step": 237 }, { "epoch": 0.01, "grad_norm": 3.006211329319932, "learning_rate": 7.278287461773701e-06, "loss": 0.6771, "step": 238 }, { "epoch": 0.01, "grad_norm": 0.9203900291884897, "learning_rate": 7.308868501529053e-06, "loss": 0.4023, "step": 239 }, { "epoch": 0.01, "grad_norm": 0.8002503075901388, "learning_rate": 7.3394495412844045e-06, "loss": 0.3513, "step": 240 }, { "epoch": 0.01, "grad_norm": 2.5765533599687913, "learning_rate": 7.370030581039755e-06, "loss": 0.5199, "step": 241 }, { "epoch": 0.01, "grad_norm": 0.7466662650627771, "learning_rate": 7.4006116207951074e-06, "loss": 0.224, "step": 242 }, { "epoch": 0.01, "grad_norm": 0.9923797436053715, "learning_rate": 7.431192660550459e-06, "loss": 0.3222, "step": 243 }, { "epoch": 0.01, "grad_norm": 1.0301250651028047, "learning_rate": 7.461773700305811e-06, "loss": 0.4212, "step": 244 }, { "epoch": 0.01, "grad_norm": 2.3480764358410604, "learning_rate": 7.492354740061163e-06, "loss": 0.548, "step": 245 }, { "epoch": 0.01, "grad_norm": 0.7814353726760943, "learning_rate": 7.522935779816515e-06, "loss": 0.2655, "step": 246 }, { "epoch": 0.01, "grad_norm": 0.7208081461233595, "learning_rate": 7.553516819571866e-06, "loss": 0.3723, "step": 247 }, { "epoch": 0.01, "grad_norm": 1.110897860240016, "learning_rate": 7.584097859327217e-06, "loss": 0.0905, "step": 248 }, { "epoch": 0.01, "grad_norm": 0.7492195329269087, "learning_rate": 7.6146788990825695e-06, "loss": 0.3429, "step": 249 }, { "epoch": 0.01, "grad_norm": 1.1872761753193617, "learning_rate": 7.645259938837921e-06, "loss": 0.4847, "step": 250 }, { "epoch": 0.01, "grad_norm": 0.9754240277457226, "learning_rate": 7.675840978593273e-06, "loss": 0.3903, "step": 251 }, { "epoch": 0.01, "grad_norm": 0.9295037610052851, "learning_rate": 7.706422018348626e-06, "loss": 0.3871, "step": 252 }, { "epoch": 0.01, "grad_norm": 1.1093522955080313, "learning_rate": 7.737003058103975e-06, "loss": 0.5004, "step": 253 }, { "epoch": 0.01, "grad_norm": 1.0757285158880268, "learning_rate": 7.767584097859327e-06, "loss": 0.2359, "step": 254 }, { "epoch": 0.01, "grad_norm": 0.6359404616760351, "learning_rate": 7.79816513761468e-06, "loss": 0.2966, "step": 255 }, { "epoch": 0.01, "grad_norm": 2.156136784597946, "learning_rate": 7.828746177370032e-06, "loss": 0.8406, "step": 256 }, { "epoch": 0.01, "grad_norm": 1.1395863799096988, "learning_rate": 7.859327217125383e-06, "loss": 0.6139, "step": 257 }, { "epoch": 0.01, "grad_norm": 0.8198175381111662, "learning_rate": 7.889908256880735e-06, "loss": 0.2974, "step": 258 }, { "epoch": 0.01, "grad_norm": 0.7813166509177186, "learning_rate": 7.920489296636086e-06, "loss": 0.3595, "step": 259 }, { "epoch": 0.01, "grad_norm": 1.0388730778049184, "learning_rate": 7.951070336391438e-06, "loss": 0.3006, "step": 260 }, { "epoch": 0.01, "grad_norm": 1.5543601443420103, "learning_rate": 7.981651376146789e-06, "loss": 0.1913, "step": 261 }, { "epoch": 0.01, "grad_norm": 0.7933105741285864, "learning_rate": 8.01223241590214e-06, "loss": 0.4219, "step": 262 }, { "epoch": 0.01, "grad_norm": 0.7478939291210178, "learning_rate": 8.042813455657494e-06, "loss": 0.4328, "step": 263 }, { "epoch": 0.01, "grad_norm": 1.2206596211456053, "learning_rate": 8.073394495412845e-06, "loss": 0.1402, "step": 264 }, { "epoch": 0.01, "grad_norm": 0.8138014072241885, "learning_rate": 8.103975535168197e-06, "loss": 0.3852, "step": 265 }, { "epoch": 0.01, "grad_norm": 0.7242538877420546, "learning_rate": 8.134556574923548e-06, "loss": 0.3119, "step": 266 }, { "epoch": 0.01, "grad_norm": 0.8470395027208345, "learning_rate": 8.1651376146789e-06, "loss": 0.3194, "step": 267 }, { "epoch": 0.01, "grad_norm": 0.8269864606439408, "learning_rate": 8.195718654434251e-06, "loss": 0.3704, "step": 268 }, { "epoch": 0.01, "grad_norm": 1.4414825479093278, "learning_rate": 8.226299694189603e-06, "loss": 0.6307, "step": 269 }, { "epoch": 0.01, "grad_norm": 0.6648340963285406, "learning_rate": 8.256880733944956e-06, "loss": 0.1903, "step": 270 }, { "epoch": 0.01, "grad_norm": 0.6931424601917758, "learning_rate": 8.287461773700306e-06, "loss": 0.3134, "step": 271 }, { "epoch": 0.01, "grad_norm": 1.8853101134324053, "learning_rate": 8.318042813455657e-06, "loss": 0.6335, "step": 272 }, { "epoch": 0.01, "grad_norm": 1.1563705540376346, "learning_rate": 8.34862385321101e-06, "loss": 0.4397, "step": 273 }, { "epoch": 0.01, "grad_norm": 0.8124464000726055, "learning_rate": 8.379204892966362e-06, "loss": 0.3661, "step": 274 }, { "epoch": 0.01, "grad_norm": 0.8105427241463092, "learning_rate": 8.409785932721713e-06, "loss": 0.4586, "step": 275 }, { "epoch": 0.01, "grad_norm": 0.5357711109158649, "learning_rate": 8.440366972477065e-06, "loss": 0.2098, "step": 276 }, { "epoch": 0.01, "grad_norm": 1.5360722979697834, "learning_rate": 8.470948012232416e-06, "loss": 0.4153, "step": 277 }, { "epoch": 0.01, "grad_norm": 0.745352890390255, "learning_rate": 8.501529051987768e-06, "loss": 0.4, "step": 278 }, { "epoch": 0.01, "grad_norm": 1.4003080145679683, "learning_rate": 8.53211009174312e-06, "loss": 0.5465, "step": 279 }, { "epoch": 0.01, "grad_norm": 0.7829585221579652, "learning_rate": 8.562691131498472e-06, "loss": 0.3896, "step": 280 }, { "epoch": 0.01, "grad_norm": 0.8361081286916281, "learning_rate": 8.593272171253824e-06, "loss": 0.2742, "step": 281 }, { "epoch": 0.01, "grad_norm": 0.7728878594774012, "learning_rate": 8.623853211009175e-06, "loss": 0.3566, "step": 282 }, { "epoch": 0.01, "grad_norm": 0.7858461329116725, "learning_rate": 8.654434250764527e-06, "loss": 0.3368, "step": 283 }, { "epoch": 0.01, "grad_norm": 1.7053783257735307, "learning_rate": 8.685015290519878e-06, "loss": 0.5649, "step": 284 }, { "epoch": 0.01, "grad_norm": 0.9796624221853422, "learning_rate": 8.71559633027523e-06, "loss": 0.5042, "step": 285 }, { "epoch": 0.01, "grad_norm": 0.7502616242629604, "learning_rate": 8.746177370030581e-06, "loss": 0.3745, "step": 286 }, { "epoch": 0.01, "grad_norm": 0.7865669305134176, "learning_rate": 8.776758409785935e-06, "loss": 0.3314, "step": 287 }, { "epoch": 0.01, "grad_norm": 0.750507980180111, "learning_rate": 8.807339449541286e-06, "loss": 0.1672, "step": 288 }, { "epoch": 0.01, "grad_norm": 0.8574640040502772, "learning_rate": 8.837920489296636e-06, "loss": 0.4182, "step": 289 }, { "epoch": 0.01, "grad_norm": 0.9761410121659166, "learning_rate": 8.868501529051989e-06, "loss": 0.3252, "step": 290 }, { "epoch": 0.01, "grad_norm": 0.900373323607157, "learning_rate": 8.89908256880734e-06, "loss": 0.3753, "step": 291 }, { "epoch": 0.01, "grad_norm": 0.8811686643189814, "learning_rate": 8.929663608562692e-06, "loss": 0.4141, "step": 292 }, { "epoch": 0.01, "grad_norm": 1.6556180206640245, "learning_rate": 8.960244648318043e-06, "loss": 0.7313, "step": 293 }, { "epoch": 0.01, "grad_norm": 0.6197447614759172, "learning_rate": 8.990825688073395e-06, "loss": 0.2555, "step": 294 }, { "epoch": 0.01, "grad_norm": 0.6435016896033203, "learning_rate": 9.021406727828746e-06, "loss": 0.2879, "step": 295 }, { "epoch": 0.01, "grad_norm": 1.6833695167150602, "learning_rate": 9.051987767584098e-06, "loss": 0.6286, "step": 296 }, { "epoch": 0.01, "grad_norm": 1.4149862929273584, "learning_rate": 9.08256880733945e-06, "loss": 0.4438, "step": 297 }, { "epoch": 0.01, "grad_norm": 0.8498702304557568, "learning_rate": 9.113149847094803e-06, "loss": 0.3515, "step": 298 }, { "epoch": 0.01, "grad_norm": 0.8523773132344277, "learning_rate": 9.143730886850154e-06, "loss": 0.4076, "step": 299 }, { "epoch": 0.01, "grad_norm": 0.5624440781104854, "learning_rate": 9.174311926605506e-06, "loss": 0.1182, "step": 300 }, { "epoch": 0.01, "grad_norm": 0.8298233411139421, "learning_rate": 9.204892966360857e-06, "loss": 0.3659, "step": 301 }, { "epoch": 0.01, "grad_norm": 1.4104560359545986, "learning_rate": 9.235474006116209e-06, "loss": 0.405, "step": 302 }, { "epoch": 0.01, "grad_norm": 2.0032774097771338, "learning_rate": 9.26605504587156e-06, "loss": 0.4441, "step": 303 }, { "epoch": 0.01, "grad_norm": 0.748144233633211, "learning_rate": 9.296636085626912e-06, "loss": 0.3472, "step": 304 }, { "epoch": 0.01, "grad_norm": 2.418993936486022, "learning_rate": 9.327217125382265e-06, "loss": 0.7451, "step": 305 }, { "epoch": 0.01, "grad_norm": 0.6437626848442127, "learning_rate": 9.357798165137616e-06, "loss": 0.3129, "step": 306 }, { "epoch": 0.01, "grad_norm": 0.6505061969727871, "learning_rate": 9.388379204892966e-06, "loss": 0.2198, "step": 307 }, { "epoch": 0.01, "grad_norm": 2.3903685173655465, "learning_rate": 9.41896024464832e-06, "loss": 0.584, "step": 308 }, { "epoch": 0.01, "grad_norm": 1.8450947741650001, "learning_rate": 9.44954128440367e-06, "loss": 0.5697, "step": 309 }, { "epoch": 0.01, "grad_norm": 0.7760574948045484, "learning_rate": 9.480122324159022e-06, "loss": 0.2787, "step": 310 }, { "epoch": 0.01, "grad_norm": 1.0058871279995607, "learning_rate": 9.510703363914374e-06, "loss": 0.4273, "step": 311 }, { "epoch": 0.01, "grad_norm": 0.5370371077102385, "learning_rate": 9.541284403669727e-06, "loss": 0.1899, "step": 312 }, { "epoch": 0.01, "grad_norm": 0.8100499620762998, "learning_rate": 9.571865443425077e-06, "loss": 0.2656, "step": 313 }, { "epoch": 0.01, "grad_norm": 2.2157671474499936, "learning_rate": 9.602446483180428e-06, "loss": 0.4622, "step": 314 }, { "epoch": 0.01, "grad_norm": 2.4671811401365935, "learning_rate": 9.633027522935781e-06, "loss": 0.6748, "step": 315 }, { "epoch": 0.01, "grad_norm": 0.7547832083232264, "learning_rate": 9.663608562691133e-06, "loss": 0.2638, "step": 316 }, { "epoch": 0.01, "grad_norm": 1.9812979691121875, "learning_rate": 9.694189602446484e-06, "loss": 0.7611, "step": 317 }, { "epoch": 0.01, "grad_norm": 1.022682195549178, "learning_rate": 9.724770642201836e-06, "loss": 0.399, "step": 318 }, { "epoch": 0.01, "grad_norm": 0.6676515391118802, "learning_rate": 9.755351681957187e-06, "loss": 0.2653, "step": 319 }, { "epoch": 0.01, "grad_norm": 4.0568140936833395, "learning_rate": 9.785932721712539e-06, "loss": 0.4753, "step": 320 }, { "epoch": 0.01, "grad_norm": 2.587635635676488, "learning_rate": 9.81651376146789e-06, "loss": 0.8273, "step": 321 }, { "epoch": 0.01, "grad_norm": 0.7085024748208396, "learning_rate": 9.847094801223243e-06, "loss": 0.3223, "step": 322 }, { "epoch": 0.01, "grad_norm": 1.1549277159230098, "learning_rate": 9.877675840978595e-06, "loss": 0.3669, "step": 323 }, { "epoch": 0.01, "grad_norm": 2.319490537481074, "learning_rate": 9.908256880733946e-06, "loss": 0.6666, "step": 324 }, { "epoch": 0.01, "grad_norm": 0.960294437970511, "learning_rate": 9.938837920489298e-06, "loss": 0.3424, "step": 325 }, { "epoch": 0.01, "grad_norm": 1.2785524540762303, "learning_rate": 9.96941896024465e-06, "loss": 0.4015, "step": 326 }, { "epoch": 0.02, "grad_norm": 0.6456288537563728, "learning_rate": 1e-05, "loss": 0.2521, "step": 327 }, { "epoch": 0.02, "grad_norm": 0.759864669199683, "learning_rate": 1.0030581039755352e-05, "loss": 0.3502, "step": 328 }, { "epoch": 0.02, "grad_norm": 2.076650660134877, "learning_rate": 1.0061162079510704e-05, "loss": 0.5814, "step": 329 }, { "epoch": 0.02, "grad_norm": 0.637064214853258, "learning_rate": 1.0091743119266055e-05, "loss": 0.4012, "step": 330 }, { "epoch": 0.02, "grad_norm": 0.7931196364157752, "learning_rate": 1.0122324159021408e-05, "loss": 0.3373, "step": 331 }, { "epoch": 0.02, "grad_norm": 1.0645734733060122, "learning_rate": 1.0152905198776758e-05, "loss": 0.434, "step": 332 }, { "epoch": 0.02, "grad_norm": 0.6966006334314522, "learning_rate": 1.018348623853211e-05, "loss": 0.234, "step": 333 }, { "epoch": 0.02, "grad_norm": 0.7690031493695907, "learning_rate": 1.0214067278287463e-05, "loss": 0.3198, "step": 334 }, { "epoch": 0.02, "grad_norm": 1.1847149440874944, "learning_rate": 1.0244648318042814e-05, "loss": 0.4529, "step": 335 }, { "epoch": 0.02, "grad_norm": 1.5650718798411607, "learning_rate": 1.0275229357798166e-05, "loss": 0.4984, "step": 336 }, { "epoch": 0.02, "grad_norm": 0.6738602270311174, "learning_rate": 1.0305810397553517e-05, "loss": 0.339, "step": 337 }, { "epoch": 0.02, "grad_norm": 0.7598539416526311, "learning_rate": 1.033639143730887e-05, "loss": 0.3975, "step": 338 }, { "epoch": 0.02, "grad_norm": 0.6076053808679849, "learning_rate": 1.036697247706422e-05, "loss": 0.1222, "step": 339 }, { "epoch": 0.02, "grad_norm": 0.7186668506285876, "learning_rate": 1.0397553516819572e-05, "loss": 0.3186, "step": 340 }, { "epoch": 0.02, "grad_norm": 2.2237523565383626, "learning_rate": 1.0428134556574925e-05, "loss": 0.73, "step": 341 }, { "epoch": 0.02, "grad_norm": 1.0291621282285297, "learning_rate": 1.0458715596330275e-05, "loss": 0.4066, "step": 342 }, { "epoch": 0.02, "grad_norm": 0.7359736072379032, "learning_rate": 1.0489296636085628e-05, "loss": 0.3533, "step": 343 }, { "epoch": 0.02, "grad_norm": 2.33749940059033, "learning_rate": 1.051987767584098e-05, "loss": 0.7251, "step": 344 }, { "epoch": 0.02, "grad_norm": 0.6416158029010471, "learning_rate": 1.055045871559633e-05, "loss": 0.2455, "step": 345 }, { "epoch": 0.02, "grad_norm": 0.6058298620506867, "learning_rate": 1.0581039755351682e-05, "loss": 0.2532, "step": 346 }, { "epoch": 0.02, "grad_norm": 1.0936915943993688, "learning_rate": 1.0611620795107034e-05, "loss": 0.4751, "step": 347 }, { "epoch": 0.02, "grad_norm": 1.2240821373266129, "learning_rate": 1.0642201834862387e-05, "loss": 0.558, "step": 348 }, { "epoch": 0.02, "grad_norm": 0.7564767772488803, "learning_rate": 1.0672782874617737e-05, "loss": 0.2557, "step": 349 }, { "epoch": 0.02, "grad_norm": 0.7336968297136265, "learning_rate": 1.070336391437309e-05, "loss": 0.3602, "step": 350 }, { "epoch": 0.02, "grad_norm": 0.8095160037694071, "learning_rate": 1.0733944954128442e-05, "loss": 0.306, "step": 351 }, { "epoch": 0.02, "grad_norm": 0.6639109354982137, "learning_rate": 1.0764525993883791e-05, "loss": 0.2604, "step": 352 }, { "epoch": 0.02, "grad_norm": 0.9584361311046328, "learning_rate": 1.0795107033639145e-05, "loss": 0.3934, "step": 353 }, { "epoch": 0.02, "grad_norm": 0.8109570842065387, "learning_rate": 1.0825688073394496e-05, "loss": 0.42, "step": 354 }, { "epoch": 0.02, "grad_norm": 0.7546490824345372, "learning_rate": 1.085626911314985e-05, "loss": 0.2424, "step": 355 }, { "epoch": 0.02, "grad_norm": 2.2379281967102345, "learning_rate": 1.0886850152905199e-05, "loss": 0.7048, "step": 356 }, { "epoch": 0.02, "grad_norm": 1.9752761586037637, "learning_rate": 1.091743119266055e-05, "loss": 0.8772, "step": 357 }, { "epoch": 0.02, "grad_norm": 0.6279187050026355, "learning_rate": 1.0948012232415904e-05, "loss": 0.3152, "step": 358 }, { "epoch": 0.02, "grad_norm": 0.7609205821095003, "learning_rate": 1.0978593272171254e-05, "loss": 0.2513, "step": 359 }, { "epoch": 0.02, "grad_norm": 0.8830514530017181, "learning_rate": 1.1009174311926607e-05, "loss": 0.4218, "step": 360 }, { "epoch": 0.02, "grad_norm": 0.7209664013982029, "learning_rate": 1.1039755351681958e-05, "loss": 0.3377, "step": 361 }, { "epoch": 0.02, "grad_norm": 0.8470943324736788, "learning_rate": 1.1070336391437311e-05, "loss": 0.326, "step": 362 }, { "epoch": 0.02, "grad_norm": 2.4407430843873543, "learning_rate": 1.1100917431192661e-05, "loss": 0.7243, "step": 363 }, { "epoch": 0.02, "grad_norm": 0.9412489307585115, "learning_rate": 1.1131498470948013e-05, "loss": 0.3216, "step": 364 }, { "epoch": 0.02, "grad_norm": 1.0119696792791166, "learning_rate": 1.1162079510703366e-05, "loss": 0.4486, "step": 365 }, { "epoch": 0.02, "grad_norm": 0.5471987353259963, "learning_rate": 1.1192660550458716e-05, "loss": 0.2975, "step": 366 }, { "epoch": 0.02, "grad_norm": 1.009739717556094, "learning_rate": 1.1223241590214069e-05, "loss": 0.3711, "step": 367 }, { "epoch": 0.02, "grad_norm": 1.0376472336023075, "learning_rate": 1.125382262996942e-05, "loss": 0.3671, "step": 368 }, { "epoch": 0.02, "grad_norm": 0.812724228520615, "learning_rate": 1.128440366972477e-05, "loss": 0.3484, "step": 369 }, { "epoch": 0.02, "grad_norm": 0.726029139719463, "learning_rate": 1.1314984709480123e-05, "loss": 0.3279, "step": 370 }, { "epoch": 0.02, "grad_norm": 1.0894714071167073, "learning_rate": 1.1345565749235475e-05, "loss": 0.4861, "step": 371 }, { "epoch": 0.02, "grad_norm": 0.9872371730419399, "learning_rate": 1.1376146788990828e-05, "loss": 0.1539, "step": 372 }, { "epoch": 0.02, "grad_norm": 0.8118300125124747, "learning_rate": 1.1406727828746178e-05, "loss": 0.3325, "step": 373 }, { "epoch": 0.02, "grad_norm": 0.8190293509812799, "learning_rate": 1.1437308868501531e-05, "loss": 0.3736, "step": 374 }, { "epoch": 0.02, "grad_norm": 1.417328509273749, "learning_rate": 1.1467889908256882e-05, "loss": 0.5584, "step": 375 }, { "epoch": 0.02, "grad_norm": 1.6513182871456002, "learning_rate": 1.1498470948012232e-05, "loss": 0.3738, "step": 376 }, { "epoch": 0.02, "grad_norm": 0.9677129715153956, "learning_rate": 1.1529051987767585e-05, "loss": 0.4837, "step": 377 }, { "epoch": 0.02, "grad_norm": 0.6074026460827666, "learning_rate": 1.1559633027522937e-05, "loss": 0.2104, "step": 378 }, { "epoch": 0.02, "grad_norm": 0.9201543615627558, "learning_rate": 1.159021406727829e-05, "loss": 0.3436, "step": 379 }, { "epoch": 0.02, "grad_norm": 1.059686538958258, "learning_rate": 1.162079510703364e-05, "loss": 0.4311, "step": 380 }, { "epoch": 0.02, "grad_norm": 0.9011482100134448, "learning_rate": 1.1651376146788991e-05, "loss": 0.4742, "step": 381 }, { "epoch": 0.02, "grad_norm": 0.9137900547996819, "learning_rate": 1.1681957186544344e-05, "loss": 0.3068, "step": 382 }, { "epoch": 0.02, "grad_norm": 1.0100750082919883, "learning_rate": 1.1712538226299694e-05, "loss": 0.4867, "step": 383 }, { "epoch": 0.02, "grad_norm": 0.5061832127642694, "learning_rate": 1.1743119266055047e-05, "loss": 0.2197, "step": 384 }, { "epoch": 0.02, "grad_norm": 0.8539484631601367, "learning_rate": 1.1773700305810399e-05, "loss": 0.0943, "step": 385 }, { "epoch": 0.02, "grad_norm": 0.8151997765387853, "learning_rate": 1.180428134556575e-05, "loss": 0.3545, "step": 386 }, { "epoch": 0.02, "grad_norm": 1.8597978147295617, "learning_rate": 1.1834862385321102e-05, "loss": 0.5969, "step": 387 }, { "epoch": 0.02, "grad_norm": 1.009957260666378, "learning_rate": 1.1865443425076453e-05, "loss": 0.3068, "step": 388 }, { "epoch": 0.02, "grad_norm": 0.6987589274442721, "learning_rate": 1.1896024464831805e-05, "loss": 0.3651, "step": 389 }, { "epoch": 0.02, "grad_norm": 1.0381633184548078, "learning_rate": 1.1926605504587156e-05, "loss": 0.4402, "step": 390 }, { "epoch": 0.02, "grad_norm": 0.6409620176569877, "learning_rate": 1.195718654434251e-05, "loss": 0.1702, "step": 391 }, { "epoch": 0.02, "grad_norm": 1.0353341950998738, "learning_rate": 1.1987767584097861e-05, "loss": 0.436, "step": 392 }, { "epoch": 0.02, "grad_norm": 1.4944905572706597, "learning_rate": 1.2018348623853211e-05, "loss": 0.4473, "step": 393 }, { "epoch": 0.02, "grad_norm": 1.1226048672566518, "learning_rate": 1.2048929663608564e-05, "loss": 0.4166, "step": 394 }, { "epoch": 0.02, "grad_norm": 1.0359219477391735, "learning_rate": 1.2079510703363916e-05, "loss": 0.3408, "step": 395 }, { "epoch": 0.02, "grad_norm": 0.8237632127378013, "learning_rate": 1.2110091743119267e-05, "loss": 0.3221, "step": 396 }, { "epoch": 0.02, "grad_norm": 0.6501662996700841, "learning_rate": 1.2140672782874619e-05, "loss": 0.2828, "step": 397 }, { "epoch": 0.02, "grad_norm": 0.8528311194744408, "learning_rate": 1.2171253822629972e-05, "loss": 0.3213, "step": 398 }, { "epoch": 0.02, "grad_norm": 2.0044968679659902, "learning_rate": 1.2201834862385321e-05, "loss": 0.6022, "step": 399 }, { "epoch": 0.02, "grad_norm": 1.005543914958151, "learning_rate": 1.2232415902140673e-05, "loss": 0.3743, "step": 400 }, { "epoch": 0.02, "grad_norm": 1.001992040492003, "learning_rate": 1.2262996941896026e-05, "loss": 0.308, "step": 401 }, { "epoch": 0.02, "grad_norm": 0.6844145238324899, "learning_rate": 1.2293577981651376e-05, "loss": 0.3827, "step": 402 }, { "epoch": 0.02, "grad_norm": 0.7712804353737467, "learning_rate": 1.2324159021406729e-05, "loss": 0.1784, "step": 403 }, { "epoch": 0.02, "grad_norm": 0.927226474041373, "learning_rate": 1.235474006116208e-05, "loss": 0.3421, "step": 404 }, { "epoch": 0.02, "grad_norm": 0.9874817567285509, "learning_rate": 1.238532110091743e-05, "loss": 0.4119, "step": 405 }, { "epoch": 0.02, "grad_norm": 1.2850100671559304, "learning_rate": 1.2415902140672784e-05, "loss": 0.5462, "step": 406 }, { "epoch": 0.02, "grad_norm": 0.7945217936225786, "learning_rate": 1.2446483180428135e-05, "loss": 0.3457, "step": 407 }, { "epoch": 0.02, "grad_norm": 1.7028886133352672, "learning_rate": 1.2477064220183488e-05, "loss": 0.3181, "step": 408 }, { "epoch": 0.02, "grad_norm": 0.5108561208138009, "learning_rate": 1.2507645259938838e-05, "loss": 0.2417, "step": 409 }, { "epoch": 0.02, "grad_norm": 0.7507026872828835, "learning_rate": 1.2538226299694191e-05, "loss": 0.3939, "step": 410 }, { "epoch": 0.02, "grad_norm": 1.634793257460326, "learning_rate": 1.2568807339449543e-05, "loss": 0.515, "step": 411 }, { "epoch": 0.02, "grad_norm": 1.1705692756241106, "learning_rate": 1.2599388379204893e-05, "loss": 0.3827, "step": 412 }, { "epoch": 0.02, "grad_norm": 0.704842009959545, "learning_rate": 1.2629969418960246e-05, "loss": 0.3675, "step": 413 }, { "epoch": 0.02, "grad_norm": 0.9546621527533904, "learning_rate": 1.2660550458715597e-05, "loss": 0.3704, "step": 414 }, { "epoch": 0.02, "grad_norm": 0.6866097869958888, "learning_rate": 1.269113149847095e-05, "loss": 0.3019, "step": 415 }, { "epoch": 0.02, "grad_norm": 0.788637594215398, "learning_rate": 1.27217125382263e-05, "loss": 0.3802, "step": 416 }, { "epoch": 0.02, "grad_norm": 0.6524731806834094, "learning_rate": 1.2752293577981652e-05, "loss": 0.3276, "step": 417 }, { "epoch": 0.02, "grad_norm": 0.9544698235194905, "learning_rate": 1.2782874617737005e-05, "loss": 0.4293, "step": 418 }, { "epoch": 0.02, "grad_norm": 0.7443026046479974, "learning_rate": 1.2813455657492355e-05, "loss": 0.3396, "step": 419 }, { "epoch": 0.02, "grad_norm": 1.659484928471325, "learning_rate": 1.2844036697247708e-05, "loss": 0.761, "step": 420 }, { "epoch": 0.02, "grad_norm": 0.6879528706866395, "learning_rate": 1.287461773700306e-05, "loss": 0.3629, "step": 421 }, { "epoch": 0.02, "grad_norm": 0.7196086203688604, "learning_rate": 1.2905198776758412e-05, "loss": 0.3475, "step": 422 }, { "epoch": 0.02, "grad_norm": 0.5591856445738644, "learning_rate": 1.2935779816513762e-05, "loss": 0.248, "step": 423 }, { "epoch": 0.02, "grad_norm": 0.9675577284207191, "learning_rate": 1.2966360856269114e-05, "loss": 0.3126, "step": 424 }, { "epoch": 0.02, "grad_norm": 0.7340575606616077, "learning_rate": 1.2996941896024467e-05, "loss": 0.3291, "step": 425 }, { "epoch": 0.02, "grad_norm": 1.1196690378481349, "learning_rate": 1.3027522935779817e-05, "loss": 0.4513, "step": 426 }, { "epoch": 0.02, "grad_norm": 0.9605708680285826, "learning_rate": 1.305810397553517e-05, "loss": 0.4623, "step": 427 }, { "epoch": 0.02, "grad_norm": 0.667672776088915, "learning_rate": 1.3088685015290521e-05, "loss": 0.3887, "step": 428 }, { "epoch": 0.02, "grad_norm": 0.5978278305226283, "learning_rate": 1.3119266055045871e-05, "loss": 0.2308, "step": 429 }, { "epoch": 0.02, "grad_norm": 0.813447611175994, "learning_rate": 1.3149847094801224e-05, "loss": 0.3384, "step": 430 }, { "epoch": 0.02, "grad_norm": 0.8006386077076075, "learning_rate": 1.3180428134556576e-05, "loss": 0.3553, "step": 431 }, { "epoch": 0.02, "grad_norm": 1.5360859572371073, "learning_rate": 1.3211009174311929e-05, "loss": 0.5853, "step": 432 }, { "epoch": 0.02, "grad_norm": 0.586386662347721, "learning_rate": 1.3241590214067279e-05, "loss": 0.3862, "step": 433 }, { "epoch": 0.02, "grad_norm": 0.755074881509733, "learning_rate": 1.3272171253822632e-05, "loss": 0.3108, "step": 434 }, { "epoch": 0.02, "grad_norm": 0.5473555246521312, "learning_rate": 1.3302752293577984e-05, "loss": 0.2196, "step": 435 }, { "epoch": 0.02, "grad_norm": 1.1672039223034718, "learning_rate": 1.3333333333333333e-05, "loss": 0.5633, "step": 436 }, { "epoch": 0.02, "grad_norm": 0.606662784642807, "learning_rate": 1.3363914373088686e-05, "loss": 0.2706, "step": 437 }, { "epoch": 0.02, "grad_norm": 1.2423080988451263, "learning_rate": 1.3394495412844038e-05, "loss": 0.4528, "step": 438 }, { "epoch": 0.02, "grad_norm": 1.1074032355173484, "learning_rate": 1.3425076452599391e-05, "loss": 0.5942, "step": 439 }, { "epoch": 0.02, "grad_norm": 0.645449711454087, "learning_rate": 1.3455657492354741e-05, "loss": 0.2469, "step": 440 }, { "epoch": 0.02, "grad_norm": 0.4847833719261981, "learning_rate": 1.3486238532110092e-05, "loss": 0.2742, "step": 441 }, { "epoch": 0.02, "grad_norm": 2.1431202184548037, "learning_rate": 1.3516819571865446e-05, "loss": 0.8556, "step": 442 }, { "epoch": 0.02, "grad_norm": 0.6514009317026143, "learning_rate": 1.3547400611620795e-05, "loss": 0.2511, "step": 443 }, { "epoch": 0.02, "grad_norm": 1.4354544132986642, "learning_rate": 1.3577981651376149e-05, "loss": 0.5671, "step": 444 }, { "epoch": 0.02, "grad_norm": 0.7239924956114187, "learning_rate": 1.36085626911315e-05, "loss": 0.3976, "step": 445 }, { "epoch": 0.02, "grad_norm": 0.7279374850402145, "learning_rate": 1.363914373088685e-05, "loss": 0.334, "step": 446 }, { "epoch": 0.02, "grad_norm": 0.5945817494147503, "learning_rate": 1.3669724770642203e-05, "loss": 0.1589, "step": 447 }, { "epoch": 0.02, "grad_norm": 1.1648940925457805, "learning_rate": 1.3700305810397555e-05, "loss": 0.42, "step": 448 }, { "epoch": 0.02, "grad_norm": 0.7809969399888335, "learning_rate": 1.3730886850152908e-05, "loss": 0.3331, "step": 449 }, { "epoch": 0.02, "grad_norm": 1.192381980515498, "learning_rate": 1.3761467889908258e-05, "loss": 0.3608, "step": 450 }, { "epoch": 0.02, "grad_norm": 1.301344636000289, "learning_rate": 1.379204892966361e-05, "loss": 0.6585, "step": 451 }, { "epoch": 0.02, "grad_norm": 0.7395911814833751, "learning_rate": 1.3822629969418962e-05, "loss": 0.3299, "step": 452 }, { "epoch": 0.02, "grad_norm": 0.7770700230037193, "learning_rate": 1.3853211009174312e-05, "loss": 0.3101, "step": 453 }, { "epoch": 0.02, "grad_norm": 1.3610907448825111, "learning_rate": 1.3883792048929665e-05, "loss": 0.5689, "step": 454 }, { "epoch": 0.02, "grad_norm": 0.7271017440606534, "learning_rate": 1.3914373088685017e-05, "loss": 0.3369, "step": 455 }, { "epoch": 0.02, "grad_norm": 1.3096426775910195, "learning_rate": 1.3944954128440368e-05, "loss": 0.3313, "step": 456 }, { "epoch": 0.02, "grad_norm": 0.8885430971695621, "learning_rate": 1.397553516819572e-05, "loss": 0.3831, "step": 457 }, { "epoch": 0.02, "grad_norm": 0.6769653298219982, "learning_rate": 1.4006116207951071e-05, "loss": 0.3139, "step": 458 }, { "epoch": 0.02, "grad_norm": 1.7947572154075946, "learning_rate": 1.4036697247706423e-05, "loss": 0.7653, "step": 459 }, { "epoch": 0.02, "grad_norm": 0.8755100459118086, "learning_rate": 1.4067278287461774e-05, "loss": 0.358, "step": 460 }, { "epoch": 0.02, "grad_norm": 0.710535014517863, "learning_rate": 1.4097859327217127e-05, "loss": 0.3158, "step": 461 }, { "epoch": 0.02, "grad_norm": 0.8693653692462091, "learning_rate": 1.4128440366972477e-05, "loss": 0.4248, "step": 462 }, { "epoch": 0.02, "grad_norm": 0.8776195046761086, "learning_rate": 1.415902140672783e-05, "loss": 0.1549, "step": 463 }, { "epoch": 0.02, "grad_norm": 0.6691649362322377, "learning_rate": 1.4189602446483182e-05, "loss": 0.3263, "step": 464 }, { "epoch": 0.02, "grad_norm": 0.8216060926424095, "learning_rate": 1.4220183486238533e-05, "loss": 0.3765, "step": 465 }, { "epoch": 0.02, "grad_norm": 1.089918844569685, "learning_rate": 1.4250764525993885e-05, "loss": 0.5202, "step": 466 }, { "epoch": 0.02, "grad_norm": 0.7602998809240603, "learning_rate": 1.4281345565749236e-05, "loss": 0.3625, "step": 467 }, { "epoch": 0.02, "grad_norm": 1.2952893981457272, "learning_rate": 1.431192660550459e-05, "loss": 0.6468, "step": 468 }, { "epoch": 0.02, "grad_norm": 0.5256011312783779, "learning_rate": 1.434250764525994e-05, "loss": 0.2423, "step": 469 }, { "epoch": 0.02, "grad_norm": 0.7293705482607076, "learning_rate": 1.437308868501529e-05, "loss": 0.3174, "step": 470 }, { "epoch": 0.02, "grad_norm": 1.0463625981203963, "learning_rate": 1.4403669724770644e-05, "loss": 0.4852, "step": 471 }, { "epoch": 0.02, "grad_norm": 0.9194004666491546, "learning_rate": 1.4434250764525994e-05, "loss": 0.4247, "step": 472 }, { "epoch": 0.02, "grad_norm": 1.2220454533289145, "learning_rate": 1.4464831804281347e-05, "loss": 0.288, "step": 473 }, { "epoch": 0.02, "grad_norm": 0.8731377686752274, "learning_rate": 1.4495412844036698e-05, "loss": 0.4758, "step": 474 }, { "epoch": 0.02, "grad_norm": 6.683467004572762, "learning_rate": 1.4525993883792051e-05, "loss": 0.2428, "step": 475 }, { "epoch": 0.02, "grad_norm": 2.980093308442375, "learning_rate": 1.4556574923547401e-05, "loss": 0.2708, "step": 476 }, { "epoch": 0.02, "grad_norm": 1.1700476361636072, "learning_rate": 1.4587155963302753e-05, "loss": 0.3966, "step": 477 }, { "epoch": 0.02, "grad_norm": 2.468004893696746, "learning_rate": 1.4617737003058106e-05, "loss": 0.5948, "step": 478 }, { "epoch": 0.02, "grad_norm": 1.7645683586829224, "learning_rate": 1.4648318042813456e-05, "loss": 0.346, "step": 479 }, { "epoch": 0.02, "grad_norm": 1.2849940490044633, "learning_rate": 1.4678899082568809e-05, "loss": 0.4757, "step": 480 }, { "epoch": 0.02, "grad_norm": 1.961676230979329, "learning_rate": 1.470948012232416e-05, "loss": 0.2653, "step": 481 }, { "epoch": 0.02, "grad_norm": 1.2913928380581234, "learning_rate": 1.474006116207951e-05, "loss": 0.3029, "step": 482 }, { "epoch": 0.02, "grad_norm": 1.5455869052683289, "learning_rate": 1.4770642201834863e-05, "loss": 0.7507, "step": 483 }, { "epoch": 0.02, "grad_norm": 1.3602876553679932, "learning_rate": 1.4801223241590215e-05, "loss": 0.5013, "step": 484 }, { "epoch": 0.02, "grad_norm": 0.9929300678353943, "learning_rate": 1.4831804281345568e-05, "loss": 0.3224, "step": 485 }, { "epoch": 0.02, "grad_norm": 1.1985581374787173, "learning_rate": 1.4862385321100918e-05, "loss": 0.3821, "step": 486 }, { "epoch": 0.02, "grad_norm": 1.8770875744301763, "learning_rate": 1.4892966360856271e-05, "loss": 0.2011, "step": 487 }, { "epoch": 0.02, "grad_norm": 1.282651345783927, "learning_rate": 1.4923547400611623e-05, "loss": 0.3054, "step": 488 }, { "epoch": 0.02, "grad_norm": 0.9533178221936187, "learning_rate": 1.4954128440366972e-05, "loss": 0.3556, "step": 489 }, { "epoch": 0.02, "grad_norm": 1.6014454797828033, "learning_rate": 1.4984709480122325e-05, "loss": 0.5903, "step": 490 }, { "epoch": 0.02, "grad_norm": 1.1202974185833243, "learning_rate": 1.5015290519877677e-05, "loss": 0.4219, "step": 491 }, { "epoch": 0.02, "grad_norm": 0.7783539187647316, "learning_rate": 1.504587155963303e-05, "loss": 0.3357, "step": 492 }, { "epoch": 0.02, "grad_norm": 1.2577640719206675, "learning_rate": 1.507645259938838e-05, "loss": 0.2763, "step": 493 }, { "epoch": 0.02, "grad_norm": 1.1266647794434705, "learning_rate": 1.5107033639143731e-05, "loss": 0.3716, "step": 494 }, { "epoch": 0.02, "grad_norm": 1.126172315991585, "learning_rate": 1.5137614678899085e-05, "loss": 0.428, "step": 495 }, { "epoch": 0.02, "grad_norm": 1.3099132658854535, "learning_rate": 1.5168195718654434e-05, "loss": 0.4324, "step": 496 }, { "epoch": 0.02, "grad_norm": 0.6815149750374333, "learning_rate": 1.5198776758409788e-05, "loss": 0.3325, "step": 497 }, { "epoch": 0.02, "grad_norm": 0.9174933663288322, "learning_rate": 1.5229357798165139e-05, "loss": 0.4853, "step": 498 }, { "epoch": 0.02, "grad_norm": 1.3981923289607003, "learning_rate": 1.5259938837920492e-05, "loss": 0.1075, "step": 499 }, { "epoch": 0.02, "grad_norm": 0.7327084755885843, "learning_rate": 1.5290519877675842e-05, "loss": 0.3344, "step": 500 }, { "epoch": 0.02, "grad_norm": 1.0327508058416586, "learning_rate": 1.5321100917431192e-05, "loss": 0.3957, "step": 501 }, { "epoch": 0.02, "grad_norm": 2.255494235382191, "learning_rate": 1.5351681957186545e-05, "loss": 0.547, "step": 502 }, { "epoch": 0.02, "grad_norm": 0.9140387628508145, "learning_rate": 1.5382262996941898e-05, "loss": 0.3239, "step": 503 }, { "epoch": 0.02, "grad_norm": 1.04989856294697, "learning_rate": 1.541284403669725e-05, "loss": 0.4339, "step": 504 }, { "epoch": 0.02, "grad_norm": 0.8812860317360276, "learning_rate": 1.54434250764526e-05, "loss": 0.2674, "step": 505 }, { "epoch": 0.02, "grad_norm": 1.831856893445324, "learning_rate": 1.547400611620795e-05, "loss": 0.4357, "step": 506 }, { "epoch": 0.02, "grad_norm": 1.4056752506608128, "learning_rate": 1.5504587155963304e-05, "loss": 0.4382, "step": 507 }, { "epoch": 0.02, "grad_norm": 0.872167252270934, "learning_rate": 1.5535168195718654e-05, "loss": 0.4046, "step": 508 }, { "epoch": 0.02, "grad_norm": 1.0399986025759034, "learning_rate": 1.5565749235474007e-05, "loss": 0.1933, "step": 509 }, { "epoch": 0.02, "grad_norm": 0.8528721395695101, "learning_rate": 1.559633027522936e-05, "loss": 0.3721, "step": 510 }, { "epoch": 0.02, "grad_norm": 0.9150225679257917, "learning_rate": 1.5626911314984713e-05, "loss": 0.3397, "step": 511 }, { "epoch": 0.02, "grad_norm": 0.8806655034618698, "learning_rate": 1.5657492354740063e-05, "loss": 0.2922, "step": 512 }, { "epoch": 0.02, "grad_norm": 0.8949009978261282, "learning_rate": 1.5688073394495413e-05, "loss": 0.3993, "step": 513 }, { "epoch": 0.02, "grad_norm": 0.9601737049681667, "learning_rate": 1.5718654434250766e-05, "loss": 0.2832, "step": 514 }, { "epoch": 0.02, "grad_norm": 0.875586350918399, "learning_rate": 1.5749235474006116e-05, "loss": 0.2977, "step": 515 }, { "epoch": 0.02, "grad_norm": 0.7570352003479348, "learning_rate": 1.577981651376147e-05, "loss": 0.3545, "step": 516 }, { "epoch": 0.02, "grad_norm": 1.5486300962026418, "learning_rate": 1.5810397553516822e-05, "loss": 0.6702, "step": 517 }, { "epoch": 0.02, "grad_norm": 0.7814457491381758, "learning_rate": 1.5840978593272172e-05, "loss": 0.3575, "step": 518 }, { "epoch": 0.02, "grad_norm": 0.9243364397273003, "learning_rate": 1.5871559633027525e-05, "loss": 0.4523, "step": 519 }, { "epoch": 0.02, "grad_norm": 0.6499495745889232, "learning_rate": 1.5902140672782875e-05, "loss": 0.2645, "step": 520 }, { "epoch": 0.02, "grad_norm": 1.2694639813178892, "learning_rate": 1.593272171253823e-05, "loss": 0.4202, "step": 521 }, { "epoch": 0.02, "grad_norm": 0.7599068489701073, "learning_rate": 1.5963302752293578e-05, "loss": 0.3055, "step": 522 }, { "epoch": 0.02, "grad_norm": 0.9785632275788022, "learning_rate": 1.599388379204893e-05, "loss": 0.5777, "step": 523 }, { "epoch": 0.02, "grad_norm": 0.6790897030633063, "learning_rate": 1.602446483180428e-05, "loss": 0.3636, "step": 524 }, { "epoch": 0.02, "grad_norm": 0.9140843119700909, "learning_rate": 1.6055045871559634e-05, "loss": 0.2676, "step": 525 }, { "epoch": 0.02, "grad_norm": 0.8736694215579709, "learning_rate": 1.6085626911314988e-05, "loss": 0.2191, "step": 526 }, { "epoch": 0.02, "grad_norm": 2.049300027295384, "learning_rate": 1.6116207951070337e-05, "loss": 0.7223, "step": 527 }, { "epoch": 0.02, "grad_norm": 0.6778432735183949, "learning_rate": 1.614678899082569e-05, "loss": 0.2883, "step": 528 }, { "epoch": 0.02, "grad_norm": 1.0919065066083935, "learning_rate": 1.617737003058104e-05, "loss": 0.5876, "step": 529 }, { "epoch": 0.02, "grad_norm": 0.9459794370883273, "learning_rate": 1.6207951070336393e-05, "loss": 0.432, "step": 530 }, { "epoch": 0.02, "grad_norm": 0.7737494415657642, "learning_rate": 1.6238532110091743e-05, "loss": 0.2162, "step": 531 }, { "epoch": 0.02, "grad_norm": 0.7317896407309216, "learning_rate": 1.6269113149847096e-05, "loss": 0.4076, "step": 532 }, { "epoch": 0.02, "grad_norm": 0.8745741302314319, "learning_rate": 1.629969418960245e-05, "loss": 0.2933, "step": 533 }, { "epoch": 0.02, "grad_norm": 0.7632785121254949, "learning_rate": 1.63302752293578e-05, "loss": 0.3399, "step": 534 }, { "epoch": 0.02, "grad_norm": 1.756992746168261, "learning_rate": 1.6360856269113153e-05, "loss": 0.5201, "step": 535 }, { "epoch": 0.02, "grad_norm": 0.7508433564040993, "learning_rate": 1.6391437308868502e-05, "loss": 0.3777, "step": 536 }, { "epoch": 0.02, "grad_norm": 0.7453562420281863, "learning_rate": 1.6422018348623852e-05, "loss": 0.3603, "step": 537 }, { "epoch": 0.02, "grad_norm": 0.9641580952876424, "learning_rate": 1.6452599388379205e-05, "loss": 0.2439, "step": 538 }, { "epoch": 0.02, "grad_norm": 0.98778493857445, "learning_rate": 1.648318042813456e-05, "loss": 0.2677, "step": 539 }, { "epoch": 0.02, "grad_norm": 1.0128604636517755, "learning_rate": 1.6513761467889912e-05, "loss": 0.3895, "step": 540 }, { "epoch": 0.02, "grad_norm": 1.0705232861062697, "learning_rate": 1.654434250764526e-05, "loss": 0.4209, "step": 541 }, { "epoch": 0.02, "grad_norm": 1.4487232667085705, "learning_rate": 1.657492354740061e-05, "loss": 0.4326, "step": 542 }, { "epoch": 0.02, "grad_norm": 0.7209177262689151, "learning_rate": 1.6605504587155964e-05, "loss": 0.359, "step": 543 }, { "epoch": 0.02, "grad_norm": 0.7321592298041024, "learning_rate": 1.6636085626911314e-05, "loss": 0.3459, "step": 544 }, { "epoch": 0.03, "grad_norm": 0.8578179072398843, "learning_rate": 1.6666666666666667e-05, "loss": 0.187, "step": 545 }, { "epoch": 0.03, "grad_norm": 0.6947439082731846, "learning_rate": 1.669724770642202e-05, "loss": 0.3227, "step": 546 }, { "epoch": 0.03, "grad_norm": 1.4462703552851028, "learning_rate": 1.672782874617737e-05, "loss": 0.5991, "step": 547 }, { "epoch": 0.03, "grad_norm": 0.7741927327467244, "learning_rate": 1.6758409785932724e-05, "loss": 0.3243, "step": 548 }, { "epoch": 0.03, "grad_norm": 0.6849085032519028, "learning_rate": 1.6788990825688073e-05, "loss": 0.2896, "step": 549 }, { "epoch": 0.03, "grad_norm": 2.202215894736286, "learning_rate": 1.6819571865443427e-05, "loss": 0.8234, "step": 550 }, { "epoch": 0.03, "grad_norm": 1.2074253208145473, "learning_rate": 1.6850152905198776e-05, "loss": 0.4511, "step": 551 }, { "epoch": 0.03, "grad_norm": 0.6959016215409739, "learning_rate": 1.688073394495413e-05, "loss": 0.3307, "step": 552 }, { "epoch": 0.03, "grad_norm": 0.8729185268229616, "learning_rate": 1.6911314984709483e-05, "loss": 0.3558, "step": 553 }, { "epoch": 0.03, "grad_norm": 0.46330907769153035, "learning_rate": 1.6941896024464833e-05, "loss": 0.1798, "step": 554 }, { "epoch": 0.03, "grad_norm": 0.9333485199345983, "learning_rate": 1.6972477064220186e-05, "loss": 0.3855, "step": 555 }, { "epoch": 0.03, "grad_norm": 0.8694759665134488, "learning_rate": 1.7003058103975536e-05, "loss": 0.413, "step": 556 }, { "epoch": 0.03, "grad_norm": 2.140666296733427, "learning_rate": 1.703363914373089e-05, "loss": 0.4751, "step": 557 }, { "epoch": 0.03, "grad_norm": 0.8099459746301703, "learning_rate": 1.706422018348624e-05, "loss": 0.3288, "step": 558 }, { "epoch": 0.03, "grad_norm": 0.6498983749150069, "learning_rate": 1.709480122324159e-05, "loss": 0.2493, "step": 559 }, { "epoch": 0.03, "grad_norm": 0.8525040848039012, "learning_rate": 1.7125382262996945e-05, "loss": 0.344, "step": 560 }, { "epoch": 0.03, "grad_norm": 0.7954311518183899, "learning_rate": 1.7155963302752295e-05, "loss": 0.2824, "step": 561 }, { "epoch": 0.03, "grad_norm": 2.227621674589291, "learning_rate": 1.7186544342507648e-05, "loss": 0.7956, "step": 562 }, { "epoch": 0.03, "grad_norm": 1.7872297509319615, "learning_rate": 1.7217125382262998e-05, "loss": 0.5998, "step": 563 }, { "epoch": 0.03, "grad_norm": 0.8126031175413392, "learning_rate": 1.724770642201835e-05, "loss": 0.2885, "step": 564 }, { "epoch": 0.03, "grad_norm": 0.7245772600476947, "learning_rate": 1.72782874617737e-05, "loss": 0.2604, "step": 565 }, { "epoch": 0.03, "grad_norm": 2.337651420009429, "learning_rate": 1.7308868501529054e-05, "loss": 0.8435, "step": 566 }, { "epoch": 0.03, "grad_norm": 1.1724205316352214, "learning_rate": 1.7339449541284407e-05, "loss": 0.2932, "step": 567 }, { "epoch": 0.03, "grad_norm": 1.1874596269657847, "learning_rate": 1.7370030581039757e-05, "loss": 0.3898, "step": 568 }, { "epoch": 0.03, "grad_norm": 2.6023961807171903, "learning_rate": 1.740061162079511e-05, "loss": 0.6172, "step": 569 }, { "epoch": 0.03, "grad_norm": 0.6433772543521216, "learning_rate": 1.743119266055046e-05, "loss": 0.2791, "step": 570 }, { "epoch": 0.03, "grad_norm": 0.5450377729561972, "learning_rate": 1.746177370030581e-05, "loss": 0.2039, "step": 571 }, { "epoch": 0.03, "grad_norm": 0.9061500979243665, "learning_rate": 1.7492354740061163e-05, "loss": 0.3976, "step": 572 }, { "epoch": 0.03, "grad_norm": 0.8488602857785469, "learning_rate": 1.7522935779816516e-05, "loss": 0.299, "step": 573 }, { "epoch": 0.03, "grad_norm": 4.250866575345858, "learning_rate": 1.755351681957187e-05, "loss": 0.5778, "step": 574 }, { "epoch": 0.03, "grad_norm": 1.2581616477898643, "learning_rate": 1.758409785932722e-05, "loss": 0.4166, "step": 575 }, { "epoch": 0.03, "grad_norm": 0.751805311621028, "learning_rate": 1.7614678899082572e-05, "loss": 0.3315, "step": 576 }, { "epoch": 0.03, "grad_norm": 0.661720626741957, "learning_rate": 1.7645259938837922e-05, "loss": 0.252, "step": 577 }, { "epoch": 0.03, "grad_norm": 0.8223523821071027, "learning_rate": 1.767584097859327e-05, "loss": 0.3536, "step": 578 }, { "epoch": 0.03, "grad_norm": 0.7458239397231019, "learning_rate": 1.7706422018348625e-05, "loss": 0.3387, "step": 579 }, { "epoch": 0.03, "grad_norm": 1.3601661321129337, "learning_rate": 1.7737003058103978e-05, "loss": 0.3918, "step": 580 }, { "epoch": 0.03, "grad_norm": 2.352395200191048, "learning_rate": 1.7767584097859328e-05, "loss": 0.7019, "step": 581 }, { "epoch": 0.03, "grad_norm": 0.6301710310142474, "learning_rate": 1.779816513761468e-05, "loss": 0.3119, "step": 582 }, { "epoch": 0.03, "grad_norm": 0.742062036460496, "learning_rate": 1.782874617737003e-05, "loss": 0.3369, "step": 583 }, { "epoch": 0.03, "grad_norm": 0.4965118762174356, "learning_rate": 1.7859327217125384e-05, "loss": 0.2229, "step": 584 }, { "epoch": 0.03, "grad_norm": 0.6249186191082478, "learning_rate": 1.7889908256880734e-05, "loss": 0.3162, "step": 585 }, { "epoch": 0.03, "grad_norm": 1.7517714202897434, "learning_rate": 1.7920489296636087e-05, "loss": 0.6236, "step": 586 }, { "epoch": 0.03, "grad_norm": 0.8637113701816764, "learning_rate": 1.795107033639144e-05, "loss": 0.3579, "step": 587 }, { "epoch": 0.03, "grad_norm": 0.6399565201846126, "learning_rate": 1.798165137614679e-05, "loss": 0.3067, "step": 588 }, { "epoch": 0.03, "grad_norm": 1.6765949487586396, "learning_rate": 1.8012232415902143e-05, "loss": 0.7398, "step": 589 }, { "epoch": 0.03, "grad_norm": 0.43464764170156306, "learning_rate": 1.8042813455657493e-05, "loss": 0.1918, "step": 590 }, { "epoch": 0.03, "grad_norm": 0.6504943597783651, "learning_rate": 1.8073394495412846e-05, "loss": 0.3636, "step": 591 }, { "epoch": 0.03, "grad_norm": 0.6509793922605535, "learning_rate": 1.8103975535168196e-05, "loss": 0.3744, "step": 592 }, { "epoch": 0.03, "grad_norm": 1.536685794905627, "learning_rate": 1.813455657492355e-05, "loss": 0.6188, "step": 593 }, { "epoch": 0.03, "grad_norm": 0.6697907990817732, "learning_rate": 1.81651376146789e-05, "loss": 0.319, "step": 594 }, { "epoch": 0.03, "grad_norm": 0.8474550155861955, "learning_rate": 1.8195718654434252e-05, "loss": 0.47, "step": 595 }, { "epoch": 0.03, "grad_norm": 0.46761997406859507, "learning_rate": 1.8226299694189605e-05, "loss": 0.2631, "step": 596 }, { "epoch": 0.03, "grad_norm": 0.6926844267337992, "learning_rate": 1.8256880733944955e-05, "loss": 0.3056, "step": 597 }, { "epoch": 0.03, "grad_norm": 1.0323910442931747, "learning_rate": 1.8287461773700308e-05, "loss": 0.4921, "step": 598 }, { "epoch": 0.03, "grad_norm": 0.6445634672872429, "learning_rate": 1.8318042813455658e-05, "loss": 0.4193, "step": 599 }, { "epoch": 0.03, "grad_norm": 0.5852737619260164, "learning_rate": 1.834862385321101e-05, "loss": 0.2272, "step": 600 }, { "epoch": 0.03, "grad_norm": 0.8889726388942958, "learning_rate": 1.837920489296636e-05, "loss": 0.4455, "step": 601 }, { "epoch": 0.03, "grad_norm": 0.5006919792552491, "learning_rate": 1.8409785932721714e-05, "loss": 0.2565, "step": 602 }, { "epoch": 0.03, "grad_norm": 0.5649618018359593, "learning_rate": 1.8440366972477067e-05, "loss": 0.3215, "step": 603 }, { "epoch": 0.03, "grad_norm": 0.6099383032481673, "learning_rate": 1.8470948012232417e-05, "loss": 0.4097, "step": 604 }, { "epoch": 0.03, "grad_norm": 1.8919272703625916, "learning_rate": 1.850152905198777e-05, "loss": 0.8647, "step": 605 }, { "epoch": 0.03, "grad_norm": 0.6096233623462712, "learning_rate": 1.853211009174312e-05, "loss": 0.273, "step": 606 }, { "epoch": 0.03, "grad_norm": 1.8445469276492599, "learning_rate": 1.856269113149847e-05, "loss": 0.7694, "step": 607 }, { "epoch": 0.03, "grad_norm": 0.5655396878029959, "learning_rate": 1.8593272171253823e-05, "loss": 0.4023, "step": 608 }, { "epoch": 0.03, "grad_norm": 0.7003000363944234, "learning_rate": 1.8623853211009176e-05, "loss": 0.4023, "step": 609 }, { "epoch": 0.03, "grad_norm": 0.4445205314233728, "learning_rate": 1.865443425076453e-05, "loss": 0.1796, "step": 610 }, { "epoch": 0.03, "grad_norm": 0.6898680598343396, "learning_rate": 1.868501529051988e-05, "loss": 0.3761, "step": 611 }, { "epoch": 0.03, "grad_norm": 1.3202911728885998, "learning_rate": 1.8715596330275232e-05, "loss": 0.4715, "step": 612 }, { "epoch": 0.03, "grad_norm": 0.646909924603647, "learning_rate": 1.8746177370030582e-05, "loss": 0.2493, "step": 613 }, { "epoch": 0.03, "grad_norm": 1.0767242189370316, "learning_rate": 1.8776758409785932e-05, "loss": 0.5681, "step": 614 }, { "epoch": 0.03, "grad_norm": 0.6858692085214954, "learning_rate": 1.8807339449541285e-05, "loss": 0.3159, "step": 615 }, { "epoch": 0.03, "grad_norm": 0.48385196454453877, "learning_rate": 1.883792048929664e-05, "loss": 0.2351, "step": 616 }, { "epoch": 0.03, "grad_norm": 1.8706918032724593, "learning_rate": 1.886850152905199e-05, "loss": 0.78, "step": 617 }, { "epoch": 0.03, "grad_norm": 0.9088513540672198, "learning_rate": 1.889908256880734e-05, "loss": 0.4208, "step": 618 }, { "epoch": 0.03, "grad_norm": 0.5565090932500756, "learning_rate": 1.892966360856269e-05, "loss": 0.3059, "step": 619 }, { "epoch": 0.03, "grad_norm": 0.9793562355809654, "learning_rate": 1.8960244648318044e-05, "loss": 0.5401, "step": 620 }, { "epoch": 0.03, "grad_norm": 0.8538753484296357, "learning_rate": 1.8990825688073394e-05, "loss": 0.4252, "step": 621 }, { "epoch": 0.03, "grad_norm": 0.46124442975487, "learning_rate": 1.9021406727828747e-05, "loss": 0.2288, "step": 622 }, { "epoch": 0.03, "grad_norm": 0.6789815026384124, "learning_rate": 1.90519877675841e-05, "loss": 0.3355, "step": 623 }, { "epoch": 0.03, "grad_norm": 0.7111122903322493, "learning_rate": 1.9082568807339454e-05, "loss": 0.3881, "step": 624 }, { "epoch": 0.03, "grad_norm": 0.8226479233299241, "learning_rate": 1.9113149847094803e-05, "loss": 0.4525, "step": 625 }, { "epoch": 0.03, "grad_norm": 1.6297482386105937, "learning_rate": 1.9143730886850153e-05, "loss": 0.5456, "step": 626 }, { "epoch": 0.03, "grad_norm": 0.5737852969913682, "learning_rate": 1.9174311926605506e-05, "loss": 0.3054, "step": 627 }, { "epoch": 0.03, "grad_norm": 0.9456525867708102, "learning_rate": 1.9204892966360856e-05, "loss": 0.3724, "step": 628 }, { "epoch": 0.03, "grad_norm": 0.6013425890007309, "learning_rate": 1.923547400611621e-05, "loss": 0.1528, "step": 629 }, { "epoch": 0.03, "grad_norm": 1.2536824530197315, "learning_rate": 1.9266055045871563e-05, "loss": 0.433, "step": 630 }, { "epoch": 0.03, "grad_norm": 0.7670470180194572, "learning_rate": 1.9296636085626912e-05, "loss": 0.3641, "step": 631 }, { "epoch": 0.03, "grad_norm": 1.3528669925857149, "learning_rate": 1.9327217125382266e-05, "loss": 0.4655, "step": 632 }, { "epoch": 0.03, "grad_norm": 1.1429141623250159, "learning_rate": 1.9357798165137615e-05, "loss": 0.4517, "step": 633 }, { "epoch": 0.03, "grad_norm": 0.5564169184424588, "learning_rate": 1.938837920489297e-05, "loss": 0.2639, "step": 634 }, { "epoch": 0.03, "grad_norm": 0.7186284871364891, "learning_rate": 1.9418960244648318e-05, "loss": 0.4095, "step": 635 }, { "epoch": 0.03, "grad_norm": 0.745074349973661, "learning_rate": 1.944954128440367e-05, "loss": 0.1654, "step": 636 }, { "epoch": 0.03, "grad_norm": 1.0342172010641828, "learning_rate": 1.9480122324159025e-05, "loss": 0.424, "step": 637 }, { "epoch": 0.03, "grad_norm": 1.696175127438202, "learning_rate": 1.9510703363914374e-05, "loss": 0.6544, "step": 638 }, { "epoch": 0.03, "grad_norm": 0.6218422534174443, "learning_rate": 1.9541284403669728e-05, "loss": 0.3074, "step": 639 }, { "epoch": 0.03, "grad_norm": 0.6671118160732972, "learning_rate": 1.9571865443425077e-05, "loss": 0.4002, "step": 640 }, { "epoch": 0.03, "grad_norm": 2.13420600308768, "learning_rate": 1.960244648318043e-05, "loss": 0.8405, "step": 641 }, { "epoch": 0.03, "grad_norm": 1.079641507540786, "learning_rate": 1.963302752293578e-05, "loss": 0.2949, "step": 642 }, { "epoch": 0.03, "grad_norm": 0.8236367982538636, "learning_rate": 1.9663608562691134e-05, "loss": 0.3382, "step": 643 }, { "epoch": 0.03, "grad_norm": 0.7314061796585635, "learning_rate": 1.9694189602446487e-05, "loss": 0.3797, "step": 644 }, { "epoch": 0.03, "grad_norm": 0.5909264738534531, "learning_rate": 1.9724770642201837e-05, "loss": 0.2392, "step": 645 }, { "epoch": 0.03, "grad_norm": 0.8683891194604073, "learning_rate": 1.975535168195719e-05, "loss": 0.4147, "step": 646 }, { "epoch": 0.03, "grad_norm": 0.8060486115298693, "learning_rate": 1.978593272171254e-05, "loss": 0.4212, "step": 647 }, { "epoch": 0.03, "grad_norm": 1.2932079610196028, "learning_rate": 1.9816513761467893e-05, "loss": 0.5297, "step": 648 }, { "epoch": 0.03, "grad_norm": 0.5747385443367162, "learning_rate": 1.9847094801223243e-05, "loss": 0.3293, "step": 649 }, { "epoch": 0.03, "grad_norm": 0.576213068735911, "learning_rate": 1.9877675840978596e-05, "loss": 0.2407, "step": 650 }, { "epoch": 0.03, "grad_norm": 0.7787385181904303, "learning_rate": 1.9908256880733945e-05, "loss": 0.3833, "step": 651 }, { "epoch": 0.03, "grad_norm": 0.6811038827845374, "learning_rate": 1.99388379204893e-05, "loss": 0.2424, "step": 652 }, { "epoch": 0.03, "grad_norm": 1.3807143225609593, "learning_rate": 1.9969418960244652e-05, "loss": 0.6969, "step": 653 }, { "epoch": 0.03, "grad_norm": 0.7367678546216719, "learning_rate": 2e-05, "loss": 0.4626, "step": 654 }, { "epoch": 0.03, "grad_norm": 0.6994972573022487, "learning_rate": 1.999999988930482e-05, "loss": 0.2692, "step": 655 }, { "epoch": 0.03, "grad_norm": 0.4802055143096561, "learning_rate": 1.999999955721928e-05, "loss": 0.2192, "step": 656 }, { "epoch": 0.03, "grad_norm": 1.0916650335286944, "learning_rate": 1.9999999003743386e-05, "loss": 0.5638, "step": 657 }, { "epoch": 0.03, "grad_norm": 0.6510485788388284, "learning_rate": 1.9999998228877155e-05, "loss": 0.3021, "step": 658 }, { "epoch": 0.03, "grad_norm": 0.7121993966385886, "learning_rate": 1.99999972326206e-05, "loss": 0.3928, "step": 659 }, { "epoch": 0.03, "grad_norm": 1.2453033306706074, "learning_rate": 1.9999996014973747e-05, "loss": 0.5474, "step": 660 }, { "epoch": 0.03, "grad_norm": 0.6913780217277756, "learning_rate": 1.9999994575936615e-05, "loss": 0.339, "step": 661 }, { "epoch": 0.03, "grad_norm": 0.40348748248024663, "learning_rate": 1.9999992915509245e-05, "loss": 0.1173, "step": 662 }, { "epoch": 0.03, "grad_norm": 0.6535040215803822, "learning_rate": 1.999999103369167e-05, "loss": 0.3434, "step": 663 }, { "epoch": 0.03, "grad_norm": 0.6930306960745453, "learning_rate": 1.999998893048393e-05, "loss": 0.35, "step": 664 }, { "epoch": 0.03, "grad_norm": 1.9849130280483798, "learning_rate": 1.9999986605886072e-05, "loss": 0.5549, "step": 665 }, { "epoch": 0.03, "grad_norm": 0.6882180217021225, "learning_rate": 1.9999984059898153e-05, "loss": 0.4308, "step": 666 }, { "epoch": 0.03, "grad_norm": 0.6124594703728439, "learning_rate": 1.9999981292520222e-05, "loss": 0.3161, "step": 667 }, { "epoch": 0.03, "grad_norm": 0.45373797605211397, "learning_rate": 1.9999978303752342e-05, "loss": 0.1635, "step": 668 }, { "epoch": 0.03, "grad_norm": 1.6726889463237635, "learning_rate": 1.9999975093594583e-05, "loss": 0.7403, "step": 669 }, { "epoch": 0.03, "grad_norm": 0.706324175059597, "learning_rate": 1.9999971662047012e-05, "loss": 0.3227, "step": 670 }, { "epoch": 0.03, "grad_norm": 1.376503289477581, "learning_rate": 1.9999968009109708e-05, "loss": 0.3949, "step": 671 }, { "epoch": 0.03, "grad_norm": 1.561842210104898, "learning_rate": 1.999996413478275e-05, "loss": 0.6576, "step": 672 }, { "epoch": 0.03, "grad_norm": 0.5963514740766326, "learning_rate": 1.9999960039066226e-05, "loss": 0.3089, "step": 673 }, { "epoch": 0.03, "grad_norm": 0.5908896183227859, "learning_rate": 1.999995572196022e-05, "loss": 0.1836, "step": 674 }, { "epoch": 0.03, "grad_norm": 0.8489908552865938, "learning_rate": 1.9999951183464837e-05, "loss": 0.317, "step": 675 }, { "epoch": 0.03, "grad_norm": 0.7948053603194695, "learning_rate": 1.999994642358017e-05, "loss": 0.3421, "step": 676 }, { "epoch": 0.03, "grad_norm": 2.0197352368512274, "learning_rate": 1.9999941442306328e-05, "loss": 0.5878, "step": 677 }, { "epoch": 0.03, "grad_norm": 0.7028968072632593, "learning_rate": 1.9999936239643422e-05, "loss": 0.2938, "step": 678 }, { "epoch": 0.03, "grad_norm": 0.6473862209980831, "learning_rate": 1.9999930815591565e-05, "loss": 0.3149, "step": 679 }, { "epoch": 0.03, "grad_norm": 0.6194245546090891, "learning_rate": 1.9999925170150877e-05, "loss": 0.317, "step": 680 }, { "epoch": 0.03, "grad_norm": 0.7818454454963415, "learning_rate": 1.9999919303321482e-05, "loss": 0.246, "step": 681 }, { "epoch": 0.03, "grad_norm": 0.7564850406368873, "learning_rate": 1.9999913215103516e-05, "loss": 0.3363, "step": 682 }, { "epoch": 0.03, "grad_norm": 1.3707537220992398, "learning_rate": 1.9999906905497104e-05, "loss": 0.4397, "step": 683 }, { "epoch": 0.03, "grad_norm": 1.8947073083180777, "learning_rate": 1.9999900374502395e-05, "loss": 0.6268, "step": 684 }, { "epoch": 0.03, "grad_norm": 0.5537455145557966, "learning_rate": 1.999989362211953e-05, "loss": 0.317, "step": 685 }, { "epoch": 0.03, "grad_norm": 0.46942044717482495, "learning_rate": 1.9999886648348657e-05, "loss": 0.2324, "step": 686 }, { "epoch": 0.03, "grad_norm": 1.0547854666743743, "learning_rate": 1.999987945318993e-05, "loss": 0.4551, "step": 687 }, { "epoch": 0.03, "grad_norm": 0.9012278004185484, "learning_rate": 1.9999872036643514e-05, "loss": 0.3195, "step": 688 }, { "epoch": 0.03, "grad_norm": 1.8792789171490352, "learning_rate": 1.9999864398709565e-05, "loss": 0.6657, "step": 689 }, { "epoch": 0.03, "grad_norm": 0.9117257609069949, "learning_rate": 1.999985653938826e-05, "loss": 0.4492, "step": 690 }, { "epoch": 0.03, "grad_norm": 0.5059014074587862, "learning_rate": 1.9999848458679768e-05, "loss": 0.2697, "step": 691 }, { "epoch": 0.03, "grad_norm": 1.6784231504936034, "learning_rate": 1.9999840156584266e-05, "loss": 0.7376, "step": 692 }, { "epoch": 0.03, "grad_norm": 1.0279329861427517, "learning_rate": 1.999983163310194e-05, "loss": 0.3029, "step": 693 }, { "epoch": 0.03, "grad_norm": 0.7443388737145241, "learning_rate": 1.9999822888232987e-05, "loss": 0.2911, "step": 694 }, { "epoch": 0.03, "grad_norm": 0.8909206120929277, "learning_rate": 1.999981392197759e-05, "loss": 0.4313, "step": 695 }, { "epoch": 0.03, "grad_norm": 2.1491058685292717, "learning_rate": 1.999980473433595e-05, "loss": 0.8305, "step": 696 }, { "epoch": 0.03, "grad_norm": 0.6283181234864429, "learning_rate": 1.999979532530827e-05, "loss": 0.2077, "step": 697 }, { "epoch": 0.03, "grad_norm": 0.452576294961913, "learning_rate": 1.999978569489476e-05, "loss": 0.2857, "step": 698 }, { "epoch": 0.03, "grad_norm": 0.5860738060514112, "learning_rate": 1.9999775843095636e-05, "loss": 0.4072, "step": 699 }, { "epoch": 0.03, "grad_norm": 0.6489991442796038, "learning_rate": 1.9999765769911108e-05, "loss": 0.3565, "step": 700 }, { "epoch": 0.03, "grad_norm": 1.0568446265222562, "learning_rate": 1.9999755475341403e-05, "loss": 0.4024, "step": 701 }, { "epoch": 0.03, "grad_norm": 0.7019954406751752, "learning_rate": 1.9999744959386753e-05, "loss": 0.3835, "step": 702 }, { "epoch": 0.03, "grad_norm": 0.7141355700787837, "learning_rate": 1.9999734222047387e-05, "loss": 0.3248, "step": 703 }, { "epoch": 0.03, "grad_norm": 0.9652208499912159, "learning_rate": 1.9999723263323543e-05, "loss": 0.4238, "step": 704 }, { "epoch": 0.03, "grad_norm": 0.7627714465831604, "learning_rate": 1.9999712083215465e-05, "loss": 0.6007, "step": 705 }, { "epoch": 0.03, "grad_norm": 0.45046026371794445, "learning_rate": 1.9999700681723396e-05, "loss": 0.1914, "step": 706 }, { "epoch": 0.03, "grad_norm": 0.7714181503044668, "learning_rate": 1.9999689058847595e-05, "loss": 0.3423, "step": 707 }, { "epoch": 0.03, "grad_norm": 2.076608431677552, "learning_rate": 1.9999677214588314e-05, "loss": 0.825, "step": 708 }, { "epoch": 0.03, "grad_norm": 0.5633517160121146, "learning_rate": 1.999966514894582e-05, "loss": 0.3005, "step": 709 }, { "epoch": 0.03, "grad_norm": 0.898140516968242, "learning_rate": 1.9999652861920374e-05, "loss": 0.3885, "step": 710 }, { "epoch": 0.03, "grad_norm": 0.8505430394287419, "learning_rate": 1.999964035351225e-05, "loss": 0.4604, "step": 711 }, { "epoch": 0.03, "grad_norm": 0.6266940564645365, "learning_rate": 1.999962762372173e-05, "loss": 0.3346, "step": 712 }, { "epoch": 0.03, "grad_norm": 0.7517726933464418, "learning_rate": 1.9999614672549092e-05, "loss": 0.3314, "step": 713 }, { "epoch": 0.03, "grad_norm": 0.531404795184928, "learning_rate": 1.9999601499994625e-05, "loss": 0.2808, "step": 714 }, { "epoch": 0.03, "grad_norm": 0.6691745818531178, "learning_rate": 1.9999588106058613e-05, "loss": 0.3373, "step": 715 }, { "epoch": 0.03, "grad_norm": 0.8449221358998767, "learning_rate": 1.999957449074136e-05, "loss": 0.4503, "step": 716 }, { "epoch": 0.03, "grad_norm": 0.735217362518927, "learning_rate": 1.999956065404317e-05, "loss": 0.3327, "step": 717 }, { "epoch": 0.03, "grad_norm": 0.7037397792101784, "learning_rate": 1.9999546595964338e-05, "loss": 0.3773, "step": 718 }, { "epoch": 0.03, "grad_norm": 0.6250477789870648, "learning_rate": 1.9999532316505185e-05, "loss": 0.3406, "step": 719 }, { "epoch": 0.03, "grad_norm": 0.5633685793357558, "learning_rate": 1.9999517815666027e-05, "loss": 0.2003, "step": 720 }, { "epoch": 0.03, "grad_norm": 0.5982309218100806, "learning_rate": 1.999950309344718e-05, "loss": 0.3029, "step": 721 }, { "epoch": 0.03, "grad_norm": 0.560957703357082, "learning_rate": 1.9999488149848972e-05, "loss": 0.3891, "step": 722 }, { "epoch": 0.03, "grad_norm": 1.0400463559915587, "learning_rate": 1.9999472984871734e-05, "loss": 0.5104, "step": 723 }, { "epoch": 0.03, "grad_norm": 0.6184426632319019, "learning_rate": 1.9999457598515798e-05, "loss": 0.3455, "step": 724 }, { "epoch": 0.03, "grad_norm": 0.857855399864822, "learning_rate": 1.9999441990781515e-05, "loss": 0.4999, "step": 725 }, { "epoch": 0.03, "grad_norm": 0.470679232366824, "learning_rate": 1.999942616166922e-05, "loss": 0.3035, "step": 726 }, { "epoch": 0.03, "grad_norm": 0.4450416279979177, "learning_rate": 1.999941011117927e-05, "loss": 0.2428, "step": 727 }, { "epoch": 0.03, "grad_norm": 0.8144791238556535, "learning_rate": 1.9999393839312016e-05, "loss": 0.4535, "step": 728 }, { "epoch": 0.03, "grad_norm": 1.332367753540305, "learning_rate": 1.999937734606782e-05, "loss": 0.638, "step": 729 }, { "epoch": 0.03, "grad_norm": 0.5830639493935122, "learning_rate": 1.9999360631447044e-05, "loss": 0.2595, "step": 730 }, { "epoch": 0.03, "grad_norm": 0.7894196025658753, "learning_rate": 1.9999343695450063e-05, "loss": 0.4279, "step": 731 }, { "epoch": 0.03, "grad_norm": 0.43328149280680284, "learning_rate": 1.9999326538077254e-05, "loss": 0.2092, "step": 732 }, { "epoch": 0.03, "grad_norm": 0.9174107851471719, "learning_rate": 1.999930915932899e-05, "loss": 0.3295, "step": 733 }, { "epoch": 0.03, "grad_norm": 0.9850803191033781, "learning_rate": 1.999929155920566e-05, "loss": 0.4172, "step": 734 }, { "epoch": 0.03, "grad_norm": 0.9405791870277539, "learning_rate": 1.9999273737707648e-05, "loss": 0.485, "step": 735 }, { "epoch": 0.03, "grad_norm": 1.298388079580774, "learning_rate": 1.9999255694835357e-05, "loss": 0.5279, "step": 736 }, { "epoch": 0.03, "grad_norm": 0.6531127356655981, "learning_rate": 1.999923743058918e-05, "loss": 0.29, "step": 737 }, { "epoch": 0.03, "grad_norm": 0.5994652674352005, "learning_rate": 1.9999218944969528e-05, "loss": 0.3155, "step": 738 }, { "epoch": 0.03, "grad_norm": 0.9980673029573037, "learning_rate": 1.99992002379768e-05, "loss": 0.4091, "step": 739 }, { "epoch": 0.03, "grad_norm": 0.6376066531632776, "learning_rate": 1.999918130961142e-05, "loss": 0.2926, "step": 740 }, { "epoch": 0.03, "grad_norm": 2.343414190770355, "learning_rate": 1.9999162159873802e-05, "loss": 0.6812, "step": 741 }, { "epoch": 0.03, "grad_norm": 0.5565838678048582, "learning_rate": 1.9999142788764373e-05, "loss": 0.3333, "step": 742 }, { "epoch": 0.03, "grad_norm": 0.7311130707357333, "learning_rate": 1.9999123196283557e-05, "loss": 0.3241, "step": 743 }, { "epoch": 0.03, "grad_norm": 1.2046386666563171, "learning_rate": 1.999910338243179e-05, "loss": 0.5745, "step": 744 }, { "epoch": 0.03, "grad_norm": 0.6964171700147567, "learning_rate": 1.9999083347209515e-05, "loss": 0.4752, "step": 745 }, { "epoch": 0.03, "grad_norm": 0.43233961864693127, "learning_rate": 1.999906309061717e-05, "loss": 0.2196, "step": 746 }, { "epoch": 0.03, "grad_norm": 0.5324720210341239, "learning_rate": 1.9999042612655205e-05, "loss": 0.3211, "step": 747 }, { "epoch": 0.03, "grad_norm": 0.899735971909227, "learning_rate": 1.9999021913324074e-05, "loss": 0.4005, "step": 748 }, { "epoch": 0.03, "grad_norm": 0.7035874211150875, "learning_rate": 1.9999000992624236e-05, "loss": 0.3465, "step": 749 }, { "epoch": 0.03, "grad_norm": 0.6538429942639787, "learning_rate": 1.9998979850556152e-05, "loss": 0.3829, "step": 750 }, { "epoch": 0.03, "grad_norm": 0.6407790926645832, "learning_rate": 1.9998958487120294e-05, "loss": 0.3873, "step": 751 }, { "epoch": 0.03, "grad_norm": 0.47937420104128425, "learning_rate": 1.999893690231713e-05, "loss": 0.2545, "step": 752 }, { "epoch": 0.03, "grad_norm": 0.463671622172868, "learning_rate": 1.9998915096147137e-05, "loss": 0.2495, "step": 753 }, { "epoch": 0.03, "grad_norm": 0.6910165827091868, "learning_rate": 1.9998893068610804e-05, "loss": 0.3842, "step": 754 }, { "epoch": 0.03, "grad_norm": 0.6016532613526538, "learning_rate": 1.9998870819708617e-05, "loss": 0.344, "step": 755 }, { "epoch": 0.03, "grad_norm": 0.977890154867551, "learning_rate": 1.999884834944106e-05, "loss": 0.5149, "step": 756 }, { "epoch": 0.03, "grad_norm": 1.0264565966166421, "learning_rate": 1.9998825657808647e-05, "loss": 0.4745, "step": 757 }, { "epoch": 0.03, "grad_norm": 0.5712145317691718, "learning_rate": 1.9998802744811867e-05, "loss": 0.3134, "step": 758 }, { "epoch": 0.03, "grad_norm": 0.39939493805463433, "learning_rate": 1.9998779610451232e-05, "loss": 0.1477, "step": 759 }, { "epoch": 0.03, "grad_norm": 0.8016835113226563, "learning_rate": 1.999875625472725e-05, "loss": 0.4425, "step": 760 }, { "epoch": 0.03, "grad_norm": 0.6525164426236323, "learning_rate": 1.9998732677640445e-05, "loss": 0.4001, "step": 761 }, { "epoch": 0.04, "grad_norm": 0.5765724325576432, "learning_rate": 1.9998708879191336e-05, "loss": 0.3834, "step": 762 }, { "epoch": 0.04, "grad_norm": 0.6221839046652744, "learning_rate": 1.999868485938045e-05, "loss": 0.2987, "step": 763 }, { "epoch": 0.04, "grad_norm": 0.4736990471163756, "learning_rate": 1.999866061820831e-05, "loss": 0.2708, "step": 764 }, { "epoch": 0.04, "grad_norm": 0.7464858462524168, "learning_rate": 1.9998636155675467e-05, "loss": 0.3757, "step": 765 }, { "epoch": 0.04, "grad_norm": 0.52752991918669, "learning_rate": 1.999861147178246e-05, "loss": 0.2853, "step": 766 }, { "epoch": 0.04, "grad_norm": 0.6628785333272043, "learning_rate": 1.9998586566529825e-05, "loss": 0.3721, "step": 767 }, { "epoch": 0.04, "grad_norm": 1.0671708414897525, "learning_rate": 1.9998561439918123e-05, "loss": 0.5772, "step": 768 }, { "epoch": 0.04, "grad_norm": 0.6821973641096672, "learning_rate": 1.9998536091947907e-05, "loss": 0.3511, "step": 769 }, { "epoch": 0.04, "grad_norm": 0.604087790137277, "learning_rate": 1.999851052261974e-05, "loss": 0.3212, "step": 770 }, { "epoch": 0.04, "grad_norm": 0.48022168622977923, "learning_rate": 1.9998484731934185e-05, "loss": 0.1839, "step": 771 }, { "epoch": 0.04, "grad_norm": 1.2865781233443245, "learning_rate": 1.9998458719891815e-05, "loss": 0.4076, "step": 772 }, { "epoch": 0.04, "grad_norm": 0.660244300049914, "learning_rate": 1.9998432486493206e-05, "loss": 0.3894, "step": 773 }, { "epoch": 0.04, "grad_norm": 0.5950558360000362, "learning_rate": 1.999840603173894e-05, "loss": 0.4217, "step": 774 }, { "epoch": 0.04, "grad_norm": 2.08724364052135, "learning_rate": 1.99983793556296e-05, "loss": 0.7936, "step": 775 }, { "epoch": 0.04, "grad_norm": 0.5568481907840251, "learning_rate": 1.9998352458165776e-05, "loss": 0.2852, "step": 776 }, { "epoch": 0.04, "grad_norm": 0.48050320675218167, "learning_rate": 1.9998325339348066e-05, "loss": 0.2171, "step": 777 }, { "epoch": 0.04, "grad_norm": 0.7895863179300946, "learning_rate": 1.999829799917707e-05, "loss": 0.4113, "step": 778 }, { "epoch": 0.04, "grad_norm": 0.5901676127366048, "learning_rate": 1.9998270437653392e-05, "loss": 0.316, "step": 779 }, { "epoch": 0.04, "grad_norm": 2.0261322048310695, "learning_rate": 1.9998242654777643e-05, "loss": 0.8467, "step": 780 }, { "epoch": 0.04, "grad_norm": 0.5924618213299074, "learning_rate": 1.9998214650550437e-05, "loss": 0.3668, "step": 781 }, { "epoch": 0.04, "grad_norm": 0.5018233916959908, "learning_rate": 1.9998186424972397e-05, "loss": 0.2948, "step": 782 }, { "epoch": 0.04, "grad_norm": 0.43261885864187866, "learning_rate": 1.9998157978044147e-05, "loss": 0.1883, "step": 783 }, { "epoch": 0.04, "grad_norm": 1.1660603663169706, "learning_rate": 1.9998129309766315e-05, "loss": 0.4999, "step": 784 }, { "epoch": 0.04, "grad_norm": 0.6537275769543484, "learning_rate": 1.9998100420139534e-05, "loss": 0.3049, "step": 785 }, { "epoch": 0.04, "grad_norm": 0.6802858286840942, "learning_rate": 1.9998071309164446e-05, "loss": 0.3758, "step": 786 }, { "epoch": 0.04, "grad_norm": 1.178637154802895, "learning_rate": 1.9998041976841698e-05, "loss": 0.4392, "step": 787 }, { "epoch": 0.04, "grad_norm": 0.5818540616134477, "learning_rate": 1.9998012423171935e-05, "loss": 0.3322, "step": 788 }, { "epoch": 0.04, "grad_norm": 0.5056956222475707, "learning_rate": 1.9997982648155813e-05, "loss": 0.2565, "step": 789 }, { "epoch": 0.04, "grad_norm": 0.8828440529641679, "learning_rate": 1.9997952651793994e-05, "loss": 0.4737, "step": 790 }, { "epoch": 0.04, "grad_norm": 0.5756106810974572, "learning_rate": 1.9997922434087137e-05, "loss": 0.3312, "step": 791 }, { "epoch": 0.04, "grad_norm": 1.5897186352977308, "learning_rate": 1.9997891995035914e-05, "loss": 0.6355, "step": 792 }, { "epoch": 0.04, "grad_norm": 0.5499526164154648, "learning_rate": 1.9997861334640997e-05, "loss": 0.4087, "step": 793 }, { "epoch": 0.04, "grad_norm": 0.5433584967866018, "learning_rate": 1.999783045290307e-05, "loss": 0.3033, "step": 794 }, { "epoch": 0.04, "grad_norm": 0.475453192226817, "learning_rate": 1.9997799349822812e-05, "loss": 0.1851, "step": 795 }, { "epoch": 0.04, "grad_norm": 0.7651960299450754, "learning_rate": 1.9997768025400908e-05, "loss": 0.4275, "step": 796 }, { "epoch": 0.04, "grad_norm": 0.5969652380177083, "learning_rate": 1.9997736479638063e-05, "loss": 0.3367, "step": 797 }, { "epoch": 0.04, "grad_norm": 0.7123633668305078, "learning_rate": 1.999770471253496e-05, "loss": 0.3366, "step": 798 }, { "epoch": 0.04, "grad_norm": 2.303039805237259, "learning_rate": 1.9997672724092315e-05, "loss": 0.7682, "step": 799 }, { "epoch": 0.04, "grad_norm": 0.5502241043167116, "learning_rate": 1.9997640514310832e-05, "loss": 0.3126, "step": 800 }, { "epoch": 0.04, "grad_norm": 0.9772419010667845, "learning_rate": 1.999760808319122e-05, "loss": 0.5978, "step": 801 }, { "epoch": 0.04, "grad_norm": 0.4668644488856414, "learning_rate": 1.999757543073421e-05, "loss": 0.3053, "step": 802 }, { "epoch": 0.04, "grad_norm": 0.6413898020191837, "learning_rate": 1.9997542556940508e-05, "loss": 0.3607, "step": 803 }, { "epoch": 0.04, "grad_norm": 0.9423226395695657, "learning_rate": 1.9997509461810848e-05, "loss": 0.373, "step": 804 }, { "epoch": 0.04, "grad_norm": 0.668903962348119, "learning_rate": 1.999747614534597e-05, "loss": 0.3338, "step": 805 }, { "epoch": 0.04, "grad_norm": 0.604130696686784, "learning_rate": 1.9997442607546603e-05, "loss": 0.349, "step": 806 }, { "epoch": 0.04, "grad_norm": 0.8830409901952293, "learning_rate": 1.9997408848413494e-05, "loss": 0.5988, "step": 807 }, { "epoch": 0.04, "grad_norm": 0.5792570557704654, "learning_rate": 1.9997374867947385e-05, "loss": 0.2605, "step": 808 }, { "epoch": 0.04, "grad_norm": 0.6560120247516701, "learning_rate": 1.9997340666149036e-05, "loss": 0.3209, "step": 809 }, { "epoch": 0.04, "grad_norm": 0.5655796778710414, "learning_rate": 1.99973062430192e-05, "loss": 0.3671, "step": 810 }, { "epoch": 0.04, "grad_norm": 0.49596274420367475, "learning_rate": 1.9997271598558637e-05, "loss": 0.1698, "step": 811 }, { "epoch": 0.04, "grad_norm": 0.5286044937573018, "learning_rate": 1.999723673276812e-05, "loss": 0.3255, "step": 812 }, { "epoch": 0.04, "grad_norm": 0.7029184033007397, "learning_rate": 1.9997201645648413e-05, "loss": 0.4832, "step": 813 }, { "epoch": 0.04, "grad_norm": 0.7475112048108062, "learning_rate": 1.99971663372003e-05, "loss": 0.4719, "step": 814 }, { "epoch": 0.04, "grad_norm": 0.40384113305517166, "learning_rate": 1.9997130807424556e-05, "loss": 0.2039, "step": 815 }, { "epoch": 0.04, "grad_norm": 1.631823826655496, "learning_rate": 1.9997095056321974e-05, "loss": 0.8259, "step": 816 }, { "epoch": 0.04, "grad_norm": 0.5845073966189925, "learning_rate": 1.999705908389334e-05, "loss": 0.3072, "step": 817 }, { "epoch": 0.04, "grad_norm": 0.5392523134790606, "learning_rate": 1.9997022890139455e-05, "loss": 0.2682, "step": 818 }, { "epoch": 0.04, "grad_norm": 1.0400808956552139, "learning_rate": 1.999698647506112e-05, "loss": 0.54, "step": 819 }, { "epoch": 0.04, "grad_norm": 1.5583154274696405, "learning_rate": 1.999694983865914e-05, "loss": 0.7755, "step": 820 }, { "epoch": 0.04, "grad_norm": 0.45759376013182856, "learning_rate": 1.9996912980934326e-05, "loss": 0.2487, "step": 821 }, { "epoch": 0.04, "grad_norm": 0.6254106544368976, "learning_rate": 1.999687590188749e-05, "loss": 0.4166, "step": 822 }, { "epoch": 0.04, "grad_norm": 0.4941280875203359, "learning_rate": 1.999683860151946e-05, "loss": 0.2888, "step": 823 }, { "epoch": 0.04, "grad_norm": 0.5173581643301941, "learning_rate": 1.9996801079831057e-05, "loss": 0.238, "step": 824 }, { "epoch": 0.04, "grad_norm": 0.5541502312038257, "learning_rate": 1.9996763336823112e-05, "loss": 0.4238, "step": 825 }, { "epoch": 0.04, "grad_norm": 1.6780349291313532, "learning_rate": 1.9996725372496463e-05, "loss": 0.7396, "step": 826 }, { "epoch": 0.04, "grad_norm": 0.6125175798532856, "learning_rate": 1.999668718685195e-05, "loss": 0.3582, "step": 827 }, { "epoch": 0.04, "grad_norm": 0.327647314937058, "learning_rate": 1.9996648779890416e-05, "loss": 0.1781, "step": 828 }, { "epoch": 0.04, "grad_norm": 0.6104878366275504, "learning_rate": 1.9996610151612716e-05, "loss": 0.3823, "step": 829 }, { "epoch": 0.04, "grad_norm": 0.5377376482925117, "learning_rate": 1.99965713020197e-05, "loss": 0.3828, "step": 830 }, { "epoch": 0.04, "grad_norm": 0.6800835935442078, "learning_rate": 1.999653223111223e-05, "loss": 0.4031, "step": 831 }, { "epoch": 0.04, "grad_norm": 1.2927429471967855, "learning_rate": 1.999649293889117e-05, "loss": 0.7099, "step": 832 }, { "epoch": 0.04, "grad_norm": 0.5187102319329004, "learning_rate": 1.999645342535739e-05, "loss": 0.3049, "step": 833 }, { "epoch": 0.04, "grad_norm": 0.875908360169765, "learning_rate": 1.9996413690511768e-05, "loss": 0.345, "step": 834 }, { "epoch": 0.04, "grad_norm": 0.6354933385684809, "learning_rate": 1.9996373734355183e-05, "loss": 0.3811, "step": 835 }, { "epoch": 0.04, "grad_norm": 0.5057055711998557, "learning_rate": 1.9996333556888517e-05, "loss": 0.3419, "step": 836 }, { "epoch": 0.04, "grad_norm": 0.5339816110697241, "learning_rate": 1.9996293158112663e-05, "loss": 0.2469, "step": 837 }, { "epoch": 0.04, "grad_norm": 1.5121879087425267, "learning_rate": 1.999625253802851e-05, "loss": 0.7203, "step": 838 }, { "epoch": 0.04, "grad_norm": 0.881828144279427, "learning_rate": 1.999621169663696e-05, "loss": 0.4695, "step": 839 }, { "epoch": 0.04, "grad_norm": 0.7222669613699906, "learning_rate": 1.9996170633938917e-05, "loss": 0.3987, "step": 840 }, { "epoch": 0.04, "grad_norm": 0.955440623602667, "learning_rate": 1.9996129349935293e-05, "loss": 0.3968, "step": 841 }, { "epoch": 0.04, "grad_norm": 0.832416792512411, "learning_rate": 1.9996087844627e-05, "loss": 0.2785, "step": 842 }, { "epoch": 0.04, "grad_norm": 0.3870388775845241, "learning_rate": 1.9996046118014955e-05, "loss": 0.2392, "step": 843 }, { "epoch": 0.04, "grad_norm": 1.8022179139572887, "learning_rate": 1.9996004170100083e-05, "loss": 0.4769, "step": 844 }, { "epoch": 0.04, "grad_norm": 0.6175786547193028, "learning_rate": 1.9995962000883312e-05, "loss": 0.3192, "step": 845 }, { "epoch": 0.04, "grad_norm": 0.6220709199747196, "learning_rate": 1.9995919610365577e-05, "loss": 0.3886, "step": 846 }, { "epoch": 0.04, "grad_norm": 0.989885066157605, "learning_rate": 1.999587699854782e-05, "loss": 0.4966, "step": 847 }, { "epoch": 0.04, "grad_norm": 0.5692009900079642, "learning_rate": 1.9995834165430975e-05, "loss": 0.2627, "step": 848 }, { "epoch": 0.04, "grad_norm": 0.4848685945627893, "learning_rate": 1.9995791111016e-05, "loss": 0.2935, "step": 849 }, { "epoch": 0.04, "grad_norm": 2.2832035975787934, "learning_rate": 1.999574783530384e-05, "loss": 0.8151, "step": 850 }, { "epoch": 0.04, "grad_norm": 0.7475504853648355, "learning_rate": 1.9995704338295462e-05, "loss": 0.2695, "step": 851 }, { "epoch": 0.04, "grad_norm": 0.9218909512968196, "learning_rate": 1.9995660619991817e-05, "loss": 0.4347, "step": 852 }, { "epoch": 0.04, "grad_norm": 0.7031620247783074, "learning_rate": 1.9995616680393885e-05, "loss": 0.4121, "step": 853 }, { "epoch": 0.04, "grad_norm": 0.8582604939052164, "learning_rate": 1.9995572519502632e-05, "loss": 0.2955, "step": 854 }, { "epoch": 0.04, "grad_norm": 0.5399389797209696, "learning_rate": 1.999552813731904e-05, "loss": 0.22, "step": 855 }, { "epoch": 0.04, "grad_norm": 1.910275505356983, "learning_rate": 1.9995483533844086e-05, "loss": 0.7414, "step": 856 }, { "epoch": 0.04, "grad_norm": 0.48641088009816946, "learning_rate": 1.9995438709078757e-05, "loss": 0.2598, "step": 857 }, { "epoch": 0.04, "grad_norm": 0.7821998840861388, "learning_rate": 1.9995393663024054e-05, "loss": 0.421, "step": 858 }, { "epoch": 0.04, "grad_norm": 1.0579596603493202, "learning_rate": 1.9995348395680968e-05, "loss": 0.6033, "step": 859 }, { "epoch": 0.04, "grad_norm": 0.41411602326651864, "learning_rate": 1.99953029070505e-05, "loss": 0.0846, "step": 860 }, { "epoch": 0.04, "grad_norm": 0.6336592227158708, "learning_rate": 1.999525719713366e-05, "loss": 0.2826, "step": 861 }, { "epoch": 0.04, "grad_norm": 2.6536167160593815, "learning_rate": 1.999521126593146e-05, "loss": 0.7309, "step": 862 }, { "epoch": 0.04, "grad_norm": 0.9089598618310353, "learning_rate": 1.9995165113444917e-05, "loss": 0.4851, "step": 863 }, { "epoch": 0.04, "grad_norm": 0.6241762585201575, "learning_rate": 1.999511873967505e-05, "loss": 0.3413, "step": 864 }, { "epoch": 0.04, "grad_norm": 0.6650235501057201, "learning_rate": 1.9995072144622888e-05, "loss": 0.4154, "step": 865 }, { "epoch": 0.04, "grad_norm": 1.1397443913282685, "learning_rate": 1.999502532828946e-05, "loss": 0.4706, "step": 866 }, { "epoch": 0.04, "grad_norm": 0.47825851561854876, "learning_rate": 1.999497829067581e-05, "loss": 0.2042, "step": 867 }, { "epoch": 0.04, "grad_norm": 2.215742964071151, "learning_rate": 1.999493103178297e-05, "loss": 0.7824, "step": 868 }, { "epoch": 0.04, "grad_norm": 0.6332233760929028, "learning_rate": 1.9994883551611993e-05, "loss": 0.3492, "step": 869 }, { "epoch": 0.04, "grad_norm": 0.6475903648006387, "learning_rate": 1.9994835850163926e-05, "loss": 0.3539, "step": 870 }, { "epoch": 0.04, "grad_norm": 1.294760104625491, "learning_rate": 1.9994787927439825e-05, "loss": 0.7046, "step": 871 }, { "epoch": 0.04, "grad_norm": 0.8632421083751662, "learning_rate": 1.9994739783440753e-05, "loss": 0.331, "step": 872 }, { "epoch": 0.04, "grad_norm": 0.7451891562684548, "learning_rate": 1.9994691418167775e-05, "loss": 0.2965, "step": 873 }, { "epoch": 0.04, "grad_norm": 0.46212658527416045, "learning_rate": 1.9994642831621964e-05, "loss": 0.2377, "step": 874 }, { "epoch": 0.04, "grad_norm": 0.9597196637391371, "learning_rate": 1.999459402380439e-05, "loss": 0.4885, "step": 875 }, { "epoch": 0.04, "grad_norm": 0.6448242288360911, "learning_rate": 1.999454499471614e-05, "loss": 0.3554, "step": 876 }, { "epoch": 0.04, "grad_norm": 0.619971338954494, "learning_rate": 1.9994495744358296e-05, "loss": 0.343, "step": 877 }, { "epoch": 0.04, "grad_norm": 1.0786091152311266, "learning_rate": 1.999444627273195e-05, "loss": 0.4976, "step": 878 }, { "epoch": 0.04, "grad_norm": 0.43500808758377507, "learning_rate": 1.9994396579838195e-05, "loss": 0.2842, "step": 879 }, { "epoch": 0.04, "grad_norm": 0.7349777479099476, "learning_rate": 1.9994346665678133e-05, "loss": 0.3091, "step": 880 }, { "epoch": 0.04, "grad_norm": 1.1785563316895715, "learning_rate": 1.999429653025287e-05, "loss": 0.4329, "step": 881 }, { "epoch": 0.04, "grad_norm": 0.5816170744190584, "learning_rate": 1.999424617356351e-05, "loss": 0.3438, "step": 882 }, { "epoch": 0.04, "grad_norm": 1.2066082102731222, "learning_rate": 1.9994195595611175e-05, "loss": 0.4574, "step": 883 }, { "epoch": 0.04, "grad_norm": 0.6383000046702992, "learning_rate": 1.9994144796396985e-05, "loss": 0.3347, "step": 884 }, { "epoch": 0.04, "grad_norm": 0.5339145127830404, "learning_rate": 1.9994093775922058e-05, "loss": 0.341, "step": 885 }, { "epoch": 0.04, "grad_norm": 0.689674617401782, "learning_rate": 1.999404253418753e-05, "loss": 0.2249, "step": 886 }, { "epoch": 0.04, "grad_norm": 0.9687013764004975, "learning_rate": 1.999399107119453e-05, "loss": 0.4649, "step": 887 }, { "epoch": 0.04, "grad_norm": 0.7504387593965892, "learning_rate": 1.9993939386944198e-05, "loss": 0.3673, "step": 888 }, { "epoch": 0.04, "grad_norm": 0.6970812273061224, "learning_rate": 1.9993887481437684e-05, "loss": 0.3862, "step": 889 }, { "epoch": 0.04, "grad_norm": 0.8115876633920088, "learning_rate": 1.999383535467613e-05, "loss": 0.0776, "step": 890 }, { "epoch": 0.04, "grad_norm": 0.7432659082859107, "learning_rate": 1.99937830066607e-05, "loss": 0.3653, "step": 891 }, { "epoch": 0.04, "grad_norm": 0.5814620025765047, "learning_rate": 1.999373043739254e-05, "loss": 0.3523, "step": 892 }, { "epoch": 0.04, "grad_norm": 0.49674721409289674, "learning_rate": 1.9993677646872826e-05, "loss": 0.2556, "step": 893 }, { "epoch": 0.04, "grad_norm": 0.6177008259762825, "learning_rate": 1.9993624635102712e-05, "loss": 0.362, "step": 894 }, { "epoch": 0.04, "grad_norm": 1.5699728279519556, "learning_rate": 1.9993571402083388e-05, "loss": 0.7655, "step": 895 }, { "epoch": 0.04, "grad_norm": 0.5942911656256071, "learning_rate": 1.9993517947816025e-05, "loss": 0.3037, "step": 896 }, { "epoch": 0.04, "grad_norm": 0.6175935867396398, "learning_rate": 1.9993464272301803e-05, "loss": 0.2869, "step": 897 }, { "epoch": 0.04, "grad_norm": 0.7461719565078261, "learning_rate": 1.9993410375541915e-05, "loss": 0.5671, "step": 898 }, { "epoch": 0.04, "grad_norm": 0.4062962453943622, "learning_rate": 1.9993356257537556e-05, "loss": 0.1353, "step": 899 }, { "epoch": 0.04, "grad_norm": 0.6205281397746988, "learning_rate": 1.9993301918289916e-05, "loss": 0.3416, "step": 900 }, { "epoch": 0.04, "grad_norm": 0.5543827196612083, "learning_rate": 1.9993247357800207e-05, "loss": 0.3173, "step": 901 }, { "epoch": 0.04, "grad_norm": 1.3417071420297146, "learning_rate": 1.999319257606963e-05, "loss": 0.6472, "step": 902 }, { "epoch": 0.04, "grad_norm": 0.6164035518734712, "learning_rate": 1.9993137573099403e-05, "loss": 0.3073, "step": 903 }, { "epoch": 0.04, "grad_norm": 0.6895871347085274, "learning_rate": 1.999308234889074e-05, "loss": 0.4406, "step": 904 }, { "epoch": 0.04, "grad_norm": 0.73236998569119, "learning_rate": 1.9993026903444868e-05, "loss": 0.4064, "step": 905 }, { "epoch": 0.04, "grad_norm": 0.516429519223869, "learning_rate": 1.9992971236763012e-05, "loss": 0.2289, "step": 906 }, { "epoch": 0.04, "grad_norm": 0.41154720105171755, "learning_rate": 1.9992915348846403e-05, "loss": 0.1912, "step": 907 }, { "epoch": 0.04, "grad_norm": 0.6310419490572622, "learning_rate": 1.9992859239696278e-05, "loss": 0.3594, "step": 908 }, { "epoch": 0.04, "grad_norm": 0.5160161500729866, "learning_rate": 1.9992802909313882e-05, "loss": 0.2982, "step": 909 }, { "epoch": 0.04, "grad_norm": 0.9306496601334961, "learning_rate": 1.999274635770046e-05, "loss": 0.5419, "step": 910 }, { "epoch": 0.04, "grad_norm": 0.8721081683280697, "learning_rate": 1.999268958485727e-05, "loss": 0.5229, "step": 911 }, { "epoch": 0.04, "grad_norm": 0.5658151351013206, "learning_rate": 1.999263259078556e-05, "loss": 0.2608, "step": 912 }, { "epoch": 0.04, "grad_norm": 0.37172475684252704, "learning_rate": 1.9992575375486592e-05, "loss": 0.2807, "step": 913 }, { "epoch": 0.04, "grad_norm": 1.0857608129906875, "learning_rate": 1.9992517938961638e-05, "loss": 0.4929, "step": 914 }, { "epoch": 0.04, "grad_norm": 0.5245775587465598, "learning_rate": 1.9992460281211966e-05, "loss": 0.3234, "step": 915 }, { "epoch": 0.04, "grad_norm": 0.4792296883052154, "learning_rate": 1.9992402402238858e-05, "loss": 0.3239, "step": 916 }, { "epoch": 0.04, "grad_norm": 1.3727946687334898, "learning_rate": 1.999234430204359e-05, "loss": 0.7596, "step": 917 }, { "epoch": 0.04, "grad_norm": 0.5324991095707883, "learning_rate": 1.9992285980627452e-05, "loss": 0.3098, "step": 918 }, { "epoch": 0.04, "grad_norm": 0.40234298372205257, "learning_rate": 1.999222743799173e-05, "loss": 0.1326, "step": 919 }, { "epoch": 0.04, "grad_norm": 0.6719099559651767, "learning_rate": 1.9992168674137724e-05, "loss": 0.4133, "step": 920 }, { "epoch": 0.04, "grad_norm": 0.5362409628455475, "learning_rate": 1.9992109689066733e-05, "loss": 0.3546, "step": 921 }, { "epoch": 0.04, "grad_norm": 0.9298121537431454, "learning_rate": 1.9992050482780067e-05, "loss": 0.4538, "step": 922 }, { "epoch": 0.04, "grad_norm": 1.530785493231479, "learning_rate": 1.9991991055279033e-05, "loss": 0.8836, "step": 923 }, { "epoch": 0.04, "grad_norm": 0.5833791293185981, "learning_rate": 1.9991931406564944e-05, "loss": 0.3268, "step": 924 }, { "epoch": 0.04, "grad_norm": 0.4364867901706965, "learning_rate": 1.9991871536639128e-05, "loss": 0.2026, "step": 925 }, { "epoch": 0.04, "grad_norm": 1.105569502819989, "learning_rate": 1.9991811445502905e-05, "loss": 0.5508, "step": 926 }, { "epoch": 0.04, "grad_norm": 0.5787875579017364, "learning_rate": 1.9991751133157608e-05, "loss": 0.3206, "step": 927 }, { "epoch": 0.04, "grad_norm": 0.600147492158044, "learning_rate": 1.999169059960457e-05, "loss": 0.3739, "step": 928 }, { "epoch": 0.04, "grad_norm": 0.718451581768904, "learning_rate": 1.9991629844845132e-05, "loss": 0.3561, "step": 929 }, { "epoch": 0.04, "grad_norm": 0.5456314912511349, "learning_rate": 1.999156886888064e-05, "loss": 0.303, "step": 930 }, { "epoch": 0.04, "grad_norm": 1.5076021551695222, "learning_rate": 1.9991507671712444e-05, "loss": 0.5714, "step": 931 }, { "epoch": 0.04, "grad_norm": 0.5595340697514456, "learning_rate": 1.99914462533419e-05, "loss": 0.3581, "step": 932 }, { "epoch": 0.04, "grad_norm": 0.414383331599564, "learning_rate": 1.999138461377036e-05, "loss": 0.235, "step": 933 }, { "epoch": 0.04, "grad_norm": 0.702445961710312, "learning_rate": 1.9991322752999195e-05, "loss": 0.3137, "step": 934 }, { "epoch": 0.04, "grad_norm": 2.5204391132755144, "learning_rate": 1.9991260671029777e-05, "loss": 0.6434, "step": 935 }, { "epoch": 0.04, "grad_norm": 0.4131843698077403, "learning_rate": 1.999119836786348e-05, "loss": 0.2977, "step": 936 }, { "epoch": 0.04, "grad_norm": 0.7913090216802677, "learning_rate": 1.9991135843501675e-05, "loss": 0.3975, "step": 937 }, { "epoch": 0.04, "grad_norm": 0.80133344769209, "learning_rate": 1.9991073097945756e-05, "loss": 0.369, "step": 938 }, { "epoch": 0.04, "grad_norm": 0.5160021159393685, "learning_rate": 1.9991010131197106e-05, "loss": 0.2451, "step": 939 }, { "epoch": 0.04, "grad_norm": 0.8689536784771379, "learning_rate": 1.999094694325712e-05, "loss": 0.4339, "step": 940 }, { "epoch": 0.04, "grad_norm": 1.238713261021553, "learning_rate": 1.99908835341272e-05, "loss": 0.5047, "step": 941 }, { "epoch": 0.04, "grad_norm": 0.5275999017994039, "learning_rate": 1.999081990380875e-05, "loss": 0.2429, "step": 942 }, { "epoch": 0.04, "grad_norm": 0.9978396791273116, "learning_rate": 1.9990756052303175e-05, "loss": 0.5525, "step": 943 }, { "epoch": 0.04, "grad_norm": 0.5125599041463396, "learning_rate": 1.999069197961189e-05, "loss": 0.3969, "step": 944 }, { "epoch": 0.04, "grad_norm": 0.39525693737493667, "learning_rate": 1.999062768573631e-05, "loss": 0.161, "step": 945 }, { "epoch": 0.04, "grad_norm": 0.5262891431816694, "learning_rate": 1.9990563170677867e-05, "loss": 0.2563, "step": 946 }, { "epoch": 0.04, "grad_norm": 0.740560031195802, "learning_rate": 1.9990498434437983e-05, "loss": 0.4753, "step": 947 }, { "epoch": 0.04, "grad_norm": 0.4780722786968319, "learning_rate": 1.999043347701809e-05, "loss": 0.2442, "step": 948 }, { "epoch": 0.04, "grad_norm": 0.9456155504350245, "learning_rate": 1.9990368298419635e-05, "loss": 0.469, "step": 949 }, { "epoch": 0.04, "grad_norm": 1.452877476684087, "learning_rate": 1.999030289864405e-05, "loss": 0.6181, "step": 950 }, { "epoch": 0.04, "grad_norm": 0.3759676570878058, "learning_rate": 1.9990237277692787e-05, "loss": 0.2051, "step": 951 }, { "epoch": 0.04, "grad_norm": 0.4485445477430428, "learning_rate": 1.99901714355673e-05, "loss": 0.2937, "step": 952 }, { "epoch": 0.04, "grad_norm": 1.523344595178022, "learning_rate": 1.999010537226905e-05, "loss": 0.7377, "step": 953 }, { "epoch": 0.04, "grad_norm": 0.584859474115927, "learning_rate": 1.999003908779949e-05, "loss": 0.3591, "step": 954 }, { "epoch": 0.04, "grad_norm": 0.5521426218392671, "learning_rate": 1.9989972582160097e-05, "loss": 0.4074, "step": 955 }, { "epoch": 0.04, "grad_norm": 0.5800887310270454, "learning_rate": 1.998990585535234e-05, "loss": 0.3842, "step": 956 }, { "epoch": 0.04, "grad_norm": 0.4881910111488858, "learning_rate": 1.9989838907377692e-05, "loss": 0.2524, "step": 957 }, { "epoch": 0.04, "grad_norm": 0.4790424890435586, "learning_rate": 1.998977173823764e-05, "loss": 0.1943, "step": 958 }, { "epoch": 0.04, "grad_norm": 0.6962271254232019, "learning_rate": 1.998970434793367e-05, "loss": 0.4686, "step": 959 }, { "epoch": 0.04, "grad_norm": 0.4835271628604268, "learning_rate": 1.9989636736467278e-05, "loss": 0.3358, "step": 960 }, { "epoch": 0.04, "grad_norm": 0.5692985075528063, "learning_rate": 1.9989568903839952e-05, "loss": 0.3927, "step": 961 }, { "epoch": 0.04, "grad_norm": 1.4956237327350015, "learning_rate": 1.99895008500532e-05, "loss": 0.6386, "step": 962 }, { "epoch": 0.04, "grad_norm": 0.4565069798471337, "learning_rate": 1.998943257510853e-05, "loss": 0.1955, "step": 963 }, { "epoch": 0.04, "grad_norm": 0.4647760789653528, "learning_rate": 1.9989364079007446e-05, "loss": 0.2637, "step": 964 }, { "epoch": 0.04, "grad_norm": 0.9861343491695227, "learning_rate": 1.998929536175147e-05, "loss": 0.5807, "step": 965 }, { "epoch": 0.04, "grad_norm": 0.7161360280077669, "learning_rate": 1.9989226423342127e-05, "loss": 0.3899, "step": 966 }, { "epoch": 0.04, "grad_norm": 0.60958303854926, "learning_rate": 1.9989157263780934e-05, "loss": 0.3888, "step": 967 }, { "epoch": 0.04, "grad_norm": 0.5243349855444062, "learning_rate": 1.9989087883069428e-05, "loss": 0.3113, "step": 968 }, { "epoch": 0.04, "grad_norm": 0.8467787643272635, "learning_rate": 1.9989018281209145e-05, "loss": 0.3414, "step": 969 }, { "epoch": 0.04, "grad_norm": 0.40066678020707, "learning_rate": 1.9988948458201625e-05, "loss": 0.2312, "step": 970 }, { "epoch": 0.04, "grad_norm": 1.3659289740799592, "learning_rate": 1.998887841404841e-05, "loss": 0.4124, "step": 971 }, { "epoch": 0.04, "grad_norm": 0.519433988464514, "learning_rate": 1.998880814875106e-05, "loss": 0.3273, "step": 972 }, { "epoch": 0.04, "grad_norm": 0.6223772842466917, "learning_rate": 1.9988737662311123e-05, "loss": 0.4644, "step": 973 }, { "epoch": 0.04, "grad_norm": 1.3361472257312175, "learning_rate": 1.998866695473016e-05, "loss": 0.4604, "step": 974 }, { "epoch": 0.04, "grad_norm": 0.5071431762912818, "learning_rate": 1.9988596026009735e-05, "loss": 0.2091, "step": 975 }, { "epoch": 0.04, "grad_norm": 0.7147933990191029, "learning_rate": 1.9988524876151425e-05, "loss": 0.3791, "step": 976 }, { "epoch": 0.04, "grad_norm": 0.9227266475815541, "learning_rate": 1.99884535051568e-05, "loss": 0.3679, "step": 977 }, { "epoch": 0.04, "grad_norm": 0.4881554860063917, "learning_rate": 1.998838191302744e-05, "loss": 0.2979, "step": 978 }, { "epoch": 0.04, "grad_norm": 0.794042099865485, "learning_rate": 1.9988310099764937e-05, "loss": 0.4219, "step": 979 }, { "epoch": 0.05, "grad_norm": 0.7035929358392993, "learning_rate": 1.9988238065370872e-05, "loss": 0.3602, "step": 980 }, { "epoch": 0.05, "grad_norm": 1.137024646625258, "learning_rate": 1.9988165809846843e-05, "loss": 0.2319, "step": 981 }, { "epoch": 0.05, "grad_norm": 0.4694239440620951, "learning_rate": 1.9988093333194447e-05, "loss": 0.2726, "step": 982 }, { "epoch": 0.05, "grad_norm": 0.6202300838670854, "learning_rate": 1.9988020635415295e-05, "loss": 0.4678, "step": 983 }, { "epoch": 0.05, "grad_norm": 0.49001755380737105, "learning_rate": 1.9987947716510988e-05, "loss": 0.237, "step": 984 }, { "epoch": 0.05, "grad_norm": 0.4831203909365543, "learning_rate": 1.998787457648315e-05, "loss": 0.2941, "step": 985 }, { "epoch": 0.05, "grad_norm": 1.6855712597431922, "learning_rate": 1.9987801215333395e-05, "loss": 0.671, "step": 986 }, { "epoch": 0.05, "grad_norm": 0.504797432938001, "learning_rate": 1.9987727633063344e-05, "loss": 0.2733, "step": 987 }, { "epoch": 0.05, "grad_norm": 0.45478965084470363, "learning_rate": 1.9987653829674633e-05, "loss": 0.3022, "step": 988 }, { "epoch": 0.05, "grad_norm": 1.0636559602385816, "learning_rate": 1.998757980516889e-05, "loss": 0.5604, "step": 989 }, { "epoch": 0.05, "grad_norm": 1.1651161689430332, "learning_rate": 1.998750555954776e-05, "loss": 0.6223, "step": 990 }, { "epoch": 0.05, "grad_norm": 0.430859085606783, "learning_rate": 1.998743109281288e-05, "loss": 0.2104, "step": 991 }, { "epoch": 0.05, "grad_norm": 0.5855933886814805, "learning_rate": 1.99873564049659e-05, "loss": 0.346, "step": 992 }, { "epoch": 0.05, "grad_norm": 0.8515855886420527, "learning_rate": 1.9987281496008476e-05, "loss": 0.3773, "step": 993 }, { "epoch": 0.05, "grad_norm": 0.6328950487110743, "learning_rate": 1.998720636594227e-05, "loss": 0.344, "step": 994 }, { "epoch": 0.05, "grad_norm": 0.8083515397381871, "learning_rate": 1.9987131014768936e-05, "loss": 0.4596, "step": 995 }, { "epoch": 0.05, "grad_norm": 0.5809661808750618, "learning_rate": 1.9987055442490148e-05, "loss": 0.3206, "step": 996 }, { "epoch": 0.05, "grad_norm": 0.40468680142753666, "learning_rate": 1.998697964910758e-05, "loss": 0.222, "step": 997 }, { "epoch": 0.05, "grad_norm": 0.8677605740898781, "learning_rate": 1.9986903634622907e-05, "loss": 0.3158, "step": 998 }, { "epoch": 0.05, "grad_norm": 0.49550263527990357, "learning_rate": 1.998682739903781e-05, "loss": 0.3925, "step": 999 }, { "epoch": 0.05, "grad_norm": 0.427527637484721, "learning_rate": 1.9986750942353983e-05, "loss": 0.2965, "step": 1000 }, { "epoch": 0.05, "grad_norm": 1.2149984943321288, "learning_rate": 1.9986674264573115e-05, "loss": 0.6908, "step": 1001 }, { "epoch": 0.05, "grad_norm": 1.2751362179001977, "learning_rate": 1.9986597365696908e-05, "loss": 0.727, "step": 1002 }, { "epoch": 0.05, "grad_norm": 0.479898713590962, "learning_rate": 1.9986520245727054e-05, "loss": 0.2536, "step": 1003 }, { "epoch": 0.05, "grad_norm": 0.5075103598388898, "learning_rate": 1.998644290466527e-05, "loss": 0.254, "step": 1004 }, { "epoch": 0.05, "grad_norm": 0.7275548516290863, "learning_rate": 1.9986365342513266e-05, "loss": 0.4644, "step": 1005 }, { "epoch": 0.05, "grad_norm": 0.500596740936002, "learning_rate": 1.9986287559272758e-05, "loss": 0.3731, "step": 1006 }, { "epoch": 0.05, "grad_norm": 0.5977477165322007, "learning_rate": 1.9986209554945467e-05, "loss": 0.3486, "step": 1007 }, { "epoch": 0.05, "grad_norm": 0.6004062653354671, "learning_rate": 1.998613132953312e-05, "loss": 0.4105, "step": 1008 }, { "epoch": 0.05, "grad_norm": 0.40608649574557887, "learning_rate": 1.9986052883037452e-05, "loss": 0.2632, "step": 1009 }, { "epoch": 0.05, "grad_norm": 0.5048406079782527, "learning_rate": 1.9985974215460198e-05, "loss": 0.2388, "step": 1010 }, { "epoch": 0.05, "grad_norm": 0.5465757473460651, "learning_rate": 1.9985895326803096e-05, "loss": 0.3937, "step": 1011 }, { "epoch": 0.05, "grad_norm": 0.5161979502632593, "learning_rate": 1.99858162170679e-05, "loss": 0.2919, "step": 1012 }, { "epoch": 0.05, "grad_norm": 0.9728933744806545, "learning_rate": 1.998573688625636e-05, "loss": 0.4861, "step": 1013 }, { "epoch": 0.05, "grad_norm": 0.7469536263271251, "learning_rate": 1.9985657334370227e-05, "loss": 0.3688, "step": 1014 }, { "epoch": 0.05, "grad_norm": 0.5988767394089402, "learning_rate": 1.9985577561411263e-05, "loss": 0.3644, "step": 1015 }, { "epoch": 0.05, "grad_norm": 0.39362226905559555, "learning_rate": 1.9985497567381237e-05, "loss": 0.269, "step": 1016 }, { "epoch": 0.05, "grad_norm": 0.9895627757818932, "learning_rate": 1.9985417352281918e-05, "loss": 0.4768, "step": 1017 }, { "epoch": 0.05, "grad_norm": 0.4899535177969121, "learning_rate": 1.9985336916115083e-05, "loss": 0.283, "step": 1018 }, { "epoch": 0.05, "grad_norm": 0.49523275071095835, "learning_rate": 1.9985256258882515e-05, "loss": 0.3636, "step": 1019 }, { "epoch": 0.05, "grad_norm": 1.762096303898484, "learning_rate": 1.9985175380585996e-05, "loss": 0.6448, "step": 1020 }, { "epoch": 0.05, "grad_norm": 0.4963071965254022, "learning_rate": 1.998509428122732e-05, "loss": 0.3384, "step": 1021 }, { "epoch": 0.05, "grad_norm": 0.46614153685632953, "learning_rate": 1.9985012960808275e-05, "loss": 0.2927, "step": 1022 }, { "epoch": 0.05, "grad_norm": 0.7771375243734702, "learning_rate": 1.998493141933067e-05, "loss": 0.3728, "step": 1023 }, { "epoch": 0.05, "grad_norm": 0.5522348263572887, "learning_rate": 1.998484965679631e-05, "loss": 0.3055, "step": 1024 }, { "epoch": 0.05, "grad_norm": 1.9954383433286353, "learning_rate": 1.9984767673206996e-05, "loss": 0.7861, "step": 1025 }, { "epoch": 0.05, "grad_norm": 1.9708932766226501, "learning_rate": 1.9984685468564553e-05, "loss": 0.3874, "step": 1026 }, { "epoch": 0.05, "grad_norm": 0.6335637508220593, "learning_rate": 1.9984603042870797e-05, "loss": 0.2982, "step": 1027 }, { "epoch": 0.05, "grad_norm": 1.0324386333682112, "learning_rate": 1.9984520396127554e-05, "loss": 0.5388, "step": 1028 }, { "epoch": 0.05, "grad_norm": 0.6289412374997954, "learning_rate": 1.9984437528336648e-05, "loss": 0.2839, "step": 1029 }, { "epoch": 0.05, "grad_norm": 0.62307764767172, "learning_rate": 1.9984354439499923e-05, "loss": 0.2518, "step": 1030 }, { "epoch": 0.05, "grad_norm": 0.6896124271842595, "learning_rate": 1.9984271129619214e-05, "loss": 0.4185, "step": 1031 }, { "epoch": 0.05, "grad_norm": 2.0140401899036675, "learning_rate": 1.9984187598696364e-05, "loss": 0.8853, "step": 1032 }, { "epoch": 0.05, "grad_norm": 0.6469496887899895, "learning_rate": 1.9984103846733222e-05, "loss": 0.2348, "step": 1033 }, { "epoch": 0.05, "grad_norm": 1.3677696866643623, "learning_rate": 1.998401987373164e-05, "loss": 0.5303, "step": 1034 }, { "epoch": 0.05, "grad_norm": 0.5418242656364944, "learning_rate": 1.9983935679693487e-05, "loss": 0.2929, "step": 1035 }, { "epoch": 0.05, "grad_norm": 0.44840025797545957, "learning_rate": 1.998385126462062e-05, "loss": 0.1952, "step": 1036 }, { "epoch": 0.05, "grad_norm": 1.5791718978340326, "learning_rate": 1.9983766628514907e-05, "loss": 0.7071, "step": 1037 }, { "epoch": 0.05, "grad_norm": 1.3593488386457846, "learning_rate": 1.9983681771378222e-05, "loss": 0.7262, "step": 1038 }, { "epoch": 0.05, "grad_norm": 0.5275877158878143, "learning_rate": 1.9983596693212447e-05, "loss": 0.2899, "step": 1039 }, { "epoch": 0.05, "grad_norm": 0.8240702281240538, "learning_rate": 1.9983511394019462e-05, "loss": 0.4713, "step": 1040 }, { "epoch": 0.05, "grad_norm": 0.42025613280846374, "learning_rate": 1.9983425873801158e-05, "loss": 0.1943, "step": 1041 }, { "epoch": 0.05, "grad_norm": 0.5338642696283458, "learning_rate": 1.998334013255943e-05, "loss": 0.3105, "step": 1042 }, { "epoch": 0.05, "grad_norm": 0.5578723726040757, "learning_rate": 1.998325417029617e-05, "loss": 0.3026, "step": 1043 }, { "epoch": 0.05, "grad_norm": 1.2462571758614014, "learning_rate": 1.9983167987013285e-05, "loss": 0.6758, "step": 1044 }, { "epoch": 0.05, "grad_norm": 0.5031919742624393, "learning_rate": 1.9983081582712684e-05, "loss": 0.3212, "step": 1045 }, { "epoch": 0.05, "grad_norm": 0.8114986414865737, "learning_rate": 1.9982994957396277e-05, "loss": 0.4849, "step": 1046 }, { "epoch": 0.05, "grad_norm": 0.5545200382339603, "learning_rate": 1.9982908111065986e-05, "loss": 0.2796, "step": 1047 }, { "epoch": 0.05, "grad_norm": 0.6211476578082482, "learning_rate": 1.9982821043723728e-05, "loss": 0.3077, "step": 1048 }, { "epoch": 0.05, "grad_norm": 0.5494028574397645, "learning_rate": 1.9982733755371434e-05, "loss": 0.2118, "step": 1049 }, { "epoch": 0.05, "grad_norm": 0.9982660593738515, "learning_rate": 1.9982646246011036e-05, "loss": 0.4784, "step": 1050 }, { "epoch": 0.05, "grad_norm": 0.5671751617924845, "learning_rate": 1.9982558515644475e-05, "loss": 0.3144, "step": 1051 }, { "epoch": 0.05, "grad_norm": 0.6048241028893799, "learning_rate": 1.9982470564273687e-05, "loss": 0.3359, "step": 1052 }, { "epoch": 0.05, "grad_norm": 0.43923445306440884, "learning_rate": 1.9982382391900624e-05, "loss": 0.2496, "step": 1053 }, { "epoch": 0.05, "grad_norm": 0.6220298356411785, "learning_rate": 1.9982293998527234e-05, "loss": 0.3267, "step": 1054 }, { "epoch": 0.05, "grad_norm": 0.5569834664710244, "learning_rate": 1.9982205384155473e-05, "loss": 0.3483, "step": 1055 }, { "epoch": 0.05, "grad_norm": 1.57446670838614, "learning_rate": 1.998211654878731e-05, "loss": 0.4894, "step": 1056 }, { "epoch": 0.05, "grad_norm": 0.4831801337142973, "learning_rate": 1.9982027492424708e-05, "loss": 0.3317, "step": 1057 }, { "epoch": 0.05, "grad_norm": 0.9697348897283845, "learning_rate": 1.9981938215069637e-05, "loss": 0.6091, "step": 1058 }, { "epoch": 0.05, "grad_norm": 0.6014356049322473, "learning_rate": 1.9981848716724074e-05, "loss": 0.3035, "step": 1059 }, { "epoch": 0.05, "grad_norm": 0.4370543057318882, "learning_rate": 1.998175899739e-05, "loss": 0.228, "step": 1060 }, { "epoch": 0.05, "grad_norm": 0.6289187465066766, "learning_rate": 1.99816690570694e-05, "loss": 0.2922, "step": 1061 }, { "epoch": 0.05, "grad_norm": 1.3422247285624014, "learning_rate": 1.9981578895764272e-05, "loss": 0.4315, "step": 1062 }, { "epoch": 0.05, "grad_norm": 0.48809492203062965, "learning_rate": 1.998148851347661e-05, "loss": 0.2954, "step": 1063 }, { "epoch": 0.05, "grad_norm": 1.7160386605376932, "learning_rate": 1.9981397910208408e-05, "loss": 0.7015, "step": 1064 }, { "epoch": 0.05, "grad_norm": 0.5456047373459513, "learning_rate": 1.9981307085961678e-05, "loss": 0.1695, "step": 1065 }, { "epoch": 0.05, "grad_norm": 0.5819838951697648, "learning_rate": 1.998121604073843e-05, "loss": 0.2246, "step": 1066 }, { "epoch": 0.05, "grad_norm": 0.6198302151296564, "learning_rate": 1.9981124774540676e-05, "loss": 0.3251, "step": 1067 }, { "epoch": 0.05, "grad_norm": 1.3423867896019503, "learning_rate": 1.9981033287370443e-05, "loss": 0.5623, "step": 1068 }, { "epoch": 0.05, "grad_norm": 0.4959791278675541, "learning_rate": 1.9980941579229748e-05, "loss": 0.2262, "step": 1069 }, { "epoch": 0.05, "grad_norm": 0.7183727038215335, "learning_rate": 1.9980849650120634e-05, "loss": 0.4454, "step": 1070 }, { "epoch": 0.05, "grad_norm": 0.5291848831758632, "learning_rate": 1.998075750004512e-05, "loss": 0.3841, "step": 1071 }, { "epoch": 0.05, "grad_norm": 0.4547122867092, "learning_rate": 1.998066512900526e-05, "loss": 0.1591, "step": 1072 }, { "epoch": 0.05, "grad_norm": 0.7470972871569668, "learning_rate": 1.998057253700309e-05, "loss": 0.3448, "step": 1073 }, { "epoch": 0.05, "grad_norm": 1.6902504409348107, "learning_rate": 1.9980479724040665e-05, "loss": 0.6228, "step": 1074 }, { "epoch": 0.05, "grad_norm": 0.450645603372596, "learning_rate": 1.998038669012004e-05, "loss": 0.2734, "step": 1075 }, { "epoch": 0.05, "grad_norm": 0.6597542532327847, "learning_rate": 1.9980293435243272e-05, "loss": 0.4375, "step": 1076 }, { "epoch": 0.05, "grad_norm": 1.2660495124764697, "learning_rate": 1.9980199959412426e-05, "loss": 0.6177, "step": 1077 }, { "epoch": 0.05, "grad_norm": 0.4420534105983837, "learning_rate": 1.998010626262957e-05, "loss": 0.1925, "step": 1078 }, { "epoch": 0.05, "grad_norm": 0.6523094156153382, "learning_rate": 1.9980012344896784e-05, "loss": 0.3956, "step": 1079 }, { "epoch": 0.05, "grad_norm": 1.8663892962641737, "learning_rate": 1.9979918206216142e-05, "loss": 0.5795, "step": 1080 }, { "epoch": 0.05, "grad_norm": 0.5987547105991187, "learning_rate": 1.9979823846589726e-05, "loss": 0.3388, "step": 1081 }, { "epoch": 0.05, "grad_norm": 0.4010706910791419, "learning_rate": 1.9979729266019635e-05, "loss": 0.1605, "step": 1082 }, { "epoch": 0.05, "grad_norm": 0.6072503371257313, "learning_rate": 1.9979634464507953e-05, "loss": 0.3862, "step": 1083 }, { "epoch": 0.05, "grad_norm": 0.8258915030622537, "learning_rate": 1.9979539442056782e-05, "loss": 0.3913, "step": 1084 }, { "epoch": 0.05, "grad_norm": 0.9548213668298036, "learning_rate": 1.9979444198668228e-05, "loss": 0.3867, "step": 1085 }, { "epoch": 0.05, "grad_norm": 0.49754332537663454, "learning_rate": 1.99793487343444e-05, "loss": 0.323, "step": 1086 }, { "epoch": 0.05, "grad_norm": 1.0226892357096733, "learning_rate": 1.9979253049087408e-05, "loss": 0.4947, "step": 1087 }, { "epoch": 0.05, "grad_norm": 0.4175762377100039, "learning_rate": 1.9979157142899367e-05, "loss": 0.2273, "step": 1088 }, { "epoch": 0.05, "grad_norm": 1.629751589344646, "learning_rate": 1.9979061015782406e-05, "loss": 0.6893, "step": 1089 }, { "epoch": 0.05, "grad_norm": 0.6745406433383159, "learning_rate": 1.9978964667738655e-05, "loss": 0.4126, "step": 1090 }, { "epoch": 0.05, "grad_norm": 0.5351538557462366, "learning_rate": 1.9978868098770244e-05, "loss": 0.3442, "step": 1091 }, { "epoch": 0.05, "grad_norm": 1.0247962491319385, "learning_rate": 1.997877130887931e-05, "loss": 0.5144, "step": 1092 }, { "epoch": 0.05, "grad_norm": 0.5227267877144892, "learning_rate": 1.9978674298067995e-05, "loss": 0.2979, "step": 1093 }, { "epoch": 0.05, "grad_norm": 0.4854007208947763, "learning_rate": 1.997857706633845e-05, "loss": 0.2995, "step": 1094 }, { "epoch": 0.05, "grad_norm": 0.5445875162885575, "learning_rate": 1.9978479613692827e-05, "loss": 0.2978, "step": 1095 }, { "epoch": 0.05, "grad_norm": 0.7484940288710152, "learning_rate": 1.9978381940133286e-05, "loss": 0.4461, "step": 1096 }, { "epoch": 0.05, "grad_norm": 0.5580674618364786, "learning_rate": 1.997828404566198e-05, "loss": 0.3942, "step": 1097 }, { "epoch": 0.05, "grad_norm": 0.48268761513020386, "learning_rate": 1.9978185930281087e-05, "loss": 0.3199, "step": 1098 }, { "epoch": 0.05, "grad_norm": 0.5457081587158406, "learning_rate": 1.997808759399277e-05, "loss": 0.3314, "step": 1099 }, { "epoch": 0.05, "grad_norm": 0.5473826611804381, "learning_rate": 1.9977989036799215e-05, "loss": 0.3073, "step": 1100 }, { "epoch": 0.05, "grad_norm": 0.6596614452992666, "learning_rate": 1.99778902587026e-05, "loss": 0.3253, "step": 1101 }, { "epoch": 0.05, "grad_norm": 0.5332226474026266, "learning_rate": 1.9977791259705105e-05, "loss": 0.3777, "step": 1102 }, { "epoch": 0.05, "grad_norm": 0.5168865944923321, "learning_rate": 1.9977692039808937e-05, "loss": 0.3262, "step": 1103 }, { "epoch": 0.05, "grad_norm": 1.4717359427446952, "learning_rate": 1.9977592599016277e-05, "loss": 0.7971, "step": 1104 }, { "epoch": 0.05, "grad_norm": 0.36266198048414167, "learning_rate": 1.9977492937329334e-05, "loss": 0.1477, "step": 1105 }, { "epoch": 0.05, "grad_norm": 0.42333667900727157, "learning_rate": 1.997739305475032e-05, "loss": 0.2717, "step": 1106 }, { "epoch": 0.05, "grad_norm": 0.5257587548497785, "learning_rate": 1.9977292951281433e-05, "loss": 0.4129, "step": 1107 }, { "epoch": 0.05, "grad_norm": 0.6042337283916402, "learning_rate": 1.99771926269249e-05, "loss": 0.3296, "step": 1108 }, { "epoch": 0.05, "grad_norm": 0.5814381820672428, "learning_rate": 1.9977092081682938e-05, "loss": 0.3625, "step": 1109 }, { "epoch": 0.05, "grad_norm": 0.6856570988627066, "learning_rate": 1.997699131555777e-05, "loss": 0.4113, "step": 1110 }, { "epoch": 0.05, "grad_norm": 0.5147832923867899, "learning_rate": 1.9976890328551633e-05, "loss": 0.2491, "step": 1111 }, { "epoch": 0.05, "grad_norm": 0.34001413591187396, "learning_rate": 1.9976789120666763e-05, "loss": 0.2075, "step": 1112 }, { "epoch": 0.05, "grad_norm": 0.7650426852052872, "learning_rate": 1.9976687691905394e-05, "loss": 0.5617, "step": 1113 }, { "epoch": 0.05, "grad_norm": 0.4391616377403253, "learning_rate": 1.9976586042269776e-05, "loss": 0.2973, "step": 1114 }, { "epoch": 0.05, "grad_norm": 0.6077550113941413, "learning_rate": 1.9976484171762158e-05, "loss": 0.345, "step": 1115 }, { "epoch": 0.05, "grad_norm": 1.069603696377442, "learning_rate": 1.9976382080384797e-05, "loss": 0.7186, "step": 1116 }, { "epoch": 0.05, "grad_norm": 0.46568385108253085, "learning_rate": 1.9976279768139953e-05, "loss": 0.2744, "step": 1117 }, { "epoch": 0.05, "grad_norm": 0.46952148027976065, "learning_rate": 1.997617723502989e-05, "loss": 0.2935, "step": 1118 }, { "epoch": 0.05, "grad_norm": 0.5630416726388018, "learning_rate": 1.9976074481056874e-05, "loss": 0.4171, "step": 1119 }, { "epoch": 0.05, "grad_norm": 0.47563302779439093, "learning_rate": 1.9975971506223187e-05, "loss": 0.2043, "step": 1120 }, { "epoch": 0.05, "grad_norm": 0.5720707343523462, "learning_rate": 1.9975868310531107e-05, "loss": 0.286, "step": 1121 }, { "epoch": 0.05, "grad_norm": 0.49894132428643306, "learning_rate": 1.9975764893982914e-05, "loss": 0.3826, "step": 1122 }, { "epoch": 0.05, "grad_norm": 1.3697330448786862, "learning_rate": 1.9975661256580902e-05, "loss": 0.7577, "step": 1123 }, { "epoch": 0.05, "grad_norm": 0.3984135182494554, "learning_rate": 1.997555739832737e-05, "loss": 0.2721, "step": 1124 }, { "epoch": 0.05, "grad_norm": 0.41592499396802457, "learning_rate": 1.9975453319224604e-05, "loss": 0.2977, "step": 1125 }, { "epoch": 0.05, "grad_norm": 0.5216556970166795, "learning_rate": 1.9975349019274918e-05, "loss": 0.377, "step": 1126 }, { "epoch": 0.05, "grad_norm": 0.5078698752888756, "learning_rate": 1.9975244498480623e-05, "loss": 0.2344, "step": 1127 }, { "epoch": 0.05, "grad_norm": 1.4877637172474483, "learning_rate": 1.9975139756844024e-05, "loss": 0.7742, "step": 1128 }, { "epoch": 0.05, "grad_norm": 0.6587749047373671, "learning_rate": 1.9975034794367448e-05, "loss": 0.469, "step": 1129 }, { "epoch": 0.05, "grad_norm": 0.4371718169764839, "learning_rate": 1.9974929611053214e-05, "loss": 0.336, "step": 1130 }, { "epoch": 0.05, "grad_norm": 0.5209726642539527, "learning_rate": 1.9974824206903657e-05, "loss": 0.3851, "step": 1131 }, { "epoch": 0.05, "grad_norm": 0.45724511141227425, "learning_rate": 1.99747185819211e-05, "loss": 0.1378, "step": 1132 }, { "epoch": 0.05, "grad_norm": 0.5862257872561927, "learning_rate": 1.997461273610789e-05, "loss": 0.3401, "step": 1133 }, { "epoch": 0.05, "grad_norm": 0.5599331181086553, "learning_rate": 1.997450666946637e-05, "loss": 0.3318, "step": 1134 }, { "epoch": 0.05, "grad_norm": 0.7651768267156948, "learning_rate": 1.9974400381998882e-05, "loss": 0.5061, "step": 1135 }, { "epoch": 0.05, "grad_norm": 0.5631345370123252, "learning_rate": 1.9974293873707784e-05, "loss": 0.3179, "step": 1136 }, { "epoch": 0.05, "grad_norm": 0.45161314694438126, "learning_rate": 1.9974187144595433e-05, "loss": 0.2789, "step": 1137 }, { "epoch": 0.05, "grad_norm": 0.4473631541633391, "learning_rate": 1.9974080194664195e-05, "loss": 0.2921, "step": 1138 }, { "epoch": 0.05, "grad_norm": 0.553464902501885, "learning_rate": 1.9973973023916428e-05, "loss": 0.312, "step": 1139 }, { "epoch": 0.05, "grad_norm": 1.0043129206370265, "learning_rate": 1.9973865632354516e-05, "loss": 0.5639, "step": 1140 }, { "epoch": 0.05, "grad_norm": 0.9102169898636161, "learning_rate": 1.997375801998083e-05, "loss": 0.5519, "step": 1141 }, { "epoch": 0.05, "grad_norm": 0.4778856945763343, "learning_rate": 1.9973650186797756e-05, "loss": 0.3003, "step": 1142 }, { "epoch": 0.05, "grad_norm": 0.6506466136706037, "learning_rate": 1.997354213280768e-05, "loss": 0.4795, "step": 1143 }, { "epoch": 0.05, "grad_norm": 0.29929405489103755, "learning_rate": 1.9973433858012992e-05, "loss": 0.1139, "step": 1144 }, { "epoch": 0.05, "grad_norm": 0.5257793408683905, "learning_rate": 1.9973325362416093e-05, "loss": 0.3036, "step": 1145 }, { "epoch": 0.05, "grad_norm": 0.5383563349492909, "learning_rate": 1.997321664601938e-05, "loss": 0.3935, "step": 1146 }, { "epoch": 0.05, "grad_norm": 1.2905773133172835, "learning_rate": 1.9973107708825264e-05, "loss": 0.4963, "step": 1147 }, { "epoch": 0.05, "grad_norm": 0.49895310084793865, "learning_rate": 1.9972998550836155e-05, "loss": 0.3061, "step": 1148 }, { "epoch": 0.05, "grad_norm": 1.5504762430691623, "learning_rate": 1.9972889172054472e-05, "loss": 0.6912, "step": 1149 }, { "epoch": 0.05, "grad_norm": 0.34121759701594295, "learning_rate": 1.9972779572482636e-05, "loss": 0.1934, "step": 1150 }, { "epoch": 0.05, "grad_norm": 0.5769537082363037, "learning_rate": 1.997266975212307e-05, "loss": 0.3179, "step": 1151 }, { "epoch": 0.05, "grad_norm": 1.3138973019842257, "learning_rate": 1.9972559710978206e-05, "loss": 0.6918, "step": 1152 }, { "epoch": 0.05, "grad_norm": 0.6634364913490587, "learning_rate": 1.997244944905048e-05, "loss": 0.3939, "step": 1153 }, { "epoch": 0.05, "grad_norm": 0.5236383855268214, "learning_rate": 1.9972338966342338e-05, "loss": 0.3066, "step": 1154 }, { "epoch": 0.05, "grad_norm": 0.7119741323801098, "learning_rate": 1.9972228262856223e-05, "loss": 0.4269, "step": 1155 }, { "epoch": 0.05, "grad_norm": 0.4093004517259618, "learning_rate": 1.9972117338594585e-05, "loss": 0.2031, "step": 1156 }, { "epoch": 0.05, "grad_norm": 0.5096893494305453, "learning_rate": 1.997200619355988e-05, "loss": 0.2303, "step": 1157 }, { "epoch": 0.05, "grad_norm": 0.5790033442524483, "learning_rate": 1.9971894827754574e-05, "loss": 0.3883, "step": 1158 }, { "epoch": 0.05, "grad_norm": 1.329675028815536, "learning_rate": 1.997178324118112e-05, "loss": 0.5448, "step": 1159 }, { "epoch": 0.05, "grad_norm": 0.46028544540181093, "learning_rate": 1.9971671433842e-05, "loss": 0.2487, "step": 1160 }, { "epoch": 0.05, "grad_norm": 1.1316403843781724, "learning_rate": 1.9971559405739683e-05, "loss": 0.6383, "step": 1161 }, { "epoch": 0.05, "grad_norm": 0.5483716526744153, "learning_rate": 1.9971447156876653e-05, "loss": 0.3645, "step": 1162 }, { "epoch": 0.05, "grad_norm": 0.3739125651290316, "learning_rate": 1.9971334687255394e-05, "loss": 0.188, "step": 1163 }, { "epoch": 0.05, "grad_norm": 1.2154986854383885, "learning_rate": 1.9971221996878395e-05, "loss": 0.5902, "step": 1164 }, { "epoch": 0.05, "grad_norm": 0.7677637238380506, "learning_rate": 1.997110908574815e-05, "loss": 0.4656, "step": 1165 }, { "epoch": 0.05, "grad_norm": 0.3686923177327155, "learning_rate": 1.9970995953867162e-05, "loss": 0.2229, "step": 1166 }, { "epoch": 0.05, "grad_norm": 2.5245175932036164, "learning_rate": 1.997088260123793e-05, "loss": 0.6066, "step": 1167 }, { "epoch": 0.05, "grad_norm": 0.6489208359182874, "learning_rate": 1.9970769027862973e-05, "loss": 0.3428, "step": 1168 }, { "epoch": 0.05, "grad_norm": 0.5311720896746537, "learning_rate": 1.9970655233744795e-05, "loss": 0.3121, "step": 1169 }, { "epoch": 0.05, "grad_norm": 0.690687377660559, "learning_rate": 1.9970541218885924e-05, "loss": 0.3616, "step": 1170 }, { "epoch": 0.05, "grad_norm": 0.7520060512569802, "learning_rate": 1.997042698328888e-05, "loss": 0.3526, "step": 1171 }, { "epoch": 0.05, "grad_norm": 0.5684629660274684, "learning_rate": 1.997031252695619e-05, "loss": 0.2889, "step": 1172 }, { "epoch": 0.05, "grad_norm": 0.7526467351528564, "learning_rate": 1.9970197849890388e-05, "loss": 0.3293, "step": 1173 }, { "epoch": 0.05, "grad_norm": 0.5315589654161116, "learning_rate": 1.997008295209402e-05, "loss": 0.3593, "step": 1174 }, { "epoch": 0.05, "grad_norm": 0.5537541499354507, "learning_rate": 1.996996783356962e-05, "loss": 0.3049, "step": 1175 }, { "epoch": 0.05, "grad_norm": 0.8059848374189049, "learning_rate": 1.9969852494319743e-05, "loss": 0.4338, "step": 1176 }, { "epoch": 0.05, "grad_norm": 0.639588083045552, "learning_rate": 1.9969736934346945e-05, "loss": 0.2725, "step": 1177 }, { "epoch": 0.05, "grad_norm": 0.46054505164548587, "learning_rate": 1.9969621153653772e-05, "loss": 0.31, "step": 1178 }, { "epoch": 0.05, "grad_norm": 0.43197130870603695, "learning_rate": 1.9969505152242805e-05, "loss": 0.2088, "step": 1179 }, { "epoch": 0.05, "grad_norm": 1.2127338665901193, "learning_rate": 1.9969388930116596e-05, "loss": 0.6592, "step": 1180 }, { "epoch": 0.05, "grad_norm": 0.46409025597188236, "learning_rate": 1.9969272487277728e-05, "loss": 0.3495, "step": 1181 }, { "epoch": 0.05, "grad_norm": 0.44776772173672413, "learning_rate": 1.9969155823728772e-05, "loss": 0.3453, "step": 1182 }, { "epoch": 0.05, "grad_norm": 0.4168916081093098, "learning_rate": 1.9969038939472315e-05, "loss": 0.1991, "step": 1183 }, { "epoch": 0.05, "grad_norm": 0.3811016333317376, "learning_rate": 1.996892183451095e-05, "loss": 0.2652, "step": 1184 }, { "epoch": 0.05, "grad_norm": 1.3337985420000187, "learning_rate": 1.9968804508847256e-05, "loss": 0.6179, "step": 1185 }, { "epoch": 0.05, "grad_norm": 0.5655449980818164, "learning_rate": 1.9968686962483842e-05, "loss": 0.3613, "step": 1186 }, { "epoch": 0.05, "grad_norm": 0.5902771393544244, "learning_rate": 1.9968569195423307e-05, "loss": 0.3264, "step": 1187 }, { "epoch": 0.05, "grad_norm": 1.1272976638182726, "learning_rate": 1.9968451207668257e-05, "loss": 0.6194, "step": 1188 }, { "epoch": 0.05, "grad_norm": 0.5389020560484016, "learning_rate": 1.9968332999221306e-05, "loss": 0.3242, "step": 1189 }, { "epoch": 0.05, "grad_norm": 0.5691163887433358, "learning_rate": 1.996821457008507e-05, "loss": 0.1716, "step": 1190 }, { "epoch": 0.05, "grad_norm": 0.44446067140285683, "learning_rate": 1.996809592026217e-05, "loss": 0.2887, "step": 1191 }, { "epoch": 0.05, "grad_norm": 1.0470164185527266, "learning_rate": 1.9967977049755233e-05, "loss": 0.4804, "step": 1192 }, { "epoch": 0.05, "grad_norm": 0.5735372632845884, "learning_rate": 1.9967857958566893e-05, "loss": 0.3033, "step": 1193 }, { "epoch": 0.05, "grad_norm": 0.4896150687434479, "learning_rate": 1.996773864669978e-05, "loss": 0.3534, "step": 1194 }, { "epoch": 0.05, "grad_norm": 1.436543974309954, "learning_rate": 1.996761911415655e-05, "loss": 0.7047, "step": 1195 }, { "epoch": 0.05, "grad_norm": 0.4051021454899342, "learning_rate": 1.9967499360939833e-05, "loss": 0.1719, "step": 1196 }, { "epoch": 0.05, "grad_norm": 0.49399555124374717, "learning_rate": 1.996737938705229e-05, "loss": 0.3191, "step": 1197 }, { "epoch": 0.06, "grad_norm": 0.6825799557939733, "learning_rate": 1.996725919249657e-05, "loss": 0.4338, "step": 1198 }, { "epoch": 0.06, "grad_norm": 0.5032434668565547, "learning_rate": 1.996713877727534e-05, "loss": 0.3081, "step": 1199 }, { "epoch": 0.06, "grad_norm": 0.8112355102822437, "learning_rate": 1.9967018141391262e-05, "loss": 0.4496, "step": 1200 }, { "epoch": 0.06, "grad_norm": 0.7108022616565274, "learning_rate": 1.996689728484701e-05, "loss": 0.4164, "step": 1201 }, { "epoch": 0.06, "grad_norm": 0.3522073846522065, "learning_rate": 1.9966776207645263e-05, "loss": 0.2108, "step": 1202 }, { "epoch": 0.06, "grad_norm": 0.45129365969278645, "learning_rate": 1.996665490978869e-05, "loss": 0.2648, "step": 1203 }, { "epoch": 0.06, "grad_norm": 0.8470233341936348, "learning_rate": 1.9966533391279988e-05, "loss": 0.5805, "step": 1204 }, { "epoch": 0.06, "grad_norm": 0.5153136741122437, "learning_rate": 1.996641165212184e-05, "loss": 0.2479, "step": 1205 }, { "epoch": 0.06, "grad_norm": 0.5738378417603217, "learning_rate": 1.9966289692316944e-05, "loss": 0.3532, "step": 1206 }, { "epoch": 0.06, "grad_norm": 1.2502564440994315, "learning_rate": 1.9966167511868005e-05, "loss": 0.7154, "step": 1207 }, { "epoch": 0.06, "grad_norm": 0.5143350617149302, "learning_rate": 1.9966045110777718e-05, "loss": 0.2714, "step": 1208 }, { "epoch": 0.06, "grad_norm": 0.3699257819047486, "learning_rate": 1.9965922489048795e-05, "loss": 0.252, "step": 1209 }, { "epoch": 0.06, "grad_norm": 0.6633411906644198, "learning_rate": 1.9965799646683958e-05, "loss": 0.4369, "step": 1210 }, { "epoch": 0.06, "grad_norm": 1.1584443332841368, "learning_rate": 1.996567658368592e-05, "loss": 0.5005, "step": 1211 }, { "epoch": 0.06, "grad_norm": 0.515885224793534, "learning_rate": 1.996555330005741e-05, "loss": 0.2788, "step": 1212 }, { "epoch": 0.06, "grad_norm": 0.5443877693120539, "learning_rate": 1.9965429795801152e-05, "loss": 0.3646, "step": 1213 }, { "epoch": 0.06, "grad_norm": 0.8790145860331129, "learning_rate": 1.9965306070919883e-05, "loss": 0.3856, "step": 1214 }, { "epoch": 0.06, "grad_norm": 0.38804465056044746, "learning_rate": 1.9965182125416344e-05, "loss": 0.2972, "step": 1215 }, { "epoch": 0.06, "grad_norm": 0.5951957334833682, "learning_rate": 1.9965057959293275e-05, "loss": 0.3267, "step": 1216 }, { "epoch": 0.06, "grad_norm": 0.5252805011592172, "learning_rate": 1.9964933572553428e-05, "loss": 0.3398, "step": 1217 }, { "epoch": 0.06, "grad_norm": 0.5157599734716378, "learning_rate": 1.9964808965199557e-05, "loss": 0.3381, "step": 1218 }, { "epoch": 0.06, "grad_norm": 1.1431014920552056, "learning_rate": 1.996468413723442e-05, "loss": 0.5341, "step": 1219 }, { "epoch": 0.06, "grad_norm": 0.5841309783027583, "learning_rate": 1.996455908866078e-05, "loss": 0.4152, "step": 1220 }, { "epoch": 0.06, "grad_norm": 0.5453515610399624, "learning_rate": 1.9964433819481407e-05, "loss": 0.3707, "step": 1221 }, { "epoch": 0.06, "grad_norm": 0.3520302625232259, "learning_rate": 1.996430832969907e-05, "loss": 0.2273, "step": 1222 }, { "epoch": 0.06, "grad_norm": 0.7856306784416885, "learning_rate": 1.996418261931655e-05, "loss": 0.3456, "step": 1223 }, { "epoch": 0.06, "grad_norm": 0.5934987523065206, "learning_rate": 1.9964056688336636e-05, "loss": 0.3311, "step": 1224 }, { "epoch": 0.06, "grad_norm": 0.527845873764024, "learning_rate": 1.9963930536762105e-05, "loss": 0.3184, "step": 1225 }, { "epoch": 0.06, "grad_norm": 0.8184956370164702, "learning_rate": 1.996380416459576e-05, "loss": 0.475, "step": 1226 }, { "epoch": 0.06, "grad_norm": 0.4663782765782296, "learning_rate": 1.9963677571840386e-05, "loss": 0.3588, "step": 1227 }, { "epoch": 0.06, "grad_norm": 0.3800536653508172, "learning_rate": 1.99635507584988e-05, "loss": 0.1972, "step": 1228 }, { "epoch": 0.06, "grad_norm": 0.5452266758475576, "learning_rate": 1.99634237245738e-05, "loss": 0.3563, "step": 1229 }, { "epoch": 0.06, "grad_norm": 0.5494057860457114, "learning_rate": 1.9963296470068207e-05, "loss": 0.3135, "step": 1230 }, { "epoch": 0.06, "grad_norm": 0.9303041072959958, "learning_rate": 1.9963168994984827e-05, "loss": 0.5988, "step": 1231 }, { "epoch": 0.06, "grad_norm": 0.6055607481087258, "learning_rate": 1.9963041299326492e-05, "loss": 0.4, "step": 1232 }, { "epoch": 0.06, "grad_norm": 0.47677557574217383, "learning_rate": 1.9962913383096025e-05, "loss": 0.3105, "step": 1233 }, { "epoch": 0.06, "grad_norm": 0.7722442172118236, "learning_rate": 1.9962785246296253e-05, "loss": 0.414, "step": 1234 }, { "epoch": 0.06, "grad_norm": 0.33160680602751264, "learning_rate": 1.9962656888930023e-05, "loss": 0.1252, "step": 1235 }, { "epoch": 0.06, "grad_norm": 0.5972832031786504, "learning_rate": 1.9962528311000172e-05, "loss": 0.3753, "step": 1236 }, { "epoch": 0.06, "grad_norm": 0.5168366680825283, "learning_rate": 1.9962399512509544e-05, "loss": 0.3595, "step": 1237 }, { "epoch": 0.06, "grad_norm": 1.2684477070233244, "learning_rate": 1.996227049346099e-05, "loss": 0.4019, "step": 1238 }, { "epoch": 0.06, "grad_norm": 0.6127849174333978, "learning_rate": 1.9962141253857376e-05, "loss": 0.3589, "step": 1239 }, { "epoch": 0.06, "grad_norm": 0.5002453328543703, "learning_rate": 1.996201179370155e-05, "loss": 0.2551, "step": 1240 }, { "epoch": 0.06, "grad_norm": 0.39307595653253935, "learning_rate": 1.9961882112996387e-05, "loss": 0.2185, "step": 1241 }, { "epoch": 0.06, "grad_norm": 0.6800073989911231, "learning_rate": 1.9961752211744753e-05, "loss": 0.347, "step": 1242 }, { "epoch": 0.06, "grad_norm": 1.7181547448702197, "learning_rate": 1.996162208994953e-05, "loss": 0.6094, "step": 1243 }, { "epoch": 0.06, "grad_norm": 1.096282627015062, "learning_rate": 1.9961491747613594e-05, "loss": 0.5667, "step": 1244 }, { "epoch": 0.06, "grad_norm": 0.478931563960749, "learning_rate": 1.9961361184739832e-05, "loss": 0.2532, "step": 1245 }, { "epoch": 0.06, "grad_norm": 0.5158876102855414, "learning_rate": 1.9961230401331132e-05, "loss": 0.2678, "step": 1246 }, { "epoch": 0.06, "grad_norm": 0.7717896304223325, "learning_rate": 1.9961099397390395e-05, "loss": 0.2956, "step": 1247 }, { "epoch": 0.06, "grad_norm": 0.7745740952118951, "learning_rate": 1.9960968172920516e-05, "loss": 0.2651, "step": 1248 }, { "epoch": 0.06, "grad_norm": 1.3475506089916525, "learning_rate": 1.9960836727924403e-05, "loss": 0.4069, "step": 1249 }, { "epoch": 0.06, "grad_norm": 2.3245086561547255, "learning_rate": 1.9960705062404963e-05, "loss": 0.6258, "step": 1250 }, { "epoch": 0.06, "grad_norm": 0.43870123509861536, "learning_rate": 1.9960573176365114e-05, "loss": 0.2467, "step": 1251 }, { "epoch": 0.06, "grad_norm": 1.3441387246331329, "learning_rate": 1.9960441069807778e-05, "loss": 0.6879, "step": 1252 }, { "epoch": 0.06, "grad_norm": 0.4774527582591661, "learning_rate": 1.996030874273587e-05, "loss": 0.2664, "step": 1253 }, { "epoch": 0.06, "grad_norm": 0.8850089900335283, "learning_rate": 1.9960176195152336e-05, "loss": 0.2881, "step": 1254 }, { "epoch": 0.06, "grad_norm": 2.5999705037944874, "learning_rate": 1.9960043427060093e-05, "loss": 0.6018, "step": 1255 }, { "epoch": 0.06, "grad_norm": 1.3197289026862369, "learning_rate": 1.995991043846209e-05, "loss": 0.4981, "step": 1256 }, { "epoch": 0.06, "grad_norm": 0.568404623518023, "learning_rate": 1.995977722936127e-05, "loss": 0.3411, "step": 1257 }, { "epoch": 0.06, "grad_norm": 0.7496475426077246, "learning_rate": 1.9959643799760583e-05, "loss": 0.3459, "step": 1258 }, { "epoch": 0.06, "grad_norm": 0.5640299892939787, "learning_rate": 1.9959510149662978e-05, "loss": 0.2973, "step": 1259 }, { "epoch": 0.06, "grad_norm": 0.6051759840439525, "learning_rate": 1.9959376279071422e-05, "loss": 0.3291, "step": 1260 }, { "epoch": 0.06, "grad_norm": 1.29615684564551, "learning_rate": 1.995924218798887e-05, "loss": 0.3918, "step": 1261 }, { "epoch": 0.06, "grad_norm": 0.6555339729114142, "learning_rate": 1.9959107876418295e-05, "loss": 0.2988, "step": 1262 }, { "epoch": 0.06, "grad_norm": 0.6154034643074809, "learning_rate": 1.9958973344362674e-05, "loss": 0.304, "step": 1263 }, { "epoch": 0.06, "grad_norm": 0.8794967563409657, "learning_rate": 1.995883859182498e-05, "loss": 0.3652, "step": 1264 }, { "epoch": 0.06, "grad_norm": 0.5332218584085938, "learning_rate": 1.9958703618808198e-05, "loss": 0.2899, "step": 1265 }, { "epoch": 0.06, "grad_norm": 0.5896183731518845, "learning_rate": 1.9958568425315316e-05, "loss": 0.3349, "step": 1266 }, { "epoch": 0.06, "grad_norm": 0.8506147131814346, "learning_rate": 1.9958433011349325e-05, "loss": 0.5098, "step": 1267 }, { "epoch": 0.06, "grad_norm": 0.4929205925320681, "learning_rate": 1.9958297376913226e-05, "loss": 0.3107, "step": 1268 }, { "epoch": 0.06, "grad_norm": 0.4779271708323994, "learning_rate": 1.9958161522010022e-05, "loss": 0.2827, "step": 1269 }, { "epoch": 0.06, "grad_norm": 1.7930503818602477, "learning_rate": 1.9958025446642722e-05, "loss": 0.6931, "step": 1270 }, { "epoch": 0.06, "grad_norm": 1.2154969649836658, "learning_rate": 1.9957889150814332e-05, "loss": 0.481, "step": 1271 }, { "epoch": 0.06, "grad_norm": 0.5080763311824805, "learning_rate": 1.9957752634527877e-05, "loss": 0.3212, "step": 1272 }, { "epoch": 0.06, "grad_norm": 0.6730797743213048, "learning_rate": 1.9957615897786377e-05, "loss": 0.3944, "step": 1273 }, { "epoch": 0.06, "grad_norm": 0.3214645245013888, "learning_rate": 1.9957478940592852e-05, "loss": 0.1526, "step": 1274 }, { "epoch": 0.06, "grad_norm": 0.5740035016500189, "learning_rate": 1.9957341762950346e-05, "loss": 0.3094, "step": 1275 }, { "epoch": 0.06, "grad_norm": 0.7623673966363658, "learning_rate": 1.9957204364861892e-05, "loss": 0.4458, "step": 1276 }, { "epoch": 0.06, "grad_norm": 0.5685605689429389, "learning_rate": 1.9957066746330524e-05, "loss": 0.335, "step": 1277 }, { "epoch": 0.06, "grad_norm": 0.5057445049492054, "learning_rate": 1.99569289073593e-05, "loss": 0.3326, "step": 1278 }, { "epoch": 0.06, "grad_norm": 1.1787415254414606, "learning_rate": 1.9956790847951264e-05, "loss": 0.614, "step": 1279 }, { "epoch": 0.06, "grad_norm": 0.33658493100232073, "learning_rate": 1.9956652568109477e-05, "loss": 0.2038, "step": 1280 }, { "epoch": 0.06, "grad_norm": 0.5516853961866517, "learning_rate": 1.9956514067836994e-05, "loss": 0.2721, "step": 1281 }, { "epoch": 0.06, "grad_norm": 1.0924449694984035, "learning_rate": 1.9956375347136893e-05, "loss": 0.5797, "step": 1282 }, { "epoch": 0.06, "grad_norm": 0.654942123347561, "learning_rate": 1.9956236406012232e-05, "loss": 0.5146, "step": 1283 }, { "epoch": 0.06, "grad_norm": 0.45894806726904186, "learning_rate": 1.9956097244466094e-05, "loss": 0.2759, "step": 1284 }, { "epoch": 0.06, "grad_norm": 0.6541774053942259, "learning_rate": 1.995595786250156e-05, "loss": 0.3758, "step": 1285 }, { "epoch": 0.06, "grad_norm": 0.5299085439080724, "learning_rate": 1.9955818260121716e-05, "loss": 0.2107, "step": 1286 }, { "epoch": 0.06, "grad_norm": 0.406442686592732, "learning_rate": 1.995567843732965e-05, "loss": 0.2077, "step": 1287 }, { "epoch": 0.06, "grad_norm": 1.2376583449428011, "learning_rate": 1.995553839412846e-05, "loss": 0.5901, "step": 1288 }, { "epoch": 0.06, "grad_norm": 0.5412765152955386, "learning_rate": 1.995539813052124e-05, "loss": 0.4048, "step": 1289 }, { "epoch": 0.06, "grad_norm": 0.47036725375015975, "learning_rate": 1.9955257646511105e-05, "loss": 0.2542, "step": 1290 }, { "epoch": 0.06, "grad_norm": 1.798913404077829, "learning_rate": 1.995511694210116e-05, "loss": 0.8552, "step": 1291 }, { "epoch": 0.06, "grad_norm": 0.5073032030697933, "learning_rate": 1.995497601729452e-05, "loss": 0.3683, "step": 1292 }, { "epoch": 0.06, "grad_norm": 0.38696405464427724, "learning_rate": 1.9954834872094308e-05, "loss": 0.1622, "step": 1293 }, { "epoch": 0.06, "grad_norm": 0.5258666353465125, "learning_rate": 1.9954693506503644e-05, "loss": 0.315, "step": 1294 }, { "epoch": 0.06, "grad_norm": 1.124654269885573, "learning_rate": 1.9954551920525662e-05, "loss": 0.5263, "step": 1295 }, { "epoch": 0.06, "grad_norm": 0.7879964457044714, "learning_rate": 1.9954410114163494e-05, "loss": 0.3652, "step": 1296 }, { "epoch": 0.06, "grad_norm": 0.45746256424010084, "learning_rate": 1.995426808742028e-05, "loss": 0.2989, "step": 1297 }, { "epoch": 0.06, "grad_norm": 0.3600814320667932, "learning_rate": 1.9954125840299165e-05, "loss": 0.1781, "step": 1298 }, { "epoch": 0.06, "grad_norm": 0.5763941836749205, "learning_rate": 1.99539833728033e-05, "loss": 0.3014, "step": 1299 }, { "epoch": 0.06, "grad_norm": 0.5026134104003527, "learning_rate": 1.995384068493583e-05, "loss": 0.371, "step": 1300 }, { "epoch": 0.06, "grad_norm": 0.5890066865568168, "learning_rate": 1.9953697776699926e-05, "loss": 0.3957, "step": 1301 }, { "epoch": 0.06, "grad_norm": 0.4951438894740915, "learning_rate": 1.9953554648098748e-05, "loss": 0.3129, "step": 1302 }, { "epoch": 0.06, "grad_norm": 0.6959817049180712, "learning_rate": 1.995341129913546e-05, "loss": 0.3745, "step": 1303 }, { "epoch": 0.06, "grad_norm": 0.5105233304054243, "learning_rate": 1.995326772981324e-05, "loss": 0.3345, "step": 1304 }, { "epoch": 0.06, "grad_norm": 0.40858323744546404, "learning_rate": 1.9953123940135265e-05, "loss": 0.2513, "step": 1305 }, { "epoch": 0.06, "grad_norm": 0.48725681968798995, "learning_rate": 1.995297993010472e-05, "loss": 0.3861, "step": 1306 }, { "epoch": 0.06, "grad_norm": 0.4517315042238337, "learning_rate": 1.9952835699724796e-05, "loss": 0.2908, "step": 1307 }, { "epoch": 0.06, "grad_norm": 0.427422462477044, "learning_rate": 1.9952691248998676e-05, "loss": 0.296, "step": 1308 }, { "epoch": 0.06, "grad_norm": 0.5576014079317251, "learning_rate": 1.995254657792957e-05, "loss": 0.3494, "step": 1309 }, { "epoch": 0.06, "grad_norm": 0.4068060189230465, "learning_rate": 1.995240168652067e-05, "loss": 0.1495, "step": 1310 }, { "epoch": 0.06, "grad_norm": 0.5180145317515017, "learning_rate": 1.995225657477519e-05, "loss": 0.3435, "step": 1311 }, { "epoch": 0.06, "grad_norm": 0.5316091913828168, "learning_rate": 1.995211124269634e-05, "loss": 0.4337, "step": 1312 }, { "epoch": 0.06, "grad_norm": 0.37967668038502944, "learning_rate": 1.9951965690287344e-05, "loss": 0.2661, "step": 1313 }, { "epoch": 0.06, "grad_norm": 0.7427038515583471, "learning_rate": 1.9951819917551418e-05, "loss": 0.363, "step": 1314 }, { "epoch": 0.06, "grad_norm": 0.6165636373261265, "learning_rate": 1.995167392449179e-05, "loss": 0.3872, "step": 1315 }, { "epoch": 0.06, "grad_norm": 0.41667590799126847, "learning_rate": 1.9951527711111692e-05, "loss": 0.3138, "step": 1316 }, { "epoch": 0.06, "grad_norm": 0.6860358935750404, "learning_rate": 1.995138127741436e-05, "loss": 0.3853, "step": 1317 }, { "epoch": 0.06, "grad_norm": 0.49486025768093755, "learning_rate": 1.995123462340304e-05, "loss": 0.3889, "step": 1318 }, { "epoch": 0.06, "grad_norm": 0.3075310113082781, "learning_rate": 1.995108774908098e-05, "loss": 0.1049, "step": 1319 }, { "epoch": 0.06, "grad_norm": 0.4416736251717402, "learning_rate": 1.9950940654451423e-05, "loss": 0.2971, "step": 1320 }, { "epoch": 0.06, "grad_norm": 0.5406353233958303, "learning_rate": 1.9950793339517632e-05, "loss": 0.3685, "step": 1321 }, { "epoch": 0.06, "grad_norm": 1.013042561872607, "learning_rate": 1.9950645804282867e-05, "loss": 0.6152, "step": 1322 }, { "epoch": 0.06, "grad_norm": 0.41939724424822994, "learning_rate": 1.9950498048750398e-05, "loss": 0.3398, "step": 1323 }, { "epoch": 0.06, "grad_norm": 0.5424359787162983, "learning_rate": 1.9950350072923487e-05, "loss": 0.3816, "step": 1324 }, { "epoch": 0.06, "grad_norm": 0.3701024490172656, "learning_rate": 1.995020187680542e-05, "loss": 0.2179, "step": 1325 }, { "epoch": 0.06, "grad_norm": 0.4756271645305226, "learning_rate": 1.9950053460399472e-05, "loss": 0.2305, "step": 1326 }, { "epoch": 0.06, "grad_norm": 0.7042955325013491, "learning_rate": 1.994990482370893e-05, "loss": 0.4529, "step": 1327 }, { "epoch": 0.06, "grad_norm": 0.4481337085040498, "learning_rate": 1.9949755966737087e-05, "loss": 0.3656, "step": 1328 }, { "epoch": 0.06, "grad_norm": 0.7429431662803592, "learning_rate": 1.9949606889487234e-05, "loss": 0.3592, "step": 1329 }, { "epoch": 0.06, "grad_norm": 0.6292078347675322, "learning_rate": 1.9949457591962675e-05, "loss": 0.3749, "step": 1330 }, { "epoch": 0.06, "grad_norm": 0.3988898580370855, "learning_rate": 1.9949308074166714e-05, "loss": 0.2006, "step": 1331 }, { "epoch": 0.06, "grad_norm": 0.4739221449065497, "learning_rate": 1.9949158336102664e-05, "loss": 0.3152, "step": 1332 }, { "epoch": 0.06, "grad_norm": 0.5057646602244956, "learning_rate": 1.9949008377773835e-05, "loss": 0.284, "step": 1333 }, { "epoch": 0.06, "grad_norm": 0.7320121911521591, "learning_rate": 1.9948858199183548e-05, "loss": 0.5449, "step": 1334 }, { "epoch": 0.06, "grad_norm": 0.915663331664433, "learning_rate": 1.9948707800335133e-05, "loss": 0.4888, "step": 1335 }, { "epoch": 0.06, "grad_norm": 0.44308116410686177, "learning_rate": 1.9948557181231915e-05, "loss": 0.2774, "step": 1336 }, { "epoch": 0.06, "grad_norm": 0.4888480715509671, "learning_rate": 1.994840634187723e-05, "loss": 0.2911, "step": 1337 }, { "epoch": 0.06, "grad_norm": 0.4952590136618777, "learning_rate": 1.9948255282274414e-05, "loss": 0.3024, "step": 1338 }, { "epoch": 0.06, "grad_norm": 0.44755179272388174, "learning_rate": 1.9948104002426814e-05, "loss": 0.3281, "step": 1339 }, { "epoch": 0.06, "grad_norm": 0.4654316373348151, "learning_rate": 1.9947952502337783e-05, "loss": 0.3495, "step": 1340 }, { "epoch": 0.06, "grad_norm": 0.6241173653307627, "learning_rate": 1.9947800782010672e-05, "loss": 0.3984, "step": 1341 }, { "epoch": 0.06, "grad_norm": 0.5647403277861793, "learning_rate": 1.994764884144884e-05, "loss": 0.2754, "step": 1342 }, { "epoch": 0.06, "grad_norm": 0.4318564591793054, "learning_rate": 1.9947496680655643e-05, "loss": 0.2167, "step": 1343 }, { "epoch": 0.06, "grad_norm": 0.42741855314127203, "learning_rate": 1.9947344299634464e-05, "loss": 0.2985, "step": 1344 }, { "epoch": 0.06, "grad_norm": 0.6073580058069129, "learning_rate": 1.9947191698388667e-05, "loss": 0.3502, "step": 1345 }, { "epoch": 0.06, "grad_norm": 0.7933908596241529, "learning_rate": 1.9947038876921634e-05, "loss": 0.4735, "step": 1346 }, { "epoch": 0.06, "grad_norm": 0.6761816913979622, "learning_rate": 1.9946885835236746e-05, "loss": 0.3859, "step": 1347 }, { "epoch": 0.06, "grad_norm": 0.5207255678585393, "learning_rate": 1.9946732573337396e-05, "loss": 0.3227, "step": 1348 }, { "epoch": 0.06, "grad_norm": 0.46105928182721706, "learning_rate": 1.994657909122697e-05, "loss": 0.2337, "step": 1349 }, { "epoch": 0.06, "grad_norm": 1.2304705823078474, "learning_rate": 1.994642538890887e-05, "loss": 0.6115, "step": 1350 }, { "epoch": 0.06, "grad_norm": 0.453079955860743, "learning_rate": 1.9946271466386498e-05, "loss": 0.3136, "step": 1351 }, { "epoch": 0.06, "grad_norm": 0.363419597438667, "learning_rate": 1.9946117323663265e-05, "loss": 0.2992, "step": 1352 }, { "epoch": 0.06, "grad_norm": 0.8591501316939716, "learning_rate": 1.9945962960742578e-05, "loss": 0.4189, "step": 1353 }, { "epoch": 0.06, "grad_norm": 0.5160097384483079, "learning_rate": 1.9945808377627857e-05, "loss": 0.3161, "step": 1354 }, { "epoch": 0.06, "grad_norm": 1.0901912471437143, "learning_rate": 1.9945653574322527e-05, "loss": 0.2919, "step": 1355 }, { "epoch": 0.06, "grad_norm": 0.4962465559181494, "learning_rate": 1.994549855083001e-05, "loss": 0.3596, "step": 1356 }, { "epoch": 0.06, "grad_norm": 0.5056413382856962, "learning_rate": 1.994534330715374e-05, "loss": 0.3541, "step": 1357 }, { "epoch": 0.06, "grad_norm": 1.0663421784113676, "learning_rate": 1.9945187843297157e-05, "loss": 0.6049, "step": 1358 }, { "epoch": 0.06, "grad_norm": 0.3707171474832705, "learning_rate": 1.9945032159263702e-05, "loss": 0.1753, "step": 1359 }, { "epoch": 0.06, "grad_norm": 0.5170023769837524, "learning_rate": 1.9944876255056817e-05, "loss": 0.3105, "step": 1360 }, { "epoch": 0.06, "grad_norm": 1.4738522844014403, "learning_rate": 1.994472013067996e-05, "loss": 0.5927, "step": 1361 }, { "epoch": 0.06, "grad_norm": 0.5351778052632282, "learning_rate": 1.9944563786136578e-05, "loss": 0.3517, "step": 1362 }, { "epoch": 0.06, "grad_norm": 0.5659327296889876, "learning_rate": 1.9944407221430144e-05, "loss": 0.384, "step": 1363 }, { "epoch": 0.06, "grad_norm": 0.5874721856277222, "learning_rate": 1.9944250436564116e-05, "loss": 0.4253, "step": 1364 }, { "epoch": 0.06, "grad_norm": 0.4516469260065655, "learning_rate": 1.994409343154197e-05, "loss": 0.0793, "step": 1365 }, { "epoch": 0.06, "grad_norm": 0.6171126877631664, "learning_rate": 1.9943936206367176e-05, "loss": 0.3698, "step": 1366 }, { "epoch": 0.06, "grad_norm": 1.4353796430746342, "learning_rate": 1.9943778761043223e-05, "loss": 0.6095, "step": 1367 }, { "epoch": 0.06, "grad_norm": 0.44048046571661476, "learning_rate": 1.9943621095573588e-05, "loss": 0.3495, "step": 1368 }, { "epoch": 0.06, "grad_norm": 0.506301107956799, "learning_rate": 1.9943463209961767e-05, "loss": 0.3212, "step": 1369 }, { "epoch": 0.06, "grad_norm": 0.5019659309405168, "learning_rate": 1.9943305104211256e-05, "loss": 0.3082, "step": 1370 }, { "epoch": 0.06, "grad_norm": 0.5159478238754939, "learning_rate": 1.9943146778325553e-05, "loss": 0.2569, "step": 1371 }, { "epoch": 0.06, "grad_norm": 0.48151152268807706, "learning_rate": 1.9942988232308163e-05, "loss": 0.2661, "step": 1372 }, { "epoch": 0.06, "grad_norm": 1.1273418440273861, "learning_rate": 1.9942829466162595e-05, "loss": 0.5935, "step": 1373 }, { "epoch": 0.06, "grad_norm": 0.9127036204733932, "learning_rate": 1.9942670479892367e-05, "loss": 0.5675, "step": 1374 }, { "epoch": 0.06, "grad_norm": 0.44651839490263934, "learning_rate": 1.9942511273500997e-05, "loss": 0.2486, "step": 1375 }, { "epoch": 0.06, "grad_norm": 0.5439851095740903, "learning_rate": 1.9942351846992012e-05, "loss": 0.3753, "step": 1376 }, { "epoch": 0.06, "grad_norm": 0.4251608321252265, "learning_rate": 1.994219220036894e-05, "loss": 0.1818, "step": 1377 }, { "epoch": 0.06, "grad_norm": 0.4742163618692392, "learning_rate": 1.994203233363531e-05, "loss": 0.2269, "step": 1378 }, { "epoch": 0.06, "grad_norm": 2.0607449153008806, "learning_rate": 1.994187224679467e-05, "loss": 0.5876, "step": 1379 }, { "epoch": 0.06, "grad_norm": 0.6541229335899382, "learning_rate": 1.9941711939850563e-05, "loss": 0.4012, "step": 1380 }, { "epoch": 0.06, "grad_norm": 0.5420574076321603, "learning_rate": 1.9941551412806533e-05, "loss": 0.2398, "step": 1381 }, { "epoch": 0.06, "grad_norm": 1.8417737989564364, "learning_rate": 1.9941390665666135e-05, "loss": 0.7334, "step": 1382 }, { "epoch": 0.06, "grad_norm": 0.4411964092154671, "learning_rate": 1.994122969843293e-05, "loss": 0.2422, "step": 1383 }, { "epoch": 0.06, "grad_norm": 0.6355271912700533, "learning_rate": 1.9941068511110485e-05, "loss": 0.324, "step": 1384 }, { "epoch": 0.06, "grad_norm": 2.1290160257842237, "learning_rate": 1.994090710370236e-05, "loss": 0.4236, "step": 1385 }, { "epoch": 0.06, "grad_norm": 1.4768260632328456, "learning_rate": 1.9940745476212135e-05, "loss": 0.6352, "step": 1386 }, { "epoch": 0.06, "grad_norm": 0.4683855589913403, "learning_rate": 1.9940583628643385e-05, "loss": 0.2897, "step": 1387 }, { "epoch": 0.06, "grad_norm": 0.6605832940412394, "learning_rate": 1.9940421560999693e-05, "loss": 0.3126, "step": 1388 }, { "epoch": 0.06, "grad_norm": 0.4215509151095274, "learning_rate": 1.994025927328465e-05, "loss": 0.1975, "step": 1389 }, { "epoch": 0.06, "grad_norm": 0.6943782380010035, "learning_rate": 1.994009676550185e-05, "loss": 0.2953, "step": 1390 }, { "epoch": 0.06, "grad_norm": 1.5067796441086032, "learning_rate": 1.9939934037654885e-05, "loss": 0.4328, "step": 1391 }, { "epoch": 0.06, "grad_norm": 4.195905971471969, "learning_rate": 1.9939771089747358e-05, "loss": 0.4647, "step": 1392 }, { "epoch": 0.06, "grad_norm": 0.5339538520452175, "learning_rate": 1.9939607921782884e-05, "loss": 0.3157, "step": 1393 }, { "epoch": 0.06, "grad_norm": 1.8817361852608496, "learning_rate": 1.993944453376507e-05, "loss": 0.5078, "step": 1394 }, { "epoch": 0.06, "grad_norm": 0.3657077680941286, "learning_rate": 1.993928092569753e-05, "loss": 0.237, "step": 1395 }, { "epoch": 0.06, "grad_norm": 0.9695611371347761, "learning_rate": 1.9939117097583894e-05, "loss": 0.2744, "step": 1396 }, { "epoch": 0.06, "grad_norm": 1.9459512517195656, "learning_rate": 1.9938953049427782e-05, "loss": 0.5594, "step": 1397 }, { "epoch": 0.06, "grad_norm": 2.8070238056153074, "learning_rate": 1.9938788781232833e-05, "loss": 0.5795, "step": 1398 }, { "epoch": 0.06, "grad_norm": 0.6498849817484506, "learning_rate": 1.9938624293002674e-05, "loss": 0.3326, "step": 1399 }, { "epoch": 0.06, "grad_norm": 0.686408096844314, "learning_rate": 1.9938459584740955e-05, "loss": 0.351, "step": 1400 }, { "epoch": 0.06, "grad_norm": 0.8540767453773233, "learning_rate": 1.993829465645132e-05, "loss": 0.3163, "step": 1401 }, { "epoch": 0.06, "grad_norm": 0.5608648450411757, "learning_rate": 1.9938129508137417e-05, "loss": 0.3095, "step": 1402 }, { "epoch": 0.06, "grad_norm": 0.7857766805375322, "learning_rate": 1.993796413980291e-05, "loss": 0.3517, "step": 1403 }, { "epoch": 0.06, "grad_norm": 0.6758768420556903, "learning_rate": 1.993779855145145e-05, "loss": 0.2749, "step": 1404 }, { "epoch": 0.06, "grad_norm": 0.5500003071239046, "learning_rate": 1.9937632743086712e-05, "loss": 0.3197, "step": 1405 }, { "epoch": 0.06, "grad_norm": 1.9210820689038117, "learning_rate": 1.993746671471236e-05, "loss": 0.7352, "step": 1406 }, { "epoch": 0.06, "grad_norm": 0.5820966766615646, "learning_rate": 1.9937300466332078e-05, "loss": 0.3374, "step": 1407 }, { "epoch": 0.06, "grad_norm": 0.4649022836543558, "learning_rate": 1.993713399794954e-05, "loss": 0.3109, "step": 1408 }, { "epoch": 0.06, "grad_norm": 0.5155670330597211, "learning_rate": 1.9936967309568427e-05, "loss": 0.2718, "step": 1409 }, { "epoch": 0.06, "grad_norm": 1.5081446333829827, "learning_rate": 1.993680040119244e-05, "loss": 0.7935, "step": 1410 }, { "epoch": 0.06, "grad_norm": 0.5975098847159008, "learning_rate": 1.993663327282527e-05, "loss": 0.2451, "step": 1411 }, { "epoch": 0.06, "grad_norm": 0.7221236999671103, "learning_rate": 1.9936465924470612e-05, "loss": 0.3723, "step": 1412 }, { "epoch": 0.06, "grad_norm": 1.199042087144706, "learning_rate": 1.993629835613218e-05, "loss": 0.5797, "step": 1413 }, { "epoch": 0.06, "grad_norm": 0.5513120312953381, "learning_rate": 1.9936130567813675e-05, "loss": 0.3087, "step": 1414 }, { "epoch": 0.07, "grad_norm": 0.603239125871396, "learning_rate": 1.9935962559518817e-05, "loss": 0.298, "step": 1415 }, { "epoch": 0.07, "grad_norm": 0.6072141420920694, "learning_rate": 1.993579433125133e-05, "loss": 0.3598, "step": 1416 }, { "epoch": 0.07, "grad_norm": 0.5124345274165617, "learning_rate": 1.9935625883014925e-05, "loss": 0.2189, "step": 1417 }, { "epoch": 0.07, "grad_norm": 0.7524818405958181, "learning_rate": 1.9935457214813344e-05, "loss": 0.4685, "step": 1418 }, { "epoch": 0.07, "grad_norm": 0.6688746450540434, "learning_rate": 1.9935288326650314e-05, "loss": 0.4157, "step": 1419 }, { "epoch": 0.07, "grad_norm": 0.5529417729383638, "learning_rate": 1.9935119218529574e-05, "loss": 0.2666, "step": 1420 }, { "epoch": 0.07, "grad_norm": 0.5216176923810515, "learning_rate": 1.9934949890454877e-05, "loss": 0.2659, "step": 1421 }, { "epoch": 0.07, "grad_norm": 0.8265677056398746, "learning_rate": 1.993478034242996e-05, "loss": 0.3322, "step": 1422 }, { "epoch": 0.07, "grad_norm": 0.4760150613711586, "learning_rate": 1.993461057445858e-05, "loss": 0.3288, "step": 1423 }, { "epoch": 0.07, "grad_norm": 0.48461423825758154, "learning_rate": 1.9934440586544498e-05, "loss": 0.3287, "step": 1424 }, { "epoch": 0.07, "grad_norm": 0.8964440325041704, "learning_rate": 1.9934270378691478e-05, "loss": 0.5651, "step": 1425 }, { "epoch": 0.07, "grad_norm": 0.5473409725961967, "learning_rate": 1.9934099950903286e-05, "loss": 0.3215, "step": 1426 }, { "epoch": 0.07, "grad_norm": 0.39444844342964397, "learning_rate": 1.9933929303183695e-05, "loss": 0.2279, "step": 1427 }, { "epoch": 0.07, "grad_norm": 0.4616044700934544, "learning_rate": 1.9933758435536485e-05, "loss": 0.2847, "step": 1428 }, { "epoch": 0.07, "grad_norm": 0.5396450467791876, "learning_rate": 1.9933587347965437e-05, "loss": 0.3402, "step": 1429 }, { "epoch": 0.07, "grad_norm": 0.6030172337779957, "learning_rate": 1.993341604047434e-05, "loss": 0.3584, "step": 1430 }, { "epoch": 0.07, "grad_norm": 0.5031896289017802, "learning_rate": 1.9933244513066983e-05, "loss": 0.417, "step": 1431 }, { "epoch": 0.07, "grad_norm": 0.5594265126655634, "learning_rate": 1.9933072765747167e-05, "loss": 0.3136, "step": 1432 }, { "epoch": 0.07, "grad_norm": 0.6178087688788707, "learning_rate": 1.99329007985187e-05, "loss": 0.3099, "step": 1433 }, { "epoch": 0.07, "grad_norm": 0.3741865565321857, "learning_rate": 1.9932728611385376e-05, "loss": 0.2447, "step": 1434 }, { "epoch": 0.07, "grad_norm": 0.4776112652831589, "learning_rate": 1.993255620435101e-05, "loss": 0.3269, "step": 1435 }, { "epoch": 0.07, "grad_norm": 0.49017365764501275, "learning_rate": 1.9932383577419432e-05, "loss": 0.3511, "step": 1436 }, { "epoch": 0.07, "grad_norm": 0.6691951883879022, "learning_rate": 1.993221073059445e-05, "loss": 0.4507, "step": 1437 }, { "epoch": 0.07, "grad_norm": 0.6986990730730509, "learning_rate": 1.9932037663879897e-05, "loss": 0.3707, "step": 1438 }, { "epoch": 0.07, "grad_norm": 0.49199227274235935, "learning_rate": 1.99318643772796e-05, "loss": 0.3571, "step": 1439 }, { "epoch": 0.07, "grad_norm": 0.34763800920784327, "learning_rate": 1.99316908707974e-05, "loss": 0.1926, "step": 1440 }, { "epoch": 0.07, "grad_norm": 0.603460983873073, "learning_rate": 1.9931517144437136e-05, "loss": 0.374, "step": 1441 }, { "epoch": 0.07, "grad_norm": 0.5006650612381522, "learning_rate": 1.9931343198202655e-05, "loss": 0.414, "step": 1442 }, { "epoch": 0.07, "grad_norm": 0.5237408236945418, "learning_rate": 1.9931169032097807e-05, "loss": 0.3036, "step": 1443 }, { "epoch": 0.07, "grad_norm": 0.7634324151572106, "learning_rate": 1.993099464612645e-05, "loss": 0.3371, "step": 1444 }, { "epoch": 0.07, "grad_norm": 0.5353228489857171, "learning_rate": 1.993082004029244e-05, "loss": 0.3511, "step": 1445 }, { "epoch": 0.07, "grad_norm": 0.3745865828741059, "learning_rate": 1.9930645214599648e-05, "loss": 0.1723, "step": 1446 }, { "epoch": 0.07, "grad_norm": 0.40050656205923973, "learning_rate": 1.993047016905194e-05, "loss": 0.3005, "step": 1447 }, { "epoch": 0.07, "grad_norm": 0.6434613549824244, "learning_rate": 1.9930294903653195e-05, "loss": 0.4429, "step": 1448 }, { "epoch": 0.07, "grad_norm": 0.5421359110896032, "learning_rate": 1.9930119418407296e-05, "loss": 0.3722, "step": 1449 }, { "epoch": 0.07, "grad_norm": 0.44766153773917966, "learning_rate": 1.992994371331812e-05, "loss": 0.2364, "step": 1450 }, { "epoch": 0.07, "grad_norm": 0.5746163874371876, "learning_rate": 1.992976778838956e-05, "loss": 0.3713, "step": 1451 }, { "epoch": 0.07, "grad_norm": 0.6913309350112251, "learning_rate": 1.9929591643625512e-05, "loss": 0.4251, "step": 1452 }, { "epoch": 0.07, "grad_norm": 0.7273438091097474, "learning_rate": 1.9929415279029875e-05, "loss": 0.3806, "step": 1453 }, { "epoch": 0.07, "grad_norm": 0.5013361599907687, "learning_rate": 1.9929238694606556e-05, "loss": 0.3458, "step": 1454 }, { "epoch": 0.07, "grad_norm": 0.3755126757137734, "learning_rate": 1.9929061890359457e-05, "loss": 0.2421, "step": 1455 }, { "epoch": 0.07, "grad_norm": 0.5256354272483413, "learning_rate": 1.9928884866292502e-05, "loss": 0.2143, "step": 1456 }, { "epoch": 0.07, "grad_norm": 0.5697943591646129, "learning_rate": 1.9928707622409605e-05, "loss": 0.3685, "step": 1457 }, { "epoch": 0.07, "grad_norm": 0.6684369488372943, "learning_rate": 1.9928530158714692e-05, "loss": 0.4263, "step": 1458 }, { "epoch": 0.07, "grad_norm": 0.6262869460056422, "learning_rate": 1.992835247521169e-05, "loss": 0.3949, "step": 1459 }, { "epoch": 0.07, "grad_norm": 0.4468050847767522, "learning_rate": 1.992817457190453e-05, "loss": 0.3098, "step": 1460 }, { "epoch": 0.07, "grad_norm": 0.3950713564536097, "learning_rate": 1.9927996448797157e-05, "loss": 0.1835, "step": 1461 }, { "epoch": 0.07, "grad_norm": 0.9146197308180377, "learning_rate": 1.992781810589351e-05, "loss": 0.4685, "step": 1462 }, { "epoch": 0.07, "grad_norm": 0.4591940260608514, "learning_rate": 1.992763954319754e-05, "loss": 0.2703, "step": 1463 }, { "epoch": 0.07, "grad_norm": 1.0069955891120153, "learning_rate": 1.9927460760713198e-05, "loss": 0.6176, "step": 1464 }, { "epoch": 0.07, "grad_norm": 0.6308389150493487, "learning_rate": 1.992728175844444e-05, "loss": 0.4691, "step": 1465 }, { "epoch": 0.07, "grad_norm": 0.5517675743027622, "learning_rate": 1.992710253639524e-05, "loss": 0.2532, "step": 1466 }, { "epoch": 0.07, "grad_norm": 0.4074979338072206, "learning_rate": 1.992692309456955e-05, "loss": 0.3079, "step": 1467 }, { "epoch": 0.07, "grad_norm": 0.521291902171804, "learning_rate": 1.9926743432971355e-05, "loss": 0.237, "step": 1468 }, { "epoch": 0.07, "grad_norm": 0.6524320103631052, "learning_rate": 1.9926563551604622e-05, "loss": 0.2853, "step": 1469 }, { "epoch": 0.07, "grad_norm": 0.9386537246669457, "learning_rate": 1.9926383450473344e-05, "loss": 0.4479, "step": 1470 }, { "epoch": 0.07, "grad_norm": 0.610611819142604, "learning_rate": 1.9926203129581503e-05, "loss": 0.3963, "step": 1471 }, { "epoch": 0.07, "grad_norm": 0.5208484063746759, "learning_rate": 1.9926022588933093e-05, "loss": 0.3101, "step": 1472 }, { "epoch": 0.07, "grad_norm": 0.3787873873359929, "learning_rate": 1.9925841828532108e-05, "loss": 0.1316, "step": 1473 }, { "epoch": 0.07, "grad_norm": 0.6247565233932612, "learning_rate": 1.9925660848382554e-05, "loss": 0.3647, "step": 1474 }, { "epoch": 0.07, "grad_norm": 0.4865431538527934, "learning_rate": 1.992547964848843e-05, "loss": 0.2888, "step": 1475 }, { "epoch": 0.07, "grad_norm": 1.4687961061338841, "learning_rate": 1.992529822885376e-05, "loss": 0.4877, "step": 1476 }, { "epoch": 0.07, "grad_norm": 1.0292343610276768, "learning_rate": 1.9925116589482548e-05, "loss": 0.5292, "step": 1477 }, { "epoch": 0.07, "grad_norm": 0.49795888908904185, "learning_rate": 1.992493473037882e-05, "loss": 0.3316, "step": 1478 }, { "epoch": 0.07, "grad_norm": 0.5689199824202983, "learning_rate": 1.9924752651546604e-05, "loss": 0.3312, "step": 1479 }, { "epoch": 0.07, "grad_norm": 0.4995229966348174, "learning_rate": 1.9924570352989932e-05, "loss": 0.1336, "step": 1480 }, { "epoch": 0.07, "grad_norm": 0.6928466225986046, "learning_rate": 1.9924387834712836e-05, "loss": 0.3538, "step": 1481 }, { "epoch": 0.07, "grad_norm": 2.1706820467100805, "learning_rate": 1.992420509671936e-05, "loss": 0.4919, "step": 1482 }, { "epoch": 0.07, "grad_norm": 0.5526627998334057, "learning_rate": 1.9924022139013548e-05, "loss": 0.3236, "step": 1483 }, { "epoch": 0.07, "grad_norm": 0.5865691830523876, "learning_rate": 1.992383896159945e-05, "loss": 0.347, "step": 1484 }, { "epoch": 0.07, "grad_norm": 1.3838575907475468, "learning_rate": 1.992365556448112e-05, "loss": 0.693, "step": 1485 }, { "epoch": 0.07, "grad_norm": 0.3732546480240207, "learning_rate": 1.9923471947662624e-05, "loss": 0.2009, "step": 1486 }, { "epoch": 0.07, "grad_norm": 0.6289618250378007, "learning_rate": 1.9923288111148022e-05, "loss": 0.3557, "step": 1487 }, { "epoch": 0.07, "grad_norm": 2.225963206120445, "learning_rate": 1.9923104054941386e-05, "loss": 0.578, "step": 1488 }, { "epoch": 0.07, "grad_norm": 0.6390694047929903, "learning_rate": 1.992291977904679e-05, "loss": 0.343, "step": 1489 }, { "epoch": 0.07, "grad_norm": 0.6043409143185197, "learning_rate": 1.9922735283468314e-05, "loss": 0.3202, "step": 1490 }, { "epoch": 0.07, "grad_norm": 0.6294854949311045, "learning_rate": 1.992255056821004e-05, "loss": 0.3677, "step": 1491 }, { "epoch": 0.07, "grad_norm": 0.5293231067889055, "learning_rate": 1.992236563327606e-05, "loss": 0.2172, "step": 1492 }, { "epoch": 0.07, "grad_norm": 0.5918134445860066, "learning_rate": 1.992218047867047e-05, "loss": 0.3259, "step": 1493 }, { "epoch": 0.07, "grad_norm": 1.008386117279372, "learning_rate": 1.992199510439737e-05, "loss": 0.4155, "step": 1494 }, { "epoch": 0.07, "grad_norm": 0.8220185485267679, "learning_rate": 1.992180951046086e-05, "loss": 0.3406, "step": 1495 }, { "epoch": 0.07, "grad_norm": 0.5218961313792428, "learning_rate": 1.9921623696865046e-05, "loss": 0.3019, "step": 1496 }, { "epoch": 0.07, "grad_norm": 1.6689870911268383, "learning_rate": 1.992143766361405e-05, "loss": 0.6691, "step": 1497 }, { "epoch": 0.07, "grad_norm": 0.4236442596716266, "learning_rate": 1.9921251410711986e-05, "loss": 0.2986, "step": 1498 }, { "epoch": 0.07, "grad_norm": 0.478992561272367, "learning_rate": 1.992106493816298e-05, "loss": 0.2932, "step": 1499 }, { "epoch": 0.07, "grad_norm": 0.5363452749761587, "learning_rate": 1.9920878245971152e-05, "loss": 0.3342, "step": 1500 }, { "epoch": 0.07, "grad_norm": 1.481371912253873, "learning_rate": 1.9920691334140646e-05, "loss": 0.737, "step": 1501 }, { "epoch": 0.07, "grad_norm": 0.5341309178144115, "learning_rate": 1.9920504202675595e-05, "loss": 0.2471, "step": 1502 }, { "epoch": 0.07, "grad_norm": 0.6164072971562811, "learning_rate": 1.9920316851580142e-05, "loss": 0.36, "step": 1503 }, { "epoch": 0.07, "grad_norm": 0.9386550228673339, "learning_rate": 1.9920129280858434e-05, "loss": 0.5267, "step": 1504 }, { "epoch": 0.07, "grad_norm": 0.4840531695952259, "learning_rate": 1.991994149051463e-05, "loss": 0.2331, "step": 1505 }, { "epoch": 0.07, "grad_norm": 0.3435908178834481, "learning_rate": 1.9919753480552877e-05, "loss": 0.2319, "step": 1506 }, { "epoch": 0.07, "grad_norm": 0.7148718727366655, "learning_rate": 1.9919565250977345e-05, "loss": 0.4447, "step": 1507 }, { "epoch": 0.07, "grad_norm": 0.4735822461374496, "learning_rate": 1.9919376801792198e-05, "loss": 0.2341, "step": 1508 }, { "epoch": 0.07, "grad_norm": 1.0735293429647461, "learning_rate": 1.991918813300161e-05, "loss": 0.6065, "step": 1509 }, { "epoch": 0.07, "grad_norm": 0.6691131889075466, "learning_rate": 1.9918999244609757e-05, "loss": 0.4491, "step": 1510 }, { "epoch": 0.07, "grad_norm": 0.46447420797096395, "learning_rate": 1.9918810136620818e-05, "loss": 0.2825, "step": 1511 }, { "epoch": 0.07, "grad_norm": 0.35224650826100323, "learning_rate": 1.9918620809038987e-05, "loss": 0.1635, "step": 1512 }, { "epoch": 0.07, "grad_norm": 1.6048888561669619, "learning_rate": 1.9918431261868445e-05, "loss": 0.7987, "step": 1513 }, { "epoch": 0.07, "grad_norm": 0.5269809647470082, "learning_rate": 1.99182414951134e-05, "loss": 0.3221, "step": 1514 }, { "epoch": 0.07, "grad_norm": 0.5024405326355051, "learning_rate": 1.9918051508778045e-05, "loss": 0.2753, "step": 1515 }, { "epoch": 0.07, "grad_norm": 1.0102530515698134, "learning_rate": 1.991786130286659e-05, "loss": 0.5391, "step": 1516 }, { "epoch": 0.07, "grad_norm": 0.5095096149203909, "learning_rate": 1.9917670877383244e-05, "loss": 0.3395, "step": 1517 }, { "epoch": 0.07, "grad_norm": 0.34202074954775563, "learning_rate": 1.9917480232332226e-05, "loss": 0.1801, "step": 1518 }, { "epoch": 0.07, "grad_norm": 0.6726957961539836, "learning_rate": 1.9917289367717748e-05, "loss": 0.4603, "step": 1519 }, { "epoch": 0.07, "grad_norm": 0.49330561121157185, "learning_rate": 1.9917098283544046e-05, "loss": 0.285, "step": 1520 }, { "epoch": 0.07, "grad_norm": 0.8908499294300708, "learning_rate": 1.9916906979815345e-05, "loss": 0.4642, "step": 1521 }, { "epoch": 0.07, "grad_norm": 0.4936213530434846, "learning_rate": 1.9916715456535884e-05, "loss": 0.3922, "step": 1522 }, { "epoch": 0.07, "grad_norm": 0.5360738411411892, "learning_rate": 1.9916523713709898e-05, "loss": 0.2964, "step": 1523 }, { "epoch": 0.07, "grad_norm": 0.4024242535447471, "learning_rate": 1.9916331751341635e-05, "loss": 0.2468, "step": 1524 }, { "epoch": 0.07, "grad_norm": 0.5146570721272373, "learning_rate": 1.9916139569435345e-05, "loss": 0.3566, "step": 1525 }, { "epoch": 0.07, "grad_norm": 0.5241297550316266, "learning_rate": 1.9915947167995286e-05, "loss": 0.3294, "step": 1526 }, { "epoch": 0.07, "grad_norm": 0.4992925608496907, "learning_rate": 1.991575454702571e-05, "loss": 0.4009, "step": 1527 }, { "epoch": 0.07, "grad_norm": 0.9095677394369446, "learning_rate": 1.9915561706530882e-05, "loss": 0.4231, "step": 1528 }, { "epoch": 0.07, "grad_norm": 0.5337551094289965, "learning_rate": 1.991536864651508e-05, "loss": 0.3156, "step": 1529 }, { "epoch": 0.07, "grad_norm": 0.36667257920464214, "learning_rate": 1.991517536698257e-05, "loss": 0.2976, "step": 1530 }, { "epoch": 0.07, "grad_norm": 1.3485829050691673, "learning_rate": 1.9914981867937635e-05, "loss": 0.6222, "step": 1531 }, { "epoch": 0.07, "grad_norm": 0.40615256703752833, "learning_rate": 1.991478814938456e-05, "loss": 0.2492, "step": 1532 }, { "epoch": 0.07, "grad_norm": 0.9322147976390529, "learning_rate": 1.9914594211327623e-05, "loss": 0.5351, "step": 1533 }, { "epoch": 0.07, "grad_norm": 0.517942649927803, "learning_rate": 1.9914400053771136e-05, "loss": 0.3193, "step": 1534 }, { "epoch": 0.07, "grad_norm": 0.42162433110708175, "learning_rate": 1.9914205676719382e-05, "loss": 0.3005, "step": 1535 }, { "epoch": 0.07, "grad_norm": 0.48892814476864305, "learning_rate": 1.991401108017667e-05, "loss": 0.2688, "step": 1536 }, { "epoch": 0.07, "grad_norm": 1.244581596447113, "learning_rate": 1.991381626414731e-05, "loss": 0.7364, "step": 1537 }, { "epoch": 0.07, "grad_norm": 0.49450559805278826, "learning_rate": 1.9913621228635608e-05, "loss": 0.2743, "step": 1538 }, { "epoch": 0.07, "grad_norm": 0.52999713367487, "learning_rate": 1.9913425973645894e-05, "loss": 0.378, "step": 1539 }, { "epoch": 0.07, "grad_norm": 0.5227068460743706, "learning_rate": 1.991323049918248e-05, "loss": 0.274, "step": 1540 }, { "epoch": 0.07, "grad_norm": 0.4820267632534573, "learning_rate": 1.9913034805249697e-05, "loss": 0.2009, "step": 1541 }, { "epoch": 0.07, "grad_norm": 0.43346931641409564, "learning_rate": 1.9912838891851877e-05, "loss": 0.334, "step": 1542 }, { "epoch": 0.07, "grad_norm": 1.0289571281384937, "learning_rate": 1.991264275899336e-05, "loss": 0.5898, "step": 1543 }, { "epoch": 0.07, "grad_norm": 0.5082834174720121, "learning_rate": 1.9912446406678484e-05, "loss": 0.2666, "step": 1544 }, { "epoch": 0.07, "grad_norm": 0.3681903439352201, "learning_rate": 1.99122498349116e-05, "loss": 0.2482, "step": 1545 }, { "epoch": 0.07, "grad_norm": 0.5488576444182283, "learning_rate": 1.9912053043697058e-05, "loss": 0.4004, "step": 1546 }, { "epoch": 0.07, "grad_norm": 0.4155364622945553, "learning_rate": 1.9911856033039213e-05, "loss": 0.2196, "step": 1547 }, { "epoch": 0.07, "grad_norm": 0.6581225781750124, "learning_rate": 1.9911658802942432e-05, "loss": 0.4075, "step": 1548 }, { "epoch": 0.07, "grad_norm": 0.8759597520374959, "learning_rate": 1.9911461353411074e-05, "loss": 0.6097, "step": 1549 }, { "epoch": 0.07, "grad_norm": 0.38823356006275106, "learning_rate": 1.991126368444952e-05, "loss": 0.3148, "step": 1550 }, { "epoch": 0.07, "grad_norm": 0.6430386672639617, "learning_rate": 1.9911065796062137e-05, "loss": 0.3295, "step": 1551 }, { "epoch": 0.07, "grad_norm": 0.501096721596823, "learning_rate": 1.9910867688253307e-05, "loss": 0.2124, "step": 1552 }, { "epoch": 0.07, "grad_norm": 0.5579527929957838, "learning_rate": 1.9910669361027425e-05, "loss": 0.2916, "step": 1553 }, { "epoch": 0.07, "grad_norm": 0.45283228337814474, "learning_rate": 1.991047081438887e-05, "loss": 0.2913, "step": 1554 }, { "epoch": 0.07, "grad_norm": 1.0624903975926498, "learning_rate": 1.991027204834205e-05, "loss": 0.5811, "step": 1555 }, { "epoch": 0.07, "grad_norm": 0.49870610844660096, "learning_rate": 1.991007306289135e-05, "loss": 0.3519, "step": 1556 }, { "epoch": 0.07, "grad_norm": 0.49115759949122106, "learning_rate": 1.9909873858041187e-05, "loss": 0.2112, "step": 1557 }, { "epoch": 0.07, "grad_norm": 0.3832792079133785, "learning_rate": 1.990967443379597e-05, "loss": 0.2972, "step": 1558 }, { "epoch": 0.07, "grad_norm": 0.9625658885904004, "learning_rate": 1.990947479016011e-05, "loss": 0.5549, "step": 1559 }, { "epoch": 0.07, "grad_norm": 0.5932491643804502, "learning_rate": 1.990927492713803e-05, "loss": 0.297, "step": 1560 }, { "epoch": 0.07, "grad_norm": 0.8675220525746768, "learning_rate": 1.9909074844734152e-05, "loss": 0.4535, "step": 1561 }, { "epoch": 0.07, "grad_norm": 0.4960835082167606, "learning_rate": 1.990887454295291e-05, "loss": 0.2949, "step": 1562 }, { "epoch": 0.07, "grad_norm": 0.5461508613100479, "learning_rate": 1.9908674021798735e-05, "loss": 0.3808, "step": 1563 }, { "epoch": 0.07, "grad_norm": 0.30497126466041974, "learning_rate": 1.9908473281276068e-05, "loss": 0.0824, "step": 1564 }, { "epoch": 0.07, "grad_norm": 0.8057675810488799, "learning_rate": 1.990827232138935e-05, "loss": 0.366, "step": 1565 }, { "epoch": 0.07, "grad_norm": 0.556448624705394, "learning_rate": 1.9908071142143036e-05, "loss": 0.3168, "step": 1566 }, { "epoch": 0.07, "grad_norm": 1.172294514574269, "learning_rate": 1.9907869743541576e-05, "loss": 0.4791, "step": 1567 }, { "epoch": 0.07, "grad_norm": 0.6424740817969817, "learning_rate": 1.9907668125589424e-05, "loss": 0.3808, "step": 1568 }, { "epoch": 0.07, "grad_norm": 0.45521081546171216, "learning_rate": 1.9907466288291054e-05, "loss": 0.2831, "step": 1569 }, { "epoch": 0.07, "grad_norm": 0.3853403378918902, "learning_rate": 1.9907264231650927e-05, "loss": 0.22, "step": 1570 }, { "epoch": 0.07, "grad_norm": 0.728215779597287, "learning_rate": 1.990706195567352e-05, "loss": 0.3672, "step": 1571 }, { "epoch": 0.07, "grad_norm": 0.7422748526949783, "learning_rate": 1.9906859460363307e-05, "loss": 0.3681, "step": 1572 }, { "epoch": 0.07, "grad_norm": 1.8082452053295823, "learning_rate": 1.990665674572478e-05, "loss": 0.5557, "step": 1573 }, { "epoch": 0.07, "grad_norm": 0.4457112867292302, "learning_rate": 1.9906453811762415e-05, "loss": 0.257, "step": 1574 }, { "epoch": 0.07, "grad_norm": 0.7499465778804616, "learning_rate": 1.9906250658480712e-05, "loss": 0.4179, "step": 1575 }, { "epoch": 0.07, "grad_norm": 0.3588120538579702, "learning_rate": 1.9906047285884168e-05, "loss": 0.232, "step": 1576 }, { "epoch": 0.07, "grad_norm": 1.1577489811422432, "learning_rate": 1.9905843693977288e-05, "loss": 0.3668, "step": 1577 }, { "epoch": 0.07, "grad_norm": 0.5303478905504729, "learning_rate": 1.9905639882764573e-05, "loss": 0.3144, "step": 1578 }, { "epoch": 0.07, "grad_norm": 1.4562165819409276, "learning_rate": 1.9905435852250535e-05, "loss": 0.5832, "step": 1579 }, { "epoch": 0.07, "grad_norm": 0.5423490235388511, "learning_rate": 1.9905231602439697e-05, "loss": 0.2935, "step": 1580 }, { "epoch": 0.07, "grad_norm": 0.5110195187141678, "learning_rate": 1.990502713333658e-05, "loss": 0.3359, "step": 1581 }, { "epoch": 0.07, "grad_norm": 0.4071794586068742, "learning_rate": 1.9904822444945706e-05, "loss": 0.2927, "step": 1582 }, { "epoch": 0.07, "grad_norm": 0.5213134783081707, "learning_rate": 1.9904617537271608e-05, "loss": 0.1887, "step": 1583 }, { "epoch": 0.07, "grad_norm": 0.8555721123981634, "learning_rate": 1.9904412410318828e-05, "loss": 0.4186, "step": 1584 }, { "epoch": 0.07, "grad_norm": 1.8746265717332116, "learning_rate": 1.99042070640919e-05, "loss": 0.8166, "step": 1585 }, { "epoch": 0.07, "grad_norm": 0.5361763529706056, "learning_rate": 1.9904001498595374e-05, "loss": 0.3668, "step": 1586 }, { "epoch": 0.07, "grad_norm": 0.4967554784829038, "learning_rate": 1.99037957138338e-05, "loss": 0.2397, "step": 1587 }, { "epoch": 0.07, "grad_norm": 0.36306897827220835, "learning_rate": 1.990358970981174e-05, "loss": 0.2037, "step": 1588 }, { "epoch": 0.07, "grad_norm": 0.4731443444917627, "learning_rate": 1.9903383486533743e-05, "loss": 0.4076, "step": 1589 }, { "epoch": 0.07, "grad_norm": 0.5574544132989624, "learning_rate": 1.990317704400438e-05, "loss": 0.2661, "step": 1590 }, { "epoch": 0.07, "grad_norm": 0.9888429485348341, "learning_rate": 1.9902970382228226e-05, "loss": 0.5912, "step": 1591 }, { "epoch": 0.07, "grad_norm": 0.9528458373653957, "learning_rate": 1.990276350120985e-05, "loss": 0.5419, "step": 1592 }, { "epoch": 0.07, "grad_norm": 0.517579710946933, "learning_rate": 1.9902556400953835e-05, "loss": 0.2476, "step": 1593 }, { "epoch": 0.07, "grad_norm": 0.4500146942769244, "learning_rate": 1.9902349081464767e-05, "loss": 0.3548, "step": 1594 }, { "epoch": 0.07, "grad_norm": 0.8633345033696577, "learning_rate": 1.9902141542747233e-05, "loss": 0.5357, "step": 1595 }, { "epoch": 0.07, "grad_norm": 0.4842912604598192, "learning_rate": 1.9901933784805828e-05, "loss": 0.1659, "step": 1596 }, { "epoch": 0.07, "grad_norm": 0.5689434009108335, "learning_rate": 1.9901725807645154e-05, "loss": 0.3734, "step": 1597 }, { "epoch": 0.07, "grad_norm": 1.4465191235184665, "learning_rate": 1.9901517611269813e-05, "loss": 0.7006, "step": 1598 }, { "epoch": 0.07, "grad_norm": 0.5449836359935476, "learning_rate": 1.9901309195684418e-05, "loss": 0.2953, "step": 1599 }, { "epoch": 0.07, "grad_norm": 0.994237056401829, "learning_rate": 1.9901100560893578e-05, "loss": 0.4323, "step": 1600 }, { "epoch": 0.07, "grad_norm": 0.552301594696816, "learning_rate": 1.9900891706901914e-05, "loss": 0.3929, "step": 1601 }, { "epoch": 0.07, "grad_norm": 0.399950324865002, "learning_rate": 1.990068263371405e-05, "loss": 0.2592, "step": 1602 }, { "epoch": 0.07, "grad_norm": 0.3735408961654906, "learning_rate": 1.9900473341334616e-05, "loss": 0.135, "step": 1603 }, { "epoch": 0.07, "grad_norm": 0.9387129568378368, "learning_rate": 1.9900263829768246e-05, "loss": 0.5299, "step": 1604 }, { "epoch": 0.07, "grad_norm": 0.5585915389768993, "learning_rate": 1.9900054099019575e-05, "loss": 0.3238, "step": 1605 }, { "epoch": 0.07, "grad_norm": 0.5122355485072088, "learning_rate": 1.9899844149093244e-05, "loss": 0.3286, "step": 1606 }, { "epoch": 0.07, "grad_norm": 0.800521640669671, "learning_rate": 1.9899633979993913e-05, "loss": 0.4826, "step": 1607 }, { "epoch": 0.07, "grad_norm": 0.507745206351051, "learning_rate": 1.989942359172622e-05, "loss": 0.3077, "step": 1608 }, { "epoch": 0.07, "grad_norm": 0.3302013299867463, "learning_rate": 1.9899212984294837e-05, "loss": 0.2203, "step": 1609 }, { "epoch": 0.07, "grad_norm": 1.0595432551140034, "learning_rate": 1.9899002157704412e-05, "loss": 0.5075, "step": 1610 }, { "epoch": 0.07, "grad_norm": 0.49726877068949554, "learning_rate": 1.9898791111959624e-05, "loss": 0.3101, "step": 1611 }, { "epoch": 0.07, "grad_norm": 0.6763077556030332, "learning_rate": 1.9898579847065143e-05, "loss": 0.5238, "step": 1612 }, { "epoch": 0.07, "grad_norm": 0.49801115849229144, "learning_rate": 1.989836836302564e-05, "loss": 0.3203, "step": 1613 }, { "epoch": 0.07, "grad_norm": 0.4240985732303542, "learning_rate": 1.9898156659845806e-05, "loss": 0.2942, "step": 1614 }, { "epoch": 0.07, "grad_norm": 0.4849334992439375, "learning_rate": 1.9897944737530325e-05, "loss": 0.2358, "step": 1615 }, { "epoch": 0.07, "grad_norm": 1.192371844720865, "learning_rate": 1.989773259608388e-05, "loss": 0.4501, "step": 1616 }, { "epoch": 0.07, "grad_norm": 0.45438057048368363, "learning_rate": 1.9897520235511177e-05, "loss": 0.3287, "step": 1617 }, { "epoch": 0.07, "grad_norm": 0.47832547543209186, "learning_rate": 1.989730765581692e-05, "loss": 0.3981, "step": 1618 }, { "epoch": 0.07, "grad_norm": 1.0887922022788834, "learning_rate": 1.9897094857005808e-05, "loss": 0.3157, "step": 1619 }, { "epoch": 0.07, "grad_norm": 0.37112109980242863, "learning_rate": 1.9896881839082554e-05, "loss": 0.2486, "step": 1620 }, { "epoch": 0.07, "grad_norm": 0.49782844225620065, "learning_rate": 1.9896668602051877e-05, "loss": 0.2301, "step": 1621 }, { "epoch": 0.07, "grad_norm": 0.484764191192648, "learning_rate": 1.9896455145918493e-05, "loss": 0.3139, "step": 1622 }, { "epoch": 0.07, "grad_norm": 0.4246721921128543, "learning_rate": 1.9896241470687132e-05, "loss": 0.3033, "step": 1623 }, { "epoch": 0.07, "grad_norm": 0.8093219665446579, "learning_rate": 1.9896027576362523e-05, "loss": 0.5553, "step": 1624 }, { "epoch": 0.07, "grad_norm": 0.5067173743916127, "learning_rate": 1.98958134629494e-05, "loss": 0.3656, "step": 1625 }, { "epoch": 0.07, "grad_norm": 0.4778987820016766, "learning_rate": 1.9895599130452507e-05, "loss": 0.224, "step": 1626 }, { "epoch": 0.07, "grad_norm": 0.6483303406555019, "learning_rate": 1.9895384578876586e-05, "loss": 0.3679, "step": 1627 }, { "epoch": 0.07, "grad_norm": 0.7685597554944757, "learning_rate": 1.9895169808226384e-05, "loss": 0.5533, "step": 1628 }, { "epoch": 0.07, "grad_norm": 0.45344718766203446, "learning_rate": 1.9894954818506666e-05, "loss": 0.2295, "step": 1629 }, { "epoch": 0.07, "grad_norm": 0.41250133428465596, "learning_rate": 1.989473960972218e-05, "loss": 0.3166, "step": 1630 }, { "epoch": 0.07, "grad_norm": 1.303157109297041, "learning_rate": 1.9894524181877696e-05, "loss": 0.6755, "step": 1631 }, { "epoch": 0.07, "grad_norm": 0.4882174012369254, "learning_rate": 1.9894308534977985e-05, "loss": 0.2341, "step": 1632 }, { "epoch": 0.08, "grad_norm": 0.3770057595932876, "learning_rate": 1.989409266902782e-05, "loss": 0.2714, "step": 1633 }, { "epoch": 0.08, "grad_norm": 0.9844196557664995, "learning_rate": 1.9893876584031975e-05, "loss": 0.5868, "step": 1634 }, { "epoch": 0.08, "grad_norm": 0.4450450253321704, "learning_rate": 1.9893660279995244e-05, "loss": 0.298, "step": 1635 }, { "epoch": 0.08, "grad_norm": 0.4597287665483372, "learning_rate": 1.9893443756922407e-05, "loss": 0.3177, "step": 1636 }, { "epoch": 0.08, "grad_norm": 0.4405564572491822, "learning_rate": 1.989322701481826e-05, "loss": 0.3429, "step": 1637 }, { "epoch": 0.08, "grad_norm": 0.4728958715696773, "learning_rate": 1.9893010053687606e-05, "loss": 0.2939, "step": 1638 }, { "epoch": 0.08, "grad_norm": 1.1689618272043967, "learning_rate": 1.989279287353524e-05, "loss": 0.4529, "step": 1639 }, { "epoch": 0.08, "grad_norm": 0.8526981173087761, "learning_rate": 1.9892575474365977e-05, "loss": 0.5214, "step": 1640 }, { "epoch": 0.08, "grad_norm": 0.4527155098074377, "learning_rate": 1.989235785618463e-05, "loss": 0.2924, "step": 1641 }, { "epoch": 0.08, "grad_norm": 0.3097026637349731, "learning_rate": 1.9892140018996006e-05, "loss": 0.1641, "step": 1642 }, { "epoch": 0.08, "grad_norm": 1.0630176550954846, "learning_rate": 1.9891921962804942e-05, "loss": 0.5816, "step": 1643 }, { "epoch": 0.08, "grad_norm": 0.5414931523287791, "learning_rate": 1.989170368761626e-05, "loss": 0.3299, "step": 1644 }, { "epoch": 0.08, "grad_norm": 0.4976880333796028, "learning_rate": 1.9891485193434793e-05, "loss": 0.2925, "step": 1645 }, { "epoch": 0.08, "grad_norm": 1.1646133362805333, "learning_rate": 1.9891266480265375e-05, "loss": 0.5288, "step": 1646 }, { "epoch": 0.08, "grad_norm": 0.4953483727227327, "learning_rate": 1.989104754811285e-05, "loss": 0.2767, "step": 1647 }, { "epoch": 0.08, "grad_norm": 0.3138336748197105, "learning_rate": 1.9890828396982068e-05, "loss": 0.1629, "step": 1648 }, { "epoch": 0.08, "grad_norm": 0.5506180846955351, "learning_rate": 1.9890609026877877e-05, "loss": 0.3613, "step": 1649 }, { "epoch": 0.08, "grad_norm": 0.5467883967262682, "learning_rate": 1.9890389437805132e-05, "loss": 0.3132, "step": 1650 }, { "epoch": 0.08, "grad_norm": 0.6905715706838206, "learning_rate": 1.98901696297687e-05, "loss": 0.427, "step": 1651 }, { "epoch": 0.08, "grad_norm": 1.2294580035663532, "learning_rate": 1.9889949602773444e-05, "loss": 0.476, "step": 1652 }, { "epoch": 0.08, "grad_norm": 0.3916650481120573, "learning_rate": 1.9889729356824236e-05, "loss": 0.2915, "step": 1653 }, { "epoch": 0.08, "grad_norm": 0.38776614631871353, "learning_rate": 1.9889508891925953e-05, "loss": 0.2589, "step": 1654 }, { "epoch": 0.08, "grad_norm": 1.4931787681862343, "learning_rate": 1.9889288208083476e-05, "loss": 0.3153, "step": 1655 }, { "epoch": 0.08, "grad_norm": 0.5027262591833506, "learning_rate": 1.9889067305301685e-05, "loss": 0.2891, "step": 1656 }, { "epoch": 0.08, "grad_norm": 0.6581571670767872, "learning_rate": 1.9888846183585478e-05, "loss": 0.378, "step": 1657 }, { "epoch": 0.08, "grad_norm": 0.9304593286937063, "learning_rate": 1.988862484293975e-05, "loss": 0.4036, "step": 1658 }, { "epoch": 0.08, "grad_norm": 0.45142523695020226, "learning_rate": 1.9888403283369393e-05, "loss": 0.3302, "step": 1659 }, { "epoch": 0.08, "grad_norm": 0.6149164576347504, "learning_rate": 1.9888181504879323e-05, "loss": 0.3209, "step": 1660 }, { "epoch": 0.08, "grad_norm": 0.4547645917702171, "learning_rate": 1.988795950747444e-05, "loss": 0.2543, "step": 1661 }, { "epoch": 0.08, "grad_norm": 0.5828886577255246, "learning_rate": 1.9887737291159665e-05, "loss": 0.3375, "step": 1662 }, { "epoch": 0.08, "grad_norm": 0.7524311304727527, "learning_rate": 1.988751485593992e-05, "loss": 0.4427, "step": 1663 }, { "epoch": 0.08, "grad_norm": 0.7206443752254421, "learning_rate": 1.9887292201820125e-05, "loss": 0.4471, "step": 1664 }, { "epoch": 0.08, "grad_norm": 0.46760598212826837, "learning_rate": 1.9887069328805207e-05, "loss": 0.2397, "step": 1665 }, { "epoch": 0.08, "grad_norm": 0.4200051755598856, "learning_rate": 1.9886846236900102e-05, "loss": 0.2593, "step": 1666 }, { "epoch": 0.08, "grad_norm": 0.5352550944129146, "learning_rate": 1.9886622926109755e-05, "loss": 0.3131, "step": 1667 }, { "epoch": 0.08, "grad_norm": 0.47558925357094656, "learning_rate": 1.9886399396439104e-05, "loss": 0.2583, "step": 1668 }, { "epoch": 0.08, "grad_norm": 0.5195161085138071, "learning_rate": 1.9886175647893097e-05, "loss": 0.375, "step": 1669 }, { "epoch": 0.08, "grad_norm": 1.23648971252555, "learning_rate": 1.9885951680476693e-05, "loss": 0.6727, "step": 1670 }, { "epoch": 0.08, "grad_norm": 0.5432546410343398, "learning_rate": 1.9885727494194843e-05, "loss": 0.2455, "step": 1671 }, { "epoch": 0.08, "grad_norm": 0.38313882128025817, "learning_rate": 1.9885503089052517e-05, "loss": 0.2278, "step": 1672 }, { "epoch": 0.08, "grad_norm": 0.5411188084812046, "learning_rate": 1.9885278465054682e-05, "loss": 0.3454, "step": 1673 }, { "epoch": 0.08, "grad_norm": 0.5998430639642894, "learning_rate": 1.9885053622206305e-05, "loss": 0.3152, "step": 1674 }, { "epoch": 0.08, "grad_norm": 0.8232777457317849, "learning_rate": 1.988482856051237e-05, "loss": 0.4278, "step": 1675 }, { "epoch": 0.08, "grad_norm": 1.2179614324615915, "learning_rate": 1.9884603279977858e-05, "loss": 0.7844, "step": 1676 }, { "epoch": 0.08, "grad_norm": 0.48192141350880985, "learning_rate": 1.9884377780607755e-05, "loss": 0.3221, "step": 1677 }, { "epoch": 0.08, "grad_norm": 0.5357704901220921, "learning_rate": 1.9884152062407057e-05, "loss": 0.299, "step": 1678 }, { "epoch": 0.08, "grad_norm": 0.44068915798364067, "learning_rate": 1.9883926125380757e-05, "loss": 0.2689, "step": 1679 }, { "epoch": 0.08, "grad_norm": 0.6731271429884765, "learning_rate": 1.988369996953386e-05, "loss": 0.4098, "step": 1680 }, { "epoch": 0.08, "grad_norm": 0.5144584101710767, "learning_rate": 1.988347359487137e-05, "loss": 0.2695, "step": 1681 }, { "epoch": 0.08, "grad_norm": 1.4463496848907313, "learning_rate": 1.9883247001398304e-05, "loss": 0.7756, "step": 1682 }, { "epoch": 0.08, "grad_norm": 0.9946146263286346, "learning_rate": 1.988302018911967e-05, "loss": 0.4598, "step": 1683 }, { "epoch": 0.08, "grad_norm": 0.3525617375334301, "learning_rate": 1.9882793158040498e-05, "loss": 0.2057, "step": 1684 }, { "epoch": 0.08, "grad_norm": 0.5596522464997875, "learning_rate": 1.9882565908165808e-05, "loss": 0.3088, "step": 1685 }, { "epoch": 0.08, "grad_norm": 1.0155448381599104, "learning_rate": 1.9882338439500634e-05, "loss": 0.5071, "step": 1686 }, { "epoch": 0.08, "grad_norm": 0.49575001764748106, "learning_rate": 1.9882110752050015e-05, "loss": 0.2744, "step": 1687 }, { "epoch": 0.08, "grad_norm": 1.6863284735308244, "learning_rate": 1.9881882845818987e-05, "loss": 0.8465, "step": 1688 }, { "epoch": 0.08, "grad_norm": 0.40225741232124107, "learning_rate": 1.9881654720812594e-05, "loss": 0.2899, "step": 1689 }, { "epoch": 0.08, "grad_norm": 0.6141325518413121, "learning_rate": 1.9881426377035897e-05, "loss": 0.3594, "step": 1690 }, { "epoch": 0.08, "grad_norm": 1.069362132590462, "learning_rate": 1.9881197814493936e-05, "loss": 0.4805, "step": 1691 }, { "epoch": 0.08, "grad_norm": 0.4252447823942229, "learning_rate": 1.9880969033191782e-05, "loss": 0.2438, "step": 1692 }, { "epoch": 0.08, "grad_norm": 0.40439029522586517, "learning_rate": 1.98807400331345e-05, "loss": 0.2649, "step": 1693 }, { "epoch": 0.08, "grad_norm": 1.1276575940889462, "learning_rate": 1.988051081432715e-05, "loss": 0.6103, "step": 1694 }, { "epoch": 0.08, "grad_norm": 0.7899311739463363, "learning_rate": 1.988028137677482e-05, "loss": 0.368, "step": 1695 }, { "epoch": 0.08, "grad_norm": 0.5751096057935386, "learning_rate": 1.9880051720482578e-05, "loss": 0.3401, "step": 1696 }, { "epoch": 0.08, "grad_norm": 0.6877863918955138, "learning_rate": 1.9879821845455518e-05, "loss": 0.3739, "step": 1697 }, { "epoch": 0.08, "grad_norm": 0.31648028887624996, "learning_rate": 1.9879591751698722e-05, "loss": 0.1836, "step": 1698 }, { "epoch": 0.08, "grad_norm": 0.523113514850932, "learning_rate": 1.9879361439217284e-05, "loss": 0.3132, "step": 1699 }, { "epoch": 0.08, "grad_norm": 0.7125821880293861, "learning_rate": 1.987913090801631e-05, "loss": 0.4693, "step": 1700 }, { "epoch": 0.08, "grad_norm": 0.5035830238003098, "learning_rate": 1.98789001581009e-05, "loss": 0.2794, "step": 1701 }, { "epoch": 0.08, "grad_norm": 0.5551740711498908, "learning_rate": 1.987866918947616e-05, "loss": 0.3586, "step": 1702 }, { "epoch": 0.08, "grad_norm": 0.7045511744517999, "learning_rate": 1.98784380021472e-05, "loss": 0.5288, "step": 1703 }, { "epoch": 0.08, "grad_norm": 0.39000663888317577, "learning_rate": 1.987820659611915e-05, "loss": 0.1985, "step": 1704 }, { "epoch": 0.08, "grad_norm": 0.36800394086173355, "learning_rate": 1.9877974971397127e-05, "loss": 0.2606, "step": 1705 }, { "epoch": 0.08, "grad_norm": 1.725130084538193, "learning_rate": 1.9877743127986257e-05, "loss": 0.8952, "step": 1706 }, { "epoch": 0.08, "grad_norm": 0.649753449726772, "learning_rate": 1.9877511065891676e-05, "loss": 0.3451, "step": 1707 }, { "epoch": 0.08, "grad_norm": 0.5207297072375531, "learning_rate": 1.987727878511852e-05, "loss": 0.3086, "step": 1708 }, { "epoch": 0.08, "grad_norm": 0.6524409046184836, "learning_rate": 1.987704628567193e-05, "loss": 0.4395, "step": 1709 }, { "epoch": 0.08, "grad_norm": 0.4795991306948634, "learning_rate": 1.9876813567557054e-05, "loss": 0.2167, "step": 1710 }, { "epoch": 0.08, "grad_norm": 0.4254843411252256, "learning_rate": 1.9876580630779047e-05, "loss": 0.2845, "step": 1711 }, { "epoch": 0.08, "grad_norm": 0.721172479639759, "learning_rate": 1.9876347475343062e-05, "loss": 0.4632, "step": 1712 }, { "epoch": 0.08, "grad_norm": 0.3624708519004964, "learning_rate": 1.9876114101254264e-05, "loss": 0.2649, "step": 1713 }, { "epoch": 0.08, "grad_norm": 0.4770494234188718, "learning_rate": 1.9875880508517818e-05, "loss": 0.2813, "step": 1714 }, { "epoch": 0.08, "grad_norm": 0.7501680679197721, "learning_rate": 1.9875646697138896e-05, "loss": 0.5356, "step": 1715 }, { "epoch": 0.08, "grad_norm": 0.513524764590981, "learning_rate": 1.9875412667122674e-05, "loss": 0.3498, "step": 1716 }, { "epoch": 0.08, "grad_norm": 0.3873540607627214, "learning_rate": 1.9875178418474336e-05, "loss": 0.1962, "step": 1717 }, { "epoch": 0.08, "grad_norm": 0.6252522171839381, "learning_rate": 1.9874943951199063e-05, "loss": 0.3652, "step": 1718 }, { "epoch": 0.08, "grad_norm": 0.9612689489621633, "learning_rate": 1.987470926530205e-05, "loss": 0.542, "step": 1719 }, { "epoch": 0.08, "grad_norm": 0.4203414924716156, "learning_rate": 1.9874474360788493e-05, "loss": 0.2743, "step": 1720 }, { "epoch": 0.08, "grad_norm": 0.5713223355242646, "learning_rate": 1.9874239237663588e-05, "loss": 0.3982, "step": 1721 }, { "epoch": 0.08, "grad_norm": 1.5007293139938762, "learning_rate": 1.9874003895932544e-05, "loss": 0.722, "step": 1722 }, { "epoch": 0.08, "grad_norm": 0.4415202174568797, "learning_rate": 1.987376833560057e-05, "loss": 0.2498, "step": 1723 }, { "epoch": 0.08, "grad_norm": 0.6759396856638009, "learning_rate": 1.987353255667288e-05, "loss": 0.389, "step": 1724 }, { "epoch": 0.08, "grad_norm": 0.5071586810012434, "learning_rate": 1.98732965591547e-05, "loss": 0.3577, "step": 1725 }, { "epoch": 0.08, "grad_norm": 0.4968893859447733, "learning_rate": 1.9873060343051246e-05, "loss": 0.3081, "step": 1726 }, { "epoch": 0.08, "grad_norm": 0.3893735651853771, "learning_rate": 1.987282390836775e-05, "loss": 0.1274, "step": 1727 }, { "epoch": 0.08, "grad_norm": 0.5759343988896747, "learning_rate": 1.9872587255109455e-05, "loss": 0.3751, "step": 1728 }, { "epoch": 0.08, "grad_norm": 0.4852073022232205, "learning_rate": 1.9872350383281588e-05, "loss": 0.2954, "step": 1729 }, { "epoch": 0.08, "grad_norm": 0.9299259179887859, "learning_rate": 1.9872113292889402e-05, "loss": 0.5046, "step": 1730 }, { "epoch": 0.08, "grad_norm": 0.48448437195419847, "learning_rate": 1.987187598393814e-05, "loss": 0.368, "step": 1731 }, { "epoch": 0.08, "grad_norm": 0.5847601671824171, "learning_rate": 1.987163845643306e-05, "loss": 0.3299, "step": 1732 }, { "epoch": 0.08, "grad_norm": 0.45951701738600986, "learning_rate": 1.987140071037942e-05, "loss": 0.2686, "step": 1733 }, { "epoch": 0.08, "grad_norm": 0.950985944503552, "learning_rate": 1.987116274578248e-05, "loss": 0.5239, "step": 1734 }, { "epoch": 0.08, "grad_norm": 0.480880458174946, "learning_rate": 1.9870924562647512e-05, "loss": 0.3316, "step": 1735 }, { "epoch": 0.08, "grad_norm": 0.48103525747567766, "learning_rate": 1.9870686160979785e-05, "loss": 0.3294, "step": 1736 }, { "epoch": 0.08, "grad_norm": 0.7709234986862589, "learning_rate": 1.9870447540784584e-05, "loss": 0.541, "step": 1737 }, { "epoch": 0.08, "grad_norm": 0.45204059827988025, "learning_rate": 1.9870208702067185e-05, "loss": 0.2738, "step": 1738 }, { "epoch": 0.08, "grad_norm": 0.42987515861131753, "learning_rate": 1.986996964483288e-05, "loss": 0.1659, "step": 1739 }, { "epoch": 0.08, "grad_norm": 0.5177532861292381, "learning_rate": 1.9869730369086962e-05, "loss": 0.3035, "step": 1740 }, { "epoch": 0.08, "grad_norm": 0.4789341495917472, "learning_rate": 1.9869490874834723e-05, "loss": 0.3422, "step": 1741 }, { "epoch": 0.08, "grad_norm": 0.9670744592369863, "learning_rate": 1.9869251162081468e-05, "loss": 0.5874, "step": 1742 }, { "epoch": 0.08, "grad_norm": 0.7457490954902003, "learning_rate": 1.9869011230832503e-05, "loss": 0.4634, "step": 1743 }, { "epoch": 0.08, "grad_norm": 0.4377343714746419, "learning_rate": 1.9868771081093145e-05, "loss": 0.3087, "step": 1744 }, { "epoch": 0.08, "grad_norm": 0.3930404623250186, "learning_rate": 1.9868530712868705e-05, "loss": 0.2206, "step": 1745 }, { "epoch": 0.08, "grad_norm": 1.6604068065097912, "learning_rate": 1.9868290126164507e-05, "loss": 0.4942, "step": 1746 }, { "epoch": 0.08, "grad_norm": 0.4944818233324763, "learning_rate": 1.9868049320985877e-05, "loss": 0.2795, "step": 1747 }, { "epoch": 0.08, "grad_norm": 0.5084430788038516, "learning_rate": 1.9867808297338148e-05, "loss": 0.3857, "step": 1748 }, { "epoch": 0.08, "grad_norm": 0.9046739113643756, "learning_rate": 1.9867567055226652e-05, "loss": 0.4903, "step": 1749 }, { "epoch": 0.08, "grad_norm": 0.5544441773985028, "learning_rate": 1.986732559465673e-05, "loss": 0.2935, "step": 1750 }, { "epoch": 0.08, "grad_norm": 0.38825085950069643, "learning_rate": 1.986708391563373e-05, "loss": 0.1905, "step": 1751 }, { "epoch": 0.08, "grad_norm": 0.6985435776079773, "learning_rate": 1.9866842018163e-05, "loss": 0.3823, "step": 1752 }, { "epoch": 0.08, "grad_norm": 0.5052266103402563, "learning_rate": 1.9866599902249905e-05, "loss": 0.2225, "step": 1753 }, { "epoch": 0.08, "grad_norm": 1.1630305720705896, "learning_rate": 1.986635756789979e-05, "loss": 0.5532, "step": 1754 }, { "epoch": 0.08, "grad_norm": 1.9834602718784935, "learning_rate": 1.9866115015118034e-05, "loss": 0.7262, "step": 1755 }, { "epoch": 0.08, "grad_norm": 0.5009715796752674, "learning_rate": 1.9865872243909997e-05, "loss": 0.2629, "step": 1756 }, { "epoch": 0.08, "grad_norm": 0.39269810813349826, "learning_rate": 1.986562925428106e-05, "loss": 0.2608, "step": 1757 }, { "epoch": 0.08, "grad_norm": 1.8067721336369729, "learning_rate": 1.9865386046236597e-05, "loss": 0.7001, "step": 1758 }, { "epoch": 0.08, "grad_norm": 0.9627950670229396, "learning_rate": 1.9865142619781996e-05, "loss": 0.3014, "step": 1759 }, { "epoch": 0.08, "grad_norm": 0.9837848782304732, "learning_rate": 1.9864898974922645e-05, "loss": 0.4019, "step": 1760 }, { "epoch": 0.08, "grad_norm": 1.7191488952086396, "learning_rate": 1.9864655111663943e-05, "loss": 0.8414, "step": 1761 }, { "epoch": 0.08, "grad_norm": 0.5333491767759412, "learning_rate": 1.9864411030011278e-05, "loss": 0.2293, "step": 1762 }, { "epoch": 0.08, "grad_norm": 0.4167151639940066, "learning_rate": 1.9864166729970066e-05, "loss": 0.1753, "step": 1763 }, { "epoch": 0.08, "grad_norm": 0.6035918577050162, "learning_rate": 1.9863922211545708e-05, "loss": 0.3739, "step": 1764 }, { "epoch": 0.08, "grad_norm": 0.6751172389225426, "learning_rate": 1.9863677474743618e-05, "loss": 0.3295, "step": 1765 }, { "epoch": 0.08, "grad_norm": 1.8045785028446848, "learning_rate": 1.9863432519569215e-05, "loss": 0.5187, "step": 1766 }, { "epoch": 0.08, "grad_norm": 0.6610463641175794, "learning_rate": 1.9863187346027922e-05, "loss": 0.4342, "step": 1767 }, { "epoch": 0.08, "grad_norm": 0.557196577372603, "learning_rate": 1.986294195412517e-05, "loss": 0.3035, "step": 1768 }, { "epoch": 0.08, "grad_norm": 0.4139781351442848, "learning_rate": 1.986269634386639e-05, "loss": 0.1926, "step": 1769 }, { "epoch": 0.08, "grad_norm": 1.1762058447433266, "learning_rate": 1.9862450515257015e-05, "loss": 0.547, "step": 1770 }, { "epoch": 0.08, "grad_norm": 0.7487274928627795, "learning_rate": 1.9862204468302492e-05, "loss": 0.4105, "step": 1771 }, { "epoch": 0.08, "grad_norm": 0.5146472282125227, "learning_rate": 1.986195820300827e-05, "loss": 0.2934, "step": 1772 }, { "epoch": 0.08, "grad_norm": 1.3678804523076638, "learning_rate": 1.9861711719379793e-05, "loss": 0.7715, "step": 1773 }, { "epoch": 0.08, "grad_norm": 0.6364653823076378, "learning_rate": 1.986146501742253e-05, "loss": 0.3269, "step": 1774 }, { "epoch": 0.08, "grad_norm": 0.29646971919735887, "learning_rate": 1.986121809714193e-05, "loss": 0.1932, "step": 1775 }, { "epoch": 0.08, "grad_norm": 0.5820540971559472, "learning_rate": 1.986097095854347e-05, "loss": 0.4151, "step": 1776 }, { "epoch": 0.08, "grad_norm": 0.434658750201213, "learning_rate": 1.9860723601632616e-05, "loss": 0.3497, "step": 1777 }, { "epoch": 0.08, "grad_norm": 0.688245621447827, "learning_rate": 1.9860476026414846e-05, "loss": 0.45, "step": 1778 }, { "epoch": 0.08, "grad_norm": 0.5694979366994609, "learning_rate": 1.986022823289564e-05, "loss": 0.3346, "step": 1779 }, { "epoch": 0.08, "grad_norm": 0.44001999226644634, "learning_rate": 1.9859980221080483e-05, "loss": 0.3175, "step": 1780 }, { "epoch": 0.08, "grad_norm": 0.6980227695687897, "learning_rate": 1.9859731990974867e-05, "loss": 0.3857, "step": 1781 }, { "epoch": 0.08, "grad_norm": 0.3721466893422118, "learning_rate": 1.985948354258429e-05, "loss": 0.2584, "step": 1782 }, { "epoch": 0.08, "grad_norm": 0.5466344289648984, "learning_rate": 1.9859234875914247e-05, "loss": 0.3093, "step": 1783 }, { "epoch": 0.08, "grad_norm": 0.4921616624470089, "learning_rate": 1.9858985990970252e-05, "loss": 0.3327, "step": 1784 }, { "epoch": 0.08, "grad_norm": 0.8966039834652199, "learning_rate": 1.9858736887757802e-05, "loss": 0.5317, "step": 1785 }, { "epoch": 0.08, "grad_norm": 0.6351571047753894, "learning_rate": 1.9858487566282424e-05, "loss": 0.3013, "step": 1786 }, { "epoch": 0.08, "grad_norm": 0.4874977634065661, "learning_rate": 1.9858238026549634e-05, "loss": 0.3698, "step": 1787 }, { "epoch": 0.08, "grad_norm": 0.36207404343623495, "learning_rate": 1.9857988268564955e-05, "loss": 0.318, "step": 1788 }, { "epoch": 0.08, "grad_norm": 0.4828406802128624, "learning_rate": 1.9857738292333915e-05, "loss": 0.1871, "step": 1789 }, { "epoch": 0.08, "grad_norm": 0.5878710318490176, "learning_rate": 1.985748809786205e-05, "loss": 0.3832, "step": 1790 }, { "epoch": 0.08, "grad_norm": 0.5976773730710854, "learning_rate": 1.9857237685154897e-05, "loss": 0.4466, "step": 1791 }, { "epoch": 0.08, "grad_norm": 0.4130798822183903, "learning_rate": 1.9856987054218007e-05, "loss": 0.2146, "step": 1792 }, { "epoch": 0.08, "grad_norm": 0.45883273817813086, "learning_rate": 1.985673620505692e-05, "loss": 0.3917, "step": 1793 }, { "epoch": 0.08, "grad_norm": 0.7021508110705673, "learning_rate": 1.9856485137677197e-05, "loss": 0.5366, "step": 1794 }, { "epoch": 0.08, "grad_norm": 0.3291559587571528, "learning_rate": 1.985623385208439e-05, "loss": 0.1417, "step": 1795 }, { "epoch": 0.08, "grad_norm": 0.4927305062406124, "learning_rate": 1.9855982348284066e-05, "loss": 0.3386, "step": 1796 }, { "epoch": 0.08, "grad_norm": 1.3701714066718698, "learning_rate": 1.985573062628179e-05, "loss": 0.7233, "step": 1797 }, { "epoch": 0.08, "grad_norm": 0.435791366350211, "learning_rate": 1.985547868608314e-05, "loss": 0.2557, "step": 1798 }, { "epoch": 0.08, "grad_norm": 0.6093149419417401, "learning_rate": 1.9855226527693693e-05, "loss": 0.4452, "step": 1799 }, { "epoch": 0.08, "grad_norm": 0.5144921774910622, "learning_rate": 1.9854974151119027e-05, "loss": 0.386, "step": 1800 }, { "epoch": 0.08, "grad_norm": 0.4209592504991791, "learning_rate": 1.9854721556364734e-05, "loss": 0.1833, "step": 1801 }, { "epoch": 0.08, "grad_norm": 0.6771450206752724, "learning_rate": 1.98544687434364e-05, "loss": 0.4158, "step": 1802 }, { "epoch": 0.08, "grad_norm": 0.3667182150744982, "learning_rate": 1.9854215712339626e-05, "loss": 0.2946, "step": 1803 }, { "epoch": 0.08, "grad_norm": 0.7983659536596921, "learning_rate": 1.9853962463080013e-05, "loss": 0.4547, "step": 1804 }, { "epoch": 0.08, "grad_norm": 0.42993724589799986, "learning_rate": 1.985370899566317e-05, "loss": 0.3045, "step": 1805 }, { "epoch": 0.08, "grad_norm": 0.890410847893566, "learning_rate": 1.985345531009471e-05, "loss": 0.5417, "step": 1806 }, { "epoch": 0.08, "grad_norm": 0.5845268120611091, "learning_rate": 1.9853201406380243e-05, "loss": 0.3584, "step": 1807 }, { "epoch": 0.08, "grad_norm": 0.4383520410767089, "learning_rate": 1.9852947284525393e-05, "loss": 0.2795, "step": 1808 }, { "epoch": 0.08, "grad_norm": 0.383028993786552, "learning_rate": 1.9852692944535786e-05, "loss": 0.2108, "step": 1809 }, { "epoch": 0.08, "grad_norm": 0.7577384456563768, "learning_rate": 1.9852438386417058e-05, "loss": 0.4691, "step": 1810 }, { "epoch": 0.08, "grad_norm": 0.4020921198349881, "learning_rate": 1.9852183610174833e-05, "loss": 0.285, "step": 1811 }, { "epoch": 0.08, "grad_norm": 0.5996892203156494, "learning_rate": 1.9851928615814764e-05, "loss": 0.3627, "step": 1812 }, { "epoch": 0.08, "grad_norm": 1.0401280549156477, "learning_rate": 1.9851673403342488e-05, "loss": 0.5368, "step": 1813 }, { "epoch": 0.08, "grad_norm": 0.3531853456030447, "learning_rate": 1.985141797276366e-05, "loss": 0.2163, "step": 1814 }, { "epoch": 0.08, "grad_norm": 0.3824840866553206, "learning_rate": 1.9851162324083933e-05, "loss": 0.2431, "step": 1815 }, { "epoch": 0.08, "grad_norm": 0.5074746833733829, "learning_rate": 1.9850906457308965e-05, "loss": 0.3609, "step": 1816 }, { "epoch": 0.08, "grad_norm": 0.5263925597285825, "learning_rate": 1.9850650372444425e-05, "loss": 0.3741, "step": 1817 }, { "epoch": 0.08, "grad_norm": 0.7174660642026556, "learning_rate": 1.9850394069495976e-05, "loss": 0.4505, "step": 1818 }, { "epoch": 0.08, "grad_norm": 0.4094659702876812, "learning_rate": 1.98501375484693e-05, "loss": 0.3153, "step": 1819 }, { "epoch": 0.08, "grad_norm": 0.5446998550688928, "learning_rate": 1.984988080937007e-05, "loss": 0.3498, "step": 1820 }, { "epoch": 0.08, "grad_norm": 0.4845316922040717, "learning_rate": 1.9849623852203975e-05, "loss": 0.2985, "step": 1821 }, { "epoch": 0.08, "grad_norm": 0.493235423958602, "learning_rate": 1.9849366676976697e-05, "loss": 0.2963, "step": 1822 }, { "epoch": 0.08, "grad_norm": 0.5050256645091589, "learning_rate": 1.9849109283693937e-05, "loss": 0.3276, "step": 1823 }, { "epoch": 0.08, "grad_norm": 0.5386128973100998, "learning_rate": 1.9848851672361392e-05, "loss": 0.3148, "step": 1824 }, { "epoch": 0.08, "grad_norm": 1.0259265337866235, "learning_rate": 1.984859384298476e-05, "loss": 0.4823, "step": 1825 }, { "epoch": 0.08, "grad_norm": 0.5128208762718018, "learning_rate": 1.984833579556975e-05, "loss": 0.3309, "step": 1826 }, { "epoch": 0.08, "grad_norm": 0.6095455133225118, "learning_rate": 1.9848077530122083e-05, "loss": 0.4099, "step": 1827 }, { "epoch": 0.08, "grad_norm": 0.4824538325559773, "learning_rate": 1.984781904664747e-05, "loss": 0.2529, "step": 1828 }, { "epoch": 0.08, "grad_norm": 0.375963897631843, "learning_rate": 1.984756034515163e-05, "loss": 0.2803, "step": 1829 }, { "epoch": 0.08, "grad_norm": 2.4802573941507466, "learning_rate": 1.98473014256403e-05, "loss": 0.8471, "step": 1830 }, { "epoch": 0.08, "grad_norm": 0.5463633190470593, "learning_rate": 1.9847042288119205e-05, "loss": 0.2898, "step": 1831 }, { "epoch": 0.08, "grad_norm": 0.5258542414770248, "learning_rate": 1.9846782932594085e-05, "loss": 0.3369, "step": 1832 }, { "epoch": 0.08, "grad_norm": 0.9534239370229391, "learning_rate": 1.9846523359070683e-05, "loss": 0.5282, "step": 1833 }, { "epoch": 0.08, "grad_norm": 0.6923769803438491, "learning_rate": 1.9846263567554744e-05, "loss": 0.2063, "step": 1834 }, { "epoch": 0.08, "grad_norm": 0.60034200594085, "learning_rate": 1.984600355805202e-05, "loss": 0.3114, "step": 1835 }, { "epoch": 0.08, "grad_norm": 0.5918890816462327, "learning_rate": 1.9845743330568265e-05, "loss": 0.3474, "step": 1836 }, { "epoch": 0.08, "grad_norm": 0.7038753312985933, "learning_rate": 1.984548288510924e-05, "loss": 0.2313, "step": 1837 }, { "epoch": 0.08, "grad_norm": 0.5526065822192627, "learning_rate": 1.9845222221680717e-05, "loss": 0.3343, "step": 1838 }, { "epoch": 0.08, "grad_norm": 0.66080857145344, "learning_rate": 1.984496134028846e-05, "loss": 0.3968, "step": 1839 }, { "epoch": 0.08, "grad_norm": 1.159336585806815, "learning_rate": 1.984470024093825e-05, "loss": 0.6108, "step": 1840 }, { "epoch": 0.08, "grad_norm": 0.5783577561227164, "learning_rate": 1.9844438923635866e-05, "loss": 0.1855, "step": 1841 }, { "epoch": 0.08, "grad_norm": 0.7087109712689228, "learning_rate": 1.9844177388387093e-05, "loss": 0.3379, "step": 1842 }, { "epoch": 0.08, "grad_norm": 0.5127281358865282, "learning_rate": 1.9843915635197714e-05, "loss": 0.3677, "step": 1843 }, { "epoch": 0.08, "grad_norm": 0.4869597953292276, "learning_rate": 1.9843653664073535e-05, "loss": 0.2701, "step": 1844 }, { "epoch": 0.08, "grad_norm": 1.20001843304987, "learning_rate": 1.9843391475020352e-05, "loss": 0.5731, "step": 1845 }, { "epoch": 0.08, "grad_norm": 0.6501968017424933, "learning_rate": 1.984312906804397e-05, "loss": 0.2681, "step": 1846 }, { "epoch": 0.08, "grad_norm": 0.4616482986061454, "learning_rate": 1.9842866443150192e-05, "loss": 0.2688, "step": 1847 }, { "epoch": 0.08, "grad_norm": 0.5552073882286912, "learning_rate": 1.9842603600344844e-05, "loss": 0.2315, "step": 1848 }, { "epoch": 0.08, "grad_norm": 1.1855278689203255, "learning_rate": 1.9842340539633734e-05, "loss": 0.6302, "step": 1849 }, { "epoch": 0.08, "grad_norm": 0.5182621508611136, "learning_rate": 1.984207726102269e-05, "loss": 0.2915, "step": 1850 }, { "epoch": 0.09, "grad_norm": 0.47268756722408256, "learning_rate": 1.9841813764517548e-05, "loss": 0.3682, "step": 1851 }, { "epoch": 0.09, "grad_norm": 1.016167795419829, "learning_rate": 1.984155005012413e-05, "loss": 0.5965, "step": 1852 }, { "epoch": 0.09, "grad_norm": 0.4463406306772212, "learning_rate": 1.984128611784828e-05, "loss": 0.3036, "step": 1853 }, { "epoch": 0.09, "grad_norm": 0.35674620848399313, "learning_rate": 1.984102196769584e-05, "loss": 0.0996, "step": 1854 }, { "epoch": 0.09, "grad_norm": 0.63074319011239, "learning_rate": 1.984075759967266e-05, "loss": 0.3864, "step": 1855 }, { "epoch": 0.09, "grad_norm": 0.5936145519888961, "learning_rate": 1.984049301378459e-05, "loss": 0.3223, "step": 1856 }, { "epoch": 0.09, "grad_norm": 0.9966760633366543, "learning_rate": 1.984022821003749e-05, "loss": 0.3896, "step": 1857 }, { "epoch": 0.09, "grad_norm": 0.6550273216994056, "learning_rate": 1.9839963188437223e-05, "loss": 0.3966, "step": 1858 }, { "epoch": 0.09, "grad_norm": 0.43336951763895726, "learning_rate": 1.983969794898965e-05, "loss": 0.2901, "step": 1859 }, { "epoch": 0.09, "grad_norm": 0.36139528585867514, "learning_rate": 1.9839432491700653e-05, "loss": 0.1936, "step": 1860 }, { "epoch": 0.09, "grad_norm": 0.9222199873128843, "learning_rate": 1.9839166816576097e-05, "loss": 0.5312, "step": 1861 }, { "epoch": 0.09, "grad_norm": 0.4844929993518195, "learning_rate": 1.983890092362188e-05, "loss": 0.3177, "step": 1862 }, { "epoch": 0.09, "grad_norm": 0.5090428114844479, "learning_rate": 1.9838634812843875e-05, "loss": 0.31, "step": 1863 }, { "epoch": 0.09, "grad_norm": 1.966088143719992, "learning_rate": 1.9838368484247976e-05, "loss": 0.8561, "step": 1864 }, { "epoch": 0.09, "grad_norm": 0.49432071929148247, "learning_rate": 1.9838101937840085e-05, "loss": 0.3002, "step": 1865 }, { "epoch": 0.09, "grad_norm": 0.45064531408217734, "learning_rate": 1.9837835173626095e-05, "loss": 0.1859, "step": 1866 }, { "epoch": 0.09, "grad_norm": 0.5952836236105027, "learning_rate": 1.983756819161192e-05, "loss": 0.3557, "step": 1867 }, { "epoch": 0.09, "grad_norm": 0.4480817831374804, "learning_rate": 1.9837300991803462e-05, "loss": 0.2876, "step": 1868 }, { "epoch": 0.09, "grad_norm": 1.5133015864181774, "learning_rate": 1.9837033574206644e-05, "loss": 0.8068, "step": 1869 }, { "epoch": 0.09, "grad_norm": 0.5169006779804324, "learning_rate": 1.9836765938827385e-05, "loss": 0.2915, "step": 1870 }, { "epoch": 0.09, "grad_norm": 0.49168121245995894, "learning_rate": 1.9836498085671613e-05, "loss": 0.2727, "step": 1871 }, { "epoch": 0.09, "grad_norm": 0.4821743322605833, "learning_rate": 1.9836230014745248e-05, "loss": 0.2206, "step": 1872 }, { "epoch": 0.09, "grad_norm": 0.7327361527476197, "learning_rate": 1.9835961726054228e-05, "loss": 0.468, "step": 1873 }, { "epoch": 0.09, "grad_norm": 0.5315220831053276, "learning_rate": 1.98356932196045e-05, "loss": 0.3028, "step": 1874 }, { "epoch": 0.09, "grad_norm": 0.5963145940506028, "learning_rate": 1.9835424495402004e-05, "loss": 0.3769, "step": 1875 }, { "epoch": 0.09, "grad_norm": 1.1877550815088374, "learning_rate": 1.983515555345269e-05, "loss": 0.4056, "step": 1876 }, { "epoch": 0.09, "grad_norm": 0.5433237951246551, "learning_rate": 1.983488639376251e-05, "loss": 0.2983, "step": 1877 }, { "epoch": 0.09, "grad_norm": 0.48176949042586076, "learning_rate": 1.9834617016337424e-05, "loss": 0.374, "step": 1878 }, { "epoch": 0.09, "grad_norm": 0.4802190161939199, "learning_rate": 1.9834347421183392e-05, "loss": 0.3549, "step": 1879 }, { "epoch": 0.09, "grad_norm": 0.3852920657080918, "learning_rate": 1.9834077608306392e-05, "loss": 0.205, "step": 1880 }, { "epoch": 0.09, "grad_norm": 1.5455878296498895, "learning_rate": 1.9833807577712393e-05, "loss": 0.893, "step": 1881 }, { "epoch": 0.09, "grad_norm": 1.1486914008528828, "learning_rate": 1.983353732940737e-05, "loss": 0.6357, "step": 1882 }, { "epoch": 0.09, "grad_norm": 0.4181837906322127, "learning_rate": 1.9833266863397304e-05, "loss": 0.2527, "step": 1883 }, { "epoch": 0.09, "grad_norm": 0.9567670849628994, "learning_rate": 1.983299617968819e-05, "loss": 0.557, "step": 1884 }, { "epoch": 0.09, "grad_norm": 0.3771553785315941, "learning_rate": 1.983272527828602e-05, "loss": 0.29, "step": 1885 }, { "epoch": 0.09, "grad_norm": 0.47636028529950875, "learning_rate": 1.983245415919679e-05, "loss": 0.2405, "step": 1886 }, { "epoch": 0.09, "grad_norm": 0.5099678036440669, "learning_rate": 1.9832182822426498e-05, "loss": 0.3435, "step": 1887 }, { "epoch": 0.09, "grad_norm": 1.1601399744986034, "learning_rate": 1.9831911267981156e-05, "loss": 0.5918, "step": 1888 }, { "epoch": 0.09, "grad_norm": 0.432514674027098, "learning_rate": 1.9831639495866774e-05, "loss": 0.221, "step": 1889 }, { "epoch": 0.09, "grad_norm": 0.8471092428022736, "learning_rate": 1.983136750608937e-05, "loss": 0.5272, "step": 1890 }, { "epoch": 0.09, "grad_norm": 0.46684174340132695, "learning_rate": 1.9831095298654964e-05, "loss": 0.2521, "step": 1891 }, { "epoch": 0.09, "grad_norm": 0.8182503063277783, "learning_rate": 1.9830822873569585e-05, "loss": 0.4021, "step": 1892 }, { "epoch": 0.09, "grad_norm": 0.5375459177786547, "learning_rate": 1.9830550230839263e-05, "loss": 0.3312, "step": 1893 }, { "epoch": 0.09, "grad_norm": 0.49702408568767525, "learning_rate": 1.9830277370470035e-05, "loss": 0.3616, "step": 1894 }, { "epoch": 0.09, "grad_norm": 0.6809466244501035, "learning_rate": 1.9830004292467936e-05, "loss": 0.4166, "step": 1895 }, { "epoch": 0.09, "grad_norm": 0.47835888050493636, "learning_rate": 1.982973099683902e-05, "loss": 0.3534, "step": 1896 }, { "epoch": 0.09, "grad_norm": 0.6585766413239953, "learning_rate": 1.982945748358933e-05, "loss": 0.2595, "step": 1897 }, { "epoch": 0.09, "grad_norm": 0.46306406550497015, "learning_rate": 1.982918375272493e-05, "loss": 0.33, "step": 1898 }, { "epoch": 0.09, "grad_norm": 0.37369491938377014, "learning_rate": 1.982890980425187e-05, "loss": 0.2453, "step": 1899 }, { "epoch": 0.09, "grad_norm": 1.4683752686530696, "learning_rate": 1.9828635638176226e-05, "loss": 0.6446, "step": 1900 }, { "epoch": 0.09, "grad_norm": 0.4897173634114251, "learning_rate": 1.9828361254504057e-05, "loss": 0.3536, "step": 1901 }, { "epoch": 0.09, "grad_norm": 0.5582314579275497, "learning_rate": 1.9828086653241444e-05, "loss": 0.3461, "step": 1902 }, { "epoch": 0.09, "grad_norm": 0.5483590179233153, "learning_rate": 1.9827811834394467e-05, "loss": 0.3553, "step": 1903 }, { "epoch": 0.09, "grad_norm": 0.5245475272236104, "learning_rate": 1.9827536797969205e-05, "loss": 0.3042, "step": 1904 }, { "epoch": 0.09, "grad_norm": 0.7548663663818748, "learning_rate": 1.982726154397175e-05, "loss": 0.4534, "step": 1905 }, { "epoch": 0.09, "grad_norm": 0.32805362298999935, "learning_rate": 1.98269860724082e-05, "loss": 0.226, "step": 1906 }, { "epoch": 0.09, "grad_norm": 0.6275196456082323, "learning_rate": 1.982671038328465e-05, "loss": 0.4124, "step": 1907 }, { "epoch": 0.09, "grad_norm": 0.5440372823144775, "learning_rate": 1.9826434476607198e-05, "loss": 0.3913, "step": 1908 }, { "epoch": 0.09, "grad_norm": 0.9669380376912023, "learning_rate": 1.982615835238196e-05, "loss": 0.4756, "step": 1909 }, { "epoch": 0.09, "grad_norm": 0.8046501352707974, "learning_rate": 1.9825882010615048e-05, "loss": 0.3438, "step": 1910 }, { "epoch": 0.09, "grad_norm": 0.5108115968976568, "learning_rate": 1.9825605451312574e-05, "loss": 0.3112, "step": 1911 }, { "epoch": 0.09, "grad_norm": 0.5182733888660727, "learning_rate": 1.9825328674480672e-05, "loss": 0.2714, "step": 1912 }, { "epoch": 0.09, "grad_norm": 0.6780928161168133, "learning_rate": 1.982505168012546e-05, "loss": 0.287, "step": 1913 }, { "epoch": 0.09, "grad_norm": 0.5094638805766286, "learning_rate": 1.982477446825307e-05, "loss": 0.3294, "step": 1914 }, { "epoch": 0.09, "grad_norm": 0.6781153046639767, "learning_rate": 1.982449703886965e-05, "loss": 0.3558, "step": 1915 }, { "epoch": 0.09, "grad_norm": 0.7549813902725848, "learning_rate": 1.9824219391981326e-05, "loss": 0.4052, "step": 1916 }, { "epoch": 0.09, "grad_norm": 0.4238395156147871, "learning_rate": 1.982394152759426e-05, "loss": 0.2722, "step": 1917 }, { "epoch": 0.09, "grad_norm": 0.5458126639026571, "learning_rate": 1.9823663445714595e-05, "loss": 0.4178, "step": 1918 }, { "epoch": 0.09, "grad_norm": 0.36298129912973, "learning_rate": 1.9823385146348485e-05, "loss": 0.2388, "step": 1919 }, { "epoch": 0.09, "grad_norm": 0.531623486243174, "learning_rate": 1.98231066295021e-05, "loss": 0.353, "step": 1920 }, { "epoch": 0.09, "grad_norm": 1.1972605224079003, "learning_rate": 1.9822827895181603e-05, "loss": 0.6799, "step": 1921 }, { "epoch": 0.09, "grad_norm": 0.4282930503954081, "learning_rate": 1.9822548943393164e-05, "loss": 0.26, "step": 1922 }, { "epoch": 0.09, "grad_norm": 0.5391102249757683, "learning_rate": 1.9822269774142954e-05, "loss": 0.3595, "step": 1923 }, { "epoch": 0.09, "grad_norm": 0.6983716142445426, "learning_rate": 1.982199038743716e-05, "loss": 0.5432, "step": 1924 }, { "epoch": 0.09, "grad_norm": 0.29483046464153667, "learning_rate": 1.9821710783281966e-05, "loss": 0.0814, "step": 1925 }, { "epoch": 0.09, "grad_norm": 0.5168243373029131, "learning_rate": 1.9821430961683565e-05, "loss": 0.3348, "step": 1926 }, { "epoch": 0.09, "grad_norm": 0.6724981218084933, "learning_rate": 1.9821150922648143e-05, "loss": 0.4434, "step": 1927 }, { "epoch": 0.09, "grad_norm": 0.555236279380417, "learning_rate": 1.982087066618191e-05, "loss": 0.2933, "step": 1928 }, { "epoch": 0.09, "grad_norm": 0.49561793290000494, "learning_rate": 1.982059019229106e-05, "loss": 0.3557, "step": 1929 }, { "epoch": 0.09, "grad_norm": 0.46832332643153696, "learning_rate": 1.9820309500981813e-05, "loss": 0.393, "step": 1930 }, { "epoch": 0.09, "grad_norm": 0.42306146417495866, "learning_rate": 1.9820028592260375e-05, "loss": 0.228, "step": 1931 }, { "epoch": 0.09, "grad_norm": 0.35120687273762274, "learning_rate": 1.9819747466132972e-05, "loss": 0.1989, "step": 1932 }, { "epoch": 0.09, "grad_norm": 1.606184885870421, "learning_rate": 1.9819466122605826e-05, "loss": 0.7207, "step": 1933 }, { "epoch": 0.09, "grad_norm": 0.4476171944299043, "learning_rate": 1.981918456168516e-05, "loss": 0.3204, "step": 1934 }, { "epoch": 0.09, "grad_norm": 0.5184225689386348, "learning_rate": 1.9818902783377215e-05, "loss": 0.3083, "step": 1935 }, { "epoch": 0.09, "grad_norm": 1.0494532166868527, "learning_rate": 1.981862078768822e-05, "loss": 0.5774, "step": 1936 }, { "epoch": 0.09, "grad_norm": 0.3605269751141371, "learning_rate": 1.981833857462443e-05, "loss": 0.2021, "step": 1937 }, { "epoch": 0.09, "grad_norm": 0.4253002447023497, "learning_rate": 1.9818056144192084e-05, "loss": 0.2744, "step": 1938 }, { "epoch": 0.09, "grad_norm": 0.60020737342717, "learning_rate": 1.981777349639744e-05, "loss": 0.3808, "step": 1939 }, { "epoch": 0.09, "grad_norm": 0.8660255411677589, "learning_rate": 1.9817490631246754e-05, "loss": 0.4866, "step": 1940 }, { "epoch": 0.09, "grad_norm": 0.5004577618248672, "learning_rate": 1.9817207548746286e-05, "loss": 0.3435, "step": 1941 }, { "epoch": 0.09, "grad_norm": 0.4751157918722982, "learning_rate": 1.9816924248902304e-05, "loss": 0.3391, "step": 1942 }, { "epoch": 0.09, "grad_norm": 0.3914551342958525, "learning_rate": 1.9816640731721086e-05, "loss": 0.2103, "step": 1943 }, { "epoch": 0.09, "grad_norm": 0.40678447806936613, "learning_rate": 1.9816356997208896e-05, "loss": 0.2896, "step": 1944 }, { "epoch": 0.09, "grad_norm": 0.9977725035841613, "learning_rate": 1.9816073045372026e-05, "loss": 0.475, "step": 1945 }, { "epoch": 0.09, "grad_norm": 0.5266811102400007, "learning_rate": 1.981578887621676e-05, "loss": 0.3888, "step": 1946 }, { "epoch": 0.09, "grad_norm": 0.4468288918273721, "learning_rate": 1.981550448974939e-05, "loss": 0.3284, "step": 1947 }, { "epoch": 0.09, "grad_norm": 0.7049747137680635, "learning_rate": 1.981521988597621e-05, "loss": 0.4502, "step": 1948 }, { "epoch": 0.09, "grad_norm": 0.39492775420163345, "learning_rate": 1.981493506490352e-05, "loss": 0.1844, "step": 1949 }, { "epoch": 0.09, "grad_norm": 0.4020384021093621, "learning_rate": 1.9814650026537632e-05, "loss": 0.2834, "step": 1950 }, { "epoch": 0.09, "grad_norm": 0.6384599910049662, "learning_rate": 1.9814364770884847e-05, "loss": 0.341, "step": 1951 }, { "epoch": 0.09, "grad_norm": 0.9892789303944801, "learning_rate": 1.9814079297951486e-05, "loss": 0.5043, "step": 1952 }, { "epoch": 0.09, "grad_norm": 0.42087800341626025, "learning_rate": 1.981379360774387e-05, "loss": 0.3228, "step": 1953 }, { "epoch": 0.09, "grad_norm": 0.5039570434846425, "learning_rate": 1.9813507700268323e-05, "loss": 0.3576, "step": 1954 }, { "epoch": 0.09, "grad_norm": 0.3541316994376223, "learning_rate": 1.9813221575531172e-05, "loss": 0.1761, "step": 1955 }, { "epoch": 0.09, "grad_norm": 0.36132319946466196, "learning_rate": 1.9812935233538754e-05, "loss": 0.2645, "step": 1956 }, { "epoch": 0.09, "grad_norm": 1.1792060738055052, "learning_rate": 1.9812648674297405e-05, "loss": 0.6376, "step": 1957 }, { "epoch": 0.09, "grad_norm": 0.4593508859237822, "learning_rate": 1.9812361897813477e-05, "loss": 0.3236, "step": 1958 }, { "epoch": 0.09, "grad_norm": 0.47948513319936614, "learning_rate": 1.981207490409331e-05, "loss": 0.3293, "step": 1959 }, { "epoch": 0.09, "grad_norm": 1.2340169439532689, "learning_rate": 1.9811787693143266e-05, "loss": 0.7185, "step": 1960 }, { "epoch": 0.09, "grad_norm": 0.41089348937240583, "learning_rate": 1.9811500264969695e-05, "loss": 0.2091, "step": 1961 }, { "epoch": 0.09, "grad_norm": 0.5366054330211604, "learning_rate": 1.9811212619578967e-05, "loss": 0.3185, "step": 1962 }, { "epoch": 0.09, "grad_norm": 0.4995655785619846, "learning_rate": 1.9810924756977444e-05, "loss": 0.3339, "step": 1963 }, { "epoch": 0.09, "grad_norm": 1.029761756265269, "learning_rate": 1.9810636677171506e-05, "loss": 0.4914, "step": 1964 }, { "epoch": 0.09, "grad_norm": 0.47787231645546624, "learning_rate": 1.9810348380167527e-05, "loss": 0.2889, "step": 1965 }, { "epoch": 0.09, "grad_norm": 0.5414596156521655, "learning_rate": 1.9810059865971892e-05, "loss": 0.3776, "step": 1966 }, { "epoch": 0.09, "grad_norm": 1.536746540311559, "learning_rate": 1.9809771134590983e-05, "loss": 0.7409, "step": 1967 }, { "epoch": 0.09, "grad_norm": 0.33350452955558973, "learning_rate": 1.9809482186031198e-05, "loss": 0.186, "step": 1968 }, { "epoch": 0.09, "grad_norm": 0.8274444251539987, "learning_rate": 1.9809193020298932e-05, "loss": 0.3997, "step": 1969 }, { "epoch": 0.09, "grad_norm": 0.56137419880525, "learning_rate": 1.9808903637400584e-05, "loss": 0.3844, "step": 1970 }, { "epoch": 0.09, "grad_norm": 0.4168610505692883, "learning_rate": 1.980861403734257e-05, "loss": 0.2404, "step": 1971 }, { "epoch": 0.09, "grad_norm": 1.1072294839575338, "learning_rate": 1.9808324220131287e-05, "loss": 0.6665, "step": 1972 }, { "epoch": 0.09, "grad_norm": 0.6059469190844824, "learning_rate": 1.9808034185773164e-05, "loss": 0.4175, "step": 1973 }, { "epoch": 0.09, "grad_norm": 0.3642313557994086, "learning_rate": 1.9807743934274614e-05, "loss": 0.2197, "step": 1974 }, { "epoch": 0.09, "grad_norm": 0.8088831330634305, "learning_rate": 1.980745346564207e-05, "loss": 0.5195, "step": 1975 }, { "epoch": 0.09, "grad_norm": 0.4992232557524347, "learning_rate": 1.9807162779881956e-05, "loss": 0.3658, "step": 1976 }, { "epoch": 0.09, "grad_norm": 0.4623638893758382, "learning_rate": 1.980687187700071e-05, "loss": 0.2323, "step": 1977 }, { "epoch": 0.09, "grad_norm": 0.5360289981400598, "learning_rate": 1.9806580757004776e-05, "loss": 0.3769, "step": 1978 }, { "epoch": 0.09, "grad_norm": 1.0185027830756135, "learning_rate": 1.9806289419900592e-05, "loss": 0.5417, "step": 1979 }, { "epoch": 0.09, "grad_norm": 0.6933488528653476, "learning_rate": 1.9805997865694616e-05, "loss": 0.3246, "step": 1980 }, { "epoch": 0.09, "grad_norm": 0.5207463884013039, "learning_rate": 1.9805706094393293e-05, "loss": 0.3525, "step": 1981 }, { "epoch": 0.09, "grad_norm": 0.42637707857514257, "learning_rate": 1.980541410600309e-05, "loss": 0.3118, "step": 1982 }, { "epoch": 0.09, "grad_norm": 0.5951078075831547, "learning_rate": 1.980512190053047e-05, "loss": 0.3669, "step": 1983 }, { "epoch": 0.09, "grad_norm": 0.41427740290486037, "learning_rate": 1.98048294779819e-05, "loss": 0.2212, "step": 1984 }, { "epoch": 0.09, "grad_norm": 0.6137914985401658, "learning_rate": 1.9804536838363858e-05, "loss": 0.4008, "step": 1985 }, { "epoch": 0.09, "grad_norm": 0.4086097098322167, "learning_rate": 1.9804243981682817e-05, "loss": 0.3002, "step": 1986 }, { "epoch": 0.09, "grad_norm": 0.7064764250559041, "learning_rate": 1.9803950907945267e-05, "loss": 0.4466, "step": 1987 }, { "epoch": 0.09, "grad_norm": 0.5554398889456554, "learning_rate": 1.9803657617157693e-05, "loss": 0.3282, "step": 1988 }, { "epoch": 0.09, "grad_norm": 0.47965803260818074, "learning_rate": 1.9803364109326586e-05, "loss": 0.2963, "step": 1989 }, { "epoch": 0.09, "grad_norm": 0.39610035644393304, "learning_rate": 1.9803070384458448e-05, "loss": 0.2239, "step": 1990 }, { "epoch": 0.09, "grad_norm": 0.9274105785659928, "learning_rate": 1.9802776442559777e-05, "loss": 0.5518, "step": 1991 }, { "epoch": 0.09, "grad_norm": 0.47310440867051357, "learning_rate": 1.9802482283637086e-05, "loss": 0.3364, "step": 1992 }, { "epoch": 0.09, "grad_norm": 0.8922765734240776, "learning_rate": 1.9802187907696884e-05, "loss": 0.5365, "step": 1993 }, { "epoch": 0.09, "grad_norm": 0.44591853283532534, "learning_rate": 1.980189331474569e-05, "loss": 0.3024, "step": 1994 }, { "epoch": 0.09, "grad_norm": 0.5205649119233964, "learning_rate": 1.9801598504790025e-05, "loss": 0.3215, "step": 1995 }, { "epoch": 0.09, "grad_norm": 0.3804715994250329, "learning_rate": 1.9801303477836417e-05, "loss": 0.2474, "step": 1996 }, { "epoch": 0.09, "grad_norm": 0.7508012739459844, "learning_rate": 1.9801008233891395e-05, "loss": 0.3524, "step": 1997 }, { "epoch": 0.09, "grad_norm": 0.528287447103849, "learning_rate": 1.98007127729615e-05, "loss": 0.3137, "step": 1998 }, { "epoch": 0.09, "grad_norm": 0.5863772715882213, "learning_rate": 1.9800417095053266e-05, "loss": 0.4451, "step": 1999 }, { "epoch": 0.09, "grad_norm": 1.1865521045685201, "learning_rate": 1.9800121200173246e-05, "loss": 0.5324, "step": 2000 }, { "epoch": 0.09, "grad_norm": 0.5161589307886086, "learning_rate": 1.9799825088327987e-05, "loss": 0.3308, "step": 2001 }, { "epoch": 0.09, "grad_norm": 0.34655198264312825, "learning_rate": 1.9799528759524045e-05, "loss": 0.2596, "step": 2002 }, { "epoch": 0.09, "grad_norm": 1.100367717431487, "learning_rate": 1.979923221376798e-05, "loss": 0.4394, "step": 2003 }, { "epoch": 0.09, "grad_norm": 0.4849782370565985, "learning_rate": 1.9798935451066363e-05, "loss": 0.3643, "step": 2004 }, { "epoch": 0.09, "grad_norm": 0.47977694449325736, "learning_rate": 1.9798638471425756e-05, "loss": 0.3233, "step": 2005 }, { "epoch": 0.09, "grad_norm": 0.6553709123209148, "learning_rate": 1.979834127485274e-05, "loss": 0.4269, "step": 2006 }, { "epoch": 0.09, "grad_norm": 0.3923977542751138, "learning_rate": 1.979804386135389e-05, "loss": 0.245, "step": 2007 }, { "epoch": 0.09, "grad_norm": 0.33925261433225296, "learning_rate": 1.9797746230935792e-05, "loss": 0.1793, "step": 2008 }, { "epoch": 0.09, "grad_norm": 0.9151751027601358, "learning_rate": 1.9797448383605036e-05, "loss": 0.3925, "step": 2009 }, { "epoch": 0.09, "grad_norm": 0.5475192505680544, "learning_rate": 1.979715031936822e-05, "loss": 0.2806, "step": 2010 }, { "epoch": 0.09, "grad_norm": 1.468885005215159, "learning_rate": 1.9796852038231932e-05, "loss": 0.7366, "step": 2011 }, { "epoch": 0.09, "grad_norm": 1.3694146325131684, "learning_rate": 1.9796553540202785e-05, "loss": 0.8477, "step": 2012 }, { "epoch": 0.09, "grad_norm": 0.4893060853244445, "learning_rate": 1.9796254825287385e-05, "loss": 0.2319, "step": 2013 }, { "epoch": 0.09, "grad_norm": 0.459106924236629, "learning_rate": 1.9795955893492344e-05, "loss": 0.3488, "step": 2014 }, { "epoch": 0.09, "grad_norm": 0.377399440407178, "learning_rate": 1.979565674482428e-05, "loss": 0.2906, "step": 2015 }, { "epoch": 0.09, "grad_norm": 0.5028002809836686, "learning_rate": 1.979535737928982e-05, "loss": 0.2265, "step": 2016 }, { "epoch": 0.09, "grad_norm": 0.48636333265568715, "learning_rate": 1.9795057796895585e-05, "loss": 0.3696, "step": 2017 }, { "epoch": 0.09, "grad_norm": 0.6451088666394826, "learning_rate": 1.9794757997648214e-05, "loss": 0.4372, "step": 2018 }, { "epoch": 0.09, "grad_norm": 0.8553634158607151, "learning_rate": 1.979445798155434e-05, "loss": 0.3966, "step": 2019 }, { "epoch": 0.09, "grad_norm": 0.4190236061141445, "learning_rate": 1.9794157748620604e-05, "loss": 0.3416, "step": 2020 }, { "epoch": 0.09, "grad_norm": 0.42135222458560323, "learning_rate": 1.9793857298853657e-05, "loss": 0.2884, "step": 2021 }, { "epoch": 0.09, "grad_norm": 0.4012091778228384, "learning_rate": 1.979355663226015e-05, "loss": 0.2461, "step": 2022 }, { "epoch": 0.09, "grad_norm": 0.48755856788473173, "learning_rate": 1.9793255748846737e-05, "loss": 0.2858, "step": 2023 }, { "epoch": 0.09, "grad_norm": 1.5215206163835893, "learning_rate": 1.9792954648620083e-05, "loss": 0.9342, "step": 2024 }, { "epoch": 0.09, "grad_norm": 0.4541215652729916, "learning_rate": 1.9792653331586852e-05, "loss": 0.3108, "step": 2025 }, { "epoch": 0.09, "grad_norm": 0.4299551654401156, "learning_rate": 1.979235179775371e-05, "loss": 0.3176, "step": 2026 }, { "epoch": 0.09, "grad_norm": 0.5191101721597068, "learning_rate": 1.9792050047127337e-05, "loss": 0.3524, "step": 2027 }, { "epoch": 0.09, "grad_norm": 0.37183006099288957, "learning_rate": 1.979174807971442e-05, "loss": 0.2259, "step": 2028 }, { "epoch": 0.09, "grad_norm": 0.46329440797192506, "learning_rate": 1.9791445895521634e-05, "loss": 0.2856, "step": 2029 }, { "epoch": 0.09, "grad_norm": 0.692076040525972, "learning_rate": 1.9791143494555672e-05, "loss": 0.4928, "step": 2030 }, { "epoch": 0.09, "grad_norm": 0.5874302361207011, "learning_rate": 1.979084087682323e-05, "loss": 0.4136, "step": 2031 }, { "epoch": 0.09, "grad_norm": 0.47679929964558293, "learning_rate": 1.979053804233101e-05, "loss": 0.3918, "step": 2032 }, { "epoch": 0.09, "grad_norm": 0.48170021632104, "learning_rate": 1.979023499108571e-05, "loss": 0.2938, "step": 2033 }, { "epoch": 0.09, "grad_norm": 0.3427586317710395, "learning_rate": 1.9789931723094046e-05, "loss": 0.1929, "step": 2034 }, { "epoch": 0.09, "grad_norm": 0.4991603878462252, "learning_rate": 1.978962823836273e-05, "loss": 0.3136, "step": 2035 }, { "epoch": 0.09, "grad_norm": 0.7892557411119868, "learning_rate": 1.978932453689848e-05, "loss": 0.4781, "step": 2036 }, { "epoch": 0.09, "grad_norm": 0.47873530717926904, "learning_rate": 1.9789020618708018e-05, "loss": 0.2869, "step": 2037 }, { "epoch": 0.09, "grad_norm": 0.48280893308741524, "learning_rate": 1.9788716483798077e-05, "loss": 0.4015, "step": 2038 }, { "epoch": 0.09, "grad_norm": 1.040557718900965, "learning_rate": 1.9788412132175386e-05, "loss": 0.4278, "step": 2039 }, { "epoch": 0.09, "grad_norm": 0.30399268222380765, "learning_rate": 1.9788107563846682e-05, "loss": 0.1874, "step": 2040 }, { "epoch": 0.09, "grad_norm": 0.3850387225362169, "learning_rate": 1.9787802778818713e-05, "loss": 0.3136, "step": 2041 }, { "epoch": 0.09, "grad_norm": 0.9479317914244941, "learning_rate": 1.9787497777098226e-05, "loss": 0.4567, "step": 2042 }, { "epoch": 0.09, "grad_norm": 0.5846011877712373, "learning_rate": 1.978719255869197e-05, "loss": 0.4012, "step": 2043 }, { "epoch": 0.09, "grad_norm": 0.6486162644774188, "learning_rate": 1.97868871236067e-05, "loss": 0.3492, "step": 2044 }, { "epoch": 0.09, "grad_norm": 0.501669820525786, "learning_rate": 1.9786581471849186e-05, "loss": 0.3645, "step": 2045 }, { "epoch": 0.09, "grad_norm": 0.2555958187319086, "learning_rate": 1.978627560342619e-05, "loss": 0.0791, "step": 2046 }, { "epoch": 0.09, "grad_norm": 0.5619358701796047, "learning_rate": 1.9785969518344484e-05, "loss": 0.3803, "step": 2047 }, { "epoch": 0.09, "grad_norm": 1.256864624742129, "learning_rate": 1.9785663216610843e-05, "loss": 0.5627, "step": 2048 }, { "epoch": 0.09, "grad_norm": 0.4012730803791754, "learning_rate": 1.9785356698232052e-05, "loss": 0.2836, "step": 2049 }, { "epoch": 0.09, "grad_norm": 0.5840905122322081, "learning_rate": 1.9785049963214895e-05, "loss": 0.3802, "step": 2050 }, { "epoch": 0.09, "grad_norm": 1.662399195733512, "learning_rate": 1.9784743011566162e-05, "loss": 0.8109, "step": 2051 }, { "epoch": 0.09, "grad_norm": 0.251756321730411, "learning_rate": 1.9784435843292652e-05, "loss": 0.1006, "step": 2052 }, { "epoch": 0.09, "grad_norm": 0.4683704180769018, "learning_rate": 1.978412845840116e-05, "loss": 0.304, "step": 2053 }, { "epoch": 0.09, "grad_norm": 1.2834791222468118, "learning_rate": 1.9783820856898492e-05, "loss": 0.5557, "step": 2054 }, { "epoch": 0.09, "grad_norm": 0.766612481050488, "learning_rate": 1.9783513038791463e-05, "loss": 0.3642, "step": 2055 }, { "epoch": 0.09, "grad_norm": 0.5001740353759953, "learning_rate": 1.9783205004086884e-05, "loss": 0.2982, "step": 2056 }, { "epoch": 0.09, "grad_norm": 0.5271195116157156, "learning_rate": 1.9782896752791576e-05, "loss": 0.3426, "step": 2057 }, { "epoch": 0.09, "grad_norm": 0.5304167482899875, "learning_rate": 1.978258828491236e-05, "loss": 0.2965, "step": 2058 }, { "epoch": 0.09, "grad_norm": 0.44599342972225464, "learning_rate": 1.978227960045607e-05, "loss": 0.2245, "step": 2059 }, { "epoch": 0.09, "grad_norm": 1.685826861566089, "learning_rate": 1.978197069942954e-05, "loss": 0.5888, "step": 2060 }, { "epoch": 0.09, "grad_norm": 0.5086787170099829, "learning_rate": 1.9781661581839607e-05, "loss": 0.3477, "step": 2061 }, { "epoch": 0.09, "grad_norm": 0.5054693740003389, "learning_rate": 1.978135224769311e-05, "loss": 0.2533, "step": 2062 }, { "epoch": 0.09, "grad_norm": 0.9128541613747733, "learning_rate": 1.9781042696996904e-05, "loss": 0.6258, "step": 2063 }, { "epoch": 0.09, "grad_norm": 0.43737761989527796, "learning_rate": 1.978073292975784e-05, "loss": 0.2444, "step": 2064 }, { "epoch": 0.09, "grad_norm": 0.4287635026159656, "learning_rate": 1.9780422945982775e-05, "loss": 0.2481, "step": 2065 }, { "epoch": 0.09, "grad_norm": 0.9657517402785877, "learning_rate": 1.9780112745678574e-05, "loss": 0.4264, "step": 2066 }, { "epoch": 0.09, "grad_norm": 0.9765950969579139, "learning_rate": 1.97798023288521e-05, "loss": 0.5766, "step": 2067 }, { "epoch": 0.1, "grad_norm": 0.5419086256816057, "learning_rate": 1.9779491695510233e-05, "loss": 0.3022, "step": 2068 }, { "epoch": 0.1, "grad_norm": 0.5163604585646017, "learning_rate": 1.977918084565984e-05, "loss": 0.2897, "step": 2069 }, { "epoch": 0.1, "grad_norm": 0.6337207858066779, "learning_rate": 1.977886977930781e-05, "loss": 0.3566, "step": 2070 }, { "epoch": 0.1, "grad_norm": 0.41916703099448366, "learning_rate": 1.9778558496461032e-05, "loss": 0.3015, "step": 2071 }, { "epoch": 0.1, "grad_norm": 0.3741898432727063, "learning_rate": 1.977824699712639e-05, "loss": 0.3057, "step": 2072 }, { "epoch": 0.1, "grad_norm": 0.6469142559572005, "learning_rate": 1.9777935281310784e-05, "loss": 0.3782, "step": 2073 }, { "epoch": 0.1, "grad_norm": 0.46062939843566225, "learning_rate": 1.9777623349021117e-05, "loss": 0.3036, "step": 2074 }, { "epoch": 0.1, "grad_norm": 1.374122882349871, "learning_rate": 1.9777311200264292e-05, "loss": 0.3458, "step": 2075 }, { "epoch": 0.1, "grad_norm": 0.5805875086386575, "learning_rate": 1.977699883504722e-05, "loss": 0.4343, "step": 2076 }, { "epoch": 0.1, "grad_norm": 0.4474600042586244, "learning_rate": 1.9776686253376816e-05, "loss": 0.3003, "step": 2077 }, { "epoch": 0.1, "grad_norm": 0.4214045696450386, "learning_rate": 1.977637345526e-05, "loss": 0.2739, "step": 2078 }, { "epoch": 0.1, "grad_norm": 1.021478995118339, "learning_rate": 1.9776060440703703e-05, "loss": 0.6443, "step": 2079 }, { "epoch": 0.1, "grad_norm": 0.4122736946293039, "learning_rate": 1.9775747209714847e-05, "loss": 0.2602, "step": 2080 }, { "epoch": 0.1, "grad_norm": 0.5680351370776875, "learning_rate": 1.977543376230037e-05, "loss": 0.368, "step": 2081 }, { "epoch": 0.1, "grad_norm": 0.8472518439061804, "learning_rate": 1.9775120098467212e-05, "loss": 0.4055, "step": 2082 }, { "epoch": 0.1, "grad_norm": 0.48007071057726103, "learning_rate": 1.9774806218222317e-05, "loss": 0.2993, "step": 2083 }, { "epoch": 0.1, "grad_norm": 0.5235336868669804, "learning_rate": 1.977449212157263e-05, "loss": 0.338, "step": 2084 }, { "epoch": 0.1, "grad_norm": 0.4714849739363711, "learning_rate": 1.9774177808525113e-05, "loss": 0.3259, "step": 2085 }, { "epoch": 0.1, "grad_norm": 0.43486199444321916, "learning_rate": 1.9773863279086717e-05, "loss": 0.3063, "step": 2086 }, { "epoch": 0.1, "grad_norm": 0.5823190847229298, "learning_rate": 1.9773548533264406e-05, "loss": 0.3337, "step": 2087 }, { "epoch": 0.1, "grad_norm": 0.59517639040424, "learning_rate": 1.9773233571065154e-05, "loss": 0.3688, "step": 2088 }, { "epoch": 0.1, "grad_norm": 0.37481076902184907, "learning_rate": 1.977291839249593e-05, "loss": 0.2996, "step": 2089 }, { "epoch": 0.1, "grad_norm": 1.055614153837425, "learning_rate": 1.977260299756371e-05, "loss": 0.6411, "step": 2090 }, { "epoch": 0.1, "grad_norm": 0.4096261817559592, "learning_rate": 1.977228738627548e-05, "loss": 0.1683, "step": 2091 }, { "epoch": 0.1, "grad_norm": 0.3919653632217819, "learning_rate": 1.9771971558638226e-05, "loss": 0.2482, "step": 2092 }, { "epoch": 0.1, "grad_norm": 0.4981627376003498, "learning_rate": 1.977165551465894e-05, "loss": 0.342, "step": 2093 }, { "epoch": 0.1, "grad_norm": 0.7949045293190264, "learning_rate": 1.9771339254344616e-05, "loss": 0.4958, "step": 2094 }, { "epoch": 0.1, "grad_norm": 0.37134000339919704, "learning_rate": 1.977102277770226e-05, "loss": 0.2488, "step": 2095 }, { "epoch": 0.1, "grad_norm": 1.1299938719523501, "learning_rate": 1.9770706084738884e-05, "loss": 0.6859, "step": 2096 }, { "epoch": 0.1, "grad_norm": 0.48977463551737904, "learning_rate": 1.9770389175461485e-05, "loss": 0.3657, "step": 2097 }, { "epoch": 0.1, "grad_norm": 0.4427595300958791, "learning_rate": 1.9770072049877087e-05, "loss": 0.2127, "step": 2098 }, { "epoch": 0.1, "grad_norm": 0.40201157488549977, "learning_rate": 1.9769754707992712e-05, "loss": 0.2358, "step": 2099 }, { "epoch": 0.1, "grad_norm": 0.46702125298681396, "learning_rate": 1.9769437149815384e-05, "loss": 0.4077, "step": 2100 }, { "epoch": 0.1, "grad_norm": 0.4040077546107262, "learning_rate": 1.9769119375352133e-05, "loss": 0.211, "step": 2101 }, { "epoch": 0.1, "grad_norm": 1.4289465176348344, "learning_rate": 1.9768801384609996e-05, "loss": 0.6883, "step": 2102 }, { "epoch": 0.1, "grad_norm": 0.5395786803833855, "learning_rate": 1.9768483177596008e-05, "loss": 0.3235, "step": 2103 }, { "epoch": 0.1, "grad_norm": 0.39806268988383775, "learning_rate": 1.9768164754317222e-05, "loss": 0.2221, "step": 2104 }, { "epoch": 0.1, "grad_norm": 0.36788733168080495, "learning_rate": 1.976784611478068e-05, "loss": 0.3318, "step": 2105 }, { "epoch": 0.1, "grad_norm": 0.8086084709742454, "learning_rate": 1.976752725899344e-05, "loss": 0.5056, "step": 2106 }, { "epoch": 0.1, "grad_norm": 0.554104515266716, "learning_rate": 1.976720818696256e-05, "loss": 0.3109, "step": 2107 }, { "epoch": 0.1, "grad_norm": 0.456639022945078, "learning_rate": 1.9766888898695105e-05, "loss": 0.2955, "step": 2108 }, { "epoch": 0.1, "grad_norm": 1.5126332944899685, "learning_rate": 1.9766569394198143e-05, "loss": 0.8181, "step": 2109 }, { "epoch": 0.1, "grad_norm": 0.47664182530428684, "learning_rate": 1.976624967347875e-05, "loss": 0.2993, "step": 2110 }, { "epoch": 0.1, "grad_norm": 0.6344329945938374, "learning_rate": 1.9765929736544e-05, "loss": 0.3781, "step": 2111 }, { "epoch": 0.1, "grad_norm": 0.4082381123444978, "learning_rate": 1.9765609583400977e-05, "loss": 0.2751, "step": 2112 }, { "epoch": 0.1, "grad_norm": 0.5200583902347743, "learning_rate": 1.9765289214056774e-05, "loss": 0.3014, "step": 2113 }, { "epoch": 0.1, "grad_norm": 1.0514423799906136, "learning_rate": 1.9764968628518475e-05, "loss": 0.4267, "step": 2114 }, { "epoch": 0.1, "grad_norm": 0.9210822209678126, "learning_rate": 1.9764647826793185e-05, "loss": 0.6372, "step": 2115 }, { "epoch": 0.1, "grad_norm": 0.44777294294102676, "learning_rate": 1.9764326808888002e-05, "loss": 0.3094, "step": 2116 }, { "epoch": 0.1, "grad_norm": 0.4648348455228835, "learning_rate": 1.9764005574810035e-05, "loss": 0.3278, "step": 2117 }, { "epoch": 0.1, "grad_norm": 0.41801974858853064, "learning_rate": 1.9763684124566395e-05, "loss": 0.2601, "step": 2118 }, { "epoch": 0.1, "grad_norm": 0.722316656245178, "learning_rate": 1.97633624581642e-05, "loss": 0.2869, "step": 2119 }, { "epoch": 0.1, "grad_norm": 0.4596482776364018, "learning_rate": 1.9763040575610567e-05, "loss": 0.3356, "step": 2120 }, { "epoch": 0.1, "grad_norm": 0.5270898639748991, "learning_rate": 1.976271847691263e-05, "loss": 0.3632, "step": 2121 }, { "epoch": 0.1, "grad_norm": 0.5050544539553165, "learning_rate": 1.9762396162077513e-05, "loss": 0.3335, "step": 2122 }, { "epoch": 0.1, "grad_norm": 0.4876806477381173, "learning_rate": 1.9762073631112354e-05, "loss": 0.4309, "step": 2123 }, { "epoch": 0.1, "grad_norm": 0.40867237784946964, "learning_rate": 1.9761750884024297e-05, "loss": 0.2297, "step": 2124 }, { "epoch": 0.1, "grad_norm": 0.6079082435175089, "learning_rate": 1.976142792082048e-05, "loss": 0.2331, "step": 2125 }, { "epoch": 0.1, "grad_norm": 0.6431651858303786, "learning_rate": 1.976110474150806e-05, "loss": 0.3978, "step": 2126 }, { "epoch": 0.1, "grad_norm": 0.7325508129241449, "learning_rate": 1.9760781346094183e-05, "loss": 0.4293, "step": 2127 }, { "epoch": 0.1, "grad_norm": 0.4172392382591397, "learning_rate": 1.9760457734586017e-05, "loss": 0.3331, "step": 2128 }, { "epoch": 0.1, "grad_norm": 0.4303314234027147, "learning_rate": 1.9760133906990728e-05, "loss": 0.3529, "step": 2129 }, { "epoch": 0.1, "grad_norm": 0.28199684538308967, "learning_rate": 1.9759809863315477e-05, "loss": 0.0994, "step": 2130 }, { "epoch": 0.1, "grad_norm": 0.5277874028977486, "learning_rate": 1.9759485603567447e-05, "loss": 0.2926, "step": 2131 }, { "epoch": 0.1, "grad_norm": 0.6983155816937586, "learning_rate": 1.9759161127753804e-05, "loss": 0.4114, "step": 2132 }, { "epoch": 0.1, "grad_norm": 0.5376766004189407, "learning_rate": 1.9758836435881745e-05, "loss": 0.3788, "step": 2133 }, { "epoch": 0.1, "grad_norm": 0.42761906447658266, "learning_rate": 1.9758511527958456e-05, "loss": 0.2919, "step": 2134 }, { "epoch": 0.1, "grad_norm": 1.3183544276901813, "learning_rate": 1.9758186403991122e-05, "loss": 0.7132, "step": 2135 }, { "epoch": 0.1, "grad_norm": 0.3789106890907285, "learning_rate": 1.975786106398695e-05, "loss": 0.2787, "step": 2136 }, { "epoch": 0.1, "grad_norm": 0.2955599002249521, "learning_rate": 1.9757535507953133e-05, "loss": 0.1185, "step": 2137 }, { "epoch": 0.1, "grad_norm": 0.5656064428404165, "learning_rate": 1.975720973589689e-05, "loss": 0.3506, "step": 2138 }, { "epoch": 0.1, "grad_norm": 0.6859807498607633, "learning_rate": 1.9756883747825424e-05, "loss": 0.5143, "step": 2139 }, { "epoch": 0.1, "grad_norm": 0.3989102319005813, "learning_rate": 1.975655754374596e-05, "loss": 0.2443, "step": 2140 }, { "epoch": 0.1, "grad_norm": 0.5420401994781315, "learning_rate": 1.975623112366571e-05, "loss": 0.3969, "step": 2141 }, { "epoch": 0.1, "grad_norm": 0.481622320257203, "learning_rate": 1.9755904487591907e-05, "loss": 0.2168, "step": 2142 }, { "epoch": 0.1, "grad_norm": 0.33653438827375337, "learning_rate": 1.9755577635531783e-05, "loss": 0.1599, "step": 2143 }, { "epoch": 0.1, "grad_norm": 0.45511025554911416, "learning_rate": 1.9755250567492573e-05, "loss": 0.3386, "step": 2144 }, { "epoch": 0.1, "grad_norm": 1.0130892911225096, "learning_rate": 1.975492328348152e-05, "loss": 0.5371, "step": 2145 }, { "epoch": 0.1, "grad_norm": 0.5973837723028796, "learning_rate": 1.9754595783505864e-05, "loss": 0.4098, "step": 2146 }, { "epoch": 0.1, "grad_norm": 0.4891071058211198, "learning_rate": 1.975426806757286e-05, "loss": 0.2778, "step": 2147 }, { "epoch": 0.1, "grad_norm": 0.46478000544467324, "learning_rate": 1.9753940135689762e-05, "loss": 0.3744, "step": 2148 }, { "epoch": 0.1, "grad_norm": 0.34344842587494384, "learning_rate": 1.975361198786383e-05, "loss": 0.1672, "step": 2149 }, { "epoch": 0.1, "grad_norm": 0.6224444421866723, "learning_rate": 1.975328362410233e-05, "loss": 0.3083, "step": 2150 }, { "epoch": 0.1, "grad_norm": 1.6624986319441464, "learning_rate": 1.975295504441253e-05, "loss": 0.5495, "step": 2151 }, { "epoch": 0.1, "grad_norm": 0.4083532970107414, "learning_rate": 1.9752626248801707e-05, "loss": 0.3145, "step": 2152 }, { "epoch": 0.1, "grad_norm": 0.6140521327996014, "learning_rate": 1.9752297237277136e-05, "loss": 0.2937, "step": 2153 }, { "epoch": 0.1, "grad_norm": 0.5924161105777705, "learning_rate": 1.9751968009846107e-05, "loss": 0.2758, "step": 2154 }, { "epoch": 0.1, "grad_norm": 0.4711705892669173, "learning_rate": 1.9751638566515903e-05, "loss": 0.2411, "step": 2155 }, { "epoch": 0.1, "grad_norm": 0.5645447959083201, "learning_rate": 1.975130890729382e-05, "loss": 0.2908, "step": 2156 }, { "epoch": 0.1, "grad_norm": 1.8859694042073942, "learning_rate": 1.9750979032187157e-05, "loss": 0.5671, "step": 2157 }, { "epoch": 0.1, "grad_norm": 1.0915836262542027, "learning_rate": 1.9750648941203216e-05, "loss": 0.536, "step": 2158 }, { "epoch": 0.1, "grad_norm": 0.48920467992871297, "learning_rate": 1.9750318634349302e-05, "loss": 0.3078, "step": 2159 }, { "epoch": 0.1, "grad_norm": 0.5036544758332026, "learning_rate": 1.9749988111632735e-05, "loss": 0.2984, "step": 2160 }, { "epoch": 0.1, "grad_norm": 0.5472862968831728, "learning_rate": 1.974965737306083e-05, "loss": 0.2938, "step": 2161 }, { "epoch": 0.1, "grad_norm": 0.7784049130906795, "learning_rate": 1.9749326418640903e-05, "loss": 0.3686, "step": 2162 }, { "epoch": 0.1, "grad_norm": 1.9306301938359, "learning_rate": 1.974899524838029e-05, "loss": 0.518, "step": 2163 }, { "epoch": 0.1, "grad_norm": 0.4303476937648894, "learning_rate": 1.9748663862286315e-05, "loss": 0.3009, "step": 2164 }, { "epoch": 0.1, "grad_norm": 0.41065126036491767, "learning_rate": 1.9748332260366316e-05, "loss": 0.2957, "step": 2165 }, { "epoch": 0.1, "grad_norm": 1.2998310779507305, "learning_rate": 1.974800044262764e-05, "loss": 0.3834, "step": 2166 }, { "epoch": 0.1, "grad_norm": 1.251330312265991, "learning_rate": 1.9747668409077628e-05, "loss": 0.4423, "step": 2167 }, { "epoch": 0.1, "grad_norm": 0.5378470923839239, "learning_rate": 1.9747336159723636e-05, "loss": 0.3399, "step": 2168 }, { "epoch": 0.1, "grad_norm": 0.42478663739553907, "learning_rate": 1.974700369457301e-05, "loss": 0.2637, "step": 2169 }, { "epoch": 0.1, "grad_norm": 0.476230097921976, "learning_rate": 1.9746671013633123e-05, "loss": 0.2838, "step": 2170 }, { "epoch": 0.1, "grad_norm": 0.5707161414998441, "learning_rate": 1.974633811691133e-05, "loss": 0.3511, "step": 2171 }, { "epoch": 0.1, "grad_norm": 0.5050082456780789, "learning_rate": 1.9746005004415004e-05, "loss": 0.3462, "step": 2172 }, { "epoch": 0.1, "grad_norm": 1.1402183452298285, "learning_rate": 1.974567167615152e-05, "loss": 0.4573, "step": 2173 }, { "epoch": 0.1, "grad_norm": 0.45057377023130346, "learning_rate": 1.9745338132128262e-05, "loss": 0.3138, "step": 2174 }, { "epoch": 0.1, "grad_norm": 0.6112395290137412, "learning_rate": 1.9745004372352606e-05, "loss": 0.4187, "step": 2175 }, { "epoch": 0.1, "grad_norm": 0.36464829458990694, "learning_rate": 1.974467039683195e-05, "loss": 0.1944, "step": 2176 }, { "epoch": 0.1, "grad_norm": 0.432123913201605, "learning_rate": 1.974433620557368e-05, "loss": 0.276, "step": 2177 }, { "epoch": 0.1, "grad_norm": 1.147450516210223, "learning_rate": 1.97440017985852e-05, "loss": 0.5794, "step": 2178 }, { "epoch": 0.1, "grad_norm": 0.7020237605619556, "learning_rate": 1.9743667175873908e-05, "loss": 0.3555, "step": 2179 }, { "epoch": 0.1, "grad_norm": 0.4817949909637308, "learning_rate": 1.9743332337447222e-05, "loss": 0.3336, "step": 2180 }, { "epoch": 0.1, "grad_norm": 1.2154682722530228, "learning_rate": 1.9742997283312543e-05, "loss": 0.7213, "step": 2181 }, { "epoch": 0.1, "grad_norm": 0.35944610430046714, "learning_rate": 1.97426620134773e-05, "loss": 0.2309, "step": 2182 }, { "epoch": 0.1, "grad_norm": 0.419296109841567, "learning_rate": 1.9742326527948904e-05, "loss": 0.2486, "step": 2183 }, { "epoch": 0.1, "grad_norm": 0.6056328246067001, "learning_rate": 1.9741990826734793e-05, "loss": 0.3643, "step": 2184 }, { "epoch": 0.1, "grad_norm": 0.9564733493580043, "learning_rate": 1.9741654909842394e-05, "loss": 0.4695, "step": 2185 }, { "epoch": 0.1, "grad_norm": 0.5255979956132542, "learning_rate": 1.9741318777279143e-05, "loss": 0.2375, "step": 2186 }, { "epoch": 0.1, "grad_norm": 1.533314518458618, "learning_rate": 1.974098242905248e-05, "loss": 0.7751, "step": 2187 }, { "epoch": 0.1, "grad_norm": 0.3643456814371708, "learning_rate": 1.974064586516986e-05, "loss": 0.2498, "step": 2188 }, { "epoch": 0.1, "grad_norm": 0.3337031822908557, "learning_rate": 1.9740309085638727e-05, "loss": 0.1804, "step": 2189 }, { "epoch": 0.1, "grad_norm": 0.968982998919056, "learning_rate": 1.9739972090466538e-05, "loss": 0.5333, "step": 2190 }, { "epoch": 0.1, "grad_norm": 0.6593580144607655, "learning_rate": 1.9739634879660758e-05, "loss": 0.3819, "step": 2191 }, { "epoch": 0.1, "grad_norm": 0.4545542454932407, "learning_rate": 1.9739297453228842e-05, "loss": 0.2464, "step": 2192 }, { "epoch": 0.1, "grad_norm": 1.69648692975947, "learning_rate": 1.9738959811178273e-05, "loss": 0.8943, "step": 2193 }, { "epoch": 0.1, "grad_norm": 3.8701638473312925, "learning_rate": 1.9738621953516517e-05, "loss": 0.2653, "step": 2194 }, { "epoch": 0.1, "grad_norm": 0.42370326478041115, "learning_rate": 1.973828388025106e-05, "loss": 0.2296, "step": 2195 }, { "epoch": 0.1, "grad_norm": 0.9719842354945029, "learning_rate": 1.9737945591389384e-05, "loss": 0.3647, "step": 2196 }, { "epoch": 0.1, "grad_norm": 1.3562021016350019, "learning_rate": 1.9737607086938975e-05, "loss": 0.5287, "step": 2197 }, { "epoch": 0.1, "grad_norm": 0.5507630385567706, "learning_rate": 1.9737268366907328e-05, "loss": 0.2891, "step": 2198 }, { "epoch": 0.1, "grad_norm": 1.5700491189311163, "learning_rate": 1.973692943130195e-05, "loss": 0.6074, "step": 2199 }, { "epoch": 0.1, "grad_norm": 0.8445721338943345, "learning_rate": 1.973659028013034e-05, "loss": 0.3308, "step": 2200 }, { "epoch": 0.1, "grad_norm": 0.5566301262255688, "learning_rate": 1.97362509134e-05, "loss": 0.3363, "step": 2201 }, { "epoch": 0.1, "grad_norm": 1.0507449152666521, "learning_rate": 1.9735911331118452e-05, "loss": 0.3163, "step": 2202 }, { "epoch": 0.1, "grad_norm": 0.704474598330572, "learning_rate": 1.973557153329321e-05, "loss": 0.3802, "step": 2203 }, { "epoch": 0.1, "grad_norm": 0.680440487823801, "learning_rate": 1.9735231519931796e-05, "loss": 0.3331, "step": 2204 }, { "epoch": 0.1, "grad_norm": 1.0006056529581375, "learning_rate": 1.973489129104174e-05, "loss": 0.3693, "step": 2205 }, { "epoch": 0.1, "grad_norm": 0.8237427228004932, "learning_rate": 1.9734550846630575e-05, "loss": 0.494, "step": 2206 }, { "epoch": 0.1, "grad_norm": 0.6219840196045429, "learning_rate": 1.9734210186705837e-05, "loss": 0.3057, "step": 2207 }, { "epoch": 0.1, "grad_norm": 0.5726745966277148, "learning_rate": 1.9733869311275063e-05, "loss": 0.3983, "step": 2208 }, { "epoch": 0.1, "grad_norm": 0.5883624012694447, "learning_rate": 1.973352822034581e-05, "loss": 0.0855, "step": 2209 }, { "epoch": 0.1, "grad_norm": 0.4618386052318375, "learning_rate": 1.973318691392562e-05, "loss": 0.2819, "step": 2210 }, { "epoch": 0.1, "grad_norm": 0.6166845564878932, "learning_rate": 1.9732845392022052e-05, "loss": 0.4593, "step": 2211 }, { "epoch": 0.1, "grad_norm": 0.5345335800266068, "learning_rate": 1.9732503654642667e-05, "loss": 0.3187, "step": 2212 }, { "epoch": 0.1, "grad_norm": 0.4769875630202992, "learning_rate": 1.9732161701795034e-05, "loss": 0.3323, "step": 2213 }, { "epoch": 0.1, "grad_norm": 0.4248093276101492, "learning_rate": 1.973181953348672e-05, "loss": 0.2411, "step": 2214 }, { "epoch": 0.1, "grad_norm": 0.4968614159676831, "learning_rate": 1.97314771497253e-05, "loss": 0.309, "step": 2215 }, { "epoch": 0.1, "grad_norm": 0.4524940166547822, "learning_rate": 1.9731134550518355e-05, "loss": 0.3231, "step": 2216 }, { "epoch": 0.1, "grad_norm": 1.4989127752968847, "learning_rate": 1.9730791735873472e-05, "loss": 0.8732, "step": 2217 }, { "epoch": 0.1, "grad_norm": 0.6223564638310751, "learning_rate": 1.973044870579824e-05, "loss": 0.4624, "step": 2218 }, { "epoch": 0.1, "grad_norm": 0.4481740607186792, "learning_rate": 1.973010546030025e-05, "loss": 0.2929, "step": 2219 }, { "epoch": 0.1, "grad_norm": 0.6396224296821759, "learning_rate": 1.9729761999387102e-05, "loss": 0.3939, "step": 2220 }, { "epoch": 0.1, "grad_norm": 0.5671267141358581, "learning_rate": 1.9729418323066403e-05, "loss": 0.1872, "step": 2221 }, { "epoch": 0.1, "grad_norm": 0.45026434296421153, "learning_rate": 1.972907443134576e-05, "loss": 0.2227, "step": 2222 }, { "epoch": 0.1, "grad_norm": 0.5685168347868768, "learning_rate": 1.9728730324232782e-05, "loss": 0.3887, "step": 2223 }, { "epoch": 0.1, "grad_norm": 0.7394597761937198, "learning_rate": 1.9728386001735097e-05, "loss": 0.4272, "step": 2224 }, { "epoch": 0.1, "grad_norm": 0.499493928200407, "learning_rate": 1.9728041463860318e-05, "loss": 0.2515, "step": 2225 }, { "epoch": 0.1, "grad_norm": 0.6790780673355199, "learning_rate": 1.972769671061608e-05, "loss": 0.4053, "step": 2226 }, { "epoch": 0.1, "grad_norm": 0.4318757865373186, "learning_rate": 1.972735174201001e-05, "loss": 0.2584, "step": 2227 }, { "epoch": 0.1, "grad_norm": 0.396611312679269, "learning_rate": 1.972700655804975e-05, "loss": 0.2247, "step": 2228 }, { "epoch": 0.1, "grad_norm": 1.0111621756301359, "learning_rate": 1.9726661158742938e-05, "loss": 0.6302, "step": 2229 }, { "epoch": 0.1, "grad_norm": 0.9110780265475183, "learning_rate": 1.9726315544097223e-05, "loss": 0.5314, "step": 2230 }, { "epoch": 0.1, "grad_norm": 0.4741694506451656, "learning_rate": 1.9725969714120256e-05, "loss": 0.2604, "step": 2231 }, { "epoch": 0.1, "grad_norm": 0.6072701530107135, "learning_rate": 1.9725623668819694e-05, "loss": 0.3967, "step": 2232 }, { "epoch": 0.1, "grad_norm": 0.3971734623387778, "learning_rate": 1.9725277408203198e-05, "loss": 0.1874, "step": 2233 }, { "epoch": 0.1, "grad_norm": 0.5137023465851119, "learning_rate": 1.9724930932278435e-05, "loss": 0.3089, "step": 2234 }, { "epoch": 0.1, "grad_norm": 0.6769462325112753, "learning_rate": 1.972458424105307e-05, "loss": 0.325, "step": 2235 }, { "epoch": 0.1, "grad_norm": 1.0178131297168105, "learning_rate": 1.9724237334534786e-05, "loss": 0.4665, "step": 2236 }, { "epoch": 0.1, "grad_norm": 0.4030555267181865, "learning_rate": 1.972389021273126e-05, "loss": 0.3009, "step": 2237 }, { "epoch": 0.1, "grad_norm": 0.7399447482774754, "learning_rate": 1.9723542875650176e-05, "loss": 0.4178, "step": 2238 }, { "epoch": 0.1, "grad_norm": 0.35790169784215436, "learning_rate": 1.9723195323299222e-05, "loss": 0.2483, "step": 2239 }, { "epoch": 0.1, "grad_norm": 0.4964662695625661, "learning_rate": 1.9722847555686094e-05, "loss": 0.2842, "step": 2240 }, { "epoch": 0.1, "grad_norm": 0.5974334542618154, "learning_rate": 1.9722499572818496e-05, "loss": 0.3776, "step": 2241 }, { "epoch": 0.1, "grad_norm": 0.9527547463324139, "learning_rate": 1.9722151374704128e-05, "loss": 0.5839, "step": 2242 }, { "epoch": 0.1, "grad_norm": 0.46939326020246824, "learning_rate": 1.97218029613507e-05, "loss": 0.2844, "step": 2243 }, { "epoch": 0.1, "grad_norm": 0.4265678947988853, "learning_rate": 1.9721454332765918e-05, "loss": 0.2808, "step": 2244 }, { "epoch": 0.1, "grad_norm": 0.34949276753249836, "learning_rate": 1.972110548895751e-05, "loss": 0.2108, "step": 2245 }, { "epoch": 0.1, "grad_norm": 0.626401990070787, "learning_rate": 1.9720756429933198e-05, "loss": 0.3742, "step": 2246 }, { "epoch": 0.1, "grad_norm": 0.39177326150665026, "learning_rate": 1.9720407155700706e-05, "loss": 0.327, "step": 2247 }, { "epoch": 0.1, "grad_norm": 1.2161482680855527, "learning_rate": 1.972005766626777e-05, "loss": 0.4649, "step": 2248 }, { "epoch": 0.1, "grad_norm": 0.49884535459800067, "learning_rate": 1.9719707961642122e-05, "loss": 0.3418, "step": 2249 }, { "epoch": 0.1, "grad_norm": 0.6630128481249439, "learning_rate": 1.971935804183151e-05, "loss": 0.4039, "step": 2250 }, { "epoch": 0.1, "grad_norm": 0.37111506961829666, "learning_rate": 1.971900790684368e-05, "loss": 0.2383, "step": 2251 }, { "epoch": 0.1, "grad_norm": 0.4530526857397637, "learning_rate": 1.971865755668638e-05, "loss": 0.2644, "step": 2252 }, { "epoch": 0.1, "grad_norm": 0.8473836291047714, "learning_rate": 1.971830699136737e-05, "loss": 0.4428, "step": 2253 }, { "epoch": 0.1, "grad_norm": 0.9113872612642971, "learning_rate": 1.971795621089441e-05, "loss": 0.5937, "step": 2254 }, { "epoch": 0.1, "grad_norm": 0.42573822166032294, "learning_rate": 1.9717605215275263e-05, "loss": 0.3136, "step": 2255 }, { "epoch": 0.1, "grad_norm": 0.5890520441848602, "learning_rate": 1.9717254004517707e-05, "loss": 0.4004, "step": 2256 }, { "epoch": 0.1, "grad_norm": 0.38592348159489226, "learning_rate": 1.9716902578629507e-05, "loss": 0.2235, "step": 2257 }, { "epoch": 0.1, "grad_norm": 0.4591218882363621, "learning_rate": 1.9716550937618456e-05, "loss": 0.352, "step": 2258 }, { "epoch": 0.1, "grad_norm": 0.3987410712178617, "learning_rate": 1.971619908149233e-05, "loss": 0.3331, "step": 2259 }, { "epoch": 0.1, "grad_norm": 1.0796229290513142, "learning_rate": 1.9715847010258918e-05, "loss": 0.6681, "step": 2260 }, { "epoch": 0.1, "grad_norm": 0.3073222948611999, "learning_rate": 1.9715494723926017e-05, "loss": 0.116, "step": 2261 }, { "epoch": 0.1, "grad_norm": 0.5328137682362506, "learning_rate": 1.971514222250143e-05, "loss": 0.3421, "step": 2262 }, { "epoch": 0.1, "grad_norm": 0.43378195094021044, "learning_rate": 1.9714789505992956e-05, "loss": 0.3493, "step": 2263 }, { "epoch": 0.1, "grad_norm": 0.6188836183555025, "learning_rate": 1.9714436574408408e-05, "loss": 0.2951, "step": 2264 }, { "epoch": 0.1, "grad_norm": 0.36961998241477345, "learning_rate": 1.9714083427755594e-05, "loss": 0.2968, "step": 2265 }, { "epoch": 0.1, "grad_norm": 1.626537460344871, "learning_rate": 1.9713730066042335e-05, "loss": 0.7628, "step": 2266 }, { "epoch": 0.1, "grad_norm": 0.3217633037971974, "learning_rate": 1.971337648927646e-05, "loss": 0.201, "step": 2267 }, { "epoch": 0.1, "grad_norm": 0.5669891635865614, "learning_rate": 1.9713022697465786e-05, "loss": 0.3473, "step": 2268 }, { "epoch": 0.1, "grad_norm": 0.950178402909665, "learning_rate": 1.9712668690618154e-05, "loss": 0.5388, "step": 2269 }, { "epoch": 0.1, "grad_norm": 0.4800787406092234, "learning_rate": 1.9712314468741395e-05, "loss": 0.2972, "step": 2270 }, { "epoch": 0.1, "grad_norm": 0.5235121405261489, "learning_rate": 1.9711960031843358e-05, "loss": 0.3611, "step": 2271 }, { "epoch": 0.1, "grad_norm": 0.6653556774438358, "learning_rate": 1.9711605379931885e-05, "loss": 0.4358, "step": 2272 }, { "epoch": 0.1, "grad_norm": 0.3263249248311736, "learning_rate": 1.971125051301483e-05, "loss": 0.1506, "step": 2273 }, { "epoch": 0.1, "grad_norm": 0.49670218041613534, "learning_rate": 1.971089543110005e-05, "loss": 0.256, "step": 2274 }, { "epoch": 0.1, "grad_norm": 0.44875306108868424, "learning_rate": 1.97105401341954e-05, "loss": 0.3648, "step": 2275 }, { "epoch": 0.1, "grad_norm": 0.5842243289372618, "learning_rate": 1.971018462230875e-05, "loss": 0.4289, "step": 2276 }, { "epoch": 0.1, "grad_norm": 0.5064043570294681, "learning_rate": 1.9709828895447977e-05, "loss": 0.2941, "step": 2277 }, { "epoch": 0.1, "grad_norm": 0.4430669483631709, "learning_rate": 1.9709472953620946e-05, "loss": 0.2962, "step": 2278 }, { "epoch": 0.1, "grad_norm": 0.4198493266243077, "learning_rate": 1.9709116796835543e-05, "loss": 0.2628, "step": 2279 }, { "epoch": 0.1, "grad_norm": 0.4310531107621618, "learning_rate": 1.9708760425099653e-05, "loss": 0.2535, "step": 2280 }, { "epoch": 0.1, "grad_norm": 0.6593456595309842, "learning_rate": 1.9708403838421164e-05, "loss": 0.4895, "step": 2281 }, { "epoch": 0.1, "grad_norm": 0.8936465390574527, "learning_rate": 1.9708047036807973e-05, "loss": 0.4559, "step": 2282 }, { "epoch": 0.1, "grad_norm": 0.3979566798767169, "learning_rate": 1.9707690020267974e-05, "loss": 0.2706, "step": 2283 }, { "epoch": 0.1, "grad_norm": 1.436365570492759, "learning_rate": 1.9707332788809076e-05, "loss": 0.8557, "step": 2284 }, { "epoch": 0.1, "grad_norm": 0.37399829132552814, "learning_rate": 1.9706975342439186e-05, "loss": 0.1981, "step": 2285 }, { "epoch": 0.11, "grad_norm": 0.4437140486410228, "learning_rate": 1.970661768116622e-05, "loss": 0.3429, "step": 2286 }, { "epoch": 0.11, "grad_norm": 0.45692170794703907, "learning_rate": 1.9706259804998093e-05, "loss": 0.3335, "step": 2287 }, { "epoch": 0.11, "grad_norm": 0.8920294403839797, "learning_rate": 1.9705901713942723e-05, "loss": 0.4798, "step": 2288 }, { "epoch": 0.11, "grad_norm": 0.4551100221045932, "learning_rate": 1.970554340800805e-05, "loss": 0.2958, "step": 2289 }, { "epoch": 0.11, "grad_norm": 0.5016047979117828, "learning_rate": 1.9705184887202e-05, "loss": 0.354, "step": 2290 }, { "epoch": 0.11, "grad_norm": 0.3913487023187884, "learning_rate": 1.9704826151532506e-05, "loss": 0.2756, "step": 2291 }, { "epoch": 0.11, "grad_norm": 0.4048141065071724, "learning_rate": 1.970446720100752e-05, "loss": 0.2602, "step": 2292 }, { "epoch": 0.11, "grad_norm": 0.5973975246368928, "learning_rate": 1.970410803563498e-05, "loss": 0.4444, "step": 2293 }, { "epoch": 0.11, "grad_norm": 0.44958090868219014, "learning_rate": 1.9703748655422843e-05, "loss": 0.3168, "step": 2294 }, { "epoch": 0.11, "grad_norm": 0.4434498982362864, "learning_rate": 1.970338906037906e-05, "loss": 0.299, "step": 2295 }, { "epoch": 0.11, "grad_norm": 0.9322793622952742, "learning_rate": 1.97030292505116e-05, "loss": 0.5895, "step": 2296 }, { "epoch": 0.11, "grad_norm": 0.5157730057179291, "learning_rate": 1.9702669225828422e-05, "loss": 0.3392, "step": 2297 }, { "epoch": 0.11, "grad_norm": 0.39372034878125683, "learning_rate": 1.9702308986337498e-05, "loss": 0.3096, "step": 2298 }, { "epoch": 0.11, "grad_norm": 0.34520994581152353, "learning_rate": 1.9701948532046805e-05, "loss": 0.2943, "step": 2299 }, { "epoch": 0.11, "grad_norm": 0.6190294890137501, "learning_rate": 1.9701587862964325e-05, "loss": 0.1776, "step": 2300 }, { "epoch": 0.11, "grad_norm": 0.46899139204107076, "learning_rate": 1.9701226979098037e-05, "loss": 0.3436, "step": 2301 }, { "epoch": 0.11, "grad_norm": 0.5673506229928984, "learning_rate": 1.9700865880455936e-05, "loss": 0.4129, "step": 2302 }, { "epoch": 0.11, "grad_norm": 0.46478924737985755, "learning_rate": 1.9700504567046013e-05, "loss": 0.3162, "step": 2303 }, { "epoch": 0.11, "grad_norm": 0.39984235194352996, "learning_rate": 1.9700143038876267e-05, "loss": 0.3537, "step": 2304 }, { "epoch": 0.11, "grad_norm": 0.38542220679275685, "learning_rate": 1.9699781295954706e-05, "loss": 0.2166, "step": 2305 }, { "epoch": 0.11, "grad_norm": 0.4182811546124366, "learning_rate": 1.9699419338289335e-05, "loss": 0.2833, "step": 2306 }, { "epoch": 0.11, "grad_norm": 0.46318718450068797, "learning_rate": 1.9699057165888165e-05, "loss": 0.326, "step": 2307 }, { "epoch": 0.11, "grad_norm": 1.4660170163116364, "learning_rate": 1.969869477875922e-05, "loss": 0.8417, "step": 2308 }, { "epoch": 0.11, "grad_norm": 0.7266525695581226, "learning_rate": 1.9698332176910524e-05, "loss": 0.5034, "step": 2309 }, { "epoch": 0.11, "grad_norm": 0.4125397360182415, "learning_rate": 1.9697969360350098e-05, "loss": 0.2242, "step": 2310 }, { "epoch": 0.11, "grad_norm": 0.341030502798929, "learning_rate": 1.9697606329085977e-05, "loss": 0.247, "step": 2311 }, { "epoch": 0.11, "grad_norm": 0.9922297064690038, "learning_rate": 1.9697243083126197e-05, "loss": 0.4661, "step": 2312 }, { "epoch": 0.11, "grad_norm": 0.40636916342167856, "learning_rate": 1.96968796224788e-05, "loss": 0.2399, "step": 2313 }, { "epoch": 0.11, "grad_norm": 0.47112356019254353, "learning_rate": 1.969651594715184e-05, "loss": 0.372, "step": 2314 }, { "epoch": 0.11, "grad_norm": 0.9050273930829859, "learning_rate": 1.969615205715336e-05, "loss": 0.5187, "step": 2315 }, { "epoch": 0.11, "grad_norm": 0.39464002527936254, "learning_rate": 1.9695787952491415e-05, "loss": 0.2685, "step": 2316 }, { "epoch": 0.11, "grad_norm": 0.4289571238393471, "learning_rate": 1.9695423633174076e-05, "loss": 0.2043, "step": 2317 }, { "epoch": 0.11, "grad_norm": 0.524019893612768, "learning_rate": 1.96950590992094e-05, "loss": 0.3444, "step": 2318 }, { "epoch": 0.11, "grad_norm": 0.4516141622200916, "learning_rate": 1.9694694350605456e-05, "loss": 0.2414, "step": 2319 }, { "epoch": 0.11, "grad_norm": 0.9882801512526483, "learning_rate": 1.9694329387370327e-05, "loss": 0.5164, "step": 2320 }, { "epoch": 0.11, "grad_norm": 0.8613955224711916, "learning_rate": 1.9693964209512088e-05, "loss": 0.5408, "step": 2321 }, { "epoch": 0.11, "grad_norm": 0.49972580825040785, "learning_rate": 1.9693598817038825e-05, "loss": 0.2689, "step": 2322 }, { "epoch": 0.11, "grad_norm": 0.5241971213303899, "learning_rate": 1.9693233209958627e-05, "loss": 0.3003, "step": 2323 }, { "epoch": 0.11, "grad_norm": 0.4827230185558488, "learning_rate": 1.9692867388279587e-05, "loss": 0.2544, "step": 2324 }, { "epoch": 0.11, "grad_norm": 0.4946438710570667, "learning_rate": 1.9692501352009804e-05, "loss": 0.3156, "step": 2325 }, { "epoch": 0.11, "grad_norm": 1.0013816215685254, "learning_rate": 1.9692135101157387e-05, "loss": 0.3808, "step": 2326 }, { "epoch": 0.11, "grad_norm": 0.7641323296084365, "learning_rate": 1.969176863573044e-05, "loss": 0.4481, "step": 2327 }, { "epoch": 0.11, "grad_norm": 0.4855079265978535, "learning_rate": 1.969140195573707e-05, "loss": 0.3072, "step": 2328 }, { "epoch": 0.11, "grad_norm": 0.27373720446990485, "learning_rate": 1.969103506118541e-05, "loss": 0.1506, "step": 2329 }, { "epoch": 0.11, "grad_norm": 0.5283255658404069, "learning_rate": 1.969066795208357e-05, "loss": 0.3595, "step": 2330 }, { "epoch": 0.11, "grad_norm": 0.5246319920094062, "learning_rate": 1.969030062843968e-05, "loss": 0.3168, "step": 2331 }, { "epoch": 0.11, "grad_norm": 0.8437737828629368, "learning_rate": 1.9689933090261873e-05, "loss": 0.4091, "step": 2332 }, { "epoch": 0.11, "grad_norm": 1.3223682756115784, "learning_rate": 1.968956533755829e-05, "loss": 0.7679, "step": 2333 }, { "epoch": 0.11, "grad_norm": 0.492603283821184, "learning_rate": 1.9689197370337068e-05, "loss": 0.2861, "step": 2334 }, { "epoch": 0.11, "grad_norm": 0.5087001736309223, "learning_rate": 1.9688829188606356e-05, "loss": 0.3953, "step": 2335 }, { "epoch": 0.11, "grad_norm": 0.32484926435058337, "learning_rate": 1.96884607923743e-05, "loss": 0.1071, "step": 2336 }, { "epoch": 0.11, "grad_norm": 0.46769170515929204, "learning_rate": 1.9688092181649065e-05, "loss": 0.312, "step": 2337 }, { "epoch": 0.11, "grad_norm": 0.5712527710560433, "learning_rate": 1.9687723356438804e-05, "loss": 0.4243, "step": 2338 }, { "epoch": 0.11, "grad_norm": 0.5608539838733599, "learning_rate": 1.9687354316751685e-05, "loss": 0.3181, "step": 2339 }, { "epoch": 0.11, "grad_norm": 0.4449033548842678, "learning_rate": 1.968698506259588e-05, "loss": 0.2744, "step": 2340 }, { "epoch": 0.11, "grad_norm": 1.4178183084397107, "learning_rate": 1.968661559397956e-05, "loss": 0.664, "step": 2341 }, { "epoch": 0.11, "grad_norm": 0.3429835611742759, "learning_rate": 1.9686245910910908e-05, "loss": 0.2051, "step": 2342 }, { "epoch": 0.11, "grad_norm": 0.42099036261680933, "learning_rate": 1.9685876013398108e-05, "loss": 0.3086, "step": 2343 }, { "epoch": 0.11, "grad_norm": 0.8195551267189042, "learning_rate": 1.9685505901449346e-05, "loss": 0.5072, "step": 2344 }, { "epoch": 0.11, "grad_norm": 0.5813314876408059, "learning_rate": 1.968513557507282e-05, "loss": 0.324, "step": 2345 }, { "epoch": 0.11, "grad_norm": 0.5221771940454054, "learning_rate": 1.9684765034276726e-05, "loss": 0.2825, "step": 2346 }, { "epoch": 0.11, "grad_norm": 0.5376458699304676, "learning_rate": 1.968439427906927e-05, "loss": 0.3704, "step": 2347 }, { "epoch": 0.11, "grad_norm": 0.5146347641997501, "learning_rate": 1.968402330945866e-05, "loss": 0.3641, "step": 2348 }, { "epoch": 0.11, "grad_norm": 0.4719791361874272, "learning_rate": 1.9683652125453102e-05, "loss": 0.2579, "step": 2349 }, { "epoch": 0.11, "grad_norm": 0.4665111779833458, "learning_rate": 1.9683280727060824e-05, "loss": 0.353, "step": 2350 }, { "epoch": 0.11, "grad_norm": 0.4655544231639097, "learning_rate": 1.968290911429004e-05, "loss": 0.2705, "step": 2351 }, { "epoch": 0.11, "grad_norm": 0.4449371778839725, "learning_rate": 1.9682537287148985e-05, "loss": 0.192, "step": 2352 }, { "epoch": 0.11, "grad_norm": 0.6383687052984196, "learning_rate": 1.9682165245645884e-05, "loss": 0.3859, "step": 2353 }, { "epoch": 0.11, "grad_norm": 0.4805221497471112, "learning_rate": 1.9681792989788973e-05, "loss": 0.3926, "step": 2354 }, { "epoch": 0.11, "grad_norm": 0.36974950787201116, "learning_rate": 1.9681420519586502e-05, "loss": 0.2764, "step": 2355 }, { "epoch": 0.11, "grad_norm": 0.8837184645065015, "learning_rate": 1.9681047835046708e-05, "loss": 0.5753, "step": 2356 }, { "epoch": 0.11, "grad_norm": 0.3731665676523108, "learning_rate": 1.968067493617785e-05, "loss": 0.2162, "step": 2357 }, { "epoch": 0.11, "grad_norm": 0.39261007704737544, "learning_rate": 1.9680301822988177e-05, "loss": 0.2562, "step": 2358 }, { "epoch": 0.11, "grad_norm": 1.1331640205725197, "learning_rate": 1.967992849548595e-05, "loss": 0.6137, "step": 2359 }, { "epoch": 0.11, "grad_norm": 0.6394315504444733, "learning_rate": 1.967955495367944e-05, "loss": 0.5028, "step": 2360 }, { "epoch": 0.11, "grad_norm": 0.49067529604623294, "learning_rate": 1.9679181197576907e-05, "loss": 0.3592, "step": 2361 }, { "epoch": 0.11, "grad_norm": 0.42068399055756417, "learning_rate": 1.9678807227186635e-05, "loss": 0.2836, "step": 2362 }, { "epoch": 0.11, "grad_norm": 0.42734307212767414, "learning_rate": 1.96784330425169e-05, "loss": 0.2476, "step": 2363 }, { "epoch": 0.11, "grad_norm": 0.7765022388761557, "learning_rate": 1.9678058643575985e-05, "loss": 0.364, "step": 2364 }, { "epoch": 0.11, "grad_norm": 0.4421099752995076, "learning_rate": 1.9677684030372178e-05, "loss": 0.3123, "step": 2365 }, { "epoch": 0.11, "grad_norm": 0.4848143681057836, "learning_rate": 1.9677309202913773e-05, "loss": 0.4045, "step": 2366 }, { "epoch": 0.11, "grad_norm": 0.750116212197387, "learning_rate": 1.9676934161209073e-05, "loss": 0.3247, "step": 2367 }, { "epoch": 0.11, "grad_norm": 0.5062295245693124, "learning_rate": 1.9676558905266377e-05, "loss": 0.3082, "step": 2368 }, { "epoch": 0.11, "grad_norm": 0.33450938278833114, "learning_rate": 1.967618343509399e-05, "loss": 0.213, "step": 2369 }, { "epoch": 0.11, "grad_norm": 0.47346910146204113, "learning_rate": 1.967580775070023e-05, "loss": 0.3283, "step": 2370 }, { "epoch": 0.11, "grad_norm": 0.4488088485374118, "learning_rate": 1.967543185209341e-05, "loss": 0.3223, "step": 2371 }, { "epoch": 0.11, "grad_norm": 0.7034320705884325, "learning_rate": 1.9675055739281857e-05, "loss": 0.4846, "step": 2372 }, { "epoch": 0.11, "grad_norm": 0.6995491227820101, "learning_rate": 1.9674679412273892e-05, "loss": 0.4032, "step": 2373 }, { "epoch": 0.11, "grad_norm": 0.4161193266503635, "learning_rate": 1.967430287107785e-05, "loss": 0.294, "step": 2374 }, { "epoch": 0.11, "grad_norm": 0.2910120292770254, "learning_rate": 1.9673926115702067e-05, "loss": 0.1261, "step": 2375 }, { "epoch": 0.11, "grad_norm": 0.5575687745251163, "learning_rate": 1.9673549146154886e-05, "loss": 0.3105, "step": 2376 }, { "epoch": 0.11, "grad_norm": 0.5889349088699504, "learning_rate": 1.967317196244465e-05, "loss": 0.4214, "step": 2377 }, { "epoch": 0.11, "grad_norm": 0.44187637622413756, "learning_rate": 1.9672794564579707e-05, "loss": 0.3589, "step": 2378 }, { "epoch": 0.11, "grad_norm": 0.48682463745918714, "learning_rate": 1.9672416952568416e-05, "loss": 0.3324, "step": 2379 }, { "epoch": 0.11, "grad_norm": 0.7262201717926974, "learning_rate": 1.967203912641914e-05, "loss": 0.488, "step": 2380 }, { "epoch": 0.11, "grad_norm": 0.26635591346581416, "learning_rate": 1.9671661086140235e-05, "loss": 0.1659, "step": 2381 }, { "epoch": 0.11, "grad_norm": 0.5081223906297634, "learning_rate": 1.9671282831740076e-05, "loss": 0.3423, "step": 2382 }, { "epoch": 0.11, "grad_norm": 0.42080558228184156, "learning_rate": 1.9670904363227036e-05, "loss": 0.342, "step": 2383 }, { "epoch": 0.11, "grad_norm": 0.8568149216352813, "learning_rate": 1.96705256806095e-05, "loss": 0.4452, "step": 2384 }, { "epoch": 0.11, "grad_norm": 0.6437641145671199, "learning_rate": 1.967014678389584e-05, "loss": 0.3009, "step": 2385 }, { "epoch": 0.11, "grad_norm": 0.44522865414107443, "learning_rate": 1.966976767309445e-05, "loss": 0.3158, "step": 2386 }, { "epoch": 0.11, "grad_norm": 0.5461252924749982, "learning_rate": 1.9669388348213726e-05, "loss": 0.3187, "step": 2387 }, { "epoch": 0.11, "grad_norm": 0.8720392968612125, "learning_rate": 1.9669008809262064e-05, "loss": 0.2945, "step": 2388 }, { "epoch": 0.11, "grad_norm": 0.4284640951176341, "learning_rate": 1.9668629056247863e-05, "loss": 0.3095, "step": 2389 }, { "epoch": 0.11, "grad_norm": 0.5064080059835947, "learning_rate": 1.9668249089179535e-05, "loss": 0.3599, "step": 2390 }, { "epoch": 0.11, "grad_norm": 0.4588697907573374, "learning_rate": 1.966786890806549e-05, "loss": 0.2216, "step": 2391 }, { "epoch": 0.11, "grad_norm": 0.5445103855022165, "learning_rate": 1.966748851291415e-05, "loss": 0.383, "step": 2392 }, { "epoch": 0.11, "grad_norm": 0.4870176632134462, "learning_rate": 1.9667107903733925e-05, "loss": 0.2889, "step": 2393 }, { "epoch": 0.11, "grad_norm": 0.5962244225037607, "learning_rate": 1.9666727080533253e-05, "loss": 0.2977, "step": 2394 }, { "epoch": 0.11, "grad_norm": 0.560557254061879, "learning_rate": 1.966634604332056e-05, "loss": 0.3931, "step": 2395 }, { "epoch": 0.11, "grad_norm": 0.5072354116095269, "learning_rate": 1.966596479210428e-05, "loss": 0.3467, "step": 2396 }, { "epoch": 0.11, "grad_norm": 0.3826299882424985, "learning_rate": 1.9665583326892858e-05, "loss": 0.2462, "step": 2397 }, { "epoch": 0.11, "grad_norm": 0.5294948670019645, "learning_rate": 1.9665201647694733e-05, "loss": 0.3579, "step": 2398 }, { "epoch": 0.11, "grad_norm": 0.9156658333563149, "learning_rate": 1.9664819754518363e-05, "loss": 0.5674, "step": 2399 }, { "epoch": 0.11, "grad_norm": 0.6824253083979831, "learning_rate": 1.9664437647372196e-05, "loss": 0.4738, "step": 2400 }, { "epoch": 0.11, "grad_norm": 0.34888225860389926, "learning_rate": 1.9664055326264698e-05, "loss": 0.1922, "step": 2401 }, { "epoch": 0.11, "grad_norm": 0.39238652146208075, "learning_rate": 1.9663672791204328e-05, "loss": 0.2781, "step": 2402 }, { "epoch": 0.11, "grad_norm": 1.471132361075159, "learning_rate": 1.9663290042199552e-05, "loss": 0.8546, "step": 2403 }, { "epoch": 0.11, "grad_norm": 0.42277430974815894, "learning_rate": 1.9662907079258852e-05, "loss": 0.2392, "step": 2404 }, { "epoch": 0.11, "grad_norm": 0.640294752779203, "learning_rate": 1.96625239023907e-05, "loss": 0.3998, "step": 2405 }, { "epoch": 0.11, "grad_norm": 0.582639360292068, "learning_rate": 1.9662140511603586e-05, "loss": 0.3524, "step": 2406 }, { "epoch": 0.11, "grad_norm": 0.3601152110052125, "learning_rate": 1.966175690690599e-05, "loss": 0.1786, "step": 2407 }, { "epoch": 0.11, "grad_norm": 0.5731455817783268, "learning_rate": 1.966137308830641e-05, "loss": 0.2841, "step": 2408 }, { "epoch": 0.11, "grad_norm": 0.5987671643358456, "learning_rate": 1.9660989055813342e-05, "loss": 0.3866, "step": 2409 }, { "epoch": 0.11, "grad_norm": 0.4117176835138656, "learning_rate": 1.9660604809435283e-05, "loss": 0.2624, "step": 2410 }, { "epoch": 0.11, "grad_norm": 0.9891552063131267, "learning_rate": 1.966022034918075e-05, "loss": 0.5101, "step": 2411 }, { "epoch": 0.11, "grad_norm": 0.5955023863126869, "learning_rate": 1.965983567505825e-05, "loss": 0.455, "step": 2412 }, { "epoch": 0.11, "grad_norm": 0.5111277897495989, "learning_rate": 1.9659450787076293e-05, "loss": 0.2904, "step": 2413 }, { "epoch": 0.11, "grad_norm": 0.3748361645902313, "learning_rate": 1.9659065685243407e-05, "loss": 0.2111, "step": 2414 }, { "epoch": 0.11, "grad_norm": 1.0423318623867175, "learning_rate": 1.965868036956812e-05, "loss": 0.6038, "step": 2415 }, { "epoch": 0.11, "grad_norm": 0.4674103147166009, "learning_rate": 1.9658294840058954e-05, "loss": 0.3061, "step": 2416 }, { "epoch": 0.11, "grad_norm": 0.6217743661339244, "learning_rate": 1.9657909096724452e-05, "loss": 0.3627, "step": 2417 }, { "epoch": 0.11, "grad_norm": 0.876919447188364, "learning_rate": 1.9657523139573153e-05, "loss": 0.474, "step": 2418 }, { "epoch": 0.11, "grad_norm": 0.6327828274375091, "learning_rate": 1.9657136968613594e-05, "loss": 0.3158, "step": 2419 }, { "epoch": 0.11, "grad_norm": 0.40976144372861417, "learning_rate": 1.9656750583854333e-05, "loss": 0.1528, "step": 2420 }, { "epoch": 0.11, "grad_norm": 0.5336734787318893, "learning_rate": 1.9656363985303923e-05, "loss": 0.363, "step": 2421 }, { "epoch": 0.11, "grad_norm": 0.5466365781237489, "learning_rate": 1.9655977172970918e-05, "loss": 0.3013, "step": 2422 }, { "epoch": 0.11, "grad_norm": 1.1839309100426945, "learning_rate": 1.9655590146863886e-05, "loss": 0.5106, "step": 2423 }, { "epoch": 0.11, "grad_norm": 0.9057626938144441, "learning_rate": 1.9655202906991397e-05, "loss": 0.3574, "step": 2424 }, { "epoch": 0.11, "grad_norm": 0.4935735595061597, "learning_rate": 1.9654815453362016e-05, "loss": 0.2755, "step": 2425 }, { "epoch": 0.11, "grad_norm": 0.43094582068701714, "learning_rate": 1.9654427785984335e-05, "loss": 0.2918, "step": 2426 }, { "epoch": 0.11, "grad_norm": 0.3800624781621088, "learning_rate": 1.9654039904866922e-05, "loss": 0.1789, "step": 2427 }, { "epoch": 0.11, "grad_norm": 0.5765702230825885, "learning_rate": 1.965365181001837e-05, "loss": 0.3449, "step": 2428 }, { "epoch": 0.11, "grad_norm": 0.6543214592410309, "learning_rate": 1.965326350144727e-05, "loss": 0.4019, "step": 2429 }, { "epoch": 0.11, "grad_norm": 0.49558833507167893, "learning_rate": 1.965287497916222e-05, "loss": 0.26, "step": 2430 }, { "epoch": 0.11, "grad_norm": 0.5212071543740817, "learning_rate": 1.9652486243171826e-05, "loss": 0.3091, "step": 2431 }, { "epoch": 0.11, "grad_norm": 0.43370427733776956, "learning_rate": 1.9652097293484688e-05, "loss": 0.1907, "step": 2432 }, { "epoch": 0.11, "grad_norm": 0.5184147397473483, "learning_rate": 1.965170813010942e-05, "loss": 0.3014, "step": 2433 }, { "epoch": 0.11, "grad_norm": 0.5018202790728643, "learning_rate": 1.9651318753054634e-05, "loss": 0.3442, "step": 2434 }, { "epoch": 0.11, "grad_norm": 0.8547234252386541, "learning_rate": 1.9650929162328953e-05, "loss": 0.5453, "step": 2435 }, { "epoch": 0.11, "grad_norm": 1.4158918523416615, "learning_rate": 1.9650539357941003e-05, "loss": 0.7095, "step": 2436 }, { "epoch": 0.11, "grad_norm": 0.4588509042023961, "learning_rate": 1.965014933989941e-05, "loss": 0.2181, "step": 2437 }, { "epoch": 0.11, "grad_norm": 0.4125769319076145, "learning_rate": 1.9649759108212817e-05, "loss": 0.2544, "step": 2438 }, { "epoch": 0.11, "grad_norm": 0.8590755874566982, "learning_rate": 1.9649368662889852e-05, "loss": 0.5389, "step": 2439 }, { "epoch": 0.11, "grad_norm": 0.454662488223262, "learning_rate": 1.9648978003939168e-05, "loss": 0.2847, "step": 2440 }, { "epoch": 0.11, "grad_norm": 0.5428993964845165, "learning_rate": 1.964858713136941e-05, "loss": 0.4009, "step": 2441 }, { "epoch": 0.11, "grad_norm": 1.3635984648202488, "learning_rate": 1.9648196045189233e-05, "loss": 0.6687, "step": 2442 }, { "epoch": 0.11, "grad_norm": 0.4395503969060432, "learning_rate": 1.9647804745407296e-05, "loss": 0.2543, "step": 2443 }, { "epoch": 0.11, "grad_norm": 0.5135995298295045, "learning_rate": 1.9647413232032258e-05, "loss": 0.316, "step": 2444 }, { "epoch": 0.11, "grad_norm": 0.4953730600389147, "learning_rate": 1.964702150507279e-05, "loss": 0.3704, "step": 2445 }, { "epoch": 0.11, "grad_norm": 0.48400705566838687, "learning_rate": 1.9646629564537565e-05, "loss": 0.2814, "step": 2446 }, { "epoch": 0.11, "grad_norm": 1.566093461921978, "learning_rate": 1.964623741043526e-05, "loss": 0.8214, "step": 2447 }, { "epoch": 0.11, "grad_norm": 0.5647696547481772, "learning_rate": 1.9645845042774555e-05, "loss": 0.2667, "step": 2448 }, { "epoch": 0.11, "grad_norm": 0.45791487647353035, "learning_rate": 1.9645452461564135e-05, "loss": 0.2856, "step": 2449 }, { "epoch": 0.11, "grad_norm": 0.4999026052685404, "learning_rate": 1.9645059666812695e-05, "loss": 0.2993, "step": 2450 }, { "epoch": 0.11, "grad_norm": 0.8224378440474981, "learning_rate": 1.964466665852893e-05, "loss": 0.5036, "step": 2451 }, { "epoch": 0.11, "grad_norm": 0.5172168079855265, "learning_rate": 1.964427343672154e-05, "loss": 0.2956, "step": 2452 }, { "epoch": 0.11, "grad_norm": 0.42987443309026996, "learning_rate": 1.9643880001399233e-05, "loss": 0.285, "step": 2453 }, { "epoch": 0.11, "grad_norm": 0.3681416022092646, "learning_rate": 1.964348635257072e-05, "loss": 0.2037, "step": 2454 }, { "epoch": 0.11, "grad_norm": 0.4903733064433681, "learning_rate": 1.964309249024471e-05, "loss": 0.301, "step": 2455 }, { "epoch": 0.11, "grad_norm": 0.6079645380248424, "learning_rate": 1.964269841442993e-05, "loss": 0.3653, "step": 2456 }, { "epoch": 0.11, "grad_norm": 0.464806254709901, "learning_rate": 1.9642304125135095e-05, "loss": 0.3637, "step": 2457 }, { "epoch": 0.11, "grad_norm": 0.5037759049866136, "learning_rate": 1.9641909622368948e-05, "loss": 0.3619, "step": 2458 }, { "epoch": 0.11, "grad_norm": 0.24591148715329916, "learning_rate": 1.9641514906140207e-05, "loss": 0.1636, "step": 2459 }, { "epoch": 0.11, "grad_norm": 1.4557600217045132, "learning_rate": 1.9641119976457623e-05, "loss": 0.7008, "step": 2460 }, { "epoch": 0.11, "grad_norm": 0.36452873337576785, "learning_rate": 1.9640724833329935e-05, "loss": 0.294, "step": 2461 }, { "epoch": 0.11, "grad_norm": 0.583030146846671, "learning_rate": 1.964032947676589e-05, "loss": 0.3876, "step": 2462 }, { "epoch": 0.11, "grad_norm": 0.7351575830096171, "learning_rate": 1.9639933906774244e-05, "loss": 0.4525, "step": 2463 }, { "epoch": 0.11, "grad_norm": 0.4118058482394788, "learning_rate": 1.963953812336375e-05, "loss": 0.2855, "step": 2464 }, { "epoch": 0.11, "grad_norm": 0.6267854507803734, "learning_rate": 1.963914212654317e-05, "loss": 0.4096, "step": 2465 }, { "epoch": 0.11, "grad_norm": 0.33644446828390706, "learning_rate": 1.9638745916321274e-05, "loss": 0.1967, "step": 2466 }, { "epoch": 0.11, "grad_norm": 0.47832127396601737, "learning_rate": 1.963834949270684e-05, "loss": 0.3049, "step": 2467 }, { "epoch": 0.11, "grad_norm": 0.6754383493657978, "learning_rate": 1.9637952855708634e-05, "loss": 0.4249, "step": 2468 }, { "epoch": 0.11, "grad_norm": 0.4226792942667077, "learning_rate": 1.963755600533544e-05, "loss": 0.3167, "step": 2469 }, { "epoch": 0.11, "grad_norm": 0.5468397253092615, "learning_rate": 1.9637158941596045e-05, "loss": 0.3083, "step": 2470 }, { "epoch": 0.11, "grad_norm": 0.6211164157736757, "learning_rate": 1.963676166449924e-05, "loss": 0.4443, "step": 2471 }, { "epoch": 0.11, "grad_norm": 0.2598726879832464, "learning_rate": 1.9636364174053818e-05, "loss": 0.1899, "step": 2472 }, { "epoch": 0.11, "grad_norm": 0.5140077027708775, "learning_rate": 1.9635966470268583e-05, "loss": 0.2845, "step": 2473 }, { "epoch": 0.11, "grad_norm": 0.658779319196403, "learning_rate": 1.9635568553152337e-05, "loss": 0.4607, "step": 2474 }, { "epoch": 0.11, "grad_norm": 1.090078169678851, "learning_rate": 1.9635170422713892e-05, "loss": 0.6085, "step": 2475 }, { "epoch": 0.11, "grad_norm": 0.47826234688342517, "learning_rate": 1.963477207896206e-05, "loss": 0.2426, "step": 2476 }, { "epoch": 0.11, "grad_norm": 0.4548323560606868, "learning_rate": 1.9634373521905655e-05, "loss": 0.3307, "step": 2477 }, { "epoch": 0.11, "grad_norm": 0.3389298727393347, "learning_rate": 1.963397475155351e-05, "loss": 0.1704, "step": 2478 }, { "epoch": 0.11, "grad_norm": 0.48496244887825124, "learning_rate": 1.963357576791445e-05, "loss": 0.2822, "step": 2479 }, { "epoch": 0.11, "grad_norm": 0.5814114494301109, "learning_rate": 1.9633176570997308e-05, "loss": 0.4218, "step": 2480 }, { "epoch": 0.11, "grad_norm": 0.5286279026544006, "learning_rate": 1.963277716081092e-05, "loss": 0.3772, "step": 2481 }, { "epoch": 0.11, "grad_norm": 0.4247354690432872, "learning_rate": 1.9632377537364128e-05, "loss": 0.2448, "step": 2482 }, { "epoch": 0.11, "grad_norm": 1.5729899262906724, "learning_rate": 1.9631977700665784e-05, "loss": 0.8685, "step": 2483 }, { "epoch": 0.11, "grad_norm": 0.42038930551682085, "learning_rate": 1.9631577650724734e-05, "loss": 0.3248, "step": 2484 }, { "epoch": 0.11, "grad_norm": 0.4224808804097906, "learning_rate": 1.9631177387549842e-05, "loss": 0.2681, "step": 2485 }, { "epoch": 0.11, "grad_norm": 0.45915120979969043, "learning_rate": 1.9630776911149963e-05, "loss": 0.3279, "step": 2486 }, { "epoch": 0.11, "grad_norm": 1.5001476633278168, "learning_rate": 1.9630376221533965e-05, "loss": 0.6549, "step": 2487 }, { "epoch": 0.11, "grad_norm": 0.9648544411504661, "learning_rate": 1.962997531871072e-05, "loss": 0.5401, "step": 2488 }, { "epoch": 0.11, "grad_norm": 0.3942882783530878, "learning_rate": 1.9629574202689104e-05, "loss": 0.2611, "step": 2489 }, { "epoch": 0.11, "grad_norm": 0.6908872590412966, "learning_rate": 1.9629172873477995e-05, "loss": 0.4929, "step": 2490 }, { "epoch": 0.11, "grad_norm": 0.5686480329778618, "learning_rate": 1.962877133108628e-05, "loss": 0.3588, "step": 2491 }, { "epoch": 0.11, "grad_norm": 0.3068569960292089, "learning_rate": 1.9628369575522847e-05, "loss": 0.1801, "step": 2492 }, { "epoch": 0.11, "grad_norm": 0.5063842798640841, "learning_rate": 1.962796760679659e-05, "loss": 0.3587, "step": 2493 }, { "epoch": 0.11, "grad_norm": 0.7493501625042578, "learning_rate": 1.962756542491641e-05, "loss": 0.4149, "step": 2494 }, { "epoch": 0.11, "grad_norm": 0.43914600283490773, "learning_rate": 1.9627163029891216e-05, "loss": 0.2781, "step": 2495 }, { "epoch": 0.11, "grad_norm": 0.6065097569979341, "learning_rate": 1.9626760421729905e-05, "loss": 0.4289, "step": 2496 }, { "epoch": 0.11, "grad_norm": 0.4798517732590873, "learning_rate": 1.9626357600441403e-05, "loss": 0.3514, "step": 2497 }, { "epoch": 0.11, "grad_norm": 0.32827023814861495, "learning_rate": 1.9625954566034618e-05, "loss": 0.2118, "step": 2498 }, { "epoch": 0.11, "grad_norm": 0.5232440038921418, "learning_rate": 1.9625551318518473e-05, "loss": 0.321, "step": 2499 }, { "epoch": 0.11, "grad_norm": 0.55490372608103, "learning_rate": 1.9625147857901906e-05, "loss": 0.3896, "step": 2500 }, { "epoch": 0.11, "grad_norm": 0.405644720765052, "learning_rate": 1.962474418419384e-05, "loss": 0.3303, "step": 2501 }, { "epoch": 0.11, "grad_norm": 0.8621462924834044, "learning_rate": 1.962434029740321e-05, "loss": 0.4427, "step": 2502 }, { "epoch": 0.11, "grad_norm": 0.4898793865322952, "learning_rate": 1.9623936197538968e-05, "loss": 0.3012, "step": 2503 }, { "epoch": 0.12, "grad_norm": 0.3543885215722855, "learning_rate": 1.962353188461005e-05, "loss": 0.2247, "step": 2504 }, { "epoch": 0.12, "grad_norm": 0.48334954248837375, "learning_rate": 1.962312735862541e-05, "loss": 0.3256, "step": 2505 }, { "epoch": 0.12, "grad_norm": 1.0627911535540613, "learning_rate": 1.962272261959401e-05, "loss": 0.5518, "step": 2506 }, { "epoch": 0.12, "grad_norm": 0.47535627422165594, "learning_rate": 1.9622317667524805e-05, "loss": 0.3293, "step": 2507 }, { "epoch": 0.12, "grad_norm": 0.4857915805759971, "learning_rate": 1.962191250242676e-05, "loss": 0.3401, "step": 2508 }, { "epoch": 0.12, "grad_norm": 0.9186063961948101, "learning_rate": 1.9621507124308845e-05, "loss": 0.4586, "step": 2509 }, { "epoch": 0.12, "grad_norm": 0.376313178768367, "learning_rate": 1.9621101533180034e-05, "loss": 0.2648, "step": 2510 }, { "epoch": 0.12, "grad_norm": 0.3017447138594228, "learning_rate": 1.9620695729049314e-05, "loss": 0.1271, "step": 2511 }, { "epoch": 0.12, "grad_norm": 0.5160262999333746, "learning_rate": 1.962028971192566e-05, "loss": 0.3481, "step": 2512 }, { "epoch": 0.12, "grad_norm": 0.4030337475232264, "learning_rate": 1.961988348181806e-05, "loss": 0.3389, "step": 2513 }, { "epoch": 0.12, "grad_norm": 0.7817185212644212, "learning_rate": 1.9619477038735516e-05, "loss": 0.5071, "step": 2514 }, { "epoch": 0.12, "grad_norm": 0.515840366138476, "learning_rate": 1.9619070382687017e-05, "loss": 0.2076, "step": 2515 }, { "epoch": 0.12, "grad_norm": 0.37041109243283304, "learning_rate": 1.9618663513681574e-05, "loss": 0.2584, "step": 2516 }, { "epoch": 0.12, "grad_norm": 0.4094582097499317, "learning_rate": 1.961825643172819e-05, "loss": 0.2986, "step": 2517 }, { "epoch": 0.12, "grad_norm": 0.6615393808111882, "learning_rate": 1.9617849136835883e-05, "loss": 0.2989, "step": 2518 }, { "epoch": 0.12, "grad_norm": 0.5266020916942835, "learning_rate": 1.961744162901366e-05, "loss": 0.3427, "step": 2519 }, { "epoch": 0.12, "grad_norm": 0.5857525692361484, "learning_rate": 1.9617033908270552e-05, "loss": 0.4278, "step": 2520 }, { "epoch": 0.12, "grad_norm": 0.455309869545046, "learning_rate": 1.9616625974615584e-05, "loss": 0.2621, "step": 2521 }, { "epoch": 0.12, "grad_norm": 0.4708513817946174, "learning_rate": 1.9616217828057783e-05, "loss": 0.3574, "step": 2522 }, { "epoch": 0.12, "grad_norm": 0.4112295799455282, "learning_rate": 1.961580946860619e-05, "loss": 0.2479, "step": 2523 }, { "epoch": 0.12, "grad_norm": 0.5126351633448597, "learning_rate": 1.961540089626984e-05, "loss": 0.3039, "step": 2524 }, { "epoch": 0.12, "grad_norm": 0.6655348146016421, "learning_rate": 1.9614992111057782e-05, "loss": 0.3612, "step": 2525 }, { "epoch": 0.12, "grad_norm": 1.0867455288399606, "learning_rate": 1.9614583112979068e-05, "loss": 0.5969, "step": 2526 }, { "epoch": 0.12, "grad_norm": 1.7758515189825195, "learning_rate": 1.961417390204275e-05, "loss": 0.7333, "step": 2527 }, { "epoch": 0.12, "grad_norm": 0.33919878708268514, "learning_rate": 1.9613764478257885e-05, "loss": 0.2285, "step": 2528 }, { "epoch": 0.12, "grad_norm": 0.44066905360494996, "learning_rate": 1.9613354841633544e-05, "loss": 0.2939, "step": 2529 }, { "epoch": 0.12, "grad_norm": 1.5003467422606467, "learning_rate": 1.9612944992178792e-05, "loss": 0.5441, "step": 2530 }, { "epoch": 0.12, "grad_norm": 0.5355212530017303, "learning_rate": 1.9612534929902702e-05, "loss": 0.2687, "step": 2531 }, { "epoch": 0.12, "grad_norm": 0.4063100131457627, "learning_rate": 1.961212465481435e-05, "loss": 0.2885, "step": 2532 }, { "epoch": 0.12, "grad_norm": 0.5124389903296207, "learning_rate": 1.9611714166922827e-05, "loss": 0.3705, "step": 2533 }, { "epoch": 0.12, "grad_norm": 0.4689745929162062, "learning_rate": 1.961130346623722e-05, "loss": 0.2565, "step": 2534 }, { "epoch": 0.12, "grad_norm": 0.6562092988879855, "learning_rate": 1.9610892552766607e-05, "loss": 0.3119, "step": 2535 }, { "epoch": 0.12, "grad_norm": 0.5583476063000099, "learning_rate": 1.9610481426520103e-05, "loss": 0.3636, "step": 2536 }, { "epoch": 0.12, "grad_norm": 0.501664987497338, "learning_rate": 1.96100700875068e-05, "loss": 0.284, "step": 2537 }, { "epoch": 0.12, "grad_norm": 0.4380259118738081, "learning_rate": 1.960965853573581e-05, "loss": 0.264, "step": 2538 }, { "epoch": 0.12, "grad_norm": 0.9151286481633896, "learning_rate": 1.9609246771216242e-05, "loss": 0.4504, "step": 2539 }, { "epoch": 0.12, "grad_norm": 0.4851332386356835, "learning_rate": 1.960883479395721e-05, "loss": 0.3185, "step": 2540 }, { "epoch": 0.12, "grad_norm": 0.42442989030626066, "learning_rate": 1.9608422603967838e-05, "loss": 0.3289, "step": 2541 }, { "epoch": 0.12, "grad_norm": 0.9241926229256986, "learning_rate": 1.9608010201257247e-05, "loss": 0.557, "step": 2542 }, { "epoch": 0.12, "grad_norm": 0.44366428315734924, "learning_rate": 1.9607597585834573e-05, "loss": 0.2982, "step": 2543 }, { "epoch": 0.12, "grad_norm": 0.3209855852335516, "learning_rate": 1.9607184757708953e-05, "loss": 0.194, "step": 2544 }, { "epoch": 0.12, "grad_norm": 1.0496640620970652, "learning_rate": 1.9606771716889517e-05, "loss": 0.6465, "step": 2545 }, { "epoch": 0.12, "grad_norm": 0.41262532035728755, "learning_rate": 1.9606358463385414e-05, "loss": 0.3153, "step": 2546 }, { "epoch": 0.12, "grad_norm": 0.9805713474863744, "learning_rate": 1.9605944997205795e-05, "loss": 0.4389, "step": 2547 }, { "epoch": 0.12, "grad_norm": 0.44107125206284997, "learning_rate": 1.9605531318359812e-05, "loss": 0.3506, "step": 2548 }, { "epoch": 0.12, "grad_norm": 0.4272222145185835, "learning_rate": 1.9605117426856622e-05, "loss": 0.2971, "step": 2549 }, { "epoch": 0.12, "grad_norm": 0.5466258017698202, "learning_rate": 1.960470332270539e-05, "loss": 0.2807, "step": 2550 }, { "epoch": 0.12, "grad_norm": 0.410555547951116, "learning_rate": 1.9604289005915286e-05, "loss": 0.2192, "step": 2551 }, { "epoch": 0.12, "grad_norm": 0.3710278484686862, "learning_rate": 1.960387447649548e-05, "loss": 0.2864, "step": 2552 }, { "epoch": 0.12, "grad_norm": 1.1449905899715322, "learning_rate": 1.9603459734455147e-05, "loss": 0.5276, "step": 2553 }, { "epoch": 0.12, "grad_norm": 0.7578743666263131, "learning_rate": 1.9603044779803474e-05, "loss": 0.3884, "step": 2554 }, { "epoch": 0.12, "grad_norm": 0.5241030932793594, "learning_rate": 1.9602629612549643e-05, "loss": 0.3063, "step": 2555 }, { "epoch": 0.12, "grad_norm": 0.3879330306472563, "learning_rate": 1.9602214232702846e-05, "loss": 0.2895, "step": 2556 }, { "epoch": 0.12, "grad_norm": 0.3773987632788053, "learning_rate": 1.9601798640272283e-05, "loss": 0.119, "step": 2557 }, { "epoch": 0.12, "grad_norm": 0.48224869475081095, "learning_rate": 1.9601382835267154e-05, "loss": 0.314, "step": 2558 }, { "epoch": 0.12, "grad_norm": 1.4454864527126023, "learning_rate": 1.9600966817696657e-05, "loss": 0.5681, "step": 2559 }, { "epoch": 0.12, "grad_norm": 0.605761703630391, "learning_rate": 1.9600550587570015e-05, "loss": 0.2952, "step": 2560 }, { "epoch": 0.12, "grad_norm": 0.463168681266348, "learning_rate": 1.9600134144896433e-05, "loss": 0.2902, "step": 2561 }, { "epoch": 0.12, "grad_norm": 0.3851500863074438, "learning_rate": 1.9599717489685134e-05, "loss": 0.201, "step": 2562 }, { "epoch": 0.12, "grad_norm": 1.0509635302282927, "learning_rate": 1.959930062194534e-05, "loss": 0.612, "step": 2563 }, { "epoch": 0.12, "grad_norm": 0.5313588202115289, "learning_rate": 1.9598883541686287e-05, "loss": 0.2639, "step": 2564 }, { "epoch": 0.12, "grad_norm": 0.8251710814771125, "learning_rate": 1.9598466248917202e-05, "loss": 0.5301, "step": 2565 }, { "epoch": 0.12, "grad_norm": 1.5995774823438609, "learning_rate": 1.9598048743647323e-05, "loss": 0.8728, "step": 2566 }, { "epoch": 0.12, "grad_norm": 0.4540988239175763, "learning_rate": 1.9597631025885898e-05, "loss": 0.2303, "step": 2567 }, { "epoch": 0.12, "grad_norm": 0.47480477527524195, "learning_rate": 1.959721309564217e-05, "loss": 0.3462, "step": 2568 }, { "epoch": 0.12, "grad_norm": 0.40273697247110785, "learning_rate": 1.9596794952925397e-05, "loss": 0.2094, "step": 2569 }, { "epoch": 0.12, "grad_norm": 0.4384801286881789, "learning_rate": 1.959637659774483e-05, "loss": 0.2837, "step": 2570 }, { "epoch": 0.12, "grad_norm": 1.0485722826765629, "learning_rate": 1.9595958030109736e-05, "loss": 0.58, "step": 2571 }, { "epoch": 0.12, "grad_norm": 0.5660588269828383, "learning_rate": 1.959553925002938e-05, "loss": 0.3428, "step": 2572 }, { "epoch": 0.12, "grad_norm": 0.45685738686334393, "learning_rate": 1.9595120257513035e-05, "loss": 0.2092, "step": 2573 }, { "epoch": 0.12, "grad_norm": 0.47452120488728067, "learning_rate": 1.959470105256997e-05, "loss": 0.2755, "step": 2574 }, { "epoch": 0.12, "grad_norm": 0.48581645087313585, "learning_rate": 1.9594281635209476e-05, "loss": 0.3543, "step": 2575 }, { "epoch": 0.12, "grad_norm": 0.593662076779592, "learning_rate": 1.9593862005440836e-05, "loss": 0.3469, "step": 2576 }, { "epoch": 0.12, "grad_norm": 0.4431907716001137, "learning_rate": 1.959344216327333e-05, "loss": 0.3146, "step": 2577 }, { "epoch": 0.12, "grad_norm": 1.748148954253344, "learning_rate": 1.959302210871627e-05, "loss": 0.8162, "step": 2578 }, { "epoch": 0.12, "grad_norm": 0.4822448004821637, "learning_rate": 1.959260184177894e-05, "loss": 0.2998, "step": 2579 }, { "epoch": 0.12, "grad_norm": 0.418449652833715, "learning_rate": 1.9592181362470653e-05, "loss": 0.2845, "step": 2580 }, { "epoch": 0.12, "grad_norm": 0.5364238361570387, "learning_rate": 1.9591760670800714e-05, "loss": 0.3368, "step": 2581 }, { "epoch": 0.12, "grad_norm": 0.5334928936842943, "learning_rate": 1.959133976677844e-05, "loss": 0.3643, "step": 2582 }, { "epoch": 0.12, "grad_norm": 0.35874488141141914, "learning_rate": 1.9590918650413146e-05, "loss": 0.1847, "step": 2583 }, { "epoch": 0.12, "grad_norm": 0.510757997443261, "learning_rate": 1.959049732171416e-05, "loss": 0.3629, "step": 2584 }, { "epoch": 0.12, "grad_norm": 0.41799222287937543, "learning_rate": 1.9590075780690805e-05, "loss": 0.3048, "step": 2585 }, { "epoch": 0.12, "grad_norm": 1.0725572883415753, "learning_rate": 1.9589654027352412e-05, "loss": 0.3993, "step": 2586 }, { "epoch": 0.12, "grad_norm": 0.5957783600453085, "learning_rate": 1.9589232061708325e-05, "loss": 0.4507, "step": 2587 }, { "epoch": 0.12, "grad_norm": 0.4736260109350406, "learning_rate": 1.9588809883767884e-05, "loss": 0.2914, "step": 2588 }, { "epoch": 0.12, "grad_norm": 0.3897620682324087, "learning_rate": 1.958838749354043e-05, "loss": 0.1907, "step": 2589 }, { "epoch": 0.12, "grad_norm": 1.8147010221046953, "learning_rate": 1.958796489103532e-05, "loss": 0.6194, "step": 2590 }, { "epoch": 0.12, "grad_norm": 0.6789291023220007, "learning_rate": 1.958754207626191e-05, "loss": 0.3645, "step": 2591 }, { "epoch": 0.12, "grad_norm": 0.4735617288983153, "learning_rate": 1.9587119049229558e-05, "loss": 0.3079, "step": 2592 }, { "epoch": 0.12, "grad_norm": 0.9069992616130493, "learning_rate": 1.958669580994763e-05, "loss": 0.45, "step": 2593 }, { "epoch": 0.12, "grad_norm": 0.3858744376084387, "learning_rate": 1.9586272358425494e-05, "loss": 0.1982, "step": 2594 }, { "epoch": 0.12, "grad_norm": 0.4004544868787489, "learning_rate": 1.9585848694672533e-05, "loss": 0.2823, "step": 2595 }, { "epoch": 0.12, "grad_norm": 0.5412561975312115, "learning_rate": 1.958542481869812e-05, "loss": 0.3143, "step": 2596 }, { "epoch": 0.12, "grad_norm": 0.5197965944717913, "learning_rate": 1.9585000730511635e-05, "loss": 0.3114, "step": 2597 }, { "epoch": 0.12, "grad_norm": 0.590601159444078, "learning_rate": 1.9584576430122473e-05, "loss": 0.4247, "step": 2598 }, { "epoch": 0.12, "grad_norm": 0.5076060346432055, "learning_rate": 1.958415191754003e-05, "loss": 0.3656, "step": 2599 }, { "epoch": 0.12, "grad_norm": 0.39928875463415014, "learning_rate": 1.9583727192773698e-05, "loss": 0.2966, "step": 2600 }, { "epoch": 0.12, "grad_norm": 0.3452853931989869, "learning_rate": 1.9583302255832883e-05, "loss": 0.2325, "step": 2601 }, { "epoch": 0.12, "grad_norm": 1.5265472210689686, "learning_rate": 1.9582877106726994e-05, "loss": 0.8948, "step": 2602 }, { "epoch": 0.12, "grad_norm": 0.44827496586425275, "learning_rate": 1.9582451745465444e-05, "loss": 0.2731, "step": 2603 }, { "epoch": 0.12, "grad_norm": 0.46652685291081547, "learning_rate": 1.9582026172057644e-05, "loss": 0.3243, "step": 2604 }, { "epoch": 0.12, "grad_norm": 0.8103679080665189, "learning_rate": 1.9581600386513022e-05, "loss": 0.5497, "step": 2605 }, { "epoch": 0.12, "grad_norm": 0.418313476703401, "learning_rate": 1.9581174388841002e-05, "loss": 0.2371, "step": 2606 }, { "epoch": 0.12, "grad_norm": 0.4520783504053943, "learning_rate": 1.9580748179051013e-05, "loss": 0.2523, "step": 2607 }, { "epoch": 0.12, "grad_norm": 0.5158814182806029, "learning_rate": 1.95803217571525e-05, "loss": 0.389, "step": 2608 }, { "epoch": 0.12, "grad_norm": 0.5308513677170296, "learning_rate": 1.957989512315489e-05, "loss": 0.2663, "step": 2609 }, { "epoch": 0.12, "grad_norm": 0.4817956087165091, "learning_rate": 1.957946827706764e-05, "loss": 0.4003, "step": 2610 }, { "epoch": 0.12, "grad_norm": 0.4443214604983722, "learning_rate": 1.957904121890019e-05, "loss": 0.3561, "step": 2611 }, { "epoch": 0.12, "grad_norm": 0.4591132937153668, "learning_rate": 1.9578613948662005e-05, "loss": 0.1495, "step": 2612 }, { "epoch": 0.12, "grad_norm": 0.3845310937869779, "learning_rate": 1.9578186466362538e-05, "loss": 0.2947, "step": 2613 }, { "epoch": 0.12, "grad_norm": 0.4844498422257844, "learning_rate": 1.9577758772011252e-05, "loss": 0.3465, "step": 2614 }, { "epoch": 0.12, "grad_norm": 0.5119467988951399, "learning_rate": 1.9577330865617618e-05, "loss": 0.341, "step": 2615 }, { "epoch": 0.12, "grad_norm": 0.35294840394706045, "learning_rate": 1.957690274719111e-05, "loss": 0.2953, "step": 2616 }, { "epoch": 0.12, "grad_norm": 1.455303062444168, "learning_rate": 1.9576474416741206e-05, "loss": 0.7456, "step": 2617 }, { "epoch": 0.12, "grad_norm": 0.7147676567215108, "learning_rate": 1.957604587427739e-05, "loss": 0.4223, "step": 2618 }, { "epoch": 0.12, "grad_norm": 0.33271723827782546, "learning_rate": 1.9575617119809144e-05, "loss": 0.2456, "step": 2619 }, { "epoch": 0.12, "grad_norm": 1.6133911631795532, "learning_rate": 1.9575188153345966e-05, "loss": 0.9356, "step": 2620 }, { "epoch": 0.12, "grad_norm": 0.5610447400749208, "learning_rate": 1.957475897489735e-05, "loss": 0.3892, "step": 2621 }, { "epoch": 0.12, "grad_norm": 0.399179052450849, "learning_rate": 1.95743295844728e-05, "loss": 0.2332, "step": 2622 }, { "epoch": 0.12, "grad_norm": 0.49918721276215844, "learning_rate": 1.957389998208182e-05, "loss": 0.3602, "step": 2623 }, { "epoch": 0.12, "grad_norm": 0.7045487648291536, "learning_rate": 1.9573470167733926e-05, "loss": 0.3295, "step": 2624 }, { "epoch": 0.12, "grad_norm": 0.34135872303828413, "learning_rate": 1.9573040141438625e-05, "loss": 0.205, "step": 2625 }, { "epoch": 0.12, "grad_norm": 0.6924770682946492, "learning_rate": 1.9572609903205442e-05, "loss": 0.5041, "step": 2626 }, { "epoch": 0.12, "grad_norm": 0.4248369267366425, "learning_rate": 1.9572179453043905e-05, "loss": 0.3279, "step": 2627 }, { "epoch": 0.12, "grad_norm": 0.438879360586693, "learning_rate": 1.957174879096354e-05, "loss": 0.3113, "step": 2628 }, { "epoch": 0.12, "grad_norm": 0.3521882108441771, "learning_rate": 1.9571317916973877e-05, "loss": 0.1488, "step": 2629 }, { "epoch": 0.12, "grad_norm": 0.9036686206433856, "learning_rate": 1.9570886831084466e-05, "loss": 0.4536, "step": 2630 }, { "epoch": 0.12, "grad_norm": 0.3535453112761301, "learning_rate": 1.957045553330484e-05, "loss": 0.2469, "step": 2631 }, { "epoch": 0.12, "grad_norm": 0.45898846848902886, "learning_rate": 1.957002402364456e-05, "loss": 0.3555, "step": 2632 }, { "epoch": 0.12, "grad_norm": 0.7710637409088189, "learning_rate": 1.9569592302113164e-05, "loss": 0.455, "step": 2633 }, { "epoch": 0.12, "grad_norm": 0.3949491521042867, "learning_rate": 1.956916036872022e-05, "loss": 0.2399, "step": 2634 }, { "epoch": 0.12, "grad_norm": 0.5231927274189248, "learning_rate": 1.9568728223475292e-05, "loss": 0.2975, "step": 2635 }, { "epoch": 0.12, "grad_norm": 1.0538937915345412, "learning_rate": 1.956829586638794e-05, "loss": 0.4178, "step": 2636 }, { "epoch": 0.12, "grad_norm": 0.41415657038140974, "learning_rate": 1.956786329746774e-05, "loss": 0.3151, "step": 2637 }, { "epoch": 0.12, "grad_norm": 0.963778736543261, "learning_rate": 1.9567430516724268e-05, "loss": 0.4265, "step": 2638 }, { "epoch": 0.12, "grad_norm": 0.4774903334790501, "learning_rate": 1.9566997524167108e-05, "loss": 0.3312, "step": 2639 }, { "epoch": 0.12, "grad_norm": 0.5319709754758282, "learning_rate": 1.9566564319805842e-05, "loss": 0.3954, "step": 2640 }, { "epoch": 0.12, "grad_norm": 0.37041739189839096, "learning_rate": 1.9566130903650064e-05, "loss": 0.2008, "step": 2641 }, { "epoch": 0.12, "grad_norm": 0.4821871487489271, "learning_rate": 1.9565697275709366e-05, "loss": 0.1478, "step": 2642 }, { "epoch": 0.12, "grad_norm": 0.5570046882565686, "learning_rate": 1.956526343599335e-05, "loss": 0.3584, "step": 2643 }, { "epoch": 0.12, "grad_norm": 0.6624509850092503, "learning_rate": 1.956482938451162e-05, "loss": 0.3824, "step": 2644 }, { "epoch": 0.12, "grad_norm": 0.69034167169188, "learning_rate": 1.9564395121273785e-05, "loss": 0.3418, "step": 2645 }, { "epoch": 0.12, "grad_norm": 0.5521586322251684, "learning_rate": 1.9563960646289464e-05, "loss": 0.3196, "step": 2646 }, { "epoch": 0.12, "grad_norm": 0.3333338962503935, "learning_rate": 1.956352595956827e-05, "loss": 0.2506, "step": 2647 }, { "epoch": 0.12, "grad_norm": 0.6441977581805436, "learning_rate": 1.956309106111983e-05, "loss": 0.1974, "step": 2648 }, { "epoch": 0.12, "grad_norm": 0.48900449265425344, "learning_rate": 1.9562655950953768e-05, "loss": 0.3206, "step": 2649 }, { "epoch": 0.12, "grad_norm": 1.191370054413781, "learning_rate": 1.9562220629079723e-05, "loss": 0.5293, "step": 2650 }, { "epoch": 0.12, "grad_norm": 0.4894821562012367, "learning_rate": 1.9561785095507327e-05, "loss": 0.2617, "step": 2651 }, { "epoch": 0.12, "grad_norm": 0.5084907722530567, "learning_rate": 1.9561349350246226e-05, "loss": 0.3337, "step": 2652 }, { "epoch": 0.12, "grad_norm": 0.44079058431848234, "learning_rate": 1.9560913393306068e-05, "loss": 0.1741, "step": 2653 }, { "epoch": 0.12, "grad_norm": 1.0370178279451354, "learning_rate": 1.95604772246965e-05, "loss": 0.5233, "step": 2654 }, { "epoch": 0.12, "grad_norm": 0.41637920036119297, "learning_rate": 1.956004084442718e-05, "loss": 0.2493, "step": 2655 }, { "epoch": 0.12, "grad_norm": 0.9393828595331755, "learning_rate": 1.9559604252507768e-05, "loss": 0.5775, "step": 2656 }, { "epoch": 0.12, "grad_norm": 1.4046058257688947, "learning_rate": 1.9559167448947937e-05, "loss": 0.7905, "step": 2657 }, { "epoch": 0.12, "grad_norm": 0.5229844929324957, "learning_rate": 1.9558730433757348e-05, "loss": 0.222, "step": 2658 }, { "epoch": 0.12, "grad_norm": 0.3029112203647432, "learning_rate": 1.955829320694568e-05, "loss": 0.2186, "step": 2659 }, { "epoch": 0.12, "grad_norm": 1.0222830837202752, "learning_rate": 1.9557855768522613e-05, "loss": 0.4525, "step": 2660 }, { "epoch": 0.12, "grad_norm": 0.4159857051421944, "learning_rate": 1.9557418118497832e-05, "loss": 0.2738, "step": 2661 }, { "epoch": 0.12, "grad_norm": 1.0006003860946577, "learning_rate": 1.9556980256881025e-05, "loss": 0.63, "step": 2662 }, { "epoch": 0.12, "grad_norm": 0.5365428375772864, "learning_rate": 1.955654218368189e-05, "loss": 0.3614, "step": 2663 }, { "epoch": 0.12, "grad_norm": 0.48757551217910594, "learning_rate": 1.9556103898910116e-05, "loss": 0.2877, "step": 2664 }, { "epoch": 0.12, "grad_norm": 0.3386132806310522, "learning_rate": 1.9555665402575418e-05, "loss": 0.1427, "step": 2665 }, { "epoch": 0.12, "grad_norm": 1.0713907556699434, "learning_rate": 1.9555226694687492e-05, "loss": 0.5201, "step": 2666 }, { "epoch": 0.12, "grad_norm": 0.4196810875497365, "learning_rate": 1.955478777525606e-05, "loss": 0.2907, "step": 2667 }, { "epoch": 0.12, "grad_norm": 0.5527529652206862, "learning_rate": 1.9554348644290832e-05, "loss": 0.3468, "step": 2668 }, { "epoch": 0.12, "grad_norm": 1.5884057836820071, "learning_rate": 1.9553909301801536e-05, "loss": 0.8254, "step": 2669 }, { "epoch": 0.12, "grad_norm": 0.4361367584529289, "learning_rate": 1.9553469747797898e-05, "loss": 0.3137, "step": 2670 }, { "epoch": 0.12, "grad_norm": 0.373542468620049, "learning_rate": 1.9553029982289645e-05, "loss": 0.222, "step": 2671 }, { "epoch": 0.12, "grad_norm": 0.7936844764878231, "learning_rate": 1.9552590005286518e-05, "loss": 0.3865, "step": 2672 }, { "epoch": 0.12, "grad_norm": 0.49784027194747066, "learning_rate": 1.9552149816798255e-05, "loss": 0.3009, "step": 2673 }, { "epoch": 0.12, "grad_norm": 1.118725279014554, "learning_rate": 1.9551709416834596e-05, "loss": 0.4737, "step": 2674 }, { "epoch": 0.12, "grad_norm": 0.5623489336116214, "learning_rate": 1.9551268805405302e-05, "loss": 0.3561, "step": 2675 }, { "epoch": 0.12, "grad_norm": 0.4415744540421797, "learning_rate": 1.9550827982520122e-05, "loss": 0.2686, "step": 2676 }, { "epoch": 0.12, "grad_norm": 0.9916534503157868, "learning_rate": 1.9550386948188814e-05, "loss": 0.5826, "step": 2677 }, { "epoch": 0.12, "grad_norm": 0.32826891612978104, "learning_rate": 1.9549945702421144e-05, "loss": 0.2929, "step": 2678 }, { "epoch": 0.12, "grad_norm": 0.49601626008277505, "learning_rate": 1.954950424522688e-05, "loss": 0.3042, "step": 2679 }, { "epoch": 0.12, "grad_norm": 0.4769741156261984, "learning_rate": 1.9549062576615797e-05, "loss": 0.262, "step": 2680 }, { "epoch": 0.12, "grad_norm": 0.5833148390207082, "learning_rate": 1.9548620696597672e-05, "loss": 0.2852, "step": 2681 }, { "epoch": 0.12, "grad_norm": 0.5273574815493769, "learning_rate": 1.954817860518229e-05, "loss": 0.2929, "step": 2682 }, { "epoch": 0.12, "grad_norm": 0.4717920748783304, "learning_rate": 1.9547736302379433e-05, "loss": 0.3656, "step": 2683 }, { "epoch": 0.12, "grad_norm": 0.5333053224613801, "learning_rate": 1.95472937881989e-05, "loss": 0.3024, "step": 2684 }, { "epoch": 0.12, "grad_norm": 0.3265390233175672, "learning_rate": 1.954685106265048e-05, "loss": 0.2243, "step": 2685 }, { "epoch": 0.12, "grad_norm": 1.2659938909769657, "learning_rate": 1.954640812574398e-05, "loss": 0.7928, "step": 2686 }, { "epoch": 0.12, "grad_norm": 0.4823140218116708, "learning_rate": 1.9545964977489205e-05, "loss": 0.3005, "step": 2687 }, { "epoch": 0.12, "grad_norm": 0.5172891358452325, "learning_rate": 1.9545521617895965e-05, "loss": 0.2906, "step": 2688 }, { "epoch": 0.12, "grad_norm": 0.6262499379706858, "learning_rate": 1.954507804697408e-05, "loss": 0.5276, "step": 2689 }, { "epoch": 0.12, "grad_norm": 0.5455439660363581, "learning_rate": 1.9544634264733363e-05, "loss": 0.4374, "step": 2690 }, { "epoch": 0.12, "grad_norm": 0.3950906285573646, "learning_rate": 1.9544190271183647e-05, "loss": 0.214, "step": 2691 }, { "epoch": 0.12, "grad_norm": 0.58345490444392, "learning_rate": 1.9543746066334755e-05, "loss": 0.2675, "step": 2692 }, { "epoch": 0.12, "grad_norm": 1.3264339841866752, "learning_rate": 1.9543301650196523e-05, "loss": 0.7517, "step": 2693 }, { "epoch": 0.12, "grad_norm": 0.4158528564419858, "learning_rate": 1.954285702277879e-05, "loss": 0.257, "step": 2694 }, { "epoch": 0.12, "grad_norm": 0.4822177004526829, "learning_rate": 1.95424121840914e-05, "loss": 0.3639, "step": 2695 }, { "epoch": 0.12, "grad_norm": 0.48643286227064786, "learning_rate": 1.95419671341442e-05, "loss": 0.3103, "step": 2696 }, { "epoch": 0.12, "grad_norm": 0.3485184890450811, "learning_rate": 1.954152187294705e-05, "loss": 0.1499, "step": 2697 }, { "epoch": 0.12, "grad_norm": 0.6371826709355921, "learning_rate": 1.9541076400509798e-05, "loss": 0.4582, "step": 2698 }, { "epoch": 0.12, "grad_norm": 0.5122405831985051, "learning_rate": 1.954063071684231e-05, "loss": 0.3606, "step": 2699 }, { "epoch": 0.12, "grad_norm": 0.4386015764678328, "learning_rate": 1.9540184821954456e-05, "loss": 0.2422, "step": 2700 }, { "epoch": 0.12, "grad_norm": 0.6397039168501374, "learning_rate": 1.95397387158561e-05, "loss": 0.4529, "step": 2701 }, { "epoch": 0.12, "grad_norm": 0.6542233140675675, "learning_rate": 1.953929239855713e-05, "loss": 0.3905, "step": 2702 }, { "epoch": 0.12, "grad_norm": 0.4864412335597301, "learning_rate": 1.9538845870067412e-05, "loss": 0.3011, "step": 2703 }, { "epoch": 0.12, "grad_norm": 0.30948337924147673, "learning_rate": 1.953839913039685e-05, "loss": 0.1829, "step": 2704 }, { "epoch": 0.12, "grad_norm": 0.8308036852282087, "learning_rate": 1.9537952179555315e-05, "loss": 0.6176, "step": 2705 }, { "epoch": 0.12, "grad_norm": 0.48109952612974693, "learning_rate": 1.9537505017552716e-05, "loss": 0.3544, "step": 2706 }, { "epoch": 0.12, "grad_norm": 0.41061095648212154, "learning_rate": 1.9537057644398948e-05, "loss": 0.3038, "step": 2707 }, { "epoch": 0.12, "grad_norm": 1.2575042671889498, "learning_rate": 1.9536610060103916e-05, "loss": 0.7074, "step": 2708 }, { "epoch": 0.12, "grad_norm": 0.3627395772924548, "learning_rate": 1.953616226467753e-05, "loss": 0.2251, "step": 2709 }, { "epoch": 0.12, "grad_norm": 0.5395140255526679, "learning_rate": 1.95357142581297e-05, "loss": 0.2933, "step": 2710 }, { "epoch": 0.12, "grad_norm": 0.4149019537642368, "learning_rate": 1.953526604047035e-05, "loss": 0.3772, "step": 2711 }, { "epoch": 0.12, "grad_norm": 0.37795623499851605, "learning_rate": 1.9534817611709395e-05, "loss": 0.3069, "step": 2712 }, { "epoch": 0.12, "grad_norm": 0.3837095307584616, "learning_rate": 1.953436897185677e-05, "loss": 0.2139, "step": 2713 }, { "epoch": 0.12, "grad_norm": 0.4884241444771125, "learning_rate": 1.9533920120922407e-05, "loss": 0.3586, "step": 2714 }, { "epoch": 0.12, "grad_norm": 0.8181213710817211, "learning_rate": 1.953347105891624e-05, "loss": 0.404, "step": 2715 }, { "epoch": 0.12, "grad_norm": 0.3781155672368854, "learning_rate": 1.9533021785848215e-05, "loss": 0.2652, "step": 2716 }, { "epoch": 0.12, "grad_norm": 0.674419641095578, "learning_rate": 1.9532572301728274e-05, "loss": 0.4299, "step": 2717 }, { "epoch": 0.12, "grad_norm": 0.5301144849367732, "learning_rate": 1.9532122606566368e-05, "loss": 0.3221, "step": 2718 }, { "epoch": 0.12, "grad_norm": 0.38442693168779324, "learning_rate": 1.9531672700372457e-05, "loss": 0.2619, "step": 2719 }, { "epoch": 0.12, "grad_norm": 0.893107632772404, "learning_rate": 1.9531222583156496e-05, "loss": 0.3969, "step": 2720 }, { "epoch": 0.12, "grad_norm": 0.6587154115642132, "learning_rate": 1.953077225492846e-05, "loss": 0.4252, "step": 2721 }, { "epoch": 0.13, "grad_norm": 0.432918225197781, "learning_rate": 1.9530321715698303e-05, "loss": 0.3274, "step": 2722 }, { "epoch": 0.13, "grad_norm": 0.6454133785954299, "learning_rate": 1.9529870965476016e-05, "loss": 0.3587, "step": 2723 }, { "epoch": 0.13, "grad_norm": 0.49642465655349327, "learning_rate": 1.9529420004271568e-05, "loss": 0.3567, "step": 2724 }, { "epoch": 0.13, "grad_norm": 0.3257606577489552, "learning_rate": 1.9528968832094947e-05, "loss": 0.1918, "step": 2725 }, { "epoch": 0.13, "grad_norm": 0.4811564882075819, "learning_rate": 1.9528517448956137e-05, "loss": 0.2942, "step": 2726 }, { "epoch": 0.13, "grad_norm": 0.5025518396030556, "learning_rate": 1.9528065854865137e-05, "loss": 0.3021, "step": 2727 }, { "epoch": 0.13, "grad_norm": 0.7289336550926332, "learning_rate": 1.952761404983194e-05, "loss": 0.3908, "step": 2728 }, { "epoch": 0.13, "grad_norm": 1.2657848401552678, "learning_rate": 1.9527162033866553e-05, "loss": 0.5196, "step": 2729 }, { "epoch": 0.13, "grad_norm": 0.33520501341426207, "learning_rate": 1.952670980697898e-05, "loss": 0.2596, "step": 2730 }, { "epoch": 0.13, "grad_norm": 0.32280453011747723, "learning_rate": 1.9526257369179234e-05, "loss": 0.2117, "step": 2731 }, { "epoch": 0.13, "grad_norm": 1.5288177718008797, "learning_rate": 1.9525804720477334e-05, "loss": 0.709, "step": 2732 }, { "epoch": 0.13, "grad_norm": 0.48520347157174504, "learning_rate": 1.9525351860883295e-05, "loss": 0.1335, "step": 2733 }, { "epoch": 0.13, "grad_norm": 0.5573358080341992, "learning_rate": 1.952489879040715e-05, "loss": 0.3427, "step": 2734 }, { "epoch": 0.13, "grad_norm": 0.8793106741548435, "learning_rate": 1.952444550905892e-05, "loss": 0.454, "step": 2735 }, { "epoch": 0.13, "grad_norm": 0.4004608023974184, "learning_rate": 1.952399201684865e-05, "loss": 0.1364, "step": 2736 }, { "epoch": 0.13, "grad_norm": 0.3657707487604822, "learning_rate": 1.9523538313786375e-05, "loss": 0.2389, "step": 2737 }, { "epoch": 0.13, "grad_norm": 0.5697021320997692, "learning_rate": 1.9523084399882143e-05, "loss": 0.3455, "step": 2738 }, { "epoch": 0.13, "grad_norm": 0.44594186799801006, "learning_rate": 1.9522630275146e-05, "loss": 0.1242, "step": 2739 }, { "epoch": 0.13, "grad_norm": 0.6536382645589643, "learning_rate": 1.9522175939588003e-05, "loss": 0.3949, "step": 2740 }, { "epoch": 0.13, "grad_norm": 0.9872161811986501, "learning_rate": 1.9521721393218204e-05, "loss": 0.5995, "step": 2741 }, { "epoch": 0.13, "grad_norm": 0.4859685734691315, "learning_rate": 1.9521266636046672e-05, "loss": 0.3523, "step": 2742 }, { "epoch": 0.13, "grad_norm": 0.36719922944291267, "learning_rate": 1.9520811668083472e-05, "loss": 0.2042, "step": 2743 }, { "epoch": 0.13, "grad_norm": 0.6789302601937945, "learning_rate": 1.9520356489338682e-05, "loss": 0.3481, "step": 2744 }, { "epoch": 0.13, "grad_norm": 0.68770105102681, "learning_rate": 1.951990109982237e-05, "loss": 0.4135, "step": 2745 }, { "epoch": 0.13, "grad_norm": 0.600709015073133, "learning_rate": 1.9519445499544628e-05, "loss": 0.2862, "step": 2746 }, { "epoch": 0.13, "grad_norm": 0.8118126572157136, "learning_rate": 1.9518989688515533e-05, "loss": 0.4451, "step": 2747 }, { "epoch": 0.13, "grad_norm": 0.7424200738399783, "learning_rate": 1.9518533666745183e-05, "loss": 0.3801, "step": 2748 }, { "epoch": 0.13, "grad_norm": 0.2869061808105938, "learning_rate": 1.951807743424367e-05, "loss": 0.167, "step": 2749 }, { "epoch": 0.13, "grad_norm": 0.3930092415093938, "learning_rate": 1.95176209910211e-05, "loss": 0.3283, "step": 2750 }, { "epoch": 0.13, "grad_norm": 0.9737698862633957, "learning_rate": 1.9517164337087575e-05, "loss": 0.5595, "step": 2751 }, { "epoch": 0.13, "grad_norm": 0.4183785959187085, "learning_rate": 1.95167074724532e-05, "loss": 0.2799, "step": 2752 }, { "epoch": 0.13, "grad_norm": 1.1085401405518995, "learning_rate": 1.9516250397128095e-05, "loss": 0.7747, "step": 2753 }, { "epoch": 0.13, "grad_norm": 0.48546134492392634, "learning_rate": 1.951579311112238e-05, "loss": 0.3428, "step": 2754 }, { "epoch": 0.13, "grad_norm": 0.44941389051691616, "learning_rate": 1.9515335614446172e-05, "loss": 0.3003, "step": 2755 }, { "epoch": 0.13, "grad_norm": 0.353062941887427, "learning_rate": 1.9514877907109612e-05, "loss": 0.1523, "step": 2756 }, { "epoch": 0.13, "grad_norm": 0.7607167501632541, "learning_rate": 1.951441998912282e-05, "loss": 0.4257, "step": 2757 }, { "epoch": 0.13, "grad_norm": 0.41644459660926225, "learning_rate": 1.951396186049594e-05, "loss": 0.2949, "step": 2758 }, { "epoch": 0.13, "grad_norm": 1.2735480429693264, "learning_rate": 1.9513503521239116e-05, "loss": 0.5326, "step": 2759 }, { "epoch": 0.13, "grad_norm": 0.8172160465077501, "learning_rate": 1.9513044971362494e-05, "loss": 0.5044, "step": 2760 }, { "epoch": 0.13, "grad_norm": 0.32402605057839046, "learning_rate": 1.9512586210876223e-05, "loss": 0.2122, "step": 2761 }, { "epoch": 0.13, "grad_norm": 0.5973014932966829, "learning_rate": 1.9512127239790463e-05, "loss": 0.3008, "step": 2762 }, { "epoch": 0.13, "grad_norm": 1.2406595123437545, "learning_rate": 1.9511668058115375e-05, "loss": 0.5092, "step": 2763 }, { "epoch": 0.13, "grad_norm": 0.521271451529703, "learning_rate": 1.951120866586112e-05, "loss": 0.3167, "step": 2764 }, { "epoch": 0.13, "grad_norm": 1.4250463987824893, "learning_rate": 1.9510749063037876e-05, "loss": 0.5902, "step": 2765 }, { "epoch": 0.13, "grad_norm": 0.4387400393617523, "learning_rate": 1.951028924965581e-05, "loss": 0.3096, "step": 2766 }, { "epoch": 0.13, "grad_norm": 0.5729286084890407, "learning_rate": 1.950982922572511e-05, "loss": 0.3358, "step": 2767 }, { "epoch": 0.13, "grad_norm": 0.5016881309631069, "learning_rate": 1.9509368991255955e-05, "loss": 0.2683, "step": 2768 }, { "epoch": 0.13, "grad_norm": 1.4319347970899394, "learning_rate": 1.9508908546258535e-05, "loss": 0.4604, "step": 2769 }, { "epoch": 0.13, "grad_norm": 0.406908777435969, "learning_rate": 1.950844789074305e-05, "loss": 0.2859, "step": 2770 }, { "epoch": 0.13, "grad_norm": 0.60347220104888, "learning_rate": 1.9507987024719686e-05, "loss": 0.4303, "step": 2771 }, { "epoch": 0.13, "grad_norm": 0.9701522684664564, "learning_rate": 1.9507525948198657e-05, "loss": 0.2933, "step": 2772 }, { "epoch": 0.13, "grad_norm": 0.42463834956329255, "learning_rate": 1.950706466119016e-05, "loss": 0.2956, "step": 2773 }, { "epoch": 0.13, "grad_norm": 0.4943662887424194, "learning_rate": 1.9506603163704427e-05, "loss": 0.3764, "step": 2774 }, { "epoch": 0.13, "grad_norm": 0.40481840936892105, "learning_rate": 1.9506141455751652e-05, "loss": 0.1575, "step": 2775 }, { "epoch": 0.13, "grad_norm": 0.5260020648484066, "learning_rate": 1.9505679537342073e-05, "loss": 0.3128, "step": 2776 }, { "epoch": 0.13, "grad_norm": 1.5157984955215067, "learning_rate": 1.950521740848591e-05, "loss": 0.7835, "step": 2777 }, { "epoch": 0.13, "grad_norm": 0.4688222092875608, "learning_rate": 1.950475506919339e-05, "loss": 0.2621, "step": 2778 }, { "epoch": 0.13, "grad_norm": 0.48815828119810367, "learning_rate": 1.950429251947476e-05, "loss": 0.3263, "step": 2779 }, { "epoch": 0.13, "grad_norm": 0.7914354656834245, "learning_rate": 1.950382975934025e-05, "loss": 0.5288, "step": 2780 }, { "epoch": 0.13, "grad_norm": 0.35107923717638584, "learning_rate": 1.950336678880011e-05, "loss": 0.2091, "step": 2781 }, { "epoch": 0.13, "grad_norm": 0.4383983500720953, "learning_rate": 1.950290360786459e-05, "loss": 0.2193, "step": 2782 }, { "epoch": 0.13, "grad_norm": 0.6058417904690261, "learning_rate": 1.950244021654394e-05, "loss": 0.4111, "step": 2783 }, { "epoch": 0.13, "grad_norm": 0.9889919122225294, "learning_rate": 1.9501976614848425e-05, "loss": 0.5436, "step": 2784 }, { "epoch": 0.13, "grad_norm": 0.39388507729982825, "learning_rate": 1.9501512802788306e-05, "loss": 0.2278, "step": 2785 }, { "epoch": 0.13, "grad_norm": 0.4630320384460615, "learning_rate": 1.9501048780373853e-05, "loss": 0.3645, "step": 2786 }, { "epoch": 0.13, "grad_norm": 0.3185265786835577, "learning_rate": 1.9500584547615332e-05, "loss": 0.1688, "step": 2787 }, { "epoch": 0.13, "grad_norm": 0.3920268706444493, "learning_rate": 1.9500120104523027e-05, "loss": 0.2269, "step": 2788 }, { "epoch": 0.13, "grad_norm": 0.5784021965223299, "learning_rate": 1.9499655451107223e-05, "loss": 0.4243, "step": 2789 }, { "epoch": 0.13, "grad_norm": 0.6113989804919907, "learning_rate": 1.94991905873782e-05, "loss": 0.4376, "step": 2790 }, { "epoch": 0.13, "grad_norm": 0.3915034966628301, "learning_rate": 1.9498725513346254e-05, "loss": 0.3049, "step": 2791 }, { "epoch": 0.13, "grad_norm": 0.8975716931366301, "learning_rate": 1.9498260229021683e-05, "loss": 0.4373, "step": 2792 }, { "epoch": 0.13, "grad_norm": 0.2975147193454297, "learning_rate": 1.9497794734414782e-05, "loss": 0.2056, "step": 2793 }, { "epoch": 0.13, "grad_norm": 0.3666551769459927, "learning_rate": 1.949732902953586e-05, "loss": 0.2901, "step": 2794 }, { "epoch": 0.13, "grad_norm": 0.8456670959164763, "learning_rate": 1.9496863114395223e-05, "loss": 0.6062, "step": 2795 }, { "epoch": 0.13, "grad_norm": 0.739838159372834, "learning_rate": 1.9496396989003195e-05, "loss": 0.4867, "step": 2796 }, { "epoch": 0.13, "grad_norm": 0.4258264861736871, "learning_rate": 1.9495930653370088e-05, "loss": 0.3199, "step": 2797 }, { "epoch": 0.13, "grad_norm": 0.4784883452351987, "learning_rate": 1.949546410750623e-05, "loss": 0.297, "step": 2798 }, { "epoch": 0.13, "grad_norm": 0.43358302967145923, "learning_rate": 1.9494997351421946e-05, "loss": 0.2555, "step": 2799 }, { "epoch": 0.13, "grad_norm": 0.42473575408629693, "learning_rate": 1.9494530385127578e-05, "loss": 0.281, "step": 2800 }, { "epoch": 0.13, "grad_norm": 0.3931367615956359, "learning_rate": 1.949406320863345e-05, "loss": 0.2273, "step": 2801 }, { "epoch": 0.13, "grad_norm": 0.7312989154615723, "learning_rate": 1.949359582194992e-05, "loss": 0.4464, "step": 2802 }, { "epoch": 0.13, "grad_norm": 0.5037413164123241, "learning_rate": 1.9493128225087325e-05, "loss": 0.2646, "step": 2803 }, { "epoch": 0.13, "grad_norm": 0.7587385168440014, "learning_rate": 1.9492660418056023e-05, "loss": 0.4265, "step": 2804 }, { "epoch": 0.13, "grad_norm": 0.4501135382174161, "learning_rate": 1.9492192400866366e-05, "loss": 0.2954, "step": 2805 }, { "epoch": 0.13, "grad_norm": 0.48163449896690286, "learning_rate": 1.949172417352872e-05, "loss": 0.3377, "step": 2806 }, { "epoch": 0.13, "grad_norm": 0.39728511648694087, "learning_rate": 1.9491255736053448e-05, "loss": 0.2648, "step": 2807 }, { "epoch": 0.13, "grad_norm": 1.0107534819519373, "learning_rate": 1.9490787088450922e-05, "loss": 0.4283, "step": 2808 }, { "epoch": 0.13, "grad_norm": 0.3748556884861427, "learning_rate": 1.949031823073152e-05, "loss": 0.2419, "step": 2809 }, { "epoch": 0.13, "grad_norm": 0.5628163699959556, "learning_rate": 1.9489849162905613e-05, "loss": 0.3876, "step": 2810 }, { "epoch": 0.13, "grad_norm": 0.8488054922242871, "learning_rate": 1.9489379884983594e-05, "loss": 0.2922, "step": 2811 }, { "epoch": 0.13, "grad_norm": 0.4282767645109543, "learning_rate": 1.948891039697585e-05, "loss": 0.298, "step": 2812 }, { "epoch": 0.13, "grad_norm": 1.100961880061495, "learning_rate": 1.9488440698892777e-05, "loss": 0.5963, "step": 2813 }, { "epoch": 0.13, "grad_norm": 0.3958890772853788, "learning_rate": 1.9487970790744774e-05, "loss": 0.2841, "step": 2814 }, { "epoch": 0.13, "grad_norm": 0.31484311993082836, "learning_rate": 1.9487500672542242e-05, "loss": 0.234, "step": 2815 }, { "epoch": 0.13, "grad_norm": 1.480546280574842, "learning_rate": 1.9487030344295586e-05, "loss": 0.7548, "step": 2816 }, { "epoch": 0.13, "grad_norm": 0.5488601431227028, "learning_rate": 1.9486559806015223e-05, "loss": 0.3529, "step": 2817 }, { "epoch": 0.13, "grad_norm": 0.40819259012276216, "learning_rate": 1.948608905771157e-05, "loss": 0.2239, "step": 2818 }, { "epoch": 0.13, "grad_norm": 1.2813114906017016, "learning_rate": 1.948561809939505e-05, "loss": 0.6409, "step": 2819 }, { "epoch": 0.13, "grad_norm": 0.5388129318395684, "learning_rate": 1.948514693107608e-05, "loss": 0.3754, "step": 2820 }, { "epoch": 0.13, "grad_norm": 0.3285410010221218, "learning_rate": 1.9484675552765107e-05, "loss": 0.1574, "step": 2821 }, { "epoch": 0.13, "grad_norm": 0.5001823189301893, "learning_rate": 1.9484203964472558e-05, "loss": 0.3216, "step": 2822 }, { "epoch": 0.13, "grad_norm": 1.5421405736348646, "learning_rate": 1.948373216620887e-05, "loss": 0.6466, "step": 2823 }, { "epoch": 0.13, "grad_norm": 0.4583724636482592, "learning_rate": 1.9483260157984497e-05, "loss": 0.2094, "step": 2824 }, { "epoch": 0.13, "grad_norm": 0.5272388263955063, "learning_rate": 1.948278793980988e-05, "loss": 0.3917, "step": 2825 }, { "epoch": 0.13, "grad_norm": 0.5005979324059838, "learning_rate": 1.948231551169548e-05, "loss": 0.3848, "step": 2826 }, { "epoch": 0.13, "grad_norm": 0.24490559384719637, "learning_rate": 1.9481842873651752e-05, "loss": 0.1347, "step": 2827 }, { "epoch": 0.13, "grad_norm": 0.750757486712475, "learning_rate": 1.948137002568916e-05, "loss": 0.4101, "step": 2828 }, { "epoch": 0.13, "grad_norm": 0.528174893075401, "learning_rate": 1.9480896967818176e-05, "loss": 0.3684, "step": 2829 }, { "epoch": 0.13, "grad_norm": 0.5074187237832363, "learning_rate": 1.9480423700049275e-05, "loss": 0.294, "step": 2830 }, { "epoch": 0.13, "grad_norm": 0.4785170340488487, "learning_rate": 1.9479950222392925e-05, "loss": 0.3579, "step": 2831 }, { "epoch": 0.13, "grad_norm": 0.9110532536162825, "learning_rate": 1.9479476534859615e-05, "loss": 0.6253, "step": 2832 }, { "epoch": 0.13, "grad_norm": 0.328390340221259, "learning_rate": 1.9479002637459835e-05, "loss": 0.2467, "step": 2833 }, { "epoch": 0.13, "grad_norm": 0.3993954503399109, "learning_rate": 1.9478528530204068e-05, "loss": 0.2065, "step": 2834 }, { "epoch": 0.13, "grad_norm": 1.2971328917007663, "learning_rate": 1.9478054213102817e-05, "loss": 0.6084, "step": 2835 }, { "epoch": 0.13, "grad_norm": 0.6269204282202119, "learning_rate": 1.9477579686166578e-05, "loss": 0.3612, "step": 2836 }, { "epoch": 0.13, "grad_norm": 0.4722885581933981, "learning_rate": 1.9477104949405862e-05, "loss": 0.3153, "step": 2837 }, { "epoch": 0.13, "grad_norm": 0.5527156876296377, "learning_rate": 1.9476630002831175e-05, "loss": 0.4068, "step": 2838 }, { "epoch": 0.13, "grad_norm": 0.6674494946320816, "learning_rate": 1.9476154846453037e-05, "loss": 0.32, "step": 2839 }, { "epoch": 0.13, "grad_norm": 0.4067762750922892, "learning_rate": 1.947567948028196e-05, "loss": 0.1805, "step": 2840 }, { "epoch": 0.13, "grad_norm": 0.40555496929204754, "learning_rate": 1.9475203904328476e-05, "loss": 0.3547, "step": 2841 }, { "epoch": 0.13, "grad_norm": 0.6624134475721274, "learning_rate": 1.9474728118603107e-05, "loss": 0.3937, "step": 2842 }, { "epoch": 0.13, "grad_norm": 0.5252666646488344, "learning_rate": 1.9474252123116388e-05, "loss": 0.3821, "step": 2843 }, { "epoch": 0.13, "grad_norm": 1.146066423973684, "learning_rate": 1.9473775917878862e-05, "loss": 0.5262, "step": 2844 }, { "epoch": 0.13, "grad_norm": 0.3592800142084382, "learning_rate": 1.9473299502901065e-05, "loss": 0.2678, "step": 2845 }, { "epoch": 0.13, "grad_norm": 0.3227891643343318, "learning_rate": 1.947282287819355e-05, "loss": 0.2198, "step": 2846 }, { "epoch": 0.13, "grad_norm": 0.8967841621019016, "learning_rate": 1.9472346043766866e-05, "loss": 0.4268, "step": 2847 }, { "epoch": 0.13, "grad_norm": 0.6206643221196362, "learning_rate": 1.947186899963157e-05, "loss": 0.4109, "step": 2848 }, { "epoch": 0.13, "grad_norm": 0.4411361484433389, "learning_rate": 1.947139174579822e-05, "loss": 0.3295, "step": 2849 }, { "epoch": 0.13, "grad_norm": 0.5668312019195174, "learning_rate": 1.9470914282277387e-05, "loss": 0.3178, "step": 2850 }, { "epoch": 0.13, "grad_norm": 0.4546291005809553, "learning_rate": 1.9470436609079645e-05, "loss": 0.3065, "step": 2851 }, { "epoch": 0.13, "grad_norm": 0.29049155184985687, "learning_rate": 1.946995872621556e-05, "loss": 0.1896, "step": 2852 }, { "epoch": 0.13, "grad_norm": 0.4881503094894983, "learning_rate": 1.9469480633695715e-05, "loss": 0.3225, "step": 2853 }, { "epoch": 0.13, "grad_norm": 0.7418212927463746, "learning_rate": 1.9469002331530696e-05, "loss": 0.393, "step": 2854 }, { "epoch": 0.13, "grad_norm": 0.4980044292765687, "learning_rate": 1.9468523819731095e-05, "loss": 0.3537, "step": 2855 }, { "epoch": 0.13, "grad_norm": 1.2872589102002467, "learning_rate": 1.94680450983075e-05, "loss": 0.7791, "step": 2856 }, { "epoch": 0.13, "grad_norm": 0.44645215780859826, "learning_rate": 1.946756616727051e-05, "loss": 0.2753, "step": 2857 }, { "epoch": 0.13, "grad_norm": 0.2772434930065425, "learning_rate": 1.9467087026630733e-05, "loss": 0.2063, "step": 2858 }, { "epoch": 0.13, "grad_norm": 0.8195769010881503, "learning_rate": 1.9466607676398773e-05, "loss": 0.519, "step": 2859 }, { "epoch": 0.13, "grad_norm": 0.49380699577152537, "learning_rate": 1.9466128116585242e-05, "loss": 0.2066, "step": 2860 }, { "epoch": 0.13, "grad_norm": 0.35838641909007946, "learning_rate": 1.946564834720076e-05, "loss": 0.3095, "step": 2861 }, { "epoch": 0.13, "grad_norm": 1.5934028222744971, "learning_rate": 1.9465168368255946e-05, "loss": 0.8863, "step": 2862 }, { "epoch": 0.13, "grad_norm": 0.44995941036687864, "learning_rate": 1.946468817976143e-05, "loss": 0.233, "step": 2863 }, { "epoch": 0.13, "grad_norm": 0.4008155861561321, "learning_rate": 1.9464207781727837e-05, "loss": 0.3047, "step": 2864 }, { "epoch": 0.13, "grad_norm": 0.4961502799107277, "learning_rate": 1.9463727174165802e-05, "loss": 0.3546, "step": 2865 }, { "epoch": 0.13, "grad_norm": 0.29642968218164567, "learning_rate": 1.9463246357085973e-05, "loss": 0.1611, "step": 2866 }, { "epoch": 0.13, "grad_norm": 0.5524041298144406, "learning_rate": 1.946276533049899e-05, "loss": 0.3652, "step": 2867 }, { "epoch": 0.13, "grad_norm": 1.6191332874432969, "learning_rate": 1.94622840944155e-05, "loss": 0.8323, "step": 2868 }, { "epoch": 0.13, "grad_norm": 0.40941459468805647, "learning_rate": 1.9461802648846163e-05, "loss": 0.3313, "step": 2869 }, { "epoch": 0.13, "grad_norm": 0.47907680930525204, "learning_rate": 1.9461320993801633e-05, "loss": 0.2515, "step": 2870 }, { "epoch": 0.13, "grad_norm": 0.48112552535124037, "learning_rate": 1.9460839129292575e-05, "loss": 0.357, "step": 2871 }, { "epoch": 0.13, "grad_norm": 0.349387993020198, "learning_rate": 1.946035705532966e-05, "loss": 0.1902, "step": 2872 }, { "epoch": 0.13, "grad_norm": 0.44940603178533056, "learning_rate": 1.9459874771923556e-05, "loss": 0.2777, "step": 2873 }, { "epoch": 0.13, "grad_norm": 1.366761921031347, "learning_rate": 1.9459392279084942e-05, "loss": 0.8208, "step": 2874 }, { "epoch": 0.13, "grad_norm": 0.9086740026426672, "learning_rate": 1.94589095768245e-05, "loss": 0.5648, "step": 2875 }, { "epoch": 0.13, "grad_norm": 0.40287550340105055, "learning_rate": 1.9458426665152918e-05, "loss": 0.2746, "step": 2876 }, { "epoch": 0.13, "grad_norm": 0.39515145655812706, "learning_rate": 1.9457943544080883e-05, "loss": 0.3032, "step": 2877 }, { "epoch": 0.13, "grad_norm": 0.4553196784369032, "learning_rate": 1.9457460213619096e-05, "loss": 0.2111, "step": 2878 }, { "epoch": 0.13, "grad_norm": 0.441589637020675, "learning_rate": 1.945697667377825e-05, "loss": 0.2403, "step": 2879 }, { "epoch": 0.13, "grad_norm": 1.1382514092871128, "learning_rate": 1.9456492924569063e-05, "loss": 0.6977, "step": 2880 }, { "epoch": 0.13, "grad_norm": 0.43872769219695046, "learning_rate": 1.9456008966002235e-05, "loss": 0.3335, "step": 2881 }, { "epoch": 0.13, "grad_norm": 0.42143152021122965, "learning_rate": 1.945552479808848e-05, "loss": 0.3347, "step": 2882 }, { "epoch": 0.13, "grad_norm": 0.8543263808496883, "learning_rate": 1.9455040420838517e-05, "loss": 0.4054, "step": 2883 }, { "epoch": 0.13, "grad_norm": 0.3054398881733468, "learning_rate": 1.9454555834263077e-05, "loss": 0.2242, "step": 2884 }, { "epoch": 0.13, "grad_norm": 0.419589585139474, "learning_rate": 1.945407103837288e-05, "loss": 0.3164, "step": 2885 }, { "epoch": 0.13, "grad_norm": 0.9345784559852062, "learning_rate": 1.945358603317866e-05, "loss": 0.4571, "step": 2886 }, { "epoch": 0.13, "grad_norm": 0.7164887451346594, "learning_rate": 1.9453100818691162e-05, "loss": 0.4616, "step": 2887 }, { "epoch": 0.13, "grad_norm": 0.4635393380843925, "learning_rate": 1.9452615394921124e-05, "loss": 0.3126, "step": 2888 }, { "epoch": 0.13, "grad_norm": 0.4317265100389451, "learning_rate": 1.9452129761879287e-05, "loss": 0.3021, "step": 2889 }, { "epoch": 0.13, "grad_norm": 0.402334428219918, "learning_rate": 1.945164391957641e-05, "loss": 0.1945, "step": 2890 }, { "epoch": 0.13, "grad_norm": 0.47512840904404013, "learning_rate": 1.9451157868023244e-05, "loss": 0.2964, "step": 2891 }, { "epoch": 0.13, "grad_norm": 0.831216127013056, "learning_rate": 1.9450671607230555e-05, "loss": 0.3398, "step": 2892 }, { "epoch": 0.13, "grad_norm": 0.6826246846647532, "learning_rate": 1.94501851372091e-05, "loss": 0.4102, "step": 2893 }, { "epoch": 0.13, "grad_norm": 0.3846588778394741, "learning_rate": 1.944969845796966e-05, "loss": 0.3048, "step": 2894 }, { "epoch": 0.13, "grad_norm": 1.2903335362931123, "learning_rate": 1.9449211569523002e-05, "loss": 0.5959, "step": 2895 }, { "epoch": 0.13, "grad_norm": 0.3835317202047741, "learning_rate": 1.9448724471879905e-05, "loss": 0.233, "step": 2896 }, { "epoch": 0.13, "grad_norm": 0.4579775810180769, "learning_rate": 1.9448237165051155e-05, "loss": 0.2938, "step": 2897 }, { "epoch": 0.13, "grad_norm": 0.6574760559842552, "learning_rate": 1.944774964904754e-05, "loss": 0.3793, "step": 2898 }, { "epoch": 0.13, "grad_norm": 1.5577957221159746, "learning_rate": 1.9447261923879858e-05, "loss": 0.4172, "step": 2899 }, { "epoch": 0.13, "grad_norm": 0.47983638241508664, "learning_rate": 1.94467739895589e-05, "loss": 0.2951, "step": 2900 }, { "epoch": 0.13, "grad_norm": 0.535976341839264, "learning_rate": 1.944628584609547e-05, "loss": 0.3682, "step": 2901 }, { "epoch": 0.13, "grad_norm": 1.2942518610775824, "learning_rate": 1.9445797493500377e-05, "loss": 0.3474, "step": 2902 }, { "epoch": 0.13, "grad_norm": 0.4702898809117938, "learning_rate": 1.944530893178443e-05, "loss": 0.3526, "step": 2903 }, { "epoch": 0.13, "grad_norm": 0.8470772800967312, "learning_rate": 1.944482016095845e-05, "loss": 0.3839, "step": 2904 }, { "epoch": 0.13, "grad_norm": 0.6883578292978668, "learning_rate": 1.9444331181033253e-05, "loss": 0.3245, "step": 2905 }, { "epoch": 0.13, "grad_norm": 0.41948378401097774, "learning_rate": 1.9443841992019666e-05, "loss": 0.2088, "step": 2906 }, { "epoch": 0.13, "grad_norm": 0.9363271850852859, "learning_rate": 1.9443352593928518e-05, "loss": 0.5534, "step": 2907 }, { "epoch": 0.13, "grad_norm": 0.4654242006795171, "learning_rate": 1.9442862986770645e-05, "loss": 0.3661, "step": 2908 }, { "epoch": 0.13, "grad_norm": 0.4610474265065332, "learning_rate": 1.944237317055689e-05, "loss": 0.2211, "step": 2909 }, { "epoch": 0.13, "grad_norm": 0.7112185195432745, "learning_rate": 1.944188314529809e-05, "loss": 0.5284, "step": 2910 }, { "epoch": 0.13, "grad_norm": 0.32162694484719206, "learning_rate": 1.94413929110051e-05, "loss": 0.2361, "step": 2911 }, { "epoch": 0.13, "grad_norm": 0.40477685546820236, "learning_rate": 1.9440902467688772e-05, "loss": 0.2152, "step": 2912 }, { "epoch": 0.13, "grad_norm": 0.49410989293304114, "learning_rate": 1.9440411815359957e-05, "loss": 0.3489, "step": 2913 }, { "epoch": 0.13, "grad_norm": 1.3391030784347844, "learning_rate": 1.9439920954029527e-05, "loss": 0.7046, "step": 2914 }, { "epoch": 0.13, "grad_norm": 0.36208087905490566, "learning_rate": 1.9439429883708344e-05, "loss": 0.2545, "step": 2915 }, { "epoch": 0.13, "grad_norm": 0.4673203435459726, "learning_rate": 1.9438938604407283e-05, "loss": 0.4, "step": 2916 }, { "epoch": 0.13, "grad_norm": 0.4034777431903411, "learning_rate": 1.9438447116137218e-05, "loss": 0.2821, "step": 2917 }, { "epoch": 0.13, "grad_norm": 0.3745479288801289, "learning_rate": 1.943795541890903e-05, "loss": 0.2484, "step": 2918 }, { "epoch": 0.13, "grad_norm": 1.0478111204784013, "learning_rate": 1.9437463512733607e-05, "loss": 0.3924, "step": 2919 }, { "epoch": 0.13, "grad_norm": 0.5589443557700237, "learning_rate": 1.9436971397621834e-05, "loss": 0.3898, "step": 2920 }, { "epoch": 0.13, "grad_norm": 0.4728030809067571, "learning_rate": 1.9436479073584617e-05, "loss": 0.3024, "step": 2921 }, { "epoch": 0.13, "grad_norm": 0.6878128655053624, "learning_rate": 1.9435986540632843e-05, "loss": 0.4158, "step": 2922 }, { "epoch": 0.13, "grad_norm": 0.3528471135911017, "learning_rate": 1.943549379877742e-05, "loss": 0.1992, "step": 2923 }, { "epoch": 0.13, "grad_norm": 0.4735949154677886, "learning_rate": 1.943500084802926e-05, "loss": 0.284, "step": 2924 }, { "epoch": 0.13, "grad_norm": 0.4967112187595432, "learning_rate": 1.943450768839928e-05, "loss": 0.2843, "step": 2925 }, { "epoch": 0.13, "grad_norm": 0.8239924272925735, "learning_rate": 1.9434014319898383e-05, "loss": 0.5752, "step": 2926 }, { "epoch": 0.13, "grad_norm": 0.49230049065217124, "learning_rate": 1.943352074253751e-05, "loss": 0.2829, "step": 2927 }, { "epoch": 0.13, "grad_norm": 0.33377275743423607, "learning_rate": 1.9433026956327577e-05, "loss": 0.3189, "step": 2928 }, { "epoch": 0.13, "grad_norm": 1.3410700961867785, "learning_rate": 1.9432532961279516e-05, "loss": 0.7849, "step": 2929 }, { "epoch": 0.13, "grad_norm": 0.28759336199283464, "learning_rate": 1.943203875740427e-05, "loss": 0.1828, "step": 2930 }, { "epoch": 0.13, "grad_norm": 0.6866012706335546, "learning_rate": 1.9431544344712776e-05, "loss": 0.3896, "step": 2931 }, { "epoch": 0.13, "grad_norm": 0.5417934821151584, "learning_rate": 1.943104972321598e-05, "loss": 0.3065, "step": 2932 }, { "epoch": 0.13, "grad_norm": 0.4522012087546732, "learning_rate": 1.9430554892924834e-05, "loss": 0.3078, "step": 2933 }, { "epoch": 0.13, "grad_norm": 0.9097237924793209, "learning_rate": 1.9430059853850292e-05, "loss": 0.6023, "step": 2934 }, { "epoch": 0.13, "grad_norm": 0.3400763036064946, "learning_rate": 1.942956460600331e-05, "loss": 0.1673, "step": 2935 }, { "epoch": 0.13, "grad_norm": 0.42301433597288635, "learning_rate": 1.942906914939486e-05, "loss": 0.2607, "step": 2936 }, { "epoch": 0.13, "grad_norm": 0.562921678379757, "learning_rate": 1.9428573484035905e-05, "loss": 0.3279, "step": 2937 }, { "epoch": 0.13, "grad_norm": 0.845392382995151, "learning_rate": 1.9428077609937422e-05, "loss": 0.4339, "step": 2938 }, { "epoch": 0.14, "grad_norm": 0.43403781503553085, "learning_rate": 1.9427581527110387e-05, "loss": 0.2954, "step": 2939 }, { "epoch": 0.14, "grad_norm": 0.6488486027284759, "learning_rate": 1.942708523556578e-05, "loss": 0.3914, "step": 2940 }, { "epoch": 0.14, "grad_norm": 0.35372505871879095, "learning_rate": 1.9426588735314596e-05, "loss": 0.2445, "step": 2941 }, { "epoch": 0.14, "grad_norm": 0.43198754897761893, "learning_rate": 1.9426092026367822e-05, "loss": 0.2214, "step": 2942 }, { "epoch": 0.14, "grad_norm": 0.5302542697717891, "learning_rate": 1.9425595108736454e-05, "loss": 0.323, "step": 2943 }, { "epoch": 0.14, "grad_norm": 0.61066221031102, "learning_rate": 1.94250979824315e-05, "loss": 0.3537, "step": 2944 }, { "epoch": 0.14, "grad_norm": 0.5196896967586498, "learning_rate": 1.9424600647463955e-05, "loss": 0.2577, "step": 2945 }, { "epoch": 0.14, "grad_norm": 0.6336482565745153, "learning_rate": 1.9424103103844837e-05, "loss": 0.4593, "step": 2946 }, { "epoch": 0.14, "grad_norm": 0.9534744796712975, "learning_rate": 1.9423605351585157e-05, "loss": 0.5325, "step": 2947 }, { "epoch": 0.14, "grad_norm": 0.3405004762116534, "learning_rate": 1.9423107390695942e-05, "loss": 0.1878, "step": 2948 }, { "epoch": 0.14, "grad_norm": 0.39998822062285394, "learning_rate": 1.9422609221188208e-05, "loss": 0.3219, "step": 2949 }, { "epoch": 0.14, "grad_norm": 0.8719287307134982, "learning_rate": 1.9422110843072986e-05, "loss": 0.5167, "step": 2950 }, { "epoch": 0.14, "grad_norm": 0.43666795576944717, "learning_rate": 1.942161225636131e-05, "loss": 0.2342, "step": 2951 }, { "epoch": 0.14, "grad_norm": 0.4562447822500738, "learning_rate": 1.9421113461064226e-05, "loss": 0.3518, "step": 2952 }, { "epoch": 0.14, "grad_norm": 1.4931287653994332, "learning_rate": 1.942061445719277e-05, "loss": 0.7072, "step": 2953 }, { "epoch": 0.14, "grad_norm": 0.30232457338830604, "learning_rate": 1.9420115244757985e-05, "loss": 0.1788, "step": 2954 }, { "epoch": 0.14, "grad_norm": 0.5324271347870605, "learning_rate": 1.941961582377093e-05, "loss": 0.2682, "step": 2955 }, { "epoch": 0.14, "grad_norm": 0.4826746887337732, "learning_rate": 1.9419116194242655e-05, "loss": 0.3702, "step": 2956 }, { "epoch": 0.14, "grad_norm": 0.6661161353932512, "learning_rate": 1.9418616356184233e-05, "loss": 0.3446, "step": 2957 }, { "epoch": 0.14, "grad_norm": 0.5828021767397665, "learning_rate": 1.9418116309606717e-05, "loss": 0.3189, "step": 2958 }, { "epoch": 0.14, "grad_norm": 1.8387653815033052, "learning_rate": 1.9417616054521186e-05, "loss": 0.7676, "step": 2959 }, { "epoch": 0.14, "grad_norm": 0.38570063542358163, "learning_rate": 1.941711559093871e-05, "loss": 0.2972, "step": 2960 }, { "epoch": 0.14, "grad_norm": 0.40789276258751733, "learning_rate": 1.9416614918870372e-05, "loss": 0.2603, "step": 2961 }, { "epoch": 0.14, "grad_norm": 0.5091158507427102, "learning_rate": 1.9416114038327255e-05, "loss": 0.3391, "step": 2962 }, { "epoch": 0.14, "grad_norm": 1.0085101271785766, "learning_rate": 1.9415612949320453e-05, "loss": 0.5476, "step": 2963 }, { "epoch": 0.14, "grad_norm": 0.4459637566524281, "learning_rate": 1.9415111651861052e-05, "loss": 0.2719, "step": 2964 }, { "epoch": 0.14, "grad_norm": 1.1746904239405833, "learning_rate": 1.941461014596015e-05, "loss": 0.558, "step": 2965 }, { "epoch": 0.14, "grad_norm": 0.5812730136497452, "learning_rate": 1.9414108431628857e-05, "loss": 0.3659, "step": 2966 }, { "epoch": 0.14, "grad_norm": 0.48438880590568284, "learning_rate": 1.941360650887828e-05, "loss": 0.3337, "step": 2967 }, { "epoch": 0.14, "grad_norm": 0.4054303183908276, "learning_rate": 1.9413104377719523e-05, "loss": 0.2784, "step": 2968 }, { "epoch": 0.14, "grad_norm": 0.46448095576359333, "learning_rate": 1.941260203816371e-05, "loss": 0.2318, "step": 2969 }, { "epoch": 0.14, "grad_norm": 0.5127598735175937, "learning_rate": 1.941209949022196e-05, "loss": 0.3921, "step": 2970 }, { "epoch": 0.14, "grad_norm": 1.196186185364542, "learning_rate": 1.9411596733905393e-05, "loss": 0.4027, "step": 2971 }, { "epoch": 0.14, "grad_norm": 0.4135414056053703, "learning_rate": 1.941109376922515e-05, "loss": 0.3104, "step": 2972 }, { "epoch": 0.14, "grad_norm": 0.5123966751028497, "learning_rate": 1.9410590596192362e-05, "loss": 0.3943, "step": 2973 }, { "epoch": 0.14, "grad_norm": 0.3762696736388155, "learning_rate": 1.9410087214818167e-05, "loss": 0.2127, "step": 2974 }, { "epoch": 0.14, "grad_norm": 0.4995647581073475, "learning_rate": 1.940958362511371e-05, "loss": 0.3121, "step": 2975 }, { "epoch": 0.14, "grad_norm": 0.5240746054911626, "learning_rate": 1.9409079827090145e-05, "loss": 0.3458, "step": 2976 }, { "epoch": 0.14, "grad_norm": 0.5060787714269666, "learning_rate": 1.9408575820758616e-05, "loss": 0.3205, "step": 2977 }, { "epoch": 0.14, "grad_norm": 0.6420422982925267, "learning_rate": 1.9408071606130288e-05, "loss": 0.4106, "step": 2978 }, { "epoch": 0.14, "grad_norm": 0.4614270003903362, "learning_rate": 1.9407567183216323e-05, "loss": 0.3525, "step": 2979 }, { "epoch": 0.14, "grad_norm": 0.3361710807752136, "learning_rate": 1.9407062552027887e-05, "loss": 0.2336, "step": 2980 }, { "epoch": 0.14, "grad_norm": 0.7876959945937185, "learning_rate": 1.9406557712576154e-05, "loss": 0.4422, "step": 2981 }, { "epoch": 0.14, "grad_norm": 0.38380778398443377, "learning_rate": 1.94060526648723e-05, "loss": 0.2811, "step": 2982 }, { "epoch": 0.14, "grad_norm": 0.5302023483735993, "learning_rate": 1.9405547408927504e-05, "loss": 0.4274, "step": 2983 }, { "epoch": 0.14, "grad_norm": 0.46290824341095616, "learning_rate": 1.9405041944752953e-05, "loss": 0.3127, "step": 2984 }, { "epoch": 0.14, "grad_norm": 0.4634207737682182, "learning_rate": 1.9404536272359838e-05, "loss": 0.3202, "step": 2985 }, { "epoch": 0.14, "grad_norm": 0.5826415696599602, "learning_rate": 1.9404030391759353e-05, "loss": 0.3342, "step": 2986 }, { "epoch": 0.14, "grad_norm": 0.449570055496348, "learning_rate": 1.9403524302962703e-05, "loss": 0.2501, "step": 2987 }, { "epoch": 0.14, "grad_norm": 0.35044173197799666, "learning_rate": 1.9403018005981086e-05, "loss": 0.2813, "step": 2988 }, { "epoch": 0.14, "grad_norm": 1.0127999136447283, "learning_rate": 1.9402511500825712e-05, "loss": 0.5636, "step": 2989 }, { "epoch": 0.14, "grad_norm": 0.4692235448132522, "learning_rate": 1.9402004787507798e-05, "loss": 0.3376, "step": 2990 }, { "epoch": 0.14, "grad_norm": 0.48765783876564567, "learning_rate": 1.940149786603856e-05, "loss": 0.3464, "step": 2991 }, { "epoch": 0.14, "grad_norm": 0.5072996441214087, "learning_rate": 1.9400990736429217e-05, "loss": 0.3354, "step": 2992 }, { "epoch": 0.14, "grad_norm": 0.4573707719052176, "learning_rate": 1.9400483398691e-05, "loss": 0.2482, "step": 2993 }, { "epoch": 0.14, "grad_norm": 0.40323224385066714, "learning_rate": 1.9399975852835142e-05, "loss": 0.2523, "step": 2994 }, { "epoch": 0.14, "grad_norm": 0.5338930338356309, "learning_rate": 1.939946809887288e-05, "loss": 0.3419, "step": 2995 }, { "epoch": 0.14, "grad_norm": 0.5482413294478314, "learning_rate": 1.9398960136815454e-05, "loss": 0.3692, "step": 2996 }, { "epoch": 0.14, "grad_norm": 0.42729666056444005, "learning_rate": 1.9398451966674108e-05, "loss": 0.2581, "step": 2997 }, { "epoch": 0.14, "grad_norm": 1.1707424468587229, "learning_rate": 1.9397943588460094e-05, "loss": 0.7097, "step": 2998 }, { "epoch": 0.14, "grad_norm": 0.5667960415253117, "learning_rate": 1.9397435002184665e-05, "loss": 0.3675, "step": 2999 }, { "epoch": 0.14, "grad_norm": 0.38333966835026145, "learning_rate": 1.9396926207859085e-05, "loss": 0.2488, "step": 3000 }, { "epoch": 0.14, "grad_norm": 0.5120685900681801, "learning_rate": 1.9396417205494614e-05, "loss": 0.3642, "step": 3001 }, { "epoch": 0.14, "grad_norm": 0.4793709871047172, "learning_rate": 1.9395907995102524e-05, "loss": 0.2621, "step": 3002 }, { "epoch": 0.14, "grad_norm": 0.3926186664332781, "learning_rate": 1.9395398576694087e-05, "loss": 0.2368, "step": 3003 }, { "epoch": 0.14, "grad_norm": 0.5465883412686288, "learning_rate": 1.939488895028058e-05, "loss": 0.3192, "step": 3004 }, { "epoch": 0.14, "grad_norm": 1.3117216261122207, "learning_rate": 1.939437911587329e-05, "loss": 0.792, "step": 3005 }, { "epoch": 0.14, "grad_norm": 0.39981982058204735, "learning_rate": 1.9393869073483492e-05, "loss": 0.2712, "step": 3006 }, { "epoch": 0.14, "grad_norm": 0.5020979732613493, "learning_rate": 1.9393358823122496e-05, "loss": 0.3882, "step": 3007 }, { "epoch": 0.14, "grad_norm": 0.30887277093037674, "learning_rate": 1.9392848364801583e-05, "loss": 0.2223, "step": 3008 }, { "epoch": 0.14, "grad_norm": 0.46107887850317086, "learning_rate": 1.939233769853206e-05, "loss": 0.2854, "step": 3009 }, { "epoch": 0.14, "grad_norm": 1.1581240759842242, "learning_rate": 1.9391826824325238e-05, "loss": 0.4464, "step": 3010 }, { "epoch": 0.14, "grad_norm": 0.42789056194553304, "learning_rate": 1.939131574219242e-05, "loss": 0.3396, "step": 3011 }, { "epoch": 0.14, "grad_norm": 0.43944187394143236, "learning_rate": 1.9390804452144922e-05, "loss": 0.2942, "step": 3012 }, { "epoch": 0.14, "grad_norm": 1.0499571738476294, "learning_rate": 1.939029295419406e-05, "loss": 0.4391, "step": 3013 }, { "epoch": 0.14, "grad_norm": 0.3356106445064085, "learning_rate": 1.9389781248351168e-05, "loss": 0.2261, "step": 3014 }, { "epoch": 0.14, "grad_norm": 0.4533181388320035, "learning_rate": 1.938926933462757e-05, "loss": 0.2816, "step": 3015 }, { "epoch": 0.14, "grad_norm": 0.519220593449586, "learning_rate": 1.9388757213034596e-05, "loss": 0.2955, "step": 3016 }, { "epoch": 0.14, "grad_norm": 1.4025788590626416, "learning_rate": 1.9388244883583587e-05, "loss": 0.5466, "step": 3017 }, { "epoch": 0.14, "grad_norm": 0.4295631653910816, "learning_rate": 1.9387732346285885e-05, "loss": 0.2972, "step": 3018 }, { "epoch": 0.14, "grad_norm": 0.3803640394142232, "learning_rate": 1.9387219601152837e-05, "loss": 0.3173, "step": 3019 }, { "epoch": 0.14, "grad_norm": 0.38648590453852244, "learning_rate": 1.9386706648195793e-05, "loss": 0.2187, "step": 3020 }, { "epoch": 0.14, "grad_norm": 0.4419829471251474, "learning_rate": 1.938619348742611e-05, "loss": 0.2873, "step": 3021 }, { "epoch": 0.14, "grad_norm": 1.1436234691471576, "learning_rate": 1.9385680118855153e-05, "loss": 0.7067, "step": 3022 }, { "epoch": 0.14, "grad_norm": 0.7051584421199674, "learning_rate": 1.938516654249428e-05, "loss": 0.3338, "step": 3023 }, { "epoch": 0.14, "grad_norm": 0.501472317795451, "learning_rate": 1.9384652758354872e-05, "loss": 0.3227, "step": 3024 }, { "epoch": 0.14, "grad_norm": 1.0605457377255982, "learning_rate": 1.938413876644829e-05, "loss": 0.613, "step": 3025 }, { "epoch": 0.14, "grad_norm": 0.2598785475579807, "learning_rate": 1.9383624566785924e-05, "loss": 0.0994, "step": 3026 }, { "epoch": 0.14, "grad_norm": 0.4354805647913339, "learning_rate": 1.9383110159379154e-05, "loss": 0.2906, "step": 3027 }, { "epoch": 0.14, "grad_norm": 0.5142853282298973, "learning_rate": 1.938259554423937e-05, "loss": 0.3468, "step": 3028 }, { "epoch": 0.14, "grad_norm": 0.6778219088987136, "learning_rate": 1.938208072137796e-05, "loss": 0.3724, "step": 3029 }, { "epoch": 0.14, "grad_norm": 0.4504311081264537, "learning_rate": 1.9381565690806328e-05, "loss": 0.2896, "step": 3030 }, { "epoch": 0.14, "grad_norm": 0.5953598184661302, "learning_rate": 1.9381050452535873e-05, "loss": 0.4465, "step": 3031 }, { "epoch": 0.14, "grad_norm": 0.3231538294241132, "learning_rate": 1.9380535006578e-05, "loss": 0.2206, "step": 3032 }, { "epoch": 0.14, "grad_norm": 0.38436959237511403, "learning_rate": 1.9380019352944127e-05, "loss": 0.2183, "step": 3033 }, { "epoch": 0.14, "grad_norm": 0.7741207498064205, "learning_rate": 1.9379503491645666e-05, "loss": 0.5124, "step": 3034 }, { "epoch": 0.14, "grad_norm": 0.45391623262727476, "learning_rate": 1.9378987422694035e-05, "loss": 0.3435, "step": 3035 }, { "epoch": 0.14, "grad_norm": 0.37519019234806844, "learning_rate": 1.9378471146100662e-05, "loss": 0.2625, "step": 3036 }, { "epoch": 0.14, "grad_norm": 0.9484861915160815, "learning_rate": 1.937795466187698e-05, "loss": 0.671, "step": 3037 }, { "epoch": 0.14, "grad_norm": 0.3412270275660992, "learning_rate": 1.937743797003442e-05, "loss": 0.1877, "step": 3038 }, { "epoch": 0.14, "grad_norm": 0.336172842286069, "learning_rate": 1.937692107058442e-05, "loss": 0.2427, "step": 3039 }, { "epoch": 0.14, "grad_norm": 0.581892218083669, "learning_rate": 1.9376403963538424e-05, "loss": 0.3541, "step": 3040 }, { "epoch": 0.14, "grad_norm": 1.0103575848048958, "learning_rate": 1.9375886648907882e-05, "loss": 0.5249, "step": 3041 }, { "epoch": 0.14, "grad_norm": 0.3988257348087596, "learning_rate": 1.937536912670425e-05, "loss": 0.2335, "step": 3042 }, { "epoch": 0.14, "grad_norm": 0.6395909430725215, "learning_rate": 1.9374851396938978e-05, "loss": 0.3624, "step": 3043 }, { "epoch": 0.14, "grad_norm": 0.43720429592972626, "learning_rate": 1.9374333459623532e-05, "loss": 0.2892, "step": 3044 }, { "epoch": 0.14, "grad_norm": 0.35300830278020523, "learning_rate": 1.9373815314769375e-05, "loss": 0.2373, "step": 3045 }, { "epoch": 0.14, "grad_norm": 1.1798288142491746, "learning_rate": 1.9373296962387988e-05, "loss": 0.4301, "step": 3046 }, { "epoch": 0.14, "grad_norm": 0.452486458373176, "learning_rate": 1.9372778402490834e-05, "loss": 0.3841, "step": 3047 }, { "epoch": 0.14, "grad_norm": 0.726923586227702, "learning_rate": 1.9372259635089405e-05, "loss": 0.3832, "step": 3048 }, { "epoch": 0.14, "grad_norm": 0.5641466893554887, "learning_rate": 1.9371740660195178e-05, "loss": 0.2882, "step": 3049 }, { "epoch": 0.14, "grad_norm": 0.36370897264492574, "learning_rate": 1.9371221477819647e-05, "loss": 0.2267, "step": 3050 }, { "epoch": 0.14, "grad_norm": 0.515165584941363, "learning_rate": 1.9370702087974302e-05, "loss": 0.2964, "step": 3051 }, { "epoch": 0.14, "grad_norm": 0.9801416042731813, "learning_rate": 1.9370182490670645e-05, "loss": 0.3482, "step": 3052 }, { "epoch": 0.14, "grad_norm": 1.6724616429235706, "learning_rate": 1.936966268592018e-05, "loss": 0.7983, "step": 3053 }, { "epoch": 0.14, "grad_norm": 0.4570869717991394, "learning_rate": 1.9369142673734416e-05, "loss": 0.3219, "step": 3054 }, { "epoch": 0.14, "grad_norm": 0.412349700555376, "learning_rate": 1.9368622454124863e-05, "loss": 0.3023, "step": 3055 }, { "epoch": 0.14, "grad_norm": 1.194767227056874, "learning_rate": 1.9368102027103032e-05, "loss": 0.6455, "step": 3056 }, { "epoch": 0.14, "grad_norm": 0.35822342598006096, "learning_rate": 1.9367581392680458e-05, "loss": 0.2533, "step": 3057 }, { "epoch": 0.14, "grad_norm": 0.6267124207515261, "learning_rate": 1.9367060550868657e-05, "loss": 0.4291, "step": 3058 }, { "epoch": 0.14, "grad_norm": 0.38445614554466684, "learning_rate": 1.9366539501679165e-05, "loss": 0.2431, "step": 3059 }, { "epoch": 0.14, "grad_norm": 0.49512261636507643, "learning_rate": 1.9366018245123515e-05, "loss": 0.2905, "step": 3060 }, { "epoch": 0.14, "grad_norm": 1.4357130953093995, "learning_rate": 1.9365496781213248e-05, "loss": 0.7668, "step": 3061 }, { "epoch": 0.14, "grad_norm": 0.5340813233536238, "learning_rate": 1.9364975109959913e-05, "loss": 0.3313, "step": 3062 }, { "epoch": 0.14, "grad_norm": 0.44446208227026246, "learning_rate": 1.9364453231375048e-05, "loss": 0.2806, "step": 3063 }, { "epoch": 0.14, "grad_norm": 0.5316307284252908, "learning_rate": 1.9363931145470222e-05, "loss": 0.401, "step": 3064 }, { "epoch": 0.14, "grad_norm": 0.28721554476489247, "learning_rate": 1.936340885225698e-05, "loss": 0.1263, "step": 3065 }, { "epoch": 0.14, "grad_norm": 0.749450635299404, "learning_rate": 1.936288635174689e-05, "loss": 0.3665, "step": 3066 }, { "epoch": 0.14, "grad_norm": 0.4376586622221232, "learning_rate": 1.9362363643951524e-05, "loss": 0.3126, "step": 3067 }, { "epoch": 0.14, "grad_norm": 0.8901577863002759, "learning_rate": 1.9361840728882447e-05, "loss": 0.4311, "step": 3068 }, { "epoch": 0.14, "grad_norm": 0.6894238174113327, "learning_rate": 1.936131760655124e-05, "loss": 0.3789, "step": 3069 }, { "epoch": 0.14, "grad_norm": 0.4774387998226648, "learning_rate": 1.9360794276969487e-05, "loss": 0.2726, "step": 3070 }, { "epoch": 0.14, "grad_norm": 0.5593482846428411, "learning_rate": 1.9360270740148766e-05, "loss": 0.2847, "step": 3071 }, { "epoch": 0.14, "grad_norm": 0.39694825605655887, "learning_rate": 1.9359746996100676e-05, "loss": 0.2226, "step": 3072 }, { "epoch": 0.14, "grad_norm": 0.603305899650726, "learning_rate": 1.9359223044836807e-05, "loss": 0.3926, "step": 3073 }, { "epoch": 0.14, "grad_norm": 1.0420300824190283, "learning_rate": 1.935869888636876e-05, "loss": 0.5284, "step": 3074 }, { "epoch": 0.14, "grad_norm": 0.41835801848217313, "learning_rate": 1.935817452070814e-05, "loss": 0.266, "step": 3075 }, { "epoch": 0.14, "grad_norm": 0.5765255494226064, "learning_rate": 1.9357649947866558e-05, "loss": 0.4027, "step": 3076 }, { "epoch": 0.14, "grad_norm": 0.5960438968910211, "learning_rate": 1.935712516785562e-05, "loss": 0.2547, "step": 3077 }, { "epoch": 0.14, "grad_norm": 0.3032855485650015, "learning_rate": 1.9356600180686954e-05, "loss": 0.1758, "step": 3078 }, { "epoch": 0.14, "grad_norm": 0.45961567964926103, "learning_rate": 1.9356074986372176e-05, "loss": 0.3327, "step": 3079 }, { "epoch": 0.14, "grad_norm": 1.1170804601775841, "learning_rate": 1.9355549584922917e-05, "loss": 0.5158, "step": 3080 }, { "epoch": 0.14, "grad_norm": 0.4279031917199713, "learning_rate": 1.9355023976350805e-05, "loss": 0.2418, "step": 3081 }, { "epoch": 0.14, "grad_norm": 0.5915021649071351, "learning_rate": 1.935449816066748e-05, "loss": 0.4054, "step": 3082 }, { "epoch": 0.14, "grad_norm": 0.3735567539083631, "learning_rate": 1.935397213788458e-05, "loss": 0.2869, "step": 3083 }, { "epoch": 0.14, "grad_norm": 1.0356493275063283, "learning_rate": 1.9353445908013756e-05, "loss": 0.602, "step": 3084 }, { "epoch": 0.14, "grad_norm": 0.42929026095302564, "learning_rate": 1.935291947106665e-05, "loss": 0.236, "step": 3085 }, { "epoch": 0.14, "grad_norm": 0.868352016420955, "learning_rate": 1.9352392827054926e-05, "loss": 0.4177, "step": 3086 }, { "epoch": 0.14, "grad_norm": 0.626643714235681, "learning_rate": 1.9351865975990235e-05, "loss": 0.3575, "step": 3087 }, { "epoch": 0.14, "grad_norm": 0.43976400904505125, "learning_rate": 1.9351338917884247e-05, "loss": 0.2362, "step": 3088 }, { "epoch": 0.14, "grad_norm": 1.2571203273599227, "learning_rate": 1.9350811652748625e-05, "loss": 0.7756, "step": 3089 }, { "epoch": 0.14, "grad_norm": 0.39479254298439054, "learning_rate": 1.935028418059505e-05, "loss": 0.2748, "step": 3090 }, { "epoch": 0.14, "grad_norm": 0.38658171780407424, "learning_rate": 1.9349756501435192e-05, "loss": 0.2367, "step": 3091 }, { "epoch": 0.14, "grad_norm": 0.8964361605996685, "learning_rate": 1.9349228615280736e-05, "loss": 0.5108, "step": 3092 }, { "epoch": 0.14, "grad_norm": 0.5883277176850754, "learning_rate": 1.9348700522143374e-05, "loss": 0.3627, "step": 3093 }, { "epoch": 0.14, "grad_norm": 0.6356564615886784, "learning_rate": 1.934817222203479e-05, "loss": 0.2501, "step": 3094 }, { "epoch": 0.14, "grad_norm": 0.513464236082426, "learning_rate": 1.9347643714966682e-05, "loss": 0.3859, "step": 3095 }, { "epoch": 0.14, "grad_norm": 0.7817588579059302, "learning_rate": 1.934711500095075e-05, "loss": 0.4073, "step": 3096 }, { "epoch": 0.14, "grad_norm": 0.5613951357956724, "learning_rate": 1.9346586079998705e-05, "loss": 0.3567, "step": 3097 }, { "epoch": 0.14, "grad_norm": 0.4103688167604076, "learning_rate": 1.934605695212225e-05, "loss": 0.2424, "step": 3098 }, { "epoch": 0.14, "grad_norm": 0.5219125871738478, "learning_rate": 1.9345527617333103e-05, "loss": 0.3449, "step": 3099 }, { "epoch": 0.14, "grad_norm": 0.5713456411238049, "learning_rate": 1.934499807564298e-05, "loss": 0.3535, "step": 3100 }, { "epoch": 0.14, "grad_norm": 0.9670463387595841, "learning_rate": 1.934446832706361e-05, "loss": 0.5215, "step": 3101 }, { "epoch": 0.14, "grad_norm": 0.7128032153454966, "learning_rate": 1.9343938371606714e-05, "loss": 0.4638, "step": 3102 }, { "epoch": 0.14, "grad_norm": 0.5117238957593905, "learning_rate": 1.934340820928403e-05, "loss": 0.2961, "step": 3103 }, { "epoch": 0.14, "grad_norm": 0.3838912001935049, "learning_rate": 1.9342877840107292e-05, "loss": 0.2009, "step": 3104 }, { "epoch": 0.14, "grad_norm": 0.9074265473426006, "learning_rate": 1.9342347264088245e-05, "loss": 0.4875, "step": 3105 }, { "epoch": 0.14, "grad_norm": 0.5009958674211009, "learning_rate": 1.934181648123863e-05, "loss": 0.3176, "step": 3106 }, { "epoch": 0.14, "grad_norm": 0.4623723583297876, "learning_rate": 1.9341285491570207e-05, "loss": 0.3031, "step": 3107 }, { "epoch": 0.14, "grad_norm": 0.7211731899186699, "learning_rate": 1.9340754295094726e-05, "loss": 0.4879, "step": 3108 }, { "epoch": 0.14, "grad_norm": 0.4307073952641585, "learning_rate": 1.9340222891823943e-05, "loss": 0.3471, "step": 3109 }, { "epoch": 0.14, "grad_norm": 0.6207881965037696, "learning_rate": 1.9339691281769632e-05, "loss": 0.3446, "step": 3110 }, { "epoch": 0.14, "grad_norm": 0.3605653062959438, "learning_rate": 1.9339159464943558e-05, "loss": 0.2151, "step": 3111 }, { "epoch": 0.14, "grad_norm": 0.48203588067908704, "learning_rate": 1.933862744135749e-05, "loss": 0.317, "step": 3112 }, { "epoch": 0.14, "grad_norm": 1.5983382295056547, "learning_rate": 1.9338095211023216e-05, "loss": 0.8199, "step": 3113 }, { "epoch": 0.14, "grad_norm": 0.6175340310973247, "learning_rate": 1.9337562773952512e-05, "loss": 0.2933, "step": 3114 }, { "epoch": 0.14, "grad_norm": 0.47368056372614004, "learning_rate": 1.9337030130157166e-05, "loss": 0.3058, "step": 3115 }, { "epoch": 0.14, "grad_norm": 0.47135945769868387, "learning_rate": 1.9336497279648977e-05, "loss": 0.347, "step": 3116 }, { "epoch": 0.14, "grad_norm": 0.27242105592464244, "learning_rate": 1.9335964222439733e-05, "loss": 0.1132, "step": 3117 }, { "epoch": 0.14, "grad_norm": 0.5139362719838799, "learning_rate": 1.933543095854124e-05, "loss": 0.3274, "step": 3118 }, { "epoch": 0.14, "grad_norm": 0.5148103179931479, "learning_rate": 1.93348974879653e-05, "loss": 0.3557, "step": 3119 }, { "epoch": 0.14, "grad_norm": 1.0088769052468993, "learning_rate": 1.9334363810723733e-05, "loss": 0.434, "step": 3120 }, { "epoch": 0.14, "grad_norm": 0.47735853819042645, "learning_rate": 1.9333829926828347e-05, "loss": 0.2941, "step": 3121 }, { "epoch": 0.14, "grad_norm": 0.4882950644129855, "learning_rate": 1.933329583629096e-05, "loss": 0.3453, "step": 3122 }, { "epoch": 0.14, "grad_norm": 0.35974849404424575, "learning_rate": 1.93327615391234e-05, "loss": 0.2024, "step": 3123 }, { "epoch": 0.14, "grad_norm": 0.41255318366801796, "learning_rate": 1.933222703533749e-05, "loss": 0.2407, "step": 3124 }, { "epoch": 0.14, "grad_norm": 1.1115162413354707, "learning_rate": 1.9331692324945072e-05, "loss": 0.5387, "step": 3125 }, { "epoch": 0.14, "grad_norm": 0.553485779764722, "learning_rate": 1.933115740795798e-05, "loss": 0.3811, "step": 3126 }, { "epoch": 0.14, "grad_norm": 0.38948878861094655, "learning_rate": 1.9330622284388057e-05, "loss": 0.2385, "step": 3127 }, { "epoch": 0.14, "grad_norm": 1.2777550969012572, "learning_rate": 1.933008695424715e-05, "loss": 0.696, "step": 3128 }, { "epoch": 0.14, "grad_norm": 0.37741015210642403, "learning_rate": 1.932955141754711e-05, "loss": 0.175, "step": 3129 }, { "epoch": 0.14, "grad_norm": 0.40205784396575917, "learning_rate": 1.932901567429979e-05, "loss": 0.2546, "step": 3130 }, { "epoch": 0.14, "grad_norm": 0.6602560551868947, "learning_rate": 1.9328479724517057e-05, "loss": 0.4296, "step": 3131 }, { "epoch": 0.14, "grad_norm": 1.1272302028814996, "learning_rate": 1.9327943568210775e-05, "loss": 0.5583, "step": 3132 }, { "epoch": 0.14, "grad_norm": 0.42758808120611264, "learning_rate": 1.932740720539281e-05, "loss": 0.2219, "step": 3133 }, { "epoch": 0.14, "grad_norm": 0.5187066250663277, "learning_rate": 1.9326870636075042e-05, "loss": 0.3824, "step": 3134 }, { "epoch": 0.14, "grad_norm": 0.39098493002892803, "learning_rate": 1.9326333860269347e-05, "loss": 0.1976, "step": 3135 }, { "epoch": 0.14, "grad_norm": 0.46286981207879774, "learning_rate": 1.9325796877987607e-05, "loss": 0.3069, "step": 3136 }, { "epoch": 0.14, "grad_norm": 1.1093022522423166, "learning_rate": 1.9325259689241714e-05, "loss": 0.4565, "step": 3137 }, { "epoch": 0.14, "grad_norm": 0.5166068260655114, "learning_rate": 1.932472229404356e-05, "loss": 0.3738, "step": 3138 }, { "epoch": 0.14, "grad_norm": 0.45517193194226285, "learning_rate": 1.932418469240504e-05, "loss": 0.2971, "step": 3139 }, { "epoch": 0.14, "grad_norm": 0.8685505899577974, "learning_rate": 1.932364688433806e-05, "loss": 0.4191, "step": 3140 }, { "epoch": 0.14, "grad_norm": 0.334812296149803, "learning_rate": 1.9323108869854522e-05, "loss": 0.1751, "step": 3141 }, { "epoch": 0.14, "grad_norm": 0.48383810120579757, "learning_rate": 1.932257064896634e-05, "loss": 0.3219, "step": 3142 }, { "epoch": 0.14, "grad_norm": 0.5473558823070933, "learning_rate": 1.932203222168543e-05, "loss": 0.3468, "step": 3143 }, { "epoch": 0.14, "grad_norm": 1.3030465460043923, "learning_rate": 1.932149358802371e-05, "loss": 0.9103, "step": 3144 }, { "epoch": 0.14, "grad_norm": 0.4264402447689228, "learning_rate": 1.9320954747993106e-05, "loss": 0.2879, "step": 3145 }, { "epoch": 0.14, "grad_norm": 0.5747687122894152, "learning_rate": 1.932041570160555e-05, "loss": 0.3869, "step": 3146 }, { "epoch": 0.14, "grad_norm": 0.3499749620250195, "learning_rate": 1.931987644887297e-05, "loss": 0.2106, "step": 3147 }, { "epoch": 0.14, "grad_norm": 0.3958265029799334, "learning_rate": 1.931933698980731e-05, "loss": 0.3117, "step": 3148 }, { "epoch": 0.14, "grad_norm": 0.727048942262723, "learning_rate": 1.931879732442051e-05, "loss": 0.5368, "step": 3149 }, { "epoch": 0.14, "grad_norm": 0.5044311380775349, "learning_rate": 1.931825745272452e-05, "loss": 0.3228, "step": 3150 }, { "epoch": 0.14, "grad_norm": 0.5023875892921424, "learning_rate": 1.9317717374731292e-05, "loss": 0.3057, "step": 3151 }, { "epoch": 0.14, "grad_norm": 0.7016889597521573, "learning_rate": 1.931717709045278e-05, "loss": 0.3991, "step": 3152 }, { "epoch": 0.14, "grad_norm": 0.40331104420299063, "learning_rate": 1.9316636599900947e-05, "loss": 0.3522, "step": 3153 }, { "epoch": 0.14, "grad_norm": 0.5241706320398022, "learning_rate": 1.931609590308776e-05, "loss": 0.3225, "step": 3154 }, { "epoch": 0.14, "grad_norm": 0.3740078675819883, "learning_rate": 1.931555500002519e-05, "loss": 0.2415, "step": 3155 }, { "epoch": 0.14, "grad_norm": 1.1405798232241413, "learning_rate": 1.931501389072521e-05, "loss": 0.586, "step": 3156 }, { "epoch": 0.15, "grad_norm": 0.49033640783815846, "learning_rate": 1.93144725751998e-05, "loss": 0.2898, "step": 3157 }, { "epoch": 0.15, "grad_norm": 0.4694372411955041, "learning_rate": 1.931393105346094e-05, "loss": 0.3511, "step": 3158 }, { "epoch": 0.15, "grad_norm": 1.0157328287272502, "learning_rate": 1.931338932552063e-05, "loss": 0.503, "step": 3159 }, { "epoch": 0.15, "grad_norm": 0.44054732427689425, "learning_rate": 1.9312847391390853e-05, "loss": 0.2743, "step": 3160 }, { "epoch": 0.15, "grad_norm": 0.45787188658909833, "learning_rate": 1.9312305251083613e-05, "loss": 0.3205, "step": 3161 }, { "epoch": 0.15, "grad_norm": 0.47298131094982027, "learning_rate": 1.9311762904610913e-05, "loss": 0.3438, "step": 3162 }, { "epoch": 0.15, "grad_norm": 0.3221861595436087, "learning_rate": 1.931122035198475e-05, "loss": 0.1875, "step": 3163 }, { "epoch": 0.15, "grad_norm": 1.4479287194271593, "learning_rate": 1.9310677593217148e-05, "loss": 0.6514, "step": 3164 }, { "epoch": 0.15, "grad_norm": 0.7957697410146628, "learning_rate": 1.9310134628320116e-05, "loss": 0.4793, "step": 3165 }, { "epoch": 0.15, "grad_norm": 0.5345584119848411, "learning_rate": 1.9309591457305673e-05, "loss": 0.2487, "step": 3166 }, { "epoch": 0.15, "grad_norm": 0.5586270857642788, "learning_rate": 1.930904808018585e-05, "loss": 0.2693, "step": 3167 }, { "epoch": 0.15, "grad_norm": 0.5025095582070777, "learning_rate": 1.9308504496972676e-05, "loss": 0.3158, "step": 3168 }, { "epoch": 0.15, "grad_norm": 0.4276138329993246, "learning_rate": 1.9307960707678185e-05, "loss": 0.1915, "step": 3169 }, { "epoch": 0.15, "grad_norm": 0.39186645826195876, "learning_rate": 1.9307416712314415e-05, "loss": 0.341, "step": 3170 }, { "epoch": 0.15, "grad_norm": 0.807479002631538, "learning_rate": 1.9306872510893407e-05, "loss": 0.4967, "step": 3171 }, { "epoch": 0.15, "grad_norm": 0.7918766368098512, "learning_rate": 1.9306328103427214e-05, "loss": 0.3508, "step": 3172 }, { "epoch": 0.15, "grad_norm": 0.4446293951684391, "learning_rate": 1.9305783489927886e-05, "loss": 0.2895, "step": 3173 }, { "epoch": 0.15, "grad_norm": 0.4767187685024565, "learning_rate": 1.930523867040748e-05, "loss": 0.3787, "step": 3174 }, { "epoch": 0.15, "grad_norm": 0.40019431117495385, "learning_rate": 1.9304693644878057e-05, "loss": 0.1505, "step": 3175 }, { "epoch": 0.15, "grad_norm": 0.4455918680395822, "learning_rate": 1.9304148413351687e-05, "loss": 0.3268, "step": 3176 }, { "epoch": 0.15, "grad_norm": 0.8683809062909452, "learning_rate": 1.9303602975840436e-05, "loss": 0.5015, "step": 3177 }, { "epoch": 0.15, "grad_norm": 0.39139701711935504, "learning_rate": 1.9303057332356386e-05, "loss": 0.2954, "step": 3178 }, { "epoch": 0.15, "grad_norm": 0.5558429432628517, "learning_rate": 1.930251148291161e-05, "loss": 0.2984, "step": 3179 }, { "epoch": 0.15, "grad_norm": 0.5010111188600969, "learning_rate": 1.9301965427518196e-05, "loss": 0.3111, "step": 3180 }, { "epoch": 0.15, "grad_norm": 0.34024890323423984, "learning_rate": 1.9301419166188237e-05, "loss": 0.2288, "step": 3181 }, { "epoch": 0.15, "grad_norm": 0.4033983913869017, "learning_rate": 1.9300872698933814e-05, "loss": 0.286, "step": 3182 }, { "epoch": 0.15, "grad_norm": 0.8657700252769647, "learning_rate": 1.930032602576704e-05, "loss": 0.5075, "step": 3183 }, { "epoch": 0.15, "grad_norm": 0.49056725907574783, "learning_rate": 1.929977914670001e-05, "loss": 0.3039, "step": 3184 }, { "epoch": 0.15, "grad_norm": 0.6186625125551448, "learning_rate": 1.9299232061744833e-05, "loss": 0.3722, "step": 3185 }, { "epoch": 0.15, "grad_norm": 0.4533090921206725, "learning_rate": 1.929868477091362e-05, "loss": 0.3045, "step": 3186 }, { "epoch": 0.15, "grad_norm": 0.3303742698411355, "learning_rate": 1.929813727421849e-05, "loss": 0.2245, "step": 3187 }, { "epoch": 0.15, "grad_norm": 0.6080446515393702, "learning_rate": 1.929758957167156e-05, "loss": 0.3689, "step": 3188 }, { "epoch": 0.15, "grad_norm": 0.3923103014251482, "learning_rate": 1.9297041663284962e-05, "loss": 0.2681, "step": 3189 }, { "epoch": 0.15, "grad_norm": 0.7392780901983406, "learning_rate": 1.929649354907082e-05, "loss": 0.3038, "step": 3190 }, { "epoch": 0.15, "grad_norm": 0.5710596378624694, "learning_rate": 1.9295945229041272e-05, "loss": 0.3741, "step": 3191 }, { "epoch": 0.15, "grad_norm": 1.0158258086576393, "learning_rate": 1.9295396703208454e-05, "loss": 0.479, "step": 3192 }, { "epoch": 0.15, "grad_norm": 0.5678705213468687, "learning_rate": 1.9294847971584513e-05, "loss": 0.3769, "step": 3193 }, { "epoch": 0.15, "grad_norm": 0.4670509314420693, "learning_rate": 1.9294299034181594e-05, "loss": 0.3384, "step": 3194 }, { "epoch": 0.15, "grad_norm": 0.27623614490639964, "learning_rate": 1.9293749891011855e-05, "loss": 0.1386, "step": 3195 }, { "epoch": 0.15, "grad_norm": 0.7682915154555132, "learning_rate": 1.9293200542087453e-05, "loss": 0.3626, "step": 3196 }, { "epoch": 0.15, "grad_norm": 0.5279769404968618, "learning_rate": 1.9292650987420543e-05, "loss": 0.3554, "step": 3197 }, { "epoch": 0.15, "grad_norm": 0.5299545096333286, "learning_rate": 1.92921012270233e-05, "loss": 0.3284, "step": 3198 }, { "epoch": 0.15, "grad_norm": 0.4617630524083326, "learning_rate": 1.929155126090789e-05, "loss": 0.2786, "step": 3199 }, { "epoch": 0.15, "grad_norm": 0.5454302394682277, "learning_rate": 1.929100108908649e-05, "loss": 0.4754, "step": 3200 }, { "epoch": 0.15, "grad_norm": 0.4338124441517534, "learning_rate": 1.9290450711571282e-05, "loss": 0.2396, "step": 3201 }, { "epoch": 0.15, "grad_norm": 0.37686532247718624, "learning_rate": 1.9289900128374446e-05, "loss": 0.2331, "step": 3202 }, { "epoch": 0.15, "grad_norm": 0.5891473713972789, "learning_rate": 1.9289349339508182e-05, "loss": 0.4028, "step": 3203 }, { "epoch": 0.15, "grad_norm": 0.7410257508227323, "learning_rate": 1.9288798344984673e-05, "loss": 0.5607, "step": 3204 }, { "epoch": 0.15, "grad_norm": 0.38872939624007946, "learning_rate": 1.928824714481612e-05, "loss": 0.2891, "step": 3205 }, { "epoch": 0.15, "grad_norm": 0.43577287495685746, "learning_rate": 1.928769573901473e-05, "loss": 0.3048, "step": 3206 }, { "epoch": 0.15, "grad_norm": 0.36404204777898613, "learning_rate": 1.9287144127592704e-05, "loss": 0.2077, "step": 3207 }, { "epoch": 0.15, "grad_norm": 0.5096703451805814, "learning_rate": 1.928659231056226e-05, "loss": 0.1539, "step": 3208 }, { "epoch": 0.15, "grad_norm": 0.5258365712449604, "learning_rate": 1.9286040287935614e-05, "loss": 0.3614, "step": 3209 }, { "epoch": 0.15, "grad_norm": 0.4864640441382751, "learning_rate": 1.9285488059724985e-05, "loss": 0.3875, "step": 3210 }, { "epoch": 0.15, "grad_norm": 0.6650448955162372, "learning_rate": 1.92849356259426e-05, "loss": 0.4279, "step": 3211 }, { "epoch": 0.15, "grad_norm": 0.42961559333754157, "learning_rate": 1.9284382986600692e-05, "loss": 0.2781, "step": 3212 }, { "epoch": 0.15, "grad_norm": 0.35629413293473033, "learning_rate": 1.9283830141711487e-05, "loss": 0.1621, "step": 3213 }, { "epoch": 0.15, "grad_norm": 0.47219871543789527, "learning_rate": 1.9283277091287237e-05, "loss": 0.3068, "step": 3214 }, { "epoch": 0.15, "grad_norm": 0.44318002424973796, "learning_rate": 1.9282723835340173e-05, "loss": 0.272, "step": 3215 }, { "epoch": 0.15, "grad_norm": 0.8280396135063036, "learning_rate": 1.9282170373882553e-05, "loss": 0.4922, "step": 3216 }, { "epoch": 0.15, "grad_norm": 0.4185485193047068, "learning_rate": 1.9281616706926632e-05, "loss": 0.3226, "step": 3217 }, { "epoch": 0.15, "grad_norm": 0.4690936911060301, "learning_rate": 1.9281062834484656e-05, "loss": 0.2356, "step": 3218 }, { "epoch": 0.15, "grad_norm": 0.31955870983017265, "learning_rate": 1.9280508756568895e-05, "loss": 0.1843, "step": 3219 }, { "epoch": 0.15, "grad_norm": 0.5254575889635318, "learning_rate": 1.927995447319162e-05, "loss": 0.3301, "step": 3220 }, { "epoch": 0.15, "grad_norm": 0.4872702392818005, "learning_rate": 1.9279399984365094e-05, "loss": 0.2988, "step": 3221 }, { "epoch": 0.15, "grad_norm": 0.48131124066296466, "learning_rate": 1.9278845290101594e-05, "loss": 0.3855, "step": 3222 }, { "epoch": 0.15, "grad_norm": 0.6557331026414057, "learning_rate": 1.9278290390413404e-05, "loss": 0.3521, "step": 3223 }, { "epoch": 0.15, "grad_norm": 0.5461575474809912, "learning_rate": 1.9277735285312805e-05, "loss": 0.3378, "step": 3224 }, { "epoch": 0.15, "grad_norm": 0.36294688277412485, "learning_rate": 1.9277179974812094e-05, "loss": 0.249, "step": 3225 }, { "epoch": 0.15, "grad_norm": 0.5608482701557713, "learning_rate": 1.9276624458923555e-05, "loss": 0.3109, "step": 3226 }, { "epoch": 0.15, "grad_norm": 0.4228448283263457, "learning_rate": 1.9276068737659495e-05, "loss": 0.2883, "step": 3227 }, { "epoch": 0.15, "grad_norm": 0.6885275099330229, "learning_rate": 1.9275512811032212e-05, "loss": 0.4361, "step": 3228 }, { "epoch": 0.15, "grad_norm": 0.4318053598840434, "learning_rate": 1.9274956679054012e-05, "loss": 0.3014, "step": 3229 }, { "epoch": 0.15, "grad_norm": 0.48066167373300417, "learning_rate": 1.9274400341737214e-05, "loss": 0.3261, "step": 3230 }, { "epoch": 0.15, "grad_norm": 0.31439854213144336, "learning_rate": 1.927384379909413e-05, "loss": 0.1202, "step": 3231 }, { "epoch": 0.15, "grad_norm": 0.42552318931994654, "learning_rate": 1.927328705113708e-05, "loss": 0.3098, "step": 3232 }, { "epoch": 0.15, "grad_norm": 0.3759589021155831, "learning_rate": 1.92727300978784e-05, "loss": 0.3337, "step": 3233 }, { "epoch": 0.15, "grad_norm": 0.601441605695641, "learning_rate": 1.9272172939330406e-05, "loss": 0.3329, "step": 3234 }, { "epoch": 0.15, "grad_norm": 0.8761752522400605, "learning_rate": 1.9271615575505445e-05, "loss": 0.5781, "step": 3235 }, { "epoch": 0.15, "grad_norm": 0.45208895347602995, "learning_rate": 1.9271058006415845e-05, "loss": 0.3135, "step": 3236 }, { "epoch": 0.15, "grad_norm": 0.6807572315940577, "learning_rate": 1.927050023207396e-05, "loss": 0.4518, "step": 3237 }, { "epoch": 0.15, "grad_norm": 0.3062312840277606, "learning_rate": 1.9269942252492134e-05, "loss": 0.212, "step": 3238 }, { "epoch": 0.15, "grad_norm": 0.45363753036145527, "learning_rate": 1.9269384067682725e-05, "loss": 0.3244, "step": 3239 }, { "epoch": 0.15, "grad_norm": 0.8604366655507797, "learning_rate": 1.926882567765808e-05, "loss": 0.5354, "step": 3240 }, { "epoch": 0.15, "grad_norm": 0.43135930644390114, "learning_rate": 1.9268267082430573e-05, "loss": 0.2805, "step": 3241 }, { "epoch": 0.15, "grad_norm": 0.5364504020433147, "learning_rate": 1.9267708282012563e-05, "loss": 0.2905, "step": 3242 }, { "epoch": 0.15, "grad_norm": 0.5488661320997008, "learning_rate": 1.9267149276416424e-05, "loss": 0.3489, "step": 3243 }, { "epoch": 0.15, "grad_norm": 0.9155370263954979, "learning_rate": 1.9266590065654532e-05, "loss": 0.298, "step": 3244 }, { "epoch": 0.15, "grad_norm": 0.3535429216755445, "learning_rate": 1.926603064973927e-05, "loss": 0.2624, "step": 3245 }, { "epoch": 0.15, "grad_norm": 0.5368167740178603, "learning_rate": 1.9265471028683017e-05, "loss": 0.416, "step": 3246 }, { "epoch": 0.15, "grad_norm": 0.9234398128792525, "learning_rate": 1.9264911202498164e-05, "loss": 0.4423, "step": 3247 }, { "epoch": 0.15, "grad_norm": 0.4684647557990439, "learning_rate": 1.926435117119711e-05, "loss": 0.2688, "step": 3248 }, { "epoch": 0.15, "grad_norm": 0.5907174287261161, "learning_rate": 1.926379093479225e-05, "loss": 0.3488, "step": 3249 }, { "epoch": 0.15, "grad_norm": 0.6682518049715541, "learning_rate": 1.9263230493295987e-05, "loss": 0.3993, "step": 3250 }, { "epoch": 0.15, "grad_norm": 0.31436921504914705, "learning_rate": 1.9262669846720727e-05, "loss": 0.2154, "step": 3251 }, { "epoch": 0.15, "grad_norm": 0.5731635351710359, "learning_rate": 1.9262108995078885e-05, "loss": 0.3459, "step": 3252 }, { "epoch": 0.15, "grad_norm": 0.5271963303375132, "learning_rate": 1.926154793838288e-05, "loss": 0.336, "step": 3253 }, { "epoch": 0.15, "grad_norm": 0.45455298292902047, "learning_rate": 1.9260986676645127e-05, "loss": 0.2069, "step": 3254 }, { "epoch": 0.15, "grad_norm": 1.0058343659208009, "learning_rate": 1.9260425209878052e-05, "loss": 0.6, "step": 3255 }, { "epoch": 0.15, "grad_norm": 0.5564218257548972, "learning_rate": 1.9259863538094096e-05, "loss": 0.4186, "step": 3256 }, { "epoch": 0.15, "grad_norm": 0.37200959636958075, "learning_rate": 1.925930166130568e-05, "loss": 0.2095, "step": 3257 }, { "epoch": 0.15, "grad_norm": 0.39796237168670145, "learning_rate": 1.925873957952525e-05, "loss": 0.3125, "step": 3258 }, { "epoch": 0.15, "grad_norm": 0.5023912325281488, "learning_rate": 1.925817729276525e-05, "loss": 0.2842, "step": 3259 }, { "epoch": 0.15, "grad_norm": 0.4407457891965165, "learning_rate": 1.9257614801038128e-05, "loss": 0.2177, "step": 3260 }, { "epoch": 0.15, "grad_norm": 0.4762798895675324, "learning_rate": 1.9257052104356337e-05, "loss": 0.3455, "step": 3261 }, { "epoch": 0.15, "grad_norm": 0.886704237193615, "learning_rate": 1.9256489202732333e-05, "loss": 0.5161, "step": 3262 }, { "epoch": 0.15, "grad_norm": 0.4157789720900731, "learning_rate": 1.925592609617858e-05, "loss": 0.3021, "step": 3263 }, { "epoch": 0.15, "grad_norm": 0.4377061734221589, "learning_rate": 1.9255362784707546e-05, "loss": 0.1923, "step": 3264 }, { "epoch": 0.15, "grad_norm": 0.36430024358809976, "learning_rate": 1.92547992683317e-05, "loss": 0.2754, "step": 3265 }, { "epoch": 0.15, "grad_norm": 0.4119056127851174, "learning_rate": 1.925423554706352e-05, "loss": 0.2939, "step": 3266 }, { "epoch": 0.15, "grad_norm": 0.9998265212843225, "learning_rate": 1.9253671620915478e-05, "loss": 0.4127, "step": 3267 }, { "epoch": 0.15, "grad_norm": 0.8562086879603014, "learning_rate": 1.925310748990007e-05, "loss": 0.5161, "step": 3268 }, { "epoch": 0.15, "grad_norm": 0.35901322617884873, "learning_rate": 1.925254315402978e-05, "loss": 0.2742, "step": 3269 }, { "epoch": 0.15, "grad_norm": 0.6241828175210817, "learning_rate": 1.9251978613317104e-05, "loss": 0.3282, "step": 3270 }, { "epoch": 0.15, "grad_norm": 0.40436323721775413, "learning_rate": 1.9251413867774537e-05, "loss": 0.2135, "step": 3271 }, { "epoch": 0.15, "grad_norm": 0.5536224034960677, "learning_rate": 1.9250848917414582e-05, "loss": 0.2964, "step": 3272 }, { "epoch": 0.15, "grad_norm": 0.5790627036452501, "learning_rate": 1.9250283762249748e-05, "loss": 0.3672, "step": 3273 }, { "epoch": 0.15, "grad_norm": 1.327590276026709, "learning_rate": 1.924971840229255e-05, "loss": 0.4477, "step": 3274 }, { "epoch": 0.15, "grad_norm": 0.49081487666703755, "learning_rate": 1.92491528375555e-05, "loss": 0.2879, "step": 3275 }, { "epoch": 0.15, "grad_norm": 0.6970790993951184, "learning_rate": 1.924858706805112e-05, "loss": 0.3895, "step": 3276 }, { "epoch": 0.15, "grad_norm": 0.31340868493387325, "learning_rate": 1.9248021093791935e-05, "loss": 0.1995, "step": 3277 }, { "epoch": 0.15, "grad_norm": 0.5390803529145244, "learning_rate": 1.924745491479048e-05, "loss": 0.2784, "step": 3278 }, { "epoch": 0.15, "grad_norm": 1.2557325667823025, "learning_rate": 1.924688853105928e-05, "loss": 0.4389, "step": 3279 }, { "epoch": 0.15, "grad_norm": 0.9167091316434258, "learning_rate": 1.9246321942610884e-05, "loss": 0.404, "step": 3280 }, { "epoch": 0.15, "grad_norm": 0.44770955422012515, "learning_rate": 1.9245755149457833e-05, "loss": 0.2887, "step": 3281 }, { "epoch": 0.15, "grad_norm": 0.5937622152708458, "learning_rate": 1.9245188151612674e-05, "loss": 0.4438, "step": 3282 }, { "epoch": 0.15, "grad_norm": 0.45601425257337, "learning_rate": 1.924462094908796e-05, "loss": 0.2287, "step": 3283 }, { "epoch": 0.15, "grad_norm": 0.4649645056399888, "learning_rate": 1.9244053541896246e-05, "loss": 0.2988, "step": 3284 }, { "epoch": 0.15, "grad_norm": 0.5502184276511177, "learning_rate": 1.92434859300501e-05, "loss": 0.3709, "step": 3285 }, { "epoch": 0.15, "grad_norm": 0.5138328953697913, "learning_rate": 1.924291811356208e-05, "loss": 0.2486, "step": 3286 }, { "epoch": 0.15, "grad_norm": 0.49727748473383454, "learning_rate": 1.9242350092444763e-05, "loss": 0.2345, "step": 3287 }, { "epoch": 0.15, "grad_norm": 1.560662131944898, "learning_rate": 1.9241781866710726e-05, "loss": 0.7828, "step": 3288 }, { "epoch": 0.15, "grad_norm": 0.4750498647626232, "learning_rate": 1.9241213436372543e-05, "loss": 0.3789, "step": 3289 }, { "epoch": 0.15, "grad_norm": 0.3475992208925108, "learning_rate": 1.9240644801442802e-05, "loss": 0.2783, "step": 3290 }, { "epoch": 0.15, "grad_norm": 0.40579746051318505, "learning_rate": 1.9240075961934092e-05, "loss": 0.2665, "step": 3291 }, { "epoch": 0.15, "grad_norm": 0.38512043530242285, "learning_rate": 1.9239506917859004e-05, "loss": 0.2552, "step": 3292 }, { "epoch": 0.15, "grad_norm": 0.4311164021082418, "learning_rate": 1.9238937669230138e-05, "loss": 0.226, "step": 3293 }, { "epoch": 0.15, "grad_norm": 0.9940823119976907, "learning_rate": 1.9238368216060098e-05, "loss": 0.4922, "step": 3294 }, { "epoch": 0.15, "grad_norm": 1.209824418830558, "learning_rate": 1.9237798558361488e-05, "loss": 0.511, "step": 3295 }, { "epoch": 0.15, "grad_norm": 0.4371190551947611, "learning_rate": 1.9237228696146922e-05, "loss": 0.2472, "step": 3296 }, { "epoch": 0.15, "grad_norm": 0.34082713099362383, "learning_rate": 1.9236658629429014e-05, "loss": 0.2703, "step": 3297 }, { "epoch": 0.15, "grad_norm": 0.4643349039847566, "learning_rate": 1.9236088358220392e-05, "loss": 0.3044, "step": 3298 }, { "epoch": 0.15, "grad_norm": 0.6497774195822112, "learning_rate": 1.923551788253367e-05, "loss": 0.3098, "step": 3299 }, { "epoch": 0.15, "grad_norm": 0.5661247585432231, "learning_rate": 1.9234947202381487e-05, "loss": 0.2929, "step": 3300 }, { "epoch": 0.15, "grad_norm": 0.9197302245810614, "learning_rate": 1.923437631777647e-05, "loss": 0.4342, "step": 3301 }, { "epoch": 0.15, "grad_norm": 0.7841073884232832, "learning_rate": 1.9233805228731265e-05, "loss": 0.3298, "step": 3302 }, { "epoch": 0.15, "grad_norm": 0.49044323148364, "learning_rate": 1.923323393525851e-05, "loss": 0.2034, "step": 3303 }, { "epoch": 0.15, "grad_norm": 0.40820254885043755, "learning_rate": 1.9232662437370855e-05, "loss": 0.2612, "step": 3304 }, { "epoch": 0.15, "grad_norm": 0.40701801935866583, "learning_rate": 1.9232090735080953e-05, "loss": 0.276, "step": 3305 }, { "epoch": 0.15, "grad_norm": 0.6368522448134105, "learning_rate": 1.9231518828401458e-05, "loss": 0.3135, "step": 3306 }, { "epoch": 0.15, "grad_norm": 0.8722022723109727, "learning_rate": 1.9230946717345035e-05, "loss": 0.5006, "step": 3307 }, { "epoch": 0.15, "grad_norm": 0.4875013214388049, "learning_rate": 1.923037440192435e-05, "loss": 0.3015, "step": 3308 }, { "epoch": 0.15, "grad_norm": 0.4699682926855817, "learning_rate": 1.922980188215207e-05, "loss": 0.268, "step": 3309 }, { "epoch": 0.15, "grad_norm": 0.3307216533842796, "learning_rate": 1.9229229158040872e-05, "loss": 0.2484, "step": 3310 }, { "epoch": 0.15, "grad_norm": 0.8660069976275132, "learning_rate": 1.9228656229603436e-05, "loss": 0.4969, "step": 3311 }, { "epoch": 0.15, "grad_norm": 0.44977681045476764, "learning_rate": 1.922808309685245e-05, "loss": 0.3255, "step": 3312 }, { "epoch": 0.15, "grad_norm": 0.4331173901424907, "learning_rate": 1.9227509759800595e-05, "loss": 0.353, "step": 3313 }, { "epoch": 0.15, "grad_norm": 0.5969217504370009, "learning_rate": 1.9226936218460567e-05, "loss": 0.3377, "step": 3314 }, { "epoch": 0.15, "grad_norm": 0.49554752360815485, "learning_rate": 1.9226362472845062e-05, "loss": 0.3384, "step": 3315 }, { "epoch": 0.15, "grad_norm": 0.30298122227568713, "learning_rate": 1.9225788522966787e-05, "loss": 0.1937, "step": 3316 }, { "epoch": 0.15, "grad_norm": 0.6917326400031215, "learning_rate": 1.922521436883845e-05, "loss": 0.3643, "step": 3317 }, { "epoch": 0.15, "grad_norm": 0.46419337025078505, "learning_rate": 1.922464001047275e-05, "loss": 0.3588, "step": 3318 }, { "epoch": 0.15, "grad_norm": 0.7904336551585145, "learning_rate": 1.922406544788242e-05, "loss": 0.4341, "step": 3319 }, { "epoch": 0.15, "grad_norm": 0.5421949033752944, "learning_rate": 1.9223490681080164e-05, "loss": 0.3205, "step": 3320 }, { "epoch": 0.15, "grad_norm": 0.39750841010299504, "learning_rate": 1.9222915710078717e-05, "loss": 0.2973, "step": 3321 }, { "epoch": 0.15, "grad_norm": 0.26899841262128654, "learning_rate": 1.9222340534890803e-05, "loss": 0.1205, "step": 3322 }, { "epoch": 0.15, "grad_norm": 0.48270028212241406, "learning_rate": 1.922176515552916e-05, "loss": 0.3246, "step": 3323 }, { "epoch": 0.15, "grad_norm": 0.6829021997980356, "learning_rate": 1.9221189572006524e-05, "loss": 0.4476, "step": 3324 }, { "epoch": 0.15, "grad_norm": 0.6278132018412705, "learning_rate": 1.922061378433564e-05, "loss": 0.3565, "step": 3325 }, { "epoch": 0.15, "grad_norm": 0.3825649980388092, "learning_rate": 1.922003779252925e-05, "loss": 0.2129, "step": 3326 }, { "epoch": 0.15, "grad_norm": 0.605153418199528, "learning_rate": 1.9219461596600112e-05, "loss": 0.3806, "step": 3327 }, { "epoch": 0.15, "grad_norm": 0.28271677535211903, "learning_rate": 1.9218885196560984e-05, "loss": 0.2254, "step": 3328 }, { "epoch": 0.15, "grad_norm": 0.5680727950276573, "learning_rate": 1.921830859242462e-05, "loss": 0.3252, "step": 3329 }, { "epoch": 0.15, "grad_norm": 0.44884564616846256, "learning_rate": 1.9217731784203786e-05, "loss": 0.3358, "step": 3330 }, { "epoch": 0.15, "grad_norm": 0.794181012103662, "learning_rate": 1.9217154771911256e-05, "loss": 0.6, "step": 3331 }, { "epoch": 0.15, "grad_norm": 0.4905123050727757, "learning_rate": 1.9216577555559805e-05, "loss": 0.2562, "step": 3332 }, { "epoch": 0.15, "grad_norm": 0.505954927323551, "learning_rate": 1.9216000135162206e-05, "loss": 0.3101, "step": 3333 }, { "epoch": 0.15, "grad_norm": 0.36472486665209713, "learning_rate": 1.921542251073125e-05, "loss": 0.2122, "step": 3334 }, { "epoch": 0.15, "grad_norm": 0.6716010643391185, "learning_rate": 1.921484468227972e-05, "loss": 0.3156, "step": 3335 }, { "epoch": 0.15, "grad_norm": 0.4671365378819608, "learning_rate": 1.921426664982041e-05, "loss": 0.3208, "step": 3336 }, { "epoch": 0.15, "grad_norm": 0.6686578842401671, "learning_rate": 1.9213688413366118e-05, "loss": 0.4174, "step": 3337 }, { "epoch": 0.15, "grad_norm": 0.8549037510537436, "learning_rate": 1.9213109972929645e-05, "loss": 0.5043, "step": 3338 }, { "epoch": 0.15, "grad_norm": 0.3905145913697109, "learning_rate": 1.9212531328523796e-05, "loss": 0.2331, "step": 3339 }, { "epoch": 0.15, "grad_norm": 0.37922556052393236, "learning_rate": 1.9211952480161382e-05, "loss": 0.2735, "step": 3340 }, { "epoch": 0.15, "grad_norm": 0.8212454911391099, "learning_rate": 1.921137342785522e-05, "loss": 0.4546, "step": 3341 }, { "epoch": 0.15, "grad_norm": 0.4001899040816719, "learning_rate": 1.9210794171618127e-05, "loss": 0.1844, "step": 3342 }, { "epoch": 0.15, "grad_norm": 1.2866075066813119, "learning_rate": 1.9210214711462928e-05, "loss": 0.9067, "step": 3343 }, { "epoch": 0.15, "grad_norm": 0.4348203154457227, "learning_rate": 1.9209635047402456e-05, "loss": 0.2832, "step": 3344 }, { "epoch": 0.15, "grad_norm": 0.4473319107634509, "learning_rate": 1.920905517944954e-05, "loss": 0.2804, "step": 3345 }, { "epoch": 0.15, "grad_norm": 0.761625305575963, "learning_rate": 1.9208475107617012e-05, "loss": 0.5267, "step": 3346 }, { "epoch": 0.15, "grad_norm": 0.4407027160960978, "learning_rate": 1.9207894831917725e-05, "loss": 0.2989, "step": 3347 }, { "epoch": 0.15, "grad_norm": 0.43965056638242483, "learning_rate": 1.9207314352364523e-05, "loss": 0.2671, "step": 3348 }, { "epoch": 0.15, "grad_norm": 0.4122549034445267, "learning_rate": 1.920673366897025e-05, "loss": 0.2853, "step": 3349 }, { "epoch": 0.15, "grad_norm": 1.8982321521251455, "learning_rate": 1.9206152781747772e-05, "loss": 0.9683, "step": 3350 }, { "epoch": 0.15, "grad_norm": 0.4109761683590333, "learning_rate": 1.9205571690709942e-05, "loss": 0.2913, "step": 3351 }, { "epoch": 0.15, "grad_norm": 0.42602643517806127, "learning_rate": 1.9204990395869626e-05, "loss": 0.33, "step": 3352 }, { "epoch": 0.15, "grad_norm": 0.8126048368642423, "learning_rate": 1.9204408897239697e-05, "loss": 0.5386, "step": 3353 }, { "epoch": 0.15, "grad_norm": 0.4784871389361435, "learning_rate": 1.9203827194833026e-05, "loss": 0.3086, "step": 3354 }, { "epoch": 0.15, "grad_norm": 0.36940173638560225, "learning_rate": 1.9203245288662492e-05, "loss": 0.1842, "step": 3355 }, { "epoch": 0.15, "grad_norm": 0.3547309936891921, "learning_rate": 1.9202663178740978e-05, "loss": 0.2654, "step": 3356 }, { "epoch": 0.15, "grad_norm": 0.4096928430914158, "learning_rate": 1.920208086508137e-05, "loss": 0.2973, "step": 3357 }, { "epoch": 0.15, "grad_norm": 0.7595927508156719, "learning_rate": 1.9201498347696563e-05, "loss": 0.4186, "step": 3358 }, { "epoch": 0.15, "grad_norm": 0.956723091357573, "learning_rate": 1.9200915626599442e-05, "loss": 0.5554, "step": 3359 }, { "epoch": 0.15, "grad_norm": 0.49713162807509637, "learning_rate": 1.9200332701802925e-05, "loss": 0.3111, "step": 3360 }, { "epoch": 0.15, "grad_norm": 0.3590860993104815, "learning_rate": 1.919974957331991e-05, "loss": 0.2399, "step": 3361 }, { "epoch": 0.15, "grad_norm": 0.40672914501526425, "learning_rate": 1.9199166241163302e-05, "loss": 0.2572, "step": 3362 }, { "epoch": 0.15, "grad_norm": 0.4879901757286407, "learning_rate": 1.9198582705346023e-05, "loss": 0.3213, "step": 3363 }, { "epoch": 0.15, "grad_norm": 0.4802292393967778, "learning_rate": 1.919799896588099e-05, "loss": 0.374, "step": 3364 }, { "epoch": 0.15, "grad_norm": 0.6971322839437677, "learning_rate": 1.919741502278112e-05, "loss": 0.3281, "step": 3365 }, { "epoch": 0.15, "grad_norm": 0.5631305327923952, "learning_rate": 1.9196830876059348e-05, "loss": 0.2896, "step": 3366 }, { "epoch": 0.15, "grad_norm": 0.4551873078435857, "learning_rate": 1.9196246525728607e-05, "loss": 0.3234, "step": 3367 }, { "epoch": 0.15, "grad_norm": 0.32605135390133727, "learning_rate": 1.9195661971801825e-05, "loss": 0.2283, "step": 3368 }, { "epoch": 0.15, "grad_norm": 0.4036311476491305, "learning_rate": 1.9195077214291955e-05, "loss": 0.3072, "step": 3369 }, { "epoch": 0.15, "grad_norm": 0.7624643434653674, "learning_rate": 1.919449225321194e-05, "loss": 0.4935, "step": 3370 }, { "epoch": 0.15, "grad_norm": 0.532283504393621, "learning_rate": 1.9193907088574725e-05, "loss": 0.279, "step": 3371 }, { "epoch": 0.15, "grad_norm": 0.33530039765016545, "learning_rate": 1.9193321720393267e-05, "loss": 0.2849, "step": 3372 }, { "epoch": 0.15, "grad_norm": 1.4523890547065752, "learning_rate": 1.9192736148680525e-05, "loss": 0.8646, "step": 3373 }, { "epoch": 0.15, "grad_norm": 0.2990860317267795, "learning_rate": 1.9192150373449473e-05, "loss": 0.2273, "step": 3374 }, { "epoch": 0.16, "grad_norm": 0.3977582439527299, "learning_rate": 1.9191564394713063e-05, "loss": 0.3181, "step": 3375 }, { "epoch": 0.16, "grad_norm": 0.5060475766557591, "learning_rate": 1.919097821248428e-05, "loss": 0.3583, "step": 3376 }, { "epoch": 0.16, "grad_norm": 1.08003385667202, "learning_rate": 1.9190391826776097e-05, "loss": 0.6451, "step": 3377 }, { "epoch": 0.16, "grad_norm": 0.3419907478698583, "learning_rate": 1.9189805237601497e-05, "loss": 0.2109, "step": 3378 }, { "epoch": 0.16, "grad_norm": 1.4844841124003776, "learning_rate": 1.9189218444973467e-05, "loss": 0.8914, "step": 3379 }, { "epoch": 0.16, "grad_norm": 0.35211594593899825, "learning_rate": 1.9188631448904998e-05, "loss": 0.2978, "step": 3380 }, { "epoch": 0.16, "grad_norm": 0.3880115842542888, "learning_rate": 1.9188044249409082e-05, "loss": 0.2112, "step": 3381 }, { "epoch": 0.16, "grad_norm": 0.4912770402845999, "learning_rate": 1.9187456846498722e-05, "loss": 0.365, "step": 3382 }, { "epoch": 0.16, "grad_norm": 0.447897537730246, "learning_rate": 1.9186869240186925e-05, "loss": 0.3441, "step": 3383 }, { "epoch": 0.16, "grad_norm": 0.3663215331948095, "learning_rate": 1.9186281430486695e-05, "loss": 0.2055, "step": 3384 }, { "epoch": 0.16, "grad_norm": 1.0933820119881812, "learning_rate": 1.9185693417411053e-05, "loss": 0.6602, "step": 3385 }, { "epoch": 0.16, "grad_norm": 0.6421661385304326, "learning_rate": 1.9185105200973004e-05, "loss": 0.4421, "step": 3386 }, { "epoch": 0.16, "grad_norm": 0.39422910514625803, "learning_rate": 1.918451678118558e-05, "loss": 0.3096, "step": 3387 }, { "epoch": 0.16, "grad_norm": 0.26765993085072315, "learning_rate": 1.9183928158061814e-05, "loss": 0.1752, "step": 3388 }, { "epoch": 0.16, "grad_norm": 1.8074355500271955, "learning_rate": 1.9183339331614723e-05, "loss": 0.8148, "step": 3389 }, { "epoch": 0.16, "grad_norm": 0.4041699650984824, "learning_rate": 1.9182750301857354e-05, "loss": 0.2987, "step": 3390 }, { "epoch": 0.16, "grad_norm": 0.8503668681469448, "learning_rate": 1.9182161068802742e-05, "loss": 0.4359, "step": 3391 }, { "epoch": 0.16, "grad_norm": 0.3990898977772437, "learning_rate": 1.9181571632463933e-05, "loss": 0.3653, "step": 3392 }, { "epoch": 0.16, "grad_norm": 0.41311632453767994, "learning_rate": 1.918098199285398e-05, "loss": 0.3074, "step": 3393 }, { "epoch": 0.16, "grad_norm": 0.25178970137932033, "learning_rate": 1.918039214998593e-05, "loss": 0.078, "step": 3394 }, { "epoch": 0.16, "grad_norm": 0.44107135034782713, "learning_rate": 1.917980210387285e-05, "loss": 0.3349, "step": 3395 }, { "epoch": 0.16, "grad_norm": 0.4798315229064209, "learning_rate": 1.91792118545278e-05, "loss": 0.2967, "step": 3396 }, { "epoch": 0.16, "grad_norm": 0.6197963655649253, "learning_rate": 1.9178621401963843e-05, "loss": 0.3414, "step": 3397 }, { "epoch": 0.16, "grad_norm": 0.7427572385598589, "learning_rate": 1.9178030746194055e-05, "loss": 0.4925, "step": 3398 }, { "epoch": 0.16, "grad_norm": 0.4876514619448412, "learning_rate": 1.917743988723152e-05, "loss": 0.3023, "step": 3399 }, { "epoch": 0.16, "grad_norm": 0.3335945675313101, "learning_rate": 1.91768488250893e-05, "loss": 0.2768, "step": 3400 }, { "epoch": 0.16, "grad_norm": 0.33143204372264295, "learning_rate": 1.9176257559780497e-05, "loss": 0.184, "step": 3401 }, { "epoch": 0.16, "grad_norm": 0.5032840776863647, "learning_rate": 1.9175666091318196e-05, "loss": 0.3482, "step": 3402 }, { "epoch": 0.16, "grad_norm": 0.6995482609409982, "learning_rate": 1.917507441971549e-05, "loss": 0.4259, "step": 3403 }, { "epoch": 0.16, "grad_norm": 0.4070389390470506, "learning_rate": 1.917448254498548e-05, "loss": 0.3093, "step": 3404 }, { "epoch": 0.16, "grad_norm": 0.5142480387539397, "learning_rate": 1.9173890467141268e-05, "loss": 0.2743, "step": 3405 }, { "epoch": 0.16, "grad_norm": 0.31929163897954016, "learning_rate": 1.9173298186195964e-05, "loss": 0.2215, "step": 3406 }, { "epoch": 0.16, "grad_norm": 0.4772866803527093, "learning_rate": 1.917270570216268e-05, "loss": 0.3001, "step": 3407 }, { "epoch": 0.16, "grad_norm": 0.3553699338870784, "learning_rate": 1.917211301505453e-05, "loss": 0.2764, "step": 3408 }, { "epoch": 0.16, "grad_norm": 0.8210697437366932, "learning_rate": 1.9171520124884643e-05, "loss": 0.5633, "step": 3409 }, { "epoch": 0.16, "grad_norm": 0.5964754285802557, "learning_rate": 1.9170927031666137e-05, "loss": 0.429, "step": 3410 }, { "epoch": 0.16, "grad_norm": 0.4364954694802805, "learning_rate": 1.9170333735412147e-05, "loss": 0.2703, "step": 3411 }, { "epoch": 0.16, "grad_norm": 0.4253643041858598, "learning_rate": 1.9169740236135804e-05, "loss": 0.3128, "step": 3412 }, { "epoch": 0.16, "grad_norm": 0.3971326643000123, "learning_rate": 1.9169146533850252e-05, "loss": 0.2333, "step": 3413 }, { "epoch": 0.16, "grad_norm": 0.46388794944744244, "learning_rate": 1.9168552628568632e-05, "loss": 0.2498, "step": 3414 }, { "epoch": 0.16, "grad_norm": 0.7429689066988482, "learning_rate": 1.916795852030409e-05, "loss": 0.4178, "step": 3415 }, { "epoch": 0.16, "grad_norm": 0.4329306168174274, "learning_rate": 1.916736420906979e-05, "loss": 0.3474, "step": 3416 }, { "epoch": 0.16, "grad_norm": 0.42073630767930015, "learning_rate": 1.9166769694878877e-05, "loss": 0.19, "step": 3417 }, { "epoch": 0.16, "grad_norm": 0.36779516695066033, "learning_rate": 1.916617497774452e-05, "loss": 0.2655, "step": 3418 }, { "epoch": 0.16, "grad_norm": 0.3588948714738646, "learning_rate": 1.916558005767988e-05, "loss": 0.312, "step": 3419 }, { "epoch": 0.16, "grad_norm": 0.3962461274970632, "learning_rate": 1.9164984934698136e-05, "loss": 0.2415, "step": 3420 }, { "epoch": 0.16, "grad_norm": 0.6543796323648982, "learning_rate": 1.9164389608812458e-05, "loss": 0.4531, "step": 3421 }, { "epoch": 0.16, "grad_norm": 1.1775146613630108, "learning_rate": 1.9163794080036026e-05, "loss": 0.6552, "step": 3422 }, { "epoch": 0.16, "grad_norm": 0.3538804275266444, "learning_rate": 1.9163198348382023e-05, "loss": 0.2347, "step": 3423 }, { "epoch": 0.16, "grad_norm": 0.40215654435324955, "learning_rate": 1.9162602413863646e-05, "loss": 0.2693, "step": 3424 }, { "epoch": 0.16, "grad_norm": 0.47727541533278633, "learning_rate": 1.916200627649408e-05, "loss": 0.3294, "step": 3425 }, { "epoch": 0.16, "grad_norm": 0.5434506729464939, "learning_rate": 1.9161409936286524e-05, "loss": 0.4095, "step": 3426 }, { "epoch": 0.16, "grad_norm": 0.48657745932793073, "learning_rate": 1.9160813393254182e-05, "loss": 0.2871, "step": 3427 }, { "epoch": 0.16, "grad_norm": 0.45459399273711115, "learning_rate": 1.916021664741026e-05, "loss": 0.3614, "step": 3428 }, { "epoch": 0.16, "grad_norm": 0.47962185870991336, "learning_rate": 1.9159619698767972e-05, "loss": 0.3369, "step": 3429 }, { "epoch": 0.16, "grad_norm": 0.5735934177845237, "learning_rate": 1.915902254734053e-05, "loss": 0.3043, "step": 3430 }, { "epoch": 0.16, "grad_norm": 0.29270488331025524, "learning_rate": 1.915842519314116e-05, "loss": 0.2378, "step": 3431 }, { "epoch": 0.16, "grad_norm": 0.472462457346063, "learning_rate": 1.915782763618308e-05, "loss": 0.3445, "step": 3432 }, { "epoch": 0.16, "grad_norm": 0.5143698222543387, "learning_rate": 1.9157229876479525e-05, "loss": 0.338, "step": 3433 }, { "epoch": 0.16, "grad_norm": 2.114173546019667, "learning_rate": 1.9156631914043723e-05, "loss": 0.7025, "step": 3434 }, { "epoch": 0.16, "grad_norm": 0.5089458134745325, "learning_rate": 1.9156033748888918e-05, "loss": 0.3349, "step": 3435 }, { "epoch": 0.16, "grad_norm": 0.34787092617807697, "learning_rate": 1.9155435381028348e-05, "loss": 0.2452, "step": 3436 }, { "epoch": 0.16, "grad_norm": 0.5140115462147723, "learning_rate": 1.9154836810475266e-05, "loss": 0.3419, "step": 3437 }, { "epoch": 0.16, "grad_norm": 0.46848933564646866, "learning_rate": 1.915423803724292e-05, "loss": 0.2454, "step": 3438 }, { "epoch": 0.16, "grad_norm": 0.4286154581992925, "learning_rate": 1.9153639061344568e-05, "loss": 0.2899, "step": 3439 }, { "epoch": 0.16, "grad_norm": 0.48910352139556, "learning_rate": 1.9153039882793466e-05, "loss": 0.2988, "step": 3440 }, { "epoch": 0.16, "grad_norm": 0.651841107884288, "learning_rate": 1.9152440501602885e-05, "loss": 0.362, "step": 3441 }, { "epoch": 0.16, "grad_norm": 0.4416154036761676, "learning_rate": 1.9151840917786092e-05, "loss": 0.3526, "step": 3442 }, { "epoch": 0.16, "grad_norm": 0.37928587249066165, "learning_rate": 1.915124113135636e-05, "loss": 0.3134, "step": 3443 }, { "epoch": 0.16, "grad_norm": 0.43733033871745575, "learning_rate": 1.9150641142326975e-05, "loss": 0.2404, "step": 3444 }, { "epoch": 0.16, "grad_norm": 0.5038621414655685, "learning_rate": 1.915004095071121e-05, "loss": 0.3618, "step": 3445 }, { "epoch": 0.16, "grad_norm": 0.34215194936473936, "learning_rate": 1.9149440556522357e-05, "loss": 0.1736, "step": 3446 }, { "epoch": 0.16, "grad_norm": 0.4216096476780459, "learning_rate": 1.9148839959773712e-05, "loss": 0.3052, "step": 3447 }, { "epoch": 0.16, "grad_norm": 0.46783037600400706, "learning_rate": 1.9148239160478565e-05, "loss": 0.3401, "step": 3448 }, { "epoch": 0.16, "grad_norm": 0.6148351220840176, "learning_rate": 1.914763815865022e-05, "loss": 0.4, "step": 3449 }, { "epoch": 0.16, "grad_norm": 0.49464379906017614, "learning_rate": 1.9147036954301986e-05, "loss": 0.322, "step": 3450 }, { "epoch": 0.16, "grad_norm": 0.4569241533831194, "learning_rate": 1.9146435547447168e-05, "loss": 0.3166, "step": 3451 }, { "epoch": 0.16, "grad_norm": 0.30321510146375885, "learning_rate": 1.914583393809908e-05, "loss": 0.2677, "step": 3452 }, { "epoch": 0.16, "grad_norm": 0.6365400162442456, "learning_rate": 1.9145232126271045e-05, "loss": 0.2831, "step": 3453 }, { "epoch": 0.16, "grad_norm": 0.39671575129971, "learning_rate": 1.9144630111976385e-05, "loss": 0.3473, "step": 3454 }, { "epoch": 0.16, "grad_norm": 0.47449816609150136, "learning_rate": 1.914402789522843e-05, "loss": 0.3903, "step": 3455 }, { "epoch": 0.16, "grad_norm": 0.5516235976536191, "learning_rate": 1.9143425476040508e-05, "loss": 0.1499, "step": 3456 }, { "epoch": 0.16, "grad_norm": 0.4340650598047938, "learning_rate": 1.914282285442596e-05, "loss": 0.3322, "step": 3457 }, { "epoch": 0.16, "grad_norm": 0.32812347694788363, "learning_rate": 1.9142220030398128e-05, "loss": 0.1965, "step": 3458 }, { "epoch": 0.16, "grad_norm": 0.35545950408669325, "learning_rate": 1.914161700397035e-05, "loss": 0.2608, "step": 3459 }, { "epoch": 0.16, "grad_norm": 0.4413803333802271, "learning_rate": 1.914101377515599e-05, "loss": 0.3494, "step": 3460 }, { "epoch": 0.16, "grad_norm": 0.9095260286058134, "learning_rate": 1.914041034396839e-05, "loss": 0.5996, "step": 3461 }, { "epoch": 0.16, "grad_norm": 0.731698288828411, "learning_rate": 1.9139806710420914e-05, "loss": 0.2648, "step": 3462 }, { "epoch": 0.16, "grad_norm": 0.3937954904865098, "learning_rate": 1.913920287452693e-05, "loss": 0.3103, "step": 3463 }, { "epoch": 0.16, "grad_norm": 0.3678625175062782, "learning_rate": 1.91385988362998e-05, "loss": 0.27, "step": 3464 }, { "epoch": 0.16, "grad_norm": 0.4891904157853559, "learning_rate": 1.91379945957529e-05, "loss": 0.3213, "step": 3465 }, { "epoch": 0.16, "grad_norm": 0.4692580810977307, "learning_rate": 1.9137390152899608e-05, "loss": 0.2742, "step": 3466 }, { "epoch": 0.16, "grad_norm": 0.4351870902893192, "learning_rate": 1.9136785507753302e-05, "loss": 0.3367, "step": 3467 }, { "epoch": 0.16, "grad_norm": 0.771071937960453, "learning_rate": 1.9136180660327377e-05, "loss": 0.5116, "step": 3468 }, { "epoch": 0.16, "grad_norm": 0.4155420541972083, "learning_rate": 1.913557561063521e-05, "loss": 0.2303, "step": 3469 }, { "epoch": 0.16, "grad_norm": 0.31922242719289784, "learning_rate": 1.913497035869021e-05, "loss": 0.2344, "step": 3470 }, { "epoch": 0.16, "grad_norm": 0.43879675886993946, "learning_rate": 1.913436490450577e-05, "loss": 0.3395, "step": 3471 }, { "epoch": 0.16, "grad_norm": 0.41411630055213, "learning_rate": 1.9133759248095294e-05, "loss": 0.2825, "step": 3472 }, { "epoch": 0.16, "grad_norm": 1.1173247654134, "learning_rate": 1.913315338947219e-05, "loss": 0.6101, "step": 3473 }, { "epoch": 0.16, "grad_norm": 1.4161497313563582, "learning_rate": 1.9132547328649873e-05, "loss": 0.8446, "step": 3474 }, { "epoch": 0.16, "grad_norm": 0.3899908204352992, "learning_rate": 1.913194106564176e-05, "loss": 0.2504, "step": 3475 }, { "epoch": 0.16, "grad_norm": 0.8812685354613593, "learning_rate": 1.9131334600461274e-05, "loss": 0.5134, "step": 3476 }, { "epoch": 0.16, "grad_norm": 0.44594836535757465, "learning_rate": 1.9130727933121842e-05, "loss": 0.3571, "step": 3477 }, { "epoch": 0.16, "grad_norm": 0.360254478660426, "learning_rate": 1.9130121063636893e-05, "loss": 0.2119, "step": 3478 }, { "epoch": 0.16, "grad_norm": 0.48687590351124066, "learning_rate": 1.9129513992019864e-05, "loss": 0.2759, "step": 3479 }, { "epoch": 0.16, "grad_norm": 1.4028502930318765, "learning_rate": 1.9128906718284192e-05, "loss": 0.779, "step": 3480 }, { "epoch": 0.16, "grad_norm": 0.3800523846803639, "learning_rate": 1.9128299242443325e-05, "loss": 0.2863, "step": 3481 }, { "epoch": 0.16, "grad_norm": 0.7864540152394304, "learning_rate": 1.9127691564510714e-05, "loss": 0.3981, "step": 3482 }, { "epoch": 0.16, "grad_norm": 0.42160130001516083, "learning_rate": 1.9127083684499805e-05, "loss": 0.3363, "step": 3483 }, { "epoch": 0.16, "grad_norm": 0.526323565600426, "learning_rate": 1.912647560242406e-05, "loss": 0.2797, "step": 3484 }, { "epoch": 0.16, "grad_norm": 0.36132367162232637, "learning_rate": 1.9125867318296946e-05, "loss": 0.1459, "step": 3485 }, { "epoch": 0.16, "grad_norm": 0.48669425168280406, "learning_rate": 1.912525883213192e-05, "loss": 0.3961, "step": 3486 }, { "epoch": 0.16, "grad_norm": 0.44839878549127704, "learning_rate": 1.912465014394246e-05, "loss": 0.3372, "step": 3487 }, { "epoch": 0.16, "grad_norm": 0.6853048696835087, "learning_rate": 1.9124041253742042e-05, "loss": 0.3756, "step": 3488 }, { "epoch": 0.16, "grad_norm": 1.1239841290208872, "learning_rate": 1.9123432161544143e-05, "loss": 0.6775, "step": 3489 }, { "epoch": 0.16, "grad_norm": 0.32899443409281237, "learning_rate": 1.912282286736225e-05, "loss": 0.2369, "step": 3490 }, { "epoch": 0.16, "grad_norm": 0.3911369336831165, "learning_rate": 1.9122213371209848e-05, "loss": 0.2899, "step": 3491 }, { "epoch": 0.16, "grad_norm": 1.608779414341514, "learning_rate": 1.912160367310044e-05, "loss": 0.5468, "step": 3492 }, { "epoch": 0.16, "grad_norm": 0.39956878497461185, "learning_rate": 1.9120993773047512e-05, "loss": 0.2927, "step": 3493 }, { "epoch": 0.16, "grad_norm": 1.0418945565542326, "learning_rate": 1.9120383671064577e-05, "loss": 0.5134, "step": 3494 }, { "epoch": 0.16, "grad_norm": 0.4534093265244846, "learning_rate": 1.911977336716514e-05, "loss": 0.2572, "step": 3495 }, { "epoch": 0.16, "grad_norm": 0.5345755583661993, "learning_rate": 1.9119162861362703e-05, "loss": 0.2663, "step": 3496 }, { "epoch": 0.16, "grad_norm": 0.4224818044341632, "learning_rate": 1.9118552153670796e-05, "loss": 0.1854, "step": 3497 }, { "epoch": 0.16, "grad_norm": 0.48274037404054887, "learning_rate": 1.911794124410293e-05, "loss": 0.2899, "step": 3498 }, { "epoch": 0.16, "grad_norm": 0.44682123197430085, "learning_rate": 1.9117330132672633e-05, "loss": 0.3191, "step": 3499 }, { "epoch": 0.16, "grad_norm": 0.8358462328703415, "learning_rate": 1.9116718819393434e-05, "loss": 0.5083, "step": 3500 }, { "epoch": 0.16, "grad_norm": 0.37676259324671696, "learning_rate": 1.9116107304278867e-05, "loss": 0.2359, "step": 3501 }, { "epoch": 0.16, "grad_norm": 0.5242522303620496, "learning_rate": 1.911549558734247e-05, "loss": 0.2884, "step": 3502 }, { "epoch": 0.16, "grad_norm": 0.3322886814798018, "learning_rate": 1.911488366859779e-05, "loss": 0.2728, "step": 3503 }, { "epoch": 0.16, "grad_norm": 0.9102443678886043, "learning_rate": 1.9114271548058365e-05, "loss": 0.6597, "step": 3504 }, { "epoch": 0.16, "grad_norm": 0.4137721630784232, "learning_rate": 1.9113659225737757e-05, "loss": 0.2154, "step": 3505 }, { "epoch": 0.16, "grad_norm": 0.6215459450322264, "learning_rate": 1.9113046701649517e-05, "loss": 0.4272, "step": 3506 }, { "epoch": 0.16, "grad_norm": 0.5743797803037992, "learning_rate": 1.9112433975807204e-05, "loss": 0.3924, "step": 3507 }, { "epoch": 0.16, "grad_norm": 0.3846197950569047, "learning_rate": 1.9111821048224387e-05, "loss": 0.2312, "step": 3508 }, { "epoch": 0.16, "grad_norm": 0.3409559485094247, "learning_rate": 1.9111207918914633e-05, "loss": 0.2005, "step": 3509 }, { "epoch": 0.16, "grad_norm": 0.5229713941797228, "learning_rate": 1.911059458789152e-05, "loss": 0.4135, "step": 3510 }, { "epoch": 0.16, "grad_norm": 0.36540609633968346, "learning_rate": 1.9109981055168624e-05, "loss": 0.2571, "step": 3511 }, { "epoch": 0.16, "grad_norm": 0.8106590512115882, "learning_rate": 1.9109367320759522e-05, "loss": 0.5676, "step": 3512 }, { "epoch": 0.16, "grad_norm": 1.3875298946431176, "learning_rate": 1.910875338467781e-05, "loss": 0.6127, "step": 3513 }, { "epoch": 0.16, "grad_norm": 0.4258125040812, "learning_rate": 1.910813924693708e-05, "loss": 0.299, "step": 3514 }, { "epoch": 0.16, "grad_norm": 0.45367298094079395, "learning_rate": 1.9107524907550922e-05, "loss": 0.2196, "step": 3515 }, { "epoch": 0.16, "grad_norm": 0.42890909475885713, "learning_rate": 1.910691036653294e-05, "loss": 0.3248, "step": 3516 }, { "epoch": 0.16, "grad_norm": 0.455499110905812, "learning_rate": 1.9106295623896744e-05, "loss": 0.3159, "step": 3517 }, { "epoch": 0.16, "grad_norm": 1.2991342100155205, "learning_rate": 1.9105680679655938e-05, "loss": 0.4528, "step": 3518 }, { "epoch": 0.16, "grad_norm": 0.4756550473255202, "learning_rate": 1.9105065533824136e-05, "loss": 0.3887, "step": 3519 }, { "epoch": 0.16, "grad_norm": 0.7340965260393196, "learning_rate": 1.9104450186414963e-05, "loss": 0.4344, "step": 3520 }, { "epoch": 0.16, "grad_norm": 0.35557001322735815, "learning_rate": 1.9103834637442035e-05, "loss": 0.1699, "step": 3521 }, { "epoch": 0.16, "grad_norm": 0.49549043709383356, "learning_rate": 1.9103218886918983e-05, "loss": 0.3834, "step": 3522 }, { "epoch": 0.16, "grad_norm": 0.8288347004829953, "learning_rate": 1.9102602934859437e-05, "loss": 0.3933, "step": 3523 }, { "epoch": 0.16, "grad_norm": 0.481243823133131, "learning_rate": 1.9101986781277037e-05, "loss": 0.2759, "step": 3524 }, { "epoch": 0.16, "grad_norm": 1.0482720217005597, "learning_rate": 1.9101370426185418e-05, "loss": 0.5761, "step": 3525 }, { "epoch": 0.16, "grad_norm": 0.5176275388076008, "learning_rate": 1.9100753869598237e-05, "loss": 0.3192, "step": 3526 }, { "epoch": 0.16, "grad_norm": 0.34008250491417535, "learning_rate": 1.9100137111529135e-05, "loss": 0.2906, "step": 3527 }, { "epoch": 0.16, "grad_norm": 0.6799676695944602, "learning_rate": 1.9099520151991765e-05, "loss": 0.4285, "step": 3528 }, { "epoch": 0.16, "grad_norm": 0.47546095129369154, "learning_rate": 1.909890299099979e-05, "loss": 0.3255, "step": 3529 }, { "epoch": 0.16, "grad_norm": 0.3252836851202552, "learning_rate": 1.9098285628566872e-05, "loss": 0.2619, "step": 3530 }, { "epoch": 0.16, "grad_norm": 0.5159822451330393, "learning_rate": 1.909766806470668e-05, "loss": 0.2857, "step": 3531 }, { "epoch": 0.16, "grad_norm": 0.426437760138583, "learning_rate": 1.9097050299432886e-05, "loss": 0.3026, "step": 3532 }, { "epoch": 0.16, "grad_norm": 0.4684530024818983, "learning_rate": 1.9096432332759167e-05, "loss": 0.2929, "step": 3533 }, { "epoch": 0.16, "grad_norm": 0.4249940951000583, "learning_rate": 1.90958141646992e-05, "loss": 0.3507, "step": 3534 }, { "epoch": 0.16, "grad_norm": 0.4155006261853128, "learning_rate": 1.9095195795266677e-05, "loss": 0.2711, "step": 3535 }, { "epoch": 0.16, "grad_norm": 1.362589731001263, "learning_rate": 1.9094577224475283e-05, "loss": 0.8216, "step": 3536 }, { "epoch": 0.16, "grad_norm": 0.3732635666647341, "learning_rate": 1.9093958452338717e-05, "loss": 0.1632, "step": 3537 }, { "epoch": 0.16, "grad_norm": 0.4733395209874384, "learning_rate": 1.9093339478870675e-05, "loss": 0.3164, "step": 3538 }, { "epoch": 0.16, "grad_norm": 0.40573139968984, "learning_rate": 1.909272030408486e-05, "loss": 0.3534, "step": 3539 }, { "epoch": 0.16, "grad_norm": 0.7161759689409551, "learning_rate": 1.9092100927994982e-05, "loss": 0.483, "step": 3540 }, { "epoch": 0.16, "grad_norm": 0.48984749549176204, "learning_rate": 1.9091481350614753e-05, "loss": 0.1812, "step": 3541 }, { "epoch": 0.16, "grad_norm": 0.34212256922895506, "learning_rate": 1.9090861571957887e-05, "loss": 0.2624, "step": 3542 }, { "epoch": 0.16, "grad_norm": 0.43209077874933327, "learning_rate": 1.909024159203811e-05, "loss": 0.3102, "step": 3543 }, { "epoch": 0.16, "grad_norm": 0.4261400814812236, "learning_rate": 1.908962141086915e-05, "loss": 0.2099, "step": 3544 }, { "epoch": 0.16, "grad_norm": 0.4177690076918309, "learning_rate": 1.9089001028464724e-05, "loss": 0.368, "step": 3545 }, { "epoch": 0.16, "grad_norm": 0.6405738950003872, "learning_rate": 1.908838044483858e-05, "loss": 0.4079, "step": 3546 }, { "epoch": 0.16, "grad_norm": 0.3800116210254458, "learning_rate": 1.9087759660004452e-05, "loss": 0.2426, "step": 3547 }, { "epoch": 0.16, "grad_norm": 0.4106437041733624, "learning_rate": 1.9087138673976086e-05, "loss": 0.2787, "step": 3548 }, { "epoch": 0.16, "grad_norm": 0.5493707968947954, "learning_rate": 1.9086517486767223e-05, "loss": 0.3256, "step": 3549 }, { "epoch": 0.16, "grad_norm": 0.35510871794918764, "learning_rate": 1.9085896098391624e-05, "loss": 0.2515, "step": 3550 }, { "epoch": 0.16, "grad_norm": 0.6256709964613038, "learning_rate": 1.9085274508863043e-05, "loss": 0.406, "step": 3551 }, { "epoch": 0.16, "grad_norm": 0.776056461390364, "learning_rate": 1.9084652718195237e-05, "loss": 0.538, "step": 3552 }, { "epoch": 0.16, "grad_norm": 0.6968350263339173, "learning_rate": 1.908403072640198e-05, "loss": 0.3582, "step": 3553 }, { "epoch": 0.16, "grad_norm": 0.37635818534348725, "learning_rate": 1.9083408533497037e-05, "loss": 0.2567, "step": 3554 }, { "epoch": 0.16, "grad_norm": 0.3236260358672682, "learning_rate": 1.908278613949418e-05, "loss": 0.2421, "step": 3555 }, { "epoch": 0.16, "grad_norm": 0.6384570131214236, "learning_rate": 1.9082163544407198e-05, "loss": 0.3756, "step": 3556 }, { "epoch": 0.16, "grad_norm": 0.40641684592730404, "learning_rate": 1.9081540748249864e-05, "loss": 0.311, "step": 3557 }, { "epoch": 0.16, "grad_norm": 0.4862360695997552, "learning_rate": 1.908091775103597e-05, "loss": 0.3496, "step": 3558 }, { "epoch": 0.16, "grad_norm": 0.8602029369380233, "learning_rate": 1.9080294552779313e-05, "loss": 0.5222, "step": 3559 }, { "epoch": 0.16, "grad_norm": 0.3788894269648852, "learning_rate": 1.9079671153493687e-05, "loss": 0.257, "step": 3560 }, { "epoch": 0.16, "grad_norm": 0.43110993990624596, "learning_rate": 1.907904755319289e-05, "loss": 0.2675, "step": 3561 }, { "epoch": 0.16, "grad_norm": 0.4268204931062618, "learning_rate": 1.9078423751890734e-05, "loss": 0.3406, "step": 3562 }, { "epoch": 0.16, "grad_norm": 0.41412028995200467, "learning_rate": 1.9077799749601024e-05, "loss": 0.2681, "step": 3563 }, { "epoch": 0.16, "grad_norm": 1.6606216242409337, "learning_rate": 1.9077175546337575e-05, "loss": 0.7719, "step": 3564 }, { "epoch": 0.16, "grad_norm": 0.7436470464305392, "learning_rate": 1.907655114211421e-05, "loss": 0.5102, "step": 3565 }, { "epoch": 0.16, "grad_norm": 0.4041107815644691, "learning_rate": 1.907592653694475e-05, "loss": 0.292, "step": 3566 }, { "epoch": 0.16, "grad_norm": 0.2492568307381671, "learning_rate": 1.907530173084302e-05, "loss": 0.1567, "step": 3567 }, { "epoch": 0.16, "grad_norm": 0.6179193626268321, "learning_rate": 1.9074676723822864e-05, "loss": 0.408, "step": 3568 }, { "epoch": 0.16, "grad_norm": 0.504672466144843, "learning_rate": 1.907405151589811e-05, "loss": 0.3128, "step": 3569 }, { "epoch": 0.16, "grad_norm": 0.46512781059897446, "learning_rate": 1.90734261070826e-05, "loss": 0.2629, "step": 3570 }, { "epoch": 0.16, "grad_norm": 0.7054780195795514, "learning_rate": 1.907280049739018e-05, "loss": 0.3951, "step": 3571 }, { "epoch": 0.16, "grad_norm": 0.47315208394519115, "learning_rate": 1.9072174686834703e-05, "loss": 0.3346, "step": 3572 }, { "epoch": 0.16, "grad_norm": 0.7189326714559875, "learning_rate": 1.9071548675430018e-05, "loss": 0.4339, "step": 3573 }, { "epoch": 0.16, "grad_norm": 0.3447179161207031, "learning_rate": 1.9070922463189993e-05, "loss": 0.2593, "step": 3574 }, { "epoch": 0.16, "grad_norm": 0.38130961293646937, "learning_rate": 1.9070296050128486e-05, "loss": 0.2497, "step": 3575 }, { "epoch": 0.16, "grad_norm": 0.8425483811393244, "learning_rate": 1.906966943625937e-05, "loss": 0.4708, "step": 3576 }, { "epoch": 0.16, "grad_norm": 0.9982821552480875, "learning_rate": 1.906904262159651e-05, "loss": 0.5133, "step": 3577 }, { "epoch": 0.16, "grad_norm": 0.35449776971243824, "learning_rate": 1.906841560615379e-05, "loss": 0.3014, "step": 3578 }, { "epoch": 0.16, "grad_norm": 0.7820312405111502, "learning_rate": 1.9067788389945083e-05, "loss": 0.494, "step": 3579 }, { "epoch": 0.16, "grad_norm": 0.3380813604525425, "learning_rate": 1.9067160972984283e-05, "loss": 0.0794, "step": 3580 }, { "epoch": 0.16, "grad_norm": 0.3862253788321075, "learning_rate": 1.906653335528528e-05, "loss": 0.2648, "step": 3581 }, { "epoch": 0.16, "grad_norm": 0.4780269964938683, "learning_rate": 1.9065905536861967e-05, "loss": 0.3483, "step": 3582 }, { "epoch": 0.16, "grad_norm": 0.5801806308924962, "learning_rate": 1.9065277517728244e-05, "loss": 0.3385, "step": 3583 }, { "epoch": 0.16, "grad_norm": 0.4345445178658295, "learning_rate": 1.9064649297898016e-05, "loss": 0.3254, "step": 3584 }, { "epoch": 0.16, "grad_norm": 0.7853055884869111, "learning_rate": 1.9064020877385184e-05, "loss": 0.4993, "step": 3585 }, { "epoch": 0.16, "grad_norm": 0.3297062011323884, "learning_rate": 1.9063392256203668e-05, "loss": 0.2008, "step": 3586 }, { "epoch": 0.16, "grad_norm": 0.46474404208477543, "learning_rate": 1.9062763434367384e-05, "loss": 0.3087, "step": 3587 }, { "epoch": 0.16, "grad_norm": 0.5215583393876929, "learning_rate": 1.906213441189025e-05, "loss": 0.3033, "step": 3588 }, { "epoch": 0.16, "grad_norm": 0.38592173162921733, "learning_rate": 1.9061505188786196e-05, "loss": 0.263, "step": 3589 }, { "epoch": 0.16, "grad_norm": 0.42886632720046736, "learning_rate": 1.9060875765069148e-05, "loss": 0.3275, "step": 3590 }, { "epoch": 0.16, "grad_norm": 1.000636545965187, "learning_rate": 1.9060246140753047e-05, "loss": 0.4981, "step": 3591 }, { "epoch": 0.17, "grad_norm": 0.42929827215701666, "learning_rate": 1.9059616315851827e-05, "loss": 0.2574, "step": 3592 }, { "epoch": 0.17, "grad_norm": 0.3188737702757787, "learning_rate": 1.9058986290379432e-05, "loss": 0.1808, "step": 3593 }, { "epoch": 0.17, "grad_norm": 0.4732152715317135, "learning_rate": 1.9058356064349818e-05, "loss": 0.3413, "step": 3594 }, { "epoch": 0.17, "grad_norm": 0.9192125721587081, "learning_rate": 1.9057725637776924e-05, "loss": 0.5591, "step": 3595 }, { "epoch": 0.17, "grad_norm": 0.4184446728067903, "learning_rate": 1.9057095010674716e-05, "loss": 0.271, "step": 3596 }, { "epoch": 0.17, "grad_norm": 0.4888435262433435, "learning_rate": 1.9056464183057157e-05, "loss": 0.4041, "step": 3597 }, { "epoch": 0.17, "grad_norm": 0.5630761644754007, "learning_rate": 1.9055833154938208e-05, "loss": 0.365, "step": 3598 }, { "epoch": 0.17, "grad_norm": 0.24796852044454393, "learning_rate": 1.9055201926331843e-05, "loss": 0.159, "step": 3599 }, { "epoch": 0.17, "grad_norm": 1.4934362222001998, "learning_rate": 1.9054570497252033e-05, "loss": 0.7869, "step": 3600 }, { "epoch": 0.17, "grad_norm": 0.6271211164870093, "learning_rate": 1.9053938867712756e-05, "loss": 0.4057, "step": 3601 }, { "epoch": 0.17, "grad_norm": 0.33537383736379334, "learning_rate": 1.9053307037728005e-05, "loss": 0.2546, "step": 3602 }, { "epoch": 0.17, "grad_norm": 1.1058377418057972, "learning_rate": 1.9052675007311757e-05, "loss": 0.594, "step": 3603 }, { "epoch": 0.17, "grad_norm": 0.46280358685002165, "learning_rate": 1.905204277647801e-05, "loss": 0.2907, "step": 3604 }, { "epoch": 0.17, "grad_norm": 0.37174055584584664, "learning_rate": 1.9051410345240762e-05, "loss": 0.2769, "step": 3605 }, { "epoch": 0.17, "grad_norm": 0.3267902390276805, "learning_rate": 1.905077771361401e-05, "loss": 0.2267, "step": 3606 }, { "epoch": 0.17, "grad_norm": 0.7727724927058522, "learning_rate": 1.9050144881611766e-05, "loss": 0.492, "step": 3607 }, { "epoch": 0.17, "grad_norm": 0.518280909747992, "learning_rate": 1.9049511849248038e-05, "loss": 0.3186, "step": 3608 }, { "epoch": 0.17, "grad_norm": 0.40515053873025125, "learning_rate": 1.9048878616536837e-05, "loss": 0.2929, "step": 3609 }, { "epoch": 0.17, "grad_norm": 0.4648485607367147, "learning_rate": 1.9048245183492183e-05, "loss": 0.3139, "step": 3610 }, { "epoch": 0.17, "grad_norm": 0.4494859829406393, "learning_rate": 1.9047611550128103e-05, "loss": 0.3202, "step": 3611 }, { "epoch": 0.17, "grad_norm": 0.5285339484543778, "learning_rate": 1.9046977716458627e-05, "loss": 0.2604, "step": 3612 }, { "epoch": 0.17, "grad_norm": 0.551302806007306, "learning_rate": 1.9046343682497782e-05, "loss": 0.4146, "step": 3613 }, { "epoch": 0.17, "grad_norm": 0.36153224534077544, "learning_rate": 1.9045709448259605e-05, "loss": 0.265, "step": 3614 }, { "epoch": 0.17, "grad_norm": 0.3812848254682872, "learning_rate": 1.904507501375814e-05, "loss": 0.2146, "step": 3615 }, { "epoch": 0.17, "grad_norm": 1.2345076073027008, "learning_rate": 1.9044440379007433e-05, "loss": 0.6762, "step": 3616 }, { "epoch": 0.17, "grad_norm": 0.31548109578694955, "learning_rate": 1.9043805544021533e-05, "loss": 0.2256, "step": 3617 }, { "epoch": 0.17, "grad_norm": 0.4729073558754234, "learning_rate": 1.9043170508814493e-05, "loss": 0.351, "step": 3618 }, { "epoch": 0.17, "grad_norm": 0.6990092133802741, "learning_rate": 1.9042535273400377e-05, "loss": 0.4435, "step": 3619 }, { "epoch": 0.17, "grad_norm": 0.4181696480935202, "learning_rate": 1.904189983779324e-05, "loss": 0.2916, "step": 3620 }, { "epoch": 0.17, "grad_norm": 0.5446886838396515, "learning_rate": 1.9041264202007158e-05, "loss": 0.3072, "step": 3621 }, { "epoch": 0.17, "grad_norm": 0.44333955207391407, "learning_rate": 1.9040628366056203e-05, "loss": 0.2916, "step": 3622 }, { "epoch": 0.17, "grad_norm": 0.4053059276584727, "learning_rate": 1.903999232995445e-05, "loss": 0.305, "step": 3623 }, { "epoch": 0.17, "grad_norm": 0.7694280759508905, "learning_rate": 1.9039356093715975e-05, "loss": 0.5049, "step": 3624 }, { "epoch": 0.17, "grad_norm": 0.4229212744386553, "learning_rate": 1.903871965735487e-05, "loss": 0.3236, "step": 3625 }, { "epoch": 0.17, "grad_norm": 0.4928128434745314, "learning_rate": 1.9038083020885224e-05, "loss": 0.2895, "step": 3626 }, { "epoch": 0.17, "grad_norm": 0.3265285220116368, "learning_rate": 1.9037446184321133e-05, "loss": 0.2367, "step": 3627 }, { "epoch": 0.17, "grad_norm": 1.2869534349387113, "learning_rate": 1.9036809147676693e-05, "loss": 0.3916, "step": 3628 }, { "epoch": 0.17, "grad_norm": 0.47749848407116285, "learning_rate": 1.9036171910966005e-05, "loss": 0.2833, "step": 3629 }, { "epoch": 0.17, "grad_norm": 0.4088996013471531, "learning_rate": 1.9035534474203183e-05, "loss": 0.3243, "step": 3630 }, { "epoch": 0.17, "grad_norm": 0.7890569422241258, "learning_rate": 1.9034896837402334e-05, "loss": 0.5587, "step": 3631 }, { "epoch": 0.17, "grad_norm": 0.39812506377254775, "learning_rate": 1.903425900057758e-05, "loss": 0.2378, "step": 3632 }, { "epoch": 0.17, "grad_norm": 0.3656773819609009, "learning_rate": 1.9033620963743037e-05, "loss": 0.2394, "step": 3633 }, { "epoch": 0.17, "grad_norm": 2.643147033178939, "learning_rate": 1.9032982726912833e-05, "loss": 0.6897, "step": 3634 }, { "epoch": 0.17, "grad_norm": 0.39306352171887404, "learning_rate": 1.9032344290101098e-05, "loss": 0.2279, "step": 3635 }, { "epoch": 0.17, "grad_norm": 0.8084941610203455, "learning_rate": 1.9031705653321967e-05, "loss": 0.5142, "step": 3636 }, { "epoch": 0.17, "grad_norm": 0.4312529594105459, "learning_rate": 1.9031066816589575e-05, "loss": 0.3485, "step": 3637 }, { "epoch": 0.17, "grad_norm": 2.599628694406278, "learning_rate": 1.9030427779918072e-05, "loss": 0.2689, "step": 3638 }, { "epoch": 0.17, "grad_norm": 1.2895489892850096, "learning_rate": 1.90297885433216e-05, "loss": 0.1837, "step": 3639 }, { "epoch": 0.17, "grad_norm": 1.087871744668784, "learning_rate": 1.902914910681431e-05, "loss": 0.565, "step": 3640 }, { "epoch": 0.17, "grad_norm": 1.0998215245215566, "learning_rate": 1.9028509470410363e-05, "loss": 0.3347, "step": 3641 }, { "epoch": 0.17, "grad_norm": 0.44266237041915735, "learning_rate": 1.9027869634123918e-05, "loss": 0.3116, "step": 3642 }, { "epoch": 0.17, "grad_norm": 0.9624466259785366, "learning_rate": 1.902722959796914e-05, "loss": 0.6728, "step": 3643 }, { "epoch": 0.17, "grad_norm": 2.1412342745639976, "learning_rate": 1.90265893619602e-05, "loss": 0.4162, "step": 3644 }, { "epoch": 0.17, "grad_norm": 0.5096362669807545, "learning_rate": 1.902594892611127e-05, "loss": 0.2385, "step": 3645 }, { "epoch": 0.17, "grad_norm": 2.0974737301163393, "learning_rate": 1.902530829043653e-05, "loss": 0.3994, "step": 3646 }, { "epoch": 0.17, "grad_norm": 4.640916146038519, "learning_rate": 1.9024667454950165e-05, "loss": 0.3906, "step": 3647 }, { "epoch": 0.17, "grad_norm": 0.6593392264993503, "learning_rate": 1.902402641966636e-05, "loss": 0.3145, "step": 3648 }, { "epoch": 0.17, "grad_norm": 0.733442156835296, "learning_rate": 1.9023385184599308e-05, "loss": 0.3593, "step": 3649 }, { "epoch": 0.17, "grad_norm": 2.1011254509141817, "learning_rate": 1.9022743749763205e-05, "loss": 0.3722, "step": 3650 }, { "epoch": 0.17, "grad_norm": 5.13940781479072, "learning_rate": 1.902210211517225e-05, "loss": 0.2424, "step": 3651 }, { "epoch": 0.17, "grad_norm": 0.9980933983271467, "learning_rate": 1.902146028084065e-05, "loss": 0.546, "step": 3652 }, { "epoch": 0.17, "grad_norm": 0.6194413557929157, "learning_rate": 1.9020818246782614e-05, "loss": 0.3113, "step": 3653 }, { "epoch": 0.17, "grad_norm": 0.6029196987120017, "learning_rate": 1.902017601301236e-05, "loss": 0.3381, "step": 3654 }, { "epoch": 0.17, "grad_norm": 10.300518900494996, "learning_rate": 1.90195335795441e-05, "loss": 0.5999, "step": 3655 }, { "epoch": 0.17, "grad_norm": 1.8183861237381742, "learning_rate": 1.901889094639206e-05, "loss": 0.3865, "step": 3656 }, { "epoch": 0.17, "grad_norm": 0.8968419988136566, "learning_rate": 1.9018248113570467e-05, "loss": 0.3532, "step": 3657 }, { "epoch": 0.17, "grad_norm": 1.6323725567344978, "learning_rate": 1.901760508109355e-05, "loss": 0.2662, "step": 3658 }, { "epoch": 0.17, "grad_norm": 0.6921417386206159, "learning_rate": 1.9016961848975554e-05, "loss": 0.3472, "step": 3659 }, { "epoch": 0.17, "grad_norm": 0.8121433824324087, "learning_rate": 1.901631841723071e-05, "loss": 0.4131, "step": 3660 }, { "epoch": 0.17, "grad_norm": 1.0053874535722547, "learning_rate": 1.9015674785873262e-05, "loss": 0.3475, "step": 3661 }, { "epoch": 0.17, "grad_norm": 1.5901095939584549, "learning_rate": 1.901503095491747e-05, "loss": 0.3862, "step": 3662 }, { "epoch": 0.17, "grad_norm": 1.2729058474439323, "learning_rate": 1.9014386924377583e-05, "loss": 0.3844, "step": 3663 }, { "epoch": 0.17, "grad_norm": 0.7690873668403949, "learning_rate": 1.901374269426785e-05, "loss": 0.325, "step": 3664 }, { "epoch": 0.17, "grad_norm": 1.4446244583011774, "learning_rate": 1.9013098264602546e-05, "loss": 0.3943, "step": 3665 }, { "epoch": 0.17, "grad_norm": 0.5810935057751412, "learning_rate": 1.901245363539593e-05, "loss": 0.2938, "step": 3666 }, { "epoch": 0.17, "grad_norm": 1.9797060112978142, "learning_rate": 1.901180880666228e-05, "loss": 0.9236, "step": 3667 }, { "epoch": 0.17, "grad_norm": 0.6829773557541724, "learning_rate": 1.901116377841587e-05, "loss": 0.1498, "step": 3668 }, { "epoch": 0.17, "grad_norm": 1.0244203633578972, "learning_rate": 1.9010518550670976e-05, "loss": 0.335, "step": 3669 }, { "epoch": 0.17, "grad_norm": 1.2171900333239536, "learning_rate": 1.9009873123441888e-05, "loss": 0.4846, "step": 3670 }, { "epoch": 0.17, "grad_norm": 0.6489798659947853, "learning_rate": 1.900922749674289e-05, "loss": 0.1805, "step": 3671 }, { "epoch": 0.17, "grad_norm": 0.9612107847301576, "learning_rate": 1.900858167058828e-05, "loss": 0.3967, "step": 3672 }, { "epoch": 0.17, "grad_norm": 0.6960731937377788, "learning_rate": 1.9007935644992357e-05, "loss": 0.373, "step": 3673 }, { "epoch": 0.17, "grad_norm": 0.5099680196369142, "learning_rate": 1.9007289419969418e-05, "loss": 0.2442, "step": 3674 }, { "epoch": 0.17, "grad_norm": 0.7771283130293353, "learning_rate": 1.9006642995533774e-05, "loss": 0.4185, "step": 3675 }, { "epoch": 0.17, "grad_norm": 0.8646403974277469, "learning_rate": 1.900599637169973e-05, "loss": 0.4995, "step": 3676 }, { "epoch": 0.17, "grad_norm": 0.6510585997859475, "learning_rate": 1.9005349548481612e-05, "loss": 0.2154, "step": 3677 }, { "epoch": 0.17, "grad_norm": 0.6310483037752359, "learning_rate": 1.900470252589373e-05, "loss": 0.2444, "step": 3678 }, { "epoch": 0.17, "grad_norm": 1.167449208825793, "learning_rate": 1.900405530395042e-05, "loss": 0.7863, "step": 3679 }, { "epoch": 0.17, "grad_norm": 1.118863241997588, "learning_rate": 1.9003407882665998e-05, "loss": 0.5513, "step": 3680 }, { "epoch": 0.17, "grad_norm": 0.5547192715644842, "learning_rate": 1.9002760262054803e-05, "loss": 0.2657, "step": 3681 }, { "epoch": 0.17, "grad_norm": 0.6968001941131859, "learning_rate": 1.9002112442131176e-05, "loss": 0.5383, "step": 3682 }, { "epoch": 0.17, "grad_norm": 0.6571420225621227, "learning_rate": 1.900146442290945e-05, "loss": 0.1766, "step": 3683 }, { "epoch": 0.17, "grad_norm": 0.5429096860541407, "learning_rate": 1.9000816204403985e-05, "loss": 0.2802, "step": 3684 }, { "epoch": 0.17, "grad_norm": 0.5502346063745076, "learning_rate": 1.900016778662912e-05, "loss": 0.3408, "step": 3685 }, { "epoch": 0.17, "grad_norm": 0.8931061630021733, "learning_rate": 1.8999519169599217e-05, "loss": 0.4635, "step": 3686 }, { "epoch": 0.17, "grad_norm": 0.43906256365063023, "learning_rate": 1.899887035332863e-05, "loss": 0.323, "step": 3687 }, { "epoch": 0.17, "grad_norm": 2.000663032252841, "learning_rate": 1.8998221337831726e-05, "loss": 0.8754, "step": 3688 }, { "epoch": 0.17, "grad_norm": 0.440839046465537, "learning_rate": 1.899757212312288e-05, "loss": 0.2802, "step": 3689 }, { "epoch": 0.17, "grad_norm": 0.4263753706872294, "learning_rate": 1.8996922709216456e-05, "loss": 0.1935, "step": 3690 }, { "epoch": 0.17, "grad_norm": 1.4325165553682218, "learning_rate": 1.8996273096126834e-05, "loss": 0.8307, "step": 3691 }, { "epoch": 0.17, "grad_norm": 0.9189253696946754, "learning_rate": 1.8995623283868396e-05, "loss": 0.4023, "step": 3692 }, { "epoch": 0.17, "grad_norm": 0.5549957230873905, "learning_rate": 1.8994973272455527e-05, "loss": 0.3206, "step": 3693 }, { "epoch": 0.17, "grad_norm": 0.46522126407074005, "learning_rate": 1.8994323061902623e-05, "loss": 0.3439, "step": 3694 }, { "epoch": 0.17, "grad_norm": 0.4977730831598749, "learning_rate": 1.8993672652224074e-05, "loss": 0.2276, "step": 3695 }, { "epoch": 0.17, "grad_norm": 0.39686913329361523, "learning_rate": 1.899302204343428e-05, "loss": 0.266, "step": 3696 }, { "epoch": 0.17, "grad_norm": 0.6202783335228893, "learning_rate": 1.8992371235547647e-05, "loss": 0.3211, "step": 3697 }, { "epoch": 0.17, "grad_norm": 1.1883678431577398, "learning_rate": 1.899172022857858e-05, "loss": 0.549, "step": 3698 }, { "epoch": 0.17, "grad_norm": 0.4235299168201256, "learning_rate": 1.8991069022541495e-05, "loss": 0.3161, "step": 3699 }, { "epoch": 0.17, "grad_norm": 0.4434834887863107, "learning_rate": 1.8990417617450806e-05, "loss": 0.277, "step": 3700 }, { "epoch": 0.17, "grad_norm": 0.5024827734215425, "learning_rate": 1.8989766013320938e-05, "loss": 0.2186, "step": 3701 }, { "epoch": 0.17, "grad_norm": 0.4047567569233461, "learning_rate": 1.8989114210166312e-05, "loss": 0.2381, "step": 3702 }, { "epoch": 0.17, "grad_norm": 1.616415806603672, "learning_rate": 1.8988462208001363e-05, "loss": 0.4627, "step": 3703 }, { "epoch": 0.17, "grad_norm": 0.6845391748682544, "learning_rate": 1.8987810006840525e-05, "loss": 0.3676, "step": 3704 }, { "epoch": 0.17, "grad_norm": 0.4026300469286597, "learning_rate": 1.8987157606698234e-05, "loss": 0.2959, "step": 3705 }, { "epoch": 0.17, "grad_norm": 1.3782966715533436, "learning_rate": 1.8986505007588936e-05, "loss": 0.6863, "step": 3706 }, { "epoch": 0.17, "grad_norm": 0.42262451196313494, "learning_rate": 1.898585220952708e-05, "loss": 0.206, "step": 3707 }, { "epoch": 0.17, "grad_norm": 0.5353271489538961, "learning_rate": 1.8985199212527115e-05, "loss": 0.3356, "step": 3708 }, { "epoch": 0.17, "grad_norm": 0.7424185279194552, "learning_rate": 1.8984546016603503e-05, "loss": 0.384, "step": 3709 }, { "epoch": 0.17, "grad_norm": 1.4662784354012395, "learning_rate": 1.89838926217707e-05, "loss": 0.399, "step": 3710 }, { "epoch": 0.17, "grad_norm": 0.4676236451135859, "learning_rate": 1.898323902804317e-05, "loss": 0.3036, "step": 3711 }, { "epoch": 0.17, "grad_norm": 0.3998719552726794, "learning_rate": 1.8982585235435385e-05, "loss": 0.2779, "step": 3712 }, { "epoch": 0.17, "grad_norm": 0.4791024924881441, "learning_rate": 1.8981931243961823e-05, "loss": 0.2139, "step": 3713 }, { "epoch": 0.17, "grad_norm": 0.40480283423769126, "learning_rate": 1.8981277053636963e-05, "loss": 0.2267, "step": 3714 }, { "epoch": 0.17, "grad_norm": 1.395365682942663, "learning_rate": 1.898062266447528e-05, "loss": 0.4894, "step": 3715 }, { "epoch": 0.17, "grad_norm": 0.5614093371067592, "learning_rate": 1.8979968076491273e-05, "loss": 0.3111, "step": 3716 }, { "epoch": 0.17, "grad_norm": 0.46041510522170354, "learning_rate": 1.8979313289699422e-05, "loss": 0.3126, "step": 3717 }, { "epoch": 0.17, "grad_norm": 0.6571036633173082, "learning_rate": 1.8978658304114234e-05, "loss": 0.36, "step": 3718 }, { "epoch": 0.17, "grad_norm": 1.8866654247788257, "learning_rate": 1.8978003119750203e-05, "loss": 0.7089, "step": 3719 }, { "epoch": 0.17, "grad_norm": 0.42528689593962826, "learning_rate": 1.8977347736621834e-05, "loss": 0.2556, "step": 3720 }, { "epoch": 0.17, "grad_norm": 0.565851674040267, "learning_rate": 1.8976692154743638e-05, "loss": 0.373, "step": 3721 }, { "epoch": 0.17, "grad_norm": 1.4838162471382552, "learning_rate": 1.8976036374130134e-05, "loss": 0.5963, "step": 3722 }, { "epoch": 0.17, "grad_norm": 0.29957076529803106, "learning_rate": 1.8975380394795833e-05, "loss": 0.1552, "step": 3723 }, { "epoch": 0.17, "grad_norm": 1.858469939406834, "learning_rate": 1.8974724216755262e-05, "loss": 0.7371, "step": 3724 }, { "epoch": 0.17, "grad_norm": 0.4807925018011489, "learning_rate": 1.8974067840022945e-05, "loss": 0.3649, "step": 3725 }, { "epoch": 0.17, "grad_norm": 0.3858511358962396, "learning_rate": 1.897341126461342e-05, "loss": 0.2759, "step": 3726 }, { "epoch": 0.17, "grad_norm": 1.6854265381497475, "learning_rate": 1.897275449054121e-05, "loss": 0.5761, "step": 3727 }, { "epoch": 0.17, "grad_norm": 0.5749659330347707, "learning_rate": 1.8972097517820873e-05, "loss": 0.3638, "step": 3728 }, { "epoch": 0.17, "grad_norm": 0.3751925093523235, "learning_rate": 1.8971440346466937e-05, "loss": 0.21, "step": 3729 }, { "epoch": 0.17, "grad_norm": 0.3891359935281408, "learning_rate": 1.897078297649396e-05, "loss": 0.2091, "step": 3730 }, { "epoch": 0.17, "grad_norm": 1.064027229087353, "learning_rate": 1.8970125407916497e-05, "loss": 0.5092, "step": 3731 }, { "epoch": 0.17, "grad_norm": 0.4922246591920602, "learning_rate": 1.89694676407491e-05, "loss": 0.3039, "step": 3732 }, { "epoch": 0.17, "grad_norm": 0.41657869002765374, "learning_rate": 1.8968809675006334e-05, "loss": 0.3078, "step": 3733 }, { "epoch": 0.17, "grad_norm": 1.2776787780400445, "learning_rate": 1.896815151070277e-05, "loss": 0.7771, "step": 3734 }, { "epoch": 0.17, "grad_norm": 0.39400485456716355, "learning_rate": 1.8967493147852974e-05, "loss": 0.3021, "step": 3735 }, { "epoch": 0.17, "grad_norm": 0.3494657797789317, "learning_rate": 1.8966834586471517e-05, "loss": 0.1948, "step": 3736 }, { "epoch": 0.17, "grad_norm": 1.089432436252414, "learning_rate": 1.896617582657299e-05, "loss": 0.5613, "step": 3737 }, { "epoch": 0.17, "grad_norm": 0.4486100407780712, "learning_rate": 1.8965516868171973e-05, "loss": 0.3441, "step": 3738 }, { "epoch": 0.17, "grad_norm": 0.7203657736566297, "learning_rate": 1.896485771128305e-05, "loss": 0.4487, "step": 3739 }, { "epoch": 0.17, "grad_norm": 0.4852045214703237, "learning_rate": 1.896419835592082e-05, "loss": 0.3736, "step": 3740 }, { "epoch": 0.17, "grad_norm": 0.46394766813715893, "learning_rate": 1.8963538802099875e-05, "loss": 0.3125, "step": 3741 }, { "epoch": 0.17, "grad_norm": 0.4394214497636867, "learning_rate": 1.8962879049834825e-05, "loss": 0.1049, "step": 3742 }, { "epoch": 0.17, "grad_norm": 1.0139946835718965, "learning_rate": 1.8962219099140268e-05, "loss": 0.4577, "step": 3743 }, { "epoch": 0.17, "grad_norm": 0.404295228616198, "learning_rate": 1.896155895003082e-05, "loss": 0.2854, "step": 3744 }, { "epoch": 0.17, "grad_norm": 0.6598352667598504, "learning_rate": 1.8960898602521093e-05, "loss": 0.4018, "step": 3745 }, { "epoch": 0.17, "grad_norm": 0.9699982477864619, "learning_rate": 1.8960238056625707e-05, "loss": 0.5274, "step": 3746 }, { "epoch": 0.17, "grad_norm": 0.49111293582629667, "learning_rate": 1.8959577312359287e-05, "loss": 0.2835, "step": 3747 }, { "epoch": 0.17, "grad_norm": 0.30084833323717486, "learning_rate": 1.895891636973646e-05, "loss": 0.2266, "step": 3748 }, { "epoch": 0.17, "grad_norm": 0.5663267396998604, "learning_rate": 1.895825522877186e-05, "loss": 0.373, "step": 3749 }, { "epoch": 0.17, "grad_norm": 0.5180450970564421, "learning_rate": 1.8957593889480127e-05, "loss": 0.3266, "step": 3750 }, { "epoch": 0.17, "grad_norm": 0.6340330213163782, "learning_rate": 1.8956932351875893e-05, "loss": 0.409, "step": 3751 }, { "epoch": 0.17, "grad_norm": 0.47712638623116027, "learning_rate": 1.895627061597381e-05, "loss": 0.2895, "step": 3752 }, { "epoch": 0.17, "grad_norm": 0.4533859551426609, "learning_rate": 1.8955608681788532e-05, "loss": 0.2782, "step": 3753 }, { "epoch": 0.17, "grad_norm": 0.3021569639092531, "learning_rate": 1.8954946549334707e-05, "loss": 0.2485, "step": 3754 }, { "epoch": 0.17, "grad_norm": 0.5965317285807341, "learning_rate": 1.8954284218626995e-05, "loss": 0.4855, "step": 3755 }, { "epoch": 0.17, "grad_norm": 0.34036434232534696, "learning_rate": 1.895362168968006e-05, "loss": 0.2222, "step": 3756 }, { "epoch": 0.17, "grad_norm": 0.5941958584774611, "learning_rate": 1.895295896250857e-05, "loss": 0.4021, "step": 3757 }, { "epoch": 0.17, "grad_norm": 1.7159526561327243, "learning_rate": 1.89522960371272e-05, "loss": 0.8156, "step": 3758 }, { "epoch": 0.17, "grad_norm": 0.45745800275554843, "learning_rate": 1.8951632913550625e-05, "loss": 0.2111, "step": 3759 }, { "epoch": 0.17, "grad_norm": 0.48009331366598, "learning_rate": 1.895096959179352e-05, "loss": 0.3866, "step": 3760 }, { "epoch": 0.17, "grad_norm": 0.4238114569873109, "learning_rate": 1.8950306071870583e-05, "loss": 0.3381, "step": 3761 }, { "epoch": 0.17, "grad_norm": 0.33041634711102796, "learning_rate": 1.894964235379649e-05, "loss": 0.1903, "step": 3762 }, { "epoch": 0.17, "grad_norm": 1.1525106016945428, "learning_rate": 1.8948978437585946e-05, "loss": 0.7198, "step": 3763 }, { "epoch": 0.17, "grad_norm": 0.4729686781775277, "learning_rate": 1.894831432325364e-05, "loss": 0.376, "step": 3764 }, { "epoch": 0.17, "grad_norm": 0.3300459819102284, "learning_rate": 1.894765001081428e-05, "loss": 0.0855, "step": 3765 }, { "epoch": 0.17, "grad_norm": 0.4431413127446698, "learning_rate": 1.8946985500282574e-05, "loss": 0.3642, "step": 3766 }, { "epoch": 0.17, "grad_norm": 0.30472923790560014, "learning_rate": 1.8946320791673232e-05, "loss": 0.2598, "step": 3767 }, { "epoch": 0.17, "grad_norm": 0.4798823303764402, "learning_rate": 1.894565588500097e-05, "loss": 0.3291, "step": 3768 }, { "epoch": 0.17, "grad_norm": 0.4142473003065064, "learning_rate": 1.894499078028051e-05, "loss": 0.2792, "step": 3769 }, { "epoch": 0.17, "grad_norm": 1.2834734537805503, "learning_rate": 1.8944325477526573e-05, "loss": 0.7206, "step": 3770 }, { "epoch": 0.17, "grad_norm": 0.5993293374844946, "learning_rate": 1.894365997675389e-05, "loss": 0.3461, "step": 3771 }, { "epoch": 0.17, "grad_norm": 0.3529961841380468, "learning_rate": 1.8942994277977197e-05, "loss": 0.2848, "step": 3772 }, { "epoch": 0.17, "grad_norm": 0.7505628723974592, "learning_rate": 1.894232838121123e-05, "loss": 0.5152, "step": 3773 }, { "epoch": 0.17, "grad_norm": 0.29326615637552883, "learning_rate": 1.894166228647073e-05, "loss": 0.1495, "step": 3774 }, { "epoch": 0.17, "grad_norm": 0.5303874621499461, "learning_rate": 1.8940995993770448e-05, "loss": 0.308, "step": 3775 }, { "epoch": 0.17, "grad_norm": 0.6100775975676903, "learning_rate": 1.894032950312513e-05, "loss": 0.3779, "step": 3776 }, { "epoch": 0.17, "grad_norm": 0.5380217322695732, "learning_rate": 1.8939662814549532e-05, "loss": 0.3409, "step": 3777 }, { "epoch": 0.17, "grad_norm": 0.5186684794969783, "learning_rate": 1.8938995928058417e-05, "loss": 0.3427, "step": 3778 }, { "epoch": 0.17, "grad_norm": 0.3438986406149454, "learning_rate": 1.8938328843666548e-05, "loss": 0.2101, "step": 3779 }, { "epoch": 0.17, "grad_norm": 0.3621507198293156, "learning_rate": 1.8937661561388694e-05, "loss": 0.2961, "step": 3780 }, { "epoch": 0.17, "grad_norm": 0.6170979497152046, "learning_rate": 1.8936994081239627e-05, "loss": 0.3863, "step": 3781 }, { "epoch": 0.17, "grad_norm": 0.7438517089597998, "learning_rate": 1.8936326403234125e-05, "loss": 0.4586, "step": 3782 }, { "epoch": 0.17, "grad_norm": 0.5604726344443632, "learning_rate": 1.893565852738697e-05, "loss": 0.4279, "step": 3783 }, { "epoch": 0.17, "grad_norm": 0.4244975284524602, "learning_rate": 1.8934990453712944e-05, "loss": 0.3269, "step": 3784 }, { "epoch": 0.17, "grad_norm": 0.39896152818694464, "learning_rate": 1.8934322182226843e-05, "loss": 0.2274, "step": 3785 }, { "epoch": 0.17, "grad_norm": 0.412941709907691, "learning_rate": 1.8933653712943457e-05, "loss": 0.2226, "step": 3786 }, { "epoch": 0.17, "grad_norm": 0.43685101865741244, "learning_rate": 1.8932985045877594e-05, "loss": 0.308, "step": 3787 }, { "epoch": 0.17, "grad_norm": 0.4610269608574551, "learning_rate": 1.893231618104405e-05, "loss": 0.3087, "step": 3788 }, { "epoch": 0.17, "grad_norm": 0.6830710206289271, "learning_rate": 1.893164711845763e-05, "loss": 0.3556, "step": 3789 }, { "epoch": 0.17, "grad_norm": 0.42993448675483253, "learning_rate": 1.8930977858133157e-05, "loss": 0.3573, "step": 3790 }, { "epoch": 0.17, "grad_norm": 0.9703143784984662, "learning_rate": 1.893030840008544e-05, "loss": 0.2729, "step": 3791 }, { "epoch": 0.17, "grad_norm": 0.29601278255293695, "learning_rate": 1.89296387443293e-05, "loss": 0.2396, "step": 3792 }, { "epoch": 0.17, "grad_norm": 0.5202364884330402, "learning_rate": 1.8928968890879567e-05, "loss": 0.3816, "step": 3793 }, { "epoch": 0.17, "grad_norm": 1.4523684326657587, "learning_rate": 1.892829883975107e-05, "loss": 0.524, "step": 3794 }, { "epoch": 0.17, "grad_norm": 0.34641037648278644, "learning_rate": 1.8927628590958643e-05, "loss": 0.2271, "step": 3795 }, { "epoch": 0.17, "grad_norm": 0.3838078551354471, "learning_rate": 1.8926958144517118e-05, "loss": 0.33, "step": 3796 }, { "epoch": 0.17, "grad_norm": 1.167711146671076, "learning_rate": 1.8926287500441346e-05, "loss": 0.6616, "step": 3797 }, { "epoch": 0.17, "grad_norm": 0.33171824424706625, "learning_rate": 1.8925616658746174e-05, "loss": 0.1562, "step": 3798 }, { "epoch": 0.17, "grad_norm": 0.6671980643214174, "learning_rate": 1.8924945619446452e-05, "loss": 0.4283, "step": 3799 }, { "epoch": 0.17, "grad_norm": 0.5880208623248997, "learning_rate": 1.8924274382557033e-05, "loss": 0.3771, "step": 3800 }, { "epoch": 0.17, "grad_norm": 0.5875173546201404, "learning_rate": 1.8923602948092782e-05, "loss": 0.2291, "step": 3801 }, { "epoch": 0.17, "grad_norm": 0.4422039565180075, "learning_rate": 1.892293131606856e-05, "loss": 0.3301, "step": 3802 }, { "epoch": 0.17, "grad_norm": 0.5016023794249276, "learning_rate": 1.8922259486499243e-05, "loss": 0.3912, "step": 3803 }, { "epoch": 0.17, "grad_norm": 0.43038263397043336, "learning_rate": 1.8921587459399696e-05, "loss": 0.2512, "step": 3804 }, { "epoch": 0.17, "grad_norm": 0.43277972727728126, "learning_rate": 1.8920915234784805e-05, "loss": 0.3244, "step": 3805 }, { "epoch": 0.17, "grad_norm": 0.8482089424796287, "learning_rate": 1.8920242812669448e-05, "loss": 0.5095, "step": 3806 }, { "epoch": 0.17, "grad_norm": 0.35872356684728096, "learning_rate": 1.891957019306851e-05, "loss": 0.2119, "step": 3807 }, { "epoch": 0.17, "grad_norm": 0.3555964240337658, "learning_rate": 1.891889737599689e-05, "loss": 0.2288, "step": 3808 }, { "epoch": 0.17, "grad_norm": 1.2664678687376982, "learning_rate": 1.8918224361469474e-05, "loss": 0.6336, "step": 3809 }, { "epoch": 0.18, "grad_norm": 0.8025907102716647, "learning_rate": 1.8917551149501165e-05, "loss": 0.5077, "step": 3810 }, { "epoch": 0.18, "grad_norm": 0.3534170430749267, "learning_rate": 1.891687774010687e-05, "loss": 0.2932, "step": 3811 }, { "epoch": 0.18, "grad_norm": 0.522821627782289, "learning_rate": 1.89162041333015e-05, "loss": 0.3986, "step": 3812 }, { "epoch": 0.18, "grad_norm": 0.3694861954705531, "learning_rate": 1.8915530329099958e-05, "loss": 0.1883, "step": 3813 }, { "epoch": 0.18, "grad_norm": 0.4084187952089302, "learning_rate": 1.891485632751717e-05, "loss": 0.2293, "step": 3814 }, { "epoch": 0.18, "grad_norm": 0.5450867963604158, "learning_rate": 1.8914182128568055e-05, "loss": 0.3867, "step": 3815 }, { "epoch": 0.18, "grad_norm": 0.49892401452017954, "learning_rate": 1.891350773226754e-05, "loss": 0.3567, "step": 3816 }, { "epoch": 0.18, "grad_norm": 0.392024968654682, "learning_rate": 1.8912833138630552e-05, "loss": 0.2647, "step": 3817 }, { "epoch": 0.18, "grad_norm": 0.6253472137590154, "learning_rate": 1.8912158347672032e-05, "loss": 0.4868, "step": 3818 }, { "epoch": 0.18, "grad_norm": 0.38146153639857605, "learning_rate": 1.891148335940692e-05, "loss": 0.2716, "step": 3819 }, { "epoch": 0.18, "grad_norm": 0.4345114657610416, "learning_rate": 1.8910808173850145e-05, "loss": 0.3154, "step": 3820 }, { "epoch": 0.18, "grad_norm": 0.34524599137834444, "learning_rate": 1.891013279101667e-05, "loss": 0.1257, "step": 3821 }, { "epoch": 0.18, "grad_norm": 0.7634413534659626, "learning_rate": 1.890945721092144e-05, "loss": 0.5379, "step": 3822 }, { "epoch": 0.18, "grad_norm": 0.3935076634723586, "learning_rate": 1.890878143357942e-05, "loss": 0.2809, "step": 3823 }, { "epoch": 0.18, "grad_norm": 0.36663621107665906, "learning_rate": 1.8908105459005565e-05, "loss": 0.3159, "step": 3824 }, { "epoch": 0.18, "grad_norm": 0.45265457528388015, "learning_rate": 1.890742928721484e-05, "loss": 0.291, "step": 3825 }, { "epoch": 0.18, "grad_norm": 0.2948815824052943, "learning_rate": 1.8906752918222213e-05, "loss": 0.1988, "step": 3826 }, { "epoch": 0.18, "grad_norm": 0.4890414945176667, "learning_rate": 1.890607635204266e-05, "loss": 0.2877, "step": 3827 }, { "epoch": 0.18, "grad_norm": 0.5198007296058611, "learning_rate": 1.8905399588691165e-05, "loss": 0.3936, "step": 3828 }, { "epoch": 0.18, "grad_norm": 0.3617933876138676, "learning_rate": 1.8904722628182702e-05, "loss": 0.3082, "step": 3829 }, { "epoch": 0.18, "grad_norm": 0.9034954700428638, "learning_rate": 1.8904045470532268e-05, "loss": 0.5778, "step": 3830 }, { "epoch": 0.18, "grad_norm": 0.3950364556417189, "learning_rate": 1.8903368115754843e-05, "loss": 0.2959, "step": 3831 }, { "epoch": 0.18, "grad_norm": 0.2993735687870497, "learning_rate": 1.8902690563865432e-05, "loss": 0.2316, "step": 3832 }, { "epoch": 0.18, "grad_norm": 0.5808594972599243, "learning_rate": 1.890201281487903e-05, "loss": 0.2789, "step": 3833 }, { "epoch": 0.18, "grad_norm": 0.8378721245037347, "learning_rate": 1.8901334868810647e-05, "loss": 0.3803, "step": 3834 }, { "epoch": 0.18, "grad_norm": 0.45497158778151686, "learning_rate": 1.8900656725675287e-05, "loss": 0.3097, "step": 3835 }, { "epoch": 0.18, "grad_norm": 0.43442316088443067, "learning_rate": 1.889997838548797e-05, "loss": 0.382, "step": 3836 }, { "epoch": 0.18, "grad_norm": 0.29611133006842777, "learning_rate": 1.8899299848263704e-05, "loss": 0.1558, "step": 3837 }, { "epoch": 0.18, "grad_norm": 0.41694271447006276, "learning_rate": 1.8898621114017522e-05, "loss": 0.3016, "step": 3838 }, { "epoch": 0.18, "grad_norm": 0.3353252128316122, "learning_rate": 1.889794218276444e-05, "loss": 0.2553, "step": 3839 }, { "epoch": 0.18, "grad_norm": 1.1959355485543364, "learning_rate": 1.8897263054519498e-05, "loss": 0.4312, "step": 3840 }, { "epoch": 0.18, "grad_norm": 0.3813352310210866, "learning_rate": 1.8896583729297727e-05, "loss": 0.2738, "step": 3841 }, { "epoch": 0.18, "grad_norm": 1.2637744074166148, "learning_rate": 1.8895904207114163e-05, "loss": 0.8373, "step": 3842 }, { "epoch": 0.18, "grad_norm": 0.4368784760704501, "learning_rate": 1.8895224487983857e-05, "loss": 0.2876, "step": 3843 }, { "epoch": 0.18, "grad_norm": 0.2999088756258195, "learning_rate": 1.8894544571921854e-05, "loss": 0.2319, "step": 3844 }, { "epoch": 0.18, "grad_norm": 0.6898977886656115, "learning_rate": 1.8893864458943207e-05, "loss": 0.3323, "step": 3845 }, { "epoch": 0.18, "grad_norm": 1.2348608321140955, "learning_rate": 1.8893184149062972e-05, "loss": 0.5165, "step": 3846 }, { "epoch": 0.18, "grad_norm": 0.36173545688184566, "learning_rate": 1.8892503642296208e-05, "loss": 0.2414, "step": 3847 }, { "epoch": 0.18, "grad_norm": 0.6278927132092558, "learning_rate": 1.889182293865799e-05, "loss": 0.4224, "step": 3848 }, { "epoch": 0.18, "grad_norm": 1.1807056155577, "learning_rate": 1.889114203816338e-05, "loss": 0.6531, "step": 3849 }, { "epoch": 0.18, "grad_norm": 0.28722350765323346, "learning_rate": 1.8890460940827452e-05, "loss": 0.1689, "step": 3850 }, { "epoch": 0.18, "grad_norm": 0.4039621331491625, "learning_rate": 1.888977964666529e-05, "loss": 0.2926, "step": 3851 }, { "epoch": 0.18, "grad_norm": 1.3788718914310258, "learning_rate": 1.8889098155691977e-05, "loss": 0.5037, "step": 3852 }, { "epoch": 0.18, "grad_norm": 0.42768862789949474, "learning_rate": 1.8888416467922597e-05, "loss": 0.2118, "step": 3853 }, { "epoch": 0.18, "grad_norm": 1.3971066508389074, "learning_rate": 1.888773458337224e-05, "loss": 0.7888, "step": 3854 }, { "epoch": 0.18, "grad_norm": 0.641727292763276, "learning_rate": 1.8887052502056007e-05, "loss": 0.3738, "step": 3855 }, { "epoch": 0.18, "grad_norm": 0.4532146043193909, "learning_rate": 1.8886370223989e-05, "loss": 0.2164, "step": 3856 }, { "epoch": 0.18, "grad_norm": 0.9723851103072669, "learning_rate": 1.888568774918632e-05, "loss": 0.4761, "step": 3857 }, { "epoch": 0.18, "grad_norm": 0.31782148237399105, "learning_rate": 1.8885005077663075e-05, "loss": 0.1688, "step": 3858 }, { "epoch": 0.18, "grad_norm": 0.4753434466667399, "learning_rate": 1.888432220943438e-05, "loss": 0.3018, "step": 3859 }, { "epoch": 0.18, "grad_norm": 0.5392007280491453, "learning_rate": 1.8883639144515354e-05, "loss": 0.3199, "step": 3860 }, { "epoch": 0.18, "grad_norm": 1.6760844446586534, "learning_rate": 1.8882955882921122e-05, "loss": 0.6275, "step": 3861 }, { "epoch": 0.18, "grad_norm": 0.4712316602502112, "learning_rate": 1.888227242466681e-05, "loss": 0.3025, "step": 3862 }, { "epoch": 0.18, "grad_norm": 0.5933829035910467, "learning_rate": 1.8881588769767547e-05, "loss": 0.3335, "step": 3863 }, { "epoch": 0.18, "grad_norm": 0.32632522735749997, "learning_rate": 1.8880904918238465e-05, "loss": 0.1875, "step": 3864 }, { "epoch": 0.18, "grad_norm": 0.5371370840025611, "learning_rate": 1.888022087009471e-05, "loss": 0.2619, "step": 3865 }, { "epoch": 0.18, "grad_norm": 1.6491936302405654, "learning_rate": 1.8879536625351424e-05, "loss": 0.6028, "step": 3866 }, { "epoch": 0.18, "grad_norm": 0.48920174311228604, "learning_rate": 1.8878852184023754e-05, "loss": 0.3611, "step": 3867 }, { "epoch": 0.18, "grad_norm": 0.39620543445130757, "learning_rate": 1.8878167546126856e-05, "loss": 0.2995, "step": 3868 }, { "epoch": 0.18, "grad_norm": 1.1037585380451898, "learning_rate": 1.8877482711675884e-05, "loss": 0.4111, "step": 3869 }, { "epoch": 0.18, "grad_norm": 0.35130993083203155, "learning_rate": 1.8876797680686e-05, "loss": 0.2259, "step": 3870 }, { "epoch": 0.18, "grad_norm": 0.5273308499525118, "learning_rate": 1.8876112453172374e-05, "loss": 0.285, "step": 3871 }, { "epoch": 0.18, "grad_norm": 0.608234119264191, "learning_rate": 1.887542702915017e-05, "loss": 0.3734, "step": 3872 }, { "epoch": 0.18, "grad_norm": 0.7127655731209267, "learning_rate": 1.887474140863457e-05, "loss": 0.4171, "step": 3873 }, { "epoch": 0.18, "grad_norm": 0.5279635456035189, "learning_rate": 1.8874055591640746e-05, "loss": 0.3436, "step": 3874 }, { "epoch": 0.18, "grad_norm": 0.7064370602473943, "learning_rate": 1.8873369578183883e-05, "loss": 0.3274, "step": 3875 }, { "epoch": 0.18, "grad_norm": 0.32552355853185594, "learning_rate": 1.887268336827917e-05, "loss": 0.0776, "step": 3876 }, { "epoch": 0.18, "grad_norm": 0.4353113290739778, "learning_rate": 1.88719969619418e-05, "loss": 0.2982, "step": 3877 }, { "epoch": 0.18, "grad_norm": 0.6465512140580408, "learning_rate": 1.8871310359186968e-05, "loss": 0.4675, "step": 3878 }, { "epoch": 0.18, "grad_norm": 0.5136237005851464, "learning_rate": 1.8870623560029875e-05, "loss": 0.3179, "step": 3879 }, { "epoch": 0.18, "grad_norm": 0.5428223674298324, "learning_rate": 1.8869936564485726e-05, "loss": 0.3759, "step": 3880 }, { "epoch": 0.18, "grad_norm": 0.5489668008826314, "learning_rate": 1.8869249372569732e-05, "loss": 0.38, "step": 3881 }, { "epoch": 0.18, "grad_norm": 0.3717546248072272, "learning_rate": 1.8868561984297104e-05, "loss": 0.1922, "step": 3882 }, { "epoch": 0.18, "grad_norm": 0.5037923779674243, "learning_rate": 1.886787439968306e-05, "loss": 0.2435, "step": 3883 }, { "epoch": 0.18, "grad_norm": 1.4710045643917589, "learning_rate": 1.8867186618742822e-05, "loss": 0.8185, "step": 3884 }, { "epoch": 0.18, "grad_norm": 0.7708456774429172, "learning_rate": 1.8866498641491625e-05, "loss": 0.5095, "step": 3885 }, { "epoch": 0.18, "grad_norm": 0.37758381447136374, "learning_rate": 1.886581046794469e-05, "loss": 0.2429, "step": 3886 }, { "epoch": 0.18, "grad_norm": 0.4885306099521553, "learning_rate": 1.8865122098117254e-05, "loss": 0.3459, "step": 3887 }, { "epoch": 0.18, "grad_norm": 0.5470028462415637, "learning_rate": 1.886443353202456e-05, "loss": 0.3385, "step": 3888 }, { "epoch": 0.18, "grad_norm": 0.40661722093809755, "learning_rate": 1.8863744769681855e-05, "loss": 0.1665, "step": 3889 }, { "epoch": 0.18, "grad_norm": 0.4238715713843672, "learning_rate": 1.886305581110438e-05, "loss": 0.3671, "step": 3890 }, { "epoch": 0.18, "grad_norm": 0.4846235788549622, "learning_rate": 1.8862366656307394e-05, "loss": 0.3636, "step": 3891 }, { "epoch": 0.18, "grad_norm": 0.41183815855461015, "learning_rate": 1.886167730530615e-05, "loss": 0.1101, "step": 3892 }, { "epoch": 0.18, "grad_norm": 0.5399374011104692, "learning_rate": 1.8860987758115913e-05, "loss": 0.3803, "step": 3893 }, { "epoch": 0.18, "grad_norm": 0.380833251564716, "learning_rate": 1.8860298014751947e-05, "loss": 0.2513, "step": 3894 }, { "epoch": 0.18, "grad_norm": 0.4729171085864037, "learning_rate": 1.885960807522952e-05, "loss": 0.2761, "step": 3895 }, { "epoch": 0.18, "grad_norm": 0.4299675158107653, "learning_rate": 1.8858917939563914e-05, "loss": 0.306, "step": 3896 }, { "epoch": 0.18, "grad_norm": 0.7221040664145163, "learning_rate": 1.8858227607770398e-05, "loss": 0.5218, "step": 3897 }, { "epoch": 0.18, "grad_norm": 0.39728819416102146, "learning_rate": 1.8857537079864265e-05, "loss": 0.1902, "step": 3898 }, { "epoch": 0.18, "grad_norm": 0.3994820490714649, "learning_rate": 1.8856846355860796e-05, "loss": 0.2599, "step": 3899 }, { "epoch": 0.18, "grad_norm": 1.1052495011921455, "learning_rate": 1.8856155435775284e-05, "loss": 0.6198, "step": 3900 }, { "epoch": 0.18, "grad_norm": 0.6922221992822799, "learning_rate": 1.8855464319623028e-05, "loss": 0.435, "step": 3901 }, { "epoch": 0.18, "grad_norm": 0.4321182006922516, "learning_rate": 1.8854773007419328e-05, "loss": 0.2927, "step": 3902 }, { "epoch": 0.18, "grad_norm": 0.4233142991833385, "learning_rate": 1.8854081499179485e-05, "loss": 0.367, "step": 3903 }, { "epoch": 0.18, "grad_norm": 0.29217460573373005, "learning_rate": 1.8853389794918816e-05, "loss": 0.191, "step": 3904 }, { "epoch": 0.18, "grad_norm": 0.5217776676310971, "learning_rate": 1.8852697894652623e-05, "loss": 0.2958, "step": 3905 }, { "epoch": 0.18, "grad_norm": 0.42087506173347405, "learning_rate": 1.8852005798396236e-05, "loss": 0.3757, "step": 3906 }, { "epoch": 0.18, "grad_norm": 0.8375875796325938, "learning_rate": 1.885131350616497e-05, "loss": 0.4458, "step": 3907 }, { "epoch": 0.18, "grad_norm": 0.38043122864022877, "learning_rate": 1.8850621017974157e-05, "loss": 0.319, "step": 3908 }, { "epoch": 0.18, "grad_norm": 0.3482098235881958, "learning_rate": 1.8849928333839124e-05, "loss": 0.2111, "step": 3909 }, { "epoch": 0.18, "grad_norm": 0.5169427562811084, "learning_rate": 1.8849235453775202e-05, "loss": 0.3115, "step": 3910 }, { "epoch": 0.18, "grad_norm": 0.41956398094053515, "learning_rate": 1.884854237779774e-05, "loss": 0.3244, "step": 3911 }, { "epoch": 0.18, "grad_norm": 0.7759240460824689, "learning_rate": 1.884784910592208e-05, "loss": 0.394, "step": 3912 }, { "epoch": 0.18, "grad_norm": 0.5961789640505579, "learning_rate": 1.884715563816357e-05, "loss": 0.4292, "step": 3913 }, { "epoch": 0.18, "grad_norm": 0.4079529022437299, "learning_rate": 1.8846461974537555e-05, "loss": 0.3269, "step": 3914 }, { "epoch": 0.18, "grad_norm": 0.45105514497906485, "learning_rate": 1.88457681150594e-05, "loss": 0.2733, "step": 3915 }, { "epoch": 0.18, "grad_norm": 0.33294928967557225, "learning_rate": 1.884507405974447e-05, "loss": 0.1638, "step": 3916 }, { "epoch": 0.18, "grad_norm": 0.4607174219785187, "learning_rate": 1.884437980860812e-05, "loss": 0.3391, "step": 3917 }, { "epoch": 0.18, "grad_norm": 0.5605722332414717, "learning_rate": 1.8843685361665724e-05, "loss": 0.2816, "step": 3918 }, { "epoch": 0.18, "grad_norm": 0.4209384522413471, "learning_rate": 1.884299071893266e-05, "loss": 0.3579, "step": 3919 }, { "epoch": 0.18, "grad_norm": 0.43460967310403115, "learning_rate": 1.8842295880424305e-05, "loss": 0.345, "step": 3920 }, { "epoch": 0.18, "grad_norm": 1.0217785382138418, "learning_rate": 1.884160084615604e-05, "loss": 0.6139, "step": 3921 }, { "epoch": 0.18, "grad_norm": 0.26862609546890037, "learning_rate": 1.884090561614326e-05, "loss": 0.2095, "step": 3922 }, { "epoch": 0.18, "grad_norm": 0.3684892915603788, "learning_rate": 1.884021019040134e-05, "loss": 0.239, "step": 3923 }, { "epoch": 0.18, "grad_norm": 0.8032374201687222, "learning_rate": 1.8839514568945695e-05, "loss": 0.5168, "step": 3924 }, { "epoch": 0.18, "grad_norm": 0.6035613389473641, "learning_rate": 1.8838818751791715e-05, "loss": 0.3413, "step": 3925 }, { "epoch": 0.18, "grad_norm": 0.3967095742381883, "learning_rate": 1.8838122738954808e-05, "loss": 0.3016, "step": 3926 }, { "epoch": 0.18, "grad_norm": 0.4420037435702996, "learning_rate": 1.883742653045038e-05, "loss": 0.3321, "step": 3927 }, { "epoch": 0.18, "grad_norm": 0.2352552598300315, "learning_rate": 1.8836730126293847e-05, "loss": 0.1002, "step": 3928 }, { "epoch": 0.18, "grad_norm": 0.39331906113518034, "learning_rate": 1.8836033526500624e-05, "loss": 0.2964, "step": 3929 }, { "epoch": 0.18, "grad_norm": 0.5226894532481836, "learning_rate": 1.883533673108614e-05, "loss": 0.366, "step": 3930 }, { "epoch": 0.18, "grad_norm": 0.9833547568042474, "learning_rate": 1.883463974006581e-05, "loss": 0.3414, "step": 3931 }, { "epoch": 0.18, "grad_norm": 0.39721134128298635, "learning_rate": 1.8833942553455073e-05, "loss": 0.3309, "step": 3932 }, { "epoch": 0.18, "grad_norm": 1.439852403871312, "learning_rate": 1.883324517126936e-05, "loss": 0.7941, "step": 3933 }, { "epoch": 0.18, "grad_norm": 0.3435326846372854, "learning_rate": 1.8832547593524116e-05, "loss": 0.2686, "step": 3934 }, { "epoch": 0.18, "grad_norm": 0.28307856292848027, "learning_rate": 1.8831849820234782e-05, "loss": 0.1787, "step": 3935 }, { "epoch": 0.18, "grad_norm": 0.853265183821286, "learning_rate": 1.8831151851416803e-05, "loss": 0.5033, "step": 3936 }, { "epoch": 0.18, "grad_norm": 0.7803789230857403, "learning_rate": 1.8830453687085636e-05, "loss": 0.4685, "step": 3937 }, { "epoch": 0.18, "grad_norm": 0.42602051284773895, "learning_rate": 1.8829755327256736e-05, "loss": 0.235, "step": 3938 }, { "epoch": 0.18, "grad_norm": 0.4847530285218601, "learning_rate": 1.882905677194556e-05, "loss": 0.3517, "step": 3939 }, { "epoch": 0.18, "grad_norm": 0.2970371314718206, "learning_rate": 1.8828358021167576e-05, "loss": 0.1724, "step": 3940 }, { "epoch": 0.18, "grad_norm": 0.39085924878888245, "learning_rate": 1.8827659074938256e-05, "loss": 0.2169, "step": 3941 }, { "epoch": 0.18, "grad_norm": 0.5718488465816467, "learning_rate": 1.8826959933273074e-05, "loss": 0.3857, "step": 3942 }, { "epoch": 0.18, "grad_norm": 1.1427216333066101, "learning_rate": 1.8826260596187505e-05, "loss": 0.5974, "step": 3943 }, { "epoch": 0.18, "grad_norm": 0.3818328714214559, "learning_rate": 1.882556106369703e-05, "loss": 0.2215, "step": 3944 }, { "epoch": 0.18, "grad_norm": 1.1751020398434424, "learning_rate": 1.8824861335817145e-05, "loss": 0.6769, "step": 3945 }, { "epoch": 0.18, "grad_norm": 0.4465930326278031, "learning_rate": 1.8824161412563333e-05, "loss": 0.3592, "step": 3946 }, { "epoch": 0.18, "grad_norm": 0.3206620712832924, "learning_rate": 1.8823461293951092e-05, "loss": 0.2402, "step": 3947 }, { "epoch": 0.18, "grad_norm": 0.709438796656978, "learning_rate": 1.8822760979995923e-05, "loss": 0.3202, "step": 3948 }, { "epoch": 0.18, "grad_norm": 1.583815003891385, "learning_rate": 1.8822060470713327e-05, "loss": 0.8699, "step": 3949 }, { "epoch": 0.18, "grad_norm": 0.3904867367574193, "learning_rate": 1.8821359766118817e-05, "loss": 0.3093, "step": 3950 }, { "epoch": 0.18, "grad_norm": 0.5167241733809467, "learning_rate": 1.8820658866227902e-05, "loss": 0.3045, "step": 3951 }, { "epoch": 0.18, "grad_norm": 0.7646413270054491, "learning_rate": 1.8819957771056106e-05, "loss": 0.535, "step": 3952 }, { "epoch": 0.18, "grad_norm": 0.3940579288068486, "learning_rate": 1.881925648061894e-05, "loss": 0.2932, "step": 3953 }, { "epoch": 0.18, "grad_norm": 0.48095373274264436, "learning_rate": 1.8818554994931938e-05, "loss": 0.3203, "step": 3954 }, { "epoch": 0.18, "grad_norm": 0.369192953693975, "learning_rate": 1.881785331401063e-05, "loss": 0.2235, "step": 3955 }, { "epoch": 0.18, "grad_norm": 0.43588704969082, "learning_rate": 1.8817151437870544e-05, "loss": 0.2705, "step": 3956 }, { "epoch": 0.18, "grad_norm": 0.9923244449979725, "learning_rate": 1.881644936652723e-05, "loss": 0.4432, "step": 3957 }, { "epoch": 0.18, "grad_norm": 0.3960451843575307, "learning_rate": 1.8815747099996216e-05, "loss": 0.3275, "step": 3958 }, { "epoch": 0.18, "grad_norm": 0.43814326696069184, "learning_rate": 1.8815044638293064e-05, "loss": 0.282, "step": 3959 }, { "epoch": 0.18, "grad_norm": 0.48216360379279827, "learning_rate": 1.8814341981433318e-05, "loss": 0.3669, "step": 3960 }, { "epoch": 0.18, "grad_norm": 0.3066220899732937, "learning_rate": 1.8813639129432532e-05, "loss": 0.2072, "step": 3961 }, { "epoch": 0.18, "grad_norm": 0.4020740979705203, "learning_rate": 1.8812936082306275e-05, "loss": 0.2816, "step": 3962 }, { "epoch": 0.18, "grad_norm": 0.5933214722023372, "learning_rate": 1.8812232840070106e-05, "loss": 0.406, "step": 3963 }, { "epoch": 0.18, "grad_norm": 0.7013550338251123, "learning_rate": 1.8811529402739596e-05, "loss": 0.4013, "step": 3964 }, { "epoch": 0.18, "grad_norm": 0.3862024184424891, "learning_rate": 1.8810825770330317e-05, "loss": 0.3058, "step": 3965 }, { "epoch": 0.18, "grad_norm": 0.4144961077843554, "learning_rate": 1.8810121942857848e-05, "loss": 0.3245, "step": 3966 }, { "epoch": 0.18, "grad_norm": 0.24528902713484785, "learning_rate": 1.880941792033777e-05, "loss": 0.1027, "step": 3967 }, { "epoch": 0.18, "grad_norm": 0.393894016662953, "learning_rate": 1.880871370278567e-05, "loss": 0.2989, "step": 3968 }, { "epoch": 0.18, "grad_norm": 1.3351816873229205, "learning_rate": 1.880800929021714e-05, "loss": 0.8207, "step": 3969 }, { "epoch": 0.18, "grad_norm": 0.4223318351673896, "learning_rate": 1.880730468264777e-05, "loss": 0.3268, "step": 3970 }, { "epoch": 0.18, "grad_norm": 0.42740360955404016, "learning_rate": 1.8806599880093163e-05, "loss": 0.3379, "step": 3971 }, { "epoch": 0.18, "grad_norm": 1.121006182489048, "learning_rate": 1.8805894882568926e-05, "loss": 0.6971, "step": 3972 }, { "epoch": 0.18, "grad_norm": 0.3441684976160639, "learning_rate": 1.880518969009066e-05, "loss": 0.2092, "step": 3973 }, { "epoch": 0.18, "grad_norm": 0.3909019338070255, "learning_rate": 1.8804484302673982e-05, "loss": 0.2264, "step": 3974 }, { "epoch": 0.18, "grad_norm": 0.63977440618754, "learning_rate": 1.8803778720334512e-05, "loss": 0.3957, "step": 3975 }, { "epoch": 0.18, "grad_norm": 0.6828658953875684, "learning_rate": 1.8803072943087862e-05, "loss": 0.4941, "step": 3976 }, { "epoch": 0.18, "grad_norm": 0.3809574398008861, "learning_rate": 1.8802366970949658e-05, "loss": 0.2071, "step": 3977 }, { "epoch": 0.18, "grad_norm": 0.342393643855471, "learning_rate": 1.880166080393554e-05, "loss": 0.3061, "step": 3978 }, { "epoch": 0.18, "grad_norm": 0.5528930196264737, "learning_rate": 1.880095444206113e-05, "loss": 0.3781, "step": 3979 }, { "epoch": 0.18, "grad_norm": 0.38230488258442985, "learning_rate": 1.8800247885342074e-05, "loss": 0.2143, "step": 3980 }, { "epoch": 0.18, "grad_norm": 0.47129070528425276, "learning_rate": 1.879954113379401e-05, "loss": 0.3592, "step": 3981 }, { "epoch": 0.18, "grad_norm": 0.43917426646226443, "learning_rate": 1.879883418743259e-05, "loss": 0.3466, "step": 3982 }, { "epoch": 0.18, "grad_norm": 0.37607739445703614, "learning_rate": 1.8798127046273457e-05, "loss": 0.2165, "step": 3983 }, { "epoch": 0.18, "grad_norm": 0.9513792039567668, "learning_rate": 1.8797419710332274e-05, "loss": 0.5904, "step": 3984 }, { "epoch": 0.18, "grad_norm": 1.583406491013997, "learning_rate": 1.8796712179624695e-05, "loss": 0.8648, "step": 3985 }, { "epoch": 0.18, "grad_norm": 0.31664098503101346, "learning_rate": 1.8796004454166386e-05, "loss": 0.2788, "step": 3986 }, { "epoch": 0.18, "grad_norm": 0.3429370441062218, "learning_rate": 1.879529653397302e-05, "loss": 0.2118, "step": 3987 }, { "epoch": 0.18, "grad_norm": 0.44357907170776373, "learning_rate": 1.8794588419060266e-05, "loss": 0.3543, "step": 3988 }, { "epoch": 0.18, "grad_norm": 0.4359297537588003, "learning_rate": 1.8793880109443797e-05, "loss": 0.3024, "step": 3989 }, { "epoch": 0.18, "grad_norm": 0.459443742994942, "learning_rate": 1.87931716051393e-05, "loss": 0.2728, "step": 3990 }, { "epoch": 0.18, "grad_norm": 0.9940553723656974, "learning_rate": 1.879246290616246e-05, "loss": 0.6284, "step": 3991 }, { "epoch": 0.18, "grad_norm": 0.47324194536475916, "learning_rate": 1.8791754012528962e-05, "loss": 0.2911, "step": 3992 }, { "epoch": 0.18, "grad_norm": 0.48096273663155553, "learning_rate": 1.8791044924254506e-05, "loss": 0.3563, "step": 3993 }, { "epoch": 0.18, "grad_norm": 0.2844311415627377, "learning_rate": 1.8790335641354785e-05, "loss": 0.2265, "step": 3994 }, { "epoch": 0.18, "grad_norm": 0.772733129284855, "learning_rate": 1.878962616384551e-05, "loss": 0.4001, "step": 3995 }, { "epoch": 0.18, "grad_norm": 0.46483450329263487, "learning_rate": 1.878891649174238e-05, "loss": 0.3068, "step": 3996 }, { "epoch": 0.18, "grad_norm": 0.42198199768045014, "learning_rate": 1.8788206625061113e-05, "loss": 0.313, "step": 3997 }, { "epoch": 0.18, "grad_norm": 0.49150202576998525, "learning_rate": 1.878749656381742e-05, "loss": 0.3141, "step": 3998 }, { "epoch": 0.18, "grad_norm": 0.6198365518355851, "learning_rate": 1.878678630802702e-05, "loss": 0.4014, "step": 3999 }, { "epoch": 0.18, "grad_norm": 0.2507835431138398, "learning_rate": 1.8786075857705645e-05, "loss": 0.1162, "step": 4000 }, { "epoch": 0.18, "grad_norm": 0.43906986133311776, "learning_rate": 1.8785365212869014e-05, "loss": 0.3368, "step": 4001 }, { "epoch": 0.18, "grad_norm": 0.3619943054589335, "learning_rate": 1.8784654373532867e-05, "loss": 0.3196, "step": 4002 }, { "epoch": 0.18, "grad_norm": 0.8933023955303412, "learning_rate": 1.8783943339712937e-05, "loss": 0.5193, "step": 4003 }, { "epoch": 0.18, "grad_norm": 0.49435648601329596, "learning_rate": 1.878323211142497e-05, "loss": 0.3666, "step": 4004 }, { "epoch": 0.18, "grad_norm": 0.6520068088457173, "learning_rate": 1.8782520688684708e-05, "loss": 0.4388, "step": 4005 }, { "epoch": 0.18, "grad_norm": 0.2632204696032936, "learning_rate": 1.8781809071507905e-05, "loss": 0.1954, "step": 4006 }, { "epoch": 0.18, "grad_norm": 0.4209301099390935, "learning_rate": 1.878109725991031e-05, "loss": 0.2624, "step": 4007 }, { "epoch": 0.18, "grad_norm": 0.6354671119006841, "learning_rate": 1.8780385253907683e-05, "loss": 0.4402, "step": 4008 }, { "epoch": 0.18, "grad_norm": 0.5536095833548786, "learning_rate": 1.8779673053515794e-05, "loss": 0.4206, "step": 4009 }, { "epoch": 0.18, "grad_norm": 0.3450921443156242, "learning_rate": 1.8778960658750406e-05, "loss": 0.2437, "step": 4010 }, { "epoch": 0.18, "grad_norm": 0.5914851412022977, "learning_rate": 1.8778248069627288e-05, "loss": 0.4123, "step": 4011 }, { "epoch": 0.18, "grad_norm": 0.34498119809539757, "learning_rate": 1.8777535286162217e-05, "loss": 0.2332, "step": 4012 }, { "epoch": 0.18, "grad_norm": 0.297294478664778, "learning_rate": 1.8776822308370977e-05, "loss": 0.0684, "step": 4013 }, { "epoch": 0.18, "grad_norm": 0.4616099498090414, "learning_rate": 1.877610913626935e-05, "loss": 0.2927, "step": 4014 }, { "epoch": 0.18, "grad_norm": 1.1792369777909457, "learning_rate": 1.8775395769873124e-05, "loss": 0.4645, "step": 4015 }, { "epoch": 0.18, "grad_norm": 0.4927609070659288, "learning_rate": 1.8774682209198092e-05, "loss": 0.249, "step": 4016 }, { "epoch": 0.18, "grad_norm": 0.4674389794346865, "learning_rate": 1.8773968454260055e-05, "loss": 0.3306, "step": 4017 }, { "epoch": 0.18, "grad_norm": 0.6153935103914157, "learning_rate": 1.8773254505074812e-05, "loss": 0.3685, "step": 4018 }, { "epoch": 0.18, "grad_norm": 0.26515885277279033, "learning_rate": 1.8772540361658172e-05, "loss": 0.1478, "step": 4019 }, { "epoch": 0.18, "grad_norm": 0.6890604781427501, "learning_rate": 1.8771826024025944e-05, "loss": 0.3908, "step": 4020 }, { "epoch": 0.18, "grad_norm": 0.9466866723666275, "learning_rate": 1.877111149219394e-05, "loss": 0.4097, "step": 4021 }, { "epoch": 0.18, "grad_norm": 0.6618791638080433, "learning_rate": 1.8770396766177982e-05, "loss": 0.3621, "step": 4022 }, { "epoch": 0.18, "grad_norm": 0.4216990855645584, "learning_rate": 1.8769681845993894e-05, "loss": 0.2901, "step": 4023 }, { "epoch": 0.18, "grad_norm": 0.49743356128522165, "learning_rate": 1.8768966731657498e-05, "loss": 0.3494, "step": 4024 }, { "epoch": 0.18, "grad_norm": 0.3537594222234786, "learning_rate": 1.8768251423184637e-05, "loss": 0.2497, "step": 4025 }, { "epoch": 0.18, "grad_norm": 0.45936464636574476, "learning_rate": 1.8767535920591133e-05, "loss": 0.2645, "step": 4026 }, { "epoch": 0.18, "grad_norm": 0.9689641996080465, "learning_rate": 1.876682022389284e-05, "loss": 0.4773, "step": 4027 }, { "epoch": 0.19, "grad_norm": 0.7421056731019856, "learning_rate": 1.8766104333105594e-05, "loss": 0.4472, "step": 4028 }, { "epoch": 0.19, "grad_norm": 0.48311824901737943, "learning_rate": 1.8765388248245247e-05, "loss": 0.2723, "step": 4029 }, { "epoch": 0.19, "grad_norm": 0.3871914291360428, "learning_rate": 1.8764671969327653e-05, "loss": 0.3277, "step": 4030 }, { "epoch": 0.19, "grad_norm": 0.30945069964211885, "learning_rate": 1.8763955496368668e-05, "loss": 0.1455, "step": 4031 }, { "epoch": 0.19, "grad_norm": 0.4520331679045915, "learning_rate": 1.8763238829384156e-05, "loss": 0.2891, "step": 4032 }, { "epoch": 0.19, "grad_norm": 0.5625587918102873, "learning_rate": 1.8762521968389983e-05, "loss": 0.3667, "step": 4033 }, { "epoch": 0.19, "grad_norm": 0.8315770904840278, "learning_rate": 1.876180491340202e-05, "loss": 0.4981, "step": 4034 }, { "epoch": 0.19, "grad_norm": 0.4537648300491066, "learning_rate": 1.8761087664436137e-05, "loss": 0.3291, "step": 4035 }, { "epoch": 0.19, "grad_norm": 1.0620814465976745, "learning_rate": 1.876037022150822e-05, "loss": 0.2689, "step": 4036 }, { "epoch": 0.19, "grad_norm": 0.27215117133615047, "learning_rate": 1.875965258463415e-05, "loss": 0.226, "step": 4037 }, { "epoch": 0.19, "grad_norm": 0.3821132470527215, "learning_rate": 1.8758934753829813e-05, "loss": 0.3199, "step": 4038 }, { "epoch": 0.19, "grad_norm": 0.7466891878586411, "learning_rate": 1.8758216729111104e-05, "loss": 0.3953, "step": 4039 }, { "epoch": 0.19, "grad_norm": 0.6796077368785677, "learning_rate": 1.8757498510493915e-05, "loss": 0.3621, "step": 4040 }, { "epoch": 0.19, "grad_norm": 0.3951030452651413, "learning_rate": 1.875678009799415e-05, "loss": 0.3169, "step": 4041 }, { "epoch": 0.19, "grad_norm": 0.4989226873074533, "learning_rate": 1.8756061491627716e-05, "loss": 0.3128, "step": 4042 }, { "epoch": 0.19, "grad_norm": 0.3658374309040895, "learning_rate": 1.875534269141052e-05, "loss": 0.2183, "step": 4043 }, { "epoch": 0.19, "grad_norm": 0.4424638152107735, "learning_rate": 1.8754623697358473e-05, "loss": 0.3673, "step": 4044 }, { "epoch": 0.19, "grad_norm": 0.3017595486460874, "learning_rate": 1.8753904509487497e-05, "loss": 0.2676, "step": 4045 }, { "epoch": 0.19, "grad_norm": 0.6307815847309481, "learning_rate": 1.8753185127813512e-05, "loss": 0.4126, "step": 4046 }, { "epoch": 0.19, "grad_norm": 0.5025268019894982, "learning_rate": 1.8752465552352443e-05, "loss": 0.3221, "step": 4047 }, { "epoch": 0.19, "grad_norm": 1.1178648532467765, "learning_rate": 1.8751745783120224e-05, "loss": 0.663, "step": 4048 }, { "epoch": 0.19, "grad_norm": 0.40062289889231617, "learning_rate": 1.875102582013279e-05, "loss": 0.3093, "step": 4049 }, { "epoch": 0.19, "grad_norm": 0.38130032841038297, "learning_rate": 1.8750305663406077e-05, "loss": 0.3046, "step": 4050 }, { "epoch": 0.19, "grad_norm": 0.36260549148975374, "learning_rate": 1.8749585312956028e-05, "loss": 0.2348, "step": 4051 }, { "epoch": 0.19, "grad_norm": 0.6269668264209588, "learning_rate": 1.8748864768798596e-05, "loss": 0.2773, "step": 4052 }, { "epoch": 0.19, "grad_norm": 0.4098295625980208, "learning_rate": 1.8748144030949728e-05, "loss": 0.2931, "step": 4053 }, { "epoch": 0.19, "grad_norm": 0.6008618136743048, "learning_rate": 1.8747423099425387e-05, "loss": 0.3829, "step": 4054 }, { "epoch": 0.19, "grad_norm": 0.5761730553079765, "learning_rate": 1.8746701974241525e-05, "loss": 0.3717, "step": 4055 }, { "epoch": 0.19, "grad_norm": 0.40136981386742165, "learning_rate": 1.8745980655414113e-05, "loss": 0.3303, "step": 4056 }, { "epoch": 0.19, "grad_norm": 0.3515300111033161, "learning_rate": 1.8745259142959117e-05, "loss": 0.2043, "step": 4057 }, { "epoch": 0.19, "grad_norm": 0.4967389945058046, "learning_rate": 1.8744537436892517e-05, "loss": 0.2918, "step": 4058 }, { "epoch": 0.19, "grad_norm": 0.45419702859968686, "learning_rate": 1.8743815537230284e-05, "loss": 0.3364, "step": 4059 }, { "epoch": 0.19, "grad_norm": 0.8970131137846125, "learning_rate": 1.87430934439884e-05, "loss": 0.5287, "step": 4060 }, { "epoch": 0.19, "grad_norm": 0.3993861416302628, "learning_rate": 1.8742371157182856e-05, "loss": 0.3451, "step": 4061 }, { "epoch": 0.19, "grad_norm": 0.4173887333637627, "learning_rate": 1.874164867682964e-05, "loss": 0.2502, "step": 4062 }, { "epoch": 0.19, "grad_norm": 0.3176562227560707, "learning_rate": 1.8740926002944747e-05, "loss": 0.1888, "step": 4063 }, { "epoch": 0.19, "grad_norm": 0.8815948896931961, "learning_rate": 1.874020313554418e-05, "loss": 0.5814, "step": 4064 }, { "epoch": 0.19, "grad_norm": 0.34095189836042045, "learning_rate": 1.8739480074643936e-05, "loss": 0.2387, "step": 4065 }, { "epoch": 0.19, "grad_norm": 0.5847801595052117, "learning_rate": 1.873875682026003e-05, "loss": 0.4254, "step": 4066 }, { "epoch": 0.19, "grad_norm": 0.8731200352332138, "learning_rate": 1.8738033372408467e-05, "loss": 0.507, "step": 4067 }, { "epoch": 0.19, "grad_norm": 0.3858910815996692, "learning_rate": 1.8737309731105266e-05, "loss": 0.205, "step": 4068 }, { "epoch": 0.19, "grad_norm": 0.3283125645812928, "learning_rate": 1.8736585896366452e-05, "loss": 0.2331, "step": 4069 }, { "epoch": 0.19, "grad_norm": 1.5158346111340124, "learning_rate": 1.8735861868208047e-05, "loss": 0.8187, "step": 4070 }, { "epoch": 0.19, "grad_norm": 0.39669100089283027, "learning_rate": 1.8735137646646078e-05, "loss": 0.2271, "step": 4071 }, { "epoch": 0.19, "grad_norm": 1.0898667717787507, "learning_rate": 1.873441323169658e-05, "loss": 0.5402, "step": 4072 }, { "epoch": 0.19, "grad_norm": 0.4406687861116597, "learning_rate": 1.8733688623375595e-05, "loss": 0.3533, "step": 4073 }, { "epoch": 0.19, "grad_norm": 0.39628465706299576, "learning_rate": 1.8732963821699158e-05, "loss": 0.291, "step": 4074 }, { "epoch": 0.19, "grad_norm": 0.36005318460321234, "learning_rate": 1.873223882668332e-05, "loss": 0.1215, "step": 4075 }, { "epoch": 0.19, "grad_norm": 0.6954297748866335, "learning_rate": 1.8731513638344128e-05, "loss": 0.4379, "step": 4076 }, { "epoch": 0.19, "grad_norm": 0.3925983123524538, "learning_rate": 1.8730788256697642e-05, "loss": 0.3001, "step": 4077 }, { "epoch": 0.19, "grad_norm": 0.7017349425695114, "learning_rate": 1.873006268175992e-05, "loss": 0.2901, "step": 4078 }, { "epoch": 0.19, "grad_norm": 0.7681959949278588, "learning_rate": 1.872933691354702e-05, "loss": 0.5355, "step": 4079 }, { "epoch": 0.19, "grad_norm": 0.45255785397919956, "learning_rate": 1.872861095207502e-05, "loss": 0.2759, "step": 4080 }, { "epoch": 0.19, "grad_norm": 0.37721507703032003, "learning_rate": 1.8727884797359984e-05, "loss": 0.2624, "step": 4081 }, { "epoch": 0.19, "grad_norm": 0.9145538686303181, "learning_rate": 1.8727158449417993e-05, "loss": 0.5851, "step": 4082 }, { "epoch": 0.19, "grad_norm": 0.4739293217815633, "learning_rate": 1.872643190826512e-05, "loss": 0.3127, "step": 4083 }, { "epoch": 0.19, "grad_norm": 0.5254250299584181, "learning_rate": 1.872570517391746e-05, "loss": 0.2548, "step": 4084 }, { "epoch": 0.19, "grad_norm": 0.4839695123647533, "learning_rate": 1.8724978246391094e-05, "loss": 0.3261, "step": 4085 }, { "epoch": 0.19, "grad_norm": 0.38929072820636357, "learning_rate": 1.872425112570212e-05, "loss": 0.2709, "step": 4086 }, { "epoch": 0.19, "grad_norm": 1.173006971425519, "learning_rate": 1.8723523811866634e-05, "loss": 0.5855, "step": 4087 }, { "epoch": 0.19, "grad_norm": 0.5049895709995518, "learning_rate": 1.872279630490074e-05, "loss": 0.323, "step": 4088 }, { "epoch": 0.19, "grad_norm": 0.31590522510821323, "learning_rate": 1.8722068604820546e-05, "loss": 0.2689, "step": 4089 }, { "epoch": 0.19, "grad_norm": 0.4719365460858741, "learning_rate": 1.8721340711642153e-05, "loss": 0.3619, "step": 4090 }, { "epoch": 0.19, "grad_norm": 0.29880143211145316, "learning_rate": 1.8720612625381685e-05, "loss": 0.1143, "step": 4091 }, { "epoch": 0.19, "grad_norm": 0.45132429195794876, "learning_rate": 1.871988434605526e-05, "loss": 0.2855, "step": 4092 }, { "epoch": 0.19, "grad_norm": 0.4825642241460887, "learning_rate": 1.8719155873678997e-05, "loss": 0.3348, "step": 4093 }, { "epoch": 0.19, "grad_norm": 0.7928324056232585, "learning_rate": 1.8718427208269028e-05, "loss": 0.4324, "step": 4094 }, { "epoch": 0.19, "grad_norm": 0.4017577310825777, "learning_rate": 1.8717698349841487e-05, "loss": 0.3051, "step": 4095 }, { "epoch": 0.19, "grad_norm": 0.9593971553006387, "learning_rate": 1.87169692984125e-05, "loss": 0.6747, "step": 4096 }, { "epoch": 0.19, "grad_norm": 0.2939174551982533, "learning_rate": 1.8716240053998216e-05, "loss": 0.1987, "step": 4097 }, { "epoch": 0.19, "grad_norm": 0.4579487831070095, "learning_rate": 1.8715510616614783e-05, "loss": 0.3136, "step": 4098 }, { "epoch": 0.19, "grad_norm": 0.570487794214327, "learning_rate": 1.871478098627834e-05, "loss": 0.3833, "step": 4099 }, { "epoch": 0.19, "grad_norm": 0.4427923904129169, "learning_rate": 1.871405116300505e-05, "loss": 0.3745, "step": 4100 }, { "epoch": 0.19, "grad_norm": 0.43200144056182965, "learning_rate": 1.8713321146811064e-05, "loss": 0.2459, "step": 4101 }, { "epoch": 0.19, "grad_norm": 0.600004724323595, "learning_rate": 1.871259093771254e-05, "loss": 0.4287, "step": 4102 }, { "epoch": 0.19, "grad_norm": 0.38035863225806904, "learning_rate": 1.871186053572566e-05, "loss": 0.2405, "step": 4103 }, { "epoch": 0.19, "grad_norm": 0.45845124013764627, "learning_rate": 1.8711129940866577e-05, "loss": 0.2121, "step": 4104 }, { "epoch": 0.19, "grad_norm": 0.4329663941746019, "learning_rate": 1.8710399153151475e-05, "loss": 0.3076, "step": 4105 }, { "epoch": 0.19, "grad_norm": 0.6992489900673649, "learning_rate": 1.870966817259653e-05, "loss": 0.5098, "step": 4106 }, { "epoch": 0.19, "grad_norm": 0.34395180714971296, "learning_rate": 1.8708936999217932e-05, "loss": 0.268, "step": 4107 }, { "epoch": 0.19, "grad_norm": 0.6000487984948208, "learning_rate": 1.8708205633031857e-05, "loss": 0.3971, "step": 4108 }, { "epoch": 0.19, "grad_norm": 0.3487440062477048, "learning_rate": 1.87074740740545e-05, "loss": 0.2301, "step": 4109 }, { "epoch": 0.19, "grad_norm": 0.3970407382973214, "learning_rate": 1.8706742322302064e-05, "loss": 0.2353, "step": 4110 }, { "epoch": 0.19, "grad_norm": 1.0152483241237586, "learning_rate": 1.8706010377790746e-05, "loss": 0.6636, "step": 4111 }, { "epoch": 0.19, "grad_norm": 0.5107988446633184, "learning_rate": 1.8705278240536745e-05, "loss": 0.4164, "step": 4112 }, { "epoch": 0.19, "grad_norm": 0.3555898251270073, "learning_rate": 1.8704545910556278e-05, "loss": 0.2935, "step": 4113 }, { "epoch": 0.19, "grad_norm": 0.47716167363553574, "learning_rate": 1.870381338786555e-05, "loss": 0.2921, "step": 4114 }, { "epoch": 0.19, "grad_norm": 0.35111418159760394, "learning_rate": 1.8703080672480784e-05, "loss": 0.1804, "step": 4115 }, { "epoch": 0.19, "grad_norm": 0.4738758554667891, "learning_rate": 1.87023477644182e-05, "loss": 0.2972, "step": 4116 }, { "epoch": 0.19, "grad_norm": 0.3603507317470979, "learning_rate": 1.8701614663694023e-05, "loss": 0.2855, "step": 4117 }, { "epoch": 0.19, "grad_norm": 0.6554457055049226, "learning_rate": 1.8700881370324486e-05, "loss": 0.5147, "step": 4118 }, { "epoch": 0.19, "grad_norm": 0.7088819981565134, "learning_rate": 1.8700147884325822e-05, "loss": 0.4062, "step": 4119 }, { "epoch": 0.19, "grad_norm": 0.42379127833525326, "learning_rate": 1.8699414205714265e-05, "loss": 0.2808, "step": 4120 }, { "epoch": 0.19, "grad_norm": 0.3074587918302642, "learning_rate": 1.8698680334506067e-05, "loss": 0.2379, "step": 4121 }, { "epoch": 0.19, "grad_norm": 0.6688850761808294, "learning_rate": 1.8697946270717468e-05, "loss": 0.3302, "step": 4122 }, { "epoch": 0.19, "grad_norm": 0.44008791084786675, "learning_rate": 1.8697212014364724e-05, "loss": 0.3611, "step": 4123 }, { "epoch": 0.19, "grad_norm": 0.48351556889028213, "learning_rate": 1.8696477565464085e-05, "loss": 0.3271, "step": 4124 }, { "epoch": 0.19, "grad_norm": 0.4091845901403694, "learning_rate": 1.8695742924031816e-05, "loss": 0.3032, "step": 4125 }, { "epoch": 0.19, "grad_norm": 0.535129187692363, "learning_rate": 1.8695008090084178e-05, "loss": 0.3933, "step": 4126 }, { "epoch": 0.19, "grad_norm": 0.24036217655968822, "learning_rate": 1.8694273063637444e-05, "loss": 0.0749, "step": 4127 }, { "epoch": 0.19, "grad_norm": 0.37495575057809033, "learning_rate": 1.8693537844707884e-05, "loss": 0.3313, "step": 4128 }, { "epoch": 0.19, "grad_norm": 0.4601205933578822, "learning_rate": 1.8692802433311773e-05, "loss": 0.3508, "step": 4129 }, { "epoch": 0.19, "grad_norm": 0.6783083901461556, "learning_rate": 1.8692066829465396e-05, "loss": 0.4322, "step": 4130 }, { "epoch": 0.19, "grad_norm": 0.5122276613740804, "learning_rate": 1.8691331033185036e-05, "loss": 0.3441, "step": 4131 }, { "epoch": 0.19, "grad_norm": 0.6176107316002839, "learning_rate": 1.8690595044486983e-05, "loss": 0.3468, "step": 4132 }, { "epoch": 0.19, "grad_norm": 0.2615467933068633, "learning_rate": 1.8689858863387534e-05, "loss": 0.2058, "step": 4133 }, { "epoch": 0.19, "grad_norm": 0.5766915048836134, "learning_rate": 1.8689122489902983e-05, "loss": 0.3756, "step": 4134 }, { "epoch": 0.19, "grad_norm": 0.4496838726514699, "learning_rate": 1.8688385924049636e-05, "loss": 0.3492, "step": 4135 }, { "epoch": 0.19, "grad_norm": 0.394662739207563, "learning_rate": 1.8687649165843797e-05, "loss": 0.3553, "step": 4136 }, { "epoch": 0.19, "grad_norm": 0.4783256277908455, "learning_rate": 1.868691221530178e-05, "loss": 0.1731, "step": 4137 }, { "epoch": 0.19, "grad_norm": 0.4981937629809839, "learning_rate": 1.86861750724399e-05, "loss": 0.342, "step": 4138 }, { "epoch": 0.19, "grad_norm": 0.41986243297898335, "learning_rate": 1.8685437737274476e-05, "loss": 0.2903, "step": 4139 }, { "epoch": 0.19, "grad_norm": 0.3768983281775736, "learning_rate": 1.8684700209821832e-05, "loss": 0.244, "step": 4140 }, { "epoch": 0.19, "grad_norm": 0.3595236474921723, "learning_rate": 1.8683962490098293e-05, "loss": 0.3426, "step": 4141 }, { "epoch": 0.19, "grad_norm": 0.47211723054074545, "learning_rate": 1.8683224578120197e-05, "loss": 0.2549, "step": 4142 }, { "epoch": 0.19, "grad_norm": 0.43463807481612116, "learning_rate": 1.8682486473903876e-05, "loss": 0.2459, "step": 4143 }, { "epoch": 0.19, "grad_norm": 0.3479296531679587, "learning_rate": 1.8681748177465673e-05, "loss": 0.3038, "step": 4144 }, { "epoch": 0.19, "grad_norm": 0.9129937729350435, "learning_rate": 1.8681009688821932e-05, "loss": 0.5846, "step": 4145 }, { "epoch": 0.19, "grad_norm": 0.41380624137609584, "learning_rate": 1.8680271007989007e-05, "loss": 0.2922, "step": 4146 }, { "epoch": 0.19, "grad_norm": 0.5819401863951018, "learning_rate": 1.8679532134983242e-05, "loss": 0.3784, "step": 4147 }, { "epoch": 0.19, "grad_norm": 0.2808789867807087, "learning_rate": 1.8678793069821006e-05, "loss": 0.2439, "step": 4148 }, { "epoch": 0.19, "grad_norm": 0.70544131912115, "learning_rate": 1.8678053812518657e-05, "loss": 0.4382, "step": 4149 }, { "epoch": 0.19, "grad_norm": 0.43774151905000996, "learning_rate": 1.8677314363092555e-05, "loss": 0.2528, "step": 4150 }, { "epoch": 0.19, "grad_norm": 0.6932091860919112, "learning_rate": 1.867657472155908e-05, "loss": 0.4767, "step": 4151 }, { "epoch": 0.19, "grad_norm": 0.3625520053029062, "learning_rate": 1.8675834887934604e-05, "loss": 0.3098, "step": 4152 }, { "epoch": 0.19, "grad_norm": 0.410908978760509, "learning_rate": 1.8675094862235502e-05, "loss": 0.23, "step": 4153 }, { "epoch": 0.19, "grad_norm": 0.40719130806744624, "learning_rate": 1.8674354644478163e-05, "loss": 0.2568, "step": 4154 }, { "epoch": 0.19, "grad_norm": 1.49686109623509, "learning_rate": 1.867361423467897e-05, "loss": 0.8323, "step": 4155 }, { "epoch": 0.19, "grad_norm": 0.31741884244535146, "learning_rate": 1.8672873632854322e-05, "loss": 0.2386, "step": 4156 }, { "epoch": 0.19, "grad_norm": 0.7604389779477356, "learning_rate": 1.867213283902061e-05, "loss": 0.4853, "step": 4157 }, { "epoch": 0.19, "grad_norm": 0.6199582958052203, "learning_rate": 1.8671391853194235e-05, "loss": 0.4581, "step": 4158 }, { "epoch": 0.19, "grad_norm": 0.32601407224753537, "learning_rate": 1.86706506753916e-05, "loss": 0.1849, "step": 4159 }, { "epoch": 0.19, "grad_norm": 0.44842277184067525, "learning_rate": 1.866990930562912e-05, "loss": 0.3469, "step": 4160 }, { "epoch": 0.19, "grad_norm": 0.3945089350021445, "learning_rate": 1.86691677439232e-05, "loss": 0.2633, "step": 4161 }, { "epoch": 0.19, "grad_norm": 0.4444742041732898, "learning_rate": 1.866842599029026e-05, "loss": 0.3224, "step": 4162 }, { "epoch": 0.19, "grad_norm": 1.286569326951523, "learning_rate": 1.866768404474673e-05, "loss": 0.4192, "step": 4163 }, { "epoch": 0.19, "grad_norm": 0.40065726187994355, "learning_rate": 1.8666941907309026e-05, "loss": 0.3188, "step": 4164 }, { "epoch": 0.19, "grad_norm": 0.4234475112488729, "learning_rate": 1.866619957799358e-05, "loss": 0.3161, "step": 4165 }, { "epoch": 0.19, "grad_norm": 0.4275319204453874, "learning_rate": 1.866545705681683e-05, "loss": 0.1917, "step": 4166 }, { "epoch": 0.19, "grad_norm": 0.38726615664524344, "learning_rate": 1.8664714343795213e-05, "loss": 0.2314, "step": 4167 }, { "epoch": 0.19, "grad_norm": 0.6271993644247684, "learning_rate": 1.866397143894517e-05, "loss": 0.3397, "step": 4168 }, { "epoch": 0.19, "grad_norm": 0.6373485797217358, "learning_rate": 1.866322834228315e-05, "loss": 0.3424, "step": 4169 }, { "epoch": 0.19, "grad_norm": 0.9843045911060582, "learning_rate": 1.86624850538256e-05, "loss": 0.3838, "step": 4170 }, { "epoch": 0.19, "grad_norm": 0.4276787637856182, "learning_rate": 1.8661741573588984e-05, "loss": 0.3215, "step": 4171 }, { "epoch": 0.19, "grad_norm": 0.47136134009969677, "learning_rate": 1.8660997901589758e-05, "loss": 0.3062, "step": 4172 }, { "epoch": 0.19, "grad_norm": 0.3946763554078555, "learning_rate": 1.866025403784439e-05, "loss": 0.2094, "step": 4173 }, { "epoch": 0.19, "grad_norm": 0.4224640509324194, "learning_rate": 1.865950998236934e-05, "loss": 0.3066, "step": 4174 }, { "epoch": 0.19, "grad_norm": 0.7803322201482245, "learning_rate": 1.8658765735181084e-05, "loss": 0.4865, "step": 4175 }, { "epoch": 0.19, "grad_norm": 0.41504883906285445, "learning_rate": 1.8658021296296103e-05, "loss": 0.2681, "step": 4176 }, { "epoch": 0.19, "grad_norm": 0.4014559075218351, "learning_rate": 1.8657276665730874e-05, "loss": 0.2752, "step": 4177 }, { "epoch": 0.19, "grad_norm": 1.5075213150953028, "learning_rate": 1.8656531843501882e-05, "loss": 0.8498, "step": 4178 }, { "epoch": 0.19, "grad_norm": 0.582299446058483, "learning_rate": 1.865578682962562e-05, "loss": 0.369, "step": 4179 }, { "epoch": 0.19, "grad_norm": 0.3968796581595646, "learning_rate": 1.865504162411858e-05, "loss": 0.298, "step": 4180 }, { "epoch": 0.19, "grad_norm": 0.5380824013126718, "learning_rate": 1.865429622699726e-05, "loss": 0.3347, "step": 4181 }, { "epoch": 0.19, "grad_norm": 0.31864338716591833, "learning_rate": 1.865355063827816e-05, "loss": 0.1458, "step": 4182 }, { "epoch": 0.19, "grad_norm": 0.5149241402283542, "learning_rate": 1.8652804857977795e-05, "loss": 0.2938, "step": 4183 }, { "epoch": 0.19, "grad_norm": 0.5272483429854528, "learning_rate": 1.8652058886112668e-05, "loss": 0.3608, "step": 4184 }, { "epoch": 0.19, "grad_norm": 0.7197677646921471, "learning_rate": 1.8651312722699297e-05, "loss": 0.3616, "step": 4185 }, { "epoch": 0.19, "grad_norm": 0.4716577641111177, "learning_rate": 1.86505663677542e-05, "loss": 0.3012, "step": 4186 }, { "epoch": 0.19, "grad_norm": 0.33763787108530596, "learning_rate": 1.8649819821293897e-05, "loss": 0.1897, "step": 4187 }, { "epoch": 0.19, "grad_norm": 0.4301601508073393, "learning_rate": 1.8649073083334923e-05, "loss": 0.3134, "step": 4188 }, { "epoch": 0.19, "grad_norm": 0.3698944457036956, "learning_rate": 1.8648326153893808e-05, "loss": 0.2391, "step": 4189 }, { "epoch": 0.19, "grad_norm": 1.0532131977735477, "learning_rate": 1.8647579032987085e-05, "loss": 0.7103, "step": 4190 }, { "epoch": 0.19, "grad_norm": 0.699085459607911, "learning_rate": 1.86468317206313e-05, "loss": 0.5148, "step": 4191 }, { "epoch": 0.19, "grad_norm": 0.3157901619692983, "learning_rate": 1.8646084216842993e-05, "loss": 0.2444, "step": 4192 }, { "epoch": 0.19, "grad_norm": 0.4205461480357759, "learning_rate": 1.8645336521638713e-05, "loss": 0.2625, "step": 4193 }, { "epoch": 0.19, "grad_norm": 1.7450015034730575, "learning_rate": 1.864458863503502e-05, "loss": 0.6988, "step": 4194 }, { "epoch": 0.19, "grad_norm": 0.3652664531919449, "learning_rate": 1.8643840557048462e-05, "loss": 0.2397, "step": 4195 }, { "epoch": 0.19, "grad_norm": 0.4508231709420786, "learning_rate": 1.8643092287695604e-05, "loss": 0.3365, "step": 4196 }, { "epoch": 0.19, "grad_norm": 0.6318623707717334, "learning_rate": 1.8642343826993015e-05, "loss": 0.4869, "step": 4197 }, { "epoch": 0.19, "grad_norm": 0.40324997587226413, "learning_rate": 1.864159517495726e-05, "loss": 0.2326, "step": 4198 }, { "epoch": 0.19, "grad_norm": 0.34008914617717506, "learning_rate": 1.8640846331604924e-05, "loss": 0.2148, "step": 4199 }, { "epoch": 0.19, "grad_norm": 0.4105311755041284, "learning_rate": 1.8640097296952577e-05, "loss": 0.3071, "step": 4200 }, { "epoch": 0.19, "grad_norm": 0.37575741391242884, "learning_rate": 1.86393480710168e-05, "loss": 0.2894, "step": 4201 }, { "epoch": 0.19, "grad_norm": 0.7221986833686256, "learning_rate": 1.863859865381418e-05, "loss": 0.4171, "step": 4202 }, { "epoch": 0.19, "grad_norm": 0.40624930495804484, "learning_rate": 1.863784904536132e-05, "loss": 0.3635, "step": 4203 }, { "epoch": 0.19, "grad_norm": 0.44560544596646917, "learning_rate": 1.86370992456748e-05, "loss": 0.2901, "step": 4204 }, { "epoch": 0.19, "grad_norm": 0.3727641965880537, "learning_rate": 1.8636349254771234e-05, "loss": 0.2042, "step": 4205 }, { "epoch": 0.19, "grad_norm": 0.516023009585777, "learning_rate": 1.8635599072667213e-05, "loss": 0.3425, "step": 4206 }, { "epoch": 0.19, "grad_norm": 0.4260646509227388, "learning_rate": 1.8634848699379354e-05, "loss": 0.2607, "step": 4207 }, { "epoch": 0.19, "grad_norm": 0.3598279327398515, "learning_rate": 1.8634098134924267e-05, "loss": 0.2907, "step": 4208 }, { "epoch": 0.19, "grad_norm": 0.9702796332041599, "learning_rate": 1.863334737931857e-05, "loss": 0.549, "step": 4209 }, { "epoch": 0.19, "grad_norm": 0.3919851985249983, "learning_rate": 1.8632596432578883e-05, "loss": 0.2848, "step": 4210 }, { "epoch": 0.19, "grad_norm": 0.4326622571949437, "learning_rate": 1.863184529472183e-05, "loss": 0.2627, "step": 4211 }, { "epoch": 0.19, "grad_norm": 0.31763419007332644, "learning_rate": 1.8631093965764045e-05, "loss": 0.166, "step": 4212 }, { "epoch": 0.19, "grad_norm": 0.4018304788174188, "learning_rate": 1.8630342445722152e-05, "loss": 0.2826, "step": 4213 }, { "epoch": 0.19, "grad_norm": 1.008148420842974, "learning_rate": 1.86295907346128e-05, "loss": 0.5096, "step": 4214 }, { "epoch": 0.19, "grad_norm": 0.38199778723774036, "learning_rate": 1.8628838832452628e-05, "loss": 0.3102, "step": 4215 }, { "epoch": 0.19, "grad_norm": 0.39715618525526375, "learning_rate": 1.8628086739258278e-05, "loss": 0.3, "step": 4216 }, { "epoch": 0.19, "grad_norm": 1.5529649301836324, "learning_rate": 1.8627334455046404e-05, "loss": 0.6103, "step": 4217 }, { "epoch": 0.19, "grad_norm": 0.3406019305545106, "learning_rate": 1.862658197983366e-05, "loss": 0.1572, "step": 4218 }, { "epoch": 0.19, "grad_norm": 0.5056276918318409, "learning_rate": 1.8625829313636707e-05, "loss": 0.3183, "step": 4219 }, { "epoch": 0.19, "grad_norm": 0.395127033173385, "learning_rate": 1.86250764564722e-05, "loss": 0.3232, "step": 4220 }, { "epoch": 0.19, "grad_norm": 0.6973099338641351, "learning_rate": 1.862432340835682e-05, "loss": 0.4816, "step": 4221 }, { "epoch": 0.19, "grad_norm": 0.500428749980683, "learning_rate": 1.862357016930723e-05, "loss": 0.3079, "step": 4222 }, { "epoch": 0.19, "grad_norm": 0.5877206281907318, "learning_rate": 1.862281673934011e-05, "loss": 0.4011, "step": 4223 }, { "epoch": 0.19, "grad_norm": 0.31240727049638506, "learning_rate": 1.8622063118472135e-05, "loss": 0.2035, "step": 4224 }, { "epoch": 0.19, "grad_norm": 0.5144508665958528, "learning_rate": 1.8621309306719997e-05, "loss": 0.3407, "step": 4225 }, { "epoch": 0.19, "grad_norm": 0.7317596647436877, "learning_rate": 1.8620555304100376e-05, "loss": 0.4356, "step": 4226 }, { "epoch": 0.19, "grad_norm": 0.42435270201847763, "learning_rate": 1.861980111062997e-05, "loss": 0.3376, "step": 4227 }, { "epoch": 0.19, "grad_norm": 0.3947162720971995, "learning_rate": 1.8619046726325475e-05, "loss": 0.2002, "step": 4228 }, { "epoch": 0.19, "grad_norm": 0.6189747287194609, "learning_rate": 1.8618292151203593e-05, "loss": 0.4288, "step": 4229 }, { "epoch": 0.19, "grad_norm": 0.37510562648132945, "learning_rate": 1.861753738528103e-05, "loss": 0.2053, "step": 4230 }, { "epoch": 0.19, "grad_norm": 0.39244009005410496, "learning_rate": 1.8616782428574495e-05, "loss": 0.2667, "step": 4231 }, { "epoch": 0.19, "grad_norm": 0.41892092364403355, "learning_rate": 1.86160272811007e-05, "loss": 0.354, "step": 4232 }, { "epoch": 0.19, "grad_norm": 1.410294070545441, "learning_rate": 1.861527194287637e-05, "loss": 0.8041, "step": 4233 }, { "epoch": 0.19, "grad_norm": 0.37826410249052556, "learning_rate": 1.8614516413918218e-05, "loss": 0.2288, "step": 4234 }, { "epoch": 0.19, "grad_norm": 1.3672072258194714, "learning_rate": 1.8613760694242978e-05, "loss": 0.6668, "step": 4235 }, { "epoch": 0.19, "grad_norm": 0.47346234378571395, "learning_rate": 1.8613004783867373e-05, "loss": 0.357, "step": 4236 }, { "epoch": 0.19, "grad_norm": 0.48788018254471677, "learning_rate": 1.861224868280815e-05, "loss": 0.3419, "step": 4237 }, { "epoch": 0.19, "grad_norm": 0.25386238628449465, "learning_rate": 1.861149239108204e-05, "loss": 0.1526, "step": 4238 }, { "epoch": 0.19, "grad_norm": 0.45806722560237756, "learning_rate": 1.8610735908705786e-05, "loss": 0.3359, "step": 4239 }, { "epoch": 0.19, "grad_norm": 1.0915031035295915, "learning_rate": 1.860997923569614e-05, "loss": 0.6028, "step": 4240 }, { "epoch": 0.19, "grad_norm": 0.4183587145267306, "learning_rate": 1.8609222372069852e-05, "loss": 0.223, "step": 4241 }, { "epoch": 0.19, "grad_norm": 0.722183538773224, "learning_rate": 1.860846531784368e-05, "loss": 0.462, "step": 4242 }, { "epoch": 0.19, "grad_norm": 0.4822117616404776, "learning_rate": 1.860770807303438e-05, "loss": 0.2968, "step": 4243 }, { "epoch": 0.19, "grad_norm": 0.26642257484066423, "learning_rate": 1.8606950637658722e-05, "loss": 0.1939, "step": 4244 }, { "epoch": 0.2, "grad_norm": 1.4719138271440375, "learning_rate": 1.860619301173347e-05, "loss": 0.8307, "step": 4245 }, { "epoch": 0.2, "grad_norm": 0.6849635966770423, "learning_rate": 1.86054351952754e-05, "loss": 0.3877, "step": 4246 }, { "epoch": 0.2, "grad_norm": 0.4050195896111727, "learning_rate": 1.8604677188301288e-05, "loss": 0.267, "step": 4247 }, { "epoch": 0.2, "grad_norm": 0.7986489568831321, "learning_rate": 1.860391899082792e-05, "loss": 0.4728, "step": 4248 }, { "epoch": 0.2, "grad_norm": 0.7684005142880341, "learning_rate": 1.8603160602872074e-05, "loss": 0.3507, "step": 4249 }, { "epoch": 0.2, "grad_norm": 0.3193234082154365, "learning_rate": 1.8602402024450547e-05, "loss": 0.2323, "step": 4250 }, { "epoch": 0.2, "grad_norm": 0.462570153195672, "learning_rate": 1.860164325558013e-05, "loss": 0.3056, "step": 4251 }, { "epoch": 0.2, "grad_norm": 0.5012261944582558, "learning_rate": 1.8600884296277617e-05, "loss": 0.3121, "step": 4252 }, { "epoch": 0.2, "grad_norm": 0.6721438343562806, "learning_rate": 1.860012514655982e-05, "loss": 0.4336, "step": 4253 }, { "epoch": 0.2, "grad_norm": 0.9386869920224649, "learning_rate": 1.859936580644354e-05, "loss": 0.4039, "step": 4254 }, { "epoch": 0.2, "grad_norm": 0.3281931726235795, "learning_rate": 1.859860627594559e-05, "loss": 0.2972, "step": 4255 }, { "epoch": 0.2, "grad_norm": 0.656306461205417, "learning_rate": 1.8597846555082784e-05, "loss": 0.4151, "step": 4256 }, { "epoch": 0.2, "grad_norm": 0.22655053636844913, "learning_rate": 1.8597086643871943e-05, "loss": 0.1349, "step": 4257 }, { "epoch": 0.2, "grad_norm": 0.9686743337818449, "learning_rate": 1.8596326542329888e-05, "loss": 0.4468, "step": 4258 }, { "epoch": 0.2, "grad_norm": 0.419880464276655, "learning_rate": 1.8595566250473445e-05, "loss": 0.3284, "step": 4259 }, { "epoch": 0.2, "grad_norm": 0.7111856428853667, "learning_rate": 1.8594805768319457e-05, "loss": 0.3414, "step": 4260 }, { "epoch": 0.2, "grad_norm": 0.840799267252484, "learning_rate": 1.8594045095884748e-05, "loss": 0.4666, "step": 4261 }, { "epoch": 0.2, "grad_norm": 0.37370294635857737, "learning_rate": 1.8593284233186168e-05, "loss": 0.2589, "step": 4262 }, { "epoch": 0.2, "grad_norm": 0.4588761679099954, "learning_rate": 1.8592523180240552e-05, "loss": 0.3364, "step": 4263 }, { "epoch": 0.2, "grad_norm": 0.24257829482369264, "learning_rate": 1.859176193706476e-05, "loss": 0.1227, "step": 4264 }, { "epoch": 0.2, "grad_norm": 0.43654393219973653, "learning_rate": 1.8591000503675635e-05, "loss": 0.3622, "step": 4265 }, { "epoch": 0.2, "grad_norm": 0.8789514667030124, "learning_rate": 1.8590238880090042e-05, "loss": 0.5487, "step": 4266 }, { "epoch": 0.2, "grad_norm": 0.3764960102209963, "learning_rate": 1.858947706632484e-05, "loss": 0.2528, "step": 4267 }, { "epoch": 0.2, "grad_norm": 0.5018443247765786, "learning_rate": 1.858871506239689e-05, "loss": 0.3542, "step": 4268 }, { "epoch": 0.2, "grad_norm": 1.4484056478095855, "learning_rate": 1.858795286832307e-05, "loss": 0.8241, "step": 4269 }, { "epoch": 0.2, "grad_norm": 0.47138726810805803, "learning_rate": 1.858719048412025e-05, "loss": 0.2432, "step": 4270 }, { "epoch": 0.2, "grad_norm": 0.4069346534547707, "learning_rate": 1.8586427909805308e-05, "loss": 0.2982, "step": 4271 }, { "epoch": 0.2, "grad_norm": 0.41383520443790894, "learning_rate": 1.858566514539513e-05, "loss": 0.2979, "step": 4272 }, { "epoch": 0.2, "grad_norm": 0.40997039287376597, "learning_rate": 1.85849021909066e-05, "loss": 0.211, "step": 4273 }, { "epoch": 0.2, "grad_norm": 0.6519100111384685, "learning_rate": 1.858413904635661e-05, "loss": 0.3881, "step": 4274 }, { "epoch": 0.2, "grad_norm": 0.4862669576064738, "learning_rate": 1.8583375711762054e-05, "loss": 0.3622, "step": 4275 }, { "epoch": 0.2, "grad_norm": 0.7806093011096142, "learning_rate": 1.858261218713983e-05, "loss": 0.3965, "step": 4276 }, { "epoch": 0.2, "grad_norm": 0.3897906346049912, "learning_rate": 1.858184847250685e-05, "loss": 0.2836, "step": 4277 }, { "epoch": 0.2, "grad_norm": 0.33605509595260075, "learning_rate": 1.8581084567880012e-05, "loss": 0.2083, "step": 4278 }, { "epoch": 0.2, "grad_norm": 0.4958439717340983, "learning_rate": 1.8580320473276234e-05, "loss": 0.3217, "step": 4279 }, { "epoch": 0.2, "grad_norm": 0.40378939629410737, "learning_rate": 1.857955618871243e-05, "loss": 0.217, "step": 4280 }, { "epoch": 0.2, "grad_norm": 1.0668605638875857, "learning_rate": 1.857879171420552e-05, "loss": 0.6062, "step": 4281 }, { "epoch": 0.2, "grad_norm": 0.5995313988047305, "learning_rate": 1.857802704977243e-05, "loss": 0.3973, "step": 4282 }, { "epoch": 0.2, "grad_norm": 0.39627075026799313, "learning_rate": 1.857726219543009e-05, "loss": 0.2373, "step": 4283 }, { "epoch": 0.2, "grad_norm": 0.3131188984179075, "learning_rate": 1.857649715119543e-05, "loss": 0.2052, "step": 4284 }, { "epoch": 0.2, "grad_norm": 0.7731717920181239, "learning_rate": 1.857573191708539e-05, "loss": 0.5462, "step": 4285 }, { "epoch": 0.2, "grad_norm": 0.5843632644313207, "learning_rate": 1.857496649311691e-05, "loss": 0.2675, "step": 4286 }, { "epoch": 0.2, "grad_norm": 0.5809243707043547, "learning_rate": 1.8574200879306938e-05, "loss": 0.3591, "step": 4287 }, { "epoch": 0.2, "grad_norm": 1.137210960246675, "learning_rate": 1.8573435075672422e-05, "loss": 0.4499, "step": 4288 }, { "epoch": 0.2, "grad_norm": 0.46439071593907794, "learning_rate": 1.857266908223032e-05, "loss": 0.3029, "step": 4289 }, { "epoch": 0.2, "grad_norm": 0.31689381380182335, "learning_rate": 1.857190289899758e-05, "loss": 0.0951, "step": 4290 }, { "epoch": 0.2, "grad_norm": 0.4679546528485914, "learning_rate": 1.8571136525991178e-05, "loss": 0.3078, "step": 4291 }, { "epoch": 0.2, "grad_norm": 0.585815750155175, "learning_rate": 1.857036996322807e-05, "loss": 0.3083, "step": 4292 }, { "epoch": 0.2, "grad_norm": 1.7707977589677684, "learning_rate": 1.8569603210725233e-05, "loss": 0.4483, "step": 4293 }, { "epoch": 0.2, "grad_norm": 0.49923180782558924, "learning_rate": 1.8568836268499642e-05, "loss": 0.3514, "step": 4294 }, { "epoch": 0.2, "grad_norm": 0.40470734445423134, "learning_rate": 1.8568069136568272e-05, "loss": 0.3209, "step": 4295 }, { "epoch": 0.2, "grad_norm": 0.2698376544670076, "learning_rate": 1.8567301814948112e-05, "loss": 0.124, "step": 4296 }, { "epoch": 0.2, "grad_norm": 1.2498194323472374, "learning_rate": 1.8566534303656144e-05, "loss": 0.6711, "step": 4297 }, { "epoch": 0.2, "grad_norm": 0.575087247125326, "learning_rate": 1.8565766602709365e-05, "loss": 0.3151, "step": 4298 }, { "epoch": 0.2, "grad_norm": 0.9593289933001017, "learning_rate": 1.856499871212477e-05, "loss": 0.3349, "step": 4299 }, { "epoch": 0.2, "grad_norm": 1.2963006724299926, "learning_rate": 1.8564230631919355e-05, "loss": 0.5762, "step": 4300 }, { "epoch": 0.2, "grad_norm": 0.7776943231269928, "learning_rate": 1.856346236211013e-05, "loss": 0.295, "step": 4301 }, { "epoch": 0.2, "grad_norm": 0.3605089723684607, "learning_rate": 1.8562693902714103e-05, "loss": 0.176, "step": 4302 }, { "epoch": 0.2, "grad_norm": 0.47683794100556987, "learning_rate": 1.8561925253748283e-05, "loss": 0.288, "step": 4303 }, { "epoch": 0.2, "grad_norm": 0.5648674900877408, "learning_rate": 1.8561156415229694e-05, "loss": 0.284, "step": 4304 }, { "epoch": 0.2, "grad_norm": 1.2094934944081341, "learning_rate": 1.8560387387175352e-05, "loss": 0.4911, "step": 4305 }, { "epoch": 0.2, "grad_norm": 0.5289136182055869, "learning_rate": 1.855961816960228e-05, "loss": 0.2708, "step": 4306 }, { "epoch": 0.2, "grad_norm": 0.4976460192855919, "learning_rate": 1.8558848762527517e-05, "loss": 0.3027, "step": 4307 }, { "epoch": 0.2, "grad_norm": 0.36676655909033784, "learning_rate": 1.8558079165968087e-05, "loss": 0.2775, "step": 4308 }, { "epoch": 0.2, "grad_norm": 0.3288064090147841, "learning_rate": 1.8557309379941037e-05, "loss": 0.1968, "step": 4309 }, { "epoch": 0.2, "grad_norm": 0.4592607713250931, "learning_rate": 1.8556539404463404e-05, "loss": 0.3133, "step": 4310 }, { "epoch": 0.2, "grad_norm": 0.6501209516359576, "learning_rate": 1.8555769239552232e-05, "loss": 0.3698, "step": 4311 }, { "epoch": 0.2, "grad_norm": 1.1104770222519336, "learning_rate": 1.855499888522458e-05, "loss": 0.4961, "step": 4312 }, { "epoch": 0.2, "grad_norm": 0.42803878336672074, "learning_rate": 1.8554228341497493e-05, "loss": 0.2811, "step": 4313 }, { "epoch": 0.2, "grad_norm": 0.30356638922204654, "learning_rate": 1.855345760838804e-05, "loss": 0.197, "step": 4314 }, { "epoch": 0.2, "grad_norm": 0.569775244592558, "learning_rate": 1.8552686685913275e-05, "loss": 0.3809, "step": 4315 }, { "epoch": 0.2, "grad_norm": 0.3637833366499475, "learning_rate": 1.8551915574090277e-05, "loss": 0.2721, "step": 4316 }, { "epoch": 0.2, "grad_norm": 0.7878643115705979, "learning_rate": 1.8551144272936103e-05, "loss": 0.6212, "step": 4317 }, { "epoch": 0.2, "grad_norm": 0.5759455781547923, "learning_rate": 1.855037278246784e-05, "loss": 0.3904, "step": 4318 }, { "epoch": 0.2, "grad_norm": 0.33918927904691615, "learning_rate": 1.8549601102702564e-05, "loss": 0.234, "step": 4319 }, { "epoch": 0.2, "grad_norm": 1.4102005371211603, "learning_rate": 1.854882923365736e-05, "loss": 0.7264, "step": 4320 }, { "epoch": 0.2, "grad_norm": 0.4110129516195724, "learning_rate": 1.8548057175349314e-05, "loss": 0.283, "step": 4321 }, { "epoch": 0.2, "grad_norm": 0.3481719720307271, "learning_rate": 1.8547284927795527e-05, "loss": 0.2588, "step": 4322 }, { "epoch": 0.2, "grad_norm": 0.6066801286349882, "learning_rate": 1.8546512491013082e-05, "loss": 0.4147, "step": 4323 }, { "epoch": 0.2, "grad_norm": 1.1253572376444525, "learning_rate": 1.854573986501909e-05, "loss": 0.733, "step": 4324 }, { "epoch": 0.2, "grad_norm": 0.42633931708458467, "learning_rate": 1.8544967049830656e-05, "loss": 0.1902, "step": 4325 }, { "epoch": 0.2, "grad_norm": 0.37327992638226337, "learning_rate": 1.8544194045464888e-05, "loss": 0.278, "step": 4326 }, { "epoch": 0.2, "grad_norm": 0.4107699728277899, "learning_rate": 1.8543420851938895e-05, "loss": 0.3301, "step": 4327 }, { "epoch": 0.2, "grad_norm": 0.5103978324837614, "learning_rate": 1.85426474692698e-05, "loss": 0.2844, "step": 4328 }, { "epoch": 0.2, "grad_norm": 0.540719464121703, "learning_rate": 1.8541873897474727e-05, "loss": 0.3442, "step": 4329 }, { "epoch": 0.2, "grad_norm": 0.47485028926210837, "learning_rate": 1.8541100136570796e-05, "loss": 0.3563, "step": 4330 }, { "epoch": 0.2, "grad_norm": 0.4551609512600658, "learning_rate": 1.8540326186575138e-05, "loss": 0.3311, "step": 4331 }, { "epoch": 0.2, "grad_norm": 0.5272102468708916, "learning_rate": 1.853955204750489e-05, "loss": 0.3142, "step": 4332 }, { "epoch": 0.2, "grad_norm": 0.7036754218696629, "learning_rate": 1.8538777719377194e-05, "loss": 0.4887, "step": 4333 }, { "epoch": 0.2, "grad_norm": 0.2787362636360333, "learning_rate": 1.8538003202209186e-05, "loss": 0.1749, "step": 4334 }, { "epoch": 0.2, "grad_norm": 0.411163716910392, "learning_rate": 1.8537228496018017e-05, "loss": 0.3001, "step": 4335 }, { "epoch": 0.2, "grad_norm": 1.5993647169250635, "learning_rate": 1.8536453600820838e-05, "loss": 0.6663, "step": 4336 }, { "epoch": 0.2, "grad_norm": 0.41395799425926444, "learning_rate": 1.8535678516634803e-05, "loss": 0.3131, "step": 4337 }, { "epoch": 0.2, "grad_norm": 0.5531271887297363, "learning_rate": 1.8534903243477072e-05, "loss": 0.3346, "step": 4338 }, { "epoch": 0.2, "grad_norm": 0.5192461296597799, "learning_rate": 1.8534127781364814e-05, "loss": 0.4067, "step": 4339 }, { "epoch": 0.2, "grad_norm": 0.39986066894239397, "learning_rate": 1.8533352130315185e-05, "loss": 0.2874, "step": 4340 }, { "epoch": 0.2, "grad_norm": 0.49231641606851856, "learning_rate": 1.8532576290345368e-05, "loss": 0.2913, "step": 4341 }, { "epoch": 0.2, "grad_norm": 0.33991968611855294, "learning_rate": 1.8531800261472536e-05, "loss": 0.2481, "step": 4342 }, { "epoch": 0.2, "grad_norm": 0.47664552994992193, "learning_rate": 1.8531024043713868e-05, "loss": 0.2941, "step": 4343 }, { "epoch": 0.2, "grad_norm": 0.5820049977438846, "learning_rate": 1.853024763708655e-05, "loss": 0.386, "step": 4344 }, { "epoch": 0.2, "grad_norm": 0.44935013101731247, "learning_rate": 1.852947104160777e-05, "loss": 0.2938, "step": 4345 }, { "epoch": 0.2, "grad_norm": 0.43234043786115367, "learning_rate": 1.8528694257294723e-05, "loss": 0.2924, "step": 4346 }, { "epoch": 0.2, "grad_norm": 0.37278686898840957, "learning_rate": 1.8527917284164604e-05, "loss": 0.3214, "step": 4347 }, { "epoch": 0.2, "grad_norm": 0.32407216551283824, "learning_rate": 1.852714012223462e-05, "loss": 0.1556, "step": 4348 }, { "epoch": 0.2, "grad_norm": 0.44523967327277575, "learning_rate": 1.8526362771521968e-05, "loss": 0.3014, "step": 4349 }, { "epoch": 0.2, "grad_norm": 0.33929429334188205, "learning_rate": 1.8525585232043863e-05, "loss": 0.3431, "step": 4350 }, { "epoch": 0.2, "grad_norm": 0.5721630840565419, "learning_rate": 1.852480750381752e-05, "loss": 0.3899, "step": 4351 }, { "epoch": 0.2, "grad_norm": 0.4331620843649517, "learning_rate": 1.8524029586860154e-05, "loss": 0.3324, "step": 4352 }, { "epoch": 0.2, "grad_norm": 0.556403232307594, "learning_rate": 1.8523251481188987e-05, "loss": 0.4095, "step": 4353 }, { "epoch": 0.2, "grad_norm": 0.3856497972178304, "learning_rate": 1.852247318682125e-05, "loss": 0.2785, "step": 4354 }, { "epoch": 0.2, "grad_norm": 0.33890319024818455, "learning_rate": 1.8521694703774166e-05, "loss": 0.205, "step": 4355 }, { "epoch": 0.2, "grad_norm": 0.5340483991443618, "learning_rate": 1.852091603206498e-05, "loss": 0.3932, "step": 4356 }, { "epoch": 0.2, "grad_norm": 0.841549354372186, "learning_rate": 1.8520137171710923e-05, "loss": 0.539, "step": 4357 }, { "epoch": 0.2, "grad_norm": 0.3669957306405334, "learning_rate": 1.851935812272924e-05, "loss": 0.2357, "step": 4358 }, { "epoch": 0.2, "grad_norm": 0.591401417289004, "learning_rate": 1.851857888513718e-05, "loss": 0.43, "step": 4359 }, { "epoch": 0.2, "grad_norm": 0.4009867712349622, "learning_rate": 1.8517799458951993e-05, "loss": 0.2494, "step": 4360 }, { "epoch": 0.2, "grad_norm": 0.4435805587919202, "learning_rate": 1.851701984419094e-05, "loss": 0.2718, "step": 4361 }, { "epoch": 0.2, "grad_norm": 0.45599626280460354, "learning_rate": 1.851624004087127e-05, "loss": 0.355, "step": 4362 }, { "epoch": 0.2, "grad_norm": 0.5885986872875313, "learning_rate": 1.8515460049010254e-05, "loss": 0.4573, "step": 4363 }, { "epoch": 0.2, "grad_norm": 0.919810715342427, "learning_rate": 1.8514679868625162e-05, "loss": 0.5153, "step": 4364 }, { "epoch": 0.2, "grad_norm": 0.40571771462577466, "learning_rate": 1.8513899499733267e-05, "loss": 0.2629, "step": 4365 }, { "epoch": 0.2, "grad_norm": 0.31090043942225026, "learning_rate": 1.8513118942351838e-05, "loss": 0.2856, "step": 4366 }, { "epoch": 0.2, "grad_norm": 0.6613477166330156, "learning_rate": 1.8512338196498165e-05, "loss": 0.3469, "step": 4367 }, { "epoch": 0.2, "grad_norm": 0.3938635390395695, "learning_rate": 1.8511557262189525e-05, "loss": 0.2558, "step": 4368 }, { "epoch": 0.2, "grad_norm": 1.7553070912867637, "learning_rate": 1.8510776139443212e-05, "loss": 0.6168, "step": 4369 }, { "epoch": 0.2, "grad_norm": 0.3716812692073504, "learning_rate": 1.850999482827652e-05, "loss": 0.2994, "step": 4370 }, { "epoch": 0.2, "grad_norm": 0.4887162915109267, "learning_rate": 1.8509213328706742e-05, "loss": 0.3005, "step": 4371 }, { "epoch": 0.2, "grad_norm": 0.6384983258630488, "learning_rate": 1.8508431640751187e-05, "loss": 0.4833, "step": 4372 }, { "epoch": 0.2, "grad_norm": 0.5410682058159972, "learning_rate": 1.8507649764427153e-05, "loss": 0.3973, "step": 4373 }, { "epoch": 0.2, "grad_norm": 0.3104542340792599, "learning_rate": 1.850686769975195e-05, "loss": 0.219, "step": 4374 }, { "epoch": 0.2, "grad_norm": 0.36846554624916333, "learning_rate": 1.8506085446742898e-05, "loss": 0.2637, "step": 4375 }, { "epoch": 0.2, "grad_norm": 0.7770039456206166, "learning_rate": 1.850530300541731e-05, "loss": 0.4035, "step": 4376 }, { "epoch": 0.2, "grad_norm": 0.533397545938857, "learning_rate": 1.8504520375792513e-05, "loss": 0.3515, "step": 4377 }, { "epoch": 0.2, "grad_norm": 0.4281682691114867, "learning_rate": 1.850373755788583e-05, "loss": 0.31, "step": 4378 }, { "epoch": 0.2, "grad_norm": 0.4625267344326516, "learning_rate": 1.8502954551714598e-05, "loss": 0.3419, "step": 4379 }, { "epoch": 0.2, "grad_norm": 0.43536687230779714, "learning_rate": 1.8502171357296144e-05, "loss": 0.2531, "step": 4380 }, { "epoch": 0.2, "grad_norm": 0.33368913736221056, "learning_rate": 1.850138797464781e-05, "loss": 0.2094, "step": 4381 }, { "epoch": 0.2, "grad_norm": 0.5224790476482379, "learning_rate": 1.8500604403786943e-05, "loss": 0.3309, "step": 4382 }, { "epoch": 0.2, "grad_norm": 0.37772600058119277, "learning_rate": 1.8499820644730885e-05, "loss": 0.3301, "step": 4383 }, { "epoch": 0.2, "grad_norm": 0.6105125192859253, "learning_rate": 1.849903669749699e-05, "loss": 0.4059, "step": 4384 }, { "epoch": 0.2, "grad_norm": 0.9461638237025233, "learning_rate": 1.8498252562102615e-05, "loss": 0.5433, "step": 4385 }, { "epoch": 0.2, "grad_norm": 0.36288331797842355, "learning_rate": 1.8497468238565118e-05, "loss": 0.2845, "step": 4386 }, { "epoch": 0.2, "grad_norm": 0.23701882307365282, "learning_rate": 1.8496683726901865e-05, "loss": 0.1223, "step": 4387 }, { "epoch": 0.2, "grad_norm": 0.788831722113564, "learning_rate": 1.8495899027130222e-05, "loss": 0.3991, "step": 4388 }, { "epoch": 0.2, "grad_norm": 0.4474401788459783, "learning_rate": 1.8495114139267568e-05, "loss": 0.3721, "step": 4389 }, { "epoch": 0.2, "grad_norm": 0.4382183351444377, "learning_rate": 1.849432906333127e-05, "loss": 0.3597, "step": 4390 }, { "epoch": 0.2, "grad_norm": 0.566346240713494, "learning_rate": 1.849354379933871e-05, "loss": 0.2369, "step": 4391 }, { "epoch": 0.2, "grad_norm": 0.3569981983294628, "learning_rate": 1.849275834730728e-05, "loss": 0.2574, "step": 4392 }, { "epoch": 0.2, "grad_norm": 0.44273349348819596, "learning_rate": 1.849197270725437e-05, "loss": 0.245, "step": 4393 }, { "epoch": 0.2, "grad_norm": 0.3726626617577536, "learning_rate": 1.849118687919737e-05, "loss": 0.2525, "step": 4394 }, { "epoch": 0.2, "grad_norm": 0.5168800743061702, "learning_rate": 1.8490400863153666e-05, "loss": 0.3436, "step": 4395 }, { "epoch": 0.2, "grad_norm": 0.7209666225132275, "learning_rate": 1.848961465914068e-05, "loss": 0.4906, "step": 4396 }, { "epoch": 0.2, "grad_norm": 0.3899518100354491, "learning_rate": 1.8488828267175803e-05, "loss": 0.2732, "step": 4397 }, { "epoch": 0.2, "grad_norm": 0.33255232527366, "learning_rate": 1.848804168727645e-05, "loss": 0.2722, "step": 4398 }, { "epoch": 0.2, "grad_norm": 0.3276942358710649, "learning_rate": 1.8487254919460037e-05, "loss": 0.1672, "step": 4399 }, { "epoch": 0.2, "grad_norm": 0.5392227071197087, "learning_rate": 1.8486467963743977e-05, "loss": 0.3144, "step": 4400 }, { "epoch": 0.2, "grad_norm": 0.394977386623491, "learning_rate": 1.8485680820145696e-05, "loss": 0.344, "step": 4401 }, { "epoch": 0.2, "grad_norm": 0.37907472127344916, "learning_rate": 1.8484893488682622e-05, "loss": 0.3537, "step": 4402 }, { "epoch": 0.2, "grad_norm": 1.9878053536639875, "learning_rate": 1.8484105969372184e-05, "loss": 0.8964, "step": 4403 }, { "epoch": 0.2, "grad_norm": 0.33995285968531436, "learning_rate": 1.8483318262231818e-05, "loss": 0.2307, "step": 4404 }, { "epoch": 0.2, "grad_norm": 0.3159330698344661, "learning_rate": 1.8482530367278958e-05, "loss": 0.2055, "step": 4405 }, { "epoch": 0.2, "grad_norm": 0.4257550155514517, "learning_rate": 1.8481742284531053e-05, "loss": 0.348, "step": 4406 }, { "epoch": 0.2, "grad_norm": 0.4107979107327484, "learning_rate": 1.8480954014005553e-05, "loss": 0.2694, "step": 4407 }, { "epoch": 0.2, "grad_norm": 1.3947824071355424, "learning_rate": 1.84801655557199e-05, "loss": 0.7583, "step": 4408 }, { "epoch": 0.2, "grad_norm": 0.5301235504443728, "learning_rate": 1.8479376909691558e-05, "loss": 0.3791, "step": 4409 }, { "epoch": 0.2, "grad_norm": 0.3670339082794531, "learning_rate": 1.847858807593798e-05, "loss": 0.2492, "step": 4410 }, { "epoch": 0.2, "grad_norm": 0.38253953706095434, "learning_rate": 1.8477799054476638e-05, "loss": 0.2187, "step": 4411 }, { "epoch": 0.2, "grad_norm": 0.542900459665312, "learning_rate": 1.8477009845324994e-05, "loss": 0.4408, "step": 4412 }, { "epoch": 0.2, "grad_norm": 0.4476526777100846, "learning_rate": 1.8476220448500523e-05, "loss": 0.2616, "step": 4413 }, { "epoch": 0.2, "grad_norm": 0.4804367324837792, "learning_rate": 1.84754308640207e-05, "loss": 0.3719, "step": 4414 }, { "epoch": 0.2, "grad_norm": 1.1244076640720961, "learning_rate": 1.8474641091903003e-05, "loss": 0.5059, "step": 4415 }, { "epoch": 0.2, "grad_norm": 0.44666664237946974, "learning_rate": 1.8473851132164925e-05, "loss": 0.3148, "step": 4416 }, { "epoch": 0.2, "grad_norm": 0.28420515692853615, "learning_rate": 1.847306098482395e-05, "loss": 0.2019, "step": 4417 }, { "epoch": 0.2, "grad_norm": 0.47189472018014195, "learning_rate": 1.847227064989757e-05, "loss": 0.3727, "step": 4418 }, { "epoch": 0.2, "grad_norm": 0.44374810583881247, "learning_rate": 1.8471480127403282e-05, "loss": 0.2808, "step": 4419 }, { "epoch": 0.2, "grad_norm": 1.027147544502033, "learning_rate": 1.847068941735859e-05, "loss": 0.5333, "step": 4420 }, { "epoch": 0.2, "grad_norm": 0.46103726091028446, "learning_rate": 1.8469898519781e-05, "loss": 0.3689, "step": 4421 }, { "epoch": 0.2, "grad_norm": 0.4169384540882763, "learning_rate": 1.846910743468802e-05, "loss": 0.2671, "step": 4422 }, { "epoch": 0.2, "grad_norm": 0.3082090394264841, "learning_rate": 1.846831616209716e-05, "loss": 0.1542, "step": 4423 }, { "epoch": 0.2, "grad_norm": 0.47559608400458564, "learning_rate": 1.8467524702025946e-05, "loss": 0.3593, "step": 4424 }, { "epoch": 0.2, "grad_norm": 0.4142880098335269, "learning_rate": 1.8466733054491897e-05, "loss": 0.2974, "step": 4425 }, { "epoch": 0.2, "grad_norm": 0.4953227641209142, "learning_rate": 1.8465941219512533e-05, "loss": 0.3025, "step": 4426 }, { "epoch": 0.2, "grad_norm": 1.0096049062964039, "learning_rate": 1.8465149197105395e-05, "loss": 0.6183, "step": 4427 }, { "epoch": 0.2, "grad_norm": 0.42045356735942796, "learning_rate": 1.8464356987288012e-05, "loss": 0.302, "step": 4428 }, { "epoch": 0.2, "grad_norm": 0.8752885739345332, "learning_rate": 1.8463564590077922e-05, "loss": 0.511, "step": 4429 }, { "epoch": 0.2, "grad_norm": 0.31311122487759296, "learning_rate": 1.8462772005492672e-05, "loss": 0.2447, "step": 4430 }, { "epoch": 0.2, "grad_norm": 0.40000271386071967, "learning_rate": 1.8461979233549802e-05, "loss": 0.2985, "step": 4431 }, { "epoch": 0.2, "grad_norm": 0.44039714183767725, "learning_rate": 1.846118627426687e-05, "loss": 0.304, "step": 4432 }, { "epoch": 0.2, "grad_norm": 0.4059346377595148, "learning_rate": 1.846039312766143e-05, "loss": 0.2579, "step": 4433 }, { "epoch": 0.2, "grad_norm": 0.4173722020922135, "learning_rate": 1.845959979375104e-05, "loss": 0.3066, "step": 4434 }, { "epoch": 0.2, "grad_norm": 0.9317788078181761, "learning_rate": 1.845880627255326e-05, "loss": 0.481, "step": 4435 }, { "epoch": 0.2, "grad_norm": 0.34104343885936417, "learning_rate": 1.845801256408567e-05, "loss": 0.2029, "step": 4436 }, { "epoch": 0.2, "grad_norm": 0.38445617022669215, "learning_rate": 1.8457218668365824e-05, "loss": 0.3042, "step": 4437 }, { "epoch": 0.2, "grad_norm": 0.42929741805960203, "learning_rate": 1.845642458541131e-05, "loss": 0.3446, "step": 4438 }, { "epoch": 0.2, "grad_norm": 0.27945648425499603, "learning_rate": 1.8455630315239712e-05, "loss": 0.1119, "step": 4439 }, { "epoch": 0.2, "grad_norm": 0.3915906628524294, "learning_rate": 1.8454835857868603e-05, "loss": 0.3002, "step": 4440 }, { "epoch": 0.2, "grad_norm": 0.6248890037133725, "learning_rate": 1.845404121331558e-05, "loss": 0.4187, "step": 4441 }, { "epoch": 0.2, "grad_norm": 0.5291168908969681, "learning_rate": 1.8453246381598233e-05, "loss": 0.4117, "step": 4442 }, { "epoch": 0.2, "grad_norm": 0.28394591590560064, "learning_rate": 1.8452451362734158e-05, "loss": 0.1887, "step": 4443 }, { "epoch": 0.2, "grad_norm": 1.4212640666332423, "learning_rate": 1.8451656156740954e-05, "loss": 0.8272, "step": 4444 }, { "epoch": 0.2, "grad_norm": 0.3277265910932489, "learning_rate": 1.8450860763636232e-05, "loss": 0.2759, "step": 4445 }, { "epoch": 0.2, "grad_norm": 0.37949144874344004, "learning_rate": 1.8450065183437594e-05, "loss": 0.2399, "step": 4446 }, { "epoch": 0.2, "grad_norm": 0.8306450247301893, "learning_rate": 1.844926941616266e-05, "loss": 0.4675, "step": 4447 }, { "epoch": 0.2, "grad_norm": 1.2258189307449783, "learning_rate": 1.8448473461829045e-05, "loss": 0.5991, "step": 4448 }, { "epoch": 0.2, "grad_norm": 0.34399662138255277, "learning_rate": 1.8447677320454367e-05, "loss": 0.2217, "step": 4449 }, { "epoch": 0.2, "grad_norm": 0.4462577761254286, "learning_rate": 1.8446880992056257e-05, "loss": 0.3484, "step": 4450 }, { "epoch": 0.2, "grad_norm": 0.43238929526001907, "learning_rate": 1.8446084476652344e-05, "loss": 0.2543, "step": 4451 }, { "epoch": 0.2, "grad_norm": 0.38503095284950883, "learning_rate": 1.844528777426026e-05, "loss": 0.2124, "step": 4452 }, { "epoch": 0.2, "grad_norm": 0.5185074696526006, "learning_rate": 1.8444490884897643e-05, "loss": 0.3625, "step": 4453 }, { "epoch": 0.2, "grad_norm": 1.323745708541978, "learning_rate": 1.844369380858214e-05, "loss": 0.6126, "step": 4454 }, { "epoch": 0.2, "grad_norm": 0.4867506163141617, "learning_rate": 1.8442896545331394e-05, "loss": 0.3279, "step": 4455 }, { "epoch": 0.2, "grad_norm": 0.2659034359787943, "learning_rate": 1.8442099095163052e-05, "loss": 0.1676, "step": 4456 }, { "epoch": 0.2, "grad_norm": 0.5443897249545817, "learning_rate": 1.8441301458094773e-05, "loss": 0.349, "step": 4457 }, { "epoch": 0.2, "grad_norm": 0.497123551826214, "learning_rate": 1.844050363414422e-05, "loss": 0.3515, "step": 4458 }, { "epoch": 0.2, "grad_norm": 0.5339807415564104, "learning_rate": 1.8439705623329048e-05, "loss": 0.3164, "step": 4459 }, { "epoch": 0.2, "grad_norm": 1.0593527936879776, "learning_rate": 1.8438907425666927e-05, "loss": 0.623, "step": 4460 }, { "epoch": 0.2, "grad_norm": 0.32841147322899433, "learning_rate": 1.8438109041175532e-05, "loss": 0.2823, "step": 4461 }, { "epoch": 0.2, "grad_norm": 0.525230139524885, "learning_rate": 1.8437310469872535e-05, "loss": 0.3059, "step": 4462 }, { "epoch": 0.21, "grad_norm": 0.4128166956355856, "learning_rate": 1.8436511711775615e-05, "loss": 0.3357, "step": 4463 }, { "epoch": 0.21, "grad_norm": 0.38554193352101607, "learning_rate": 1.8435712766902458e-05, "loss": 0.2902, "step": 4464 }, { "epoch": 0.21, "grad_norm": 0.325444437079442, "learning_rate": 1.843491363527075e-05, "loss": 0.2077, "step": 4465 }, { "epoch": 0.21, "grad_norm": 1.0054154890945404, "learning_rate": 1.8434114316898185e-05, "loss": 0.5957, "step": 4466 }, { "epoch": 0.21, "grad_norm": 0.7534035535583185, "learning_rate": 1.8433314811802455e-05, "loss": 0.3231, "step": 4467 }, { "epoch": 0.21, "grad_norm": 0.4875449636812026, "learning_rate": 1.843251512000127e-05, "loss": 0.3651, "step": 4468 }, { "epoch": 0.21, "grad_norm": 0.4436255519343426, "learning_rate": 1.8431715241512322e-05, "loss": 0.3275, "step": 4469 }, { "epoch": 0.21, "grad_norm": 0.7626308540041877, "learning_rate": 1.8430915176353325e-05, "loss": 0.3959, "step": 4470 }, { "epoch": 0.21, "grad_norm": 0.3393845225210966, "learning_rate": 1.8430114924541995e-05, "loss": 0.2226, "step": 4471 }, { "epoch": 0.21, "grad_norm": 0.9710703013390126, "learning_rate": 1.8429314486096042e-05, "loss": 0.3674, "step": 4472 }, { "epoch": 0.21, "grad_norm": 0.3800482477474855, "learning_rate": 1.8428513861033193e-05, "loss": 0.2929, "step": 4473 }, { "epoch": 0.21, "grad_norm": 0.4734865335193066, "learning_rate": 1.842771304937117e-05, "loss": 0.3561, "step": 4474 }, { "epoch": 0.21, "grad_norm": 0.7634465212710199, "learning_rate": 1.8426912051127702e-05, "loss": 0.3808, "step": 4475 }, { "epoch": 0.21, "grad_norm": 0.3738599373182764, "learning_rate": 1.842611086632052e-05, "loss": 0.2332, "step": 4476 }, { "epoch": 0.21, "grad_norm": 0.3491678112595703, "learning_rate": 1.8425309494967368e-05, "loss": 0.254, "step": 4477 }, { "epoch": 0.21, "grad_norm": 2.244963672732106, "learning_rate": 1.842450793708599e-05, "loss": 0.7392, "step": 4478 }, { "epoch": 0.21, "grad_norm": 0.4096372250922761, "learning_rate": 1.8423706192694118e-05, "loss": 0.2439, "step": 4479 }, { "epoch": 0.21, "grad_norm": 0.709116923915328, "learning_rate": 1.8422904261809512e-05, "loss": 0.4124, "step": 4480 }, { "epoch": 0.21, "grad_norm": 0.5426902792370397, "learning_rate": 1.8422102144449922e-05, "loss": 0.3511, "step": 4481 }, { "epoch": 0.21, "grad_norm": 0.39892283638639275, "learning_rate": 1.8421299840633112e-05, "loss": 0.2441, "step": 4482 }, { "epoch": 0.21, "grad_norm": 0.3177808559968744, "learning_rate": 1.8420497350376838e-05, "loss": 0.2023, "step": 4483 }, { "epoch": 0.21, "grad_norm": 1.2176875641679026, "learning_rate": 1.8419694673698865e-05, "loss": 0.6791, "step": 4484 }, { "epoch": 0.21, "grad_norm": 0.6365815409978489, "learning_rate": 1.8418891810616974e-05, "loss": 0.2254, "step": 4485 }, { "epoch": 0.21, "grad_norm": 0.4559997018330957, "learning_rate": 1.8418088761148925e-05, "loss": 0.3574, "step": 4486 }, { "epoch": 0.21, "grad_norm": 0.7655479580035389, "learning_rate": 1.841728552531251e-05, "loss": 0.5315, "step": 4487 }, { "epoch": 0.21, "grad_norm": 0.30729938436871423, "learning_rate": 1.8416482103125505e-05, "loss": 0.0753, "step": 4488 }, { "epoch": 0.21, "grad_norm": 0.33186913970259163, "learning_rate": 1.84156784946057e-05, "loss": 0.2753, "step": 4489 }, { "epoch": 0.21, "grad_norm": 1.3241233056579782, "learning_rate": 1.841487469977088e-05, "loss": 0.7877, "step": 4490 }, { "epoch": 0.21, "grad_norm": 0.7339615387859504, "learning_rate": 1.8414070718638844e-05, "loss": 0.4633, "step": 4491 }, { "epoch": 0.21, "grad_norm": 0.3915467303863332, "learning_rate": 1.8413266551227394e-05, "loss": 0.2832, "step": 4492 }, { "epoch": 0.21, "grad_norm": 0.5696379466662088, "learning_rate": 1.8412462197554334e-05, "loss": 0.3713, "step": 4493 }, { "epoch": 0.21, "grad_norm": 0.6588948488218529, "learning_rate": 1.8411657657637465e-05, "loss": 0.3514, "step": 4494 }, { "epoch": 0.21, "grad_norm": 0.29986766014166094, "learning_rate": 1.8410852931494606e-05, "loss": 0.1758, "step": 4495 }, { "epoch": 0.21, "grad_norm": 1.2105300696201593, "learning_rate": 1.8410048019143568e-05, "loss": 0.7088, "step": 4496 }, { "epoch": 0.21, "grad_norm": 0.40957315010250506, "learning_rate": 1.840924292060217e-05, "loss": 0.3146, "step": 4497 }, { "epoch": 0.21, "grad_norm": 0.5616058975749821, "learning_rate": 1.8408437635888243e-05, "loss": 0.2835, "step": 4498 }, { "epoch": 0.21, "grad_norm": 0.9304271309650876, "learning_rate": 1.840763216501961e-05, "loss": 0.582, "step": 4499 }, { "epoch": 0.21, "grad_norm": 0.78292687362689, "learning_rate": 1.84068265080141e-05, "loss": 0.364, "step": 4500 }, { "epoch": 0.21, "grad_norm": 0.4235250419818637, "learning_rate": 1.8406020664889558e-05, "loss": 0.263, "step": 4501 }, { "epoch": 0.21, "grad_norm": 0.3919014340959677, "learning_rate": 1.8405214635663817e-05, "loss": 0.2591, "step": 4502 }, { "epoch": 0.21, "grad_norm": 0.8772265349328561, "learning_rate": 1.8404408420354728e-05, "loss": 0.4169, "step": 4503 }, { "epoch": 0.21, "grad_norm": 0.4960614130586802, "learning_rate": 1.8403602018980135e-05, "loss": 0.3095, "step": 4504 }, { "epoch": 0.21, "grad_norm": 0.432634523712912, "learning_rate": 1.8402795431557895e-05, "loss": 0.3026, "step": 4505 }, { "epoch": 0.21, "grad_norm": 0.8445320864471082, "learning_rate": 1.840198865810586e-05, "loss": 0.4435, "step": 4506 }, { "epoch": 0.21, "grad_norm": 0.3424407943806172, "learning_rate": 1.84011816986419e-05, "loss": 0.2474, "step": 4507 }, { "epoch": 0.21, "grad_norm": 0.4080477734016136, "learning_rate": 1.840037455318387e-05, "loss": 0.2543, "step": 4508 }, { "epoch": 0.21, "grad_norm": 0.6336525898054867, "learning_rate": 1.839956722174964e-05, "loss": 0.37, "step": 4509 }, { "epoch": 0.21, "grad_norm": 0.4321516926251434, "learning_rate": 1.8398759704357093e-05, "loss": 0.3307, "step": 4510 }, { "epoch": 0.21, "grad_norm": 0.8560378333076766, "learning_rate": 1.83979520010241e-05, "loss": 0.4089, "step": 4511 }, { "epoch": 0.21, "grad_norm": 0.46544700233128194, "learning_rate": 1.8397144111768543e-05, "loss": 0.3577, "step": 4512 }, { "epoch": 0.21, "grad_norm": 0.3635076984243348, "learning_rate": 1.8396336036608307e-05, "loss": 0.3023, "step": 4513 }, { "epoch": 0.21, "grad_norm": 0.33173049904380497, "learning_rate": 1.8395527775561284e-05, "loss": 0.1882, "step": 4514 }, { "epoch": 0.21, "grad_norm": 0.6000557727610799, "learning_rate": 1.839471932864537e-05, "loss": 0.4117, "step": 4515 }, { "epoch": 0.21, "grad_norm": 0.47762628497453224, "learning_rate": 1.839391069587846e-05, "loss": 0.3568, "step": 4516 }, { "epoch": 0.21, "grad_norm": 0.40974906813118483, "learning_rate": 1.8393101877278455e-05, "loss": 0.3395, "step": 4517 }, { "epoch": 0.21, "grad_norm": 0.38769849321606564, "learning_rate": 1.839229287286327e-05, "loss": 0.0748, "step": 4518 }, { "epoch": 0.21, "grad_norm": 0.4406936750909376, "learning_rate": 1.8391483682650803e-05, "loss": 0.3583, "step": 4519 }, { "epoch": 0.21, "grad_norm": 0.35148677905224907, "learning_rate": 1.8390674306658977e-05, "loss": 0.3382, "step": 4520 }, { "epoch": 0.21, "grad_norm": 0.31228300037368295, "learning_rate": 1.838986474490571e-05, "loss": 0.2213, "step": 4521 }, { "epoch": 0.21, "grad_norm": 0.47298557644576394, "learning_rate": 1.8389054997408923e-05, "loss": 0.3081, "step": 4522 }, { "epoch": 0.21, "grad_norm": 1.1122345600832777, "learning_rate": 1.8388245064186545e-05, "loss": 0.725, "step": 4523 }, { "epoch": 0.21, "grad_norm": 0.3933633071087256, "learning_rate": 1.8387434945256503e-05, "loss": 0.2766, "step": 4524 }, { "epoch": 0.21, "grad_norm": 0.461293271743621, "learning_rate": 1.8386624640636737e-05, "loss": 0.2911, "step": 4525 }, { "epoch": 0.21, "grad_norm": 0.6102961146517013, "learning_rate": 1.8385814150345188e-05, "loss": 0.4618, "step": 4526 }, { "epoch": 0.21, "grad_norm": 0.29311780730516, "learning_rate": 1.8385003474399792e-05, "loss": 0.1266, "step": 4527 }, { "epoch": 0.21, "grad_norm": 0.4540746138464081, "learning_rate": 1.8384192612818502e-05, "loss": 0.3421, "step": 4528 }, { "epoch": 0.21, "grad_norm": 0.542262524632551, "learning_rate": 1.8383381565619267e-05, "loss": 0.2711, "step": 4529 }, { "epoch": 0.21, "grad_norm": 0.8693316708371276, "learning_rate": 1.8382570332820045e-05, "loss": 0.5351, "step": 4530 }, { "epoch": 0.21, "grad_norm": 0.35870645133630963, "learning_rate": 1.8381758914438795e-05, "loss": 0.252, "step": 4531 }, { "epoch": 0.21, "grad_norm": 0.47024022988957936, "learning_rate": 1.8380947310493483e-05, "loss": 0.3976, "step": 4532 }, { "epoch": 0.21, "grad_norm": 0.5611438712399898, "learning_rate": 1.838013552100207e-05, "loss": 0.4061, "step": 4533 }, { "epoch": 0.21, "grad_norm": 0.3873244248317515, "learning_rate": 1.837932354598254e-05, "loss": 0.2137, "step": 4534 }, { "epoch": 0.21, "grad_norm": 0.3297114334101669, "learning_rate": 1.8378511385452856e-05, "loss": 0.2091, "step": 4535 }, { "epoch": 0.21, "grad_norm": 0.41556957290650115, "learning_rate": 1.8377699039431013e-05, "loss": 0.3359, "step": 4536 }, { "epoch": 0.21, "grad_norm": 0.39090113564999057, "learning_rate": 1.8376886507934984e-05, "loss": 0.254, "step": 4537 }, { "epoch": 0.21, "grad_norm": 0.6445740374361782, "learning_rate": 1.837607379098276e-05, "loss": 0.4622, "step": 4538 }, { "epoch": 0.21, "grad_norm": 0.7844720760239691, "learning_rate": 1.8375260888592335e-05, "loss": 0.4886, "step": 4539 }, { "epoch": 0.21, "grad_norm": 0.3841299386143589, "learning_rate": 1.8374447800781706e-05, "loss": 0.2349, "step": 4540 }, { "epoch": 0.21, "grad_norm": 0.2746444221852081, "learning_rate": 1.8373634527568877e-05, "loss": 0.2328, "step": 4541 }, { "epoch": 0.21, "grad_norm": 0.859137480089945, "learning_rate": 1.837282106897185e-05, "loss": 0.5171, "step": 4542 }, { "epoch": 0.21, "grad_norm": 0.5726114146139141, "learning_rate": 1.8372007425008633e-05, "loss": 0.299, "step": 4543 }, { "epoch": 0.21, "grad_norm": 0.3699217434096532, "learning_rate": 1.837119359569724e-05, "loss": 0.2869, "step": 4544 }, { "epoch": 0.21, "grad_norm": 1.2411413064091992, "learning_rate": 1.837037958105569e-05, "loss": 0.7696, "step": 4545 }, { "epoch": 0.21, "grad_norm": 0.40326436494667944, "learning_rate": 1.8369565381102002e-05, "loss": 0.3015, "step": 4546 }, { "epoch": 0.21, "grad_norm": 0.23076170615833244, "learning_rate": 1.8368750995854206e-05, "loss": 0.1179, "step": 4547 }, { "epoch": 0.21, "grad_norm": 0.4945097161309367, "learning_rate": 1.836793642533033e-05, "loss": 0.3707, "step": 4548 }, { "epoch": 0.21, "grad_norm": 0.38761428972781975, "learning_rate": 1.83671216695484e-05, "loss": 0.3019, "step": 4549 }, { "epoch": 0.21, "grad_norm": 0.8911628369805921, "learning_rate": 1.8366306728526465e-05, "loss": 0.3494, "step": 4550 }, { "epoch": 0.21, "grad_norm": 1.2939556702780841, "learning_rate": 1.8365491602282565e-05, "loss": 0.805, "step": 4551 }, { "epoch": 0.21, "grad_norm": 0.3996867720441695, "learning_rate": 1.8364676290834737e-05, "loss": 0.291, "step": 4552 }, { "epoch": 0.21, "grad_norm": 0.2836975215738741, "learning_rate": 1.8363860794201042e-05, "loss": 0.189, "step": 4553 }, { "epoch": 0.21, "grad_norm": 0.9526515936212498, "learning_rate": 1.836304511239953e-05, "loss": 0.4688, "step": 4554 }, { "epoch": 0.21, "grad_norm": 0.45601109398345857, "learning_rate": 1.836222924544826e-05, "loss": 0.2732, "step": 4555 }, { "epoch": 0.21, "grad_norm": 0.4014681064055307, "learning_rate": 1.8361413193365295e-05, "loss": 0.3391, "step": 4556 }, { "epoch": 0.21, "grad_norm": 0.4852827425818757, "learning_rate": 1.8360596956168698e-05, "loss": 0.3172, "step": 4557 }, { "epoch": 0.21, "grad_norm": 0.4154254437291109, "learning_rate": 1.8359780533876544e-05, "loss": 0.289, "step": 4558 }, { "epoch": 0.21, "grad_norm": 0.8806941914172582, "learning_rate": 1.8358963926506908e-05, "loss": 0.4861, "step": 4559 }, { "epoch": 0.21, "grad_norm": 0.4416382168301104, "learning_rate": 1.8358147134077863e-05, "loss": 0.3132, "step": 4560 }, { "epoch": 0.21, "grad_norm": 0.3215025712249785, "learning_rate": 1.83573301566075e-05, "loss": 0.2374, "step": 4561 }, { "epoch": 0.21, "grad_norm": 0.49120500927244126, "learning_rate": 1.83565129941139e-05, "loss": 0.32, "step": 4562 }, { "epoch": 0.21, "grad_norm": 1.389224195201891, "learning_rate": 1.8355695646615158e-05, "loss": 0.6004, "step": 4563 }, { "epoch": 0.21, "grad_norm": 0.332578909554029, "learning_rate": 1.8354878114129368e-05, "loss": 0.2911, "step": 4564 }, { "epoch": 0.21, "grad_norm": 0.5927002202672179, "learning_rate": 1.8354060396674628e-05, "loss": 0.3902, "step": 4565 }, { "epoch": 0.21, "grad_norm": 0.5609548862190832, "learning_rate": 1.835324249426904e-05, "loss": 0.2991, "step": 4566 }, { "epoch": 0.21, "grad_norm": 0.31926419091079944, "learning_rate": 1.8352424406930714e-05, "loss": 0.2216, "step": 4567 }, { "epoch": 0.21, "grad_norm": 0.6562529874200298, "learning_rate": 1.8351606134677766e-05, "loss": 0.3897, "step": 4568 }, { "epoch": 0.21, "grad_norm": 0.7577010851148317, "learning_rate": 1.8350787677528307e-05, "loss": 0.3925, "step": 4569 }, { "epoch": 0.21, "grad_norm": 0.3604380829380593, "learning_rate": 1.8349969035500456e-05, "loss": 0.2121, "step": 4570 }, { "epoch": 0.21, "grad_norm": 0.6901919160591082, "learning_rate": 1.834915020861234e-05, "loss": 0.4971, "step": 4571 }, { "epoch": 0.21, "grad_norm": 0.40402774672212377, "learning_rate": 1.8348331196882082e-05, "loss": 0.3648, "step": 4572 }, { "epoch": 0.21, "grad_norm": 0.26025303015026535, "learning_rate": 1.834751200032782e-05, "loss": 0.1365, "step": 4573 }, { "epoch": 0.21, "grad_norm": 0.4150870088650874, "learning_rate": 1.8346692618967686e-05, "loss": 0.2842, "step": 4574 }, { "epoch": 0.21, "grad_norm": 0.5295548604752122, "learning_rate": 1.8345873052819825e-05, "loss": 0.415, "step": 4575 }, { "epoch": 0.21, "grad_norm": 0.38462285384017086, "learning_rate": 1.8345053301902376e-05, "loss": 0.2054, "step": 4576 }, { "epoch": 0.21, "grad_norm": 0.9032391231196824, "learning_rate": 1.8344233366233492e-05, "loss": 0.4334, "step": 4577 }, { "epoch": 0.21, "grad_norm": 1.3487902906527032, "learning_rate": 1.8343413245831323e-05, "loss": 0.7028, "step": 4578 }, { "epoch": 0.21, "grad_norm": 0.2973113728314521, "learning_rate": 1.8342592940714026e-05, "loss": 0.1867, "step": 4579 }, { "epoch": 0.21, "grad_norm": 0.32170342019545883, "learning_rate": 1.834177245089976e-05, "loss": 0.2687, "step": 4580 }, { "epoch": 0.21, "grad_norm": 1.583807786972439, "learning_rate": 1.8340951776406695e-05, "loss": 0.9249, "step": 4581 }, { "epoch": 0.21, "grad_norm": 0.44375169137578174, "learning_rate": 1.8340130917252995e-05, "loss": 0.3125, "step": 4582 }, { "epoch": 0.21, "grad_norm": 0.5468710800300232, "learning_rate": 1.8339309873456835e-05, "loss": 0.3434, "step": 4583 }, { "epoch": 0.21, "grad_norm": 0.47646794831376693, "learning_rate": 1.833848864503639e-05, "loss": 0.3599, "step": 4584 }, { "epoch": 0.21, "grad_norm": 0.3080238853413536, "learning_rate": 1.8337667232009845e-05, "loss": 0.2248, "step": 4585 }, { "epoch": 0.21, "grad_norm": 0.31937636056518637, "learning_rate": 1.8336845634395385e-05, "loss": 0.1697, "step": 4586 }, { "epoch": 0.21, "grad_norm": 0.5312230431412615, "learning_rate": 1.8336023852211197e-05, "loss": 0.3911, "step": 4587 }, { "epoch": 0.21, "grad_norm": 0.3702593500991801, "learning_rate": 1.8335201885475476e-05, "loss": 0.2979, "step": 4588 }, { "epoch": 0.21, "grad_norm": 0.5446635664813944, "learning_rate": 1.8334379734206415e-05, "loss": 0.3337, "step": 4589 }, { "epoch": 0.21, "grad_norm": 1.1155471524612366, "learning_rate": 1.8333557398422224e-05, "loss": 0.5501, "step": 4590 }, { "epoch": 0.21, "grad_norm": 0.30708505224170524, "learning_rate": 1.8332734878141097e-05, "loss": 0.1568, "step": 4591 }, { "epoch": 0.21, "grad_norm": 0.3658113913111306, "learning_rate": 1.833191217338126e-05, "loss": 0.2356, "step": 4592 }, { "epoch": 0.21, "grad_norm": 0.7467558800542814, "learning_rate": 1.8331089284160914e-05, "loss": 0.4761, "step": 4593 }, { "epoch": 0.21, "grad_norm": 0.6299505730719956, "learning_rate": 1.833026621049828e-05, "loss": 0.3302, "step": 4594 }, { "epoch": 0.21, "grad_norm": 0.5167602375645106, "learning_rate": 1.8329442952411584e-05, "loss": 0.3782, "step": 4595 }, { "epoch": 0.21, "grad_norm": 0.4363828916744101, "learning_rate": 1.8328619509919047e-05, "loss": 0.2718, "step": 4596 }, { "epoch": 0.21, "grad_norm": 0.7518047794759544, "learning_rate": 1.83277958830389e-05, "loss": 0.3452, "step": 4597 }, { "epoch": 0.21, "grad_norm": 0.2926009312177105, "learning_rate": 1.832697207178938e-05, "loss": 0.2331, "step": 4598 }, { "epoch": 0.21, "grad_norm": 0.8532877939908471, "learning_rate": 1.8326148076188725e-05, "loss": 0.3479, "step": 4599 }, { "epoch": 0.21, "grad_norm": 0.3897996687113407, "learning_rate": 1.8325323896255175e-05, "loss": 0.3025, "step": 4600 }, { "epoch": 0.21, "grad_norm": 0.6013179483021874, "learning_rate": 1.8324499532006975e-05, "loss": 0.4292, "step": 4601 }, { "epoch": 0.21, "grad_norm": 0.8540723739208614, "learning_rate": 1.8323674983462383e-05, "loss": 0.371, "step": 4602 }, { "epoch": 0.21, "grad_norm": 0.3353940101067354, "learning_rate": 1.8322850250639645e-05, "loss": 0.2183, "step": 4603 }, { "epoch": 0.21, "grad_norm": 0.367349511253539, "learning_rate": 1.832202533355703e-05, "loss": 0.3092, "step": 4604 }, { "epoch": 0.21, "grad_norm": 0.576936953989063, "learning_rate": 1.8321200232232792e-05, "loss": 0.3234, "step": 4605 }, { "epoch": 0.21, "grad_norm": 0.40632011584896016, "learning_rate": 1.83203749466852e-05, "loss": 0.257, "step": 4606 }, { "epoch": 0.21, "grad_norm": 0.6323191584401766, "learning_rate": 1.8319549476932527e-05, "loss": 0.3819, "step": 4607 }, { "epoch": 0.21, "grad_norm": 0.40187080521452123, "learning_rate": 1.831872382299305e-05, "loss": 0.3494, "step": 4608 }, { "epoch": 0.21, "grad_norm": 0.6619876159493546, "learning_rate": 1.831789798488504e-05, "loss": 0.1928, "step": 4609 }, { "epoch": 0.21, "grad_norm": 0.31471640807963536, "learning_rate": 1.831707196262679e-05, "loss": 0.248, "step": 4610 }, { "epoch": 0.21, "grad_norm": 0.5353264629725613, "learning_rate": 1.8316245756236578e-05, "loss": 0.3937, "step": 4611 }, { "epoch": 0.21, "grad_norm": 0.40248006020890803, "learning_rate": 1.8315419365732702e-05, "loss": 0.2275, "step": 4612 }, { "epoch": 0.21, "grad_norm": 0.37234828261484243, "learning_rate": 1.831459279113346e-05, "loss": 0.2592, "step": 4613 }, { "epoch": 0.21, "grad_norm": 1.071447345174701, "learning_rate": 1.831376603245714e-05, "loss": 0.5666, "step": 4614 }, { "epoch": 0.21, "grad_norm": 0.3625191470197243, "learning_rate": 1.831293908972206e-05, "loss": 0.2405, "step": 4615 }, { "epoch": 0.21, "grad_norm": 0.3329554715877784, "learning_rate": 1.8312111962946517e-05, "loss": 0.2622, "step": 4616 }, { "epoch": 0.21, "grad_norm": 0.9591723487439654, "learning_rate": 1.8311284652148825e-05, "loss": 0.4893, "step": 4617 }, { "epoch": 0.21, "grad_norm": 0.8992108885779463, "learning_rate": 1.8310457157347306e-05, "loss": 0.5673, "step": 4618 }, { "epoch": 0.21, "grad_norm": 0.3321857530404299, "learning_rate": 1.830962947856027e-05, "loss": 0.2007, "step": 4619 }, { "epoch": 0.21, "grad_norm": 0.464540730712128, "learning_rate": 1.830880161580605e-05, "loss": 0.327, "step": 4620 }, { "epoch": 0.21, "grad_norm": 0.6866733230850293, "learning_rate": 1.830797356910297e-05, "loss": 0.3231, "step": 4621 }, { "epoch": 0.21, "grad_norm": 0.5194091625345517, "learning_rate": 1.830714533846936e-05, "loss": 0.3011, "step": 4622 }, { "epoch": 0.21, "grad_norm": 0.6108230984146801, "learning_rate": 1.8306316923923564e-05, "loss": 0.3743, "step": 4623 }, { "epoch": 0.21, "grad_norm": 0.4234023757694488, "learning_rate": 1.8305488325483913e-05, "loss": 0.3121, "step": 4624 }, { "epoch": 0.21, "grad_norm": 0.3022048740973783, "learning_rate": 1.8304659543168757e-05, "loss": 0.2031, "step": 4625 }, { "epoch": 0.21, "grad_norm": 0.5021725160542018, "learning_rate": 1.830383057699644e-05, "loss": 0.256, "step": 4626 }, { "epoch": 0.21, "grad_norm": 0.40584754966464476, "learning_rate": 1.830300142698532e-05, "loss": 0.3241, "step": 4627 }, { "epoch": 0.21, "grad_norm": 0.38035674784310214, "learning_rate": 1.830217209315375e-05, "loss": 0.2649, "step": 4628 }, { "epoch": 0.21, "grad_norm": 0.9887265610109615, "learning_rate": 1.830134257552009e-05, "loss": 0.6486, "step": 4629 }, { "epoch": 0.21, "grad_norm": 1.123316084594646, "learning_rate": 1.830051287410271e-05, "loss": 0.6933, "step": 4630 }, { "epoch": 0.21, "grad_norm": 0.30579650522816454, "learning_rate": 1.8299682988919973e-05, "loss": 0.2157, "step": 4631 }, { "epoch": 0.21, "grad_norm": 0.34962032014734423, "learning_rate": 1.8298852919990254e-05, "loss": 0.2088, "step": 4632 }, { "epoch": 0.21, "grad_norm": 0.6195422831240769, "learning_rate": 1.829802266733193e-05, "loss": 0.4219, "step": 4633 }, { "epoch": 0.21, "grad_norm": 0.4584659278579204, "learning_rate": 1.829719223096338e-05, "loss": 0.34, "step": 4634 }, { "epoch": 0.21, "grad_norm": 0.42992961965023846, "learning_rate": 1.8296361610902994e-05, "loss": 0.3106, "step": 4635 }, { "epoch": 0.21, "grad_norm": 0.589559115352862, "learning_rate": 1.8295530807169158e-05, "loss": 0.381, "step": 4636 }, { "epoch": 0.21, "grad_norm": 0.3343543749720442, "learning_rate": 1.8294699819780262e-05, "loss": 0.2649, "step": 4637 }, { "epoch": 0.21, "grad_norm": 0.40103138013636025, "learning_rate": 1.8293868648754708e-05, "loss": 0.1918, "step": 4638 }, { "epoch": 0.21, "grad_norm": 0.3976638493283039, "learning_rate": 1.8293037294110897e-05, "loss": 0.3264, "step": 4639 }, { "epoch": 0.21, "grad_norm": 0.4033869931495121, "learning_rate": 1.8292205755867233e-05, "loss": 0.2883, "step": 4640 }, { "epoch": 0.21, "grad_norm": 0.7098122268082854, "learning_rate": 1.8291374034042127e-05, "loss": 0.4081, "step": 4641 }, { "epoch": 0.21, "grad_norm": 0.7289665638466958, "learning_rate": 1.829054212865399e-05, "loss": 0.469, "step": 4642 }, { "epoch": 0.21, "grad_norm": 0.5182878505099521, "learning_rate": 1.8289710039721237e-05, "loss": 0.3409, "step": 4643 }, { "epoch": 0.21, "grad_norm": 0.3805657789707598, "learning_rate": 1.8288877767262302e-05, "loss": 0.2706, "step": 4644 }, { "epoch": 0.21, "grad_norm": 0.8086804148945254, "learning_rate": 1.8288045311295594e-05, "loss": 0.4002, "step": 4645 }, { "epoch": 0.21, "grad_norm": 0.43134834777388437, "learning_rate": 1.8287212671839554e-05, "loss": 0.3052, "step": 4646 }, { "epoch": 0.21, "grad_norm": 0.4312725745261092, "learning_rate": 1.8286379848912612e-05, "loss": 0.3506, "step": 4647 }, { "epoch": 0.21, "grad_norm": 0.8856913487808675, "learning_rate": 1.828554684253321e-05, "loss": 0.5767, "step": 4648 }, { "epoch": 0.21, "grad_norm": 0.4124641606171705, "learning_rate": 1.828471365271978e-05, "loss": 0.2705, "step": 4649 }, { "epoch": 0.21, "grad_norm": 0.35856325788690446, "learning_rate": 1.828388027949078e-05, "loss": 0.2308, "step": 4650 }, { "epoch": 0.21, "grad_norm": 0.409784233241191, "learning_rate": 1.8283046722864656e-05, "loss": 0.3141, "step": 4651 }, { "epoch": 0.21, "grad_norm": 0.3855713725879107, "learning_rate": 1.8282212982859856e-05, "loss": 0.2819, "step": 4652 }, { "epoch": 0.21, "grad_norm": 1.6425095129561391, "learning_rate": 1.8281379059494845e-05, "loss": 0.8959, "step": 4653 }, { "epoch": 0.21, "grad_norm": 0.7935460672691838, "learning_rate": 1.8280544952788086e-05, "loss": 0.3372, "step": 4654 }, { "epoch": 0.21, "grad_norm": 0.4054106948421424, "learning_rate": 1.8279710662758037e-05, "loss": 0.2795, "step": 4655 }, { "epoch": 0.21, "grad_norm": 0.6681846480968677, "learning_rate": 1.827887618942318e-05, "loss": 0.5152, "step": 4656 }, { "epoch": 0.21, "grad_norm": 0.3552188032688544, "learning_rate": 1.827804153280198e-05, "loss": 0.2441, "step": 4657 }, { "epoch": 0.21, "grad_norm": 0.3983191076778536, "learning_rate": 1.8277206692912922e-05, "loss": 0.2203, "step": 4658 }, { "epoch": 0.21, "grad_norm": 0.4557519331789354, "learning_rate": 1.8276371669774482e-05, "loss": 0.3305, "step": 4659 }, { "epoch": 0.21, "grad_norm": 1.5406306540507528, "learning_rate": 1.827553646340515e-05, "loss": 0.8721, "step": 4660 }, { "epoch": 0.21, "grad_norm": 0.45662552571470577, "learning_rate": 1.8274701073823417e-05, "loss": 0.2102, "step": 4661 }, { "epoch": 0.21, "grad_norm": 0.69059765418248, "learning_rate": 1.827386550104778e-05, "loss": 0.4733, "step": 4662 }, { "epoch": 0.21, "grad_norm": 0.4109244715818268, "learning_rate": 1.8273029745096735e-05, "loss": 0.281, "step": 4663 }, { "epoch": 0.21, "grad_norm": 0.37609499392813966, "learning_rate": 1.8272193805988782e-05, "loss": 0.1875, "step": 4664 }, { "epoch": 0.21, "grad_norm": 1.256452389267183, "learning_rate": 1.8271357683742435e-05, "loss": 0.7646, "step": 4665 }, { "epoch": 0.21, "grad_norm": 1.1758866979955218, "learning_rate": 1.82705213783762e-05, "loss": 0.7102, "step": 4666 }, { "epoch": 0.21, "grad_norm": 0.32678136383137224, "learning_rate": 1.826968488990859e-05, "loss": 0.2502, "step": 4667 }, { "epoch": 0.21, "grad_norm": 0.5348602875936085, "learning_rate": 1.826884821835813e-05, "loss": 0.3979, "step": 4668 }, { "epoch": 0.21, "grad_norm": 0.36751328397413585, "learning_rate": 1.826801136374334e-05, "loss": 0.1871, "step": 4669 }, { "epoch": 0.21, "grad_norm": 0.5728108240020834, "learning_rate": 1.826717432608274e-05, "loss": 0.3179, "step": 4670 }, { "epoch": 0.21, "grad_norm": 0.440010150035341, "learning_rate": 1.8266337105394876e-05, "loss": 0.264, "step": 4671 }, { "epoch": 0.21, "grad_norm": 0.8911246903793755, "learning_rate": 1.8265499701698276e-05, "loss": 0.6378, "step": 4672 }, { "epoch": 0.21, "grad_norm": 0.4343994218741047, "learning_rate": 1.8264662115011476e-05, "loss": 0.2722, "step": 4673 }, { "epoch": 0.21, "grad_norm": 0.7786516368156159, "learning_rate": 1.8263824345353024e-05, "loss": 0.3971, "step": 4674 }, { "epoch": 0.21, "grad_norm": 0.40711131198405126, "learning_rate": 1.8262986392741466e-05, "loss": 0.264, "step": 4675 }, { "epoch": 0.21, "grad_norm": 0.5808226132267941, "learning_rate": 1.826214825719535e-05, "loss": 0.3029, "step": 4676 }, { "epoch": 0.21, "grad_norm": 0.3383055486995955, "learning_rate": 1.8261309938733238e-05, "loss": 0.1881, "step": 4677 }, { "epoch": 0.21, "grad_norm": 0.6151882465383387, "learning_rate": 1.8260471437373685e-05, "loss": 0.4098, "step": 4678 }, { "epoch": 0.21, "grad_norm": 0.4628567342825869, "learning_rate": 1.8259632753135257e-05, "loss": 0.2942, "step": 4679 }, { "epoch": 0.21, "grad_norm": 0.4345743459060977, "learning_rate": 1.825879388603652e-05, "loss": 0.2923, "step": 4680 }, { "epoch": 0.22, "grad_norm": 0.3332112895744663, "learning_rate": 1.825795483609605e-05, "loss": 0.1965, "step": 4681 }, { "epoch": 0.22, "grad_norm": 0.4558054923541028, "learning_rate": 1.8257115603332413e-05, "loss": 0.3045, "step": 4682 }, { "epoch": 0.22, "grad_norm": 0.4229085197928831, "learning_rate": 1.8256276187764197e-05, "loss": 0.3212, "step": 4683 }, { "epoch": 0.22, "grad_norm": 0.9069230591281934, "learning_rate": 1.8255436589409984e-05, "loss": 0.4163, "step": 4684 }, { "epoch": 0.22, "grad_norm": 0.3595899320033163, "learning_rate": 1.825459680828836e-05, "loss": 0.3008, "step": 4685 }, { "epoch": 0.22, "grad_norm": 0.768017022563293, "learning_rate": 1.825375684441792e-05, "loss": 0.5708, "step": 4686 }, { "epoch": 0.22, "grad_norm": 0.40642464390280436, "learning_rate": 1.8252916697817258e-05, "loss": 0.2664, "step": 4687 }, { "epoch": 0.22, "grad_norm": 0.3855457516666452, "learning_rate": 1.8252076368504976e-05, "loss": 0.2298, "step": 4688 }, { "epoch": 0.22, "grad_norm": 0.49992006374429154, "learning_rate": 1.8251235856499677e-05, "loss": 0.3272, "step": 4689 }, { "epoch": 0.22, "grad_norm": 0.7237023060255751, "learning_rate": 1.8250395161819966e-05, "loss": 0.363, "step": 4690 }, { "epoch": 0.22, "grad_norm": 0.322398875831228, "learning_rate": 1.8249554284484458e-05, "loss": 0.2728, "step": 4691 }, { "epoch": 0.22, "grad_norm": 1.4807351590955335, "learning_rate": 1.8248713224511774e-05, "loss": 0.7584, "step": 4692 }, { "epoch": 0.22, "grad_norm": 0.38304739488723544, "learning_rate": 1.8247871981920524e-05, "loss": 0.1534, "step": 4693 }, { "epoch": 0.22, "grad_norm": 0.3773397731873623, "learning_rate": 1.824703055672934e-05, "loss": 0.1984, "step": 4694 }, { "epoch": 0.22, "grad_norm": 0.4164363671846114, "learning_rate": 1.8246188948956847e-05, "loss": 0.3022, "step": 4695 }, { "epoch": 0.22, "grad_norm": 0.7268954039486533, "learning_rate": 1.8245347158621683e-05, "loss": 0.4606, "step": 4696 }, { "epoch": 0.22, "grad_norm": 0.3798317209879306, "learning_rate": 1.8244505185742475e-05, "loss": 0.1913, "step": 4697 }, { "epoch": 0.22, "grad_norm": 0.619270687036849, "learning_rate": 1.824366303033787e-05, "loss": 0.4137, "step": 4698 }, { "epoch": 0.22, "grad_norm": 0.4168621970399676, "learning_rate": 1.824282069242651e-05, "loss": 0.3082, "step": 4699 }, { "epoch": 0.22, "grad_norm": 0.27801573609894153, "learning_rate": 1.8241978172027044e-05, "loss": 0.1453, "step": 4700 }, { "epoch": 0.22, "grad_norm": 0.48327397421284685, "learning_rate": 1.8241135469158125e-05, "loss": 0.3151, "step": 4701 }, { "epoch": 0.22, "grad_norm": 0.8042465079406699, "learning_rate": 1.824029258383841e-05, "loss": 0.4928, "step": 4702 }, { "epoch": 0.22, "grad_norm": 0.33807776918129595, "learning_rate": 1.823944951608656e-05, "loss": 0.2433, "step": 4703 }, { "epoch": 0.22, "grad_norm": 0.5621665632148922, "learning_rate": 1.823860626592124e-05, "loss": 0.4174, "step": 4704 }, { "epoch": 0.22, "grad_norm": 1.1542294522912009, "learning_rate": 1.8237762833361117e-05, "loss": 0.6366, "step": 4705 }, { "epoch": 0.22, "grad_norm": 0.27209835289750167, "learning_rate": 1.823691921842486e-05, "loss": 0.1686, "step": 4706 }, { "epoch": 0.22, "grad_norm": 0.48289717455430436, "learning_rate": 1.823607542113116e-05, "loss": 0.3493, "step": 4707 }, { "epoch": 0.22, "grad_norm": 1.2366229371075574, "learning_rate": 1.823523144149868e-05, "loss": 0.5253, "step": 4708 }, { "epoch": 0.22, "grad_norm": 0.4620283104961311, "learning_rate": 1.8234387279546118e-05, "loss": 0.3523, "step": 4709 }, { "epoch": 0.22, "grad_norm": 0.3749240780591437, "learning_rate": 1.8233542935292153e-05, "loss": 0.1686, "step": 4710 }, { "epoch": 0.22, "grad_norm": 0.5301326583150134, "learning_rate": 1.8232698408755488e-05, "loss": 0.3422, "step": 4711 }, { "epoch": 0.22, "grad_norm": 0.7728638817517502, "learning_rate": 1.8231853699954813e-05, "loss": 0.3837, "step": 4712 }, { "epoch": 0.22, "grad_norm": 0.7664304155970179, "learning_rate": 1.823100880890883e-05, "loss": 0.3311, "step": 4713 }, { "epoch": 0.22, "grad_norm": 0.3515959542651426, "learning_rate": 1.8230163735636245e-05, "loss": 0.275, "step": 4714 }, { "epoch": 0.22, "grad_norm": 0.901724141031721, "learning_rate": 1.822931848015577e-05, "loss": 0.525, "step": 4715 }, { "epoch": 0.22, "grad_norm": 0.3584011863835632, "learning_rate": 1.822847304248611e-05, "loss": 0.2057, "step": 4716 }, { "epoch": 0.22, "grad_norm": 1.2851131611178057, "learning_rate": 1.8227627422645993e-05, "loss": 0.6201, "step": 4717 }, { "epoch": 0.22, "grad_norm": 0.7538886112538911, "learning_rate": 1.8226781620654133e-05, "loss": 0.384, "step": 4718 }, { "epoch": 0.22, "grad_norm": 0.4177940255824517, "learning_rate": 1.8225935636529258e-05, "loss": 0.3271, "step": 4719 }, { "epoch": 0.22, "grad_norm": 0.9951015345003666, "learning_rate": 1.8225089470290093e-05, "loss": 0.4381, "step": 4720 }, { "epoch": 0.22, "grad_norm": 0.3749662940774599, "learning_rate": 1.822424312195538e-05, "loss": 0.2538, "step": 4721 }, { "epoch": 0.22, "grad_norm": 0.33772245914777116, "learning_rate": 1.8223396591543844e-05, "loss": 0.2542, "step": 4722 }, { "epoch": 0.22, "grad_norm": 0.4214906124449183, "learning_rate": 1.8222549879074236e-05, "loss": 0.2612, "step": 4723 }, { "epoch": 0.22, "grad_norm": 0.9992692074300207, "learning_rate": 1.82217029845653e-05, "loss": 0.3815, "step": 4724 }, { "epoch": 0.22, "grad_norm": 0.5233078416390026, "learning_rate": 1.8220855908035783e-05, "loss": 0.3449, "step": 4725 }, { "epoch": 0.22, "grad_norm": 0.42529004586002195, "learning_rate": 1.822000864950444e-05, "loss": 0.2755, "step": 4726 }, { "epoch": 0.22, "grad_norm": 0.4325188832858573, "learning_rate": 1.8219161208990028e-05, "loss": 0.2746, "step": 4727 }, { "epoch": 0.22, "grad_norm": 0.388877865397616, "learning_rate": 1.8218313586511312e-05, "loss": 0.2549, "step": 4728 }, { "epoch": 0.22, "grad_norm": 0.613589652692113, "learning_rate": 1.821746578208705e-05, "loss": 0.28, "step": 4729 }, { "epoch": 0.22, "grad_norm": 0.4312928598732534, "learning_rate": 1.8216617795736016e-05, "loss": 0.3248, "step": 4730 }, { "epoch": 0.22, "grad_norm": 0.415490290999044, "learning_rate": 1.8215769627476984e-05, "loss": 0.3123, "step": 4731 }, { "epoch": 0.22, "grad_norm": 1.3759486027783363, "learning_rate": 1.821492127732873e-05, "loss": 0.8813, "step": 4732 }, { "epoch": 0.22, "grad_norm": 0.2862880344142633, "learning_rate": 1.821407274531004e-05, "loss": 0.1198, "step": 4733 }, { "epoch": 0.22, "grad_norm": 0.34206664095532985, "learning_rate": 1.821322403143969e-05, "loss": 0.2644, "step": 4734 }, { "epoch": 0.22, "grad_norm": 0.4036479430914809, "learning_rate": 1.821237513573648e-05, "loss": 0.371, "step": 4735 }, { "epoch": 0.22, "grad_norm": 0.5153622577734677, "learning_rate": 1.82115260582192e-05, "loss": 0.2828, "step": 4736 }, { "epoch": 0.22, "grad_norm": 0.456750395413481, "learning_rate": 1.8210676798906645e-05, "loss": 0.364, "step": 4737 }, { "epoch": 0.22, "grad_norm": 0.602010975840167, "learning_rate": 1.8209827357817624e-05, "loss": 0.4441, "step": 4738 }, { "epoch": 0.22, "grad_norm": 0.34076125494475223, "learning_rate": 1.820897773497093e-05, "loss": 0.2133, "step": 4739 }, { "epoch": 0.22, "grad_norm": 0.2971201763428285, "learning_rate": 1.8208127930385387e-05, "loss": 0.2218, "step": 4740 }, { "epoch": 0.22, "grad_norm": 0.6483415474296218, "learning_rate": 1.82072779440798e-05, "loss": 0.4817, "step": 4741 }, { "epoch": 0.22, "grad_norm": 0.378368462747848, "learning_rate": 1.8206427776072995e-05, "loss": 0.2702, "step": 4742 }, { "epoch": 0.22, "grad_norm": 0.5107855403346446, "learning_rate": 1.8205577426383786e-05, "loss": 0.3199, "step": 4743 }, { "epoch": 0.22, "grad_norm": 0.9841312577019259, "learning_rate": 1.8204726895030997e-05, "loss": 0.688, "step": 4744 }, { "epoch": 0.22, "grad_norm": 0.3875498112274121, "learning_rate": 1.8203876182033467e-05, "loss": 0.268, "step": 4745 }, { "epoch": 0.22, "grad_norm": 0.3574070698500073, "learning_rate": 1.8203025287410022e-05, "loss": 0.2451, "step": 4746 }, { "epoch": 0.22, "grad_norm": 0.43691703930614784, "learning_rate": 1.8202174211179505e-05, "loss": 0.3441, "step": 4747 }, { "epoch": 0.22, "grad_norm": 0.35302804775028696, "learning_rate": 1.8201322953360758e-05, "loss": 0.1931, "step": 4748 }, { "epoch": 0.22, "grad_norm": 0.4801863093009189, "learning_rate": 1.8200471513972623e-05, "loss": 0.271, "step": 4749 }, { "epoch": 0.22, "grad_norm": 0.4660614059489682, "learning_rate": 1.8199619893033954e-05, "loss": 0.3589, "step": 4750 }, { "epoch": 0.22, "grad_norm": 1.266493296324023, "learning_rate": 1.8198768090563602e-05, "loss": 0.6325, "step": 4751 }, { "epoch": 0.22, "grad_norm": 0.3541500683018622, "learning_rate": 1.8197916106580426e-05, "loss": 0.2325, "step": 4752 }, { "epoch": 0.22, "grad_norm": 0.35932638678185097, "learning_rate": 1.819706394110329e-05, "loss": 0.2917, "step": 4753 }, { "epoch": 0.22, "grad_norm": 0.4576577785609266, "learning_rate": 1.8196211594151058e-05, "loss": 0.3583, "step": 4754 }, { "epoch": 0.22, "grad_norm": 0.39302659221463143, "learning_rate": 1.81953590657426e-05, "loss": 0.2084, "step": 4755 }, { "epoch": 0.22, "grad_norm": 1.5051807361455776, "learning_rate": 1.8194506355896796e-05, "loss": 0.8507, "step": 4756 }, { "epoch": 0.22, "grad_norm": 0.5966210692855977, "learning_rate": 1.8193653464632513e-05, "loss": 0.4403, "step": 4757 }, { "epoch": 0.22, "grad_norm": 0.3535776414814971, "learning_rate": 1.8192800391968643e-05, "loss": 0.3011, "step": 4758 }, { "epoch": 0.22, "grad_norm": 0.48462018141171237, "learning_rate": 1.819194713792407e-05, "loss": 0.3267, "step": 4759 }, { "epoch": 0.22, "grad_norm": 0.3017927467174878, "learning_rate": 1.8191093702517678e-05, "loss": 0.1626, "step": 4760 }, { "epoch": 0.22, "grad_norm": 0.47824627004262604, "learning_rate": 1.8190240085768368e-05, "loss": 0.3001, "step": 4761 }, { "epoch": 0.22, "grad_norm": 0.424934072240903, "learning_rate": 1.818938628769504e-05, "loss": 0.2948, "step": 4762 }, { "epoch": 0.22, "grad_norm": 0.684162917725333, "learning_rate": 1.818853230831659e-05, "loss": 0.439, "step": 4763 }, { "epoch": 0.22, "grad_norm": 0.41309690190185494, "learning_rate": 1.8187678147651926e-05, "loss": 0.2847, "step": 4764 }, { "epoch": 0.22, "grad_norm": 0.39768868839878774, "learning_rate": 1.818682380571996e-05, "loss": 0.2361, "step": 4765 }, { "epoch": 0.22, "grad_norm": 0.34224196850757, "learning_rate": 1.8185969282539603e-05, "loss": 0.2789, "step": 4766 }, { "epoch": 0.22, "grad_norm": 0.41830297302215247, "learning_rate": 1.818511457812978e-05, "loss": 0.3093, "step": 4767 }, { "epoch": 0.22, "grad_norm": 0.8313184852284813, "learning_rate": 1.8184259692509407e-05, "loss": 0.5134, "step": 4768 }, { "epoch": 0.22, "grad_norm": 0.7076721907603172, "learning_rate": 1.8183404625697414e-05, "loss": 0.5149, "step": 4769 }, { "epoch": 0.22, "grad_norm": 0.33474794273972025, "learning_rate": 1.8182549377712728e-05, "loss": 0.281, "step": 4770 }, { "epoch": 0.22, "grad_norm": 0.4826256554662767, "learning_rate": 1.8181693948574285e-05, "loss": 0.3736, "step": 4771 }, { "epoch": 0.22, "grad_norm": 0.2199104722451651, "learning_rate": 1.8180838338301027e-05, "loss": 0.0981, "step": 4772 }, { "epoch": 0.22, "grad_norm": 0.37015805059036166, "learning_rate": 1.817998254691189e-05, "loss": 0.3019, "step": 4773 }, { "epoch": 0.22, "grad_norm": 0.42979105796318157, "learning_rate": 1.8179126574425823e-05, "loss": 0.3682, "step": 4774 }, { "epoch": 0.22, "grad_norm": 0.8447629241379996, "learning_rate": 1.8178270420861777e-05, "loss": 0.4078, "step": 4775 }, { "epoch": 0.22, "grad_norm": 0.39098817310102824, "learning_rate": 1.8177414086238706e-05, "loss": 0.305, "step": 4776 }, { "epoch": 0.22, "grad_norm": 1.0980167268530916, "learning_rate": 1.817655757057557e-05, "loss": 0.5838, "step": 4777 }, { "epoch": 0.22, "grad_norm": 0.27983106939891933, "learning_rate": 1.8175700873891328e-05, "loss": 0.1875, "step": 4778 }, { "epoch": 0.22, "grad_norm": 0.41148485475483415, "learning_rate": 1.817484399620495e-05, "loss": 0.3007, "step": 4779 }, { "epoch": 0.22, "grad_norm": 1.1246362723611958, "learning_rate": 1.817398693753541e-05, "loss": 0.6736, "step": 4780 }, { "epoch": 0.22, "grad_norm": 0.5903963605238938, "learning_rate": 1.8173129697901667e-05, "loss": 0.3179, "step": 4781 }, { "epoch": 0.22, "grad_norm": 0.3993939456526726, "learning_rate": 1.817227227732272e-05, "loss": 0.2907, "step": 4782 }, { "epoch": 0.22, "grad_norm": 0.5668209823697162, "learning_rate": 1.8171414675817534e-05, "loss": 0.3914, "step": 4783 }, { "epoch": 0.22, "grad_norm": 0.340181644577086, "learning_rate": 1.8170556893405106e-05, "loss": 0.1881, "step": 4784 }, { "epoch": 0.22, "grad_norm": 0.3600906499156987, "learning_rate": 1.816969893010442e-05, "loss": 0.2015, "step": 4785 }, { "epoch": 0.22, "grad_norm": 0.477724102786767, "learning_rate": 1.816884078593448e-05, "loss": 0.3472, "step": 4786 }, { "epoch": 0.22, "grad_norm": 1.1388118834469854, "learning_rate": 1.8167982460914273e-05, "loss": 0.5269, "step": 4787 }, { "epoch": 0.22, "grad_norm": 0.3906495963403317, "learning_rate": 1.8167123955062805e-05, "loss": 0.2225, "step": 4788 }, { "epoch": 0.22, "grad_norm": 0.9933380816430054, "learning_rate": 1.816626526839909e-05, "loss": 0.5529, "step": 4789 }, { "epoch": 0.22, "grad_norm": 0.43152436127120847, "learning_rate": 1.816540640094213e-05, "loss": 0.3349, "step": 4790 }, { "epoch": 0.22, "grad_norm": 0.29617315324453286, "learning_rate": 1.816454735271094e-05, "loss": 0.1816, "step": 4791 }, { "epoch": 0.22, "grad_norm": 0.9499800958465451, "learning_rate": 1.8163688123724545e-05, "loss": 0.5018, "step": 4792 }, { "epoch": 0.22, "grad_norm": 0.5733666685609344, "learning_rate": 1.8162828714001962e-05, "loss": 0.404, "step": 4793 }, { "epoch": 0.22, "grad_norm": 0.33265289355024075, "learning_rate": 1.816196912356222e-05, "loss": 0.218, "step": 4794 }, { "epoch": 0.22, "grad_norm": 1.4574642507545301, "learning_rate": 1.8161109352424344e-05, "loss": 0.6969, "step": 4795 }, { "epoch": 0.22, "grad_norm": 0.5283057307505934, "learning_rate": 1.8160249400607373e-05, "loss": 0.3061, "step": 4796 }, { "epoch": 0.22, "grad_norm": 0.40521580864999374, "learning_rate": 1.815938926813035e-05, "loss": 0.2773, "step": 4797 }, { "epoch": 0.22, "grad_norm": 0.461144478827574, "learning_rate": 1.815852895501231e-05, "loss": 0.3084, "step": 4798 }, { "epoch": 0.22, "grad_norm": 0.5492646491140364, "learning_rate": 1.8157668461272303e-05, "loss": 0.3228, "step": 4799 }, { "epoch": 0.22, "grad_norm": 0.48333436529769075, "learning_rate": 1.8156807786929378e-05, "loss": 0.267, "step": 4800 }, { "epoch": 0.22, "grad_norm": 0.6457307796908567, "learning_rate": 1.815594693200259e-05, "loss": 0.2945, "step": 4801 }, { "epoch": 0.22, "grad_norm": 0.4603110603931726, "learning_rate": 1.8155085896510995e-05, "loss": 0.3358, "step": 4802 }, { "epoch": 0.22, "grad_norm": 0.46381746713654237, "learning_rate": 1.815422468047366e-05, "loss": 0.2874, "step": 4803 }, { "epoch": 0.22, "grad_norm": 0.6323139844144634, "learning_rate": 1.8153363283909655e-05, "loss": 0.3558, "step": 4804 }, { "epoch": 0.22, "grad_norm": 0.5685505744427842, "learning_rate": 1.815250170683804e-05, "loss": 0.3198, "step": 4805 }, { "epoch": 0.22, "grad_norm": 0.35115838383146947, "learning_rate": 1.8151639949277895e-05, "loss": 0.2848, "step": 4806 }, { "epoch": 0.22, "grad_norm": 0.35888834037731565, "learning_rate": 1.8150778011248298e-05, "loss": 0.208, "step": 4807 }, { "epoch": 0.22, "grad_norm": 1.0459996156884617, "learning_rate": 1.8149915892768334e-05, "loss": 0.5732, "step": 4808 }, { "epoch": 0.22, "grad_norm": 0.4416051864788834, "learning_rate": 1.8149053593857083e-05, "loss": 0.3226, "step": 4809 }, { "epoch": 0.22, "grad_norm": 0.36737304588694797, "learning_rate": 1.8148191114533646e-05, "loss": 0.324, "step": 4810 }, { "epoch": 0.22, "grad_norm": 0.29458204704630014, "learning_rate": 1.8147328454817107e-05, "loss": 0.1785, "step": 4811 }, { "epoch": 0.22, "grad_norm": 0.29219828779525503, "learning_rate": 1.8146465614726566e-05, "loss": 0.2278, "step": 4812 }, { "epoch": 0.22, "grad_norm": 0.9902808531450538, "learning_rate": 1.814560259428113e-05, "loss": 0.5717, "step": 4813 }, { "epoch": 0.22, "grad_norm": 0.41616636445218996, "learning_rate": 1.8144739393499905e-05, "loss": 0.3123, "step": 4814 }, { "epoch": 0.22, "grad_norm": 0.39471423690122903, "learning_rate": 1.8143876012402e-05, "loss": 0.3019, "step": 4815 }, { "epoch": 0.22, "grad_norm": 1.135870596713753, "learning_rate": 1.8143012451006527e-05, "loss": 0.5588, "step": 4816 }, { "epoch": 0.22, "grad_norm": 0.41529902598451396, "learning_rate": 1.814214870933261e-05, "loss": 0.2923, "step": 4817 }, { "epoch": 0.22, "grad_norm": 0.3944434681196841, "learning_rate": 1.8141284787399366e-05, "loss": 0.2211, "step": 4818 }, { "epoch": 0.22, "grad_norm": 0.35463714592759155, "learning_rate": 1.8140420685225922e-05, "loss": 0.2662, "step": 4819 }, { "epoch": 0.22, "grad_norm": 0.7125285698447634, "learning_rate": 1.8139556402831412e-05, "loss": 0.4013, "step": 4820 }, { "epoch": 0.22, "grad_norm": 0.4991907731822161, "learning_rate": 1.813869194023497e-05, "loss": 0.3243, "step": 4821 }, { "epoch": 0.22, "grad_norm": 0.41353127467976253, "learning_rate": 1.813782729745573e-05, "loss": 0.3383, "step": 4822 }, { "epoch": 0.22, "grad_norm": 1.0814407901911471, "learning_rate": 1.8136962474512833e-05, "loss": 0.7642, "step": 4823 }, { "epoch": 0.22, "grad_norm": 0.3057561758157377, "learning_rate": 1.8136097471425436e-05, "loss": 0.1695, "step": 4824 }, { "epoch": 0.22, "grad_norm": 0.33609296104696146, "learning_rate": 1.8135232288212677e-05, "loss": 0.2843, "step": 4825 }, { "epoch": 0.22, "grad_norm": 0.5554603416211129, "learning_rate": 1.813436692489372e-05, "loss": 0.3961, "step": 4826 }, { "epoch": 0.22, "grad_norm": 0.43872979862737393, "learning_rate": 1.813350138148772e-05, "loss": 0.2686, "step": 4827 }, { "epoch": 0.22, "grad_norm": 0.5658355848139722, "learning_rate": 1.8132635658013837e-05, "loss": 0.3919, "step": 4828 }, { "epoch": 0.22, "grad_norm": 0.5930619443715385, "learning_rate": 1.8131769754491237e-05, "loss": 0.417, "step": 4829 }, { "epoch": 0.22, "grad_norm": 0.2818847510129043, "learning_rate": 1.8130903670939095e-05, "loss": 0.1925, "step": 4830 }, { "epoch": 0.22, "grad_norm": 0.4172377921461991, "learning_rate": 1.813003740737658e-05, "loss": 0.2743, "step": 4831 }, { "epoch": 0.22, "grad_norm": 0.8653866168812338, "learning_rate": 1.8129170963822874e-05, "loss": 0.4447, "step": 4832 }, { "epoch": 0.22, "grad_norm": 0.3443520775128014, "learning_rate": 1.812830434029716e-05, "loss": 0.2174, "step": 4833 }, { "epoch": 0.22, "grad_norm": 0.4142131987936221, "learning_rate": 1.812743753681862e-05, "loss": 0.3367, "step": 4834 }, { "epoch": 0.22, "grad_norm": 1.1316702084313228, "learning_rate": 1.8126570553406443e-05, "loss": 0.7461, "step": 4835 }, { "epoch": 0.22, "grad_norm": 0.4409020778925048, "learning_rate": 1.812570339007983e-05, "loss": 0.3021, "step": 4836 }, { "epoch": 0.22, "grad_norm": 0.27536580880837785, "learning_rate": 1.812483604685798e-05, "loss": 0.2171, "step": 4837 }, { "epoch": 0.22, "grad_norm": 0.5873427975437153, "learning_rate": 1.8123968523760082e-05, "loss": 0.3785, "step": 4838 }, { "epoch": 0.22, "grad_norm": 0.956826894026991, "learning_rate": 1.8123100820805354e-05, "loss": 0.3385, "step": 4839 }, { "epoch": 0.22, "grad_norm": 0.41720032252015726, "learning_rate": 1.812223293801301e-05, "loss": 0.2671, "step": 4840 }, { "epoch": 0.22, "grad_norm": 0.4424938434443703, "learning_rate": 1.8121364875402246e-05, "loss": 0.3628, "step": 4841 }, { "epoch": 0.22, "grad_norm": 0.6729737982944347, "learning_rate": 1.8120496632992298e-05, "loss": 0.3947, "step": 4842 }, { "epoch": 0.22, "grad_norm": 0.29415467995743794, "learning_rate": 1.811962821080238e-05, "loss": 0.2641, "step": 4843 }, { "epoch": 0.22, "grad_norm": 0.44151005207757565, "learning_rate": 1.8118759608851715e-05, "loss": 0.2925, "step": 4844 }, { "epoch": 0.22, "grad_norm": 0.4962638776954698, "learning_rate": 1.8117890827159543e-05, "loss": 0.3166, "step": 4845 }, { "epoch": 0.22, "grad_norm": 0.3753492923719378, "learning_rate": 1.8117021865745088e-05, "loss": 0.3112, "step": 4846 }, { "epoch": 0.22, "grad_norm": 0.877620495284609, "learning_rate": 1.8116152724627592e-05, "loss": 0.4892, "step": 4847 }, { "epoch": 0.22, "grad_norm": 0.5600726502011535, "learning_rate": 1.81152834038263e-05, "loss": 0.3888, "step": 4848 }, { "epoch": 0.22, "grad_norm": 0.43939279814065196, "learning_rate": 1.8114413903360458e-05, "loss": 0.3539, "step": 4849 }, { "epoch": 0.22, "grad_norm": 0.253801875678131, "learning_rate": 1.8113544223249305e-05, "loss": 0.1997, "step": 4850 }, { "epoch": 0.22, "grad_norm": 0.691455891517991, "learning_rate": 1.811267436351211e-05, "loss": 0.3134, "step": 4851 }, { "epoch": 0.22, "grad_norm": 0.49150993290366224, "learning_rate": 1.811180432416812e-05, "loss": 0.3258, "step": 4852 }, { "epoch": 0.22, "grad_norm": 0.48509243270785324, "learning_rate": 1.8110934105236603e-05, "loss": 0.2873, "step": 4853 }, { "epoch": 0.22, "grad_norm": 0.8012016030099469, "learning_rate": 1.8110063706736817e-05, "loss": 0.359, "step": 4854 }, { "epoch": 0.22, "grad_norm": 0.4031519528108106, "learning_rate": 1.8109193128688042e-05, "loss": 0.32, "step": 4855 }, { "epoch": 0.22, "grad_norm": 0.2844282692682203, "learning_rate": 1.8108322371109547e-05, "loss": 0.1604, "step": 4856 }, { "epoch": 0.22, "grad_norm": 0.36104228062766386, "learning_rate": 1.8107451434020605e-05, "loss": 0.2769, "step": 4857 }, { "epoch": 0.22, "grad_norm": 0.43024750928793426, "learning_rate": 1.8106580317440507e-05, "loss": 0.3029, "step": 4858 }, { "epoch": 0.22, "grad_norm": 0.7999276773110835, "learning_rate": 1.8105709021388534e-05, "loss": 0.5215, "step": 4859 }, { "epoch": 0.22, "grad_norm": 0.537235222398753, "learning_rate": 1.8104837545883974e-05, "loss": 0.3447, "step": 4860 }, { "epoch": 0.22, "grad_norm": 0.36127368463258286, "learning_rate": 1.810396589094612e-05, "loss": 0.3054, "step": 4861 }, { "epoch": 0.22, "grad_norm": 0.5969677080858629, "learning_rate": 1.8103094056594276e-05, "loss": 0.3958, "step": 4862 }, { "epoch": 0.22, "grad_norm": 0.22210384059199, "learning_rate": 1.8102222042847735e-05, "loss": 0.1117, "step": 4863 }, { "epoch": 0.22, "grad_norm": 0.4885519340661976, "learning_rate": 1.810134984972581e-05, "loss": 0.3648, "step": 4864 }, { "epoch": 0.22, "grad_norm": 0.44305471709187827, "learning_rate": 1.810047747724781e-05, "loss": 0.3135, "step": 4865 }, { "epoch": 0.22, "grad_norm": 1.0364214286890467, "learning_rate": 1.8099604925433042e-05, "loss": 0.3475, "step": 4866 }, { "epoch": 0.22, "grad_norm": 0.44691047919538324, "learning_rate": 1.8098732194300828e-05, "loss": 0.3282, "step": 4867 }, { "epoch": 0.22, "grad_norm": 0.49227980493382817, "learning_rate": 1.8097859283870488e-05, "loss": 0.2859, "step": 4868 }, { "epoch": 0.22, "grad_norm": 0.2889589439590141, "learning_rate": 1.809698619416135e-05, "loss": 0.2099, "step": 4869 }, { "epoch": 0.22, "grad_norm": 0.4693138537778226, "learning_rate": 1.809611292519274e-05, "loss": 0.3438, "step": 4870 }, { "epoch": 0.22, "grad_norm": 1.2215041109995461, "learning_rate": 1.8095239476983998e-05, "loss": 0.56, "step": 4871 }, { "epoch": 0.22, "grad_norm": 0.8683455170800684, "learning_rate": 1.809436584955445e-05, "loss": 0.5487, "step": 4872 }, { "epoch": 0.22, "grad_norm": 0.3262896242519527, "learning_rate": 1.8093492042923446e-05, "loss": 0.2264, "step": 4873 }, { "epoch": 0.22, "grad_norm": 0.3822380832388538, "learning_rate": 1.809261805711033e-05, "loss": 0.2843, "step": 4874 }, { "epoch": 0.22, "grad_norm": 0.44776823424694767, "learning_rate": 1.809174389213445e-05, "loss": 0.2447, "step": 4875 }, { "epoch": 0.22, "grad_norm": 0.4720823015631681, "learning_rate": 1.8090869548015157e-05, "loss": 0.2318, "step": 4876 }, { "epoch": 0.22, "grad_norm": 0.7812423509473778, "learning_rate": 1.808999502477181e-05, "loss": 0.3845, "step": 4877 }, { "epoch": 0.22, "grad_norm": 1.254308289742614, "learning_rate": 1.8089120322423773e-05, "loss": 0.5601, "step": 4878 }, { "epoch": 0.22, "grad_norm": 0.4000921667625986, "learning_rate": 1.8088245440990405e-05, "loss": 0.2228, "step": 4879 }, { "epoch": 0.22, "grad_norm": 1.0799493334997052, "learning_rate": 1.8087370380491083e-05, "loss": 0.5448, "step": 4880 }, { "epoch": 0.22, "grad_norm": 0.3041300654951305, "learning_rate": 1.808649514094517e-05, "loss": 0.219, "step": 4881 }, { "epoch": 0.22, "grad_norm": 0.4716769690346684, "learning_rate": 1.8085619722372056e-05, "loss": 0.2504, "step": 4882 }, { "epoch": 0.22, "grad_norm": 1.1829686784598237, "learning_rate": 1.8084744124791108e-05, "loss": 0.4653, "step": 4883 }, { "epoch": 0.22, "grad_norm": 0.9325982551284647, "learning_rate": 1.808386834822172e-05, "loss": 0.4944, "step": 4884 }, { "epoch": 0.22, "grad_norm": 0.41729995974159423, "learning_rate": 1.808299239268328e-05, "loss": 0.2958, "step": 4885 }, { "epoch": 0.22, "grad_norm": 0.5204619670380874, "learning_rate": 1.8082116258195173e-05, "loss": 0.2919, "step": 4886 }, { "epoch": 0.22, "grad_norm": 0.3742924561090667, "learning_rate": 1.8081239944776804e-05, "loss": 0.2653, "step": 4887 }, { "epoch": 0.22, "grad_norm": 0.45058769054038006, "learning_rate": 1.8080363452447574e-05, "loss": 0.3298, "step": 4888 }, { "epoch": 0.22, "grad_norm": 0.79379721470678, "learning_rate": 1.8079486781226883e-05, "loss": 0.3491, "step": 4889 }, { "epoch": 0.22, "grad_norm": 0.5342093586195866, "learning_rate": 1.8078609931134142e-05, "loss": 0.3344, "step": 4890 }, { "epoch": 0.22, "grad_norm": 0.3948632775031378, "learning_rate": 1.8077732902188764e-05, "loss": 0.2832, "step": 4891 }, { "epoch": 0.22, "grad_norm": 0.5265659296839025, "learning_rate": 1.8076855694410163e-05, "loss": 0.3236, "step": 4892 }, { "epoch": 0.22, "grad_norm": 0.3507293441149866, "learning_rate": 1.8075978307817764e-05, "loss": 0.2697, "step": 4893 }, { "epoch": 0.22, "grad_norm": 0.3975490530056394, "learning_rate": 1.807510074243099e-05, "loss": 0.2896, "step": 4894 }, { "epoch": 0.22, "grad_norm": 0.6715942483483468, "learning_rate": 1.8074222998269268e-05, "loss": 0.4188, "step": 4895 }, { "epoch": 0.22, "grad_norm": 0.38869058170413096, "learning_rate": 1.807334507535203e-05, "loss": 0.2804, "step": 4896 }, { "epoch": 0.22, "grad_norm": 0.3585395205190629, "learning_rate": 1.807246697369871e-05, "loss": 0.2681, "step": 4897 }, { "epoch": 0.23, "grad_norm": 1.3574704218987816, "learning_rate": 1.8071588693328755e-05, "loss": 0.6944, "step": 4898 }, { "epoch": 0.23, "grad_norm": 0.9600124348572094, "learning_rate": 1.8070710234261602e-05, "loss": 0.3941, "step": 4899 }, { "epoch": 0.23, "grad_norm": 0.38566021947660506, "learning_rate": 1.8069831596516708e-05, "loss": 0.2965, "step": 4900 }, { "epoch": 0.23, "grad_norm": 0.45817646101477905, "learning_rate": 1.8068952780113517e-05, "loss": 0.3524, "step": 4901 }, { "epoch": 0.23, "grad_norm": 0.25956103180349843, "learning_rate": 1.806807378507149e-05, "loss": 0.1434, "step": 4902 }, { "epoch": 0.23, "grad_norm": 0.5173152303285927, "learning_rate": 1.8067194611410088e-05, "loss": 0.3302, "step": 4903 }, { "epoch": 0.23, "grad_norm": 0.6737547764909366, "learning_rate": 1.806631525914877e-05, "loss": 0.427, "step": 4904 }, { "epoch": 0.23, "grad_norm": 0.4473395472693209, "learning_rate": 1.8065435728307003e-05, "loss": 0.295, "step": 4905 }, { "epoch": 0.23, "grad_norm": 0.41613092614552577, "learning_rate": 1.8064556018904267e-05, "loss": 0.2956, "step": 4906 }, { "epoch": 0.23, "grad_norm": 0.8709645356162412, "learning_rate": 1.8063676130960034e-05, "loss": 0.5244, "step": 4907 }, { "epoch": 0.23, "grad_norm": 0.2629725713184213, "learning_rate": 1.8062796064493778e-05, "loss": 0.1922, "step": 4908 }, { "epoch": 0.23, "grad_norm": 0.40861769847708185, "learning_rate": 1.8061915819524995e-05, "loss": 0.2613, "step": 4909 }, { "epoch": 0.23, "grad_norm": 0.8861222859783292, "learning_rate": 1.8061035396073163e-05, "loss": 0.5491, "step": 4910 }, { "epoch": 0.23, "grad_norm": 0.7109338940194653, "learning_rate": 1.8060154794157777e-05, "loss": 0.4807, "step": 4911 }, { "epoch": 0.23, "grad_norm": 0.39226744628715193, "learning_rate": 1.805927401379833e-05, "loss": 0.2437, "step": 4912 }, { "epoch": 0.23, "grad_norm": 0.44135009022396576, "learning_rate": 1.8058393055014326e-05, "loss": 0.3806, "step": 4913 }, { "epoch": 0.23, "grad_norm": 0.5385783838909084, "learning_rate": 1.805751191782527e-05, "loss": 0.2912, "step": 4914 }, { "epoch": 0.23, "grad_norm": 0.26303306614178434, "learning_rate": 1.805663060225066e-05, "loss": 0.1805, "step": 4915 }, { "epoch": 0.23, "grad_norm": 0.8589848632983503, "learning_rate": 1.8055749108310013e-05, "loss": 0.5237, "step": 4916 }, { "epoch": 0.23, "grad_norm": 0.3787048911027548, "learning_rate": 1.805486743602285e-05, "loss": 0.3861, "step": 4917 }, { "epoch": 0.23, "grad_norm": 0.3541288990070035, "learning_rate": 1.805398558540868e-05, "loss": 0.2237, "step": 4918 }, { "epoch": 0.23, "grad_norm": 1.2380196928031408, "learning_rate": 1.805310355648704e-05, "loss": 0.7746, "step": 4919 }, { "epoch": 0.23, "grad_norm": 0.39638605279729744, "learning_rate": 1.8052221349277445e-05, "loss": 0.3369, "step": 4920 }, { "epoch": 0.23, "grad_norm": 0.2894316450695463, "learning_rate": 1.8051338963799426e-05, "loss": 0.1417, "step": 4921 }, { "epoch": 0.23, "grad_norm": 0.3990297858333398, "learning_rate": 1.8050456400072525e-05, "loss": 0.2783, "step": 4922 }, { "epoch": 0.23, "grad_norm": 0.6648018960889627, "learning_rate": 1.804957365811628e-05, "loss": 0.4825, "step": 4923 }, { "epoch": 0.23, "grad_norm": 0.6606045529675441, "learning_rate": 1.8048690737950233e-05, "loss": 0.3645, "step": 4924 }, { "epoch": 0.23, "grad_norm": 0.3300698750007411, "learning_rate": 1.8047807639593927e-05, "loss": 0.2723, "step": 4925 }, { "epoch": 0.23, "grad_norm": 0.31946665622115394, "learning_rate": 1.8046924363066918e-05, "loss": 0.2153, "step": 4926 }, { "epoch": 0.23, "grad_norm": 0.4527209283985424, "learning_rate": 1.804604090838876e-05, "loss": 0.2529, "step": 4927 }, { "epoch": 0.23, "grad_norm": 0.4387573845847885, "learning_rate": 1.8045157275579012e-05, "loss": 0.3032, "step": 4928 }, { "epoch": 0.23, "grad_norm": 0.46947349892964557, "learning_rate": 1.8044273464657235e-05, "loss": 0.3716, "step": 4929 }, { "epoch": 0.23, "grad_norm": 0.406328131143055, "learning_rate": 1.8043389475642994e-05, "loss": 0.2661, "step": 4930 }, { "epoch": 0.23, "grad_norm": 0.44643669372371564, "learning_rate": 1.8042505308555863e-05, "loss": 0.3231, "step": 4931 }, { "epoch": 0.23, "grad_norm": 0.39343014583605024, "learning_rate": 1.8041620963415418e-05, "loss": 0.309, "step": 4932 }, { "epoch": 0.23, "grad_norm": 0.3402967313795037, "learning_rate": 1.8040736440241237e-05, "loss": 0.2334, "step": 4933 }, { "epoch": 0.23, "grad_norm": 0.4788876682630459, "learning_rate": 1.8039851739052898e-05, "loss": 0.3227, "step": 4934 }, { "epoch": 0.23, "grad_norm": 0.4201025591158397, "learning_rate": 1.803896685986999e-05, "loss": 0.2668, "step": 4935 }, { "epoch": 0.23, "grad_norm": 0.39850208536829457, "learning_rate": 1.8038081802712105e-05, "loss": 0.2653, "step": 4936 }, { "epoch": 0.23, "grad_norm": 0.4861331027459766, "learning_rate": 1.8037196567598838e-05, "loss": 0.3719, "step": 4937 }, { "epoch": 0.23, "grad_norm": 0.42997859984442105, "learning_rate": 1.8036311154549783e-05, "loss": 0.1324, "step": 4938 }, { "epoch": 0.23, "grad_norm": 0.407048275873624, "learning_rate": 1.8035425563584547e-05, "loss": 0.3062, "step": 4939 }, { "epoch": 0.23, "grad_norm": 0.48247985877054883, "learning_rate": 1.803453979472273e-05, "loss": 0.4209, "step": 4940 }, { "epoch": 0.23, "grad_norm": 0.30509944324770105, "learning_rate": 1.803365384798395e-05, "loss": 0.2335, "step": 4941 }, { "epoch": 0.23, "grad_norm": 0.6312220628466788, "learning_rate": 1.8032767723387815e-05, "loss": 0.3115, "step": 4942 }, { "epoch": 0.23, "grad_norm": 0.44658657975447447, "learning_rate": 1.8031881420953942e-05, "loss": 0.3383, "step": 4943 }, { "epoch": 0.23, "grad_norm": 0.3459392819530411, "learning_rate": 1.8030994940701955e-05, "loss": 0.2833, "step": 4944 }, { "epoch": 0.23, "grad_norm": 0.5265033838660462, "learning_rate": 1.8030108282651487e-05, "loss": 0.34, "step": 4945 }, { "epoch": 0.23, "grad_norm": 0.40533611255100954, "learning_rate": 1.8029221446822152e-05, "loss": 0.3568, "step": 4946 }, { "epoch": 0.23, "grad_norm": 0.24979892636233852, "learning_rate": 1.80283344332336e-05, "loss": 0.0972, "step": 4947 }, { "epoch": 0.23, "grad_norm": 0.3683004679796032, "learning_rate": 1.802744724190546e-05, "loss": 0.2888, "step": 4948 }, { "epoch": 0.23, "grad_norm": 0.4099562667436985, "learning_rate": 1.8026559872857375e-05, "loss": 0.3406, "step": 4949 }, { "epoch": 0.23, "grad_norm": 0.9648779560767419, "learning_rate": 1.802567232610899e-05, "loss": 0.6488, "step": 4950 }, { "epoch": 0.23, "grad_norm": 0.3890559478680819, "learning_rate": 1.8024784601679954e-05, "loss": 0.2808, "step": 4951 }, { "epoch": 0.23, "grad_norm": 0.4134675794820532, "learning_rate": 1.8023896699589923e-05, "loss": 0.3246, "step": 4952 }, { "epoch": 0.23, "grad_norm": 0.29829236180178204, "learning_rate": 1.802300861985855e-05, "loss": 0.1903, "step": 4953 }, { "epoch": 0.23, "grad_norm": 0.38427470698638655, "learning_rate": 1.8022120362505503e-05, "loss": 0.2159, "step": 4954 }, { "epoch": 0.23, "grad_norm": 0.5770495694081988, "learning_rate": 1.802123192755044e-05, "loss": 0.3754, "step": 4955 }, { "epoch": 0.23, "grad_norm": 0.38959411915089764, "learning_rate": 1.8020343315013032e-05, "loss": 0.3709, "step": 4956 }, { "epoch": 0.23, "grad_norm": 0.5901815185721849, "learning_rate": 1.8019454524912955e-05, "loss": 0.2963, "step": 4957 }, { "epoch": 0.23, "grad_norm": 0.4660061246013941, "learning_rate": 1.8018565557269886e-05, "loss": 0.3181, "step": 4958 }, { "epoch": 0.23, "grad_norm": 0.3275253732285472, "learning_rate": 1.80176764121035e-05, "loss": 0.1886, "step": 4959 }, { "epoch": 0.23, "grad_norm": 0.4060322478994801, "learning_rate": 1.801678708943349e-05, "loss": 0.3159, "step": 4960 }, { "epoch": 0.23, "grad_norm": 0.4268258902171265, "learning_rate": 1.8015897589279537e-05, "loss": 0.2552, "step": 4961 }, { "epoch": 0.23, "grad_norm": 1.0115930066360852, "learning_rate": 1.8015007911661336e-05, "loss": 0.457, "step": 4962 }, { "epoch": 0.23, "grad_norm": 0.7788025618758897, "learning_rate": 1.801411805659859e-05, "loss": 0.4154, "step": 4963 }, { "epoch": 0.23, "grad_norm": 0.34469493728536554, "learning_rate": 1.8013228024110988e-05, "loss": 0.2606, "step": 4964 }, { "epoch": 0.23, "grad_norm": 0.48842413949931923, "learning_rate": 1.8012337814218245e-05, "loss": 0.2914, "step": 4965 }, { "epoch": 0.23, "grad_norm": 0.39885164678707197, "learning_rate": 1.8011447426940066e-05, "loss": 0.2662, "step": 4966 }, { "epoch": 0.23, "grad_norm": 0.6686094714158469, "learning_rate": 1.801055686229616e-05, "loss": 0.2824, "step": 4967 }, { "epoch": 0.23, "grad_norm": 0.49380467875691375, "learning_rate": 1.800966612030625e-05, "loss": 0.3421, "step": 4968 }, { "epoch": 0.23, "grad_norm": 0.646814246183766, "learning_rate": 1.8008775200990047e-05, "loss": 0.3629, "step": 4969 }, { "epoch": 0.23, "grad_norm": 0.4532885049029625, "learning_rate": 1.800788410436728e-05, "loss": 0.2409, "step": 4970 }, { "epoch": 0.23, "grad_norm": 0.3429094182688301, "learning_rate": 1.800699283045768e-05, "loss": 0.2069, "step": 4971 }, { "epoch": 0.23, "grad_norm": 0.42160877847068445, "learning_rate": 1.8006101379280974e-05, "loss": 0.2763, "step": 4972 }, { "epoch": 0.23, "grad_norm": 0.524193873200823, "learning_rate": 1.80052097508569e-05, "loss": 0.3373, "step": 4973 }, { "epoch": 0.23, "grad_norm": 1.2884676025108464, "learning_rate": 1.8004317945205197e-05, "loss": 0.4232, "step": 4974 }, { "epoch": 0.23, "grad_norm": 0.6585775472921765, "learning_rate": 1.800342596234561e-05, "loss": 0.3259, "step": 4975 }, { "epoch": 0.23, "grad_norm": 0.39055048734411835, "learning_rate": 1.8002533802297888e-05, "loss": 0.3014, "step": 4976 }, { "epoch": 0.23, "grad_norm": 0.30039473285011875, "learning_rate": 1.800164146508178e-05, "loss": 0.1944, "step": 4977 }, { "epoch": 0.23, "grad_norm": 1.244373866114887, "learning_rate": 1.800074895071704e-05, "loss": 0.5682, "step": 4978 }, { "epoch": 0.23, "grad_norm": 0.4742673702158325, "learning_rate": 1.799985625922343e-05, "loss": 0.3086, "step": 4979 }, { "epoch": 0.23, "grad_norm": 0.4236816339236766, "learning_rate": 1.7998963390620713e-05, "loss": 0.2567, "step": 4980 }, { "epoch": 0.23, "grad_norm": 0.6553713744692531, "learning_rate": 1.7998070344928656e-05, "loss": 0.3879, "step": 4981 }, { "epoch": 0.23, "grad_norm": 0.4096341727193085, "learning_rate": 1.799717712216703e-05, "loss": 0.2876, "step": 4982 }, { "epoch": 0.23, "grad_norm": 0.8082566017102061, "learning_rate": 1.7996283722355612e-05, "loss": 0.2622, "step": 4983 }, { "epoch": 0.23, "grad_norm": 0.4010869658506476, "learning_rate": 1.799539014551418e-05, "loss": 0.3233, "step": 4984 }, { "epoch": 0.23, "grad_norm": 0.5162993492683894, "learning_rate": 1.7994496391662513e-05, "loss": 0.3247, "step": 4985 }, { "epoch": 0.23, "grad_norm": 1.1079533540418687, "learning_rate": 1.7993602460820403e-05, "loss": 0.5842, "step": 4986 }, { "epoch": 0.23, "grad_norm": 0.2556972329154646, "learning_rate": 1.799270835300764e-05, "loss": 0.1688, "step": 4987 }, { "epoch": 0.23, "grad_norm": 0.34010955007100796, "learning_rate": 1.7991814068244012e-05, "loss": 0.2719, "step": 4988 }, { "epoch": 0.23, "grad_norm": 0.943480844671025, "learning_rate": 1.799091960654933e-05, "loss": 0.6952, "step": 4989 }, { "epoch": 0.23, "grad_norm": 0.48934264694276663, "learning_rate": 1.7990024967943385e-05, "loss": 0.3014, "step": 4990 }, { "epoch": 0.23, "grad_norm": 0.44444903972173233, "learning_rate": 1.7989130152445987e-05, "loss": 0.3504, "step": 4991 }, { "epoch": 0.23, "grad_norm": 0.44279254846404587, "learning_rate": 1.7988235160076953e-05, "loss": 0.3492, "step": 4992 }, { "epoch": 0.23, "grad_norm": 0.17618268762304568, "learning_rate": 1.798733999085609e-05, "loss": 0.0747, "step": 4993 }, { "epoch": 0.23, "grad_norm": 0.4886679567575999, "learning_rate": 1.7986444644803213e-05, "loss": 0.3231, "step": 4994 }, { "epoch": 0.23, "grad_norm": 0.7919471482729082, "learning_rate": 1.7985549121938155e-05, "loss": 0.5406, "step": 4995 }, { "epoch": 0.23, "grad_norm": 0.3769936575632186, "learning_rate": 1.7984653422280733e-05, "loss": 0.3069, "step": 4996 }, { "epoch": 0.23, "grad_norm": 0.34074282864790595, "learning_rate": 1.798375754585078e-05, "loss": 0.279, "step": 4997 }, { "epoch": 0.23, "grad_norm": 0.4259196613007591, "learning_rate": 1.7982861492668132e-05, "loss": 0.2927, "step": 4998 }, { "epoch": 0.23, "grad_norm": 0.44693566533597756, "learning_rate": 1.7981965262752625e-05, "loss": 0.2695, "step": 4999 }, { "epoch": 0.23, "grad_norm": 0.34224274782385045, "learning_rate": 1.7981068856124096e-05, "loss": 0.2429, "step": 5000 }, { "epoch": 0.23, "grad_norm": 0.9267544651736447, "learning_rate": 1.7980172272802398e-05, "loss": 0.4539, "step": 5001 }, { "epoch": 0.23, "grad_norm": 0.7859811498862915, "learning_rate": 1.7979275512807374e-05, "loss": 0.4729, "step": 5002 }, { "epoch": 0.23, "grad_norm": 0.36038858364960336, "learning_rate": 1.7978378576158885e-05, "loss": 0.2341, "step": 5003 }, { "epoch": 0.23, "grad_norm": 0.39934532417285895, "learning_rate": 1.797748146287678e-05, "loss": 0.3336, "step": 5004 }, { "epoch": 0.23, "grad_norm": 0.2939776414112071, "learning_rate": 1.7976584172980926e-05, "loss": 0.1602, "step": 5005 }, { "epoch": 0.23, "grad_norm": 0.3562785766822121, "learning_rate": 1.7975686706491187e-05, "loss": 0.2057, "step": 5006 }, { "epoch": 0.23, "grad_norm": 0.9798550143327551, "learning_rate": 1.7974789063427432e-05, "loss": 0.5223, "step": 5007 }, { "epoch": 0.23, "grad_norm": 0.43043121135174156, "learning_rate": 1.7973891243809533e-05, "loss": 0.3443, "step": 5008 }, { "epoch": 0.23, "grad_norm": 0.3453947204773186, "learning_rate": 1.7972993247657363e-05, "loss": 0.2025, "step": 5009 }, { "epoch": 0.23, "grad_norm": 0.9909072959377098, "learning_rate": 1.797209507499081e-05, "loss": 0.7143, "step": 5010 }, { "epoch": 0.23, "grad_norm": 0.26906878326357414, "learning_rate": 1.7971196725829757e-05, "loss": 0.2091, "step": 5011 }, { "epoch": 0.23, "grad_norm": 0.4355806063082319, "learning_rate": 1.7970298200194093e-05, "loss": 0.3076, "step": 5012 }, { "epoch": 0.23, "grad_norm": 0.6499085881449501, "learning_rate": 1.7969399498103703e-05, "loss": 0.3331, "step": 5013 }, { "epoch": 0.23, "grad_norm": 0.966102950643204, "learning_rate": 1.796850061957849e-05, "loss": 0.5413, "step": 5014 }, { "epoch": 0.23, "grad_norm": 0.38373489288486073, "learning_rate": 1.796760156463836e-05, "loss": 0.3069, "step": 5015 }, { "epoch": 0.23, "grad_norm": 0.36033742176904077, "learning_rate": 1.7966702333303207e-05, "loss": 0.296, "step": 5016 }, { "epoch": 0.23, "grad_norm": 0.3505410340985064, "learning_rate": 1.7965802925592938e-05, "loss": 0.1972, "step": 5017 }, { "epoch": 0.23, "grad_norm": 0.36733342077749354, "learning_rate": 1.7964903341527475e-05, "loss": 0.29, "step": 5018 }, { "epoch": 0.23, "grad_norm": 0.68552585615589, "learning_rate": 1.7964003581126728e-05, "loss": 0.3545, "step": 5019 }, { "epoch": 0.23, "grad_norm": 0.5631352086392847, "learning_rate": 1.7963103644410617e-05, "loss": 0.3859, "step": 5020 }, { "epoch": 0.23, "grad_norm": 0.3625065386007482, "learning_rate": 1.7962203531399066e-05, "loss": 0.2827, "step": 5021 }, { "epoch": 0.23, "grad_norm": 0.7826761805684815, "learning_rate": 1.7961303242112003e-05, "loss": 0.4384, "step": 5022 }, { "epoch": 0.23, "grad_norm": 0.2771096742085613, "learning_rate": 1.7960402776569358e-05, "loss": 0.2211, "step": 5023 }, { "epoch": 0.23, "grad_norm": 0.4020504042711531, "learning_rate": 1.795950213479107e-05, "loss": 0.2908, "step": 5024 }, { "epoch": 0.23, "grad_norm": 0.8002187101166851, "learning_rate": 1.7958601316797075e-05, "loss": 0.4971, "step": 5025 }, { "epoch": 0.23, "grad_norm": 0.8152742301341442, "learning_rate": 1.795770032260732e-05, "loss": 0.4577, "step": 5026 }, { "epoch": 0.23, "grad_norm": 0.486467464490504, "learning_rate": 1.7956799152241746e-05, "loss": 0.2833, "step": 5027 }, { "epoch": 0.23, "grad_norm": 0.3756770817565876, "learning_rate": 1.7955897805720308e-05, "loss": 0.3003, "step": 5028 }, { "epoch": 0.23, "grad_norm": 0.40591926312626125, "learning_rate": 1.7954996283062964e-05, "loss": 0.2406, "step": 5029 }, { "epoch": 0.23, "grad_norm": 0.47746270171148314, "learning_rate": 1.7954094584289668e-05, "loss": 0.267, "step": 5030 }, { "epoch": 0.23, "grad_norm": 0.3686969368185776, "learning_rate": 1.795319270942038e-05, "loss": 0.3011, "step": 5031 }, { "epoch": 0.23, "grad_norm": 0.5489680611806266, "learning_rate": 1.7952290658475075e-05, "loss": 0.2705, "step": 5032 }, { "epoch": 0.23, "grad_norm": 0.377050909869488, "learning_rate": 1.795138843147372e-05, "loss": 0.2775, "step": 5033 }, { "epoch": 0.23, "grad_norm": 0.8690940993606971, "learning_rate": 1.7950486028436284e-05, "loss": 0.5408, "step": 5034 }, { "epoch": 0.23, "grad_norm": 0.4216646423672232, "learning_rate": 1.7949583449382754e-05, "loss": 0.2827, "step": 5035 }, { "epoch": 0.23, "grad_norm": 0.41538686584989826, "learning_rate": 1.794868069433311e-05, "loss": 0.2792, "step": 5036 }, { "epoch": 0.23, "grad_norm": 0.359646296740704, "learning_rate": 1.7947777763307333e-05, "loss": 0.2358, "step": 5037 }, { "epoch": 0.23, "grad_norm": 1.4300416270698473, "learning_rate": 1.794687465632542e-05, "loss": 0.8272, "step": 5038 }, { "epoch": 0.23, "grad_norm": 0.38103629170890935, "learning_rate": 1.7945971373407358e-05, "loss": 0.2452, "step": 5039 }, { "epoch": 0.23, "grad_norm": 0.4314352546797358, "learning_rate": 1.7945067914573147e-05, "loss": 0.3416, "step": 5040 }, { "epoch": 0.23, "grad_norm": 0.8118304768648636, "learning_rate": 1.7944164279842794e-05, "loss": 0.5083, "step": 5041 }, { "epoch": 0.23, "grad_norm": 0.39463596889960306, "learning_rate": 1.79432604692363e-05, "loss": 0.2673, "step": 5042 }, { "epoch": 0.23, "grad_norm": 0.3633128097207244, "learning_rate": 1.794235648277367e-05, "loss": 0.245, "step": 5043 }, { "epoch": 0.23, "grad_norm": 0.3910083901651251, "learning_rate": 1.794145232047493e-05, "loss": 0.3106, "step": 5044 }, { "epoch": 0.23, "grad_norm": 0.38116107558308543, "learning_rate": 1.7940547982360085e-05, "loss": 0.1934, "step": 5045 }, { "epoch": 0.23, "grad_norm": 0.5913880272357627, "learning_rate": 1.793964346844916e-05, "loss": 0.4388, "step": 5046 }, { "epoch": 0.23, "grad_norm": 0.3800826217932683, "learning_rate": 1.7938738778762182e-05, "loss": 0.3687, "step": 5047 }, { "epoch": 0.23, "grad_norm": 0.37650891226813976, "learning_rate": 1.793783391331918e-05, "loss": 0.2309, "step": 5048 }, { "epoch": 0.23, "grad_norm": 0.40496256830260635, "learning_rate": 1.7936928872140184e-05, "loss": 0.2843, "step": 5049 }, { "epoch": 0.23, "grad_norm": 0.4622874042245147, "learning_rate": 1.7936023655245235e-05, "loss": 0.309, "step": 5050 }, { "epoch": 0.23, "grad_norm": 0.42126213902983145, "learning_rate": 1.7935118262654373e-05, "loss": 0.3017, "step": 5051 }, { "epoch": 0.23, "grad_norm": 0.33526525955823777, "learning_rate": 1.7934212694387633e-05, "loss": 0.2861, "step": 5052 }, { "epoch": 0.23, "grad_norm": 0.6335455119044751, "learning_rate": 1.793330695046508e-05, "loss": 0.4684, "step": 5053 }, { "epoch": 0.23, "grad_norm": 0.4621370173461592, "learning_rate": 1.793240103090675e-05, "loss": 0.2966, "step": 5054 }, { "epoch": 0.23, "grad_norm": 0.29275487649549736, "learning_rate": 1.793149493573271e-05, "loss": 0.2014, "step": 5055 }, { "epoch": 0.23, "grad_norm": 0.3700935017754927, "learning_rate": 1.7930588664963017e-05, "loss": 0.2722, "step": 5056 }, { "epoch": 0.23, "grad_norm": 0.4808787813036029, "learning_rate": 1.7929682218617733e-05, "loss": 0.309, "step": 5057 }, { "epoch": 0.23, "grad_norm": 0.5245466849790029, "learning_rate": 1.7928775596716925e-05, "loss": 0.3167, "step": 5058 }, { "epoch": 0.23, "grad_norm": 0.37717047325476427, "learning_rate": 1.7927868799280673e-05, "loss": 0.3543, "step": 5059 }, { "epoch": 0.23, "grad_norm": 0.4810216205367915, "learning_rate": 1.7926961826329043e-05, "loss": 0.2915, "step": 5060 }, { "epoch": 0.23, "grad_norm": 0.4879956420441694, "learning_rate": 1.792605467788212e-05, "loss": 0.2724, "step": 5061 }, { "epoch": 0.23, "grad_norm": 0.34816355175159086, "learning_rate": 1.7925147353959985e-05, "loss": 0.2283, "step": 5062 }, { "epoch": 0.23, "grad_norm": 0.4718450994324635, "learning_rate": 1.7924239854582725e-05, "loss": 0.3119, "step": 5063 }, { "epoch": 0.23, "grad_norm": 0.4352988898258253, "learning_rate": 1.7923332179770433e-05, "loss": 0.3155, "step": 5064 }, { "epoch": 0.23, "grad_norm": 0.7512087345064935, "learning_rate": 1.79224243295432e-05, "loss": 0.3957, "step": 5065 }, { "epoch": 0.23, "grad_norm": 0.6689176330344204, "learning_rate": 1.7921516303921132e-05, "loss": 0.4244, "step": 5066 }, { "epoch": 0.23, "grad_norm": 0.369814947764546, "learning_rate": 1.7920608102924326e-05, "loss": 0.3349, "step": 5067 }, { "epoch": 0.23, "grad_norm": 0.2786512559484043, "learning_rate": 1.7919699726572893e-05, "loss": 0.1647, "step": 5068 }, { "epoch": 0.23, "grad_norm": 0.5902070516974034, "learning_rate": 1.791879117488694e-05, "loss": 0.3423, "step": 5069 }, { "epoch": 0.23, "grad_norm": 0.47220820240241557, "learning_rate": 1.7917882447886585e-05, "loss": 0.3452, "step": 5070 }, { "epoch": 0.23, "grad_norm": 0.4081484900250525, "learning_rate": 1.7916973545591937e-05, "loss": 0.2614, "step": 5071 }, { "epoch": 0.23, "grad_norm": 0.6746238677439688, "learning_rate": 1.791606446802313e-05, "loss": 0.3361, "step": 5072 }, { "epoch": 0.23, "grad_norm": 0.44215829655231037, "learning_rate": 1.7915155215200285e-05, "loss": 0.323, "step": 5073 }, { "epoch": 0.23, "grad_norm": 0.2944637006754108, "learning_rate": 1.7914245787143532e-05, "loss": 0.1453, "step": 5074 }, { "epoch": 0.23, "grad_norm": 0.2984632359789537, "learning_rate": 1.7913336183873006e-05, "loss": 0.2908, "step": 5075 }, { "epoch": 0.23, "grad_norm": 0.5945538180873058, "learning_rate": 1.7912426405408843e-05, "loss": 0.4102, "step": 5076 }, { "epoch": 0.23, "grad_norm": 0.4926857362317913, "learning_rate": 1.7911516451771185e-05, "loss": 0.3105, "step": 5077 }, { "epoch": 0.23, "grad_norm": 0.40201477451956613, "learning_rate": 1.791060632298018e-05, "loss": 0.2144, "step": 5078 }, { "epoch": 0.23, "grad_norm": 0.47361384724889793, "learning_rate": 1.7909696019055973e-05, "loss": 0.3262, "step": 5079 }, { "epoch": 0.23, "grad_norm": 0.6172385438899625, "learning_rate": 1.790878554001872e-05, "loss": 0.4132, "step": 5080 }, { "epoch": 0.23, "grad_norm": 0.8913202990221846, "learning_rate": 1.790787488588858e-05, "loss": 0.3383, "step": 5081 }, { "epoch": 0.23, "grad_norm": 0.39622299418267953, "learning_rate": 1.7906964056685706e-05, "loss": 0.3411, "step": 5082 }, { "epoch": 0.23, "grad_norm": 0.3015152379408921, "learning_rate": 1.7906053052430275e-05, "loss": 0.2321, "step": 5083 }, { "epoch": 0.23, "grad_norm": 0.3814474536597992, "learning_rate": 1.7905141873142448e-05, "loss": 0.1816, "step": 5084 }, { "epoch": 0.23, "grad_norm": 0.46192952313076796, "learning_rate": 1.7904230518842398e-05, "loss": 0.3434, "step": 5085 }, { "epoch": 0.23, "grad_norm": 0.6123851064077006, "learning_rate": 1.79033189895503e-05, "loss": 0.3976, "step": 5086 }, { "epoch": 0.23, "grad_norm": 0.5192329962608332, "learning_rate": 1.7902407285286337e-05, "loss": 0.3396, "step": 5087 }, { "epoch": 0.23, "grad_norm": 0.45111407784528285, "learning_rate": 1.79014954060707e-05, "loss": 0.2908, "step": 5088 }, { "epoch": 0.23, "grad_norm": 0.3495230606972884, "learning_rate": 1.7900583351923565e-05, "loss": 0.1891, "step": 5089 }, { "epoch": 0.23, "grad_norm": 0.7658119751439924, "learning_rate": 1.789967112286513e-05, "loss": 0.4497, "step": 5090 }, { "epoch": 0.23, "grad_norm": 0.33489582883930885, "learning_rate": 1.789875871891559e-05, "loss": 0.2611, "step": 5091 }, { "epoch": 0.23, "grad_norm": 0.8286510008622918, "learning_rate": 1.7897846140095142e-05, "loss": 0.5731, "step": 5092 }, { "epoch": 0.23, "grad_norm": 0.5505761213039706, "learning_rate": 1.7896933386423998e-05, "loss": 0.3915, "step": 5093 }, { "epoch": 0.23, "grad_norm": 0.4030146431128897, "learning_rate": 1.7896020457922356e-05, "loss": 0.233, "step": 5094 }, { "epoch": 0.23, "grad_norm": 0.32751718753949155, "learning_rate": 1.789510735461043e-05, "loss": 0.2758, "step": 5095 }, { "epoch": 0.23, "grad_norm": 0.3918692206972656, "learning_rate": 1.7894194076508443e-05, "loss": 0.1948, "step": 5096 }, { "epoch": 0.23, "grad_norm": 0.4228737723619419, "learning_rate": 1.7893280623636605e-05, "loss": 0.2572, "step": 5097 }, { "epoch": 0.23, "grad_norm": 0.5845443932747214, "learning_rate": 1.7892366996015138e-05, "loss": 0.4071, "step": 5098 }, { "epoch": 0.23, "grad_norm": 0.4928719272583364, "learning_rate": 1.7891453193664276e-05, "loss": 0.3476, "step": 5099 }, { "epoch": 0.23, "grad_norm": 0.445736013525592, "learning_rate": 1.7890539216604245e-05, "loss": 0.3355, "step": 5100 }, { "epoch": 0.23, "grad_norm": 0.20490259686736165, "learning_rate": 1.788962506485528e-05, "loss": 0.1284, "step": 5101 }, { "epoch": 0.23, "grad_norm": 0.5838090314548726, "learning_rate": 1.7888710738437622e-05, "loss": 0.4154, "step": 5102 }, { "epoch": 0.23, "grad_norm": 0.3826177297512074, "learning_rate": 1.788779623737151e-05, "loss": 0.2612, "step": 5103 }, { "epoch": 0.23, "grad_norm": 0.8356145042914871, "learning_rate": 1.7886881561677195e-05, "loss": 0.3883, "step": 5104 }, { "epoch": 0.23, "grad_norm": 0.8754482570939112, "learning_rate": 1.788596671137492e-05, "loss": 0.4632, "step": 5105 }, { "epoch": 0.23, "grad_norm": 0.39233902738026744, "learning_rate": 1.7885051686484942e-05, "loss": 0.3112, "step": 5106 }, { "epoch": 0.23, "grad_norm": 0.42263745102169586, "learning_rate": 1.788413648702752e-05, "loss": 0.2787, "step": 5107 }, { "epoch": 0.23, "grad_norm": 0.3405670402336814, "learning_rate": 1.7883221113022916e-05, "loss": 0.206, "step": 5108 }, { "epoch": 0.23, "grad_norm": 0.4944128898147866, "learning_rate": 1.78823055644914e-05, "loss": 0.3103, "step": 5109 }, { "epoch": 0.23, "grad_norm": 1.2349620342328662, "learning_rate": 1.7881389841453227e-05, "loss": 0.4121, "step": 5110 }, { "epoch": 0.23, "grad_norm": 0.4201018126285985, "learning_rate": 1.7880473943928684e-05, "loss": 0.3325, "step": 5111 }, { "epoch": 0.23, "grad_norm": 0.4304973261267877, "learning_rate": 1.787955787193804e-05, "loss": 0.3227, "step": 5112 }, { "epoch": 0.23, "grad_norm": 1.0973808088007153, "learning_rate": 1.787864162550158e-05, "loss": 0.6788, "step": 5113 }, { "epoch": 0.23, "grad_norm": 0.26246339099638244, "learning_rate": 1.7877725204639587e-05, "loss": 0.1763, "step": 5114 }, { "epoch": 0.23, "grad_norm": 0.48361457418978715, "learning_rate": 1.7876808609372355e-05, "loss": 0.3104, "step": 5115 }, { "epoch": 0.24, "grad_norm": 1.6364090147413717, "learning_rate": 1.787589183972017e-05, "loss": 0.5048, "step": 5116 }, { "epoch": 0.24, "grad_norm": 0.5701602069864491, "learning_rate": 1.787497489570333e-05, "loss": 0.3161, "step": 5117 }, { "epoch": 0.24, "grad_norm": 0.43907752585742543, "learning_rate": 1.7874057777342134e-05, "loss": 0.2893, "step": 5118 }, { "epoch": 0.24, "grad_norm": 0.43587331370566484, "learning_rate": 1.787314048465689e-05, "loss": 0.3218, "step": 5119 }, { "epoch": 0.24, "grad_norm": 0.4042903562503827, "learning_rate": 1.7872223017667906e-05, "loss": 0.1801, "step": 5120 }, { "epoch": 0.24, "grad_norm": 0.47496957419460945, "learning_rate": 1.787130537639549e-05, "loss": 0.2894, "step": 5121 }, { "epoch": 0.24, "grad_norm": 0.6384442801939174, "learning_rate": 1.7870387560859958e-05, "loss": 0.3961, "step": 5122 }, { "epoch": 0.24, "grad_norm": 0.5158052504200386, "learning_rate": 1.786946957108163e-05, "loss": 0.3037, "step": 5123 }, { "epoch": 0.24, "grad_norm": 0.43578381106729347, "learning_rate": 1.7868551407080832e-05, "loss": 0.3334, "step": 5124 }, { "epoch": 0.24, "grad_norm": 1.2784151617131547, "learning_rate": 1.7867633068877892e-05, "loss": 0.6563, "step": 5125 }, { "epoch": 0.24, "grad_norm": 0.33958296227831175, "learning_rate": 1.7866714556493136e-05, "loss": 0.2788, "step": 5126 }, { "epoch": 0.24, "grad_norm": 0.3564227634273513, "learning_rate": 1.7865795869946903e-05, "loss": 0.2602, "step": 5127 }, { "epoch": 0.24, "grad_norm": 0.478661209508023, "learning_rate": 1.786487700925953e-05, "loss": 0.3011, "step": 5128 }, { "epoch": 0.24, "grad_norm": 1.1740255193604194, "learning_rate": 1.786395797445136e-05, "loss": 0.7691, "step": 5129 }, { "epoch": 0.24, "grad_norm": 0.43313948420681764, "learning_rate": 1.786303876554274e-05, "loss": 0.2257, "step": 5130 }, { "epoch": 0.24, "grad_norm": 0.4843830762632465, "learning_rate": 1.786211938255402e-05, "loss": 0.3691, "step": 5131 }, { "epoch": 0.24, "grad_norm": 0.6144764355539256, "learning_rate": 1.7861199825505556e-05, "loss": 0.4631, "step": 5132 }, { "epoch": 0.24, "grad_norm": 0.3450608236827846, "learning_rate": 1.78602800944177e-05, "loss": 0.2027, "step": 5133 }, { "epoch": 0.24, "grad_norm": 0.29344124801121907, "learning_rate": 1.7859360189310825e-05, "loss": 0.2224, "step": 5134 }, { "epoch": 0.24, "grad_norm": 0.6109325224891187, "learning_rate": 1.7858440110205286e-05, "loss": 0.4442, "step": 5135 }, { "epoch": 0.24, "grad_norm": 0.37220596204716855, "learning_rate": 1.785751985712146e-05, "loss": 0.2169, "step": 5136 }, { "epoch": 0.24, "grad_norm": 0.7527615457393293, "learning_rate": 1.7856599430079714e-05, "loss": 0.499, "step": 5137 }, { "epoch": 0.24, "grad_norm": 0.4947554977141729, "learning_rate": 1.7855678829100432e-05, "loss": 0.369, "step": 5138 }, { "epoch": 0.24, "grad_norm": 0.33828890534809214, "learning_rate": 1.785475805420399e-05, "loss": 0.2746, "step": 5139 }, { "epoch": 0.24, "grad_norm": 0.2813207333207209, "learning_rate": 1.7853837105410777e-05, "loss": 0.1462, "step": 5140 }, { "epoch": 0.24, "grad_norm": 1.374539679226561, "learning_rate": 1.785291598274118e-05, "loss": 0.7958, "step": 5141 }, { "epoch": 0.24, "grad_norm": 0.40936958480041513, "learning_rate": 1.7851994686215592e-05, "loss": 0.2704, "step": 5142 }, { "epoch": 0.24, "grad_norm": 0.42658478899250896, "learning_rate": 1.7851073215854406e-05, "loss": 0.2757, "step": 5143 }, { "epoch": 0.24, "grad_norm": 0.8468372190875213, "learning_rate": 1.785015157167803e-05, "loss": 0.5011, "step": 5144 }, { "epoch": 0.24, "grad_norm": 0.3959085581442702, "learning_rate": 1.784922975370686e-05, "loss": 0.2786, "step": 5145 }, { "epoch": 0.24, "grad_norm": 0.2545993242185012, "learning_rate": 1.784830776196131e-05, "loss": 0.1411, "step": 5146 }, { "epoch": 0.24, "grad_norm": 0.569926780503696, "learning_rate": 1.7847385596461794e-05, "loss": 0.3941, "step": 5147 }, { "epoch": 0.24, "grad_norm": 0.38102671413681816, "learning_rate": 1.7846463257228722e-05, "loss": 0.2867, "step": 5148 }, { "epoch": 0.24, "grad_norm": 1.0898220587915632, "learning_rate": 1.7845540744282518e-05, "loss": 0.3872, "step": 5149 }, { "epoch": 0.24, "grad_norm": 0.4264252293970126, "learning_rate": 1.7844618057643602e-05, "loss": 0.3491, "step": 5150 }, { "epoch": 0.24, "grad_norm": 0.4066966923141595, "learning_rate": 1.78436951973324e-05, "loss": 0.2533, "step": 5151 }, { "epoch": 0.24, "grad_norm": 0.2875041928318243, "learning_rate": 1.784277216336935e-05, "loss": 0.1999, "step": 5152 }, { "epoch": 0.24, "grad_norm": 0.5074768042131431, "learning_rate": 1.784184895577488e-05, "loss": 0.3351, "step": 5153 }, { "epoch": 0.24, "grad_norm": 0.4559811337480079, "learning_rate": 1.7840925574569437e-05, "loss": 0.2716, "step": 5154 }, { "epoch": 0.24, "grad_norm": 0.622990941790311, "learning_rate": 1.7840002019773456e-05, "loss": 0.3709, "step": 5155 }, { "epoch": 0.24, "grad_norm": 0.9347673302327928, "learning_rate": 1.7839078291407384e-05, "loss": 0.3725, "step": 5156 }, { "epoch": 0.24, "grad_norm": 0.4488739714906619, "learning_rate": 1.7838154389491677e-05, "loss": 0.3136, "step": 5157 }, { "epoch": 0.24, "grad_norm": 0.32927789280788533, "learning_rate": 1.7837230314046787e-05, "loss": 0.271, "step": 5158 }, { "epoch": 0.24, "grad_norm": 1.4351351690476724, "learning_rate": 1.783630606509317e-05, "loss": 0.58, "step": 5159 }, { "epoch": 0.24, "grad_norm": 0.31167215579178853, "learning_rate": 1.783538164265129e-05, "loss": 0.2381, "step": 5160 }, { "epoch": 0.24, "grad_norm": 0.7971649239473425, "learning_rate": 1.7834457046741615e-05, "loss": 0.4666, "step": 5161 }, { "epoch": 0.24, "grad_norm": 0.3840986936315121, "learning_rate": 1.7833532277384607e-05, "loss": 0.2925, "step": 5162 }, { "epoch": 0.24, "grad_norm": 0.4072842289363352, "learning_rate": 1.7832607334600747e-05, "loss": 0.2759, "step": 5163 }, { "epoch": 0.24, "grad_norm": 0.4914228074639435, "learning_rate": 1.783168221841051e-05, "loss": 0.283, "step": 5164 }, { "epoch": 0.24, "grad_norm": 1.3469792481722633, "learning_rate": 1.7830756928834377e-05, "loss": 0.5944, "step": 5165 }, { "epoch": 0.24, "grad_norm": 0.3849682926411431, "learning_rate": 1.7829831465892832e-05, "loss": 0.2424, "step": 5166 }, { "epoch": 0.24, "grad_norm": 0.43409061154265643, "learning_rate": 1.7828905829606364e-05, "loss": 0.3787, "step": 5167 }, { "epoch": 0.24, "grad_norm": 0.5112131660296164, "learning_rate": 1.7827980019995468e-05, "loss": 0.309, "step": 5168 }, { "epoch": 0.24, "grad_norm": 0.4182364417305089, "learning_rate": 1.7827054037080638e-05, "loss": 0.1867, "step": 5169 }, { "epoch": 0.24, "grad_norm": 0.349916291432456, "learning_rate": 1.7826127880882375e-05, "loss": 0.3165, "step": 5170 }, { "epoch": 0.24, "grad_norm": 0.8468169688689349, "learning_rate": 1.7825201551421186e-05, "loss": 0.5387, "step": 5171 }, { "epoch": 0.24, "grad_norm": 0.3740907445727186, "learning_rate": 1.7824275048717577e-05, "loss": 0.2271, "step": 5172 }, { "epoch": 0.24, "grad_norm": 0.28387501001331666, "learning_rate": 1.782334837279206e-05, "loss": 0.2224, "step": 5173 }, { "epoch": 0.24, "grad_norm": 0.4554594401129825, "learning_rate": 1.7822421523665145e-05, "loss": 0.3742, "step": 5174 }, { "epoch": 0.24, "grad_norm": 0.35900336698876534, "learning_rate": 1.782149450135736e-05, "loss": 0.2, "step": 5175 }, { "epoch": 0.24, "grad_norm": 0.5932827917613999, "learning_rate": 1.7820567305889228e-05, "loss": 0.4319, "step": 5176 }, { "epoch": 0.24, "grad_norm": 0.7536754323069328, "learning_rate": 1.7819639937281267e-05, "loss": 0.5086, "step": 5177 }, { "epoch": 0.24, "grad_norm": 0.3672583383559498, "learning_rate": 1.7818712395554017e-05, "loss": 0.2915, "step": 5178 }, { "epoch": 0.24, "grad_norm": 0.37015218238611, "learning_rate": 1.7817784680728013e-05, "loss": 0.2806, "step": 5179 }, { "epoch": 0.24, "grad_norm": 0.36906361554153094, "learning_rate": 1.7816856792823792e-05, "loss": 0.1509, "step": 5180 }, { "epoch": 0.24, "grad_norm": 0.45548953693560196, "learning_rate": 1.781592873186189e-05, "loss": 0.2952, "step": 5181 }, { "epoch": 0.24, "grad_norm": 0.36677997833233045, "learning_rate": 1.781500049786286e-05, "loss": 0.254, "step": 5182 }, { "epoch": 0.24, "grad_norm": 0.84822181983163, "learning_rate": 1.7814072090847258e-05, "loss": 0.5074, "step": 5183 }, { "epoch": 0.24, "grad_norm": 0.43773396133924425, "learning_rate": 1.7813143510835625e-05, "loss": 0.3473, "step": 5184 }, { "epoch": 0.24, "grad_norm": 0.32204299906052175, "learning_rate": 1.7812214757848523e-05, "loss": 0.1848, "step": 5185 }, { "epoch": 0.24, "grad_norm": 0.31811909019134615, "learning_rate": 1.7811285831906523e-05, "loss": 0.2868, "step": 5186 }, { "epoch": 0.24, "grad_norm": 0.7255452545514577, "learning_rate": 1.781035673303018e-05, "loss": 0.4307, "step": 5187 }, { "epoch": 0.24, "grad_norm": 0.4352985348854899, "learning_rate": 1.7809427461240068e-05, "loss": 0.2599, "step": 5188 }, { "epoch": 0.24, "grad_norm": 0.6703747768051963, "learning_rate": 1.7808498016556755e-05, "loss": 0.4372, "step": 5189 }, { "epoch": 0.24, "grad_norm": 0.43465890712938987, "learning_rate": 1.7807568399000824e-05, "loss": 0.3044, "step": 5190 }, { "epoch": 0.24, "grad_norm": 0.464771689421088, "learning_rate": 1.7806638608592855e-05, "loss": 0.3722, "step": 5191 }, { "epoch": 0.24, "grad_norm": 0.22478242378053584, "learning_rate": 1.780570864535343e-05, "loss": 0.0757, "step": 5192 }, { "epoch": 0.24, "grad_norm": 0.6888541845162903, "learning_rate": 1.7804778509303136e-05, "loss": 0.395, "step": 5193 }, { "epoch": 0.24, "grad_norm": 0.35249292241742775, "learning_rate": 1.7803848200462573e-05, "loss": 0.311, "step": 5194 }, { "epoch": 0.24, "grad_norm": 0.6874776519898484, "learning_rate": 1.780291771885233e-05, "loss": 0.3892, "step": 5195 }, { "epoch": 0.24, "grad_norm": 0.691690643929854, "learning_rate": 1.780198706449301e-05, "loss": 0.4044, "step": 5196 }, { "epoch": 0.24, "grad_norm": 0.3457667817261081, "learning_rate": 1.7801056237405213e-05, "loss": 0.2646, "step": 5197 }, { "epoch": 0.24, "grad_norm": 0.30394760569388724, "learning_rate": 1.7800125237609555e-05, "loss": 0.2033, "step": 5198 }, { "epoch": 0.24, "grad_norm": 0.5951573523861192, "learning_rate": 1.7799194065126636e-05, "loss": 0.3548, "step": 5199 }, { "epoch": 0.24, "grad_norm": 0.43618286588987465, "learning_rate": 1.7798262719977085e-05, "loss": 0.3162, "step": 5200 }, { "epoch": 0.24, "grad_norm": 0.7338380277663991, "learning_rate": 1.7797331202181507e-05, "loss": 0.4763, "step": 5201 }, { "epoch": 0.24, "grad_norm": 0.28877046688066665, "learning_rate": 1.7796399511760534e-05, "loss": 0.2282, "step": 5202 }, { "epoch": 0.24, "grad_norm": 0.6705501276530482, "learning_rate": 1.779546764873479e-05, "loss": 0.4201, "step": 5203 }, { "epoch": 0.24, "grad_norm": 0.33688539306247717, "learning_rate": 1.7794535613124907e-05, "loss": 0.21, "step": 5204 }, { "epoch": 0.24, "grad_norm": 0.5948832419149802, "learning_rate": 1.7793603404951514e-05, "loss": 0.3109, "step": 5205 }, { "epoch": 0.24, "grad_norm": 0.3289715523539774, "learning_rate": 1.7792671024235256e-05, "loss": 0.2842, "step": 5206 }, { "epoch": 0.24, "grad_norm": 0.7410247293159987, "learning_rate": 1.779173847099677e-05, "loss": 0.5316, "step": 5207 }, { "epoch": 0.24, "grad_norm": 0.4336635466877937, "learning_rate": 1.7790805745256703e-05, "loss": 0.2624, "step": 5208 }, { "epoch": 0.24, "grad_norm": 0.42537288957620334, "learning_rate": 1.778987284703571e-05, "loss": 0.2952, "step": 5209 }, { "epoch": 0.24, "grad_norm": 0.3385913774892907, "learning_rate": 1.778893977635444e-05, "loss": 0.2628, "step": 5210 }, { "epoch": 0.24, "grad_norm": 0.322044458872734, "learning_rate": 1.778800653323355e-05, "loss": 0.1447, "step": 5211 }, { "epoch": 0.24, "grad_norm": 0.4597256157625955, "learning_rate": 1.7787073117693697e-05, "loss": 0.3632, "step": 5212 }, { "epoch": 0.24, "grad_norm": 1.6048530591331966, "learning_rate": 1.7786139529755556e-05, "loss": 0.8603, "step": 5213 }, { "epoch": 0.24, "grad_norm": 0.39735037428456554, "learning_rate": 1.7785205769439787e-05, "loss": 0.3477, "step": 5214 }, { "epoch": 0.24, "grad_norm": 0.3875795766737469, "learning_rate": 1.7784271836767066e-05, "loss": 0.2253, "step": 5215 }, { "epoch": 0.24, "grad_norm": 0.3021693707839292, "learning_rate": 1.778333773175807e-05, "loss": 0.1854, "step": 5216 }, { "epoch": 0.24, "grad_norm": 0.4412982590001609, "learning_rate": 1.778240345443348e-05, "loss": 0.3371, "step": 5217 }, { "epoch": 0.24, "grad_norm": 0.3482669023945623, "learning_rate": 1.7781469004813977e-05, "loss": 0.2355, "step": 5218 }, { "epoch": 0.24, "grad_norm": 0.6817499112254342, "learning_rate": 1.7780534382920248e-05, "loss": 0.5357, "step": 5219 }, { "epoch": 0.24, "grad_norm": 0.7962834666970865, "learning_rate": 1.7779599588772987e-05, "loss": 0.5543, "step": 5220 }, { "epoch": 0.24, "grad_norm": 0.34746926440815223, "learning_rate": 1.7778664622392892e-05, "loss": 0.2112, "step": 5221 }, { "epoch": 0.24, "grad_norm": 0.39026670054962515, "learning_rate": 1.777772948380066e-05, "loss": 0.3483, "step": 5222 }, { "epoch": 0.24, "grad_norm": 0.7360276453317811, "learning_rate": 1.777679417301699e-05, "loss": 0.4678, "step": 5223 }, { "epoch": 0.24, "grad_norm": 0.23411944466051962, "learning_rate": 1.7775858690062593e-05, "loss": 0.1521, "step": 5224 }, { "epoch": 0.24, "grad_norm": 0.4210616976621871, "learning_rate": 1.777492303495818e-05, "loss": 0.3326, "step": 5225 }, { "epoch": 0.24, "grad_norm": 1.2618000448733846, "learning_rate": 1.7773987207724467e-05, "loss": 0.6404, "step": 5226 }, { "epoch": 0.24, "grad_norm": 0.38689482125096913, "learning_rate": 1.7773051208382167e-05, "loss": 0.2642, "step": 5227 }, { "epoch": 0.24, "grad_norm": 0.8526791239695162, "learning_rate": 1.7772115036952002e-05, "loss": 0.3557, "step": 5228 }, { "epoch": 0.24, "grad_norm": 0.40293118092347097, "learning_rate": 1.7771178693454703e-05, "loss": 0.3249, "step": 5229 }, { "epoch": 0.24, "grad_norm": 0.2879482304325263, "learning_rate": 1.7770242177911e-05, "loss": 0.2228, "step": 5230 }, { "epoch": 0.24, "grad_norm": 0.26981264352907014, "learning_rate": 1.7769305490341623e-05, "loss": 0.1243, "step": 5231 }, { "epoch": 0.24, "grad_norm": 0.725667607709296, "learning_rate": 1.7768368630767313e-05, "loss": 0.4623, "step": 5232 }, { "epoch": 0.24, "grad_norm": 0.41243550211453733, "learning_rate": 1.7767431599208803e-05, "loss": 0.2959, "step": 5233 }, { "epoch": 0.24, "grad_norm": 0.4381323487096433, "learning_rate": 1.776649439568685e-05, "loss": 0.2991, "step": 5234 }, { "epoch": 0.24, "grad_norm": 0.6675486121232714, "learning_rate": 1.7765557020222194e-05, "loss": 0.4056, "step": 5235 }, { "epoch": 0.24, "grad_norm": 0.4096918069581638, "learning_rate": 1.7764619472835588e-05, "loss": 0.3086, "step": 5236 }, { "epoch": 0.24, "grad_norm": 0.27266475142535906, "learning_rate": 1.7763681753547793e-05, "loss": 0.1912, "step": 5237 }, { "epoch": 0.24, "grad_norm": 0.9210119739105295, "learning_rate": 1.7762743862379568e-05, "loss": 0.5605, "step": 5238 }, { "epoch": 0.24, "grad_norm": 0.42080875466573603, "learning_rate": 1.7761805799351674e-05, "loss": 0.2861, "step": 5239 }, { "epoch": 0.24, "grad_norm": 0.8105802284617355, "learning_rate": 1.7760867564484878e-05, "loss": 0.4493, "step": 5240 }, { "epoch": 0.24, "grad_norm": 0.39538362129150423, "learning_rate": 1.7759929157799956e-05, "loss": 0.2679, "step": 5241 }, { "epoch": 0.24, "grad_norm": 0.3705966075020011, "learning_rate": 1.7758990579317684e-05, "loss": 0.2781, "step": 5242 }, { "epoch": 0.24, "grad_norm": 0.33612533562172037, "learning_rate": 1.7758051829058835e-05, "loss": 0.2386, "step": 5243 }, { "epoch": 0.24, "grad_norm": 1.1810369821346143, "learning_rate": 1.77571129070442e-05, "loss": 0.4231, "step": 5244 }, { "epoch": 0.24, "grad_norm": 0.3659901356406624, "learning_rate": 1.775617381329456e-05, "loss": 0.3135, "step": 5245 }, { "epoch": 0.24, "grad_norm": 0.46470476029951874, "learning_rate": 1.7755234547830707e-05, "loss": 0.3589, "step": 5246 }, { "epoch": 0.24, "grad_norm": 1.0560709295396808, "learning_rate": 1.7754295110673433e-05, "loss": 0.2742, "step": 5247 }, { "epoch": 0.24, "grad_norm": 0.314447804831735, "learning_rate": 1.7753355501843544e-05, "loss": 0.2245, "step": 5248 }, { "epoch": 0.24, "grad_norm": 0.419363263326942, "learning_rate": 1.7752415721361834e-05, "loss": 0.2283, "step": 5249 }, { "epoch": 0.24, "grad_norm": 0.49720558314543606, "learning_rate": 1.775147576924911e-05, "loss": 0.3053, "step": 5250 }, { "epoch": 0.24, "grad_norm": 0.35826283630409983, "learning_rate": 1.7750535645526185e-05, "loss": 0.2912, "step": 5251 }, { "epoch": 0.24, "grad_norm": 0.7105411017383069, "learning_rate": 1.7749595350213873e-05, "loss": 0.5077, "step": 5252 }, { "epoch": 0.24, "grad_norm": 0.4045070288356002, "learning_rate": 1.7748654883332984e-05, "loss": 0.3103, "step": 5253 }, { "epoch": 0.24, "grad_norm": 0.3252566904611039, "learning_rate": 1.7747714244904348e-05, "loss": 0.1992, "step": 5254 }, { "epoch": 0.24, "grad_norm": 0.5219822961456272, "learning_rate": 1.7746773434948786e-05, "loss": 0.3132, "step": 5255 }, { "epoch": 0.24, "grad_norm": 0.8543340044476929, "learning_rate": 1.7745832453487128e-05, "loss": 0.4871, "step": 5256 }, { "epoch": 0.24, "grad_norm": 0.3374603771074712, "learning_rate": 1.7744891300540204e-05, "loss": 0.2129, "step": 5257 }, { "epoch": 0.24, "grad_norm": 0.32247186276274276, "learning_rate": 1.7743949976128848e-05, "loss": 0.3165, "step": 5258 }, { "epoch": 0.24, "grad_norm": 1.037241230234345, "learning_rate": 1.774300848027391e-05, "loss": 0.5874, "step": 5259 }, { "epoch": 0.24, "grad_norm": 0.3553030391734681, "learning_rate": 1.774206681299622e-05, "loss": 0.2151, "step": 5260 }, { "epoch": 0.24, "grad_norm": 0.3177485099226369, "learning_rate": 1.7741124974316633e-05, "loss": 0.2668, "step": 5261 }, { "epoch": 0.24, "grad_norm": 0.9190638380058797, "learning_rate": 1.7740182964256006e-05, "loss": 0.5113, "step": 5262 }, { "epoch": 0.24, "grad_norm": 0.3607663271359514, "learning_rate": 1.7739240782835182e-05, "loss": 0.2741, "step": 5263 }, { "epoch": 0.24, "grad_norm": 0.4515247262454211, "learning_rate": 1.773829843007503e-05, "loss": 0.3381, "step": 5264 }, { "epoch": 0.24, "grad_norm": 0.42496545092196186, "learning_rate": 1.7737355905996406e-05, "loss": 0.3362, "step": 5265 }, { "epoch": 0.24, "grad_norm": 0.4016495691264929, "learning_rate": 1.773641321062018e-05, "loss": 0.2628, "step": 5266 }, { "epoch": 0.24, "grad_norm": 0.9906745612945159, "learning_rate": 1.7735470343967226e-05, "loss": 0.4214, "step": 5267 }, { "epoch": 0.24, "grad_norm": 0.7190889771795383, "learning_rate": 1.773452730605841e-05, "loss": 0.4796, "step": 5268 }, { "epoch": 0.24, "grad_norm": 0.3617645310747818, "learning_rate": 1.773358409691462e-05, "loss": 0.2783, "step": 5269 }, { "epoch": 0.24, "grad_norm": 0.2685715013075121, "learning_rate": 1.7732640716556724e-05, "loss": 0.1528, "step": 5270 }, { "epoch": 0.24, "grad_norm": 0.9698734634096667, "learning_rate": 1.773169716500562e-05, "loss": 0.5998, "step": 5271 }, { "epoch": 0.24, "grad_norm": 0.4426938051822305, "learning_rate": 1.773075344228219e-05, "loss": 0.304, "step": 5272 }, { "epoch": 0.24, "grad_norm": 0.37919730133409146, "learning_rate": 1.7729809548407333e-05, "loss": 0.2818, "step": 5273 }, { "epoch": 0.24, "grad_norm": 0.9676076174283644, "learning_rate": 1.772886548340194e-05, "loss": 0.4784, "step": 5274 }, { "epoch": 0.24, "grad_norm": 0.4434788850840855, "learning_rate": 1.7727921247286916e-05, "loss": 0.2774, "step": 5275 }, { "epoch": 0.24, "grad_norm": 0.259057207680575, "learning_rate": 1.7726976840083163e-05, "loss": 0.1555, "step": 5276 }, { "epoch": 0.24, "grad_norm": 0.4464882984457147, "learning_rate": 1.772603226181159e-05, "loss": 0.351, "step": 5277 }, { "epoch": 0.24, "grad_norm": 0.48389635229113404, "learning_rate": 1.7725087512493112e-05, "loss": 0.3329, "step": 5278 }, { "epoch": 0.24, "grad_norm": 0.6150766030173602, "learning_rate": 1.7724142592148638e-05, "loss": 0.4035, "step": 5279 }, { "epoch": 0.24, "grad_norm": 1.0467302687463638, "learning_rate": 1.7723197500799094e-05, "loss": 0.4174, "step": 5280 }, { "epoch": 0.24, "grad_norm": 0.32914122859711714, "learning_rate": 1.77222522384654e-05, "loss": 0.2756, "step": 5281 }, { "epoch": 0.24, "grad_norm": 0.29164539262552425, "learning_rate": 1.7721306805168482e-05, "loss": 0.2218, "step": 5282 }, { "epoch": 0.24, "grad_norm": 1.0002549553393807, "learning_rate": 1.7720361200929277e-05, "loss": 0.2734, "step": 5283 }, { "epoch": 0.24, "grad_norm": 0.526273043712311, "learning_rate": 1.7719415425768717e-05, "loss": 0.3203, "step": 5284 }, { "epoch": 0.24, "grad_norm": 0.48531547345330445, "learning_rate": 1.7718469479707736e-05, "loss": 0.3528, "step": 5285 }, { "epoch": 0.24, "grad_norm": 0.7371777220393582, "learning_rate": 1.771752336276728e-05, "loss": 0.3691, "step": 5286 }, { "epoch": 0.24, "grad_norm": 0.39809046630610884, "learning_rate": 1.7716577074968294e-05, "loss": 0.3049, "step": 5287 }, { "epoch": 0.24, "grad_norm": 0.39300245671791856, "learning_rate": 1.7715630616331732e-05, "loss": 0.2478, "step": 5288 }, { "epoch": 0.24, "grad_norm": 0.30475467441553045, "learning_rate": 1.7714683986878546e-05, "loss": 0.2109, "step": 5289 }, { "epoch": 0.24, "grad_norm": 0.4939294644596549, "learning_rate": 1.771373718662969e-05, "loss": 0.298, "step": 5290 }, { "epoch": 0.24, "grad_norm": 0.49380525786565277, "learning_rate": 1.7712790215606125e-05, "loss": 0.4032, "step": 5291 }, { "epoch": 0.24, "grad_norm": 0.4937465102377239, "learning_rate": 1.771184307382882e-05, "loss": 0.4249, "step": 5292 }, { "epoch": 0.24, "grad_norm": 0.3509530922577548, "learning_rate": 1.7710895761318742e-05, "loss": 0.2236, "step": 5293 }, { "epoch": 0.24, "grad_norm": 0.3606802472398584, "learning_rate": 1.7709948278096864e-05, "loss": 0.2475, "step": 5294 }, { "epoch": 0.24, "grad_norm": 0.4441923229747742, "learning_rate": 1.7709000624184162e-05, "loss": 0.2267, "step": 5295 }, { "epoch": 0.24, "grad_norm": 0.3730345544866859, "learning_rate": 1.7708052799601616e-05, "loss": 0.2222, "step": 5296 }, { "epoch": 0.24, "grad_norm": 0.3338858572538388, "learning_rate": 1.770710480437021e-05, "loss": 0.3307, "step": 5297 }, { "epoch": 0.24, "grad_norm": 0.8327284263440696, "learning_rate": 1.770615663851093e-05, "loss": 0.5879, "step": 5298 }, { "epoch": 0.24, "grad_norm": 0.36181070825841877, "learning_rate": 1.7705208302044773e-05, "loss": 0.2201, "step": 5299 }, { "epoch": 0.24, "grad_norm": 0.3145369428981611, "learning_rate": 1.7704259794992734e-05, "loss": 0.21, "step": 5300 }, { "epoch": 0.24, "grad_norm": 0.40542346749301694, "learning_rate": 1.7703311117375802e-05, "loss": 0.3475, "step": 5301 }, { "epoch": 0.24, "grad_norm": 0.3542386300371763, "learning_rate": 1.7702362269214987e-05, "loss": 0.2645, "step": 5302 }, { "epoch": 0.24, "grad_norm": 0.5477915690020112, "learning_rate": 1.7701413250531297e-05, "loss": 0.3965, "step": 5303 }, { "epoch": 0.24, "grad_norm": 1.0874136735992839, "learning_rate": 1.770046406134574e-05, "loss": 0.7622, "step": 5304 }, { "epoch": 0.24, "grad_norm": 0.35738715186187187, "learning_rate": 1.769951470167933e-05, "loss": 0.2956, "step": 5305 }, { "epoch": 0.24, "grad_norm": 0.3577696108706678, "learning_rate": 1.7698565171553084e-05, "loss": 0.2537, "step": 5306 }, { "epoch": 0.24, "grad_norm": 0.2835751065905806, "learning_rate": 1.7697615470988028e-05, "loss": 0.2337, "step": 5307 }, { "epoch": 0.24, "grad_norm": 0.5011495333605145, "learning_rate": 1.769666560000518e-05, "loss": 0.3584, "step": 5308 }, { "epoch": 0.24, "grad_norm": 0.35434548498309154, "learning_rate": 1.769571555862558e-05, "loss": 0.2608, "step": 5309 }, { "epoch": 0.24, "grad_norm": 1.4187300353634535, "learning_rate": 1.769476534687025e-05, "loss": 0.8306, "step": 5310 }, { "epoch": 0.24, "grad_norm": 0.8124537410002787, "learning_rate": 1.7693814964760232e-05, "loss": 0.4306, "step": 5311 }, { "epoch": 0.24, "grad_norm": 0.3232773986465274, "learning_rate": 1.769286441231657e-05, "loss": 0.1941, "step": 5312 }, { "epoch": 0.24, "grad_norm": 0.39943158492088676, "learning_rate": 1.7691913689560298e-05, "loss": 0.3028, "step": 5313 }, { "epoch": 0.24, "grad_norm": 0.759021070553651, "learning_rate": 1.7690962796512473e-05, "loss": 0.4193, "step": 5314 }, { "epoch": 0.24, "grad_norm": 0.37068074210510876, "learning_rate": 1.7690011733194147e-05, "loss": 0.2606, "step": 5315 }, { "epoch": 0.24, "grad_norm": 1.4859877427662964, "learning_rate": 1.7689060499626372e-05, "loss": 0.7402, "step": 5316 }, { "epoch": 0.24, "grad_norm": 0.3588662416475269, "learning_rate": 1.768810909583021e-05, "loss": 0.2907, "step": 5317 }, { "epoch": 0.24, "grad_norm": 0.49581248611612044, "learning_rate": 1.7687157521826717e-05, "loss": 0.3456, "step": 5318 }, { "epoch": 0.24, "grad_norm": 1.081077636446496, "learning_rate": 1.7686205777636968e-05, "loss": 0.3914, "step": 5319 }, { "epoch": 0.24, "grad_norm": 0.4724669378778735, "learning_rate": 1.7685253863282034e-05, "loss": 0.2636, "step": 5320 }, { "epoch": 0.24, "grad_norm": 0.4383230350439651, "learning_rate": 1.7684301778782985e-05, "loss": 0.254, "step": 5321 }, { "epoch": 0.24, "grad_norm": 1.1850448914299558, "learning_rate": 1.76833495241609e-05, "loss": 0.5776, "step": 5322 }, { "epoch": 0.24, "grad_norm": 0.7103752962802321, "learning_rate": 1.768239709943686e-05, "loss": 0.4159, "step": 5323 }, { "epoch": 0.24, "grad_norm": 0.4637714608330094, "learning_rate": 1.7681444504631954e-05, "loss": 0.3284, "step": 5324 }, { "epoch": 0.24, "grad_norm": 0.7745097220540436, "learning_rate": 1.768049173976727e-05, "loss": 0.3336, "step": 5325 }, { "epoch": 0.24, "grad_norm": 0.30202984267597993, "learning_rate": 1.7679538804863903e-05, "loss": 0.168, "step": 5326 }, { "epoch": 0.24, "grad_norm": 0.45594436122596893, "learning_rate": 1.7678585699942948e-05, "loss": 0.3128, "step": 5327 }, { "epoch": 0.24, "grad_norm": 0.5566839352451198, "learning_rate": 1.7677632425025506e-05, "loss": 0.4382, "step": 5328 }, { "epoch": 0.24, "grad_norm": 0.39505831684370435, "learning_rate": 1.7676678980132682e-05, "loss": 0.2559, "step": 5329 }, { "epoch": 0.24, "grad_norm": 0.40993040256619195, "learning_rate": 1.7675725365285584e-05, "loss": 0.31, "step": 5330 }, { "epoch": 0.24, "grad_norm": 0.7886156900093502, "learning_rate": 1.7674771580505322e-05, "loss": 0.483, "step": 5331 }, { "epoch": 0.24, "grad_norm": 0.36137530796608164, "learning_rate": 1.767381762581302e-05, "loss": 0.1697, "step": 5332 }, { "epoch": 0.24, "grad_norm": 0.3543864315454361, "learning_rate": 1.7672863501229785e-05, "loss": 0.2562, "step": 5333 }, { "epoch": 0.25, "grad_norm": 1.3811676388884337, "learning_rate": 1.7671909206776755e-05, "loss": 0.7972, "step": 5334 }, { "epoch": 0.25, "grad_norm": 0.6804625592687791, "learning_rate": 1.7670954742475043e-05, "loss": 0.3052, "step": 5335 }, { "epoch": 0.25, "grad_norm": 0.36262471820811604, "learning_rate": 1.7670000108345792e-05, "loss": 0.3055, "step": 5336 }, { "epoch": 0.25, "grad_norm": 0.5399199708988571, "learning_rate": 1.7669045304410125e-05, "loss": 0.391, "step": 5337 }, { "epoch": 0.25, "grad_norm": 0.4891472477728291, "learning_rate": 1.766809033068919e-05, "loss": 0.1823, "step": 5338 }, { "epoch": 0.25, "grad_norm": 0.3811185111368319, "learning_rate": 1.7667135187204122e-05, "loss": 0.2745, "step": 5339 }, { "epoch": 0.25, "grad_norm": 0.6169401152155215, "learning_rate": 1.7666179873976076e-05, "loss": 0.4299, "step": 5340 }, { "epoch": 0.25, "grad_norm": 0.35763136082698405, "learning_rate": 1.7665224391026194e-05, "loss": 0.2768, "step": 5341 }, { "epoch": 0.25, "grad_norm": 0.40930570460072796, "learning_rate": 1.766426873837563e-05, "loss": 0.2608, "step": 5342 }, { "epoch": 0.25, "grad_norm": 0.732089951340725, "learning_rate": 1.7663312916045546e-05, "loss": 0.5222, "step": 5343 }, { "epoch": 0.25, "grad_norm": 0.3911261660624748, "learning_rate": 1.7662356924057097e-05, "loss": 0.2784, "step": 5344 }, { "epoch": 0.25, "grad_norm": 0.3097672211102889, "learning_rate": 1.7661400762431452e-05, "loss": 0.1856, "step": 5345 }, { "epoch": 0.25, "grad_norm": 0.4216428171639803, "learning_rate": 1.766044443118978e-05, "loss": 0.271, "step": 5346 }, { "epoch": 0.25, "grad_norm": 0.7318256855388012, "learning_rate": 1.7659487930353254e-05, "loss": 0.4832, "step": 5347 }, { "epoch": 0.25, "grad_norm": 0.32713674854107844, "learning_rate": 1.7658531259943043e-05, "loss": 0.2473, "step": 5348 }, { "epoch": 0.25, "grad_norm": 0.5035967159093254, "learning_rate": 1.7657574419980332e-05, "loss": 0.352, "step": 5349 }, { "epoch": 0.25, "grad_norm": 1.2419471247231, "learning_rate": 1.7656617410486304e-05, "loss": 0.6467, "step": 5350 }, { "epoch": 0.25, "grad_norm": 0.3762643443660175, "learning_rate": 1.7655660231482146e-05, "loss": 0.2314, "step": 5351 }, { "epoch": 0.25, "grad_norm": 0.6191544290234654, "learning_rate": 1.765470288298905e-05, "loss": 0.4019, "step": 5352 }, { "epoch": 0.25, "grad_norm": 0.4127265787398909, "learning_rate": 1.765374536502821e-05, "loss": 0.3071, "step": 5353 }, { "epoch": 0.25, "grad_norm": 0.362009018621164, "learning_rate": 1.765278767762082e-05, "loss": 0.2875, "step": 5354 }, { "epoch": 0.25, "grad_norm": 0.3155714146758715, "learning_rate": 1.765182982078809e-05, "loss": 0.1206, "step": 5355 }, { "epoch": 0.25, "grad_norm": 0.4476672216955325, "learning_rate": 1.7650871794551224e-05, "loss": 0.3252, "step": 5356 }, { "epoch": 0.25, "grad_norm": 0.4287837833380792, "learning_rate": 1.764991359893143e-05, "loss": 0.289, "step": 5357 }, { "epoch": 0.25, "grad_norm": 0.8789563948301996, "learning_rate": 1.764895523394992e-05, "loss": 0.4322, "step": 5358 }, { "epoch": 0.25, "grad_norm": 0.44726550857237085, "learning_rate": 1.7647996699627917e-05, "loss": 0.3142, "step": 5359 }, { "epoch": 0.25, "grad_norm": 0.40362695607033316, "learning_rate": 1.7647037995986632e-05, "loss": 0.2936, "step": 5360 }, { "epoch": 0.25, "grad_norm": 0.3299130694258084, "learning_rate": 1.7646079123047304e-05, "loss": 0.241, "step": 5361 }, { "epoch": 0.25, "grad_norm": 0.8166023977599337, "learning_rate": 1.7645120080831148e-05, "loss": 0.4047, "step": 5362 }, { "epoch": 0.25, "grad_norm": 0.42804685778601004, "learning_rate": 1.7644160869359404e-05, "loss": 0.3248, "step": 5363 }, { "epoch": 0.25, "grad_norm": 0.49572994572245704, "learning_rate": 1.7643201488653304e-05, "loss": 0.3077, "step": 5364 }, { "epoch": 0.25, "grad_norm": 1.0406711423087707, "learning_rate": 1.7642241938734094e-05, "loss": 0.4977, "step": 5365 }, { "epoch": 0.25, "grad_norm": 0.3972197347625694, "learning_rate": 1.764128221962301e-05, "loss": 0.2901, "step": 5366 }, { "epoch": 0.25, "grad_norm": 0.32096132728343624, "learning_rate": 1.7640322331341303e-05, "loss": 0.1718, "step": 5367 }, { "epoch": 0.25, "grad_norm": 0.4084009645713907, "learning_rate": 1.763936227391022e-05, "loss": 0.2684, "step": 5368 }, { "epoch": 0.25, "grad_norm": 0.4286986063950546, "learning_rate": 1.7638402047351025e-05, "loss": 0.2882, "step": 5369 }, { "epoch": 0.25, "grad_norm": 0.7587476274767847, "learning_rate": 1.7637441651684965e-05, "loss": 0.4841, "step": 5370 }, { "epoch": 0.25, "grad_norm": 0.675007792203134, "learning_rate": 1.7636481086933313e-05, "loss": 0.3884, "step": 5371 }, { "epoch": 0.25, "grad_norm": 0.35202158900089725, "learning_rate": 1.7635520353117325e-05, "loss": 0.2711, "step": 5372 }, { "epoch": 0.25, "grad_norm": 0.2668374327689299, "learning_rate": 1.763455945025828e-05, "loss": 0.1899, "step": 5373 }, { "epoch": 0.25, "grad_norm": 0.750337832601005, "learning_rate": 1.7633598378377445e-05, "loss": 0.4295, "step": 5374 }, { "epoch": 0.25, "grad_norm": 0.3860518586114576, "learning_rate": 1.76326371374961e-05, "loss": 0.2965, "step": 5375 }, { "epoch": 0.25, "grad_norm": 0.5922896011481424, "learning_rate": 1.7631675727635523e-05, "loss": 0.3579, "step": 5376 }, { "epoch": 0.25, "grad_norm": 1.2020795345724793, "learning_rate": 1.7630714148817003e-05, "loss": 0.4417, "step": 5377 }, { "epoch": 0.25, "grad_norm": 0.4298680205314211, "learning_rate": 1.7629752401061827e-05, "loss": 0.258, "step": 5378 }, { "epoch": 0.25, "grad_norm": 0.2817596798236294, "learning_rate": 1.7628790484391284e-05, "loss": 0.1942, "step": 5379 }, { "epoch": 0.25, "grad_norm": 0.4280741300576875, "learning_rate": 1.7627828398826677e-05, "loss": 0.3272, "step": 5380 }, { "epoch": 0.25, "grad_norm": 0.3555957654039311, "learning_rate": 1.7626866144389298e-05, "loss": 0.2068, "step": 5381 }, { "epoch": 0.25, "grad_norm": 0.7844070263543746, "learning_rate": 1.762590372110045e-05, "loss": 0.4738, "step": 5382 }, { "epoch": 0.25, "grad_norm": 1.0673990680732155, "learning_rate": 1.7624941128981447e-05, "loss": 0.6371, "step": 5383 }, { "epoch": 0.25, "grad_norm": 0.3092697260348129, "learning_rate": 1.76239783680536e-05, "loss": 0.2296, "step": 5384 }, { "epoch": 0.25, "grad_norm": 0.27674502266295214, "learning_rate": 1.7623015438338213e-05, "loss": 0.2336, "step": 5385 }, { "epoch": 0.25, "grad_norm": 1.555437244312456, "learning_rate": 1.7622052339856616e-05, "loss": 0.827, "step": 5386 }, { "epoch": 0.25, "grad_norm": 0.37882981261221843, "learning_rate": 1.7621089072630124e-05, "loss": 0.2686, "step": 5387 }, { "epoch": 0.25, "grad_norm": 0.45064335583607196, "learning_rate": 1.7620125636680066e-05, "loss": 0.3539, "step": 5388 }, { "epoch": 0.25, "grad_norm": 1.3016383335616688, "learning_rate": 1.761916203202777e-05, "loss": 0.7861, "step": 5389 }, { "epoch": 0.25, "grad_norm": 0.3699306466568561, "learning_rate": 1.7618198258694573e-05, "loss": 0.2104, "step": 5390 }, { "epoch": 0.25, "grad_norm": 0.3166011455458693, "learning_rate": 1.7617234316701805e-05, "loss": 0.1793, "step": 5391 }, { "epoch": 0.25, "grad_norm": 0.40494307795963713, "learning_rate": 1.7616270206070814e-05, "loss": 0.3503, "step": 5392 }, { "epoch": 0.25, "grad_norm": 0.3613131161632067, "learning_rate": 1.761530592682294e-05, "loss": 0.2929, "step": 5393 }, { "epoch": 0.25, "grad_norm": 0.6878601471951274, "learning_rate": 1.7614341478979534e-05, "loss": 0.4139, "step": 5394 }, { "epoch": 0.25, "grad_norm": 0.5634946063770254, "learning_rate": 1.7613376862561945e-05, "loss": 0.3851, "step": 5395 }, { "epoch": 0.25, "grad_norm": 0.4720377988624362, "learning_rate": 1.761241207759153e-05, "loss": 0.3021, "step": 5396 }, { "epoch": 0.25, "grad_norm": 0.28455155420756945, "learning_rate": 1.761144712408965e-05, "loss": 0.1755, "step": 5397 }, { "epoch": 0.25, "grad_norm": 0.7821847278938959, "learning_rate": 1.7610482002077664e-05, "loss": 0.4859, "step": 5398 }, { "epoch": 0.25, "grad_norm": 0.6151619713153066, "learning_rate": 1.7609516711576945e-05, "loss": 0.4159, "step": 5399 }, { "epoch": 0.25, "grad_norm": 0.342258907693212, "learning_rate": 1.7608551252608856e-05, "loss": 0.2748, "step": 5400 }, { "epoch": 0.25, "grad_norm": 1.3379169610531831, "learning_rate": 1.7607585625194777e-05, "loss": 0.717, "step": 5401 }, { "epoch": 0.25, "grad_norm": 0.4483810569423919, "learning_rate": 1.7606619829356085e-05, "loss": 0.2805, "step": 5402 }, { "epoch": 0.25, "grad_norm": 0.2538079070203393, "learning_rate": 1.7605653865114164e-05, "loss": 0.1792, "step": 5403 }, { "epoch": 0.25, "grad_norm": 0.5764539780612977, "learning_rate": 1.7604687732490395e-05, "loss": 0.3614, "step": 5404 }, { "epoch": 0.25, "grad_norm": 0.4828898287129161, "learning_rate": 1.7603721431506166e-05, "loss": 0.3602, "step": 5405 }, { "epoch": 0.25, "grad_norm": 0.551999541937087, "learning_rate": 1.760275496218288e-05, "loss": 0.4022, "step": 5406 }, { "epoch": 0.25, "grad_norm": 0.49868403841116477, "learning_rate": 1.7601788324541923e-05, "loss": 0.3023, "step": 5407 }, { "epoch": 0.25, "grad_norm": 0.3641138768088273, "learning_rate": 1.7600821518604697e-05, "loss": 0.2889, "step": 5408 }, { "epoch": 0.25, "grad_norm": 0.6178808255566147, "learning_rate": 1.759985454439261e-05, "loss": 0.3469, "step": 5409 }, { "epoch": 0.25, "grad_norm": 0.3737169623201628, "learning_rate": 1.7598887401927067e-05, "loss": 0.2347, "step": 5410 }, { "epoch": 0.25, "grad_norm": 0.46720364496599337, "learning_rate": 1.7597920091229485e-05, "loss": 0.3177, "step": 5411 }, { "epoch": 0.25, "grad_norm": 0.39455697327976896, "learning_rate": 1.759695261232127e-05, "loss": 0.2881, "step": 5412 }, { "epoch": 0.25, "grad_norm": 1.0322737334721312, "learning_rate": 1.7595984965223847e-05, "loss": 0.4857, "step": 5413 }, { "epoch": 0.25, "grad_norm": 0.7169512191447588, "learning_rate": 1.759501714995864e-05, "loss": 0.3822, "step": 5414 }, { "epoch": 0.25, "grad_norm": 0.4219749073623418, "learning_rate": 1.7594049166547073e-05, "loss": 0.3461, "step": 5415 }, { "epoch": 0.25, "grad_norm": 0.32681878773960116, "learning_rate": 1.7593081015010576e-05, "loss": 0.2607, "step": 5416 }, { "epoch": 0.25, "grad_norm": 0.34474579751439477, "learning_rate": 1.7592112695370583e-05, "loss": 0.1723, "step": 5417 }, { "epoch": 0.25, "grad_norm": 0.518080056678753, "learning_rate": 1.7591144207648537e-05, "loss": 0.3486, "step": 5418 }, { "epoch": 0.25, "grad_norm": 0.5348655738030185, "learning_rate": 1.759017555186587e-05, "loss": 0.4202, "step": 5419 }, { "epoch": 0.25, "grad_norm": 0.39325533270405216, "learning_rate": 1.7589206728044033e-05, "loss": 0.2187, "step": 5420 }, { "epoch": 0.25, "grad_norm": 0.3914136878533845, "learning_rate": 1.7588237736204473e-05, "loss": 0.3559, "step": 5421 }, { "epoch": 0.25, "grad_norm": 0.6978563700692048, "learning_rate": 1.7587268576368644e-05, "loss": 0.4926, "step": 5422 }, { "epoch": 0.25, "grad_norm": 0.2376020644869763, "learning_rate": 1.7586299248558002e-05, "loss": 0.1381, "step": 5423 }, { "epoch": 0.25, "grad_norm": 0.37996293801078646, "learning_rate": 1.7585329752794005e-05, "loss": 0.3394, "step": 5424 }, { "epoch": 0.25, "grad_norm": 1.194114609951105, "learning_rate": 1.758436008909812e-05, "loss": 0.7244, "step": 5425 }, { "epoch": 0.25, "grad_norm": 0.3303461977181814, "learning_rate": 1.758339025749181e-05, "loss": 0.2354, "step": 5426 }, { "epoch": 0.25, "grad_norm": 0.4878896305020034, "learning_rate": 1.758242025799655e-05, "loss": 0.4082, "step": 5427 }, { "epoch": 0.25, "grad_norm": 0.3890857832072473, "learning_rate": 1.758145009063381e-05, "loss": 0.3401, "step": 5428 }, { "epoch": 0.25, "grad_norm": 0.2596334678605455, "learning_rate": 1.758047975542508e-05, "loss": 0.1725, "step": 5429 }, { "epoch": 0.25, "grad_norm": 0.6231709559718339, "learning_rate": 1.7579509252391828e-05, "loss": 0.4401, "step": 5430 }, { "epoch": 0.25, "grad_norm": 0.3003304733335632, "learning_rate": 1.7578538581555547e-05, "loss": 0.2721, "step": 5431 }, { "epoch": 0.25, "grad_norm": 0.7164788872061879, "learning_rate": 1.757756774293773e-05, "loss": 0.4277, "step": 5432 }, { "epoch": 0.25, "grad_norm": 0.3870120340850268, "learning_rate": 1.757659673655986e-05, "loss": 0.2765, "step": 5433 }, { "epoch": 0.25, "grad_norm": 0.8221862114737117, "learning_rate": 1.7575625562443446e-05, "loss": 0.5428, "step": 5434 }, { "epoch": 0.25, "grad_norm": 0.5011581808411303, "learning_rate": 1.7574654220609982e-05, "loss": 0.3634, "step": 5435 }, { "epoch": 0.25, "grad_norm": 0.3669117426025068, "learning_rate": 1.7573682711080976e-05, "loss": 0.2468, "step": 5436 }, { "epoch": 0.25, "grad_norm": 0.33691250694715735, "learning_rate": 1.757271103387793e-05, "loss": 0.173, "step": 5437 }, { "epoch": 0.25, "grad_norm": 0.6379642204530226, "learning_rate": 1.7571739189022365e-05, "loss": 0.4672, "step": 5438 }, { "epoch": 0.25, "grad_norm": 0.33080544044469334, "learning_rate": 1.757076717653579e-05, "loss": 0.2518, "step": 5439 }, { "epoch": 0.25, "grad_norm": 0.6051379155582781, "learning_rate": 1.7569794996439723e-05, "loss": 0.4238, "step": 5440 }, { "epoch": 0.25, "grad_norm": 1.0151758589234574, "learning_rate": 1.7568822648755698e-05, "loss": 0.435, "step": 5441 }, { "epoch": 0.25, "grad_norm": 0.2695400837074594, "learning_rate": 1.7567850133505228e-05, "loss": 0.199, "step": 5442 }, { "epoch": 0.25, "grad_norm": 0.344835665759451, "learning_rate": 1.7566877450709853e-05, "loss": 0.2654, "step": 5443 }, { "epoch": 0.25, "grad_norm": 0.39409050533222306, "learning_rate": 1.7565904600391107e-05, "loss": 0.3045, "step": 5444 }, { "epoch": 0.25, "grad_norm": 0.4677493086821581, "learning_rate": 1.7564931582570518e-05, "loss": 0.3158, "step": 5445 }, { "epoch": 0.25, "grad_norm": 0.5786072097585501, "learning_rate": 1.756395839726964e-05, "loss": 0.381, "step": 5446 }, { "epoch": 0.25, "grad_norm": 0.4359064415087416, "learning_rate": 1.7562985044510013e-05, "loss": 0.3011, "step": 5447 }, { "epoch": 0.25, "grad_norm": 0.4554207174239243, "learning_rate": 1.7562011524313187e-05, "loss": 0.3203, "step": 5448 }, { "epoch": 0.25, "grad_norm": 0.43245895209493135, "learning_rate": 1.7561037836700712e-05, "loss": 0.2733, "step": 5449 }, { "epoch": 0.25, "grad_norm": 0.39512325259801495, "learning_rate": 1.7560063981694147e-05, "loss": 0.2513, "step": 5450 }, { "epoch": 0.25, "grad_norm": 0.4566824497437569, "learning_rate": 1.7559089959315055e-05, "loss": 0.3156, "step": 5451 }, { "epoch": 0.25, "grad_norm": 0.4423352364127872, "learning_rate": 1.7558115769584993e-05, "loss": 0.2668, "step": 5452 }, { "epoch": 0.25, "grad_norm": 0.8350792917082918, "learning_rate": 1.7557141412525537e-05, "loss": 0.3648, "step": 5453 }, { "epoch": 0.25, "grad_norm": 0.4237963037705092, "learning_rate": 1.7556166888158247e-05, "loss": 0.3163, "step": 5454 }, { "epoch": 0.25, "grad_norm": 0.3718714647792502, "learning_rate": 1.755519219650471e-05, "loss": 0.3253, "step": 5455 }, { "epoch": 0.25, "grad_norm": 0.43311022096021995, "learning_rate": 1.7554217337586498e-05, "loss": 0.2025, "step": 5456 }, { "epoch": 0.25, "grad_norm": 0.33321326062518913, "learning_rate": 1.7553242311425193e-05, "loss": 0.2365, "step": 5457 }, { "epoch": 0.25, "grad_norm": 3.2902275822575535, "learning_rate": 1.7552267118042387e-05, "loss": 0.8229, "step": 5458 }, { "epoch": 0.25, "grad_norm": 0.44687465848182856, "learning_rate": 1.7551291757459665e-05, "loss": 0.2649, "step": 5459 }, { "epoch": 0.25, "grad_norm": 0.7245716937805797, "learning_rate": 1.755031622969862e-05, "loss": 0.3053, "step": 5460 }, { "epoch": 0.25, "grad_norm": 0.7769051688167621, "learning_rate": 1.7549340534780852e-05, "loss": 0.4785, "step": 5461 }, { "epoch": 0.25, "grad_norm": 1.4746107021395496, "learning_rate": 1.754836467272796e-05, "loss": 0.2147, "step": 5462 }, { "epoch": 0.25, "grad_norm": 0.47671657402352136, "learning_rate": 1.754738864356155e-05, "loss": 0.3301, "step": 5463 }, { "epoch": 0.25, "grad_norm": 0.43391529410615104, "learning_rate": 1.754641244730323e-05, "loss": 0.3225, "step": 5464 }, { "epoch": 0.25, "grad_norm": 0.5811773940306859, "learning_rate": 1.754543608397461e-05, "loss": 0.2113, "step": 5465 }, { "epoch": 0.25, "grad_norm": 0.4504944939740752, "learning_rate": 1.754445955359731e-05, "loss": 0.3213, "step": 5466 }, { "epoch": 0.25, "grad_norm": 0.6286840304556816, "learning_rate": 1.7543482856192944e-05, "loss": 0.3719, "step": 5467 }, { "epoch": 0.25, "grad_norm": 0.9886658602524326, "learning_rate": 1.754250599178314e-05, "loss": 0.5822, "step": 5468 }, { "epoch": 0.25, "grad_norm": 0.31863041089624744, "learning_rate": 1.7541528960389525e-05, "loss": 0.1721, "step": 5469 }, { "epoch": 0.25, "grad_norm": 0.5209085253144841, "learning_rate": 1.7540551762033725e-05, "loss": 0.319, "step": 5470 }, { "epoch": 0.25, "grad_norm": 0.4863838564157181, "learning_rate": 1.7539574396737374e-05, "loss": 0.3466, "step": 5471 }, { "epoch": 0.25, "grad_norm": 0.44091713085452766, "learning_rate": 1.7538596864522115e-05, "loss": 0.2453, "step": 5472 }, { "epoch": 0.25, "grad_norm": 1.0637869008613243, "learning_rate": 1.7537619165409588e-05, "loss": 0.5225, "step": 5473 }, { "epoch": 0.25, "grad_norm": 0.6071861090362503, "learning_rate": 1.7536641299421437e-05, "loss": 0.3065, "step": 5474 }, { "epoch": 0.25, "grad_norm": 0.3417735730793651, "learning_rate": 1.753566326657931e-05, "loss": 0.2379, "step": 5475 }, { "epoch": 0.25, "grad_norm": 0.5201840419593602, "learning_rate": 1.7534685066904865e-05, "loss": 0.2679, "step": 5476 }, { "epoch": 0.25, "grad_norm": 1.1233950613986918, "learning_rate": 1.753370670041975e-05, "loss": 0.5915, "step": 5477 }, { "epoch": 0.25, "grad_norm": 0.4922709057585047, "learning_rate": 1.7532728167145634e-05, "loss": 0.2654, "step": 5478 }, { "epoch": 0.25, "grad_norm": 0.41368725471825823, "learning_rate": 1.7531749467104173e-05, "loss": 0.3386, "step": 5479 }, { "epoch": 0.25, "grad_norm": 0.9412234697337155, "learning_rate": 1.753077060031704e-05, "loss": 0.6559, "step": 5480 }, { "epoch": 0.25, "grad_norm": 0.36733941121537433, "learning_rate": 1.7529791566805905e-05, "loss": 0.2679, "step": 5481 }, { "epoch": 0.25, "grad_norm": 0.24829608526489824, "learning_rate": 1.752881236659244e-05, "loss": 0.0957, "step": 5482 }, { "epoch": 0.25, "grad_norm": 0.5833504436778258, "learning_rate": 1.7527832999698326e-05, "loss": 0.3579, "step": 5483 }, { "epoch": 0.25, "grad_norm": 0.5183251670338388, "learning_rate": 1.7526853466145248e-05, "loss": 0.2879, "step": 5484 }, { "epoch": 0.25, "grad_norm": 0.8100983767337303, "learning_rate": 1.7525873765954883e-05, "loss": 0.3634, "step": 5485 }, { "epoch": 0.25, "grad_norm": 0.5898599995075775, "learning_rate": 1.7524893899148933e-05, "loss": 0.3806, "step": 5486 }, { "epoch": 0.25, "grad_norm": 0.3679107749423647, "learning_rate": 1.7523913865749078e-05, "loss": 0.2789, "step": 5487 }, { "epoch": 0.25, "grad_norm": 0.2682326309454914, "learning_rate": 1.7522933665777026e-05, "loss": 0.1769, "step": 5488 }, { "epoch": 0.25, "grad_norm": 0.7039475583184233, "learning_rate": 1.752195329925447e-05, "loss": 0.4787, "step": 5489 }, { "epoch": 0.25, "grad_norm": 0.4600975327503642, "learning_rate": 1.752097276620312e-05, "loss": 0.3064, "step": 5490 }, { "epoch": 0.25, "grad_norm": 0.5610867409219054, "learning_rate": 1.751999206664468e-05, "loss": 0.294, "step": 5491 }, { "epoch": 0.25, "grad_norm": 1.8584668492079515, "learning_rate": 1.7519011200600868e-05, "loss": 0.7175, "step": 5492 }, { "epoch": 0.25, "grad_norm": 0.3878836382538289, "learning_rate": 1.751803016809339e-05, "loss": 0.3, "step": 5493 }, { "epoch": 0.25, "grad_norm": 0.3295192742854476, "learning_rate": 1.751704896914397e-05, "loss": 0.2256, "step": 5494 }, { "epoch": 0.25, "grad_norm": 0.351966662334592, "learning_rate": 1.7516067603774334e-05, "loss": 0.3035, "step": 5495 }, { "epoch": 0.25, "grad_norm": 0.628088490271632, "learning_rate": 1.7515086072006206e-05, "loss": 0.3153, "step": 5496 }, { "epoch": 0.25, "grad_norm": 1.4191136572834981, "learning_rate": 1.7514104373861315e-05, "loss": 0.7893, "step": 5497 }, { "epoch": 0.25, "grad_norm": 0.4116527780239412, "learning_rate": 1.751312250936139e-05, "loss": 0.2572, "step": 5498 }, { "epoch": 0.25, "grad_norm": 0.4050692040878053, "learning_rate": 1.7512140478528178e-05, "loss": 0.2823, "step": 5499 }, { "epoch": 0.25, "grad_norm": 0.32692244189556385, "learning_rate": 1.7511158281383414e-05, "loss": 0.199, "step": 5500 }, { "epoch": 0.25, "grad_norm": 0.6127417492125128, "learning_rate": 1.7510175917948848e-05, "loss": 0.4033, "step": 5501 }, { "epoch": 0.25, "grad_norm": 0.44488398197754225, "learning_rate": 1.7509193388246228e-05, "loss": 0.2975, "step": 5502 }, { "epoch": 0.25, "grad_norm": 0.4129553053942979, "learning_rate": 1.7508210692297297e-05, "loss": 0.3279, "step": 5503 }, { "epoch": 0.25, "grad_norm": 1.0363329813131057, "learning_rate": 1.7507227830123823e-05, "loss": 0.3648, "step": 5504 }, { "epoch": 0.25, "grad_norm": 0.47848448351362016, "learning_rate": 1.750624480174756e-05, "loss": 0.3238, "step": 5505 }, { "epoch": 0.25, "grad_norm": 0.4066876099306231, "learning_rate": 1.750526160719027e-05, "loss": 0.327, "step": 5506 }, { "epoch": 0.25, "grad_norm": 0.4134135759344062, "learning_rate": 1.750427824647372e-05, "loss": 0.3355, "step": 5507 }, { "epoch": 0.25, "grad_norm": 0.2578958417890306, "learning_rate": 1.7503294719619686e-05, "loss": 0.1796, "step": 5508 }, { "epoch": 0.25, "grad_norm": 1.4175381476440974, "learning_rate": 1.7502311026649934e-05, "loss": 0.8524, "step": 5509 }, { "epoch": 0.25, "grad_norm": 1.1015000460846274, "learning_rate": 1.750132716758625e-05, "loss": 0.6689, "step": 5510 }, { "epoch": 0.25, "grad_norm": 0.31954073202572597, "learning_rate": 1.750034314245041e-05, "loss": 0.2532, "step": 5511 }, { "epoch": 0.25, "grad_norm": 0.8446500106591986, "learning_rate": 1.7499358951264206e-05, "loss": 0.4939, "step": 5512 }, { "epoch": 0.25, "grad_norm": 0.33155850869884296, "learning_rate": 1.749837459404942e-05, "loss": 0.2585, "step": 5513 }, { "epoch": 0.25, "grad_norm": 0.33607717079610905, "learning_rate": 1.749739007082785e-05, "loss": 0.2288, "step": 5514 }, { "epoch": 0.25, "grad_norm": 0.4390946480084589, "learning_rate": 1.7496405381621287e-05, "loss": 0.3352, "step": 5515 }, { "epoch": 0.25, "grad_norm": 0.940680494017994, "learning_rate": 1.7495420526451536e-05, "loss": 0.649, "step": 5516 }, { "epoch": 0.25, "grad_norm": 0.36551814881252376, "learning_rate": 1.7494435505340397e-05, "loss": 0.2102, "step": 5517 }, { "epoch": 0.25, "grad_norm": 0.7879957787318361, "learning_rate": 1.749345031830968e-05, "loss": 0.4342, "step": 5518 }, { "epoch": 0.25, "grad_norm": 0.3174696259669652, "learning_rate": 1.7492464965381194e-05, "loss": 0.2537, "step": 5519 }, { "epoch": 0.25, "grad_norm": 0.704205586885652, "learning_rate": 1.7491479446576755e-05, "loss": 0.338, "step": 5520 }, { "epoch": 0.25, "grad_norm": 0.4024999658306773, "learning_rate": 1.7490493761918183e-05, "loss": 0.2876, "step": 5521 }, { "epoch": 0.25, "grad_norm": 0.45776731587904745, "learning_rate": 1.74895079114273e-05, "loss": 0.3656, "step": 5522 }, { "epoch": 0.25, "grad_norm": 0.5962707177436178, "learning_rate": 1.7488521895125927e-05, "loss": 0.3949, "step": 5523 }, { "epoch": 0.25, "grad_norm": 0.4076064963823635, "learning_rate": 1.7487535713035897e-05, "loss": 0.3203, "step": 5524 }, { "epoch": 0.25, "grad_norm": 0.42992572075214125, "learning_rate": 1.7486549365179045e-05, "loss": 0.2369, "step": 5525 }, { "epoch": 0.25, "grad_norm": 0.3954458500919235, "learning_rate": 1.7485562851577205e-05, "loss": 0.2901, "step": 5526 }, { "epoch": 0.25, "grad_norm": 0.3271167740892574, "learning_rate": 1.748457617225222e-05, "loss": 0.2412, "step": 5527 }, { "epoch": 0.25, "grad_norm": 1.2681146861906707, "learning_rate": 1.7483589327225927e-05, "loss": 0.614, "step": 5528 }, { "epoch": 0.25, "grad_norm": 0.3870381544686057, "learning_rate": 1.7482602316520183e-05, "loss": 0.3309, "step": 5529 }, { "epoch": 0.25, "grad_norm": 0.5584428584285267, "learning_rate": 1.7481615140156837e-05, "loss": 0.3171, "step": 5530 }, { "epoch": 0.25, "grad_norm": 0.43304931957735154, "learning_rate": 1.7480627798157737e-05, "loss": 0.3246, "step": 5531 }, { "epoch": 0.25, "grad_norm": 0.44842400368575336, "learning_rate": 1.747964029054475e-05, "loss": 0.3216, "step": 5532 }, { "epoch": 0.25, "grad_norm": 0.6474209819018528, "learning_rate": 1.7478652617339738e-05, "loss": 0.4007, "step": 5533 }, { "epoch": 0.25, "grad_norm": 0.2895672645726194, "learning_rate": 1.7477664778564562e-05, "loss": 0.1985, "step": 5534 }, { "epoch": 0.25, "grad_norm": 0.6804243401085998, "learning_rate": 1.7476676774241097e-05, "loss": 0.3806, "step": 5535 }, { "epoch": 0.25, "grad_norm": 0.4569814324692539, "learning_rate": 1.7475688604391218e-05, "loss": 0.3199, "step": 5536 }, { "epoch": 0.25, "grad_norm": 0.7927348814684542, "learning_rate": 1.7474700269036794e-05, "loss": 0.4179, "step": 5537 }, { "epoch": 0.25, "grad_norm": 0.6753437141261602, "learning_rate": 1.7473711768199708e-05, "loss": 0.3589, "step": 5538 }, { "epoch": 0.25, "grad_norm": 0.35681800898559896, "learning_rate": 1.747272310190185e-05, "loss": 0.3038, "step": 5539 }, { "epoch": 0.25, "grad_norm": 0.41427464489092086, "learning_rate": 1.7471734270165103e-05, "loss": 0.2292, "step": 5540 }, { "epoch": 0.25, "grad_norm": 0.7296291415376092, "learning_rate": 1.7470745273011362e-05, "loss": 0.3438, "step": 5541 }, { "epoch": 0.25, "grad_norm": 0.3836577607827537, "learning_rate": 1.7469756110462525e-05, "loss": 0.2987, "step": 5542 }, { "epoch": 0.25, "grad_norm": 0.5038836479639311, "learning_rate": 1.746876678254048e-05, "loss": 0.3117, "step": 5543 }, { "epoch": 0.25, "grad_norm": 0.5555574090695773, "learning_rate": 1.746777728926714e-05, "loss": 0.3172, "step": 5544 }, { "epoch": 0.25, "grad_norm": 0.3703209841274183, "learning_rate": 1.746678763066441e-05, "loss": 0.2682, "step": 5545 }, { "epoch": 0.25, "grad_norm": 0.4342322226128129, "learning_rate": 1.7465797806754196e-05, "loss": 0.3675, "step": 5546 }, { "epoch": 0.25, "grad_norm": 0.28401551730067803, "learning_rate": 1.7464807817558415e-05, "loss": 0.2278, "step": 5547 }, { "epoch": 0.25, "grad_norm": 0.47794456547842595, "learning_rate": 1.7463817663098984e-05, "loss": 0.3532, "step": 5548 }, { "epoch": 0.25, "grad_norm": 1.0636473162304059, "learning_rate": 1.746282734339782e-05, "loss": 0.6456, "step": 5549 }, { "epoch": 0.25, "grad_norm": 0.32346275506783834, "learning_rate": 1.7461836858476858e-05, "loss": 0.2311, "step": 5550 }, { "epoch": 0.26, "grad_norm": 0.4756908085461457, "learning_rate": 1.7460846208358013e-05, "loss": 0.3495, "step": 5551 }, { "epoch": 0.26, "grad_norm": 0.5989378972595614, "learning_rate": 1.745985539306323e-05, "loss": 0.4244, "step": 5552 }, { "epoch": 0.26, "grad_norm": 0.23702202482773296, "learning_rate": 1.7458864412614436e-05, "loss": 0.0756, "step": 5553 }, { "epoch": 0.26, "grad_norm": 0.4152657828057209, "learning_rate": 1.745787326703357e-05, "loss": 0.3079, "step": 5554 }, { "epoch": 0.26, "grad_norm": 0.5623651184138426, "learning_rate": 1.7456881956342583e-05, "loss": 0.3968, "step": 5555 }, { "epoch": 0.26, "grad_norm": 0.4699301568068827, "learning_rate": 1.745589048056341e-05, "loss": 0.2673, "step": 5556 }, { "epoch": 0.26, "grad_norm": 0.4194592787219083, "learning_rate": 1.7454898839718014e-05, "loss": 0.337, "step": 5557 }, { "epoch": 0.26, "grad_norm": 0.39633885544904907, "learning_rate": 1.7453907033828343e-05, "loss": 0.3409, "step": 5558 }, { "epoch": 0.26, "grad_norm": 0.44179787569327256, "learning_rate": 1.7452915062916348e-05, "loss": 0.2552, "step": 5559 }, { "epoch": 0.26, "grad_norm": 0.273151538827484, "learning_rate": 1.7451922927004003e-05, "loss": 0.1806, "step": 5560 }, { "epoch": 0.26, "grad_norm": 1.49414716285304, "learning_rate": 1.7450930626113264e-05, "loss": 0.7638, "step": 5561 }, { "epoch": 0.26, "grad_norm": 0.42873582401585386, "learning_rate": 1.7449938160266105e-05, "loss": 0.3382, "step": 5562 }, { "epoch": 0.26, "grad_norm": 0.36190364832330235, "learning_rate": 1.744894552948449e-05, "loss": 0.2742, "step": 5563 }, { "epoch": 0.26, "grad_norm": 0.8134616846873223, "learning_rate": 1.7447952733790404e-05, "loss": 0.5386, "step": 5564 }, { "epoch": 0.26, "grad_norm": 0.3424556384055768, "learning_rate": 1.7446959773205822e-05, "loss": 0.1773, "step": 5565 }, { "epoch": 0.26, "grad_norm": 0.3491969739889134, "learning_rate": 1.7445966647752728e-05, "loss": 0.2339, "step": 5566 }, { "epoch": 0.26, "grad_norm": 0.5635256336639329, "learning_rate": 1.744497335745311e-05, "loss": 0.3855, "step": 5567 }, { "epoch": 0.26, "grad_norm": 0.7408331178990825, "learning_rate": 1.744397990232896e-05, "loss": 0.4669, "step": 5568 }, { "epoch": 0.26, "grad_norm": 0.4341395588313263, "learning_rate": 1.7442986282402262e-05, "loss": 0.2919, "step": 5569 }, { "epoch": 0.26, "grad_norm": 0.36482458113486965, "learning_rate": 1.7441992497695026e-05, "loss": 0.2912, "step": 5570 }, { "epoch": 0.26, "grad_norm": 0.3439316292028626, "learning_rate": 1.744099854822925e-05, "loss": 0.199, "step": 5571 }, { "epoch": 0.26, "grad_norm": 0.3621455134742387, "learning_rate": 1.7440004434026936e-05, "loss": 0.254, "step": 5572 }, { "epoch": 0.26, "grad_norm": 0.8622771821856549, "learning_rate": 1.7439010155110097e-05, "loss": 0.4139, "step": 5573 }, { "epoch": 0.26, "grad_norm": 0.4538153950747039, "learning_rate": 1.743801571150074e-05, "loss": 0.3527, "step": 5574 }, { "epoch": 0.26, "grad_norm": 0.3723007802785951, "learning_rate": 1.7437021103220885e-05, "loss": 0.2941, "step": 5575 }, { "epoch": 0.26, "grad_norm": 0.7528489615942247, "learning_rate": 1.743602633029255e-05, "loss": 0.416, "step": 5576 }, { "epoch": 0.26, "grad_norm": 0.33891911459719626, "learning_rate": 1.743503139273776e-05, "loss": 0.1789, "step": 5577 }, { "epoch": 0.26, "grad_norm": 0.3010382806624568, "learning_rate": 1.743403629057854e-05, "loss": 0.284, "step": 5578 }, { "epoch": 0.26, "grad_norm": 0.4792897691666127, "learning_rate": 1.7433041023836923e-05, "loss": 0.2743, "step": 5579 }, { "epoch": 0.26, "grad_norm": 0.7178760141248861, "learning_rate": 1.743204559253494e-05, "loss": 0.4392, "step": 5580 }, { "epoch": 0.26, "grad_norm": 0.40760133357083406, "learning_rate": 1.743104999669463e-05, "loss": 0.3359, "step": 5581 }, { "epoch": 0.26, "grad_norm": 0.4355898438559445, "learning_rate": 1.7430054236338037e-05, "loss": 0.3365, "step": 5582 }, { "epoch": 0.26, "grad_norm": 0.32172275591398297, "learning_rate": 1.7429058311487206e-05, "loss": 0.158, "step": 5583 }, { "epoch": 0.26, "grad_norm": 0.3182237158452271, "learning_rate": 1.742806222216418e-05, "loss": 0.257, "step": 5584 }, { "epoch": 0.26, "grad_norm": 0.9623790639019112, "learning_rate": 1.742706596839102e-05, "loss": 0.6209, "step": 5585 }, { "epoch": 0.26, "grad_norm": 0.36337913276205897, "learning_rate": 1.7426069550189778e-05, "loss": 0.2937, "step": 5586 }, { "epoch": 0.26, "grad_norm": 0.4394258877417794, "learning_rate": 1.7425072967582507e-05, "loss": 0.3099, "step": 5587 }, { "epoch": 0.26, "grad_norm": 1.0956554960119813, "learning_rate": 1.7424076220591285e-05, "loss": 0.8158, "step": 5588 }, { "epoch": 0.26, "grad_norm": 0.27556780452144203, "learning_rate": 1.7423079309238168e-05, "loss": 0.1987, "step": 5589 }, { "epoch": 0.26, "grad_norm": 0.39900886071089864, "learning_rate": 1.7422082233545226e-05, "loss": 0.2783, "step": 5590 }, { "epoch": 0.26, "grad_norm": 0.47435572851914515, "learning_rate": 1.742108499353454e-05, "loss": 0.2981, "step": 5591 }, { "epoch": 0.26, "grad_norm": 0.8477030425880551, "learning_rate": 1.7420087589228184e-05, "loss": 0.4025, "step": 5592 }, { "epoch": 0.26, "grad_norm": 0.3945212961533725, "learning_rate": 1.741909002064824e-05, "loss": 0.2767, "step": 5593 }, { "epoch": 0.26, "grad_norm": 0.481711592424028, "learning_rate": 1.7418092287816795e-05, "loss": 0.3647, "step": 5594 }, { "epoch": 0.26, "grad_norm": 1.458183075885284, "learning_rate": 1.7417094390755936e-05, "loss": 0.6424, "step": 5595 }, { "epoch": 0.26, "grad_norm": 0.2630830009014287, "learning_rate": 1.7416096329487753e-05, "loss": 0.1697, "step": 5596 }, { "epoch": 0.26, "grad_norm": 0.8055686369358457, "learning_rate": 1.7415098104034348e-05, "loss": 0.3358, "step": 5597 }, { "epoch": 0.26, "grad_norm": 0.4673829162713825, "learning_rate": 1.7414099714417817e-05, "loss": 0.3569, "step": 5598 }, { "epoch": 0.26, "grad_norm": 0.3351364334154858, "learning_rate": 1.7413101160660267e-05, "loss": 0.2354, "step": 5599 }, { "epoch": 0.26, "grad_norm": 1.002943229775971, "learning_rate": 1.7412102442783798e-05, "loss": 0.6525, "step": 5600 }, { "epoch": 0.26, "grad_norm": 0.5334498407865381, "learning_rate": 1.7411103560810528e-05, "loss": 0.4041, "step": 5601 }, { "epoch": 0.26, "grad_norm": 0.2526540479265092, "learning_rate": 1.7410104514762565e-05, "loss": 0.188, "step": 5602 }, { "epoch": 0.26, "grad_norm": 0.6150239105966623, "learning_rate": 1.740910530466203e-05, "loss": 0.4861, "step": 5603 }, { "epoch": 0.26, "grad_norm": 0.45494544578867, "learning_rate": 1.7408105930531047e-05, "loss": 0.3379, "step": 5604 }, { "epoch": 0.26, "grad_norm": 0.357999692372862, "learning_rate": 1.7407106392391738e-05, "loss": 0.2115, "step": 5605 }, { "epoch": 0.26, "grad_norm": 0.3755942713096529, "learning_rate": 1.740610669026623e-05, "loss": 0.3533, "step": 5606 }, { "epoch": 0.26, "grad_norm": 0.9485555677729863, "learning_rate": 1.740510682417666e-05, "loss": 0.5368, "step": 5607 }, { "epoch": 0.26, "grad_norm": 0.46092882515729766, "learning_rate": 1.7404106794145163e-05, "loss": 0.3263, "step": 5608 }, { "epoch": 0.26, "grad_norm": 0.4084643015341037, "learning_rate": 1.7403106600193878e-05, "loss": 0.2989, "step": 5609 }, { "epoch": 0.26, "grad_norm": 0.3720021783684052, "learning_rate": 1.7402106242344946e-05, "loss": 0.2826, "step": 5610 }, { "epoch": 0.26, "grad_norm": 0.46029209665660464, "learning_rate": 1.7401105720620516e-05, "loss": 0.3059, "step": 5611 }, { "epoch": 0.26, "grad_norm": 0.332297214524411, "learning_rate": 1.740010503504274e-05, "loss": 0.2116, "step": 5612 }, { "epoch": 0.26, "grad_norm": 0.5243993439037318, "learning_rate": 1.739910418563377e-05, "loss": 0.3978, "step": 5613 }, { "epoch": 0.26, "grad_norm": 0.3692500602824166, "learning_rate": 1.7398103172415766e-05, "loss": 0.2824, "step": 5614 }, { "epoch": 0.26, "grad_norm": 0.8458931807717212, "learning_rate": 1.739710199541089e-05, "loss": 0.3971, "step": 5615 }, { "epoch": 0.26, "grad_norm": 0.438794326379432, "learning_rate": 1.73961006546413e-05, "loss": 0.2845, "step": 5616 }, { "epoch": 0.26, "grad_norm": 0.38727393576428387, "learning_rate": 1.7395099150129173e-05, "loss": 0.2915, "step": 5617 }, { "epoch": 0.26, "grad_norm": 0.3084459336446391, "learning_rate": 1.7394097481896676e-05, "loss": 0.2106, "step": 5618 }, { "epoch": 0.26, "grad_norm": 1.0550397826156737, "learning_rate": 1.739309564996599e-05, "loss": 0.525, "step": 5619 }, { "epoch": 0.26, "grad_norm": 0.37656289439841306, "learning_rate": 1.739209365435929e-05, "loss": 0.2914, "step": 5620 }, { "epoch": 0.26, "grad_norm": 0.7235882317830873, "learning_rate": 1.7391091495098763e-05, "loss": 0.5765, "step": 5621 }, { "epoch": 0.26, "grad_norm": 0.3369582148775822, "learning_rate": 1.7390089172206594e-05, "loss": 0.2781, "step": 5622 }, { "epoch": 0.26, "grad_norm": 0.46625909800849197, "learning_rate": 1.7389086685704973e-05, "loss": 0.2867, "step": 5623 }, { "epoch": 0.26, "grad_norm": 0.31703079580319904, "learning_rate": 1.7388084035616092e-05, "loss": 0.2144, "step": 5624 }, { "epoch": 0.26, "grad_norm": 0.5907459046423541, "learning_rate": 1.738708122196215e-05, "loss": 0.3142, "step": 5625 }, { "epoch": 0.26, "grad_norm": 0.454016486346263, "learning_rate": 1.7386078244765355e-05, "loss": 0.2734, "step": 5626 }, { "epoch": 0.26, "grad_norm": 0.506993614136617, "learning_rate": 1.7385075104047903e-05, "loss": 0.389, "step": 5627 }, { "epoch": 0.26, "grad_norm": 1.0332193451090583, "learning_rate": 1.7384071799832008e-05, "loss": 0.5181, "step": 5628 }, { "epoch": 0.26, "grad_norm": 0.4815321749613126, "learning_rate": 1.738306833213988e-05, "loss": 0.299, "step": 5629 }, { "epoch": 0.26, "grad_norm": 0.3180429072866193, "learning_rate": 1.7382064700993733e-05, "loss": 0.229, "step": 5630 }, { "epoch": 0.26, "grad_norm": 0.7769906387017061, "learning_rate": 1.738106090641579e-05, "loss": 0.3666, "step": 5631 }, { "epoch": 0.26, "grad_norm": 0.42152042471688644, "learning_rate": 1.7380056948428273e-05, "loss": 0.309, "step": 5632 }, { "epoch": 0.26, "grad_norm": 0.44256491183205654, "learning_rate": 1.7379052827053405e-05, "loss": 0.3069, "step": 5633 }, { "epoch": 0.26, "grad_norm": 0.615511915459569, "learning_rate": 1.7378048542313424e-05, "loss": 0.4168, "step": 5634 }, { "epoch": 0.26, "grad_norm": 0.354479821178787, "learning_rate": 1.7377044094230555e-05, "loss": 0.2301, "step": 5635 }, { "epoch": 0.26, "grad_norm": 0.2832020994153795, "learning_rate": 1.7376039482827043e-05, "loss": 0.1765, "step": 5636 }, { "epoch": 0.26, "grad_norm": 0.4967041052459835, "learning_rate": 1.7375034708125126e-05, "loss": 0.3644, "step": 5637 }, { "epoch": 0.26, "grad_norm": 0.3561312702698118, "learning_rate": 1.7374029770147048e-05, "loss": 0.2453, "step": 5638 }, { "epoch": 0.26, "grad_norm": 1.381147977528637, "learning_rate": 1.7373024668915058e-05, "loss": 0.6876, "step": 5639 }, { "epoch": 0.26, "grad_norm": 1.2047690047716177, "learning_rate": 1.7372019404451403e-05, "loss": 0.7801, "step": 5640 }, { "epoch": 0.26, "grad_norm": 0.37661967041072036, "learning_rate": 1.737101397677835e-05, "loss": 0.202, "step": 5641 }, { "epoch": 0.26, "grad_norm": 0.3920474704374135, "learning_rate": 1.7370008385918148e-05, "loss": 0.3182, "step": 5642 }, { "epoch": 0.26, "grad_norm": 0.43538211661209547, "learning_rate": 1.736900263189307e-05, "loss": 0.2534, "step": 5643 }, { "epoch": 0.26, "grad_norm": 0.4076558293800915, "learning_rate": 1.736799671472537e-05, "loss": 0.2037, "step": 5644 }, { "epoch": 0.26, "grad_norm": 0.44744861355234516, "learning_rate": 1.7366990634437328e-05, "loss": 0.32, "step": 5645 }, { "epoch": 0.26, "grad_norm": 0.6076813164620158, "learning_rate": 1.736598439105121e-05, "loss": 0.4046, "step": 5646 }, { "epoch": 0.26, "grad_norm": 0.5982607587524688, "learning_rate": 1.73649779845893e-05, "loss": 0.3316, "step": 5647 }, { "epoch": 0.26, "grad_norm": 0.3950500795104273, "learning_rate": 1.7363971415073875e-05, "loss": 0.3085, "step": 5648 }, { "epoch": 0.26, "grad_norm": 0.35963493739046604, "learning_rate": 1.736296468252722e-05, "loss": 0.2817, "step": 5649 }, { "epoch": 0.26, "grad_norm": 0.32435917135990094, "learning_rate": 1.7361957786971623e-05, "loss": 0.2336, "step": 5650 }, { "epoch": 0.26, "grad_norm": 0.4350948857761065, "learning_rate": 1.7360950728429377e-05, "loss": 0.2686, "step": 5651 }, { "epoch": 0.26, "grad_norm": 1.4148213763420012, "learning_rate": 1.7359943506922775e-05, "loss": 0.8692, "step": 5652 }, { "epoch": 0.26, "grad_norm": 0.35526705279168586, "learning_rate": 1.7358936122474116e-05, "loss": 0.3023, "step": 5653 }, { "epoch": 0.26, "grad_norm": 0.3909234274804482, "learning_rate": 1.7357928575105707e-05, "loss": 0.2812, "step": 5654 }, { "epoch": 0.26, "grad_norm": 0.42766647794751456, "learning_rate": 1.735692086483985e-05, "loss": 0.3338, "step": 5655 }, { "epoch": 0.26, "grad_norm": 0.3860190473888694, "learning_rate": 1.7355912991698857e-05, "loss": 0.2115, "step": 5656 }, { "epoch": 0.26, "grad_norm": 0.4450769992913822, "learning_rate": 1.7354904955705038e-05, "loss": 0.2641, "step": 5657 }, { "epoch": 0.26, "grad_norm": 0.5352681874680946, "learning_rate": 1.7353896756880716e-05, "loss": 0.4147, "step": 5658 }, { "epoch": 0.26, "grad_norm": 0.516889149628235, "learning_rate": 1.73528883952482e-05, "loss": 0.3467, "step": 5659 }, { "epoch": 0.26, "grad_norm": 0.4220909448385419, "learning_rate": 1.7351879870829828e-05, "loss": 0.353, "step": 5660 }, { "epoch": 0.26, "grad_norm": 0.3632217779083125, "learning_rate": 1.7350871183647918e-05, "loss": 0.2884, "step": 5661 }, { "epoch": 0.26, "grad_norm": 0.3498193295901684, "learning_rate": 1.734986233372481e-05, "loss": 0.1534, "step": 5662 }, { "epoch": 0.26, "grad_norm": 0.4371083048114835, "learning_rate": 1.7348853321082832e-05, "loss": 0.2898, "step": 5663 }, { "epoch": 0.26, "grad_norm": 0.7957103306423904, "learning_rate": 1.734784414574432e-05, "loss": 0.4008, "step": 5664 }, { "epoch": 0.26, "grad_norm": 0.3850971126805678, "learning_rate": 1.7346834807731626e-05, "loss": 0.2912, "step": 5665 }, { "epoch": 0.26, "grad_norm": 0.4251904203752995, "learning_rate": 1.7345825307067086e-05, "loss": 0.3733, "step": 5666 }, { "epoch": 0.26, "grad_norm": 0.8041601759705365, "learning_rate": 1.7344815643773058e-05, "loss": 0.3763, "step": 5667 }, { "epoch": 0.26, "grad_norm": 0.28941843269082057, "learning_rate": 1.7343805817871885e-05, "loss": 0.1598, "step": 5668 }, { "epoch": 0.26, "grad_norm": 0.3075629209464799, "learning_rate": 1.7342795829385933e-05, "loss": 0.2825, "step": 5669 }, { "epoch": 0.26, "grad_norm": 1.09065347121831, "learning_rate": 1.7341785678337557e-05, "loss": 0.4191, "step": 5670 }, { "epoch": 0.26, "grad_norm": 0.5923272769500549, "learning_rate": 1.7340775364749124e-05, "loss": 0.3909, "step": 5671 }, { "epoch": 0.26, "grad_norm": 0.4631238582513766, "learning_rate": 1.7339764888642998e-05, "loss": 0.3288, "step": 5672 }, { "epoch": 0.26, "grad_norm": 0.47622480077415913, "learning_rate": 1.7338754250041553e-05, "loss": 0.3559, "step": 5673 }, { "epoch": 0.26, "grad_norm": 0.1966611086715717, "learning_rate": 1.733774344896716e-05, "loss": 0.0749, "step": 5674 }, { "epoch": 0.26, "grad_norm": 0.4923213865082224, "learning_rate": 1.73367324854422e-05, "loss": 0.323, "step": 5675 }, { "epoch": 0.26, "grad_norm": 1.0501885045886348, "learning_rate": 1.7335721359489058e-05, "loss": 0.5343, "step": 5676 }, { "epoch": 0.26, "grad_norm": 0.3522192425530954, "learning_rate": 1.733471007113011e-05, "loss": 0.2633, "step": 5677 }, { "epoch": 0.26, "grad_norm": 0.44787884626806374, "learning_rate": 1.7333698620387755e-05, "loss": 0.3715, "step": 5678 }, { "epoch": 0.26, "grad_norm": 1.4619347999333387, "learning_rate": 1.7332687007284374e-05, "loss": 0.6438, "step": 5679 }, { "epoch": 0.26, "grad_norm": 0.24956571188799068, "learning_rate": 1.7331675231842374e-05, "loss": 0.0983, "step": 5680 }, { "epoch": 0.26, "grad_norm": 0.28766261713753105, "learning_rate": 1.7330663294084153e-05, "loss": 0.2728, "step": 5681 }, { "epoch": 0.26, "grad_norm": 0.7681976327198932, "learning_rate": 1.7329651194032108e-05, "loss": 0.4802, "step": 5682 }, { "epoch": 0.26, "grad_norm": 0.6220765355851102, "learning_rate": 1.7328638931708652e-05, "loss": 0.3276, "step": 5683 }, { "epoch": 0.26, "grad_norm": 0.381099517804756, "learning_rate": 1.7327626507136194e-05, "loss": 0.3142, "step": 5684 }, { "epoch": 0.26, "grad_norm": 0.4168246988884917, "learning_rate": 1.7326613920337147e-05, "loss": 0.302, "step": 5685 }, { "epoch": 0.26, "grad_norm": 0.5242040974825304, "learning_rate": 1.7325601171333927e-05, "loss": 0.2687, "step": 5686 }, { "epoch": 0.26, "grad_norm": 0.29266260419698903, "learning_rate": 1.732458826014896e-05, "loss": 0.2086, "step": 5687 }, { "epoch": 0.26, "grad_norm": 0.884541165777677, "learning_rate": 1.732357518680467e-05, "loss": 0.483, "step": 5688 }, { "epoch": 0.26, "grad_norm": 0.37147893597954396, "learning_rate": 1.732256195132348e-05, "loss": 0.3212, "step": 5689 }, { "epoch": 0.26, "grad_norm": 0.4020129683769188, "learning_rate": 1.7321548553727828e-05, "loss": 0.2408, "step": 5690 }, { "epoch": 0.26, "grad_norm": 0.9398524144431251, "learning_rate": 1.7320534994040148e-05, "loss": 0.5556, "step": 5691 }, { "epoch": 0.26, "grad_norm": 0.3660214863767395, "learning_rate": 1.731952127228288e-05, "loss": 0.2494, "step": 5692 }, { "epoch": 0.26, "grad_norm": 0.35284040939135825, "learning_rate": 1.7318507388478464e-05, "loss": 0.2452, "step": 5693 }, { "epoch": 0.26, "grad_norm": 0.559651903776116, "learning_rate": 1.7317493342649346e-05, "loss": 0.3134, "step": 5694 }, { "epoch": 0.26, "grad_norm": 0.8467735362485772, "learning_rate": 1.731647913481798e-05, "loss": 0.6168, "step": 5695 }, { "epoch": 0.26, "grad_norm": 0.43903044203404934, "learning_rate": 1.731546476500682e-05, "loss": 0.3127, "step": 5696 }, { "epoch": 0.26, "grad_norm": 0.35617455049077595, "learning_rate": 1.7314450233238316e-05, "loss": 0.2724, "step": 5697 }, { "epoch": 0.26, "grad_norm": 0.48892042488874515, "learning_rate": 1.731343553953494e-05, "loss": 0.3005, "step": 5698 }, { "epoch": 0.26, "grad_norm": 0.3875388619343407, "learning_rate": 1.7312420683919144e-05, "loss": 0.2844, "step": 5699 }, { "epoch": 0.26, "grad_norm": 0.3358371267370901, "learning_rate": 1.7311405666413405e-05, "loss": 0.2886, "step": 5700 }, { "epoch": 0.26, "grad_norm": 0.5242297881453637, "learning_rate": 1.7310390487040193e-05, "loss": 0.4036, "step": 5701 }, { "epoch": 0.26, "grad_norm": 0.37759615456846335, "learning_rate": 1.730937514582198e-05, "loss": 0.3006, "step": 5702 }, { "epoch": 0.26, "grad_norm": 0.8962198550028754, "learning_rate": 1.730835964278124e-05, "loss": 0.3179, "step": 5703 }, { "epoch": 0.26, "grad_norm": 0.44275502915243686, "learning_rate": 1.7307343977940467e-05, "loss": 0.3537, "step": 5704 }, { "epoch": 0.26, "grad_norm": 0.37448329798360214, "learning_rate": 1.7306328151322142e-05, "loss": 0.2974, "step": 5705 }, { "epoch": 0.26, "grad_norm": 0.4951967233846368, "learning_rate": 1.7305312162948754e-05, "loss": 0.2319, "step": 5706 }, { "epoch": 0.26, "grad_norm": 0.9714689662565439, "learning_rate": 1.7304296012842794e-05, "loss": 0.6148, "step": 5707 }, { "epoch": 0.26, "grad_norm": 0.3379417138763714, "learning_rate": 1.730327970102676e-05, "loss": 0.2345, "step": 5708 }, { "epoch": 0.26, "grad_norm": 0.4531665531555558, "learning_rate": 1.7302263227523154e-05, "loss": 0.3227, "step": 5709 }, { "epoch": 0.26, "grad_norm": 0.6324633800801548, "learning_rate": 1.7301246592354476e-05, "loss": 0.351, "step": 5710 }, { "epoch": 0.26, "grad_norm": 0.420000513436948, "learning_rate": 1.7300229795543234e-05, "loss": 0.2891, "step": 5711 }, { "epoch": 0.26, "grad_norm": 0.4751467632596921, "learning_rate": 1.7299212837111947e-05, "loss": 0.2844, "step": 5712 }, { "epoch": 0.26, "grad_norm": 0.40294653526200697, "learning_rate": 1.7298195717083117e-05, "loss": 0.285, "step": 5713 }, { "epoch": 0.26, "grad_norm": 0.3826517017215292, "learning_rate": 1.729717843547927e-05, "loss": 0.2872, "step": 5714 }, { "epoch": 0.26, "grad_norm": 0.48401376817174496, "learning_rate": 1.7296160992322922e-05, "loss": 0.2942, "step": 5715 }, { "epoch": 0.26, "grad_norm": 0.4268489505252926, "learning_rate": 1.729514338763661e-05, "loss": 0.3299, "step": 5716 }, { "epoch": 0.26, "grad_norm": 0.36317192336647813, "learning_rate": 1.7294125621442848e-05, "loss": 0.2833, "step": 5717 }, { "epoch": 0.26, "grad_norm": 0.9010094483756562, "learning_rate": 1.7293107693764177e-05, "loss": 0.5994, "step": 5718 }, { "epoch": 0.26, "grad_norm": 0.3021988986741908, "learning_rate": 1.729208960462313e-05, "loss": 0.157, "step": 5719 }, { "epoch": 0.26, "grad_norm": 0.3065599133268593, "learning_rate": 1.7291071354042247e-05, "loss": 0.2259, "step": 5720 }, { "epoch": 0.26, "grad_norm": 0.5142229429835107, "learning_rate": 1.729005294204407e-05, "loss": 0.3521, "step": 5721 }, { "epoch": 0.26, "grad_norm": 0.6682596708382953, "learning_rate": 1.728903436865115e-05, "loss": 0.4585, "step": 5722 }, { "epoch": 0.26, "grad_norm": 0.37217664712457094, "learning_rate": 1.7288015633886033e-05, "loss": 0.2377, "step": 5723 }, { "epoch": 0.26, "grad_norm": 0.9842439217105892, "learning_rate": 1.7286996737771275e-05, "loss": 0.6513, "step": 5724 }, { "epoch": 0.26, "grad_norm": 0.4378615319020107, "learning_rate": 1.7285977680329434e-05, "loss": 0.3646, "step": 5725 }, { "epoch": 0.26, "grad_norm": 0.36344080572564, "learning_rate": 1.728495846158307e-05, "loss": 0.211, "step": 5726 }, { "epoch": 0.26, "grad_norm": 0.44132970603907606, "learning_rate": 1.728393908155474e-05, "loss": 0.2769, "step": 5727 }, { "epoch": 0.26, "grad_norm": 0.41273891687674874, "learning_rate": 1.7282919540267025e-05, "loss": 0.3615, "step": 5728 }, { "epoch": 0.26, "grad_norm": 0.36402768497411836, "learning_rate": 1.728189983774249e-05, "loss": 0.2034, "step": 5729 }, { "epoch": 0.26, "grad_norm": 1.2272492539924984, "learning_rate": 1.728087997400371e-05, "loss": 0.7772, "step": 5730 }, { "epoch": 0.26, "grad_norm": 0.47546045652196284, "learning_rate": 1.7279859949073263e-05, "loss": 0.3434, "step": 5731 }, { "epoch": 0.26, "grad_norm": 0.3673803950858512, "learning_rate": 1.7278839762973734e-05, "loss": 0.2036, "step": 5732 }, { "epoch": 0.26, "grad_norm": 0.3403737123198815, "learning_rate": 1.7277819415727712e-05, "loss": 0.3178, "step": 5733 }, { "epoch": 0.26, "grad_norm": 0.8029106563032435, "learning_rate": 1.7276798907357778e-05, "loss": 0.4761, "step": 5734 }, { "epoch": 0.26, "grad_norm": 0.44449006366362753, "learning_rate": 1.7275778237886535e-05, "loss": 0.2912, "step": 5735 }, { "epoch": 0.26, "grad_norm": 0.370434068186547, "learning_rate": 1.7274757407336567e-05, "loss": 0.2667, "step": 5736 }, { "epoch": 0.26, "grad_norm": 1.5097823563245483, "learning_rate": 1.7273736415730488e-05, "loss": 0.7884, "step": 5737 }, { "epoch": 0.26, "grad_norm": 0.4298364188280013, "learning_rate": 1.7272715263090892e-05, "loss": 0.3288, "step": 5738 }, { "epoch": 0.26, "grad_norm": 0.5443692638418025, "learning_rate": 1.7271693949440393e-05, "loss": 0.3409, "step": 5739 }, { "epoch": 0.26, "grad_norm": 0.2504824799741428, "learning_rate": 1.7270672474801595e-05, "loss": 0.2353, "step": 5740 }, { "epoch": 0.26, "grad_norm": 0.36093064266195934, "learning_rate": 1.7269650839197115e-05, "loss": 0.2828, "step": 5741 }, { "epoch": 0.26, "grad_norm": 0.8202041556267456, "learning_rate": 1.7268629042649575e-05, "loss": 0.388, "step": 5742 }, { "epoch": 0.26, "grad_norm": 0.9438074064482519, "learning_rate": 1.7267607085181594e-05, "loss": 0.6054, "step": 5743 }, { "epoch": 0.26, "grad_norm": 0.39074207454549437, "learning_rate": 1.7266584966815794e-05, "loss": 0.2933, "step": 5744 }, { "epoch": 0.26, "grad_norm": 0.3531036342342758, "learning_rate": 1.726556268757481e-05, "loss": 0.2938, "step": 5745 }, { "epoch": 0.26, "grad_norm": 0.3201236283934675, "learning_rate": 1.726454024748127e-05, "loss": 0.2069, "step": 5746 }, { "epoch": 0.26, "grad_norm": 0.6122363142521149, "learning_rate": 1.7263517646557804e-05, "loss": 0.3459, "step": 5747 }, { "epoch": 0.26, "grad_norm": 0.3921042081591479, "learning_rate": 1.7262494884827062e-05, "loss": 0.3123, "step": 5748 }, { "epoch": 0.26, "grad_norm": 0.48072946182860743, "learning_rate": 1.7261471962311683e-05, "loss": 0.3299, "step": 5749 }, { "epoch": 0.26, "grad_norm": 0.4454915453155642, "learning_rate": 1.7260448879034316e-05, "loss": 0.2783, "step": 5750 }, { "epoch": 0.26, "grad_norm": 0.5489973478649294, "learning_rate": 1.7259425635017607e-05, "loss": 0.3521, "step": 5751 }, { "epoch": 0.26, "grad_norm": 0.3241000928532584, "learning_rate": 1.725840223028421e-05, "loss": 0.2122, "step": 5752 }, { "epoch": 0.26, "grad_norm": 0.33740597443333964, "learning_rate": 1.7257378664856786e-05, "loss": 0.2196, "step": 5753 }, { "epoch": 0.26, "grad_norm": 0.6357272557447061, "learning_rate": 1.725635493875799e-05, "loss": 0.4354, "step": 5754 }, { "epoch": 0.26, "grad_norm": 0.6842153455847927, "learning_rate": 1.7255331052010495e-05, "loss": 0.3914, "step": 5755 }, { "epoch": 0.26, "grad_norm": 0.35668705745627666, "learning_rate": 1.7254307004636957e-05, "loss": 0.2987, "step": 5756 }, { "epoch": 0.26, "grad_norm": 0.39085556828179274, "learning_rate": 1.7253282796660054e-05, "loss": 0.347, "step": 5757 }, { "epoch": 0.26, "grad_norm": 0.2997024556732525, "learning_rate": 1.7252258428102465e-05, "loss": 0.0992, "step": 5758 }, { "epoch": 0.26, "grad_norm": 0.49260611058103027, "learning_rate": 1.725123389898686e-05, "loss": 0.3484, "step": 5759 }, { "epoch": 0.26, "grad_norm": 0.6227143805467956, "learning_rate": 1.725020920933593e-05, "loss": 0.3773, "step": 5760 }, { "epoch": 0.26, "grad_norm": 0.4459339345217128, "learning_rate": 1.724918435917235e-05, "loss": 0.3502, "step": 5761 }, { "epoch": 0.26, "grad_norm": 0.38610546633587495, "learning_rate": 1.7248159348518818e-05, "loss": 0.2548, "step": 5762 }, { "epoch": 0.26, "grad_norm": 1.114210841299817, "learning_rate": 1.7247134177398023e-05, "loss": 0.6353, "step": 5763 }, { "epoch": 0.26, "grad_norm": 0.3117591093552998, "learning_rate": 1.7246108845832667e-05, "loss": 0.2635, "step": 5764 }, { "epoch": 0.26, "grad_norm": 0.24797431233334552, "learning_rate": 1.724508335384544e-05, "loss": 0.1028, "step": 5765 }, { "epoch": 0.26, "grad_norm": 0.529518773243945, "learning_rate": 1.724405770145905e-05, "loss": 0.3922, "step": 5766 }, { "epoch": 0.26, "grad_norm": 0.7757320019952428, "learning_rate": 1.724303188869621e-05, "loss": 0.4445, "step": 5767 }, { "epoch": 0.26, "grad_norm": 0.3491916514592435, "learning_rate": 1.724200591557962e-05, "loss": 0.2368, "step": 5768 }, { "epoch": 0.27, "grad_norm": 0.48172193968435517, "learning_rate": 1.7240979782131998e-05, "loss": 0.3551, "step": 5769 }, { "epoch": 0.27, "grad_norm": 0.5095995204420244, "learning_rate": 1.7239953488376066e-05, "loss": 0.2531, "step": 5770 }, { "epoch": 0.27, "grad_norm": 0.28557155392344324, "learning_rate": 1.723892703433454e-05, "loss": 0.1312, "step": 5771 }, { "epoch": 0.27, "grad_norm": 0.3801121457480909, "learning_rate": 1.7237900420030147e-05, "loss": 0.3278, "step": 5772 }, { "epoch": 0.27, "grad_norm": 0.868181009286335, "learning_rate": 1.7236873645485615e-05, "loss": 0.4714, "step": 5773 }, { "epoch": 0.27, "grad_norm": 0.5822177097303524, "learning_rate": 1.7235846710723673e-05, "loss": 0.3931, "step": 5774 }, { "epoch": 0.27, "grad_norm": 0.42598965445268405, "learning_rate": 1.7234819615767062e-05, "loss": 0.2516, "step": 5775 }, { "epoch": 0.27, "grad_norm": 0.3970620460769952, "learning_rate": 1.7233792360638517e-05, "loss": 0.3418, "step": 5776 }, { "epoch": 0.27, "grad_norm": 0.2489477097711643, "learning_rate": 1.723276494536078e-05, "loss": 0.175, "step": 5777 }, { "epoch": 0.27, "grad_norm": 0.4650641750966152, "learning_rate": 1.72317373699566e-05, "loss": 0.2898, "step": 5778 }, { "epoch": 0.27, "grad_norm": 0.9355180217620334, "learning_rate": 1.7230709634448723e-05, "loss": 0.5046, "step": 5779 }, { "epoch": 0.27, "grad_norm": 0.34469339706021446, "learning_rate": 1.7229681738859904e-05, "loss": 0.3106, "step": 5780 }, { "epoch": 0.27, "grad_norm": 0.42488854443534935, "learning_rate": 1.72286536832129e-05, "loss": 0.2773, "step": 5781 }, { "epoch": 0.27, "grad_norm": 0.7875686750730964, "learning_rate": 1.722762546753047e-05, "loss": 0.2938, "step": 5782 }, { "epoch": 0.27, "grad_norm": 0.3879048918182757, "learning_rate": 1.7226597091835377e-05, "loss": 0.2362, "step": 5783 }, { "epoch": 0.27, "grad_norm": 0.2989793748894178, "learning_rate": 1.722556855615039e-05, "loss": 0.2532, "step": 5784 }, { "epoch": 0.27, "grad_norm": 0.9189858035343479, "learning_rate": 1.7224539860498282e-05, "loss": 0.5291, "step": 5785 }, { "epoch": 0.27, "grad_norm": 0.8813741482748013, "learning_rate": 1.7223511004901822e-05, "loss": 0.5108, "step": 5786 }, { "epoch": 0.27, "grad_norm": 0.41102166488470704, "learning_rate": 1.722248198938379e-05, "loss": 0.3309, "step": 5787 }, { "epoch": 0.27, "grad_norm": 0.32237967802251427, "learning_rate": 1.722145281396697e-05, "loss": 0.2662, "step": 5788 }, { "epoch": 0.27, "grad_norm": 0.42583782811837856, "learning_rate": 1.7220423478674143e-05, "loss": 0.2792, "step": 5789 }, { "epoch": 0.27, "grad_norm": 0.4132686269014517, "learning_rate": 1.72193939835281e-05, "loss": 0.3214, "step": 5790 }, { "epoch": 0.27, "grad_norm": 0.9032654133647734, "learning_rate": 1.7218364328551635e-05, "loss": 0.4587, "step": 5791 }, { "epoch": 0.27, "grad_norm": 0.38153176557901086, "learning_rate": 1.7217334513767538e-05, "loss": 0.2955, "step": 5792 }, { "epoch": 0.27, "grad_norm": 0.37079660264385733, "learning_rate": 1.721630453919861e-05, "loss": 0.2496, "step": 5793 }, { "epoch": 0.27, "grad_norm": 1.0746624865275438, "learning_rate": 1.7215274404867654e-05, "loss": 0.3372, "step": 5794 }, { "epoch": 0.27, "grad_norm": 0.5883931719571168, "learning_rate": 1.721424411079748e-05, "loss": 0.3618, "step": 5795 }, { "epoch": 0.27, "grad_norm": 0.38883734381367896, "learning_rate": 1.7213213657010896e-05, "loss": 0.3388, "step": 5796 }, { "epoch": 0.27, "grad_norm": 0.35261168000140575, "learning_rate": 1.721218304353071e-05, "loss": 0.2495, "step": 5797 }, { "epoch": 0.27, "grad_norm": 0.3782987484267413, "learning_rate": 1.7211152270379745e-05, "loss": 0.2499, "step": 5798 }, { "epoch": 0.27, "grad_norm": 0.4598594174131196, "learning_rate": 1.7210121337580818e-05, "loss": 0.3261, "step": 5799 }, { "epoch": 0.27, "grad_norm": 0.3873382160467594, "learning_rate": 1.7209090245156753e-05, "loss": 0.3042, "step": 5800 }, { "epoch": 0.27, "grad_norm": 0.7672132406200473, "learning_rate": 1.720805899313038e-05, "loss": 0.3922, "step": 5801 }, { "epoch": 0.27, "grad_norm": 0.3591073766703548, "learning_rate": 1.7207027581524524e-05, "loss": 0.2865, "step": 5802 }, { "epoch": 0.27, "grad_norm": 0.5236412895694764, "learning_rate": 1.720599601036203e-05, "loss": 0.435, "step": 5803 }, { "epoch": 0.27, "grad_norm": 0.26953839278131897, "learning_rate": 1.7204964279665722e-05, "loss": 0.1768, "step": 5804 }, { "epoch": 0.27, "grad_norm": 0.30742273712711765, "learning_rate": 1.7203932389458455e-05, "loss": 0.2436, "step": 5805 }, { "epoch": 0.27, "grad_norm": 0.9557747287363251, "learning_rate": 1.7202900339763066e-05, "loss": 0.597, "step": 5806 }, { "epoch": 0.27, "grad_norm": 0.5364717619313458, "learning_rate": 1.7201868130602405e-05, "loss": 0.3133, "step": 5807 }, { "epoch": 0.27, "grad_norm": 0.3620703099182744, "learning_rate": 1.7200835761999325e-05, "loss": 0.2779, "step": 5808 }, { "epoch": 0.27, "grad_norm": 1.2175619693003903, "learning_rate": 1.7199803233976683e-05, "loss": 0.7602, "step": 5809 }, { "epoch": 0.27, "grad_norm": 0.31839467340084004, "learning_rate": 1.7198770546557337e-05, "loss": 0.2199, "step": 5810 }, { "epoch": 0.27, "grad_norm": 0.35701777888281405, "learning_rate": 1.7197737699764148e-05, "loss": 0.2316, "step": 5811 }, { "epoch": 0.27, "grad_norm": 0.425016871666638, "learning_rate": 1.7196704693619985e-05, "loss": 0.329, "step": 5812 }, { "epoch": 0.27, "grad_norm": 0.6701916109531195, "learning_rate": 1.7195671528147712e-05, "loss": 0.4309, "step": 5813 }, { "epoch": 0.27, "grad_norm": 0.3948717405906899, "learning_rate": 1.7194638203370212e-05, "loss": 0.2223, "step": 5814 }, { "epoch": 0.27, "grad_norm": 1.2256942530519768, "learning_rate": 1.7193604719310352e-05, "loss": 0.7725, "step": 5815 }, { "epoch": 0.27, "grad_norm": 0.3028268025501044, "learning_rate": 1.719257107599102e-05, "loss": 0.2441, "step": 5816 }, { "epoch": 0.27, "grad_norm": 0.29925141493278345, "learning_rate": 1.7191537273435098e-05, "loss": 0.1791, "step": 5817 }, { "epoch": 0.27, "grad_norm": 0.64299092472545, "learning_rate": 1.719050331166547e-05, "loss": 0.4657, "step": 5818 }, { "epoch": 0.27, "grad_norm": 0.3766146370633808, "learning_rate": 1.7189469190705027e-05, "loss": 0.3451, "step": 5819 }, { "epoch": 0.27, "grad_norm": 0.3829537997146153, "learning_rate": 1.7188434910576667e-05, "loss": 0.2215, "step": 5820 }, { "epoch": 0.27, "grad_norm": 1.3214843597360413, "learning_rate": 1.7187400471303285e-05, "loss": 0.8656, "step": 5821 }, { "epoch": 0.27, "grad_norm": 0.4949355759743745, "learning_rate": 1.7186365872907787e-05, "loss": 0.2233, "step": 5822 }, { "epoch": 0.27, "grad_norm": 0.271422659124242, "learning_rate": 1.718533111541307e-05, "loss": 0.2192, "step": 5823 }, { "epoch": 0.27, "grad_norm": 0.4502434979928553, "learning_rate": 1.7184296198842055e-05, "loss": 0.309, "step": 5824 }, { "epoch": 0.27, "grad_norm": 0.861666700479389, "learning_rate": 1.7183261123217644e-05, "loss": 0.4755, "step": 5825 }, { "epoch": 0.27, "grad_norm": 0.37327434428736245, "learning_rate": 1.718222588856275e-05, "loss": 0.2514, "step": 5826 }, { "epoch": 0.27, "grad_norm": 1.2286720664493243, "learning_rate": 1.71811904949003e-05, "loss": 0.5646, "step": 5827 }, { "epoch": 0.27, "grad_norm": 0.33393699519502734, "learning_rate": 1.7180154942253216e-05, "loss": 0.2762, "step": 5828 }, { "epoch": 0.27, "grad_norm": 0.4325059664873086, "learning_rate": 1.717911923064442e-05, "loss": 0.2711, "step": 5829 }, { "epoch": 0.27, "grad_norm": 0.8094255389578909, "learning_rate": 1.7178083360096844e-05, "loss": 0.261, "step": 5830 }, { "epoch": 0.27, "grad_norm": 0.4332613296392825, "learning_rate": 1.717704733063342e-05, "loss": 0.3482, "step": 5831 }, { "epoch": 0.27, "grad_norm": 0.5066992236696453, "learning_rate": 1.7176011142277086e-05, "loss": 0.3211, "step": 5832 }, { "epoch": 0.27, "grad_norm": 0.7439765878280497, "learning_rate": 1.7174974795050782e-05, "loss": 0.346, "step": 5833 }, { "epoch": 0.27, "grad_norm": 0.6234978757292291, "learning_rate": 1.7173938288977452e-05, "loss": 0.4406, "step": 5834 }, { "epoch": 0.27, "grad_norm": 0.45017634078879964, "learning_rate": 1.717290162408004e-05, "loss": 0.2895, "step": 5835 }, { "epoch": 0.27, "grad_norm": 0.5012433321945928, "learning_rate": 1.7171864800381502e-05, "loss": 0.3469, "step": 5836 }, { "epoch": 0.27, "grad_norm": 0.21279027224363434, "learning_rate": 1.7170827817904787e-05, "loss": 0.0712, "step": 5837 }, { "epoch": 0.27, "grad_norm": 0.4013895858292727, "learning_rate": 1.716979067667286e-05, "loss": 0.2712, "step": 5838 }, { "epoch": 0.27, "grad_norm": 0.5904687876023449, "learning_rate": 1.7168753376708673e-05, "loss": 0.428, "step": 5839 }, { "epoch": 0.27, "grad_norm": 0.48353108877238155, "learning_rate": 1.71677159180352e-05, "loss": 0.2929, "step": 5840 }, { "epoch": 0.27, "grad_norm": 0.3557112288052493, "learning_rate": 1.7166678300675398e-05, "loss": 0.2895, "step": 5841 }, { "epoch": 0.27, "grad_norm": 0.3616229942847358, "learning_rate": 1.716564052465225e-05, "loss": 0.2292, "step": 5842 }, { "epoch": 0.27, "grad_norm": 0.3621274955800064, "learning_rate": 1.7164602589988728e-05, "loss": 0.2557, "step": 5843 }, { "epoch": 0.27, "grad_norm": 0.37199458099465066, "learning_rate": 1.716356449670781e-05, "loss": 0.3001, "step": 5844 }, { "epoch": 0.27, "grad_norm": 1.5768103943665102, "learning_rate": 1.716252624483248e-05, "loss": 0.7813, "step": 5845 }, { "epoch": 0.27, "grad_norm": 0.7930134840988093, "learning_rate": 1.7161487834385715e-05, "loss": 0.4029, "step": 5846 }, { "epoch": 0.27, "grad_norm": 0.38940351610650864, "learning_rate": 1.716044926539052e-05, "loss": 0.2844, "step": 5847 }, { "epoch": 0.27, "grad_norm": 0.6200257502722679, "learning_rate": 1.7159410537869873e-05, "loss": 0.3911, "step": 5848 }, { "epoch": 0.27, "grad_norm": 0.3000337809963599, "learning_rate": 1.715837165184678e-05, "loss": 0.1673, "step": 5849 }, { "epoch": 0.27, "grad_norm": 0.39463446809050373, "learning_rate": 1.715733260734424e-05, "loss": 0.2032, "step": 5850 }, { "epoch": 0.27, "grad_norm": 0.45259417730496887, "learning_rate": 1.715629340438525e-05, "loss": 0.3353, "step": 5851 }, { "epoch": 0.27, "grad_norm": 0.8668755986058073, "learning_rate": 1.7155254042992827e-05, "loss": 0.4244, "step": 5852 }, { "epoch": 0.27, "grad_norm": 0.4159204373056371, "learning_rate": 1.7154214523189972e-05, "loss": 0.2308, "step": 5853 }, { "epoch": 0.27, "grad_norm": 0.5479494069423346, "learning_rate": 1.71531748449997e-05, "loss": 0.3447, "step": 5854 }, { "epoch": 0.27, "grad_norm": 0.3224629679964485, "learning_rate": 1.7152135008445037e-05, "loss": 0.2353, "step": 5855 }, { "epoch": 0.27, "grad_norm": 0.35217299699708465, "learning_rate": 1.7151095013548996e-05, "loss": 0.2112, "step": 5856 }, { "epoch": 0.27, "grad_norm": 1.2285932683964818, "learning_rate": 1.7150054860334605e-05, "loss": 0.5675, "step": 5857 }, { "epoch": 0.27, "grad_norm": 0.677607561893836, "learning_rate": 1.714901454882489e-05, "loss": 0.4906, "step": 5858 }, { "epoch": 0.27, "grad_norm": 0.3286106397018948, "learning_rate": 1.714797407904288e-05, "loss": 0.2325, "step": 5859 }, { "epoch": 0.27, "grad_norm": 0.5446666253228418, "learning_rate": 1.7146933451011617e-05, "loss": 0.4133, "step": 5860 }, { "epoch": 0.27, "grad_norm": 0.31587820781403214, "learning_rate": 1.7145892664754133e-05, "loss": 0.1858, "step": 5861 }, { "epoch": 0.27, "grad_norm": 0.4927012177450915, "learning_rate": 1.7144851720293473e-05, "loss": 0.3047, "step": 5862 }, { "epoch": 0.27, "grad_norm": 0.47065933721900155, "learning_rate": 1.7143810617652682e-05, "loss": 0.2839, "step": 5863 }, { "epoch": 0.27, "grad_norm": 0.6183371403105031, "learning_rate": 1.714276935685481e-05, "loss": 0.4062, "step": 5864 }, { "epoch": 0.27, "grad_norm": 0.3945464366476125, "learning_rate": 1.7141727937922912e-05, "loss": 0.2879, "step": 5865 }, { "epoch": 0.27, "grad_norm": 0.7058531087662331, "learning_rate": 1.7140686360880036e-05, "loss": 0.3724, "step": 5866 }, { "epoch": 0.27, "grad_norm": 0.3434273443611105, "learning_rate": 1.713964462574925e-05, "loss": 0.2326, "step": 5867 }, { "epoch": 0.27, "grad_norm": 0.4973094408858428, "learning_rate": 1.713860273255361e-05, "loss": 0.304, "step": 5868 }, { "epoch": 0.27, "grad_norm": 0.4709200764103284, "learning_rate": 1.7137560681316186e-05, "loss": 0.3315, "step": 5869 }, { "epoch": 0.27, "grad_norm": 0.8171774224953345, "learning_rate": 1.713651847206005e-05, "loss": 0.5632, "step": 5870 }, { "epoch": 0.27, "grad_norm": 0.43955160899274065, "learning_rate": 1.7135476104808272e-05, "loss": 0.2804, "step": 5871 }, { "epoch": 0.27, "grad_norm": 0.35955418292862135, "learning_rate": 1.713443357958393e-05, "loss": 0.2526, "step": 5872 }, { "epoch": 0.27, "grad_norm": 0.37398628019480346, "learning_rate": 1.7133390896410106e-05, "loss": 0.2334, "step": 5873 }, { "epoch": 0.27, "grad_norm": 0.5366213156376702, "learning_rate": 1.7132348055309883e-05, "loss": 0.4113, "step": 5874 }, { "epoch": 0.27, "grad_norm": 0.327035655663534, "learning_rate": 1.713130505630635e-05, "loss": 0.2857, "step": 5875 }, { "epoch": 0.27, "grad_norm": 0.7560665579329858, "learning_rate": 1.713026189942259e-05, "loss": 0.3955, "step": 5876 }, { "epoch": 0.27, "grad_norm": 0.40662581397332515, "learning_rate": 1.712921858468171e-05, "loss": 0.2842, "step": 5877 }, { "epoch": 0.27, "grad_norm": 0.5805503852268482, "learning_rate": 1.71281751121068e-05, "loss": 0.4002, "step": 5878 }, { "epoch": 0.27, "grad_norm": 0.33117583284620183, "learning_rate": 1.712713148172096e-05, "loss": 0.2203, "step": 5879 }, { "epoch": 0.27, "grad_norm": 0.2829564150260896, "learning_rate": 1.71260876935473e-05, "loss": 0.2386, "step": 5880 }, { "epoch": 0.27, "grad_norm": 0.5673148020543327, "learning_rate": 1.712504374760893e-05, "loss": 0.397, "step": 5881 }, { "epoch": 0.27, "grad_norm": 0.9633938669381699, "learning_rate": 1.7123999643928956e-05, "loss": 0.5792, "step": 5882 }, { "epoch": 0.27, "grad_norm": 0.30280809128875374, "learning_rate": 1.7122955382530496e-05, "loss": 0.2618, "step": 5883 }, { "epoch": 0.27, "grad_norm": 0.5748785898886367, "learning_rate": 1.7121910963436667e-05, "loss": 0.4187, "step": 5884 }, { "epoch": 0.27, "grad_norm": 0.4027072858325578, "learning_rate": 1.7120866386670596e-05, "loss": 0.2073, "step": 5885 }, { "epoch": 0.27, "grad_norm": 0.45659649518877693, "learning_rate": 1.711982165225541e-05, "loss": 0.326, "step": 5886 }, { "epoch": 0.27, "grad_norm": 0.4084976178266795, "learning_rate": 1.711877676021423e-05, "loss": 0.3211, "step": 5887 }, { "epoch": 0.27, "grad_norm": 1.013471602239002, "learning_rate": 1.7117731710570195e-05, "loss": 0.646, "step": 5888 }, { "epoch": 0.27, "grad_norm": 0.2144267759206389, "learning_rate": 1.711668650334644e-05, "loss": 0.1026, "step": 5889 }, { "epoch": 0.27, "grad_norm": 0.4474988324901726, "learning_rate": 1.71156411385661e-05, "loss": 0.3286, "step": 5890 }, { "epoch": 0.27, "grad_norm": 0.40574563565382427, "learning_rate": 1.7114595616252333e-05, "loss": 0.3439, "step": 5891 }, { "epoch": 0.27, "grad_norm": 0.582548240989252, "learning_rate": 1.7113549936428268e-05, "loss": 0.2617, "step": 5892 }, { "epoch": 0.27, "grad_norm": 0.3260827288793383, "learning_rate": 1.7112504099117068e-05, "loss": 0.2732, "step": 5893 }, { "epoch": 0.27, "grad_norm": 1.2391769374618, "learning_rate": 1.711145810434188e-05, "loss": 0.8635, "step": 5894 }, { "epoch": 0.27, "grad_norm": 0.29249206923785054, "learning_rate": 1.7110411952125865e-05, "loss": 0.1999, "step": 5895 }, { "epoch": 0.27, "grad_norm": 0.40388406598616816, "learning_rate": 1.710936564249218e-05, "loss": 0.3259, "step": 5896 }, { "epoch": 0.27, "grad_norm": 0.7120424548109571, "learning_rate": 1.7108319175463995e-05, "loss": 0.4804, "step": 5897 }, { "epoch": 0.27, "grad_norm": 0.3619483429348296, "learning_rate": 1.710727255106447e-05, "loss": 0.2629, "step": 5898 }, { "epoch": 0.27, "grad_norm": 0.42753785654649634, "learning_rate": 1.7106225769316787e-05, "loss": 0.3433, "step": 5899 }, { "epoch": 0.27, "grad_norm": 0.561893768698309, "learning_rate": 1.7105178830244114e-05, "loss": 0.4462, "step": 5900 }, { "epoch": 0.27, "grad_norm": 0.2688047528381382, "learning_rate": 1.7104131733869626e-05, "loss": 0.1582, "step": 5901 }, { "epoch": 0.27, "grad_norm": 0.42863858502218144, "learning_rate": 1.710308448021651e-05, "loss": 0.2429, "step": 5902 }, { "epoch": 0.27, "grad_norm": 0.41429441121269206, "learning_rate": 1.710203706930795e-05, "loss": 0.3384, "step": 5903 }, { "epoch": 0.27, "grad_norm": 0.5679975406900668, "learning_rate": 1.7100989501167132e-05, "loss": 0.3788, "step": 5904 }, { "epoch": 0.27, "grad_norm": 0.39518832098371826, "learning_rate": 1.7099941775817254e-05, "loss": 0.2571, "step": 5905 }, { "epoch": 0.27, "grad_norm": 0.39310314822991255, "learning_rate": 1.7098893893281506e-05, "loss": 0.3205, "step": 5906 }, { "epoch": 0.27, "grad_norm": 0.3376500534049663, "learning_rate": 1.709784585358309e-05, "loss": 0.2525, "step": 5907 }, { "epoch": 0.27, "grad_norm": 0.3682762590735687, "learning_rate": 1.709679765674521e-05, "loss": 0.2259, "step": 5908 }, { "epoch": 0.27, "grad_norm": 0.6716331436295712, "learning_rate": 1.7095749302791067e-05, "loss": 0.4441, "step": 5909 }, { "epoch": 0.27, "grad_norm": 0.8088737547134324, "learning_rate": 1.7094700791743872e-05, "loss": 0.4461, "step": 5910 }, { "epoch": 0.27, "grad_norm": 0.2960270430871615, "learning_rate": 1.709365212362684e-05, "loss": 0.2436, "step": 5911 }, { "epoch": 0.27, "grad_norm": 1.2843195248368082, "learning_rate": 1.709260329846319e-05, "loss": 0.8546, "step": 5912 }, { "epoch": 0.27, "grad_norm": 0.31189513533251706, "learning_rate": 1.7091554316276138e-05, "loss": 0.1724, "step": 5913 }, { "epoch": 0.27, "grad_norm": 0.4602971756173651, "learning_rate": 1.7090505177088906e-05, "loss": 0.3303, "step": 5914 }, { "epoch": 0.27, "grad_norm": 0.43344008028450426, "learning_rate": 1.7089455880924724e-05, "loss": 0.2971, "step": 5915 }, { "epoch": 0.27, "grad_norm": 0.7778317419096797, "learning_rate": 1.7088406427806825e-05, "loss": 0.3723, "step": 5916 }, { "epoch": 0.27, "grad_norm": 0.38608544339889544, "learning_rate": 1.708735681775844e-05, "loss": 0.3147, "step": 5917 }, { "epoch": 0.27, "grad_norm": 0.46509658491909134, "learning_rate": 1.70863070508028e-05, "loss": 0.3325, "step": 5918 }, { "epoch": 0.27, "grad_norm": 0.320389087123723, "learning_rate": 1.7085257126963154e-05, "loss": 0.2496, "step": 5919 }, { "epoch": 0.27, "grad_norm": 0.32782006316907925, "learning_rate": 1.7084207046262745e-05, "loss": 0.2529, "step": 5920 }, { "epoch": 0.27, "grad_norm": 0.5490537286663046, "learning_rate": 1.7083156808724817e-05, "loss": 0.3929, "step": 5921 }, { "epoch": 0.27, "grad_norm": 0.468908744682117, "learning_rate": 1.708210641437263e-05, "loss": 0.3265, "step": 5922 }, { "epoch": 0.27, "grad_norm": 0.38405776769562694, "learning_rate": 1.708105586322943e-05, "loss": 0.3092, "step": 5923 }, { "epoch": 0.27, "grad_norm": 0.8326960327561136, "learning_rate": 1.7080005155318476e-05, "loss": 0.5483, "step": 5924 }, { "epoch": 0.27, "grad_norm": 0.43608237402044575, "learning_rate": 1.7078954290663033e-05, "loss": 0.2795, "step": 5925 }, { "epoch": 0.27, "grad_norm": 0.37808866573357075, "learning_rate": 1.7077903269286366e-05, "loss": 0.3198, "step": 5926 }, { "epoch": 0.27, "grad_norm": 0.379720954350531, "learning_rate": 1.707685209121174e-05, "loss": 0.2869, "step": 5927 }, { "epoch": 0.27, "grad_norm": 0.5178370784686631, "learning_rate": 1.707580075646243e-05, "loss": 0.1615, "step": 5928 }, { "epoch": 0.27, "grad_norm": 0.39359873889018226, "learning_rate": 1.7074749265061714e-05, "loss": 0.3271, "step": 5929 }, { "epoch": 0.27, "grad_norm": 0.5957345151167208, "learning_rate": 1.7073697617032867e-05, "loss": 0.4118, "step": 5930 }, { "epoch": 0.27, "grad_norm": 0.4582023081405017, "learning_rate": 1.7072645812399172e-05, "loss": 0.3029, "step": 5931 }, { "epoch": 0.27, "grad_norm": 0.39460801200391943, "learning_rate": 1.7071593851183915e-05, "loss": 0.3449, "step": 5932 }, { "epoch": 0.27, "grad_norm": 0.32835350756640214, "learning_rate": 1.7070541733410383e-05, "loss": 0.167, "step": 5933 }, { "epoch": 0.27, "grad_norm": 0.34307047054451734, "learning_rate": 1.7069489459101876e-05, "loss": 0.2585, "step": 5934 }, { "epoch": 0.27, "grad_norm": 0.4075466560552005, "learning_rate": 1.7068437028281683e-05, "loss": 0.316, "step": 5935 }, { "epoch": 0.27, "grad_norm": 1.4424457538623434, "learning_rate": 1.7067384440973106e-05, "loss": 0.932, "step": 5936 }, { "epoch": 0.27, "grad_norm": 0.6437089694640437, "learning_rate": 1.7066331697199454e-05, "loss": 0.4623, "step": 5937 }, { "epoch": 0.27, "grad_norm": 0.3838995757116502, "learning_rate": 1.7065278796984027e-05, "loss": 0.2125, "step": 5938 }, { "epoch": 0.27, "grad_norm": 0.2518315059890443, "learning_rate": 1.7064225740350137e-05, "loss": 0.2132, "step": 5939 }, { "epoch": 0.27, "grad_norm": 0.9210562135600497, "learning_rate": 1.7063172527321096e-05, "loss": 0.4485, "step": 5940 }, { "epoch": 0.27, "grad_norm": 0.36665256513667205, "learning_rate": 1.7062119157920222e-05, "loss": 0.2339, "step": 5941 }, { "epoch": 0.27, "grad_norm": 0.41921546425270406, "learning_rate": 1.706106563217084e-05, "loss": 0.3369, "step": 5942 }, { "epoch": 0.27, "grad_norm": 0.7684850884279384, "learning_rate": 1.706001195009627e-05, "loss": 0.4465, "step": 5943 }, { "epoch": 0.27, "grad_norm": 0.350611482249508, "learning_rate": 1.7058958111719836e-05, "loss": 0.2499, "step": 5944 }, { "epoch": 0.27, "grad_norm": 0.31333883509907856, "learning_rate": 1.7057904117064875e-05, "loss": 0.2046, "step": 5945 }, { "epoch": 0.27, "grad_norm": 0.4747905032968039, "learning_rate": 1.705684996615472e-05, "loss": 0.3294, "step": 5946 }, { "epoch": 0.27, "grad_norm": 0.3962397597331546, "learning_rate": 1.7055795659012707e-05, "loss": 0.2201, "step": 5947 }, { "epoch": 0.27, "grad_norm": 0.8090891394958526, "learning_rate": 1.705474119566218e-05, "loss": 0.5365, "step": 5948 }, { "epoch": 0.27, "grad_norm": 0.7227864126728298, "learning_rate": 1.7053686576126482e-05, "loss": 0.4698, "step": 5949 }, { "epoch": 0.27, "grad_norm": 0.4635124470831637, "learning_rate": 1.7052631800428962e-05, "loss": 0.2808, "step": 5950 }, { "epoch": 0.27, "grad_norm": 0.39777661899904093, "learning_rate": 1.705157686859297e-05, "loss": 0.2728, "step": 5951 }, { "epoch": 0.27, "grad_norm": 0.3408020935369634, "learning_rate": 1.7050521780641864e-05, "loss": 0.2248, "step": 5952 }, { "epoch": 0.27, "grad_norm": 0.4014637825287904, "learning_rate": 1.7049466536599e-05, "loss": 0.2662, "step": 5953 }, { "epoch": 0.27, "grad_norm": 0.6751528364714424, "learning_rate": 1.7048411136487743e-05, "loss": 0.3141, "step": 5954 }, { "epoch": 0.27, "grad_norm": 0.6026289779540165, "learning_rate": 1.7047355580331457e-05, "loss": 0.4155, "step": 5955 }, { "epoch": 0.27, "grad_norm": 0.43519046763094116, "learning_rate": 1.7046299868153507e-05, "loss": 0.2816, "step": 5956 }, { "epoch": 0.27, "grad_norm": 0.24149123979846776, "learning_rate": 1.7045243999977274e-05, "loss": 0.1473, "step": 5957 }, { "epoch": 0.27, "grad_norm": 0.40930465318030845, "learning_rate": 1.7044187975826126e-05, "loss": 0.3579, "step": 5958 }, { "epoch": 0.27, "grad_norm": 0.4054815146348783, "learning_rate": 1.7043131795723446e-05, "loss": 0.2759, "step": 5959 }, { "epoch": 0.27, "grad_norm": 0.6401056722694114, "learning_rate": 1.7042075459692616e-05, "loss": 0.3513, "step": 5960 }, { "epoch": 0.27, "grad_norm": 1.335019752203884, "learning_rate": 1.7041018967757024e-05, "loss": 0.6319, "step": 5961 }, { "epoch": 0.27, "grad_norm": 0.38521141886876703, "learning_rate": 1.7039962319940054e-05, "loss": 0.275, "step": 5962 }, { "epoch": 0.27, "grad_norm": 0.39669759543479416, "learning_rate": 1.703890551626511e-05, "loss": 0.3629, "step": 5963 }, { "epoch": 0.27, "grad_norm": 0.3299941671127926, "learning_rate": 1.703784855675558e-05, "loss": 0.1072, "step": 5964 }, { "epoch": 0.27, "grad_norm": 0.3700914505473078, "learning_rate": 1.7036791441434864e-05, "loss": 0.3039, "step": 5965 }, { "epoch": 0.27, "grad_norm": 0.48813217712176504, "learning_rate": 1.703573417032637e-05, "loss": 0.3655, "step": 5966 }, { "epoch": 0.27, "grad_norm": 0.4843069303760202, "learning_rate": 1.70346767434535e-05, "loss": 0.3022, "step": 5967 }, { "epoch": 0.27, "grad_norm": 0.39623930683239766, "learning_rate": 1.703361916083967e-05, "loss": 0.2911, "step": 5968 }, { "epoch": 0.27, "grad_norm": 1.2998819153745773, "learning_rate": 1.7032561422508288e-05, "loss": 0.7017, "step": 5969 }, { "epoch": 0.27, "grad_norm": 0.32507802534360075, "learning_rate": 1.7031503528482774e-05, "loss": 0.2098, "step": 5970 }, { "epoch": 0.27, "grad_norm": 0.35439757017275464, "learning_rate": 1.703044547878655e-05, "loss": 0.298, "step": 5971 }, { "epoch": 0.27, "grad_norm": 0.6740460811168439, "learning_rate": 1.702938727344304e-05, "loss": 0.4836, "step": 5972 }, { "epoch": 0.27, "grad_norm": 0.4855944208006054, "learning_rate": 1.702832891247567e-05, "loss": 0.2979, "step": 5973 }, { "epoch": 0.27, "grad_norm": 0.4731371035031592, "learning_rate": 1.7027270395907872e-05, "loss": 0.296, "step": 5974 }, { "epoch": 0.27, "grad_norm": 0.41276602074770175, "learning_rate": 1.702621172376308e-05, "loss": 0.3397, "step": 5975 }, { "epoch": 0.27, "grad_norm": 0.4361444034100218, "learning_rate": 1.7025152896064733e-05, "loss": 0.3226, "step": 5976 }, { "epoch": 0.27, "grad_norm": 0.37249748123834836, "learning_rate": 1.702409391283627e-05, "loss": 0.2229, "step": 5977 }, { "epoch": 0.27, "grad_norm": 0.3936272414823332, "learning_rate": 1.702303477410114e-05, "loss": 0.317, "step": 5978 }, { "epoch": 0.27, "grad_norm": 0.47408399348132163, "learning_rate": 1.7021975479882788e-05, "loss": 0.2565, "step": 5979 }, { "epoch": 0.27, "grad_norm": 0.4040780028913679, "learning_rate": 1.702091603020467e-05, "loss": 0.1798, "step": 5980 }, { "epoch": 0.27, "grad_norm": 0.5793933469561242, "learning_rate": 1.7019856425090233e-05, "loss": 0.3687, "step": 5981 }, { "epoch": 0.27, "grad_norm": 0.4041240082953772, "learning_rate": 1.701879666456294e-05, "loss": 0.3447, "step": 5982 }, { "epoch": 0.27, "grad_norm": 0.3936935373938144, "learning_rate": 1.701773674864626e-05, "loss": 0.2519, "step": 5983 }, { "epoch": 0.27, "grad_norm": 0.8124025409919884, "learning_rate": 1.7016676677363646e-05, "loss": 0.5801, "step": 5984 }, { "epoch": 0.27, "grad_norm": 0.32712509727414457, "learning_rate": 1.7015616450738575e-05, "loss": 0.197, "step": 5985 }, { "epoch": 0.27, "grad_norm": 0.3090090016451351, "learning_rate": 1.701455606879452e-05, "loss": 0.2398, "step": 5986 }, { "epoch": 0.28, "grad_norm": 1.0961125156859128, "learning_rate": 1.7013495531554952e-05, "loss": 0.6021, "step": 5987 }, { "epoch": 0.28, "grad_norm": 0.834353473295938, "learning_rate": 1.7012434839043353e-05, "loss": 0.4405, "step": 5988 }, { "epoch": 0.28, "grad_norm": 0.44065027462902717, "learning_rate": 1.7011373991283204e-05, "loss": 0.3054, "step": 5989 }, { "epoch": 0.28, "grad_norm": 0.38746338192129737, "learning_rate": 1.7010312988297993e-05, "loss": 0.2756, "step": 5990 }, { "epoch": 0.28, "grad_norm": 0.32675984416050463, "learning_rate": 1.7009251830111214e-05, "loss": 0.2067, "step": 5991 }, { "epoch": 0.28, "grad_norm": 0.6536680117197015, "learning_rate": 1.7008190516746348e-05, "loss": 0.3086, "step": 5992 }, { "epoch": 0.28, "grad_norm": 0.51514725544651, "learning_rate": 1.7007129048226903e-05, "loss": 0.2821, "step": 5993 }, { "epoch": 0.28, "grad_norm": 0.5207101334407666, "learning_rate": 1.7006067424576372e-05, "loss": 0.3537, "step": 5994 }, { "epoch": 0.28, "grad_norm": 0.7161801081282118, "learning_rate": 1.7005005645818262e-05, "loss": 0.3367, "step": 5995 }, { "epoch": 0.28, "grad_norm": 0.4244158576760907, "learning_rate": 1.7003943711976077e-05, "loss": 0.2895, "step": 5996 }, { "epoch": 0.28, "grad_norm": 0.26219144911785597, "learning_rate": 1.700288162307333e-05, "loss": 0.1817, "step": 5997 }, { "epoch": 0.28, "grad_norm": 0.4951344654928905, "learning_rate": 1.7001819379133532e-05, "loss": 0.3341, "step": 5998 }, { "epoch": 0.28, "grad_norm": 0.3726495835641448, "learning_rate": 1.7000756980180206e-05, "loss": 0.3028, "step": 5999 }, { "epoch": 0.28, "grad_norm": 0.7759250923457206, "learning_rate": 1.6999694426236862e-05, "loss": 0.5065, "step": 6000 }, { "epoch": 0.28, "grad_norm": 0.668703465881885, "learning_rate": 1.6998631717327034e-05, "loss": 0.3654, "step": 6001 }, { "epoch": 0.28, "grad_norm": 0.37960069988963036, "learning_rate": 1.6997568853474244e-05, "loss": 0.29, "step": 6002 }, { "epoch": 0.28, "grad_norm": 0.2023504059861079, "learning_rate": 1.699650583470202e-05, "loss": 0.1109, "step": 6003 }, { "epoch": 0.28, "grad_norm": 0.45500710574339603, "learning_rate": 1.6995442661033905e-05, "loss": 0.2884, "step": 6004 }, { "epoch": 0.28, "grad_norm": 0.516264511830306, "learning_rate": 1.699437933249343e-05, "loss": 0.3589, "step": 6005 }, { "epoch": 0.28, "grad_norm": 0.453949929285188, "learning_rate": 1.699331584910414e-05, "loss": 0.3158, "step": 6006 }, { "epoch": 0.28, "grad_norm": 0.4407652820726819, "learning_rate": 1.6992252210889573e-05, "loss": 0.2931, "step": 6007 }, { "epoch": 0.28, "grad_norm": 0.5893706782684486, "learning_rate": 1.6991188417873284e-05, "loss": 0.394, "step": 6008 }, { "epoch": 0.28, "grad_norm": 0.24325209210070836, "learning_rate": 1.699012447007882e-05, "loss": 0.1546, "step": 6009 }, { "epoch": 0.28, "grad_norm": 0.4578150028039045, "learning_rate": 1.698906036752974e-05, "loss": 0.3077, "step": 6010 }, { "epoch": 0.28, "grad_norm": 0.38129347866487756, "learning_rate": 1.6987996110249598e-05, "loss": 0.3161, "step": 6011 }, { "epoch": 0.28, "grad_norm": 0.6261919988439754, "learning_rate": 1.698693169826196e-05, "loss": 0.3825, "step": 6012 }, { "epoch": 0.28, "grad_norm": 0.6577505177874339, "learning_rate": 1.6985867131590383e-05, "loss": 0.3495, "step": 6013 }, { "epoch": 0.28, "grad_norm": 0.33937185162094, "learning_rate": 1.698480241025845e-05, "loss": 0.2895, "step": 6014 }, { "epoch": 0.28, "grad_norm": 0.4413707928373477, "learning_rate": 1.6983737534289714e-05, "loss": 0.2673, "step": 6015 }, { "epoch": 0.28, "grad_norm": 0.49356171285747646, "learning_rate": 1.6982672503707762e-05, "loss": 0.2618, "step": 6016 }, { "epoch": 0.28, "grad_norm": 0.33354465622688945, "learning_rate": 1.698160731853617e-05, "loss": 0.3051, "step": 6017 }, { "epoch": 0.28, "grad_norm": 0.42406956238714966, "learning_rate": 1.6980541978798528e-05, "loss": 0.3579, "step": 6018 }, { "epoch": 0.28, "grad_norm": 0.4118361159173712, "learning_rate": 1.6979476484518408e-05, "loss": 0.2122, "step": 6019 }, { "epoch": 0.28, "grad_norm": 0.4653993626995571, "learning_rate": 1.6978410835719407e-05, "loss": 0.3143, "step": 6020 }, { "epoch": 0.28, "grad_norm": 0.4493591437459993, "learning_rate": 1.6977345032425113e-05, "loss": 0.2791, "step": 6021 }, { "epoch": 0.28, "grad_norm": 0.32919852786738657, "learning_rate": 1.697627907465913e-05, "loss": 0.2663, "step": 6022 }, { "epoch": 0.28, "grad_norm": 0.41225887895580565, "learning_rate": 1.697521296244505e-05, "loss": 0.311, "step": 6023 }, { "epoch": 0.28, "grad_norm": 0.6160853133769265, "learning_rate": 1.6974146695806476e-05, "loss": 0.3151, "step": 6024 }, { "epoch": 0.28, "grad_norm": 0.325981469415452, "learning_rate": 1.697308027476702e-05, "loss": 0.2359, "step": 6025 }, { "epoch": 0.28, "grad_norm": 0.4237713675844246, "learning_rate": 1.6972013699350285e-05, "loss": 0.3347, "step": 6026 }, { "epoch": 0.28, "grad_norm": 0.7650524675386389, "learning_rate": 1.6970946969579888e-05, "loss": 0.5469, "step": 6027 }, { "epoch": 0.28, "grad_norm": 0.593426899966432, "learning_rate": 1.696988008547944e-05, "loss": 0.3911, "step": 6028 }, { "epoch": 0.28, "grad_norm": 0.28153683964539106, "learning_rate": 1.6968813047072567e-05, "loss": 0.1793, "step": 6029 }, { "epoch": 0.28, "grad_norm": 0.30450963292171274, "learning_rate": 1.6967745854382893e-05, "loss": 0.262, "step": 6030 }, { "epoch": 0.28, "grad_norm": 1.8843173363275398, "learning_rate": 1.6966678507434035e-05, "loss": 0.8055, "step": 6031 }, { "epoch": 0.28, "grad_norm": 0.36372672819028323, "learning_rate": 1.6965611006249635e-05, "loss": 0.2339, "step": 6032 }, { "epoch": 0.28, "grad_norm": 0.5611469696920824, "learning_rate": 1.6964543350853316e-05, "loss": 0.3845, "step": 6033 }, { "epoch": 0.28, "grad_norm": 0.47126325851231976, "learning_rate": 1.6963475541268723e-05, "loss": 0.3809, "step": 6034 }, { "epoch": 0.28, "grad_norm": 0.3065203273828383, "learning_rate": 1.6962407577519492e-05, "loss": 0.1819, "step": 6035 }, { "epoch": 0.28, "grad_norm": 0.4491202114938126, "learning_rate": 1.696133945962927e-05, "loss": 0.2491, "step": 6036 }, { "epoch": 0.28, "grad_norm": 0.4840605326678392, "learning_rate": 1.6960271187621696e-05, "loss": 0.3743, "step": 6037 }, { "epoch": 0.28, "grad_norm": 0.33831501323115126, "learning_rate": 1.6959202761520432e-05, "loss": 0.2472, "step": 6038 }, { "epoch": 0.28, "grad_norm": 0.7609414065011277, "learning_rate": 1.6958134181349123e-05, "loss": 0.4609, "step": 6039 }, { "epoch": 0.28, "grad_norm": 0.5736951071401374, "learning_rate": 1.6957065447131432e-05, "loss": 0.452, "step": 6040 }, { "epoch": 0.28, "grad_norm": 0.4035008562619525, "learning_rate": 1.6955996558891015e-05, "loss": 0.2844, "step": 6041 }, { "epoch": 0.28, "grad_norm": 0.28519964042492596, "learning_rate": 1.695492751665154e-05, "loss": 0.2066, "step": 6042 }, { "epoch": 0.28, "grad_norm": 0.8209423128244802, "learning_rate": 1.6953858320436673e-05, "loss": 0.5503, "step": 6043 }, { "epoch": 0.28, "grad_norm": 0.38490571150414665, "learning_rate": 1.6952788970270083e-05, "loss": 0.2692, "step": 6044 }, { "epoch": 0.28, "grad_norm": 0.6216794434170183, "learning_rate": 1.6951719466175446e-05, "loss": 0.3147, "step": 6045 }, { "epoch": 0.28, "grad_norm": 0.8719709012471768, "learning_rate": 1.6950649808176443e-05, "loss": 0.3836, "step": 6046 }, { "epoch": 0.28, "grad_norm": 0.40274786221622766, "learning_rate": 1.694957999629675e-05, "loss": 0.304, "step": 6047 }, { "epoch": 0.28, "grad_norm": 0.2557647704909251, "learning_rate": 1.6948510030560057e-05, "loss": 0.1307, "step": 6048 }, { "epoch": 0.28, "grad_norm": 0.5486500709570629, "learning_rate": 1.694743991099005e-05, "loss": 0.35, "step": 6049 }, { "epoch": 0.28, "grad_norm": 0.45517943040362524, "learning_rate": 1.6946369637610413e-05, "loss": 0.2936, "step": 6050 }, { "epoch": 0.28, "grad_norm": 1.5907954591387232, "learning_rate": 1.6945299210444854e-05, "loss": 0.5075, "step": 6051 }, { "epoch": 0.28, "grad_norm": 0.8558424429360821, "learning_rate": 1.694422862951706e-05, "loss": 0.3208, "step": 6052 }, { "epoch": 0.28, "grad_norm": 0.3339334154799586, "learning_rate": 1.694315789485074e-05, "loss": 0.2779, "step": 6053 }, { "epoch": 0.28, "grad_norm": 0.41894491336564543, "learning_rate": 1.6942087006469593e-05, "loss": 0.3042, "step": 6054 }, { "epoch": 0.28, "grad_norm": 0.3447293429355375, "learning_rate": 1.6941015964397335e-05, "loss": 0.1583, "step": 6055 }, { "epoch": 0.28, "grad_norm": 0.8016445606098862, "learning_rate": 1.6939944768657667e-05, "loss": 0.3299, "step": 6056 }, { "epoch": 0.28, "grad_norm": 1.0262480428351024, "learning_rate": 1.6938873419274317e-05, "loss": 0.3908, "step": 6057 }, { "epoch": 0.28, "grad_norm": 0.5059157558725144, "learning_rate": 1.6937801916270994e-05, "loss": 0.2791, "step": 6058 }, { "epoch": 0.28, "grad_norm": 0.41307188278869295, "learning_rate": 1.6936730259671423e-05, "loss": 0.3029, "step": 6059 }, { "epoch": 0.28, "grad_norm": 0.2878714921238245, "learning_rate": 1.693565844949933e-05, "loss": 0.1378, "step": 6060 }, { "epoch": 0.28, "grad_norm": 0.348318136016693, "learning_rate": 1.6934586485778447e-05, "loss": 0.2673, "step": 6061 }, { "epoch": 0.28, "grad_norm": 0.44898234027538114, "learning_rate": 1.69335143685325e-05, "loss": 0.2887, "step": 6062 }, { "epoch": 0.28, "grad_norm": 0.827880079550586, "learning_rate": 1.6932442097785223e-05, "loss": 0.4794, "step": 6063 }, { "epoch": 0.28, "grad_norm": 1.1689612534022173, "learning_rate": 1.6931369673560366e-05, "loss": 0.5734, "step": 6064 }, { "epoch": 0.28, "grad_norm": 0.41124638834998045, "learning_rate": 1.693029709588166e-05, "loss": 0.212, "step": 6065 }, { "epoch": 0.28, "grad_norm": 0.28697558803579654, "learning_rate": 1.692922436477286e-05, "loss": 0.2121, "step": 6066 }, { "epoch": 0.28, "grad_norm": 0.734994815884358, "learning_rate": 1.6928151480257714e-05, "loss": 0.529, "step": 6067 }, { "epoch": 0.28, "grad_norm": 0.38106499627715945, "learning_rate": 1.6927078442359964e-05, "loss": 0.266, "step": 6068 }, { "epoch": 0.28, "grad_norm": 0.4141706436414943, "learning_rate": 1.6926005251103375e-05, "loss": 0.3363, "step": 6069 }, { "epoch": 0.28, "grad_norm": 1.4455654506801, "learning_rate": 1.6924931906511712e-05, "loss": 0.6979, "step": 6070 }, { "epoch": 0.28, "grad_norm": 0.40147789886089097, "learning_rate": 1.6923858408608725e-05, "loss": 0.2373, "step": 6071 }, { "epoch": 0.28, "grad_norm": 0.5687400302165845, "learning_rate": 1.6922784757418186e-05, "loss": 0.2791, "step": 6072 }, { "epoch": 0.28, "grad_norm": 0.39216835347918527, "learning_rate": 1.6921710952963865e-05, "loss": 0.3428, "step": 6073 }, { "epoch": 0.28, "grad_norm": 0.39676697199576405, "learning_rate": 1.6920636995269538e-05, "loss": 0.2624, "step": 6074 }, { "epoch": 0.28, "grad_norm": 1.3725132668730633, "learning_rate": 1.6919562884358974e-05, "loss": 0.8211, "step": 6075 }, { "epoch": 0.28, "grad_norm": 0.40003579464518224, "learning_rate": 1.691848862025596e-05, "loss": 0.2199, "step": 6076 }, { "epoch": 0.28, "grad_norm": 0.41450842589664544, "learning_rate": 1.6917414202984276e-05, "loss": 0.2788, "step": 6077 }, { "epoch": 0.28, "grad_norm": 0.4211359398473587, "learning_rate": 1.6916339632567708e-05, "loss": 0.2763, "step": 6078 }, { "epoch": 0.28, "grad_norm": 0.5568112877740296, "learning_rate": 1.6915264909030045e-05, "loss": 0.4786, "step": 6079 }, { "epoch": 0.28, "grad_norm": 0.47680523131497715, "learning_rate": 1.691419003239508e-05, "loss": 0.2764, "step": 6080 }, { "epoch": 0.28, "grad_norm": 0.2917364824866476, "learning_rate": 1.691311500268662e-05, "loss": 0.274, "step": 6081 }, { "epoch": 0.28, "grad_norm": 0.29228890359227333, "learning_rate": 1.691203981992845e-05, "loss": 0.1753, "step": 6082 }, { "epoch": 0.28, "grad_norm": 0.4614316109945287, "learning_rate": 1.691096448414438e-05, "loss": 0.274, "step": 6083 }, { "epoch": 0.28, "grad_norm": 0.45011371996749644, "learning_rate": 1.6909888995358218e-05, "loss": 0.3118, "step": 6084 }, { "epoch": 0.28, "grad_norm": 0.4074407229846666, "learning_rate": 1.6908813353593777e-05, "loss": 0.3055, "step": 6085 }, { "epoch": 0.28, "grad_norm": 0.41330223814334716, "learning_rate": 1.6907737558874865e-05, "loss": 0.3149, "step": 6086 }, { "epoch": 0.28, "grad_norm": 0.2545919589818811, "learning_rate": 1.69066616112253e-05, "loss": 0.1596, "step": 6087 }, { "epoch": 0.28, "grad_norm": 1.3876838952017077, "learning_rate": 1.69055855106689e-05, "loss": 0.5954, "step": 6088 }, { "epoch": 0.28, "grad_norm": 0.34892759320725875, "learning_rate": 1.69045092572295e-05, "loss": 0.2682, "step": 6089 }, { "epoch": 0.28, "grad_norm": 0.46515044839801595, "learning_rate": 1.6903432850930917e-05, "loss": 0.3354, "step": 6090 }, { "epoch": 0.28, "grad_norm": 0.6463762831254736, "learning_rate": 1.6902356291796988e-05, "loss": 0.3963, "step": 6091 }, { "epoch": 0.28, "grad_norm": 0.37110478885632686, "learning_rate": 1.690127957985154e-05, "loss": 0.2721, "step": 6092 }, { "epoch": 0.28, "grad_norm": 0.5141544951089635, "learning_rate": 1.6900202715118408e-05, "loss": 0.3917, "step": 6093 }, { "epoch": 0.28, "grad_norm": 0.2299420957358207, "learning_rate": 1.6899125697621442e-05, "loss": 0.1728, "step": 6094 }, { "epoch": 0.28, "grad_norm": 0.4430942682785905, "learning_rate": 1.6898048527384486e-05, "loss": 0.3086, "step": 6095 }, { "epoch": 0.28, "grad_norm": 0.6352324692695902, "learning_rate": 1.6896971204431383e-05, "loss": 0.3896, "step": 6096 }, { "epoch": 0.28, "grad_norm": 0.3987868557716098, "learning_rate": 1.6895893728785982e-05, "loss": 0.2843, "step": 6097 }, { "epoch": 0.28, "grad_norm": 0.42551849940246234, "learning_rate": 1.6894816100472145e-05, "loss": 0.2786, "step": 6098 }, { "epoch": 0.28, "grad_norm": 0.549571389187052, "learning_rate": 1.689373831951372e-05, "loss": 0.4, "step": 6099 }, { "epoch": 0.28, "grad_norm": 0.2723232488536985, "learning_rate": 1.6892660385934573e-05, "loss": 0.18, "step": 6100 }, { "epoch": 0.28, "grad_norm": 0.44022394434425827, "learning_rate": 1.689158229975857e-05, "loss": 0.2978, "step": 6101 }, { "epoch": 0.28, "grad_norm": 0.4916255036387172, "learning_rate": 1.689050406100958e-05, "loss": 0.4022, "step": 6102 }, { "epoch": 0.28, "grad_norm": 1.0689724710598505, "learning_rate": 1.6889425669711465e-05, "loss": 0.6996, "step": 6103 }, { "epoch": 0.28, "grad_norm": 0.3789200014912838, "learning_rate": 1.688834712588811e-05, "loss": 0.2177, "step": 6104 }, { "epoch": 0.28, "grad_norm": 0.3720698295016904, "learning_rate": 1.6887268429563387e-05, "loss": 0.3069, "step": 6105 }, { "epoch": 0.28, "grad_norm": 0.3285228171657959, "learning_rate": 1.6886189580761182e-05, "loss": 0.2083, "step": 6106 }, { "epoch": 0.28, "grad_norm": 0.3871790844958291, "learning_rate": 1.6885110579505376e-05, "loss": 0.2608, "step": 6107 }, { "epoch": 0.28, "grad_norm": 0.5183039018569343, "learning_rate": 1.6884031425819852e-05, "loss": 0.3943, "step": 6108 }, { "epoch": 0.28, "grad_norm": 0.3744984451984501, "learning_rate": 1.6882952119728513e-05, "loss": 0.3436, "step": 6109 }, { "epoch": 0.28, "grad_norm": 0.34136526441459136, "learning_rate": 1.6881872661255246e-05, "loss": 0.2276, "step": 6110 }, { "epoch": 0.28, "grad_norm": 1.482088844935787, "learning_rate": 1.6880793050423953e-05, "loss": 0.8034, "step": 6111 }, { "epoch": 0.28, "grad_norm": 0.358724382271073, "learning_rate": 1.687971328725853e-05, "loss": 0.2686, "step": 6112 }, { "epoch": 0.28, "grad_norm": 0.3051728609100929, "learning_rate": 1.6878633371782888e-05, "loss": 0.2411, "step": 6113 }, { "epoch": 0.28, "grad_norm": 0.3740353912766072, "learning_rate": 1.6877553304020932e-05, "loss": 0.2909, "step": 6114 }, { "epoch": 0.28, "grad_norm": 1.188675630684188, "learning_rate": 1.6876473083996577e-05, "loss": 0.7182, "step": 6115 }, { "epoch": 0.28, "grad_norm": 0.8364849318520382, "learning_rate": 1.6875392711733734e-05, "loss": 0.4632, "step": 6116 }, { "epoch": 0.28, "grad_norm": 0.31318200167225707, "learning_rate": 1.687431218725632e-05, "loss": 0.2528, "step": 6117 }, { "epoch": 0.28, "grad_norm": 0.6102266840681903, "learning_rate": 1.687323151058826e-05, "loss": 0.4819, "step": 6118 }, { "epoch": 0.28, "grad_norm": 0.5123954609596111, "learning_rate": 1.6872150681753483e-05, "loss": 0.3207, "step": 6119 }, { "epoch": 0.28, "grad_norm": 0.25815428061903417, "learning_rate": 1.6871069700775914e-05, "loss": 0.177, "step": 6120 }, { "epoch": 0.28, "grad_norm": 0.43121083925553133, "learning_rate": 1.686998856767948e-05, "loss": 0.309, "step": 6121 }, { "epoch": 0.28, "grad_norm": 0.6689713108866191, "learning_rate": 1.6868907282488123e-05, "loss": 0.3585, "step": 6122 }, { "epoch": 0.28, "grad_norm": 0.39755458221148543, "learning_rate": 1.6867825845225775e-05, "loss": 0.2721, "step": 6123 }, { "epoch": 0.28, "grad_norm": 0.5917405356812365, "learning_rate": 1.686674425591639e-05, "loss": 0.3943, "step": 6124 }, { "epoch": 0.28, "grad_norm": 0.40517840206435146, "learning_rate": 1.68656625145839e-05, "loss": 0.3089, "step": 6125 }, { "epoch": 0.28, "grad_norm": 0.31816780513752596, "learning_rate": 1.686458062125226e-05, "loss": 0.2008, "step": 6126 }, { "epoch": 0.28, "grad_norm": 0.4010119293325127, "learning_rate": 1.686349857594542e-05, "loss": 0.2866, "step": 6127 }, { "epoch": 0.28, "grad_norm": 0.4827422719525275, "learning_rate": 1.686241637868734e-05, "loss": 0.3284, "step": 6128 }, { "epoch": 0.28, "grad_norm": 0.42813336158300624, "learning_rate": 1.686133402950197e-05, "loss": 0.2933, "step": 6129 }, { "epoch": 0.28, "grad_norm": 0.7179846045814073, "learning_rate": 1.6860251528413282e-05, "loss": 0.3707, "step": 6130 }, { "epoch": 0.28, "grad_norm": 0.48624409515335804, "learning_rate": 1.6859168875445236e-05, "loss": 0.2945, "step": 6131 }, { "epoch": 0.28, "grad_norm": 0.33958895094849484, "learning_rate": 1.68580860706218e-05, "loss": 0.1913, "step": 6132 }, { "epoch": 0.28, "grad_norm": 0.42909751293322845, "learning_rate": 1.685700311396695e-05, "loss": 0.2727, "step": 6133 }, { "epoch": 0.28, "grad_norm": 0.9082685050114968, "learning_rate": 1.685592000550466e-05, "loss": 0.5663, "step": 6134 }, { "epoch": 0.28, "grad_norm": 0.4111659462283185, "learning_rate": 1.685483674525891e-05, "loss": 0.3172, "step": 6135 }, { "epoch": 0.28, "grad_norm": 0.42576452363918815, "learning_rate": 1.685375333325368e-05, "loss": 0.3157, "step": 6136 }, { "epoch": 0.28, "grad_norm": 0.7430281881367897, "learning_rate": 1.685266976951296e-05, "loss": 0.4179, "step": 6137 }, { "epoch": 0.28, "grad_norm": 0.3274380332259152, "learning_rate": 1.6851586054060734e-05, "loss": 0.2378, "step": 6138 }, { "epoch": 0.28, "grad_norm": 0.2979767455354676, "learning_rate": 1.6850502186920998e-05, "loss": 0.1382, "step": 6139 }, { "epoch": 0.28, "grad_norm": 0.4076645942048964, "learning_rate": 1.684941816811774e-05, "loss": 0.291, "step": 6140 }, { "epoch": 0.28, "grad_norm": 0.33549334423819066, "learning_rate": 1.684833399767497e-05, "loss": 0.3082, "step": 6141 }, { "epoch": 0.28, "grad_norm": 0.6949932639774373, "learning_rate": 1.6847249675616685e-05, "loss": 0.4677, "step": 6142 }, { "epoch": 0.28, "grad_norm": 0.40695497359213256, "learning_rate": 1.684616520196689e-05, "loss": 0.1946, "step": 6143 }, { "epoch": 0.28, "grad_norm": 0.33476583211033745, "learning_rate": 1.6845080576749597e-05, "loss": 0.2583, "step": 6144 }, { "epoch": 0.28, "grad_norm": 0.32779572957822056, "learning_rate": 1.684399579998882e-05, "loss": 0.2542, "step": 6145 }, { "epoch": 0.28, "grad_norm": 0.420084232412222, "learning_rate": 1.6842910871708567e-05, "loss": 0.2605, "step": 6146 }, { "epoch": 0.28, "grad_norm": 0.4490139826704272, "learning_rate": 1.684182579193287e-05, "loss": 0.3492, "step": 6147 }, { "epoch": 0.28, "grad_norm": 0.4775241914190654, "learning_rate": 1.684074056068574e-05, "loss": 0.3807, "step": 6148 }, { "epoch": 0.28, "grad_norm": 0.5671926803838095, "learning_rate": 1.683965517799121e-05, "loss": 0.2295, "step": 6149 }, { "epoch": 0.28, "grad_norm": 0.4400387959390509, "learning_rate": 1.6838569643873298e-05, "loss": 0.3444, "step": 6150 }, { "epoch": 0.28, "grad_norm": 0.2887850133710706, "learning_rate": 1.6837483958356054e-05, "loss": 0.223, "step": 6151 }, { "epoch": 0.28, "grad_norm": 0.3737983166875972, "learning_rate": 1.6836398121463503e-05, "loss": 0.2646, "step": 6152 }, { "epoch": 0.28, "grad_norm": 0.41767497772136275, "learning_rate": 1.6835312133219686e-05, "loss": 0.3283, "step": 6153 }, { "epoch": 0.28, "grad_norm": 0.7531149947347106, "learning_rate": 1.6834225993648645e-05, "loss": 0.5021, "step": 6154 }, { "epoch": 0.28, "grad_norm": 1.6108136139877414, "learning_rate": 1.6833139702774427e-05, "loss": 0.761, "step": 6155 }, { "epoch": 0.28, "grad_norm": 0.34446432557055656, "learning_rate": 1.6832053260621087e-05, "loss": 0.217, "step": 6156 }, { "epoch": 0.28, "grad_norm": 0.37263497557451325, "learning_rate": 1.6830966667212666e-05, "loss": 0.2816, "step": 6157 }, { "epoch": 0.28, "grad_norm": 0.9423084149383641, "learning_rate": 1.682987992257323e-05, "loss": 0.498, "step": 6158 }, { "epoch": 0.28, "grad_norm": 0.388627776621658, "learning_rate": 1.6828793026726832e-05, "loss": 0.2476, "step": 6159 }, { "epoch": 0.28, "grad_norm": 0.4172312075947139, "learning_rate": 1.6827705979697543e-05, "loss": 0.3124, "step": 6160 }, { "epoch": 0.28, "grad_norm": 0.4529089485767976, "learning_rate": 1.6826618781509424e-05, "loss": 0.3475, "step": 6161 }, { "epoch": 0.28, "grad_norm": 0.3372153196100555, "learning_rate": 1.6825531432186545e-05, "loss": 0.2345, "step": 6162 }, { "epoch": 0.28, "grad_norm": 0.4599082257205266, "learning_rate": 1.6824443931752975e-05, "loss": 0.3293, "step": 6163 }, { "epoch": 0.28, "grad_norm": 0.38903388561941266, "learning_rate": 1.6823356280232796e-05, "loss": 0.3224, "step": 6164 }, { "epoch": 0.28, "grad_norm": 0.396568310019041, "learning_rate": 1.682226847765008e-05, "loss": 0.2574, "step": 6165 }, { "epoch": 0.28, "grad_norm": 0.4503887692573361, "learning_rate": 1.6821180524028923e-05, "loss": 0.3259, "step": 6166 }, { "epoch": 0.28, "grad_norm": 0.8104955755343283, "learning_rate": 1.6820092419393402e-05, "loss": 0.4164, "step": 6167 }, { "epoch": 0.28, "grad_norm": 0.4170117764541688, "learning_rate": 1.6819004163767603e-05, "loss": 0.2852, "step": 6168 }, { "epoch": 0.28, "grad_norm": 0.40572143292102075, "learning_rate": 1.681791575717563e-05, "loss": 0.3082, "step": 6169 }, { "epoch": 0.28, "grad_norm": 0.7091611557836884, "learning_rate": 1.681682719964157e-05, "loss": 0.5656, "step": 6170 }, { "epoch": 0.28, "grad_norm": 0.33634916313020513, "learning_rate": 1.6815738491189527e-05, "loss": 0.2604, "step": 6171 }, { "epoch": 0.28, "grad_norm": 0.2682924595643512, "learning_rate": 1.6814649631843604e-05, "loss": 0.1954, "step": 6172 }, { "epoch": 0.28, "grad_norm": 1.0169714608082723, "learning_rate": 1.6813560621627907e-05, "loss": 0.6078, "step": 6173 }, { "epoch": 0.28, "grad_norm": 0.37130390016311093, "learning_rate": 1.681247146056654e-05, "loss": 0.2723, "step": 6174 }, { "epoch": 0.28, "grad_norm": 0.7861306371018686, "learning_rate": 1.6811382148683627e-05, "loss": 0.379, "step": 6175 }, { "epoch": 0.28, "grad_norm": 0.4085449472360082, "learning_rate": 1.6810292686003275e-05, "loss": 0.3176, "step": 6176 }, { "epoch": 0.28, "grad_norm": 0.3986526865305402, "learning_rate": 1.680920307254961e-05, "loss": 0.3029, "step": 6177 }, { "epoch": 0.28, "grad_norm": 0.444549614434705, "learning_rate": 1.6808113308346744e-05, "loss": 0.2624, "step": 6178 }, { "epoch": 0.28, "grad_norm": 0.33771641695056204, "learning_rate": 1.6807023393418814e-05, "loss": 0.2158, "step": 6179 }, { "epoch": 0.28, "grad_norm": 0.33512782605013125, "learning_rate": 1.680593332778995e-05, "loss": 0.2657, "step": 6180 }, { "epoch": 0.28, "grad_norm": 0.9359137319840171, "learning_rate": 1.680484311148428e-05, "loss": 0.5217, "step": 6181 }, { "epoch": 0.28, "grad_norm": 0.6859444847448948, "learning_rate": 1.6803752744525944e-05, "loss": 0.3614, "step": 6182 }, { "epoch": 0.28, "grad_norm": 0.5275907568231512, "learning_rate": 1.6802662226939077e-05, "loss": 0.3205, "step": 6183 }, { "epoch": 0.28, "grad_norm": 0.32306567555137367, "learning_rate": 1.6801571558747827e-05, "loss": 0.2753, "step": 6184 }, { "epoch": 0.28, "grad_norm": 0.30279117848793197, "learning_rate": 1.6800480739976336e-05, "loss": 0.1034, "step": 6185 }, { "epoch": 0.28, "grad_norm": 0.42352297994000604, "learning_rate": 1.6799389770648757e-05, "loss": 0.312, "step": 6186 }, { "epoch": 0.28, "grad_norm": 1.268347074985257, "learning_rate": 1.6798298650789243e-05, "loss": 0.5, "step": 6187 }, { "epoch": 0.28, "grad_norm": 0.3486603125602323, "learning_rate": 1.6797207380421946e-05, "loss": 0.2772, "step": 6188 }, { "epoch": 0.28, "grad_norm": 0.3596056668236512, "learning_rate": 1.679611595957103e-05, "loss": 0.2817, "step": 6189 }, { "epoch": 0.28, "grad_norm": 0.3489577443385812, "learning_rate": 1.679502438826066e-05, "loss": 0.2316, "step": 6190 }, { "epoch": 0.28, "grad_norm": 0.9594459643418749, "learning_rate": 1.6793932666514993e-05, "loss": 0.6075, "step": 6191 }, { "epoch": 0.28, "grad_norm": 0.31062788415925635, "learning_rate": 1.679284079435821e-05, "loss": 0.2363, "step": 6192 }, { "epoch": 0.28, "grad_norm": 0.8074440562023991, "learning_rate": 1.679174877181448e-05, "loss": 0.4801, "step": 6193 }, { "epoch": 0.28, "grad_norm": 1.3176445324324617, "learning_rate": 1.6790656598907972e-05, "loss": 0.7438, "step": 6194 }, { "epoch": 0.28, "grad_norm": 0.35937571183002975, "learning_rate": 1.678956427566288e-05, "loss": 0.2186, "step": 6195 }, { "epoch": 0.28, "grad_norm": 0.39339232038619826, "learning_rate": 1.6788471802103373e-05, "loss": 0.3179, "step": 6196 }, { "epoch": 0.28, "grad_norm": 0.33535457427796556, "learning_rate": 1.6787379178253642e-05, "loss": 0.2181, "step": 6197 }, { "epoch": 0.28, "grad_norm": 0.38990069809666605, "learning_rate": 1.6786286404137878e-05, "loss": 0.2579, "step": 6198 }, { "epoch": 0.28, "grad_norm": 0.9990616457067675, "learning_rate": 1.678519347978028e-05, "loss": 0.6198, "step": 6199 }, { "epoch": 0.28, "grad_norm": 0.5088490275237567, "learning_rate": 1.678410040520503e-05, "loss": 0.3489, "step": 6200 }, { "epoch": 0.28, "grad_norm": 0.42355370918877006, "learning_rate": 1.678300718043634e-05, "loss": 0.215, "step": 6201 }, { "epoch": 0.28, "grad_norm": 0.3883425985063007, "learning_rate": 1.6781913805498407e-05, "loss": 0.2396, "step": 6202 }, { "epoch": 0.28, "grad_norm": 0.36173677180693203, "learning_rate": 1.6780820280415443e-05, "loss": 0.3148, "step": 6203 }, { "epoch": 0.29, "grad_norm": 0.44475274537331616, "learning_rate": 1.6779726605211647e-05, "loss": 0.2964, "step": 6204 }, { "epoch": 0.29, "grad_norm": 0.4406160184383901, "learning_rate": 1.6778632779911244e-05, "loss": 0.2867, "step": 6205 }, { "epoch": 0.29, "grad_norm": 1.571132556379544, "learning_rate": 1.677753880453844e-05, "loss": 0.8139, "step": 6206 }, { "epoch": 0.29, "grad_norm": 0.44379481212780597, "learning_rate": 1.6776444679117462e-05, "loss": 0.3072, "step": 6207 }, { "epoch": 0.29, "grad_norm": 0.3857561584044854, "learning_rate": 1.677535040367253e-05, "loss": 0.2548, "step": 6208 }, { "epoch": 0.29, "grad_norm": 0.4453954411026201, "learning_rate": 1.6774255978227868e-05, "loss": 0.3191, "step": 6209 }, { "epoch": 0.29, "grad_norm": 0.5031902846841962, "learning_rate": 1.6773161402807708e-05, "loss": 0.3194, "step": 6210 }, { "epoch": 0.29, "grad_norm": 0.36845310678394955, "learning_rate": 1.6772066677436286e-05, "loss": 0.1839, "step": 6211 }, { "epoch": 0.29, "grad_norm": 0.4569215786045307, "learning_rate": 1.6770971802137833e-05, "loss": 0.3375, "step": 6212 }, { "epoch": 0.29, "grad_norm": 0.378369893867642, "learning_rate": 1.676987677693659e-05, "loss": 0.2721, "step": 6213 }, { "epoch": 0.29, "grad_norm": 0.9140137734247527, "learning_rate": 1.67687816018568e-05, "loss": 0.3614, "step": 6214 }, { "epoch": 0.29, "grad_norm": 0.5566484769908658, "learning_rate": 1.6767686276922708e-05, "loss": 0.3893, "step": 6215 }, { "epoch": 0.29, "grad_norm": 0.32996133843398096, "learning_rate": 1.6766590802158567e-05, "loss": 0.2613, "step": 6216 }, { "epoch": 0.29, "grad_norm": 0.33123740142903274, "learning_rate": 1.6765495177588626e-05, "loss": 0.1714, "step": 6217 }, { "epoch": 0.29, "grad_norm": 1.1497671674929857, "learning_rate": 1.6764399403237142e-05, "loss": 0.5797, "step": 6218 }, { "epoch": 0.29, "grad_norm": 0.7003130140303847, "learning_rate": 1.6763303479128375e-05, "loss": 0.3869, "step": 6219 }, { "epoch": 0.29, "grad_norm": 0.37767927823520225, "learning_rate": 1.676220740528659e-05, "loss": 0.3077, "step": 6220 }, { "epoch": 0.29, "grad_norm": 0.8986653192060193, "learning_rate": 1.6761111181736046e-05, "loss": 0.4047, "step": 6221 }, { "epoch": 0.29, "grad_norm": 0.4121861433500796, "learning_rate": 1.676001480850102e-05, "loss": 0.1735, "step": 6222 }, { "epoch": 0.29, "grad_norm": 0.35794801121010156, "learning_rate": 1.675891828560578e-05, "loss": 0.2502, "step": 6223 }, { "epoch": 0.29, "grad_norm": 0.4697442372345894, "learning_rate": 1.6757821613074602e-05, "loss": 0.2861, "step": 6224 }, { "epoch": 0.29, "grad_norm": 0.4315078233040824, "learning_rate": 1.675672479093177e-05, "loss": 0.2932, "step": 6225 }, { "epoch": 0.29, "grad_norm": 0.6736215667094393, "learning_rate": 1.6755627819201565e-05, "loss": 0.4202, "step": 6226 }, { "epoch": 0.29, "grad_norm": 0.6467972990353513, "learning_rate": 1.6754530697908266e-05, "loss": 0.3264, "step": 6227 }, { "epoch": 0.29, "grad_norm": 0.3532752657948103, "learning_rate": 1.6753433427076172e-05, "loss": 0.2858, "step": 6228 }, { "epoch": 0.29, "grad_norm": 0.28755185743151107, "learning_rate": 1.675233600672957e-05, "loss": 0.2124, "step": 6229 }, { "epoch": 0.29, "grad_norm": 1.4080205338288985, "learning_rate": 1.6751238436892754e-05, "loss": 0.7149, "step": 6230 }, { "epoch": 0.29, "grad_norm": 0.42433156002072586, "learning_rate": 1.675014071759003e-05, "loss": 0.2412, "step": 6231 }, { "epoch": 0.29, "grad_norm": 0.39373225192887057, "learning_rate": 1.6749042848845693e-05, "loss": 0.3007, "step": 6232 }, { "epoch": 0.29, "grad_norm": 0.9977138063994544, "learning_rate": 1.6747944830684052e-05, "loss": 0.4683, "step": 6233 }, { "epoch": 0.29, "grad_norm": 0.3833623733107689, "learning_rate": 1.674684666312942e-05, "loss": 0.2225, "step": 6234 }, { "epoch": 0.29, "grad_norm": 0.30157982985527987, "learning_rate": 1.6745748346206102e-05, "loss": 0.1823, "step": 6235 }, { "epoch": 0.29, "grad_norm": 0.4053713004656121, "learning_rate": 1.6744649879938415e-05, "loss": 0.3436, "step": 6236 }, { "epoch": 0.29, "grad_norm": 0.4959815269437256, "learning_rate": 1.6743551264350685e-05, "loss": 0.2365, "step": 6237 }, { "epoch": 0.29, "grad_norm": 0.46964425240217167, "learning_rate": 1.674245249946723e-05, "loss": 0.352, "step": 6238 }, { "epoch": 0.29, "grad_norm": 0.38553237561564896, "learning_rate": 1.674135358531237e-05, "loss": 0.3262, "step": 6239 }, { "epoch": 0.29, "grad_norm": 0.3912276026898913, "learning_rate": 1.6740254521910444e-05, "loss": 0.1391, "step": 6240 }, { "epoch": 0.29, "grad_norm": 0.35203268476311156, "learning_rate": 1.6739155309285777e-05, "loss": 0.2674, "step": 6241 }, { "epoch": 0.29, "grad_norm": 0.5302204562081397, "learning_rate": 1.6738055947462708e-05, "loss": 0.3184, "step": 6242 }, { "epoch": 0.29, "grad_norm": 0.43525642485272226, "learning_rate": 1.6736956436465573e-05, "loss": 0.3481, "step": 6243 }, { "epoch": 0.29, "grad_norm": 0.34913289211700765, "learning_rate": 1.6735856776318717e-05, "loss": 0.2811, "step": 6244 }, { "epoch": 0.29, "grad_norm": 1.2958632343230958, "learning_rate": 1.673475696704648e-05, "loss": 0.6617, "step": 6245 }, { "epoch": 0.29, "grad_norm": 0.5950695357489686, "learning_rate": 1.6733657008673222e-05, "loss": 0.3777, "step": 6246 }, { "epoch": 0.29, "grad_norm": 0.2772659339893456, "learning_rate": 1.6732556901223285e-05, "loss": 0.2371, "step": 6247 }, { "epoch": 0.29, "grad_norm": 1.3638579828752004, "learning_rate": 1.6731456644721025e-05, "loss": 0.7876, "step": 6248 }, { "epoch": 0.29, "grad_norm": 0.5403711670639993, "learning_rate": 1.6730356239190803e-05, "loss": 0.3488, "step": 6249 }, { "epoch": 0.29, "grad_norm": 0.35083495921364105, "learning_rate": 1.6729255684656985e-05, "loss": 0.2192, "step": 6250 }, { "epoch": 0.29, "grad_norm": 0.4141459537445995, "learning_rate": 1.6728154981143926e-05, "loss": 0.3439, "step": 6251 }, { "epoch": 0.29, "grad_norm": 0.5811140674139026, "learning_rate": 1.6727054128676003e-05, "loss": 0.3093, "step": 6252 }, { "epoch": 0.29, "grad_norm": 0.30939466067042687, "learning_rate": 1.6725953127277583e-05, "loss": 0.1993, "step": 6253 }, { "epoch": 0.29, "grad_norm": 0.8788591395332983, "learning_rate": 1.6724851976973045e-05, "loss": 0.5366, "step": 6254 }, { "epoch": 0.29, "grad_norm": 0.4335156840167137, "learning_rate": 1.6723750677786766e-05, "loss": 0.3394, "step": 6255 }, { "epoch": 0.29, "grad_norm": 0.3888677893498946, "learning_rate": 1.672264922974313e-05, "loss": 0.3269, "step": 6256 }, { "epoch": 0.29, "grad_norm": 0.3866378932333481, "learning_rate": 1.672154763286652e-05, "loss": 0.1418, "step": 6257 }, { "epoch": 0.29, "grad_norm": 0.785775133398653, "learning_rate": 1.672044588718132e-05, "loss": 0.443, "step": 6258 }, { "epoch": 0.29, "grad_norm": 0.3234054495124574, "learning_rate": 1.6719343992711927e-05, "loss": 0.2352, "step": 6259 }, { "epoch": 0.29, "grad_norm": 0.41064044354499357, "learning_rate": 1.6718241949482728e-05, "loss": 0.3087, "step": 6260 }, { "epoch": 0.29, "grad_norm": 0.6730244103584585, "learning_rate": 1.6717139757518136e-05, "loss": 0.4425, "step": 6261 }, { "epoch": 0.29, "grad_norm": 0.3220522601637308, "learning_rate": 1.671603741684254e-05, "loss": 0.2334, "step": 6262 }, { "epoch": 0.29, "grad_norm": 0.40052128746084487, "learning_rate": 1.671493492748035e-05, "loss": 0.264, "step": 6263 }, { "epoch": 0.29, "grad_norm": 0.9299012571992346, "learning_rate": 1.671383228945597e-05, "loss": 0.4632, "step": 6264 }, { "epoch": 0.29, "grad_norm": 0.42619825889112395, "learning_rate": 1.6712729502793817e-05, "loss": 0.3148, "step": 6265 }, { "epoch": 0.29, "grad_norm": 0.7420893871804587, "learning_rate": 1.67116265675183e-05, "loss": 0.3607, "step": 6266 }, { "epoch": 0.29, "grad_norm": 0.3503878966888698, "learning_rate": 1.6710523483653843e-05, "loss": 0.3021, "step": 6267 }, { "epoch": 0.29, "grad_norm": 0.44673891266326704, "learning_rate": 1.6709420251224857e-05, "loss": 0.3383, "step": 6268 }, { "epoch": 0.29, "grad_norm": 0.34466338094666155, "learning_rate": 1.670831687025578e-05, "loss": 0.2059, "step": 6269 }, { "epoch": 0.29, "grad_norm": 0.3384130873697249, "learning_rate": 1.6707213340771028e-05, "loss": 0.1287, "step": 6270 }, { "epoch": 0.29, "grad_norm": 0.44415776855875705, "learning_rate": 1.6706109662795042e-05, "loss": 0.3424, "step": 6271 }, { "epoch": 0.29, "grad_norm": 0.4389914318106942, "learning_rate": 1.6705005836352252e-05, "loss": 0.3585, "step": 6272 }, { "epoch": 0.29, "grad_norm": 0.4681074547384896, "learning_rate": 1.670390186146709e-05, "loss": 0.3217, "step": 6273 }, { "epoch": 0.29, "grad_norm": 0.40079443358366845, "learning_rate": 1.6702797738164006e-05, "loss": 0.2801, "step": 6274 }, { "epoch": 0.29, "grad_norm": 0.26748558497279584, "learning_rate": 1.6701693466467438e-05, "loss": 0.2378, "step": 6275 }, { "epoch": 0.29, "grad_norm": 0.63155720708638, "learning_rate": 1.6700589046401838e-05, "loss": 0.1699, "step": 6276 }, { "epoch": 0.29, "grad_norm": 0.389828455653685, "learning_rate": 1.669948447799165e-05, "loss": 0.323, "step": 6277 }, { "epoch": 0.29, "grad_norm": 0.6006399960313791, "learning_rate": 1.669837976126134e-05, "loss": 0.4766, "step": 6278 }, { "epoch": 0.29, "grad_norm": 0.3396219087726241, "learning_rate": 1.6697274896235352e-05, "loss": 0.2598, "step": 6279 }, { "epoch": 0.29, "grad_norm": 0.4066565505831249, "learning_rate": 1.6696169882938155e-05, "loss": 0.3105, "step": 6280 }, { "epoch": 0.29, "grad_norm": 0.3528771325034826, "learning_rate": 1.669506472139421e-05, "loss": 0.1733, "step": 6281 }, { "epoch": 0.29, "grad_norm": 0.6296824056540992, "learning_rate": 1.6693959411627988e-05, "loss": 0.4715, "step": 6282 }, { "epoch": 0.29, "grad_norm": 0.2955500196311048, "learning_rate": 1.6692853953663952e-05, "loss": 0.23, "step": 6283 }, { "epoch": 0.29, "grad_norm": 0.7768370151595468, "learning_rate": 1.6691748347526583e-05, "loss": 0.5156, "step": 6284 }, { "epoch": 0.29, "grad_norm": 1.2803989173196917, "learning_rate": 1.6690642593240352e-05, "loss": 0.8643, "step": 6285 }, { "epoch": 0.29, "grad_norm": 0.3587370619822952, "learning_rate": 1.6689536690829747e-05, "loss": 0.2067, "step": 6286 }, { "epoch": 0.29, "grad_norm": 0.2596244476789631, "learning_rate": 1.6688430640319245e-05, "loss": 0.2299, "step": 6287 }, { "epoch": 0.29, "grad_norm": 0.7635178463698319, "learning_rate": 1.6687324441733334e-05, "loss": 0.4121, "step": 6288 }, { "epoch": 0.29, "grad_norm": 0.352632295180572, "learning_rate": 1.6686218095096506e-05, "loss": 0.2542, "step": 6289 }, { "epoch": 0.29, "grad_norm": 0.7364891852525304, "learning_rate": 1.6685111600433254e-05, "loss": 0.5289, "step": 6290 }, { "epoch": 0.29, "grad_norm": 0.33733571839356374, "learning_rate": 1.6684004957768074e-05, "loss": 0.2873, "step": 6291 }, { "epoch": 0.29, "grad_norm": 0.45746261266090393, "learning_rate": 1.6682898167125466e-05, "loss": 0.3198, "step": 6292 }, { "epoch": 0.29, "grad_norm": 0.24579898622521043, "learning_rate": 1.6681791228529935e-05, "loss": 0.129, "step": 6293 }, { "epoch": 0.29, "grad_norm": 0.8075615873550177, "learning_rate": 1.6680684142005982e-05, "loss": 0.4445, "step": 6294 }, { "epoch": 0.29, "grad_norm": 0.30063780559748843, "learning_rate": 1.6679576907578127e-05, "loss": 0.2545, "step": 6295 }, { "epoch": 0.29, "grad_norm": 0.493717360174348, "learning_rate": 1.6678469525270875e-05, "loss": 0.3204, "step": 6296 }, { "epoch": 0.29, "grad_norm": 1.5194390153763373, "learning_rate": 1.6677361995108744e-05, "loss": 0.8334, "step": 6297 }, { "epoch": 0.29, "grad_norm": 0.37078101996787816, "learning_rate": 1.6676254317116253e-05, "loss": 0.2861, "step": 6298 }, { "epoch": 0.29, "grad_norm": 0.3022909530929876, "learning_rate": 1.6675146491317925e-05, "loss": 0.2136, "step": 6299 }, { "epoch": 0.29, "grad_norm": 0.5895145806009565, "learning_rate": 1.667403851773829e-05, "loss": 0.3422, "step": 6300 }, { "epoch": 0.29, "grad_norm": 0.38695642650908796, "learning_rate": 1.6672930396401878e-05, "loss": 0.3004, "step": 6301 }, { "epoch": 0.29, "grad_norm": 1.067488679094624, "learning_rate": 1.667182212733321e-05, "loss": 0.446, "step": 6302 }, { "epoch": 0.29, "grad_norm": 0.41294349476655834, "learning_rate": 1.6670713710556836e-05, "loss": 0.3583, "step": 6303 }, { "epoch": 0.29, "grad_norm": 0.3805121091521622, "learning_rate": 1.666960514609729e-05, "loss": 0.2943, "step": 6304 }, { "epoch": 0.29, "grad_norm": 0.8663100037922389, "learning_rate": 1.666849643397911e-05, "loss": 0.5069, "step": 6305 }, { "epoch": 0.29, "grad_norm": 0.3082071252749816, "learning_rate": 1.666738757422685e-05, "loss": 0.2648, "step": 6306 }, { "epoch": 0.29, "grad_norm": 0.455373458543171, "learning_rate": 1.6666278566865056e-05, "loss": 0.3105, "step": 6307 }, { "epoch": 0.29, "grad_norm": 0.3925738256555642, "learning_rate": 1.6665169411918278e-05, "loss": 0.2662, "step": 6308 }, { "epoch": 0.29, "grad_norm": 0.5439166457188115, "learning_rate": 1.6664060109411072e-05, "loss": 0.2752, "step": 6309 }, { "epoch": 0.29, "grad_norm": 0.39004279682172305, "learning_rate": 1.6662950659368e-05, "loss": 0.2738, "step": 6310 }, { "epoch": 0.29, "grad_norm": 0.4535619052993781, "learning_rate": 1.666184106181362e-05, "loss": 0.3534, "step": 6311 }, { "epoch": 0.29, "grad_norm": 0.5175851342474954, "learning_rate": 1.6660731316772503e-05, "loss": 0.261, "step": 6312 }, { "epoch": 0.29, "grad_norm": 0.27573522809198897, "learning_rate": 1.665962142426921e-05, "loss": 0.2235, "step": 6313 }, { "epoch": 0.29, "grad_norm": 1.1629230784224807, "learning_rate": 1.6658511384328325e-05, "loss": 0.7181, "step": 6314 }, { "epoch": 0.29, "grad_norm": 0.3318162751432222, "learning_rate": 1.6657401196974405e-05, "loss": 0.2674, "step": 6315 }, { "epoch": 0.29, "grad_norm": 0.3459968045154157, "learning_rate": 1.6656290862232047e-05, "loss": 0.2638, "step": 6316 }, { "epoch": 0.29, "grad_norm": 0.9530287036238254, "learning_rate": 1.6655180380125823e-05, "loss": 0.4683, "step": 6317 }, { "epoch": 0.29, "grad_norm": 0.4835353857930054, "learning_rate": 1.6654069750680316e-05, "loss": 0.3997, "step": 6318 }, { "epoch": 0.29, "grad_norm": 0.2558805157049701, "learning_rate": 1.6652958973920122e-05, "loss": 0.1935, "step": 6319 }, { "epoch": 0.29, "grad_norm": 0.4554211813492704, "learning_rate": 1.6651848049869827e-05, "loss": 0.2762, "step": 6320 }, { "epoch": 0.29, "grad_norm": 1.3292099913978672, "learning_rate": 1.6650736978554028e-05, "loss": 0.8286, "step": 6321 }, { "epoch": 0.29, "grad_norm": 0.44992536777672276, "learning_rate": 1.6649625759997323e-05, "loss": 0.2347, "step": 6322 }, { "epoch": 0.29, "grad_norm": 0.4761697221142709, "learning_rate": 1.6648514394224312e-05, "loss": 0.3489, "step": 6323 }, { "epoch": 0.29, "grad_norm": 0.4676053973870956, "learning_rate": 1.66474028812596e-05, "loss": 0.3047, "step": 6324 }, { "epoch": 0.29, "grad_norm": 0.27494626712851433, "learning_rate": 1.6646291221127796e-05, "loss": 0.1426, "step": 6325 }, { "epoch": 0.29, "grad_norm": 0.5364449897066189, "learning_rate": 1.664517941385351e-05, "loss": 0.3772, "step": 6326 }, { "epoch": 0.29, "grad_norm": 0.430613294379969, "learning_rate": 1.6644067459461352e-05, "loss": 0.3259, "step": 6327 }, { "epoch": 0.29, "grad_norm": 0.4089229958897362, "learning_rate": 1.664295535797595e-05, "loss": 0.2233, "step": 6328 }, { "epoch": 0.29, "grad_norm": 0.5861851110480223, "learning_rate": 1.6641843109421913e-05, "loss": 0.3913, "step": 6329 }, { "epoch": 0.29, "grad_norm": 0.5321857399413813, "learning_rate": 1.6640730713823877e-05, "loss": 0.3773, "step": 6330 }, { "epoch": 0.29, "grad_norm": 0.37885220313592155, "learning_rate": 1.6639618171206458e-05, "loss": 0.2714, "step": 6331 }, { "epoch": 0.29, "grad_norm": 0.29796201483176976, "learning_rate": 1.6638505481594298e-05, "loss": 0.1663, "step": 6332 }, { "epoch": 0.29, "grad_norm": 0.8679850758011335, "learning_rate": 1.663739264501202e-05, "loss": 0.542, "step": 6333 }, { "epoch": 0.29, "grad_norm": 0.4190510430581646, "learning_rate": 1.6636279661484273e-05, "loss": 0.3362, "step": 6334 }, { "epoch": 0.29, "grad_norm": 0.32686844721063457, "learning_rate": 1.663516653103568e-05, "loss": 0.2784, "step": 6335 }, { "epoch": 0.29, "grad_norm": 1.04724465035836, "learning_rate": 1.6634053253690903e-05, "loss": 0.5535, "step": 6336 }, { "epoch": 0.29, "grad_norm": 0.310551281913641, "learning_rate": 1.6632939829474576e-05, "loss": 0.2248, "step": 6337 }, { "epoch": 0.29, "grad_norm": 0.5606145855846372, "learning_rate": 1.6631826258411358e-05, "loss": 0.3006, "step": 6338 }, { "epoch": 0.29, "grad_norm": 0.45055836410774835, "learning_rate": 1.6630712540525896e-05, "loss": 0.3416, "step": 6339 }, { "epoch": 0.29, "grad_norm": 0.39080085480333115, "learning_rate": 1.6629598675842854e-05, "loss": 0.2752, "step": 6340 }, { "epoch": 0.29, "grad_norm": 0.39700730166110776, "learning_rate": 1.6628484664386882e-05, "loss": 0.1905, "step": 6341 }, { "epoch": 0.29, "grad_norm": 0.4366012834277006, "learning_rate": 1.6627370506182648e-05, "loss": 0.3061, "step": 6342 }, { "epoch": 0.29, "grad_norm": 0.6440574917110293, "learning_rate": 1.662625620125482e-05, "loss": 0.2987, "step": 6343 }, { "epoch": 0.29, "grad_norm": 0.35728094277128986, "learning_rate": 1.6625141749628065e-05, "loss": 0.263, "step": 6344 }, { "epoch": 0.29, "grad_norm": 0.562164155255936, "learning_rate": 1.6624027151327057e-05, "loss": 0.3814, "step": 6345 }, { "epoch": 0.29, "grad_norm": 0.43906920824476925, "learning_rate": 1.6622912406376473e-05, "loss": 0.3024, "step": 6346 }, { "epoch": 0.29, "grad_norm": 0.33971591982119675, "learning_rate": 1.6621797514800993e-05, "loss": 0.2781, "step": 6347 }, { "epoch": 0.29, "grad_norm": 1.131439514793796, "learning_rate": 1.66206824766253e-05, "loss": 0.3736, "step": 6348 }, { "epoch": 0.29, "grad_norm": 0.5980276288514539, "learning_rate": 1.6619567291874075e-05, "loss": 0.3617, "step": 6349 }, { "epoch": 0.29, "grad_norm": 0.35631731997263694, "learning_rate": 1.6618451960572008e-05, "loss": 0.3174, "step": 6350 }, { "epoch": 0.29, "grad_norm": 0.41425111626751443, "learning_rate": 1.6617336482743795e-05, "loss": 0.3276, "step": 6351 }, { "epoch": 0.29, "grad_norm": 0.3922338544363831, "learning_rate": 1.6616220858414133e-05, "loss": 0.3006, "step": 6352 }, { "epoch": 0.29, "grad_norm": 0.2975966788566311, "learning_rate": 1.6615105087607713e-05, "loss": 0.1881, "step": 6353 }, { "epoch": 0.29, "grad_norm": 0.40539495399710473, "learning_rate": 1.6613989170349246e-05, "loss": 0.2697, "step": 6354 }, { "epoch": 0.29, "grad_norm": 0.4518923962727792, "learning_rate": 1.661287310666343e-05, "loss": 0.3097, "step": 6355 }, { "epoch": 0.29, "grad_norm": 0.5939946551618978, "learning_rate": 1.661175689657498e-05, "loss": 0.413, "step": 6356 }, { "epoch": 0.29, "grad_norm": 0.6031200865752705, "learning_rate": 1.6610640540108606e-05, "loss": 0.4633, "step": 6357 }, { "epoch": 0.29, "grad_norm": 0.3015549065368941, "learning_rate": 1.660952403728902e-05, "loss": 0.2345, "step": 6358 }, { "epoch": 0.29, "grad_norm": 0.3099986315859341, "learning_rate": 1.6608407388140943e-05, "loss": 0.2063, "step": 6359 }, { "epoch": 0.29, "grad_norm": 1.2604889669744475, "learning_rate": 1.6607290592689094e-05, "loss": 0.5545, "step": 6360 }, { "epoch": 0.29, "grad_norm": 0.40911853771604456, "learning_rate": 1.6606173650958203e-05, "loss": 0.1232, "step": 6361 }, { "epoch": 0.29, "grad_norm": 0.38924994562926674, "learning_rate": 1.660505656297299e-05, "loss": 0.3234, "step": 6362 }, { "epoch": 0.29, "grad_norm": 0.5997168965650626, "learning_rate": 1.6603939328758195e-05, "loss": 0.3821, "step": 6363 }, { "epoch": 0.29, "grad_norm": 0.445420043278386, "learning_rate": 1.6602821948338546e-05, "loss": 0.1303, "step": 6364 }, { "epoch": 0.29, "grad_norm": 0.3114484834562211, "learning_rate": 1.660170442173878e-05, "loss": 0.2131, "step": 6365 }, { "epoch": 0.29, "grad_norm": 0.4656108937759206, "learning_rate": 1.6600586748983642e-05, "loss": 0.346, "step": 6366 }, { "epoch": 0.29, "grad_norm": 0.3307842997711832, "learning_rate": 1.659946893009788e-05, "loss": 0.1186, "step": 6367 }, { "epoch": 0.29, "grad_norm": 0.50826475733387, "learning_rate": 1.6598350965106233e-05, "loss": 0.3489, "step": 6368 }, { "epoch": 0.29, "grad_norm": 1.0728728318204348, "learning_rate": 1.6597232854033453e-05, "loss": 0.6073, "step": 6369 }, { "epoch": 0.29, "grad_norm": 0.38497940963281974, "learning_rate": 1.65961145969043e-05, "loss": 0.3017, "step": 6370 }, { "epoch": 0.29, "grad_norm": 0.28089285923508095, "learning_rate": 1.6594996193743525e-05, "loss": 0.1894, "step": 6371 }, { "epoch": 0.29, "grad_norm": 0.47117422290957506, "learning_rate": 1.6593877644575892e-05, "loss": 0.2904, "step": 6372 }, { "epoch": 0.29, "grad_norm": 0.569869256934075, "learning_rate": 1.6592758949426162e-05, "loss": 0.3819, "step": 6373 }, { "epoch": 0.29, "grad_norm": 0.32219923007340284, "learning_rate": 1.6591640108319103e-05, "loss": 0.2698, "step": 6374 }, { "epoch": 0.29, "grad_norm": 0.47571440712025526, "learning_rate": 1.659052112127949e-05, "loss": 0.3982, "step": 6375 }, { "epoch": 0.29, "grad_norm": 0.6088639214926304, "learning_rate": 1.6589401988332087e-05, "loss": 0.3442, "step": 6376 }, { "epoch": 0.29, "grad_norm": 0.25203709549402803, "learning_rate": 1.6588282709501674e-05, "loss": 0.1595, "step": 6377 }, { "epoch": 0.29, "grad_norm": 0.433420173369272, "learning_rate": 1.6587163284813034e-05, "loss": 0.3675, "step": 6378 }, { "epoch": 0.29, "grad_norm": 0.7254860163934584, "learning_rate": 1.6586043714290946e-05, "loss": 0.4435, "step": 6379 }, { "epoch": 0.29, "grad_norm": 0.32955721812052813, "learning_rate": 1.6584923997960196e-05, "loss": 0.2515, "step": 6380 }, { "epoch": 0.29, "grad_norm": 1.0185057360039, "learning_rate": 1.6583804135845582e-05, "loss": 0.7566, "step": 6381 }, { "epoch": 0.29, "grad_norm": 0.39712767852618336, "learning_rate": 1.658268412797188e-05, "loss": 0.3063, "step": 6382 }, { "epoch": 0.29, "grad_norm": 0.38838602511710074, "learning_rate": 1.6581563974363903e-05, "loss": 0.2947, "step": 6383 }, { "epoch": 0.29, "grad_norm": 0.37996672965524714, "learning_rate": 1.658044367504644e-05, "loss": 0.1405, "step": 6384 }, { "epoch": 0.29, "grad_norm": 0.8026404437762646, "learning_rate": 1.65793232300443e-05, "loss": 0.3809, "step": 6385 }, { "epoch": 0.29, "grad_norm": 0.320474651717984, "learning_rate": 1.6578202639382284e-05, "loss": 0.2801, "step": 6386 }, { "epoch": 0.29, "grad_norm": 1.0708099246273617, "learning_rate": 1.65770819030852e-05, "loss": 0.5058, "step": 6387 }, { "epoch": 0.29, "grad_norm": 0.7274830765276327, "learning_rate": 1.6575961021177862e-05, "loss": 0.4803, "step": 6388 }, { "epoch": 0.29, "grad_norm": 0.2751653610238452, "learning_rate": 1.6574839993685087e-05, "loss": 0.2154, "step": 6389 }, { "epoch": 0.29, "grad_norm": 0.5314590233336374, "learning_rate": 1.657371882063169e-05, "loss": 0.2834, "step": 6390 }, { "epoch": 0.29, "grad_norm": 1.30410068246301, "learning_rate": 1.6572597502042492e-05, "loss": 0.4119, "step": 6391 }, { "epoch": 0.29, "grad_norm": 0.3833280788467357, "learning_rate": 1.6571476037942322e-05, "loss": 0.2891, "step": 6392 }, { "epoch": 0.29, "grad_norm": 1.000137740200687, "learning_rate": 1.6570354428356007e-05, "loss": 0.5199, "step": 6393 }, { "epoch": 0.29, "grad_norm": 0.36254230672963805, "learning_rate": 1.6569232673308375e-05, "loss": 0.3096, "step": 6394 }, { "epoch": 0.29, "grad_norm": 0.5080913992817924, "learning_rate": 1.6568110772824264e-05, "loss": 0.3477, "step": 6395 }, { "epoch": 0.29, "grad_norm": 0.5572110534200213, "learning_rate": 1.6566988726928513e-05, "loss": 0.263, "step": 6396 }, { "epoch": 0.29, "grad_norm": 1.1878365645437057, "learning_rate": 1.6565866535645957e-05, "loss": 0.3918, "step": 6397 }, { "epoch": 0.29, "grad_norm": 0.3527137301841216, "learning_rate": 1.6564744199001447e-05, "loss": 0.2709, "step": 6398 }, { "epoch": 0.29, "grad_norm": 0.5945591121654509, "learning_rate": 1.656362171701983e-05, "loss": 0.439, "step": 6399 }, { "epoch": 0.29, "grad_norm": 1.002199503429373, "learning_rate": 1.656249908972595e-05, "loss": 0.2774, "step": 6400 }, { "epoch": 0.29, "grad_norm": 0.4818106349507305, "learning_rate": 1.6561376317144668e-05, "loss": 0.299, "step": 6401 }, { "epoch": 0.29, "grad_norm": 0.7519456610443557, "learning_rate": 1.6560253399300838e-05, "loss": 0.371, "step": 6402 }, { "epoch": 0.29, "grad_norm": 0.30058306025878523, "learning_rate": 1.6559130336219317e-05, "loss": 0.126, "step": 6403 }, { "epoch": 0.29, "grad_norm": 0.41400588950928835, "learning_rate": 1.655800712792498e-05, "loss": 0.3191, "step": 6404 }, { "epoch": 0.29, "grad_norm": 1.3828559546417385, "learning_rate": 1.6556883774442675e-05, "loss": 0.781, "step": 6405 }, { "epoch": 0.29, "grad_norm": 0.38337491252087164, "learning_rate": 1.655576027579729e-05, "loss": 0.2558, "step": 6406 }, { "epoch": 0.29, "grad_norm": 0.36259767929482484, "learning_rate": 1.6554636632013692e-05, "loss": 0.2837, "step": 6407 }, { "epoch": 0.29, "grad_norm": 0.7652063219605066, "learning_rate": 1.655351284311675e-05, "loss": 0.4721, "step": 6408 }, { "epoch": 0.29, "grad_norm": 0.3870541204371109, "learning_rate": 1.6552388909131357e-05, "loss": 0.2264, "step": 6409 }, { "epoch": 0.29, "grad_norm": 0.43873263787672284, "learning_rate": 1.6551264830082385e-05, "loss": 0.2093, "step": 6410 }, { "epoch": 0.29, "grad_norm": 0.7432212206213976, "learning_rate": 1.655014060599473e-05, "loss": 0.4335, "step": 6411 }, { "epoch": 0.29, "grad_norm": 0.9396366879479621, "learning_rate": 1.6549016236893263e-05, "loss": 0.5101, "step": 6412 }, { "epoch": 0.29, "grad_norm": 0.34664505583475064, "learning_rate": 1.6547891722802897e-05, "loss": 0.2074, "step": 6413 }, { "epoch": 0.29, "grad_norm": 0.36792639608171956, "learning_rate": 1.6546767063748518e-05, "loss": 0.3533, "step": 6414 }, { "epoch": 0.29, "grad_norm": 0.30901995119216663, "learning_rate": 1.6545642259755025e-05, "loss": 0.1517, "step": 6415 }, { "epoch": 0.29, "grad_norm": 0.34018907227089984, "learning_rate": 1.6544517310847323e-05, "loss": 0.2118, "step": 6416 }, { "epoch": 0.29, "grad_norm": 0.5484774262032054, "learning_rate": 1.6543392217050312e-05, "loss": 0.3989, "step": 6417 }, { "epoch": 0.29, "grad_norm": 0.5907915075173672, "learning_rate": 1.6542266978388905e-05, "loss": 0.354, "step": 6418 }, { "epoch": 0.29, "grad_norm": 0.426750177669408, "learning_rate": 1.6541141594888016e-05, "loss": 0.2794, "step": 6419 }, { "epoch": 0.29, "grad_norm": 0.7001037294796925, "learning_rate": 1.6540016066572552e-05, "loss": 0.3857, "step": 6420 }, { "epoch": 0.29, "grad_norm": 0.24729124452153697, "learning_rate": 1.653889039346744e-05, "loss": 0.1918, "step": 6421 }, { "epoch": 0.3, "grad_norm": 0.3373098248567087, "learning_rate": 1.6537764575597595e-05, "loss": 0.2745, "step": 6422 }, { "epoch": 0.3, "grad_norm": 1.151014603013369, "learning_rate": 1.6536638612987943e-05, "loss": 0.5794, "step": 6423 }, { "epoch": 0.3, "grad_norm": 0.8049619141049491, "learning_rate": 1.6535512505663413e-05, "loss": 0.4705, "step": 6424 }, { "epoch": 0.3, "grad_norm": 0.3490047826268383, "learning_rate": 1.6534386253648933e-05, "loss": 0.2827, "step": 6425 }, { "epoch": 0.3, "grad_norm": 0.427358246409588, "learning_rate": 1.6533259856969444e-05, "loss": 0.2638, "step": 6426 }, { "epoch": 0.3, "grad_norm": 0.4880011568089692, "learning_rate": 1.653213331564987e-05, "loss": 0.296, "step": 6427 }, { "epoch": 0.3, "grad_norm": 0.4028054990303717, "learning_rate": 1.653100662971517e-05, "loss": 0.2922, "step": 6428 }, { "epoch": 0.3, "grad_norm": 0.36823458657292246, "learning_rate": 1.652987979919027e-05, "loss": 0.201, "step": 6429 }, { "epoch": 0.3, "grad_norm": 0.7151789518021233, "learning_rate": 1.6528752824100132e-05, "loss": 0.3985, "step": 6430 }, { "epoch": 0.3, "grad_norm": 0.4727870770840873, "learning_rate": 1.6527625704469695e-05, "loss": 0.3021, "step": 6431 }, { "epoch": 0.3, "grad_norm": 0.5097091756319991, "learning_rate": 1.6526498440323914e-05, "loss": 0.3732, "step": 6432 }, { "epoch": 0.3, "grad_norm": 0.39534392625997816, "learning_rate": 1.652537103168775e-05, "loss": 0.2969, "step": 6433 }, { "epoch": 0.3, "grad_norm": 0.3812483646063325, "learning_rate": 1.6524243478586163e-05, "loss": 0.2739, "step": 6434 }, { "epoch": 0.3, "grad_norm": 0.37011732969299765, "learning_rate": 1.6523115781044112e-05, "loss": 0.313, "step": 6435 }, { "epoch": 0.3, "grad_norm": 0.7606936117525245, "learning_rate": 1.6521987939086563e-05, "loss": 0.3745, "step": 6436 }, { "epoch": 0.3, "grad_norm": 0.29443869151396307, "learning_rate": 1.652085995273849e-05, "loss": 0.2171, "step": 6437 }, { "epoch": 0.3, "grad_norm": 0.3920016536379928, "learning_rate": 1.651973182202486e-05, "loss": 0.3335, "step": 6438 }, { "epoch": 0.3, "grad_norm": 0.9988018325901118, "learning_rate": 1.6518603546970655e-05, "loss": 0.2928, "step": 6439 }, { "epoch": 0.3, "grad_norm": 0.405660531891023, "learning_rate": 1.6517475127600845e-05, "loss": 0.3, "step": 6440 }, { "epoch": 0.3, "grad_norm": 0.9319795140839109, "learning_rate": 1.651634656394042e-05, "loss": 0.6358, "step": 6441 }, { "epoch": 0.3, "grad_norm": 0.3085834874736504, "learning_rate": 1.651521785601436e-05, "loss": 0.2628, "step": 6442 }, { "epoch": 0.3, "grad_norm": 0.31520767514972325, "learning_rate": 1.651408900384766e-05, "loss": 0.2112, "step": 6443 }, { "epoch": 0.3, "grad_norm": 1.3959477785880945, "learning_rate": 1.6512960007465304e-05, "loss": 0.57, "step": 6444 }, { "epoch": 0.3, "grad_norm": 0.4420944524791979, "learning_rate": 1.6511830866892294e-05, "loss": 0.3302, "step": 6445 }, { "epoch": 0.3, "grad_norm": 0.382169930279093, "learning_rate": 1.6510701582153622e-05, "loss": 0.2294, "step": 6446 }, { "epoch": 0.3, "grad_norm": 0.8556709441391702, "learning_rate": 1.6509572153274293e-05, "loss": 0.6459, "step": 6447 }, { "epoch": 0.3, "grad_norm": 0.5330859134336606, "learning_rate": 1.6508442580279315e-05, "loss": 0.3381, "step": 6448 }, { "epoch": 0.3, "grad_norm": 0.2722344894132027, "learning_rate": 1.6507312863193685e-05, "loss": 0.1474, "step": 6449 }, { "epoch": 0.3, "grad_norm": 0.4617405629559114, "learning_rate": 1.650618300204242e-05, "loss": 0.3043, "step": 6450 }, { "epoch": 0.3, "grad_norm": 1.138138607850186, "learning_rate": 1.6505052996850542e-05, "loss": 0.5973, "step": 6451 }, { "epoch": 0.3, "grad_norm": 0.42641339499950154, "learning_rate": 1.650392284764305e-05, "loss": 0.19, "step": 6452 }, { "epoch": 0.3, "grad_norm": 0.4647623875488781, "learning_rate": 1.650279255444498e-05, "loss": 0.3553, "step": 6453 }, { "epoch": 0.3, "grad_norm": 0.5271020832430143, "learning_rate": 1.6501662117281352e-05, "loss": 0.4169, "step": 6454 }, { "epoch": 0.3, "grad_norm": 0.2699697380530017, "learning_rate": 1.650053153617719e-05, "loss": 0.1342, "step": 6455 }, { "epoch": 0.3, "grad_norm": 0.5867352927487277, "learning_rate": 1.649940081115752e-05, "loss": 0.4223, "step": 6456 }, { "epoch": 0.3, "grad_norm": 0.4168908015698718, "learning_rate": 1.649826994224739e-05, "loss": 0.3353, "step": 6457 }, { "epoch": 0.3, "grad_norm": 0.4440769195248618, "learning_rate": 1.649713892947182e-05, "loss": 0.3082, "step": 6458 }, { "epoch": 0.3, "grad_norm": 0.546380066666568, "learning_rate": 1.6496007772855855e-05, "loss": 0.3323, "step": 6459 }, { "epoch": 0.3, "grad_norm": 0.927394827000011, "learning_rate": 1.649487647242454e-05, "loss": 0.5915, "step": 6460 }, { "epoch": 0.3, "grad_norm": 0.3085757473844876, "learning_rate": 1.649374502820292e-05, "loss": 0.2627, "step": 6461 }, { "epoch": 0.3, "grad_norm": 0.32810136978048665, "learning_rate": 1.6492613440216044e-05, "loss": 0.201, "step": 6462 }, { "epoch": 0.3, "grad_norm": 1.134202196600778, "learning_rate": 1.6491481708488963e-05, "loss": 0.5609, "step": 6463 }, { "epoch": 0.3, "grad_norm": 0.5712556717635934, "learning_rate": 1.6490349833046737e-05, "loss": 0.3109, "step": 6464 }, { "epoch": 0.3, "grad_norm": 0.3671115764477214, "learning_rate": 1.648921781391442e-05, "loss": 0.282, "step": 6465 }, { "epoch": 0.3, "grad_norm": 0.5454346246915641, "learning_rate": 1.6488085651117074e-05, "loss": 0.392, "step": 6466 }, { "epoch": 0.3, "grad_norm": 0.6252389708864353, "learning_rate": 1.6486953344679764e-05, "loss": 0.3289, "step": 6467 }, { "epoch": 0.3, "grad_norm": 0.2545009118485677, "learning_rate": 1.648582089462756e-05, "loss": 0.1773, "step": 6468 }, { "epoch": 0.3, "grad_norm": 0.4031403885745529, "learning_rate": 1.6484688300985533e-05, "loss": 0.3279, "step": 6469 }, { "epoch": 0.3, "grad_norm": 0.6124678660679872, "learning_rate": 1.6483555563778756e-05, "loss": 0.3322, "step": 6470 }, { "epoch": 0.3, "grad_norm": 0.43316173994258067, "learning_rate": 1.6482422683032312e-05, "loss": 0.3449, "step": 6471 }, { "epoch": 0.3, "grad_norm": 0.9208136504046879, "learning_rate": 1.648128965877127e-05, "loss": 0.4879, "step": 6472 }, { "epoch": 0.3, "grad_norm": 0.3417605253579729, "learning_rate": 1.648015649102073e-05, "loss": 0.2739, "step": 6473 }, { "epoch": 0.3, "grad_norm": 0.2816719511279857, "learning_rate": 1.6479023179805764e-05, "loss": 0.1916, "step": 6474 }, { "epoch": 0.3, "grad_norm": 0.6813936846086502, "learning_rate": 1.6477889725151476e-05, "loss": 0.3733, "step": 6475 }, { "epoch": 0.3, "grad_norm": 0.5448978403851198, "learning_rate": 1.6476756127082946e-05, "loss": 0.3566, "step": 6476 }, { "epoch": 0.3, "grad_norm": 0.36469015522245996, "learning_rate": 1.6475622385625284e-05, "loss": 0.3183, "step": 6477 }, { "epoch": 0.3, "grad_norm": 0.4750408310436837, "learning_rate": 1.6474488500803578e-05, "loss": 0.3073, "step": 6478 }, { "epoch": 0.3, "grad_norm": 0.4697146339811533, "learning_rate": 1.647335447264294e-05, "loss": 0.3323, "step": 6479 }, { "epoch": 0.3, "grad_norm": 0.2832133721004585, "learning_rate": 1.6472220301168473e-05, "loss": 0.204, "step": 6480 }, { "epoch": 0.3, "grad_norm": 0.4753356605456584, "learning_rate": 1.6471085986405288e-05, "loss": 0.2802, "step": 6481 }, { "epoch": 0.3, "grad_norm": 0.6608626181946445, "learning_rate": 1.646995152837849e-05, "loss": 0.3333, "step": 6482 }, { "epoch": 0.3, "grad_norm": 0.42900784324380353, "learning_rate": 1.6468816927113205e-05, "loss": 0.329, "step": 6483 }, { "epoch": 0.3, "grad_norm": 0.9741182913532257, "learning_rate": 1.646768218263455e-05, "loss": 0.6685, "step": 6484 }, { "epoch": 0.3, "grad_norm": 0.39421012050696175, "learning_rate": 1.6466547294967644e-05, "loss": 0.2342, "step": 6485 }, { "epoch": 0.3, "grad_norm": 0.273922438912459, "learning_rate": 1.6465412264137612e-05, "loss": 0.1913, "step": 6486 }, { "epoch": 0.3, "grad_norm": 1.0887137023245104, "learning_rate": 1.6464277090169583e-05, "loss": 0.4488, "step": 6487 }, { "epoch": 0.3, "grad_norm": 0.4291051285151722, "learning_rate": 1.6463141773088693e-05, "loss": 0.1869, "step": 6488 }, { "epoch": 0.3, "grad_norm": 0.3155116737247999, "learning_rate": 1.6462006312920072e-05, "loss": 0.2864, "step": 6489 }, { "epoch": 0.3, "grad_norm": 1.5978708768858632, "learning_rate": 1.646087070968886e-05, "loss": 0.7105, "step": 6490 }, { "epoch": 0.3, "grad_norm": 0.4810588920180695, "learning_rate": 1.6459734963420194e-05, "loss": 0.2244, "step": 6491 }, { "epoch": 0.3, "grad_norm": 0.43802119282501034, "learning_rate": 1.6458599074139224e-05, "loss": 0.2635, "step": 6492 }, { "epoch": 0.3, "grad_norm": 0.5200842674695335, "learning_rate": 1.6457463041871097e-05, "loss": 0.3478, "step": 6493 }, { "epoch": 0.3, "grad_norm": 0.3939064074560006, "learning_rate": 1.645632686664096e-05, "loss": 0.1431, "step": 6494 }, { "epoch": 0.3, "grad_norm": 0.5030782381319365, "learning_rate": 1.645519054847397e-05, "loss": 0.3983, "step": 6495 }, { "epoch": 0.3, "grad_norm": 1.6266526676861548, "learning_rate": 1.6454054087395284e-05, "loss": 0.8158, "step": 6496 }, { "epoch": 0.3, "grad_norm": 0.366753809075817, "learning_rate": 1.6452917483430058e-05, "loss": 0.3035, "step": 6497 }, { "epoch": 0.3, "grad_norm": 0.43293325953343664, "learning_rate": 1.645178073660346e-05, "loss": 0.2439, "step": 6498 }, { "epoch": 0.3, "grad_norm": 0.4292164680054889, "learning_rate": 1.6450643846940653e-05, "loss": 0.3375, "step": 6499 }, { "epoch": 0.3, "grad_norm": 0.3329224444778718, "learning_rate": 1.6449506814466813e-05, "loss": 0.1993, "step": 6500 }, { "epoch": 0.3, "grad_norm": 0.41734160041646134, "learning_rate": 1.6448369639207103e-05, "loss": 0.2622, "step": 6501 }, { "epoch": 0.3, "grad_norm": 1.34362989409153, "learning_rate": 1.6447232321186706e-05, "loss": 0.7333, "step": 6502 }, { "epoch": 0.3, "grad_norm": 0.8554454989854707, "learning_rate": 1.64460948604308e-05, "loss": 0.5656, "step": 6503 }, { "epoch": 0.3, "grad_norm": 0.3497685134856179, "learning_rate": 1.644495725696457e-05, "loss": 0.257, "step": 6504 }, { "epoch": 0.3, "grad_norm": 0.3312840991808666, "learning_rate": 1.6443819510813192e-05, "loss": 0.2743, "step": 6505 }, { "epoch": 0.3, "grad_norm": 0.5297136666415108, "learning_rate": 1.6442681622001862e-05, "loss": 0.2798, "step": 6506 }, { "epoch": 0.3, "grad_norm": 0.366222201035637, "learning_rate": 1.644154359055577e-05, "loss": 0.2263, "step": 6507 }, { "epoch": 0.3, "grad_norm": 0.9062065267477913, "learning_rate": 1.6440405416500117e-05, "loss": 0.5924, "step": 6508 }, { "epoch": 0.3, "grad_norm": 0.48981059173960173, "learning_rate": 1.643926709986009e-05, "loss": 0.3159, "step": 6509 }, { "epoch": 0.3, "grad_norm": 0.49196951056219296, "learning_rate": 1.6438128640660896e-05, "loss": 0.3288, "step": 6510 }, { "epoch": 0.3, "grad_norm": 0.8354605002344404, "learning_rate": 1.643699003892774e-05, "loss": 0.3427, "step": 6511 }, { "epoch": 0.3, "grad_norm": 0.2788487926854313, "learning_rate": 1.643585129468583e-05, "loss": 0.1987, "step": 6512 }, { "epoch": 0.3, "grad_norm": 0.3464945325308807, "learning_rate": 1.6434712407960375e-05, "loss": 0.3081, "step": 6513 }, { "epoch": 0.3, "grad_norm": 0.9497345174428397, "learning_rate": 1.6433573378776587e-05, "loss": 0.4322, "step": 6514 }, { "epoch": 0.3, "grad_norm": 0.7505188900261159, "learning_rate": 1.6432434207159683e-05, "loss": 0.4061, "step": 6515 }, { "epoch": 0.3, "grad_norm": 0.4157365745160956, "learning_rate": 1.643129489313489e-05, "loss": 0.3121, "step": 6516 }, { "epoch": 0.3, "grad_norm": 0.4006192315395858, "learning_rate": 1.6430155436727427e-05, "loss": 0.2886, "step": 6517 }, { "epoch": 0.3, "grad_norm": 0.32545635316675936, "learning_rate": 1.6429015837962518e-05, "loss": 0.1956, "step": 6518 }, { "epoch": 0.3, "grad_norm": 0.4379321418833836, "learning_rate": 1.6427876096865394e-05, "loss": 0.3069, "step": 6519 }, { "epoch": 0.3, "grad_norm": 0.5114829747424323, "learning_rate": 1.6426736213461293e-05, "loss": 0.295, "step": 6520 }, { "epoch": 0.3, "grad_norm": 0.6533020942405925, "learning_rate": 1.6425596187775437e-05, "loss": 0.3727, "step": 6521 }, { "epoch": 0.3, "grad_norm": 0.3730304243109258, "learning_rate": 1.6424456019833085e-05, "loss": 0.2997, "step": 6522 }, { "epoch": 0.3, "grad_norm": 0.8596725205814361, "learning_rate": 1.6423315709659464e-05, "loss": 0.5281, "step": 6523 }, { "epoch": 0.3, "grad_norm": 0.3182355629143691, "learning_rate": 1.6422175257279822e-05, "loss": 0.2185, "step": 6524 }, { "epoch": 0.3, "grad_norm": 0.38124723854814485, "learning_rate": 1.6421034662719412e-05, "loss": 0.2891, "step": 6525 }, { "epoch": 0.3, "grad_norm": 0.3964815759041436, "learning_rate": 1.641989392600348e-05, "loss": 0.2678, "step": 6526 }, { "epoch": 0.3, "grad_norm": 0.771444130264634, "learning_rate": 1.641875304715729e-05, "loss": 0.3762, "step": 6527 }, { "epoch": 0.3, "grad_norm": 0.42810467766420895, "learning_rate": 1.641761202620609e-05, "loss": 0.2762, "step": 6528 }, { "epoch": 0.3, "grad_norm": 0.46037957759995773, "learning_rate": 1.6416470863175148e-05, "loss": 0.3287, "step": 6529 }, { "epoch": 0.3, "grad_norm": 0.5978510254078991, "learning_rate": 1.6415329558089722e-05, "loss": 0.2977, "step": 6530 }, { "epoch": 0.3, "grad_norm": 0.4127031440530877, "learning_rate": 1.6414188110975085e-05, "loss": 0.3168, "step": 6531 }, { "epoch": 0.3, "grad_norm": 0.5794757409750412, "learning_rate": 1.6413046521856504e-05, "loss": 0.3198, "step": 6532 }, { "epoch": 0.3, "grad_norm": 0.32487824411162197, "learning_rate": 1.6411904790759255e-05, "loss": 0.2918, "step": 6533 }, { "epoch": 0.3, "grad_norm": 0.35744032735483067, "learning_rate": 1.6410762917708612e-05, "loss": 0.2056, "step": 6534 }, { "epoch": 0.3, "grad_norm": 0.9635264121585168, "learning_rate": 1.640962090272986e-05, "loss": 0.576, "step": 6535 }, { "epoch": 0.3, "grad_norm": 0.3742747925858866, "learning_rate": 1.640847874584828e-05, "loss": 0.3305, "step": 6536 }, { "epoch": 0.3, "grad_norm": 0.33697788400838236, "learning_rate": 1.640733644708915e-05, "loss": 0.2073, "step": 6537 }, { "epoch": 0.3, "grad_norm": 0.6467408366970562, "learning_rate": 1.6406194006477768e-05, "loss": 0.4713, "step": 6538 }, { "epoch": 0.3, "grad_norm": 0.34870994071230194, "learning_rate": 1.640505142403943e-05, "loss": 0.2306, "step": 6539 }, { "epoch": 0.3, "grad_norm": 0.36269345573178563, "learning_rate": 1.6403908699799423e-05, "loss": 0.2005, "step": 6540 }, { "epoch": 0.3, "grad_norm": 0.42239697306336577, "learning_rate": 1.6402765833783054e-05, "loss": 0.3199, "step": 6541 }, { "epoch": 0.3, "grad_norm": 1.0852601527734975, "learning_rate": 1.6401622826015616e-05, "loss": 0.7276, "step": 6542 }, { "epoch": 0.3, "grad_norm": 0.3379799938024275, "learning_rate": 1.640047967652242e-05, "loss": 0.231, "step": 6543 }, { "epoch": 0.3, "grad_norm": 0.47756456779866496, "learning_rate": 1.6399336385328775e-05, "loss": 0.3845, "step": 6544 }, { "epoch": 0.3, "grad_norm": 0.35860597543024575, "learning_rate": 1.6398192952459987e-05, "loss": 0.2857, "step": 6545 }, { "epoch": 0.3, "grad_norm": 0.3238611100152137, "learning_rate": 1.6397049377941378e-05, "loss": 0.2216, "step": 6546 }, { "epoch": 0.3, "grad_norm": 0.6573411854352372, "learning_rate": 1.6395905661798253e-05, "loss": 0.3484, "step": 6547 }, { "epoch": 0.3, "grad_norm": 0.44558358204369625, "learning_rate": 1.6394761804055947e-05, "loss": 0.3579, "step": 6548 }, { "epoch": 0.3, "grad_norm": 0.3720275209746239, "learning_rate": 1.6393617804739777e-05, "loss": 0.2655, "step": 6549 }, { "epoch": 0.3, "grad_norm": 0.731963916306467, "learning_rate": 1.639247366387507e-05, "loss": 0.3905, "step": 6550 }, { "epoch": 0.3, "grad_norm": 0.2952154608547386, "learning_rate": 1.6391329381487162e-05, "loss": 0.1903, "step": 6551 }, { "epoch": 0.3, "grad_norm": 0.49260663139873556, "learning_rate": 1.6390184957601376e-05, "loss": 0.3277, "step": 6552 }, { "epoch": 0.3, "grad_norm": 0.35835665871153694, "learning_rate": 1.6389040392243056e-05, "loss": 0.2598, "step": 6553 }, { "epoch": 0.3, "grad_norm": 0.888465678475625, "learning_rate": 1.638789568543754e-05, "loss": 0.49, "step": 6554 }, { "epoch": 0.3, "grad_norm": 0.5681236508741496, "learning_rate": 1.638675083721017e-05, "loss": 0.2694, "step": 6555 }, { "epoch": 0.3, "grad_norm": 0.32378960558442227, "learning_rate": 1.638560584758629e-05, "loss": 0.2938, "step": 6556 }, { "epoch": 0.3, "grad_norm": 1.1897165811018666, "learning_rate": 1.6384460716591255e-05, "loss": 0.6916, "step": 6557 }, { "epoch": 0.3, "grad_norm": 0.2604145627671716, "learning_rate": 1.638331544425041e-05, "loss": 0.2078, "step": 6558 }, { "epoch": 0.3, "grad_norm": 0.5654120186370319, "learning_rate": 1.6382170030589116e-05, "loss": 0.3269, "step": 6559 }, { "epoch": 0.3, "grad_norm": 0.49399185386294203, "learning_rate": 1.6381024475632727e-05, "loss": 0.2909, "step": 6560 }, { "epoch": 0.3, "grad_norm": 0.47135892073226987, "learning_rate": 1.6379878779406608e-05, "loss": 0.3072, "step": 6561 }, { "epoch": 0.3, "grad_norm": 1.0402325299141095, "learning_rate": 1.637873294193612e-05, "loss": 0.5584, "step": 6562 }, { "epoch": 0.3, "grad_norm": 0.2789542268993825, "learning_rate": 1.6377586963246632e-05, "loss": 0.1461, "step": 6563 }, { "epoch": 0.3, "grad_norm": 0.3545696528232299, "learning_rate": 1.6376440843363517e-05, "loss": 0.2166, "step": 6564 }, { "epoch": 0.3, "grad_norm": 0.42002315458438283, "learning_rate": 1.637529458231215e-05, "loss": 0.3116, "step": 6565 }, { "epoch": 0.3, "grad_norm": 1.162490950676672, "learning_rate": 1.6374148180117898e-05, "loss": 0.3841, "step": 6566 }, { "epoch": 0.3, "grad_norm": 0.39181416972488065, "learning_rate": 1.6373001636806153e-05, "loss": 0.2964, "step": 6567 }, { "epoch": 0.3, "grad_norm": 0.5113878099309274, "learning_rate": 1.6371854952402297e-05, "loss": 0.378, "step": 6568 }, { "epoch": 0.3, "grad_norm": 0.31200994071467875, "learning_rate": 1.637070812693171e-05, "loss": 0.2171, "step": 6569 }, { "epoch": 0.3, "grad_norm": 0.3603889434836613, "learning_rate": 1.6369561160419783e-05, "loss": 0.217, "step": 6570 }, { "epoch": 0.3, "grad_norm": 0.42561214625380145, "learning_rate": 1.6368414052891918e-05, "loss": 0.3061, "step": 6571 }, { "epoch": 0.3, "grad_norm": 0.5492331803478658, "learning_rate": 1.6367266804373497e-05, "loss": 0.3626, "step": 6572 }, { "epoch": 0.3, "grad_norm": 0.4842441172718364, "learning_rate": 1.636611941488993e-05, "loss": 0.2311, "step": 6573 }, { "epoch": 0.3, "grad_norm": 0.5915639741104606, "learning_rate": 1.6364971884466614e-05, "loss": 0.4518, "step": 6574 }, { "epoch": 0.3, "grad_norm": 0.904162550188888, "learning_rate": 1.6363824213128953e-05, "loss": 0.6507, "step": 6575 }, { "epoch": 0.3, "grad_norm": 0.2796132707422062, "learning_rate": 1.636267640090236e-05, "loss": 0.1828, "step": 6576 }, { "epoch": 0.3, "grad_norm": 0.3129185611756594, "learning_rate": 1.6361528447812244e-05, "loss": 0.2779, "step": 6577 }, { "epoch": 0.3, "grad_norm": 0.7736276625191115, "learning_rate": 1.6360380353884018e-05, "loss": 0.4636, "step": 6578 }, { "epoch": 0.3, "grad_norm": 0.4248028508473478, "learning_rate": 1.63592321191431e-05, "loss": 0.2321, "step": 6579 }, { "epoch": 0.3, "grad_norm": 0.39447472201104505, "learning_rate": 1.6358083743614916e-05, "loss": 0.321, "step": 6580 }, { "epoch": 0.3, "grad_norm": 1.160599207404717, "learning_rate": 1.6356935227324885e-05, "loss": 0.6212, "step": 6581 }, { "epoch": 0.3, "grad_norm": 0.25491342699720554, "learning_rate": 1.6355786570298432e-05, "loss": 0.1701, "step": 6582 }, { "epoch": 0.3, "grad_norm": 0.41688351398019957, "learning_rate": 1.635463777256099e-05, "loss": 0.2677, "step": 6583 }, { "epoch": 0.3, "grad_norm": 0.43894958498593034, "learning_rate": 1.6353488834137995e-05, "loss": 0.3514, "step": 6584 }, { "epoch": 0.3, "grad_norm": 0.4896844953290846, "learning_rate": 1.635233975505488e-05, "loss": 0.312, "step": 6585 }, { "epoch": 0.3, "grad_norm": 0.5567304375845444, "learning_rate": 1.6351190535337084e-05, "loss": 0.298, "step": 6586 }, { "epoch": 0.3, "grad_norm": 1.6726880481382136, "learning_rate": 1.635004117501005e-05, "loss": 0.6986, "step": 6587 }, { "epoch": 0.3, "grad_norm": 0.3492931456414217, "learning_rate": 1.634889167409923e-05, "loss": 0.2672, "step": 6588 }, { "epoch": 0.3, "grad_norm": 0.3434865777364456, "learning_rate": 1.634774203263006e-05, "loss": 0.2333, "step": 6589 }, { "epoch": 0.3, "grad_norm": 0.40991772471326116, "learning_rate": 1.6346592250628005e-05, "loss": 0.3278, "step": 6590 }, { "epoch": 0.3, "grad_norm": 0.9259186845405233, "learning_rate": 1.6345442328118516e-05, "loss": 0.5356, "step": 6591 }, { "epoch": 0.3, "grad_norm": 0.38355605242118107, "learning_rate": 1.6344292265127045e-05, "loss": 0.2511, "step": 6592 }, { "epoch": 0.3, "grad_norm": 1.2056774717455023, "learning_rate": 1.6343142061679063e-05, "loss": 0.5917, "step": 6593 }, { "epoch": 0.3, "grad_norm": 0.5849185890907708, "learning_rate": 1.634199171780002e-05, "loss": 0.3827, "step": 6594 }, { "epoch": 0.3, "grad_norm": 0.44687750074031735, "learning_rate": 1.6340841233515403e-05, "loss": 0.3062, "step": 6595 }, { "epoch": 0.3, "grad_norm": 0.3994128813316267, "learning_rate": 1.633969060885067e-05, "loss": 0.2704, "step": 6596 }, { "epoch": 0.3, "grad_norm": 0.46614208420869735, "learning_rate": 1.6338539843831294e-05, "loss": 0.23, "step": 6597 }, { "epoch": 0.3, "grad_norm": 0.5080092868190113, "learning_rate": 1.6337388938482755e-05, "loss": 0.3573, "step": 6598 }, { "epoch": 0.3, "grad_norm": 0.9378578063602536, "learning_rate": 1.6336237892830537e-05, "loss": 0.3754, "step": 6599 }, { "epoch": 0.3, "grad_norm": 0.35400897780871404, "learning_rate": 1.6335086706900115e-05, "loss": 0.2836, "step": 6600 }, { "epoch": 0.3, "grad_norm": 0.4337132596414439, "learning_rate": 1.633393538071698e-05, "loss": 0.3339, "step": 6601 }, { "epoch": 0.3, "grad_norm": 0.32505940605019057, "learning_rate": 1.6332783914306622e-05, "loss": 0.1958, "step": 6602 }, { "epoch": 0.3, "grad_norm": 0.45687920251500214, "learning_rate": 1.6331632307694532e-05, "loss": 0.3003, "step": 6603 }, { "epoch": 0.3, "grad_norm": 0.46936589442553284, "learning_rate": 1.6330480560906205e-05, "loss": 0.321, "step": 6604 }, { "epoch": 0.3, "grad_norm": 0.5100559966180981, "learning_rate": 1.6329328673967138e-05, "loss": 0.2819, "step": 6605 }, { "epoch": 0.3, "grad_norm": 0.550454797363434, "learning_rate": 1.6328176646902835e-05, "loss": 0.392, "step": 6606 }, { "epoch": 0.3, "grad_norm": 0.4499413744673752, "learning_rate": 1.63270244797388e-05, "loss": 0.3628, "step": 6607 }, { "epoch": 0.3, "grad_norm": 0.34968759889757317, "learning_rate": 1.6325872172500542e-05, "loss": 0.2332, "step": 6608 }, { "epoch": 0.3, "grad_norm": 0.9990188160091769, "learning_rate": 1.6324719725213572e-05, "loss": 0.6058, "step": 6609 }, { "epoch": 0.3, "grad_norm": 0.3665778852545278, "learning_rate": 1.63235671379034e-05, "loss": 0.2647, "step": 6610 }, { "epoch": 0.3, "grad_norm": 0.5049814006095249, "learning_rate": 1.6322414410595548e-05, "loss": 0.3838, "step": 6611 }, { "epoch": 0.3, "grad_norm": 0.4563241127204825, "learning_rate": 1.6321261543315534e-05, "loss": 0.2876, "step": 6612 }, { "epoch": 0.3, "grad_norm": 0.42906696644990255, "learning_rate": 1.6320108536088882e-05, "loss": 0.2981, "step": 6613 }, { "epoch": 0.3, "grad_norm": 0.43835403624413555, "learning_rate": 1.631895538894112e-05, "loss": 0.2734, "step": 6614 }, { "epoch": 0.3, "grad_norm": 0.39212094038623563, "learning_rate": 1.6317802101897776e-05, "loss": 0.2466, "step": 6615 }, { "epoch": 0.3, "grad_norm": 0.30264006684812716, "learning_rate": 1.6316648674984384e-05, "loss": 0.2613, "step": 6616 }, { "epoch": 0.3, "grad_norm": 0.9147456877051281, "learning_rate": 1.6315495108226473e-05, "loss": 0.5134, "step": 6617 }, { "epoch": 0.3, "grad_norm": 0.5243111004367629, "learning_rate": 1.631434140164959e-05, "loss": 0.2919, "step": 6618 }, { "epoch": 0.3, "grad_norm": 0.4207730961732931, "learning_rate": 1.631318755527928e-05, "loss": 0.2993, "step": 6619 }, { "epoch": 0.3, "grad_norm": 0.3910479649172596, "learning_rate": 1.6312033569141074e-05, "loss": 0.3473, "step": 6620 }, { "epoch": 0.3, "grad_norm": 0.38394718072345824, "learning_rate": 1.631087944326053e-05, "loss": 0.2358, "step": 6621 }, { "epoch": 0.3, "grad_norm": 0.31632673088315016, "learning_rate": 1.6309725177663198e-05, "loss": 0.2357, "step": 6622 }, { "epoch": 0.3, "grad_norm": 0.5400865336847385, "learning_rate": 1.6308570772374633e-05, "loss": 0.2937, "step": 6623 }, { "epoch": 0.3, "grad_norm": 0.4609562941086053, "learning_rate": 1.630741622742039e-05, "loss": 0.3502, "step": 6624 }, { "epoch": 0.3, "grad_norm": 0.35723449188110534, "learning_rate": 1.6306261542826035e-05, "loss": 0.2405, "step": 6625 }, { "epoch": 0.3, "grad_norm": 1.1537993159643016, "learning_rate": 1.6305106718617122e-05, "loss": 0.7594, "step": 6626 }, { "epoch": 0.3, "grad_norm": 0.5084133730292307, "learning_rate": 1.6303951754819226e-05, "loss": 0.3942, "step": 6627 }, { "epoch": 0.3, "grad_norm": 0.28867098184928724, "learning_rate": 1.6302796651457913e-05, "loss": 0.2221, "step": 6628 }, { "epoch": 0.3, "grad_norm": 0.4188880799190303, "learning_rate": 1.6301641408558758e-05, "loss": 0.3502, "step": 6629 }, { "epoch": 0.3, "grad_norm": 0.5011034823953276, "learning_rate": 1.6300486026147334e-05, "loss": 0.2896, "step": 6630 }, { "epoch": 0.3, "grad_norm": 0.35207389748379175, "learning_rate": 1.6299330504249224e-05, "loss": 0.2359, "step": 6631 }, { "epoch": 0.3, "grad_norm": 0.38497907163084577, "learning_rate": 1.6298174842890006e-05, "loss": 0.2938, "step": 6632 }, { "epoch": 0.3, "grad_norm": 1.314538370182189, "learning_rate": 1.629701904209527e-05, "loss": 0.796, "step": 6633 }, { "epoch": 0.3, "grad_norm": 0.3369898610076391, "learning_rate": 1.6295863101890603e-05, "loss": 0.2508, "step": 6634 }, { "epoch": 0.3, "grad_norm": 0.47024977776365207, "learning_rate": 1.629470702230159e-05, "loss": 0.3608, "step": 6635 }, { "epoch": 0.3, "grad_norm": 0.27704271718116796, "learning_rate": 1.6293550803353832e-05, "loss": 0.2039, "step": 6636 }, { "epoch": 0.3, "grad_norm": 0.4259659476564318, "learning_rate": 1.6292394445072927e-05, "loss": 0.3, "step": 6637 }, { "epoch": 0.3, "grad_norm": 1.2456542277269682, "learning_rate": 1.629123794748447e-05, "loss": 0.4059, "step": 6638 }, { "epoch": 0.3, "grad_norm": 0.3905702507302019, "learning_rate": 1.629008131061407e-05, "loss": 0.3038, "step": 6639 }, { "epoch": 0.31, "grad_norm": 0.4082656287472168, "learning_rate": 1.6288924534487332e-05, "loss": 0.2866, "step": 6640 }, { "epoch": 0.31, "grad_norm": 0.7821103023045858, "learning_rate": 1.628776761912987e-05, "loss": 0.3859, "step": 6641 }, { "epoch": 0.31, "grad_norm": 0.29564259848287044, "learning_rate": 1.6286610564567288e-05, "loss": 0.1779, "step": 6642 }, { "epoch": 0.31, "grad_norm": 0.3985979065971165, "learning_rate": 1.628545337082521e-05, "loss": 0.2637, "step": 6643 }, { "epoch": 0.31, "grad_norm": 0.4621808473295159, "learning_rate": 1.6284296037929253e-05, "loss": 0.2753, "step": 6644 }, { "epoch": 0.31, "grad_norm": 0.7849065580877207, "learning_rate": 1.6283138565905034e-05, "loss": 0.4651, "step": 6645 }, { "epoch": 0.31, "grad_norm": 0.3900640459562685, "learning_rate": 1.628198095477819e-05, "loss": 0.2911, "step": 6646 }, { "epoch": 0.31, "grad_norm": 0.441058727034453, "learning_rate": 1.6280823204574335e-05, "loss": 0.2914, "step": 6647 }, { "epoch": 0.31, "grad_norm": 0.3023276919347662, "learning_rate": 1.6279665315319114e-05, "loss": 0.1633, "step": 6648 }, { "epoch": 0.31, "grad_norm": 0.43219489998759303, "learning_rate": 1.6278507287038154e-05, "loss": 0.2863, "step": 6649 }, { "epoch": 0.31, "grad_norm": 1.0578711094906632, "learning_rate": 1.627734911975709e-05, "loss": 0.6318, "step": 6650 }, { "epoch": 0.31, "grad_norm": 0.47878463713258884, "learning_rate": 1.627619081350157e-05, "loss": 0.3008, "step": 6651 }, { "epoch": 0.31, "grad_norm": 0.3784177058794493, "learning_rate": 1.6275032368297234e-05, "loss": 0.3219, "step": 6652 }, { "epoch": 0.31, "grad_norm": 0.985831808752877, "learning_rate": 1.6273873784169726e-05, "loss": 0.5825, "step": 6653 }, { "epoch": 0.31, "grad_norm": 0.22157624563053643, "learning_rate": 1.6272715061144705e-05, "loss": 0.0984, "step": 6654 }, { "epoch": 0.31, "grad_norm": 0.4806553177361242, "learning_rate": 1.6271556199247816e-05, "loss": 0.2876, "step": 6655 }, { "epoch": 0.31, "grad_norm": 0.4436336409772609, "learning_rate": 1.6270397198504713e-05, "loss": 0.3317, "step": 6656 }, { "epoch": 0.31, "grad_norm": 0.750027154728805, "learning_rate": 1.626923805894107e-05, "loss": 0.3553, "step": 6657 }, { "epoch": 0.31, "grad_norm": 0.4018477183641706, "learning_rate": 1.626807878058253e-05, "loss": 0.303, "step": 6658 }, { "epoch": 0.31, "grad_norm": 0.5838624704541613, "learning_rate": 1.6266919363454767e-05, "loss": 0.4501, "step": 6659 }, { "epoch": 0.31, "grad_norm": 0.28678391350476534, "learning_rate": 1.6265759807583452e-05, "loss": 0.2104, "step": 6660 }, { "epoch": 0.31, "grad_norm": 0.3718243736984594, "learning_rate": 1.6264600112994253e-05, "loss": 0.1991, "step": 6661 }, { "epoch": 0.31, "grad_norm": 0.728963213666723, "learning_rate": 1.6263440279712844e-05, "loss": 0.4662, "step": 6662 }, { "epoch": 0.31, "grad_norm": 0.41942227561214906, "learning_rate": 1.626228030776491e-05, "loss": 0.3217, "step": 6663 }, { "epoch": 0.31, "grad_norm": 0.32813790885906, "learning_rate": 1.626112019717612e-05, "loss": 0.2429, "step": 6664 }, { "epoch": 0.31, "grad_norm": 0.977314052579278, "learning_rate": 1.6259959947972164e-05, "loss": 0.6341, "step": 6665 }, { "epoch": 0.31, "grad_norm": 0.3370864614640248, "learning_rate": 1.625879956017873e-05, "loss": 0.1958, "step": 6666 }, { "epoch": 0.31, "grad_norm": 0.3203806194043662, "learning_rate": 1.6257639033821506e-05, "loss": 0.2275, "step": 6667 }, { "epoch": 0.31, "grad_norm": 0.5701309424295458, "learning_rate": 1.6256478368926182e-05, "loss": 0.3468, "step": 6668 }, { "epoch": 0.31, "grad_norm": 0.7790299465689874, "learning_rate": 1.625531756551846e-05, "loss": 0.4701, "step": 6669 }, { "epoch": 0.31, "grad_norm": 0.32906499967314945, "learning_rate": 1.6254156623624037e-05, "loss": 0.2211, "step": 6670 }, { "epoch": 0.31, "grad_norm": 0.5755530672111476, "learning_rate": 1.625299554326861e-05, "loss": 0.4002, "step": 6671 }, { "epoch": 0.31, "grad_norm": 0.32966558010924363, "learning_rate": 1.625183432447789e-05, "loss": 0.2232, "step": 6672 }, { "epoch": 0.31, "grad_norm": 0.2876879845530426, "learning_rate": 1.6250672967277585e-05, "loss": 0.2169, "step": 6673 }, { "epoch": 0.31, "grad_norm": 0.757826477631533, "learning_rate": 1.6249511471693408e-05, "loss": 0.3658, "step": 6674 }, { "epoch": 0.31, "grad_norm": 0.37199607250562394, "learning_rate": 1.6248349837751064e-05, "loss": 0.3546, "step": 6675 }, { "epoch": 0.31, "grad_norm": 0.6385973486357023, "learning_rate": 1.624718806547628e-05, "loss": 0.3386, "step": 6676 }, { "epoch": 0.31, "grad_norm": 0.39124219593765885, "learning_rate": 1.624602615489477e-05, "loss": 0.2736, "step": 6677 }, { "epoch": 0.31, "grad_norm": 0.2912565711689406, "learning_rate": 1.6244864106032268e-05, "loss": 0.2344, "step": 6678 }, { "epoch": 0.31, "grad_norm": 0.45834645015404796, "learning_rate": 1.624370191891449e-05, "loss": 0.2745, "step": 6679 }, { "epoch": 0.31, "grad_norm": 0.527540308502391, "learning_rate": 1.624253959356717e-05, "loss": 0.3159, "step": 6680 }, { "epoch": 0.31, "grad_norm": 1.5229778699898082, "learning_rate": 1.6241377130016038e-05, "loss": 0.839, "step": 6681 }, { "epoch": 0.31, "grad_norm": 0.42971366386805065, "learning_rate": 1.6240214528286832e-05, "loss": 0.3053, "step": 6682 }, { "epoch": 0.31, "grad_norm": 0.3052613118371382, "learning_rate": 1.623905178840529e-05, "loss": 0.2679, "step": 6683 }, { "epoch": 0.31, "grad_norm": 1.3014448745623601, "learning_rate": 1.6237888910397154e-05, "loss": 0.6817, "step": 6684 }, { "epoch": 0.31, "grad_norm": 0.28575253932999406, "learning_rate": 1.6236725894288175e-05, "loss": 0.2282, "step": 6685 }, { "epoch": 0.31, "grad_norm": 0.5340218479287223, "learning_rate": 1.623556274010409e-05, "loss": 0.378, "step": 6686 }, { "epoch": 0.31, "grad_norm": 0.27982679677075867, "learning_rate": 1.623439944787066e-05, "loss": 0.2111, "step": 6687 }, { "epoch": 0.31, "grad_norm": 0.38337725462738864, "learning_rate": 1.623323601761363e-05, "loss": 0.2649, "step": 6688 }, { "epoch": 0.31, "grad_norm": 1.2048162832634715, "learning_rate": 1.6232072449358768e-05, "loss": 0.6965, "step": 6689 }, { "epoch": 0.31, "grad_norm": 0.4394263726569237, "learning_rate": 1.6230908743131823e-05, "loss": 0.2986, "step": 6690 }, { "epoch": 0.31, "grad_norm": 0.35074423813786554, "learning_rate": 1.622974489895857e-05, "loss": 0.2692, "step": 6691 }, { "epoch": 0.31, "grad_norm": 0.45193036295581984, "learning_rate": 1.6228580916864767e-05, "loss": 0.3796, "step": 6692 }, { "epoch": 0.31, "grad_norm": 0.20823536556111047, "learning_rate": 1.6227416796876183e-05, "loss": 0.1192, "step": 6693 }, { "epoch": 0.31, "grad_norm": 0.6476606239893536, "learning_rate": 1.6226252539018597e-05, "loss": 0.3743, "step": 6694 }, { "epoch": 0.31, "grad_norm": 0.35599435692443465, "learning_rate": 1.6225088143317777e-05, "loss": 0.3013, "step": 6695 }, { "epoch": 0.31, "grad_norm": 0.4938401320624271, "learning_rate": 1.622392360979951e-05, "loss": 0.3729, "step": 6696 }, { "epoch": 0.31, "grad_norm": 0.567718113436878, "learning_rate": 1.6222758938489566e-05, "loss": 0.3546, "step": 6697 }, { "epoch": 0.31, "grad_norm": 0.33765679097510365, "learning_rate": 1.6221594129413743e-05, "loss": 0.2465, "step": 6698 }, { "epoch": 0.31, "grad_norm": 0.32090428334192345, "learning_rate": 1.622042918259782e-05, "loss": 0.2692, "step": 6699 }, { "epoch": 0.31, "grad_norm": 0.3373045259337222, "learning_rate": 1.621926409806759e-05, "loss": 0.2069, "step": 6700 }, { "epoch": 0.31, "grad_norm": 0.519017569319671, "learning_rate": 1.6218098875848846e-05, "loss": 0.3807, "step": 6701 }, { "epoch": 0.31, "grad_norm": 0.9547243282030283, "learning_rate": 1.621693351596739e-05, "loss": 0.5102, "step": 6702 }, { "epoch": 0.31, "grad_norm": 0.325505484926452, "learning_rate": 1.6215768018449015e-05, "loss": 0.2578, "step": 6703 }, { "epoch": 0.31, "grad_norm": 0.5270273182324773, "learning_rate": 1.6214602383319527e-05, "loss": 0.363, "step": 6704 }, { "epoch": 0.31, "grad_norm": 0.5844785943443738, "learning_rate": 1.621343661060473e-05, "loss": 0.3161, "step": 6705 }, { "epoch": 0.31, "grad_norm": 0.2641593182842511, "learning_rate": 1.6212270700330438e-05, "loss": 0.1691, "step": 6706 }, { "epoch": 0.31, "grad_norm": 0.37250703081140096, "learning_rate": 1.6211104652522462e-05, "loss": 0.3196, "step": 6707 }, { "epoch": 0.31, "grad_norm": 1.0609618031098604, "learning_rate": 1.6209938467206612e-05, "loss": 0.4588, "step": 6708 }, { "epoch": 0.31, "grad_norm": 0.3954895806936831, "learning_rate": 1.6208772144408712e-05, "loss": 0.2258, "step": 6709 }, { "epoch": 0.31, "grad_norm": 0.5140490469489751, "learning_rate": 1.6207605684154577e-05, "loss": 0.3586, "step": 6710 }, { "epoch": 0.31, "grad_norm": 0.3198506020047123, "learning_rate": 1.6206439086470037e-05, "loss": 0.2858, "step": 6711 }, { "epoch": 0.31, "grad_norm": 0.8625590486821628, "learning_rate": 1.6205272351380917e-05, "loss": 0.5312, "step": 6712 }, { "epoch": 0.31, "grad_norm": 0.36562163202819825, "learning_rate": 1.6204105478913052e-05, "loss": 0.2109, "step": 6713 }, { "epoch": 0.31, "grad_norm": 0.8021963167604096, "learning_rate": 1.620293846909226e-05, "loss": 0.412, "step": 6714 }, { "epoch": 0.31, "grad_norm": 0.45388094873874923, "learning_rate": 1.62017713219444e-05, "loss": 0.3029, "step": 6715 }, { "epoch": 0.31, "grad_norm": 0.39095581712637106, "learning_rate": 1.6200604037495295e-05, "loss": 0.2252, "step": 6716 }, { "epoch": 0.31, "grad_norm": 1.0627967619207583, "learning_rate": 1.6199436615770796e-05, "loss": 0.7262, "step": 6717 }, { "epoch": 0.31, "grad_norm": 0.3545428142235433, "learning_rate": 1.6198269056796746e-05, "loss": 0.2523, "step": 6718 }, { "epoch": 0.31, "grad_norm": 0.3517835082515496, "learning_rate": 1.619710136059899e-05, "loss": 0.2231, "step": 6719 }, { "epoch": 0.31, "grad_norm": 0.7363759592821211, "learning_rate": 1.6195933527203385e-05, "loss": 0.4622, "step": 6720 }, { "epoch": 0.31, "grad_norm": 0.6184428541104103, "learning_rate": 1.6194765556635782e-05, "loss": 0.3789, "step": 6721 }, { "epoch": 0.31, "grad_norm": 0.3786599318888344, "learning_rate": 1.619359744892204e-05, "loss": 0.2309, "step": 6722 }, { "epoch": 0.31, "grad_norm": 0.41944427748634533, "learning_rate": 1.6192429204088022e-05, "loss": 0.3473, "step": 6723 }, { "epoch": 0.31, "grad_norm": 0.633881108809735, "learning_rate": 1.6191260822159587e-05, "loss": 0.3757, "step": 6724 }, { "epoch": 0.31, "grad_norm": 0.4637489300912422, "learning_rate": 1.6190092303162607e-05, "loss": 0.3034, "step": 6725 }, { "epoch": 0.31, "grad_norm": 0.4003159739131099, "learning_rate": 1.6188923647122946e-05, "loss": 0.223, "step": 6726 }, { "epoch": 0.31, "grad_norm": 0.42649843040556173, "learning_rate": 1.618775485406648e-05, "loss": 0.2642, "step": 6727 }, { "epoch": 0.31, "grad_norm": 0.46464470932303426, "learning_rate": 1.618658592401909e-05, "loss": 0.3414, "step": 6728 }, { "epoch": 0.31, "grad_norm": 0.9591152387680215, "learning_rate": 1.6185416857006648e-05, "loss": 0.4963, "step": 6729 }, { "epoch": 0.31, "grad_norm": 0.5672483732745518, "learning_rate": 1.6184247653055042e-05, "loss": 0.4149, "step": 6730 }, { "epoch": 0.31, "grad_norm": 0.32972760903041165, "learning_rate": 1.6183078312190148e-05, "loss": 0.2746, "step": 6731 }, { "epoch": 0.31, "grad_norm": 0.35917607074949526, "learning_rate": 1.6181908834437862e-05, "loss": 0.1825, "step": 6732 }, { "epoch": 0.31, "grad_norm": 0.806829928330583, "learning_rate": 1.6180739219824073e-05, "loss": 0.5079, "step": 6733 }, { "epoch": 0.31, "grad_norm": 0.4899453701906641, "learning_rate": 1.6179569468374673e-05, "loss": 0.3172, "step": 6734 }, { "epoch": 0.31, "grad_norm": 0.47240583651985885, "learning_rate": 1.617839958011556e-05, "loss": 0.2773, "step": 6735 }, { "epoch": 0.31, "grad_norm": 0.5300140555706498, "learning_rate": 1.617722955507264e-05, "loss": 0.3969, "step": 6736 }, { "epoch": 0.31, "grad_norm": 0.3825746144884539, "learning_rate": 1.6176059393271807e-05, "loss": 0.3226, "step": 6737 }, { "epoch": 0.31, "grad_norm": 0.5488936585847455, "learning_rate": 1.6174889094738975e-05, "loss": 0.2657, "step": 6738 }, { "epoch": 0.31, "grad_norm": 0.34170981470557205, "learning_rate": 1.6173718659500046e-05, "loss": 0.1949, "step": 6739 }, { "epoch": 0.31, "grad_norm": 0.40246048147600033, "learning_rate": 1.617254808758094e-05, "loss": 0.293, "step": 6740 }, { "epoch": 0.31, "grad_norm": 1.5602604552794073, "learning_rate": 1.617137737900757e-05, "loss": 0.8928, "step": 6741 }, { "epoch": 0.31, "grad_norm": 0.4150834756675873, "learning_rate": 1.6170206533805845e-05, "loss": 0.2699, "step": 6742 }, { "epoch": 0.31, "grad_norm": 0.35616587627774376, "learning_rate": 1.6169035552001698e-05, "loss": 0.3272, "step": 6743 }, { "epoch": 0.31, "grad_norm": 0.4449961110690607, "learning_rate": 1.616786443362105e-05, "loss": 0.3113, "step": 6744 }, { "epoch": 0.31, "grad_norm": 0.2224810647045958, "learning_rate": 1.616669317868983e-05, "loss": 0.1054, "step": 6745 }, { "epoch": 0.31, "grad_norm": 0.49738218882020835, "learning_rate": 1.6165521787233963e-05, "loss": 0.3378, "step": 6746 }, { "epoch": 0.31, "grad_norm": 0.4117795768770408, "learning_rate": 1.616435025927939e-05, "loss": 0.326, "step": 6747 }, { "epoch": 0.31, "grad_norm": 0.8571403058004048, "learning_rate": 1.616317859485204e-05, "loss": 0.3727, "step": 6748 }, { "epoch": 0.31, "grad_norm": 0.3997159562974615, "learning_rate": 1.6162006793977858e-05, "loss": 0.318, "step": 6749 }, { "epoch": 0.31, "grad_norm": 0.410027533778778, "learning_rate": 1.6160834856682783e-05, "loss": 0.3185, "step": 6750 }, { "epoch": 0.31, "grad_norm": 0.30433183712987544, "learning_rate": 1.6159662782992767e-05, "loss": 0.2025, "step": 6751 }, { "epoch": 0.31, "grad_norm": 0.31155054079936434, "learning_rate": 1.615849057293375e-05, "loss": 0.2178, "step": 6752 }, { "epoch": 0.31, "grad_norm": 1.2629097236999247, "learning_rate": 1.6157318226531685e-05, "loss": 0.5071, "step": 6753 }, { "epoch": 0.31, "grad_norm": 0.4682042607277238, "learning_rate": 1.6156145743812532e-05, "loss": 0.3344, "step": 6754 }, { "epoch": 0.31, "grad_norm": 0.3357680303839436, "learning_rate": 1.6154973124802248e-05, "loss": 0.2289, "step": 6755 }, { "epoch": 0.31, "grad_norm": 1.2869987732237733, "learning_rate": 1.6153800369526788e-05, "loss": 0.6715, "step": 6756 }, { "epoch": 0.31, "grad_norm": 0.2746826192713602, "learning_rate": 1.6152627478012116e-05, "loss": 0.1636, "step": 6757 }, { "epoch": 0.31, "grad_norm": 0.39425936674462725, "learning_rate": 1.6151454450284206e-05, "loss": 0.228, "step": 6758 }, { "epoch": 0.31, "grad_norm": 0.9537556790267768, "learning_rate": 1.6150281286369024e-05, "loss": 0.3704, "step": 6759 }, { "epoch": 0.31, "grad_norm": 1.069674269395746, "learning_rate": 1.614910798629254e-05, "loss": 0.6016, "step": 6760 }, { "epoch": 0.31, "grad_norm": 0.3306242853561329, "learning_rate": 1.6147934550080734e-05, "loss": 0.2041, "step": 6761 }, { "epoch": 0.31, "grad_norm": 0.40916384354161917, "learning_rate": 1.614676097775958e-05, "loss": 0.33, "step": 6762 }, { "epoch": 0.31, "grad_norm": 0.26442418691484854, "learning_rate": 1.6145587269355062e-05, "loss": 0.1729, "step": 6763 }, { "epoch": 0.31, "grad_norm": 0.45136439396279865, "learning_rate": 1.6144413424893163e-05, "loss": 0.2993, "step": 6764 }, { "epoch": 0.31, "grad_norm": 1.394868004123701, "learning_rate": 1.614323944439988e-05, "loss": 0.4107, "step": 6765 }, { "epoch": 0.31, "grad_norm": 0.5002144570948106, "learning_rate": 1.6142065327901192e-05, "loss": 0.3427, "step": 6766 }, { "epoch": 0.31, "grad_norm": 0.37886809342006733, "learning_rate": 1.6140891075423095e-05, "loss": 0.2702, "step": 6767 }, { "epoch": 0.31, "grad_norm": 0.862629977415794, "learning_rate": 1.6139716686991592e-05, "loss": 0.3901, "step": 6768 }, { "epoch": 0.31, "grad_norm": 0.2935159245210967, "learning_rate": 1.6138542162632677e-05, "loss": 0.1577, "step": 6769 }, { "epoch": 0.31, "grad_norm": 0.515303048678815, "learning_rate": 1.6137367502372356e-05, "loss": 0.2746, "step": 6770 }, { "epoch": 0.31, "grad_norm": 0.656812904560812, "learning_rate": 1.6136192706236635e-05, "loss": 0.3252, "step": 6771 }, { "epoch": 0.31, "grad_norm": 1.3988466734238543, "learning_rate": 1.613501777425152e-05, "loss": 0.7831, "step": 6772 }, { "epoch": 0.31, "grad_norm": 0.3874902729059708, "learning_rate": 1.6133842706443025e-05, "loss": 0.2384, "step": 6773 }, { "epoch": 0.31, "grad_norm": 0.5984432874780572, "learning_rate": 1.6132667502837164e-05, "loss": 0.4009, "step": 6774 }, { "epoch": 0.31, "grad_norm": 0.2732991728496859, "learning_rate": 1.6131492163459955e-05, "loss": 0.1915, "step": 6775 }, { "epoch": 0.31, "grad_norm": 0.36326131384568094, "learning_rate": 1.613031668833742e-05, "loss": 0.315, "step": 6776 }, { "epoch": 0.31, "grad_norm": 0.8692199684281178, "learning_rate": 1.6129141077495583e-05, "loss": 0.4525, "step": 6777 }, { "epoch": 0.31, "grad_norm": 0.5128647275606913, "learning_rate": 1.6127965330960468e-05, "loss": 0.3042, "step": 6778 }, { "epoch": 0.31, "grad_norm": 0.463054745040407, "learning_rate": 1.612678944875811e-05, "loss": 0.2903, "step": 6779 }, { "epoch": 0.31, "grad_norm": 0.5973145410063831, "learning_rate": 1.6125613430914533e-05, "loss": 0.393, "step": 6780 }, { "epoch": 0.31, "grad_norm": 0.4236879474704736, "learning_rate": 1.612443727745578e-05, "loss": 0.321, "step": 6781 }, { "epoch": 0.31, "grad_norm": 0.4136210512327562, "learning_rate": 1.612326098840789e-05, "loss": 0.2482, "step": 6782 }, { "epoch": 0.31, "grad_norm": 0.24866531118196758, "learning_rate": 1.6122084563796906e-05, "loss": 0.2327, "step": 6783 }, { "epoch": 0.31, "grad_norm": 1.4657352652578608, "learning_rate": 1.6120908003648867e-05, "loss": 0.5785, "step": 6784 }, { "epoch": 0.31, "grad_norm": 0.4151820642974993, "learning_rate": 1.6119731307989822e-05, "loss": 0.3108, "step": 6785 }, { "epoch": 0.31, "grad_norm": 0.49679111405320464, "learning_rate": 1.611855447684583e-05, "loss": 0.3482, "step": 6786 }, { "epoch": 0.31, "grad_norm": 1.2816141259276508, "learning_rate": 1.6117377510242937e-05, "loss": 0.5277, "step": 6787 }, { "epoch": 0.31, "grad_norm": 0.5528848412690877, "learning_rate": 1.61162004082072e-05, "loss": 0.2533, "step": 6788 }, { "epoch": 0.31, "grad_norm": 0.4769770828112979, "learning_rate": 1.6115023170764682e-05, "loss": 0.323, "step": 6789 }, { "epoch": 0.31, "grad_norm": 0.480064379038675, "learning_rate": 1.6113845797941446e-05, "loss": 0.3585, "step": 6790 }, { "epoch": 0.31, "grad_norm": 0.24738841261086383, "learning_rate": 1.6112668289763552e-05, "loss": 0.1739, "step": 6791 }, { "epoch": 0.31, "grad_norm": 1.3255656447705793, "learning_rate": 1.6111490646257078e-05, "loss": 0.6968, "step": 6792 }, { "epoch": 0.31, "grad_norm": 0.9341280895214518, "learning_rate": 1.611031286744809e-05, "loss": 0.4414, "step": 6793 }, { "epoch": 0.31, "grad_norm": 0.2871733134202783, "learning_rate": 1.6109134953362664e-05, "loss": 0.232, "step": 6794 }, { "epoch": 0.31, "grad_norm": 0.5507399183982784, "learning_rate": 1.610795690402688e-05, "loss": 0.3022, "step": 6795 }, { "epoch": 0.31, "grad_norm": 0.6050850138232671, "learning_rate": 1.6106778719466817e-05, "loss": 0.3637, "step": 6796 }, { "epoch": 0.31, "grad_norm": 0.45738647184609826, "learning_rate": 1.6105600399708556e-05, "loss": 0.1876, "step": 6797 }, { "epoch": 0.31, "grad_norm": 0.343693379784756, "learning_rate": 1.6104421944778186e-05, "loss": 0.3293, "step": 6798 }, { "epoch": 0.31, "grad_norm": 0.6359526776662212, "learning_rate": 1.6103243354701806e-05, "loss": 0.4819, "step": 6799 }, { "epoch": 0.31, "grad_norm": 0.70958675028496, "learning_rate": 1.610206462950549e-05, "loss": 0.3624, "step": 6800 }, { "epoch": 0.31, "grad_norm": 0.4072201845114903, "learning_rate": 1.6100885769215352e-05, "loss": 0.2725, "step": 6801 }, { "epoch": 0.31, "grad_norm": 0.4006641066705243, "learning_rate": 1.6099706773857477e-05, "loss": 0.3466, "step": 6802 }, { "epoch": 0.31, "grad_norm": 0.31521109442414585, "learning_rate": 1.609852764345798e-05, "loss": 0.1346, "step": 6803 }, { "epoch": 0.31, "grad_norm": 0.35542823126621176, "learning_rate": 1.6097348378042955e-05, "loss": 0.2928, "step": 6804 }, { "epoch": 0.31, "grad_norm": 0.7152261799285524, "learning_rate": 1.6096168977638512e-05, "loss": 0.4879, "step": 6805 }, { "epoch": 0.31, "grad_norm": 0.29095419748280965, "learning_rate": 1.6094989442270763e-05, "loss": 0.2804, "step": 6806 }, { "epoch": 0.31, "grad_norm": 0.4843826211116314, "learning_rate": 1.6093809771965828e-05, "loss": 0.2903, "step": 6807 }, { "epoch": 0.31, "grad_norm": 0.5383422719547504, "learning_rate": 1.609262996674981e-05, "loss": 0.3184, "step": 6808 }, { "epoch": 0.31, "grad_norm": 0.29424727688192087, "learning_rate": 1.6091450026648842e-05, "loss": 0.2202, "step": 6809 }, { "epoch": 0.31, "grad_norm": 0.32762336311741225, "learning_rate": 1.609026995168904e-05, "loss": 0.2742, "step": 6810 }, { "epoch": 0.31, "grad_norm": 0.7601029061192826, "learning_rate": 1.6089089741896532e-05, "loss": 0.4783, "step": 6811 }, { "epoch": 0.31, "grad_norm": 0.42621298129095053, "learning_rate": 1.6087909397297446e-05, "loss": 0.3401, "step": 6812 }, { "epoch": 0.31, "grad_norm": 0.5727437044560925, "learning_rate": 1.6086728917917912e-05, "loss": 0.372, "step": 6813 }, { "epoch": 0.31, "grad_norm": 0.3958965101183971, "learning_rate": 1.608554830378407e-05, "loss": 0.3055, "step": 6814 }, { "epoch": 0.31, "grad_norm": 0.3082188131968349, "learning_rate": 1.6084367554922046e-05, "loss": 0.2364, "step": 6815 }, { "epoch": 0.31, "grad_norm": 0.6309798807923693, "learning_rate": 1.6083186671357996e-05, "loss": 0.3967, "step": 6816 }, { "epoch": 0.31, "grad_norm": 0.2946102930878342, "learning_rate": 1.6082005653118052e-05, "loss": 0.2331, "step": 6817 }, { "epoch": 0.31, "grad_norm": 0.7326357993089266, "learning_rate": 1.6080824500228367e-05, "loss": 0.3505, "step": 6818 }, { "epoch": 0.31, "grad_norm": 0.45068825860403194, "learning_rate": 1.6079643212715088e-05, "loss": 0.3618, "step": 6819 }, { "epoch": 0.31, "grad_norm": 0.7403188061324231, "learning_rate": 1.6078461790604366e-05, "loss": 0.4669, "step": 6820 }, { "epoch": 0.31, "grad_norm": 0.4815423705489495, "learning_rate": 1.607728023392236e-05, "loss": 0.3256, "step": 6821 }, { "epoch": 0.31, "grad_norm": 0.31322535261786, "learning_rate": 1.607609854269523e-05, "loss": 0.3044, "step": 6822 }, { "epoch": 0.31, "grad_norm": 0.2588897743374257, "learning_rate": 1.607491671694913e-05, "loss": 0.1357, "step": 6823 }, { "epoch": 0.31, "grad_norm": 0.6505474252827345, "learning_rate": 1.607373475671023e-05, "loss": 0.3622, "step": 6824 }, { "epoch": 0.31, "grad_norm": 0.45338916104718013, "learning_rate": 1.6072552662004696e-05, "loss": 0.3423, "step": 6825 }, { "epoch": 0.31, "grad_norm": 0.4115278810209507, "learning_rate": 1.60713704328587e-05, "loss": 0.2984, "step": 6826 }, { "epoch": 0.31, "grad_norm": 0.42939497297794194, "learning_rate": 1.6070188069298413e-05, "loss": 0.2536, "step": 6827 }, { "epoch": 0.31, "grad_norm": 0.45023240812222515, "learning_rate": 1.6069005571350017e-05, "loss": 0.3614, "step": 6828 }, { "epoch": 0.31, "grad_norm": 0.2823525598651837, "learning_rate": 1.6067822939039684e-05, "loss": 0.2358, "step": 6829 }, { "epoch": 0.31, "grad_norm": 0.30143471812810824, "learning_rate": 1.6066640172393596e-05, "loss": 0.2172, "step": 6830 }, { "epoch": 0.31, "grad_norm": 0.5948840656659351, "learning_rate": 1.606545727143795e-05, "loss": 0.426, "step": 6831 }, { "epoch": 0.31, "grad_norm": 0.6713991171828799, "learning_rate": 1.606427423619892e-05, "loss": 0.5151, "step": 6832 }, { "epoch": 0.31, "grad_norm": 0.34501182915948037, "learning_rate": 1.60630910667027e-05, "loss": 0.2542, "step": 6833 }, { "epoch": 0.31, "grad_norm": 0.3574889925513999, "learning_rate": 1.6061907762975493e-05, "loss": 0.3016, "step": 6834 }, { "epoch": 0.31, "grad_norm": 0.34461397672738825, "learning_rate": 1.606072432504349e-05, "loss": 0.1627, "step": 6835 }, { "epoch": 0.31, "grad_norm": 0.4450290332471284, "learning_rate": 1.605954075293289e-05, "loss": 0.1395, "step": 6836 }, { "epoch": 0.31, "grad_norm": 0.45673990853789603, "learning_rate": 1.60583570466699e-05, "loss": 0.354, "step": 6837 }, { "epoch": 0.31, "grad_norm": 0.44715701488297044, "learning_rate": 1.605717320628072e-05, "loss": 0.3432, "step": 6838 }, { "epoch": 0.31, "grad_norm": 0.6940711367704417, "learning_rate": 1.605598923179157e-05, "loss": 0.4356, "step": 6839 }, { "epoch": 0.31, "grad_norm": 0.38083703912190275, "learning_rate": 1.6054805123228648e-05, "loss": 0.2554, "step": 6840 }, { "epoch": 0.31, "grad_norm": 0.34331494545670194, "learning_rate": 1.605362088061818e-05, "loss": 0.1757, "step": 6841 }, { "epoch": 0.31, "grad_norm": 0.3336766412842956, "learning_rate": 1.605243650398638e-05, "loss": 0.2941, "step": 6842 }, { "epoch": 0.31, "grad_norm": 0.36675317407101293, "learning_rate": 1.605125199335947e-05, "loss": 0.2458, "step": 6843 }, { "epoch": 0.31, "grad_norm": 0.8139713344227841, "learning_rate": 1.6050067348763675e-05, "loss": 0.4264, "step": 6844 }, { "epoch": 0.31, "grad_norm": 0.3545051459600527, "learning_rate": 1.6048882570225215e-05, "loss": 0.3277, "step": 6845 }, { "epoch": 0.31, "grad_norm": 0.3662930018112263, "learning_rate": 1.6047697657770327e-05, "loss": 0.2165, "step": 6846 }, { "epoch": 0.31, "grad_norm": 0.33413171516541573, "learning_rate": 1.6046512611425243e-05, "loss": 0.1861, "step": 6847 }, { "epoch": 0.31, "grad_norm": 0.598947737971673, "learning_rate": 1.6045327431216197e-05, "loss": 0.3753, "step": 6848 }, { "epoch": 0.31, "grad_norm": 0.4506697504356536, "learning_rate": 1.6044142117169427e-05, "loss": 0.2664, "step": 6849 }, { "epoch": 0.31, "grad_norm": 0.48025022548378704, "learning_rate": 1.6042956669311176e-05, "loss": 0.3435, "step": 6850 }, { "epoch": 0.31, "grad_norm": 0.5896149408113857, "learning_rate": 1.604177108766769e-05, "loss": 0.3381, "step": 6851 }, { "epoch": 0.31, "grad_norm": 0.4317496372531082, "learning_rate": 1.6040585372265213e-05, "loss": 0.3112, "step": 6852 }, { "epoch": 0.31, "grad_norm": 0.2691848151848456, "learning_rate": 1.6039399523130003e-05, "loss": 0.2217, "step": 6853 }, { "epoch": 0.31, "grad_norm": 0.47747361973103597, "learning_rate": 1.6038213540288304e-05, "loss": 0.2363, "step": 6854 }, { "epoch": 0.31, "grad_norm": 0.3886684146665068, "learning_rate": 1.6037027423766375e-05, "loss": 0.2785, "step": 6855 }, { "epoch": 0.31, "grad_norm": 0.9390365614975763, "learning_rate": 1.603584117359048e-05, "loss": 0.3946, "step": 6856 }, { "epoch": 0.32, "grad_norm": 0.40074826452245976, "learning_rate": 1.6034654789786878e-05, "loss": 0.3285, "step": 6857 }, { "epoch": 0.32, "grad_norm": 0.409586810672038, "learning_rate": 1.603346827238184e-05, "loss": 0.2989, "step": 6858 }, { "epoch": 0.32, "grad_norm": 0.31078953147650473, "learning_rate": 1.6032281621401626e-05, "loss": 0.1139, "step": 6859 }, { "epoch": 0.32, "grad_norm": 0.45640974303079584, "learning_rate": 1.6031094836872506e-05, "loss": 0.2964, "step": 6860 }, { "epoch": 0.32, "grad_norm": 0.3763014255049807, "learning_rate": 1.6029907918820768e-05, "loss": 0.2779, "step": 6861 }, { "epoch": 0.32, "grad_norm": 0.6784209985122777, "learning_rate": 1.6028720867272673e-05, "loss": 0.3237, "step": 6862 }, { "epoch": 0.32, "grad_norm": 0.8847086099764302, "learning_rate": 1.6027533682254514e-05, "loss": 0.5215, "step": 6863 }, { "epoch": 0.32, "grad_norm": 0.4232336388396326, "learning_rate": 1.6026346363792565e-05, "loss": 0.3084, "step": 6864 }, { "epoch": 0.32, "grad_norm": 0.507968126895754, "learning_rate": 1.602515891191312e-05, "loss": 0.4027, "step": 6865 }, { "epoch": 0.32, "grad_norm": 0.2919721032057138, "learning_rate": 1.602397132664246e-05, "loss": 0.1933, "step": 6866 }, { "epoch": 0.32, "grad_norm": 0.40227782258568073, "learning_rate": 1.6022783608006882e-05, "loss": 0.2841, "step": 6867 }, { "epoch": 0.32, "grad_norm": 0.9535769571636706, "learning_rate": 1.6021595756032676e-05, "loss": 0.5107, "step": 6868 }, { "epoch": 0.32, "grad_norm": 0.4060843341233045, "learning_rate": 1.602040777074615e-05, "loss": 0.2514, "step": 6869 }, { "epoch": 0.32, "grad_norm": 0.4409029956950468, "learning_rate": 1.6019219652173593e-05, "loss": 0.3047, "step": 6870 }, { "epoch": 0.32, "grad_norm": 0.43729588069683784, "learning_rate": 1.601803140034132e-05, "loss": 0.298, "step": 6871 }, { "epoch": 0.32, "grad_norm": 0.44032164230768894, "learning_rate": 1.6016843015275626e-05, "loss": 0.232, "step": 6872 }, { "epoch": 0.32, "grad_norm": 0.326167030678317, "learning_rate": 1.6015654497002834e-05, "loss": 0.272, "step": 6873 }, { "epoch": 0.32, "grad_norm": 0.5167953468890346, "learning_rate": 1.6014465845549242e-05, "loss": 0.3728, "step": 6874 }, { "epoch": 0.32, "grad_norm": 0.8922550098225157, "learning_rate": 1.6013277060941176e-05, "loss": 0.4295, "step": 6875 }, { "epoch": 0.32, "grad_norm": 0.38770091810482626, "learning_rate": 1.6012088143204953e-05, "loss": 0.2686, "step": 6876 }, { "epoch": 0.32, "grad_norm": 0.5403345297611002, "learning_rate": 1.6010899092366893e-05, "loss": 0.3632, "step": 6877 }, { "epoch": 0.32, "grad_norm": 0.4529593587532332, "learning_rate": 1.600970990845332e-05, "loss": 0.3954, "step": 6878 }, { "epoch": 0.32, "grad_norm": 0.27534562575417915, "learning_rate": 1.6008520591490557e-05, "loss": 0.1938, "step": 6879 }, { "epoch": 0.32, "grad_norm": 0.45557094117888947, "learning_rate": 1.6007331141504945e-05, "loss": 0.2648, "step": 6880 }, { "epoch": 0.32, "grad_norm": 0.43010774682390496, "learning_rate": 1.600614155852281e-05, "loss": 0.3301, "step": 6881 }, { "epoch": 0.32, "grad_norm": 0.3469888570446096, "learning_rate": 1.600495184257049e-05, "loss": 0.203, "step": 6882 }, { "epoch": 0.32, "grad_norm": 0.8671778549060267, "learning_rate": 1.6003761993674324e-05, "loss": 0.4917, "step": 6883 }, { "epoch": 0.32, "grad_norm": 0.3943633702846707, "learning_rate": 1.600257201186065e-05, "loss": 0.379, "step": 6884 }, { "epoch": 0.32, "grad_norm": 0.3383425821711801, "learning_rate": 1.6001381897155824e-05, "loss": 0.2013, "step": 6885 }, { "epoch": 0.32, "grad_norm": 0.36101508544224437, "learning_rate": 1.6000191649586184e-05, "loss": 0.3015, "step": 6886 }, { "epoch": 0.32, "grad_norm": 0.41799623857106905, "learning_rate": 1.5999001269178082e-05, "loss": 0.2791, "step": 6887 }, { "epoch": 0.32, "grad_norm": 0.36177114568788626, "learning_rate": 1.5997810755957873e-05, "loss": 0.2103, "step": 6888 }, { "epoch": 0.32, "grad_norm": 0.4635813671941839, "learning_rate": 1.599662010995192e-05, "loss": 0.3202, "step": 6889 }, { "epoch": 0.32, "grad_norm": 0.8769488708957407, "learning_rate": 1.599542933118657e-05, "loss": 0.4475, "step": 6890 }, { "epoch": 0.32, "grad_norm": 0.39010327570036185, "learning_rate": 1.59942384196882e-05, "loss": 0.297, "step": 6891 }, { "epoch": 0.32, "grad_norm": 0.3335678281191913, "learning_rate": 1.5993047375483162e-05, "loss": 0.1796, "step": 6892 }, { "epoch": 0.32, "grad_norm": 0.3353713916587556, "learning_rate": 1.599185619859784e-05, "loss": 0.2677, "step": 6893 }, { "epoch": 0.32, "grad_norm": 0.35712774063039415, "learning_rate": 1.599066488905859e-05, "loss": 0.286, "step": 6894 }, { "epoch": 0.32, "grad_norm": 0.8281197344738832, "learning_rate": 1.5989473446891797e-05, "loss": 0.3607, "step": 6895 }, { "epoch": 0.32, "grad_norm": 0.8488193424114462, "learning_rate": 1.598828187212383e-05, "loss": 0.4748, "step": 6896 }, { "epoch": 0.32, "grad_norm": 0.3078338726199657, "learning_rate": 1.598709016478108e-05, "loss": 0.28, "step": 6897 }, { "epoch": 0.32, "grad_norm": 0.4576072838980302, "learning_rate": 1.5985898324889916e-05, "loss": 0.2575, "step": 6898 }, { "epoch": 0.32, "grad_norm": 0.32795162253703297, "learning_rate": 1.598470635247674e-05, "loss": 0.1884, "step": 6899 }, { "epoch": 0.32, "grad_norm": 0.4561446002325696, "learning_rate": 1.5983514247567932e-05, "loss": 0.2971, "step": 6900 }, { "epoch": 0.32, "grad_norm": 0.48078808325838135, "learning_rate": 1.5982322010189882e-05, "loss": 0.35, "step": 6901 }, { "epoch": 0.32, "grad_norm": 1.4042664619064698, "learning_rate": 1.598112964036899e-05, "loss": 0.4052, "step": 6902 }, { "epoch": 0.32, "grad_norm": 0.46527242320449574, "learning_rate": 1.5979937138131653e-05, "loss": 0.2957, "step": 6903 }, { "epoch": 0.32, "grad_norm": 0.5875960600114221, "learning_rate": 1.5978744503504272e-05, "loss": 0.4124, "step": 6904 }, { "epoch": 0.32, "grad_norm": 0.27732684730113033, "learning_rate": 1.5977551736513254e-05, "loss": 0.1848, "step": 6905 }, { "epoch": 0.32, "grad_norm": 0.4752967015586555, "learning_rate": 1.5976358837184997e-05, "loss": 0.2687, "step": 6906 }, { "epoch": 0.32, "grad_norm": 0.9484632343741927, "learning_rate": 1.5975165805545914e-05, "loss": 0.3972, "step": 6907 }, { "epoch": 0.32, "grad_norm": 0.9767531084441508, "learning_rate": 1.597397264162242e-05, "loss": 0.3719, "step": 6908 }, { "epoch": 0.32, "grad_norm": 0.3074732388545118, "learning_rate": 1.597277934544093e-05, "loss": 0.2789, "step": 6909 }, { "epoch": 0.32, "grad_norm": 0.4866662058408034, "learning_rate": 1.5971585917027864e-05, "loss": 0.3899, "step": 6910 }, { "epoch": 0.32, "grad_norm": 0.33342035515963886, "learning_rate": 1.5970392356409638e-05, "loss": 0.1841, "step": 6911 }, { "epoch": 0.32, "grad_norm": 0.435606684206042, "learning_rate": 1.5969198663612683e-05, "loss": 0.3018, "step": 6912 }, { "epoch": 0.32, "grad_norm": 0.47893228425258727, "learning_rate": 1.596800483866342e-05, "loss": 0.3487, "step": 6913 }, { "epoch": 0.32, "grad_norm": 0.44275612396158515, "learning_rate": 1.596681088158828e-05, "loss": 0.2256, "step": 6914 }, { "epoch": 0.32, "grad_norm": 0.4220416337744027, "learning_rate": 1.5965616792413704e-05, "loss": 0.2228, "step": 6915 }, { "epoch": 0.32, "grad_norm": 1.3529763468449862, "learning_rate": 1.596442257116612e-05, "loss": 0.8071, "step": 6916 }, { "epoch": 0.32, "grad_norm": 0.44418141764394653, "learning_rate": 1.5963228217871965e-05, "loss": 0.3291, "step": 6917 }, { "epoch": 0.32, "grad_norm": 0.3396552657633624, "learning_rate": 1.5962033732557685e-05, "loss": 0.2538, "step": 6918 }, { "epoch": 0.32, "grad_norm": 0.3915801776030961, "learning_rate": 1.596083911524973e-05, "loss": 0.3043, "step": 6919 }, { "epoch": 0.32, "grad_norm": 0.3648653081976593, "learning_rate": 1.595964436597454e-05, "loss": 0.2684, "step": 6920 }, { "epoch": 0.32, "grad_norm": 0.33500108351701374, "learning_rate": 1.5958449484758565e-05, "loss": 0.2106, "step": 6921 }, { "epoch": 0.32, "grad_norm": 0.690530127555515, "learning_rate": 1.595725447162826e-05, "loss": 0.4264, "step": 6922 }, { "epoch": 0.32, "grad_norm": 0.8618071242197098, "learning_rate": 1.595605932661009e-05, "loss": 0.4837, "step": 6923 }, { "epoch": 0.32, "grad_norm": 0.35652332215607885, "learning_rate": 1.59548640497305e-05, "loss": 0.2158, "step": 6924 }, { "epoch": 0.32, "grad_norm": 0.3031567683003244, "learning_rate": 1.5953668641015966e-05, "loss": 0.2465, "step": 6925 }, { "epoch": 0.32, "grad_norm": 0.5282492146970877, "learning_rate": 1.595247310049294e-05, "loss": 0.3094, "step": 6926 }, { "epoch": 0.32, "grad_norm": 0.6403949276638996, "learning_rate": 1.59512774281879e-05, "loss": 0.3388, "step": 6927 }, { "epoch": 0.32, "grad_norm": 0.39067733403251004, "learning_rate": 1.5950081624127313e-05, "loss": 0.2657, "step": 6928 }, { "epoch": 0.32, "grad_norm": 0.5381938789802505, "learning_rate": 1.5948885688337653e-05, "loss": 0.4074, "step": 6929 }, { "epoch": 0.32, "grad_norm": 0.4861501759524133, "learning_rate": 1.5947689620845395e-05, "loss": 0.3484, "step": 6930 }, { "epoch": 0.32, "grad_norm": 0.30761780885397155, "learning_rate": 1.5946493421677024e-05, "loss": 0.1808, "step": 6931 }, { "epoch": 0.32, "grad_norm": 0.37904072435830366, "learning_rate": 1.594529709085902e-05, "loss": 0.2693, "step": 6932 }, { "epoch": 0.32, "grad_norm": 0.3356277272991689, "learning_rate": 1.594410062841787e-05, "loss": 0.266, "step": 6933 }, { "epoch": 0.32, "grad_norm": 0.5315972328413664, "learning_rate": 1.594290403438006e-05, "loss": 0.3077, "step": 6934 }, { "epoch": 0.32, "grad_norm": 0.6409617624888739, "learning_rate": 1.594170730877208e-05, "loss": 0.4547, "step": 6935 }, { "epoch": 0.32, "grad_norm": 0.4423873108982728, "learning_rate": 1.594051045162043e-05, "loss": 0.2881, "step": 6936 }, { "epoch": 0.32, "grad_norm": 0.35771566609468075, "learning_rate": 1.59393134629516e-05, "loss": 0.26, "step": 6937 }, { "epoch": 0.32, "grad_norm": 0.3161700659163172, "learning_rate": 1.5938116342792102e-05, "loss": 0.1864, "step": 6938 }, { "epoch": 0.32, "grad_norm": 0.909307031811636, "learning_rate": 1.5936919091168425e-05, "loss": 0.5266, "step": 6939 }, { "epoch": 0.32, "grad_norm": 0.37653560140862996, "learning_rate": 1.5935721708107084e-05, "loss": 0.3006, "step": 6940 }, { "epoch": 0.32, "grad_norm": 0.3637480262662844, "learning_rate": 1.5934524193634585e-05, "loss": 0.3126, "step": 6941 }, { "epoch": 0.32, "grad_norm": 0.6448110803857646, "learning_rate": 1.593332654777744e-05, "loss": 0.4071, "step": 6942 }, { "epoch": 0.32, "grad_norm": 0.4629095737874534, "learning_rate": 1.593212877056216e-05, "loss": 0.3411, "step": 6943 }, { "epoch": 0.32, "grad_norm": 0.274595404618627, "learning_rate": 1.5930930862015272e-05, "loss": 0.1848, "step": 6944 }, { "epoch": 0.32, "grad_norm": 0.6586069668335975, "learning_rate": 1.592973282216329e-05, "loss": 0.3532, "step": 6945 }, { "epoch": 0.32, "grad_norm": 0.41901383214740723, "learning_rate": 1.5928534651032737e-05, "loss": 0.3426, "step": 6946 }, { "epoch": 0.32, "grad_norm": 0.8287847070146014, "learning_rate": 1.592733634865014e-05, "loss": 0.4165, "step": 6947 }, { "epoch": 0.32, "grad_norm": 0.4093970543107728, "learning_rate": 1.5926137915042033e-05, "loss": 0.3005, "step": 6948 }, { "epoch": 0.32, "grad_norm": 0.3470183858810759, "learning_rate": 1.592493935023494e-05, "loss": 0.2914, "step": 6949 }, { "epoch": 0.32, "grad_norm": 0.2251401644423978, "learning_rate": 1.59237406542554e-05, "loss": 0.1113, "step": 6950 }, { "epoch": 0.32, "grad_norm": 0.4184922996960351, "learning_rate": 1.5922541827129958e-05, "loss": 0.2996, "step": 6951 }, { "epoch": 0.32, "grad_norm": 0.5393464053393179, "learning_rate": 1.592134286888514e-05, "loss": 0.3966, "step": 6952 }, { "epoch": 0.32, "grad_norm": 0.42479808828378685, "learning_rate": 1.5920143779547503e-05, "loss": 0.345, "step": 6953 }, { "epoch": 0.32, "grad_norm": 0.3879686329522963, "learning_rate": 1.5918944559143587e-05, "loss": 0.1949, "step": 6954 }, { "epoch": 0.32, "grad_norm": 0.5552847108088039, "learning_rate": 1.5917745207699944e-05, "loss": 0.3647, "step": 6955 }, { "epoch": 0.32, "grad_norm": 0.26603469896501525, "learning_rate": 1.5916545725243124e-05, "loss": 0.2171, "step": 6956 }, { "epoch": 0.32, "grad_norm": 0.7996686853383073, "learning_rate": 1.5915346111799685e-05, "loss": 0.2921, "step": 6957 }, { "epoch": 0.32, "grad_norm": 0.37037930645833633, "learning_rate": 1.5914146367396184e-05, "loss": 0.302, "step": 6958 }, { "epoch": 0.32, "grad_norm": 0.812935821607108, "learning_rate": 1.5912946492059183e-05, "loss": 0.6086, "step": 6959 }, { "epoch": 0.32, "grad_norm": 0.37040842776695465, "learning_rate": 1.5911746485815243e-05, "loss": 0.2238, "step": 6960 }, { "epoch": 0.32, "grad_norm": 0.3946849282870186, "learning_rate": 1.5910546348690937e-05, "loss": 0.2737, "step": 6961 }, { "epoch": 0.32, "grad_norm": 0.29992381253370365, "learning_rate": 1.590934608071283e-05, "loss": 0.2081, "step": 6962 }, { "epoch": 0.32, "grad_norm": 0.6996118821176629, "learning_rate": 1.59081456819075e-05, "loss": 0.2856, "step": 6963 }, { "epoch": 0.32, "grad_norm": 0.3245246847693063, "learning_rate": 1.5906945152301513e-05, "loss": 0.3091, "step": 6964 }, { "epoch": 0.32, "grad_norm": 0.5125440038117659, "learning_rate": 1.5905744491921458e-05, "loss": 0.3695, "step": 6965 }, { "epoch": 0.32, "grad_norm": 0.81201944495728, "learning_rate": 1.590454370079391e-05, "loss": 0.4692, "step": 6966 }, { "epoch": 0.32, "grad_norm": 0.3531949158404305, "learning_rate": 1.5903342778945456e-05, "loss": 0.2292, "step": 6967 }, { "epoch": 0.32, "grad_norm": 0.30026128821558595, "learning_rate": 1.5902141726402683e-05, "loss": 0.2429, "step": 6968 }, { "epoch": 0.32, "grad_norm": 0.7398692022710958, "learning_rate": 1.5900940543192178e-05, "loss": 0.3977, "step": 6969 }, { "epoch": 0.32, "grad_norm": 0.2961299475918896, "learning_rate": 1.589973922934054e-05, "loss": 0.1787, "step": 6970 }, { "epoch": 0.32, "grad_norm": 1.2727301200189884, "learning_rate": 1.589853778487436e-05, "loss": 0.8791, "step": 6971 }, { "epoch": 0.32, "grad_norm": 0.3480884788234686, "learning_rate": 1.589733620982024e-05, "loss": 0.268, "step": 6972 }, { "epoch": 0.32, "grad_norm": 0.4112824919959956, "learning_rate": 1.589613450420478e-05, "loss": 0.2496, "step": 6973 }, { "epoch": 0.32, "grad_norm": 0.7164931737780263, "learning_rate": 1.5894932668054584e-05, "loss": 0.4524, "step": 6974 }, { "epoch": 0.32, "grad_norm": 0.42534252649023646, "learning_rate": 1.589373070139626e-05, "loss": 0.2674, "step": 6975 }, { "epoch": 0.32, "grad_norm": 0.3203618831423938, "learning_rate": 1.589252860425642e-05, "loss": 0.2409, "step": 6976 }, { "epoch": 0.32, "grad_norm": 0.37633900464818065, "learning_rate": 1.5891326376661673e-05, "loss": 0.295, "step": 6977 }, { "epoch": 0.32, "grad_norm": 1.6632348828260697, "learning_rate": 1.589012401863864e-05, "loss": 0.8379, "step": 6978 }, { "epoch": 0.32, "grad_norm": 0.4397446089791949, "learning_rate": 1.5888921530213938e-05, "loss": 0.2884, "step": 6979 }, { "epoch": 0.32, "grad_norm": 0.405569344813435, "learning_rate": 1.5887718911414193e-05, "loss": 0.2987, "step": 6980 }, { "epoch": 0.32, "grad_norm": 0.7348671276608425, "learning_rate": 1.5886516162266022e-05, "loss": 0.5122, "step": 6981 }, { "epoch": 0.32, "grad_norm": 0.41252584472183856, "learning_rate": 1.5885313282796055e-05, "loss": 0.2909, "step": 6982 }, { "epoch": 0.32, "grad_norm": 0.3462711642240843, "learning_rate": 1.5884110273030926e-05, "loss": 0.1531, "step": 6983 }, { "epoch": 0.32, "grad_norm": 0.32032695032253367, "learning_rate": 1.5882907132997267e-05, "loss": 0.243, "step": 6984 }, { "epoch": 0.32, "grad_norm": 0.40311796625937124, "learning_rate": 1.5881703862721714e-05, "loss": 0.2963, "step": 6985 }, { "epoch": 0.32, "grad_norm": 0.670350134136847, "learning_rate": 1.588050046223091e-05, "loss": 0.3703, "step": 6986 }, { "epoch": 0.32, "grad_norm": 0.8122544777904941, "learning_rate": 1.587929693155149e-05, "loss": 0.5043, "step": 6987 }, { "epoch": 0.32, "grad_norm": 0.46970269922932073, "learning_rate": 1.58780932707101e-05, "loss": 0.2701, "step": 6988 }, { "epoch": 0.32, "grad_norm": 0.33628963076202206, "learning_rate": 1.5876889479733393e-05, "loss": 0.2215, "step": 6989 }, { "epoch": 0.32, "grad_norm": 0.36926985031194653, "learning_rate": 1.587568555864802e-05, "loss": 0.2319, "step": 6990 }, { "epoch": 0.32, "grad_norm": 0.41933988387072885, "learning_rate": 1.5874481507480627e-05, "loss": 0.2744, "step": 6991 }, { "epoch": 0.32, "grad_norm": 0.3858673053069211, "learning_rate": 1.587327732625788e-05, "loss": 0.3337, "step": 6992 }, { "epoch": 0.32, "grad_norm": 0.6208726978720848, "learning_rate": 1.5872073015006428e-05, "loss": 0.2952, "step": 6993 }, { "epoch": 0.32, "grad_norm": 0.45439751346288976, "learning_rate": 1.5870868573752942e-05, "loss": 0.3141, "step": 6994 }, { "epoch": 0.32, "grad_norm": 0.5046595391458979, "learning_rate": 1.5869664002524087e-05, "loss": 0.3503, "step": 6995 }, { "epoch": 0.32, "grad_norm": 0.29785386679867737, "learning_rate": 1.5868459301346523e-05, "loss": 0.2154, "step": 6996 }, { "epoch": 0.32, "grad_norm": 0.3985196186107559, "learning_rate": 1.586725447024693e-05, "loss": 0.2923, "step": 6997 }, { "epoch": 0.32, "grad_norm": 0.6091645423934269, "learning_rate": 1.5866049509251977e-05, "loss": 0.4636, "step": 6998 }, { "epoch": 0.32, "grad_norm": 0.4994393555808652, "learning_rate": 1.586484441838834e-05, "loss": 0.2702, "step": 6999 }, { "epoch": 0.32, "grad_norm": 0.2987715008595132, "learning_rate": 1.5863639197682702e-05, "loss": 0.2693, "step": 7000 }, { "epoch": 0.32, "grad_norm": 1.5194518614292296, "learning_rate": 1.5862433847161743e-05, "loss": 0.8884, "step": 7001 }, { "epoch": 0.32, "grad_norm": 0.3236844969139569, "learning_rate": 1.5861228366852148e-05, "loss": 0.1952, "step": 7002 }, { "epoch": 0.32, "grad_norm": 0.38884563513378256, "learning_rate": 1.5860022756780605e-05, "loss": 0.287, "step": 7003 }, { "epoch": 0.32, "grad_norm": 0.4468560354570671, "learning_rate": 1.585881701697381e-05, "loss": 0.3274, "step": 7004 }, { "epoch": 0.32, "grad_norm": 1.0251347470159884, "learning_rate": 1.585761114745845e-05, "loss": 0.6627, "step": 7005 }, { "epoch": 0.32, "grad_norm": 0.3168888464016101, "learning_rate": 1.585640514826123e-05, "loss": 0.2009, "step": 7006 }, { "epoch": 0.32, "grad_norm": 1.511274961618501, "learning_rate": 1.585519901940884e-05, "loss": 0.8503, "step": 7007 }, { "epoch": 0.32, "grad_norm": 0.30665772093699484, "learning_rate": 1.5853992760927985e-05, "loss": 0.279, "step": 7008 }, { "epoch": 0.32, "grad_norm": 0.35810481255151594, "learning_rate": 1.5852786372845374e-05, "loss": 0.2125, "step": 7009 }, { "epoch": 0.32, "grad_norm": 0.5058657560529324, "learning_rate": 1.5851579855187718e-05, "loss": 0.3144, "step": 7010 }, { "epoch": 0.32, "grad_norm": 0.4901265802305454, "learning_rate": 1.585037320798172e-05, "loss": 0.3428, "step": 7011 }, { "epoch": 0.32, "grad_norm": 0.39933863050402013, "learning_rate": 1.5849166431254097e-05, "loss": 0.199, "step": 7012 }, { "epoch": 0.32, "grad_norm": 0.9381234933348189, "learning_rate": 1.584795952503157e-05, "loss": 0.6108, "step": 7013 }, { "epoch": 0.32, "grad_norm": 0.587697812519193, "learning_rate": 1.584675248934085e-05, "loss": 0.4646, "step": 7014 }, { "epoch": 0.32, "grad_norm": 0.41601005932308976, "learning_rate": 1.584554532420867e-05, "loss": 0.309, "step": 7015 }, { "epoch": 0.32, "grad_norm": 0.2783392801683017, "learning_rate": 1.5844338029661746e-05, "loss": 0.1693, "step": 7016 }, { "epoch": 0.32, "grad_norm": 1.5745887702044628, "learning_rate": 1.5843130605726816e-05, "loss": 0.8246, "step": 7017 }, { "epoch": 0.32, "grad_norm": 0.40984878499469385, "learning_rate": 1.5841923052430605e-05, "loss": 0.2883, "step": 7018 }, { "epoch": 0.32, "grad_norm": 0.7584016875381563, "learning_rate": 1.584071536979985e-05, "loss": 0.4103, "step": 7019 }, { "epoch": 0.32, "grad_norm": 0.3989450099102825, "learning_rate": 1.583950755786128e-05, "loss": 0.3519, "step": 7020 }, { "epoch": 0.32, "grad_norm": 0.43722143984576023, "learning_rate": 1.5838299616641647e-05, "loss": 0.2907, "step": 7021 }, { "epoch": 0.32, "grad_norm": 0.2233912390190278, "learning_rate": 1.583709154616768e-05, "loss": 0.0734, "step": 7022 }, { "epoch": 0.32, "grad_norm": 0.40367170609808656, "learning_rate": 1.5835883346466137e-05, "loss": 0.3338, "step": 7023 }, { "epoch": 0.32, "grad_norm": 0.5874177778425868, "learning_rate": 1.5834675017563764e-05, "loss": 0.2972, "step": 7024 }, { "epoch": 0.32, "grad_norm": 0.4923285045067426, "learning_rate": 1.5833466559487305e-05, "loss": 0.297, "step": 7025 }, { "epoch": 0.32, "grad_norm": 0.7148566082214294, "learning_rate": 1.5832257972263523e-05, "loss": 0.4415, "step": 7026 }, { "epoch": 0.32, "grad_norm": 0.46271961459001154, "learning_rate": 1.5831049255919166e-05, "loss": 0.2911, "step": 7027 }, { "epoch": 0.32, "grad_norm": 0.2899847274702938, "learning_rate": 1.5829840410481e-05, "loss": 0.2454, "step": 7028 }, { "epoch": 0.32, "grad_norm": 0.3245711229305609, "learning_rate": 1.5828631435975784e-05, "loss": 0.1707, "step": 7029 }, { "epoch": 0.32, "grad_norm": 0.48453886811842634, "learning_rate": 1.582742233243029e-05, "loss": 0.3028, "step": 7030 }, { "epoch": 0.32, "grad_norm": 0.6697341837538948, "learning_rate": 1.5826213099871283e-05, "loss": 0.417, "step": 7031 }, { "epoch": 0.32, "grad_norm": 0.43280378742972203, "learning_rate": 1.582500373832553e-05, "loss": 0.263, "step": 7032 }, { "epoch": 0.32, "grad_norm": 0.4382249961580811, "learning_rate": 1.5823794247819807e-05, "loss": 0.278, "step": 7033 }, { "epoch": 0.32, "grad_norm": 0.3122594246376777, "learning_rate": 1.58225846283809e-05, "loss": 0.2171, "step": 7034 }, { "epoch": 0.32, "grad_norm": 0.4925201368329097, "learning_rate": 1.5821374880035573e-05, "loss": 0.3129, "step": 7035 }, { "epoch": 0.32, "grad_norm": 0.32176347621818263, "learning_rate": 1.582016500281062e-05, "loss": 0.2646, "step": 7036 }, { "epoch": 0.32, "grad_norm": 0.7783341204397229, "learning_rate": 1.5818954996732824e-05, "loss": 0.5031, "step": 7037 }, { "epoch": 0.32, "grad_norm": 0.6571167030321027, "learning_rate": 1.5817744861828973e-05, "loss": 0.3726, "step": 7038 }, { "epoch": 0.32, "grad_norm": 0.4290360708082677, "learning_rate": 1.5816534598125858e-05, "loss": 0.292, "step": 7039 }, { "epoch": 0.32, "grad_norm": 0.43748331329321793, "learning_rate": 1.5815324205650272e-05, "loss": 0.3258, "step": 7040 }, { "epoch": 0.32, "grad_norm": 0.29741116283590613, "learning_rate": 1.5814113684429012e-05, "loss": 0.2266, "step": 7041 }, { "epoch": 0.32, "grad_norm": 0.4432495160657537, "learning_rate": 1.581290303448888e-05, "loss": 0.2383, "step": 7042 }, { "epoch": 0.32, "grad_norm": 0.6461177350711316, "learning_rate": 1.5811692255856677e-05, "loss": 0.3963, "step": 7043 }, { "epoch": 0.32, "grad_norm": 0.39291935499020886, "learning_rate": 1.5810481348559206e-05, "loss": 0.3386, "step": 7044 }, { "epoch": 0.32, "grad_norm": 0.3627894285941001, "learning_rate": 1.5809270312623285e-05, "loss": 0.1818, "step": 7045 }, { "epoch": 0.32, "grad_norm": 0.3104216583452634, "learning_rate": 1.580805914807571e-05, "loss": 0.2475, "step": 7046 }, { "epoch": 0.32, "grad_norm": 0.3187025212366548, "learning_rate": 1.580684785494331e-05, "loss": 0.2702, "step": 7047 }, { "epoch": 0.32, "grad_norm": 0.40618861964824365, "learning_rate": 1.5805636433252892e-05, "loss": 0.2201, "step": 7048 }, { "epoch": 0.32, "grad_norm": 0.5029632200939427, "learning_rate": 1.5804424883031278e-05, "loss": 0.3887, "step": 7049 }, { "epoch": 0.32, "grad_norm": 1.353741252296431, "learning_rate": 1.5803213204305296e-05, "loss": 0.706, "step": 7050 }, { "epoch": 0.32, "grad_norm": 0.3222564140371361, "learning_rate": 1.5802001397101766e-05, "loss": 0.2327, "step": 7051 }, { "epoch": 0.32, "grad_norm": 0.29503784486134943, "learning_rate": 1.5800789461447513e-05, "loss": 0.2503, "step": 7052 }, { "epoch": 0.32, "grad_norm": 0.4649224153251475, "learning_rate": 1.5799577397369374e-05, "loss": 0.3034, "step": 7053 }, { "epoch": 0.32, "grad_norm": 0.544466315090083, "learning_rate": 1.5798365204894182e-05, "loss": 0.3806, "step": 7054 }, { "epoch": 0.32, "grad_norm": 0.4165995865076928, "learning_rate": 1.5797152884048772e-05, "loss": 0.2623, "step": 7055 }, { "epoch": 0.32, "grad_norm": 0.40300126813059706, "learning_rate": 1.5795940434859988e-05, "loss": 0.3143, "step": 7056 }, { "epoch": 0.32, "grad_norm": 0.40222924253914133, "learning_rate": 1.5794727857354667e-05, "loss": 0.2679, "step": 7057 }, { "epoch": 0.32, "grad_norm": 0.4795952423234212, "learning_rate": 1.5793515151559654e-05, "loss": 0.2749, "step": 7058 }, { "epoch": 0.32, "grad_norm": 0.26735847199324964, "learning_rate": 1.57923023175018e-05, "loss": 0.2571, "step": 7059 }, { "epoch": 0.32, "grad_norm": 0.4861939241519787, "learning_rate": 1.5791089355207958e-05, "loss": 0.3244, "step": 7060 }, { "epoch": 0.32, "grad_norm": 0.5088622258196305, "learning_rate": 1.5789876264704976e-05, "loss": 0.3102, "step": 7061 }, { "epoch": 0.32, "grad_norm": 0.941350111566803, "learning_rate": 1.5788663046019716e-05, "loss": 0.599, "step": 7062 }, { "epoch": 0.32, "grad_norm": 0.4654436940082658, "learning_rate": 1.5787449699179035e-05, "loss": 0.335, "step": 7063 }, { "epoch": 0.32, "grad_norm": 0.3494005532652234, "learning_rate": 1.5786236224209794e-05, "loss": 0.2364, "step": 7064 }, { "epoch": 0.32, "grad_norm": 0.4565802442528608, "learning_rate": 1.578502262113886e-05, "loss": 0.3434, "step": 7065 }, { "epoch": 0.32, "grad_norm": 0.3930450147236665, "learning_rate": 1.57838088899931e-05, "loss": 0.2468, "step": 7066 }, { "epoch": 0.32, "grad_norm": 0.3462006485523297, "learning_rate": 1.578259503079939e-05, "loss": 0.2708, "step": 7067 }, { "epoch": 0.32, "grad_norm": 0.5048257621632538, "learning_rate": 1.5781381043584598e-05, "loss": 0.3174, "step": 7068 }, { "epoch": 0.32, "grad_norm": 0.5950023493788619, "learning_rate": 1.5780166928375597e-05, "loss": 0.3548, "step": 7069 }, { "epoch": 0.32, "grad_norm": 0.40542166133443314, "learning_rate": 1.5778952685199277e-05, "loss": 0.3176, "step": 7070 }, { "epoch": 0.32, "grad_norm": 0.3648558735746394, "learning_rate": 1.5777738314082514e-05, "loss": 0.3044, "step": 7071 }, { "epoch": 0.32, "grad_norm": 0.33630819743275625, "learning_rate": 1.5776523815052192e-05, "loss": 0.2185, "step": 7072 }, { "epoch": 0.32, "grad_norm": 0.43126215720563094, "learning_rate": 1.57753091881352e-05, "loss": 0.3397, "step": 7073 }, { "epoch": 0.32, "grad_norm": 0.32908474271947774, "learning_rate": 1.5774094433358426e-05, "loss": 0.1656, "step": 7074 }, { "epoch": 0.33, "grad_norm": 0.37716287577339264, "learning_rate": 1.5772879550748773e-05, "loss": 0.307, "step": 7075 }, { "epoch": 0.33, "grad_norm": 0.4232597156547772, "learning_rate": 1.5771664540333123e-05, "loss": 0.2849, "step": 7076 }, { "epoch": 0.33, "grad_norm": 0.60882281616766, "learning_rate": 1.5770449402138387e-05, "loss": 0.3704, "step": 7077 }, { "epoch": 0.33, "grad_norm": 0.5124361998287736, "learning_rate": 1.5769234136191464e-05, "loss": 0.2911, "step": 7078 }, { "epoch": 0.33, "grad_norm": 0.4623270719524845, "learning_rate": 1.5768018742519258e-05, "loss": 0.3218, "step": 7079 }, { "epoch": 0.33, "grad_norm": 0.29380197925881496, "learning_rate": 1.5766803221148676e-05, "loss": 0.215, "step": 7080 }, { "epoch": 0.33, "grad_norm": 0.6255721310019001, "learning_rate": 1.5765587572106623e-05, "loss": 0.2463, "step": 7081 }, { "epoch": 0.33, "grad_norm": 0.4028652804287961, "learning_rate": 1.5764371795420023e-05, "loss": 0.3133, "step": 7082 }, { "epoch": 0.33, "grad_norm": 0.4195086038770725, "learning_rate": 1.576315589111579e-05, "loss": 0.3431, "step": 7083 }, { "epoch": 0.33, "grad_norm": 0.5466980537753369, "learning_rate": 1.5761939859220836e-05, "loss": 0.1564, "step": 7084 }, { "epoch": 0.33, "grad_norm": 0.4364585525160943, "learning_rate": 1.576072369976209e-05, "loss": 0.3446, "step": 7085 }, { "epoch": 0.33, "grad_norm": 0.3108104218036844, "learning_rate": 1.575950741276647e-05, "loss": 0.1915, "step": 7086 }, { "epoch": 0.33, "grad_norm": 0.31965716798944344, "learning_rate": 1.575829099826091e-05, "loss": 0.2636, "step": 7087 }, { "epoch": 0.33, "grad_norm": 0.41521259698088897, "learning_rate": 1.5757074456272333e-05, "loss": 0.3252, "step": 7088 }, { "epoch": 0.33, "grad_norm": 0.8762257288004384, "learning_rate": 1.575585778682768e-05, "loss": 0.5277, "step": 7089 }, { "epoch": 0.33, "grad_norm": 0.5259842598684246, "learning_rate": 1.5754640989953883e-05, "loss": 0.2338, "step": 7090 }, { "epoch": 0.33, "grad_norm": 0.3559979703712719, "learning_rate": 1.5753424065677877e-05, "loss": 0.314, "step": 7091 }, { "epoch": 0.33, "grad_norm": 0.3232071324443672, "learning_rate": 1.5752207014026612e-05, "loss": 0.2576, "step": 7092 }, { "epoch": 0.33, "grad_norm": 0.40866753029234226, "learning_rate": 1.5750989835027026e-05, "loss": 0.2895, "step": 7093 }, { "epoch": 0.33, "grad_norm": 0.37266436007685355, "learning_rate": 1.574977252870607e-05, "loss": 0.2585, "step": 7094 }, { "epoch": 0.33, "grad_norm": 0.4114777726193609, "learning_rate": 1.5748555095090687e-05, "loss": 0.341, "step": 7095 }, { "epoch": 0.33, "grad_norm": 0.6554168182537494, "learning_rate": 1.574733753420784e-05, "loss": 0.4785, "step": 7096 }, { "epoch": 0.33, "grad_norm": 0.3472327629826033, "learning_rate": 1.5746119846084474e-05, "loss": 0.2206, "step": 7097 }, { "epoch": 0.33, "grad_norm": 0.3499202776527631, "learning_rate": 1.5744902030747558e-05, "loss": 0.2326, "step": 7098 }, { "epoch": 0.33, "grad_norm": 0.3741230742793275, "learning_rate": 1.5743684088224044e-05, "loss": 0.3324, "step": 7099 }, { "epoch": 0.33, "grad_norm": 0.34978562932828566, "learning_rate": 1.57424660185409e-05, "loss": 0.2506, "step": 7100 }, { "epoch": 0.33, "grad_norm": 1.0987027031706893, "learning_rate": 1.57412478217251e-05, "loss": 0.6012, "step": 7101 }, { "epoch": 0.33, "grad_norm": 1.418759263897151, "learning_rate": 1.57400294978036e-05, "loss": 0.7911, "step": 7102 }, { "epoch": 0.33, "grad_norm": 0.28126870436763585, "learning_rate": 1.573881104680338e-05, "loss": 0.2318, "step": 7103 }, { "epoch": 0.33, "grad_norm": 0.7506369913067107, "learning_rate": 1.5737592468751416e-05, "loss": 0.4781, "step": 7104 }, { "epoch": 0.33, "grad_norm": 0.4172636683387461, "learning_rate": 1.5736373763674687e-05, "loss": 0.3167, "step": 7105 }, { "epoch": 0.33, "grad_norm": 0.3112558738276658, "learning_rate": 1.5735154931600167e-05, "loss": 0.2301, "step": 7106 }, { "epoch": 0.33, "grad_norm": 0.37786987762603524, "learning_rate": 1.5733935972554845e-05, "loss": 0.2581, "step": 7107 }, { "epoch": 0.33, "grad_norm": 1.327503534836091, "learning_rate": 1.573271688656571e-05, "loss": 0.8152, "step": 7108 }, { "epoch": 0.33, "grad_norm": 0.3698643869827666, "learning_rate": 1.5731497673659745e-05, "loss": 0.2929, "step": 7109 }, { "epoch": 0.33, "grad_norm": 0.6790864194175057, "learning_rate": 1.573027833386395e-05, "loss": 0.3466, "step": 7110 }, { "epoch": 0.33, "grad_norm": 0.34638630134117615, "learning_rate": 1.572905886720531e-05, "loss": 0.3208, "step": 7111 }, { "epoch": 0.33, "grad_norm": 0.39089769934773166, "learning_rate": 1.5727839273710832e-05, "loss": 0.2821, "step": 7112 }, { "epoch": 0.33, "grad_norm": 0.24847650391447956, "learning_rate": 1.5726619553407514e-05, "loss": 0.1385, "step": 7113 }, { "epoch": 0.33, "grad_norm": 0.457963662632025, "learning_rate": 1.5725399706322354e-05, "loss": 0.3384, "step": 7114 }, { "epoch": 0.33, "grad_norm": 0.40754667812445927, "learning_rate": 1.572417973248237e-05, "loss": 0.2885, "step": 7115 }, { "epoch": 0.33, "grad_norm": 0.6041748004387976, "learning_rate": 1.5722959631914557e-05, "loss": 0.3255, "step": 7116 }, { "epoch": 0.33, "grad_norm": 0.811632165477902, "learning_rate": 1.5721739404645937e-05, "loss": 0.5461, "step": 7117 }, { "epoch": 0.33, "grad_norm": 0.32102186820855, "learning_rate": 1.572051905070352e-05, "loss": 0.2463, "step": 7118 }, { "epoch": 0.33, "grad_norm": 0.31020247295364595, "learning_rate": 1.5719298570114324e-05, "loss": 0.2534, "step": 7119 }, { "epoch": 0.33, "grad_norm": 1.3857281878838819, "learning_rate": 1.5718077962905372e-05, "loss": 0.5051, "step": 7120 }, { "epoch": 0.33, "grad_norm": 0.3955549776279057, "learning_rate": 1.5716857229103684e-05, "loss": 0.3045, "step": 7121 }, { "epoch": 0.33, "grad_norm": 0.9215115648902779, "learning_rate": 1.5715636368736286e-05, "loss": 0.4584, "step": 7122 }, { "epoch": 0.33, "grad_norm": 0.36169753622232714, "learning_rate": 1.571441538183021e-05, "loss": 0.2523, "step": 7123 }, { "epoch": 0.33, "grad_norm": 0.37251684751015984, "learning_rate": 1.571319426841248e-05, "loss": 0.2716, "step": 7124 }, { "epoch": 0.33, "grad_norm": 0.33448749099587655, "learning_rate": 1.571197302851014e-05, "loss": 0.1892, "step": 7125 }, { "epoch": 0.33, "grad_norm": 0.66609176016729, "learning_rate": 1.5710751662150224e-05, "loss": 0.3015, "step": 7126 }, { "epoch": 0.33, "grad_norm": 0.47153187488750503, "learning_rate": 1.5709530169359767e-05, "loss": 0.2829, "step": 7127 }, { "epoch": 0.33, "grad_norm": 1.32082095765461, "learning_rate": 1.5708308550165818e-05, "loss": 0.4778, "step": 7128 }, { "epoch": 0.33, "grad_norm": 0.4432579614150149, "learning_rate": 1.5707086804595417e-05, "loss": 0.2222, "step": 7129 }, { "epoch": 0.33, "grad_norm": 0.48765120003463264, "learning_rate": 1.570586493267561e-05, "loss": 0.3146, "step": 7130 }, { "epoch": 0.33, "grad_norm": 0.33470262723482036, "learning_rate": 1.570464293443346e-05, "loss": 0.2772, "step": 7131 }, { "epoch": 0.33, "grad_norm": 1.039026491174222, "learning_rate": 1.570342080989601e-05, "loss": 0.5921, "step": 7132 }, { "epoch": 0.33, "grad_norm": 0.3758338848060208, "learning_rate": 1.5702198559090322e-05, "loss": 0.2065, "step": 7133 }, { "epoch": 0.33, "grad_norm": 0.7322717127393783, "learning_rate": 1.5700976182043452e-05, "loss": 0.377, "step": 7134 }, { "epoch": 0.33, "grad_norm": 0.5079023901244897, "learning_rate": 1.5699753678782466e-05, "loss": 0.3723, "step": 7135 }, { "epoch": 0.33, "grad_norm": 0.3229311670899918, "learning_rate": 1.5698531049334428e-05, "loss": 0.2147, "step": 7136 }, { "epoch": 0.33, "grad_norm": 0.33137566175899336, "learning_rate": 1.5697308293726403e-05, "loss": 0.223, "step": 7137 }, { "epoch": 0.33, "grad_norm": 0.5091467081996569, "learning_rate": 1.569608541198546e-05, "loss": 0.4034, "step": 7138 }, { "epoch": 0.33, "grad_norm": 0.3175224511941382, "learning_rate": 1.569486240413868e-05, "loss": 0.2416, "step": 7139 }, { "epoch": 0.33, "grad_norm": 0.9687388130809695, "learning_rate": 1.5693639270213138e-05, "loss": 0.52, "step": 7140 }, { "epoch": 0.33, "grad_norm": 1.1882606778332527, "learning_rate": 1.5692416010235905e-05, "loss": 0.6448, "step": 7141 }, { "epoch": 0.33, "grad_norm": 0.3868377381438459, "learning_rate": 1.569119262423407e-05, "loss": 0.2711, "step": 7142 }, { "epoch": 0.33, "grad_norm": 0.2979496682498275, "learning_rate": 1.5689969112234717e-05, "loss": 0.1972, "step": 7143 }, { "epoch": 0.33, "grad_norm": 0.5885685019469463, "learning_rate": 1.568874547426493e-05, "loss": 0.3385, "step": 7144 }, { "epoch": 0.33, "grad_norm": 0.4007307443970035, "learning_rate": 1.56875217103518e-05, "loss": 0.2714, "step": 7145 }, { "epoch": 0.33, "grad_norm": 1.0913132740372513, "learning_rate": 1.5686297820522423e-05, "loss": 0.3769, "step": 7146 }, { "epoch": 0.33, "grad_norm": 0.39495196122521425, "learning_rate": 1.5685073804803892e-05, "loss": 0.3058, "step": 7147 }, { "epoch": 0.33, "grad_norm": 0.5968129159623581, "learning_rate": 1.5683849663223306e-05, "loss": 0.3158, "step": 7148 }, { "epoch": 0.33, "grad_norm": 0.2646926232108532, "learning_rate": 1.568262539580777e-05, "loss": 0.1615, "step": 7149 }, { "epoch": 0.33, "grad_norm": 0.6336038388159662, "learning_rate": 1.568140100258438e-05, "loss": 0.3484, "step": 7150 }, { "epoch": 0.33, "grad_norm": 0.7403507754082647, "learning_rate": 1.5680176483580248e-05, "loss": 0.3975, "step": 7151 }, { "epoch": 0.33, "grad_norm": 0.48286920819962725, "learning_rate": 1.5678951838822487e-05, "loss": 0.2738, "step": 7152 }, { "epoch": 0.33, "grad_norm": 1.1208821195765513, "learning_rate": 1.5677727068338203e-05, "loss": 0.6201, "step": 7153 }, { "epoch": 0.33, "grad_norm": 0.47186658346814064, "learning_rate": 1.5676502172154514e-05, "loss": 0.2815, "step": 7154 }, { "epoch": 0.33, "grad_norm": 0.2834258173205169, "learning_rate": 1.567527715029854e-05, "loss": 0.242, "step": 7155 }, { "epoch": 0.33, "grad_norm": 0.6348510350104531, "learning_rate": 1.5674052002797396e-05, "loss": 0.3902, "step": 7156 }, { "epoch": 0.33, "grad_norm": 0.4710698356386421, "learning_rate": 1.567282672967821e-05, "loss": 0.3092, "step": 7157 }, { "epoch": 0.33, "grad_norm": 0.35536440317264967, "learning_rate": 1.5671601330968112e-05, "loss": 0.27, "step": 7158 }, { "epoch": 0.33, "grad_norm": 0.5243560848875567, "learning_rate": 1.5670375806694226e-05, "loss": 0.2436, "step": 7159 }, { "epoch": 0.33, "grad_norm": 0.514432307671831, "learning_rate": 1.566915015688368e-05, "loss": 0.333, "step": 7160 }, { "epoch": 0.33, "grad_norm": 0.3757433316908943, "learning_rate": 1.5667924381563618e-05, "loss": 0.2427, "step": 7161 }, { "epoch": 0.33, "grad_norm": 0.44377785401053943, "learning_rate": 1.566669848076117e-05, "loss": 0.3049, "step": 7162 }, { "epoch": 0.33, "grad_norm": 0.3539044950454571, "learning_rate": 1.5665472454503484e-05, "loss": 0.2658, "step": 7163 }, { "epoch": 0.33, "grad_norm": 1.2991584943025944, "learning_rate": 1.5664246302817695e-05, "loss": 0.7675, "step": 7164 }, { "epoch": 0.33, "grad_norm": 0.3161897144435263, "learning_rate": 1.566302002573095e-05, "loss": 0.1418, "step": 7165 }, { "epoch": 0.33, "grad_norm": 0.42169995020107076, "learning_rate": 1.5661793623270404e-05, "loss": 0.2565, "step": 7166 }, { "epoch": 0.33, "grad_norm": 0.43209305825695626, "learning_rate": 1.5660567095463202e-05, "loss": 0.3358, "step": 7167 }, { "epoch": 0.33, "grad_norm": 0.6754068686278579, "learning_rate": 1.56593404423365e-05, "loss": 0.4258, "step": 7168 }, { "epoch": 0.33, "grad_norm": 0.4043740072161991, "learning_rate": 1.5658113663917455e-05, "loss": 0.165, "step": 7169 }, { "epoch": 0.33, "grad_norm": 0.3548682311398915, "learning_rate": 1.5656886760233227e-05, "loss": 0.2689, "step": 7170 }, { "epoch": 0.33, "grad_norm": 0.363160094008727, "learning_rate": 1.565565973131098e-05, "loss": 0.2869, "step": 7171 }, { "epoch": 0.33, "grad_norm": 0.4189473866378317, "learning_rate": 1.5654432577177874e-05, "loss": 0.1886, "step": 7172 }, { "epoch": 0.33, "grad_norm": 0.41373198295723707, "learning_rate": 1.5653205297861082e-05, "loss": 0.3504, "step": 7173 }, { "epoch": 0.33, "grad_norm": 0.5595121133142632, "learning_rate": 1.5651977893387776e-05, "loss": 0.3828, "step": 7174 }, { "epoch": 0.33, "grad_norm": 0.3361240314977648, "learning_rate": 1.5650750363785126e-05, "loss": 0.2348, "step": 7175 }, { "epoch": 0.33, "grad_norm": 0.3661761405136072, "learning_rate": 1.5649522709080306e-05, "loss": 0.2792, "step": 7176 }, { "epoch": 0.33, "grad_norm": 0.4383883539439163, "learning_rate": 1.56482949293005e-05, "loss": 0.2411, "step": 7177 }, { "epoch": 0.33, "grad_norm": 0.3124234668800184, "learning_rate": 1.564706702447289e-05, "loss": 0.2349, "step": 7178 }, { "epoch": 0.33, "grad_norm": 0.5445968592040877, "learning_rate": 1.564583899462466e-05, "loss": 0.3727, "step": 7179 }, { "epoch": 0.33, "grad_norm": 0.6688811591722729, "learning_rate": 1.5644610839782992e-05, "loss": 0.4952, "step": 7180 }, { "epoch": 0.33, "grad_norm": 0.6398563219462625, "learning_rate": 1.564338255997508e-05, "loss": 0.4357, "step": 7181 }, { "epoch": 0.33, "grad_norm": 0.36291319982309433, "learning_rate": 1.5642154155228124e-05, "loss": 0.2415, "step": 7182 }, { "epoch": 0.33, "grad_norm": 0.31247966006971944, "learning_rate": 1.5640925625569305e-05, "loss": 0.2215, "step": 7183 }, { "epoch": 0.33, "grad_norm": 0.5795375405427187, "learning_rate": 1.5639696971025836e-05, "loss": 0.4005, "step": 7184 }, { "epoch": 0.33, "grad_norm": 0.3837492249362803, "learning_rate": 1.5638468191624906e-05, "loss": 0.2778, "step": 7185 }, { "epoch": 0.33, "grad_norm": 0.3910527205603508, "learning_rate": 1.5637239287393725e-05, "loss": 0.3343, "step": 7186 }, { "epoch": 0.33, "grad_norm": 0.8912829329638559, "learning_rate": 1.56360102583595e-05, "loss": 0.4828, "step": 7187 }, { "epoch": 0.33, "grad_norm": 0.3555184524458391, "learning_rate": 1.5634781104549442e-05, "loss": 0.2364, "step": 7188 }, { "epoch": 0.33, "grad_norm": 0.31836586253936744, "learning_rate": 1.5633551825990763e-05, "loss": 0.2458, "step": 7189 }, { "epoch": 0.33, "grad_norm": 0.34067311977772946, "learning_rate": 1.5632322422710674e-05, "loss": 0.3025, "step": 7190 }, { "epoch": 0.33, "grad_norm": 0.420791213050203, "learning_rate": 1.5631092894736398e-05, "loss": 0.2512, "step": 7191 }, { "epoch": 0.33, "grad_norm": 1.5326139215859171, "learning_rate": 1.562986324209515e-05, "loss": 0.7641, "step": 7192 }, { "epoch": 0.33, "grad_norm": 0.7316824576386303, "learning_rate": 1.5628633464814153e-05, "loss": 0.5138, "step": 7193 }, { "epoch": 0.33, "grad_norm": 0.3210373902959111, "learning_rate": 1.5627403562920638e-05, "loss": 0.2865, "step": 7194 }, { "epoch": 0.33, "grad_norm": 0.3207851570263174, "learning_rate": 1.5626173536441835e-05, "loss": 0.1531, "step": 7195 }, { "epoch": 0.33, "grad_norm": 0.46722250405108007, "learning_rate": 1.5624943385404966e-05, "loss": 0.3859, "step": 7196 }, { "epoch": 0.33, "grad_norm": 0.5154957649384645, "learning_rate": 1.5623713109837276e-05, "loss": 0.3171, "step": 7197 }, { "epoch": 0.33, "grad_norm": 0.4333503285662579, "learning_rate": 1.5622482709766e-05, "loss": 0.2562, "step": 7198 }, { "epoch": 0.33, "grad_norm": 0.6168185256551033, "learning_rate": 1.562125218521837e-05, "loss": 0.3749, "step": 7199 }, { "epoch": 0.33, "grad_norm": 0.4536718044436831, "learning_rate": 1.562002153622164e-05, "loss": 0.3389, "step": 7200 }, { "epoch": 0.33, "grad_norm": 0.5081990660446354, "learning_rate": 1.561879076280305e-05, "loss": 0.3676, "step": 7201 }, { "epoch": 0.33, "grad_norm": 0.3333958215393233, "learning_rate": 1.561755986498984e-05, "loss": 0.2586, "step": 7202 }, { "epoch": 0.33, "grad_norm": 0.33908555835291326, "learning_rate": 1.561632884280928e-05, "loss": 0.2192, "step": 7203 }, { "epoch": 0.33, "grad_norm": 0.7707277378235458, "learning_rate": 1.5615097696288605e-05, "loss": 0.4571, "step": 7204 }, { "epoch": 0.33, "grad_norm": 0.9867961497140618, "learning_rate": 1.561386642545508e-05, "loss": 0.5279, "step": 7205 }, { "epoch": 0.33, "grad_norm": 0.3560124344464534, "learning_rate": 1.5612635030335966e-05, "loss": 0.2779, "step": 7206 }, { "epoch": 0.33, "grad_norm": 0.7713873920601063, "learning_rate": 1.561140351095852e-05, "loss": 0.4286, "step": 7207 }, { "epoch": 0.33, "grad_norm": 0.21018184911171545, "learning_rate": 1.561017186735001e-05, "loss": 0.071, "step": 7208 }, { "epoch": 0.33, "grad_norm": 0.33550706806427805, "learning_rate": 1.56089400995377e-05, "loss": 0.2474, "step": 7209 }, { "epoch": 0.33, "grad_norm": 0.4218696544981165, "learning_rate": 1.560770820754886e-05, "loss": 0.329, "step": 7210 }, { "epoch": 0.33, "grad_norm": 0.5104159400023495, "learning_rate": 1.5606476191410772e-05, "loss": 0.3157, "step": 7211 }, { "epoch": 0.33, "grad_norm": 0.3875085172092279, "learning_rate": 1.5605244051150698e-05, "loss": 0.2946, "step": 7212 }, { "epoch": 0.33, "grad_norm": 0.7809115165370621, "learning_rate": 1.5604011786795927e-05, "loss": 0.4467, "step": 7213 }, { "epoch": 0.33, "grad_norm": 0.27610778806682057, "learning_rate": 1.5602779398373735e-05, "loss": 0.1915, "step": 7214 }, { "epoch": 0.33, "grad_norm": 0.3759967090479265, "learning_rate": 1.5601546885911406e-05, "loss": 0.2962, "step": 7215 }, { "epoch": 0.33, "grad_norm": 0.4907483496752828, "learning_rate": 1.560031424943623e-05, "loss": 0.33, "step": 7216 }, { "epoch": 0.33, "grad_norm": 0.36305100430257287, "learning_rate": 1.559908148897549e-05, "loss": 0.257, "step": 7217 }, { "epoch": 0.33, "grad_norm": 0.40223797398827604, "learning_rate": 1.5597848604556488e-05, "loss": 0.2926, "step": 7218 }, { "epoch": 0.33, "grad_norm": 0.8040309229597306, "learning_rate": 1.5596615596206512e-05, "loss": 0.4309, "step": 7219 }, { "epoch": 0.33, "grad_norm": 0.40595266153971077, "learning_rate": 1.5595382463952858e-05, "loss": 0.2479, "step": 7220 }, { "epoch": 0.33, "grad_norm": 0.2827865230782367, "learning_rate": 1.559414920782283e-05, "loss": 0.1776, "step": 7221 }, { "epoch": 0.33, "grad_norm": 0.4094213171883501, "learning_rate": 1.559291582784373e-05, "loss": 0.3498, "step": 7222 }, { "epoch": 0.33, "grad_norm": 0.8675898338255126, "learning_rate": 1.559168232404287e-05, "loss": 0.5771, "step": 7223 }, { "epoch": 0.33, "grad_norm": 0.3984800330069923, "learning_rate": 1.5590448696447545e-05, "loss": 0.2487, "step": 7224 }, { "epoch": 0.33, "grad_norm": 0.44839662806816233, "learning_rate": 1.5589214945085076e-05, "loss": 0.3634, "step": 7225 }, { "epoch": 0.33, "grad_norm": 0.5079989575213073, "learning_rate": 1.5587981069982775e-05, "loss": 0.3815, "step": 7226 }, { "epoch": 0.33, "grad_norm": 0.20657435904280966, "learning_rate": 1.5586747071167962e-05, "loss": 0.1468, "step": 7227 }, { "epoch": 0.33, "grad_norm": 1.395722828491078, "learning_rate": 1.5585512948667947e-05, "loss": 0.765, "step": 7228 }, { "epoch": 0.33, "grad_norm": 0.5152327862917196, "learning_rate": 1.5584278702510064e-05, "loss": 0.364, "step": 7229 }, { "epoch": 0.33, "grad_norm": 0.2845438497631263, "learning_rate": 1.558304433272163e-05, "loss": 0.2385, "step": 7230 }, { "epoch": 0.33, "grad_norm": 1.1032499243277467, "learning_rate": 1.558180983932998e-05, "loss": 0.5609, "step": 7231 }, { "epoch": 0.33, "grad_norm": 0.4557111944237515, "learning_rate": 1.5580575222362435e-05, "loss": 0.2997, "step": 7232 }, { "epoch": 0.33, "grad_norm": 0.383603110348889, "learning_rate": 1.5579340481846338e-05, "loss": 0.2532, "step": 7233 }, { "epoch": 0.33, "grad_norm": 0.3359675817572818, "learning_rate": 1.5578105617809013e-05, "loss": 0.238, "step": 7234 }, { "epoch": 0.33, "grad_norm": 0.7486837059712673, "learning_rate": 1.557687063027781e-05, "loss": 0.45, "step": 7235 }, { "epoch": 0.33, "grad_norm": 0.4398333842353121, "learning_rate": 1.557563551928007e-05, "loss": 0.2762, "step": 7236 }, { "epoch": 0.33, "grad_norm": 0.36979728767995995, "learning_rate": 1.5574400284843127e-05, "loss": 0.2753, "step": 7237 }, { "epoch": 0.33, "grad_norm": 0.43559393296177684, "learning_rate": 1.5573164926994338e-05, "loss": 0.3262, "step": 7238 }, { "epoch": 0.33, "grad_norm": 0.3998039766746459, "learning_rate": 1.5571929445761048e-05, "loss": 0.3135, "step": 7239 }, { "epoch": 0.33, "grad_norm": 0.46343463954721115, "learning_rate": 1.5570693841170613e-05, "loss": 0.2523, "step": 7240 }, { "epoch": 0.33, "grad_norm": 0.5534276790453356, "learning_rate": 1.556945811325038e-05, "loss": 0.3947, "step": 7241 }, { "epoch": 0.33, "grad_norm": 0.303043670407606, "learning_rate": 1.5568222262027716e-05, "loss": 0.2527, "step": 7242 }, { "epoch": 0.33, "grad_norm": 0.31537841830908697, "learning_rate": 1.5566986287529976e-05, "loss": 0.1797, "step": 7243 }, { "epoch": 0.33, "grad_norm": 1.174326367371531, "learning_rate": 1.5565750189784528e-05, "loss": 0.5412, "step": 7244 }, { "epoch": 0.33, "grad_norm": 0.29494614213006176, "learning_rate": 1.5564513968818733e-05, "loss": 0.2231, "step": 7245 }, { "epoch": 0.33, "grad_norm": 0.4217980039726196, "learning_rate": 1.5563277624659962e-05, "loss": 0.339, "step": 7246 }, { "epoch": 0.33, "grad_norm": 0.8200991445494084, "learning_rate": 1.5562041157335587e-05, "loss": 0.3719, "step": 7247 }, { "epoch": 0.33, "grad_norm": 0.40060375451114766, "learning_rate": 1.556080456687298e-05, "loss": 0.2718, "step": 7248 }, { "epoch": 0.33, "grad_norm": 0.4415866101219472, "learning_rate": 1.555956785329952e-05, "loss": 0.2416, "step": 7249 }, { "epoch": 0.33, "grad_norm": 0.3792975682597965, "learning_rate": 1.5558331016642586e-05, "loss": 0.2779, "step": 7250 }, { "epoch": 0.33, "grad_norm": 0.37915029551331114, "learning_rate": 1.5557094056929566e-05, "loss": 0.286, "step": 7251 }, { "epoch": 0.33, "grad_norm": 1.9586424349610925, "learning_rate": 1.555585697418783e-05, "loss": 0.4531, "step": 7252 }, { "epoch": 0.33, "grad_norm": 0.3824188545006304, "learning_rate": 1.5554619768444784e-05, "loss": 0.3059, "step": 7253 }, { "epoch": 0.33, "grad_norm": 0.41475810516595324, "learning_rate": 1.5553382439727803e-05, "loss": 0.2916, "step": 7254 }, { "epoch": 0.33, "grad_norm": 0.27718589411617034, "learning_rate": 1.5552144988064292e-05, "loss": 0.2064, "step": 7255 }, { "epoch": 0.33, "grad_norm": 1.0152361741711482, "learning_rate": 1.5550907413481643e-05, "loss": 0.3491, "step": 7256 }, { "epoch": 0.33, "grad_norm": 0.445658861119678, "learning_rate": 1.554966971600725e-05, "loss": 0.2971, "step": 7257 }, { "epoch": 0.33, "grad_norm": 0.41529695397042315, "learning_rate": 1.5548431895668515e-05, "loss": 0.3225, "step": 7258 }, { "epoch": 0.33, "grad_norm": 0.9078236749585062, "learning_rate": 1.5547193952492856e-05, "loss": 0.5104, "step": 7259 }, { "epoch": 0.33, "grad_norm": 0.35151250102631537, "learning_rate": 1.554595588650766e-05, "loss": 0.2266, "step": 7260 }, { "epoch": 0.33, "grad_norm": 0.27864828666307884, "learning_rate": 1.554471769774035e-05, "loss": 0.2153, "step": 7261 }, { "epoch": 0.33, "grad_norm": 1.68318183005361, "learning_rate": 1.5543479386218334e-05, "loss": 0.784, "step": 7262 }, { "epoch": 0.33, "grad_norm": 0.34525833982700205, "learning_rate": 1.5542240951969028e-05, "loss": 0.2149, "step": 7263 }, { "epoch": 0.33, "grad_norm": 0.8238686503566567, "learning_rate": 1.5541002395019847e-05, "loss": 0.4544, "step": 7264 }, { "epoch": 0.33, "grad_norm": 0.3939276721743022, "learning_rate": 1.5539763715398215e-05, "loss": 0.3259, "step": 7265 }, { "epoch": 0.33, "grad_norm": 0.3980155903036159, "learning_rate": 1.5538524913131556e-05, "loss": 0.2277, "step": 7266 }, { "epoch": 0.33, "grad_norm": 0.31045819182200635, "learning_rate": 1.5537285988247285e-05, "loss": 0.1754, "step": 7267 }, { "epoch": 0.33, "grad_norm": 0.8494943481266864, "learning_rate": 1.5536046940772848e-05, "loss": 0.4817, "step": 7268 }, { "epoch": 0.33, "grad_norm": 0.3808360227914345, "learning_rate": 1.5534807770735663e-05, "loss": 0.2822, "step": 7269 }, { "epoch": 0.33, "grad_norm": 0.400736253809875, "learning_rate": 1.5533568478163172e-05, "loss": 0.3085, "step": 7270 }, { "epoch": 0.33, "grad_norm": 1.0427840238340422, "learning_rate": 1.5532329063082806e-05, "loss": 0.6733, "step": 7271 }, { "epoch": 0.33, "grad_norm": 0.7706327793184947, "learning_rate": 1.5531089525522006e-05, "loss": 0.4136, "step": 7272 }, { "epoch": 0.33, "grad_norm": 0.2513051568602051, "learning_rate": 1.5529849865508215e-05, "loss": 0.2204, "step": 7273 }, { "epoch": 0.33, "grad_norm": 0.46764522216603815, "learning_rate": 1.5528610083068877e-05, "loss": 0.3368, "step": 7274 }, { "epoch": 0.33, "grad_norm": 0.5585406392653689, "learning_rate": 1.552737017823144e-05, "loss": 0.384, "step": 7275 }, { "epoch": 0.33, "grad_norm": 0.3797885734576441, "learning_rate": 1.5526130151023358e-05, "loss": 0.2823, "step": 7276 }, { "epoch": 0.33, "grad_norm": 0.460146659543978, "learning_rate": 1.5524890001472076e-05, "loss": 0.3282, "step": 7277 }, { "epoch": 0.33, "grad_norm": 0.43894175133531294, "learning_rate": 1.552364972960506e-05, "loss": 0.2927, "step": 7278 }, { "epoch": 0.33, "grad_norm": 0.24954728538905976, "learning_rate": 1.552240933544976e-05, "loss": 0.1691, "step": 7279 }, { "epoch": 0.33, "grad_norm": 0.7539887492684836, "learning_rate": 1.5521168819033642e-05, "loss": 0.5176, "step": 7280 }, { "epoch": 0.33, "grad_norm": 0.3402457634166276, "learning_rate": 1.5519928180384164e-05, "loss": 0.272, "step": 7281 }, { "epoch": 0.33, "grad_norm": 0.398858163035011, "learning_rate": 1.5518687419528794e-05, "loss": 0.3225, "step": 7282 }, { "epoch": 0.33, "grad_norm": 1.0527400418521429, "learning_rate": 1.551744653649501e-05, "loss": 0.4963, "step": 7283 }, { "epoch": 0.33, "grad_norm": 0.4876798596333133, "learning_rate": 1.5516205531310272e-05, "loss": 0.293, "step": 7284 }, { "epoch": 0.33, "grad_norm": 0.42954597479704787, "learning_rate": 1.5514964404002066e-05, "loss": 0.325, "step": 7285 }, { "epoch": 0.33, "grad_norm": 0.3277277479915988, "learning_rate": 1.5513723154597858e-05, "loss": 0.2122, "step": 7286 }, { "epoch": 0.33, "grad_norm": 0.42357581791313226, "learning_rate": 1.5512481783125134e-05, "loss": 0.3186, "step": 7287 }, { "epoch": 0.33, "grad_norm": 1.7015549735365534, "learning_rate": 1.551124028961138e-05, "loss": 0.4452, "step": 7288 }, { "epoch": 0.33, "grad_norm": 0.36763289855824954, "learning_rate": 1.5509998674084076e-05, "loss": 0.272, "step": 7289 }, { "epoch": 0.33, "grad_norm": 0.6470444214710763, "learning_rate": 1.550875693657071e-05, "loss": 0.3612, "step": 7290 }, { "epoch": 0.33, "grad_norm": 0.44763101653933857, "learning_rate": 1.5507515077098776e-05, "loss": 0.365, "step": 7291 }, { "epoch": 0.33, "grad_norm": 0.3321645545268741, "learning_rate": 1.5506273095695767e-05, "loss": 0.2654, "step": 7292 }, { "epoch": 0.34, "grad_norm": 0.409088843036453, "learning_rate": 1.550503099238918e-05, "loss": 0.3239, "step": 7293 }, { "epoch": 0.34, "grad_norm": 0.3434822726566993, "learning_rate": 1.5503788767206512e-05, "loss": 0.2453, "step": 7294 }, { "epoch": 0.34, "grad_norm": 1.2835784228491813, "learning_rate": 1.5502546420175266e-05, "loss": 0.7984, "step": 7295 }, { "epoch": 0.34, "grad_norm": 0.480717554256865, "learning_rate": 1.5501303951322942e-05, "loss": 0.1235, "step": 7296 }, { "epoch": 0.34, "grad_norm": 0.3081454384504014, "learning_rate": 1.5500061360677055e-05, "loss": 0.2864, "step": 7297 }, { "epoch": 0.34, "grad_norm": 0.6629919427998986, "learning_rate": 1.549881864826511e-05, "loss": 0.466, "step": 7298 }, { "epoch": 0.34, "grad_norm": 0.255953260423203, "learning_rate": 1.5497575814114615e-05, "loss": 0.1495, "step": 7299 }, { "epoch": 0.34, "grad_norm": 0.5778440832622904, "learning_rate": 1.5496332858253095e-05, "loss": 0.3863, "step": 7300 }, { "epoch": 0.34, "grad_norm": 0.45265301241439254, "learning_rate": 1.5495089780708062e-05, "loss": 0.3496, "step": 7301 }, { "epoch": 0.34, "grad_norm": 0.39439851004760107, "learning_rate": 1.5493846581507037e-05, "loss": 0.2316, "step": 7302 }, { "epoch": 0.34, "grad_norm": 0.5251924834347689, "learning_rate": 1.5492603260677543e-05, "loss": 0.3958, "step": 7303 }, { "epoch": 0.34, "grad_norm": 0.7488466539809208, "learning_rate": 1.549135981824711e-05, "loss": 0.4411, "step": 7304 }, { "epoch": 0.34, "grad_norm": 0.3270073220641253, "learning_rate": 1.5490116254243258e-05, "loss": 0.2137, "step": 7305 }, { "epoch": 0.34, "grad_norm": 0.37670808538205175, "learning_rate": 1.5488872568693527e-05, "loss": 0.2387, "step": 7306 }, { "epoch": 0.34, "grad_norm": 1.0137888817084433, "learning_rate": 1.5487628761625447e-05, "loss": 0.7319, "step": 7307 }, { "epoch": 0.34, "grad_norm": 1.072991964754532, "learning_rate": 1.5486384833066557e-05, "loss": 0.4958, "step": 7308 }, { "epoch": 0.34, "grad_norm": 0.30827171842582807, "learning_rate": 1.548514078304439e-05, "loss": 0.2322, "step": 7309 }, { "epoch": 0.34, "grad_norm": 0.6801808728909504, "learning_rate": 1.5483896611586494e-05, "loss": 0.4395, "step": 7310 }, { "epoch": 0.34, "grad_norm": 0.29364941078367696, "learning_rate": 1.5482652318720418e-05, "loss": 0.1574, "step": 7311 }, { "epoch": 0.34, "grad_norm": 0.39312722141277956, "learning_rate": 1.54814079044737e-05, "loss": 0.2532, "step": 7312 }, { "epoch": 0.34, "grad_norm": 0.44994090441442663, "learning_rate": 1.5480163368873894e-05, "loss": 0.3485, "step": 7313 }, { "epoch": 0.34, "grad_norm": 0.8034370671407369, "learning_rate": 1.547891871194855e-05, "loss": 0.4271, "step": 7314 }, { "epoch": 0.34, "grad_norm": 0.4445151350677581, "learning_rate": 1.547767393372523e-05, "loss": 0.2811, "step": 7315 }, { "epoch": 0.34, "grad_norm": 1.6979193415557734, "learning_rate": 1.5476429034231487e-05, "loss": 0.8639, "step": 7316 }, { "epoch": 0.34, "grad_norm": 0.3074120943064323, "learning_rate": 1.5475184013494885e-05, "loss": 0.2474, "step": 7317 }, { "epoch": 0.34, "grad_norm": 0.3282388596142066, "learning_rate": 1.5473938871542986e-05, "loss": 0.1787, "step": 7318 }, { "epoch": 0.34, "grad_norm": 1.3040770555683425, "learning_rate": 1.5472693608403355e-05, "loss": 0.7642, "step": 7319 }, { "epoch": 0.34, "grad_norm": 0.6894958110766907, "learning_rate": 1.5471448224103563e-05, "loss": 0.361, "step": 7320 }, { "epoch": 0.34, "grad_norm": 0.4347101599532764, "learning_rate": 1.547020271867118e-05, "loss": 0.3015, "step": 7321 }, { "epoch": 0.34, "grad_norm": 0.4254386839523883, "learning_rate": 1.546895709213378e-05, "loss": 0.3134, "step": 7322 }, { "epoch": 0.34, "grad_norm": 0.3849815591436858, "learning_rate": 1.5467711344518943e-05, "loss": 0.1687, "step": 7323 }, { "epoch": 0.34, "grad_norm": 0.38342134387308424, "learning_rate": 1.5466465475854246e-05, "loss": 0.2456, "step": 7324 }, { "epoch": 0.34, "grad_norm": 0.469572052101874, "learning_rate": 1.5465219486167273e-05, "loss": 0.3026, "step": 7325 }, { "epoch": 0.34, "grad_norm": 1.1263510303295086, "learning_rate": 1.5463973375485605e-05, "loss": 0.4743, "step": 7326 }, { "epoch": 0.34, "grad_norm": 0.39128333220231315, "learning_rate": 1.5462727143836834e-05, "loss": 0.2959, "step": 7327 }, { "epoch": 0.34, "grad_norm": 0.3757492257109844, "learning_rate": 1.5461480791248553e-05, "loss": 0.2822, "step": 7328 }, { "epoch": 0.34, "grad_norm": 0.5367070461021922, "learning_rate": 1.5460234317748345e-05, "loss": 0.2654, "step": 7329 }, { "epoch": 0.34, "grad_norm": 0.2875925377917111, "learning_rate": 1.545898772336382e-05, "loss": 0.2352, "step": 7330 }, { "epoch": 0.34, "grad_norm": 1.2084110881132275, "learning_rate": 1.545774100812256e-05, "loss": 0.418, "step": 7331 }, { "epoch": 0.34, "grad_norm": 0.5133562420013291, "learning_rate": 1.5456494172052175e-05, "loss": 0.3603, "step": 7332 }, { "epoch": 0.34, "grad_norm": 0.3511194580199034, "learning_rate": 1.5455247215180273e-05, "loss": 0.3013, "step": 7333 }, { "epoch": 0.34, "grad_norm": 1.238638097809351, "learning_rate": 1.5454000137534455e-05, "loss": 0.6843, "step": 7334 }, { "epoch": 0.34, "grad_norm": 0.34904139164487696, "learning_rate": 1.545275293914233e-05, "loss": 0.1749, "step": 7335 }, { "epoch": 0.34, "grad_norm": 0.40864715840179605, "learning_rate": 1.5451505620031505e-05, "loss": 0.2927, "step": 7336 }, { "epoch": 0.34, "grad_norm": 0.4305989599640417, "learning_rate": 1.5450258180229606e-05, "loss": 0.3232, "step": 7337 }, { "epoch": 0.34, "grad_norm": 1.033748741272164, "learning_rate": 1.544901061976424e-05, "loss": 0.3634, "step": 7338 }, { "epoch": 0.34, "grad_norm": 0.39759818288280063, "learning_rate": 1.544776293866303e-05, "loss": 0.2884, "step": 7339 }, { "epoch": 0.34, "grad_norm": 0.3496864587160516, "learning_rate": 1.5446515136953603e-05, "loss": 0.2485, "step": 7340 }, { "epoch": 0.34, "grad_norm": 0.39967120012355933, "learning_rate": 1.544526721466358e-05, "loss": 0.2078, "step": 7341 }, { "epoch": 0.34, "grad_norm": 0.34444800664401537, "learning_rate": 1.5444019171820588e-05, "loss": 0.2487, "step": 7342 }, { "epoch": 0.34, "grad_norm": 0.8556852570631819, "learning_rate": 1.544277100845226e-05, "loss": 0.4622, "step": 7343 }, { "epoch": 0.34, "grad_norm": 0.4586721112103685, "learning_rate": 1.5441522724586225e-05, "loss": 0.2816, "step": 7344 }, { "epoch": 0.34, "grad_norm": 0.4092204511702058, "learning_rate": 1.544027432025012e-05, "loss": 0.298, "step": 7345 }, { "epoch": 0.34, "grad_norm": 0.5581460267402473, "learning_rate": 1.543902579547159e-05, "loss": 0.3323, "step": 7346 }, { "epoch": 0.34, "grad_norm": 1.5691261067187017, "learning_rate": 1.5437777150278268e-05, "loss": 0.8669, "step": 7347 }, { "epoch": 0.34, "grad_norm": 0.36315022344227127, "learning_rate": 1.54365283846978e-05, "loss": 0.239, "step": 7348 }, { "epoch": 0.34, "grad_norm": 0.4427822082767784, "learning_rate": 1.5435279498757835e-05, "loss": 0.3429, "step": 7349 }, { "epoch": 0.34, "grad_norm": 0.938060928123741, "learning_rate": 1.5434030492486023e-05, "loss": 0.6152, "step": 7350 }, { "epoch": 0.34, "grad_norm": 0.26164154313858956, "learning_rate": 1.543278136591001e-05, "loss": 0.1521, "step": 7351 }, { "epoch": 0.34, "grad_norm": 1.86534581940236, "learning_rate": 1.5431532119057454e-05, "loss": 0.798, "step": 7352 }, { "epoch": 0.34, "grad_norm": 0.43616199251134036, "learning_rate": 1.543028275195601e-05, "loss": 0.3096, "step": 7353 }, { "epoch": 0.34, "grad_norm": 0.39084898288994674, "learning_rate": 1.542903326463334e-05, "loss": 0.2532, "step": 7354 }, { "epoch": 0.34, "grad_norm": 0.7460740534665524, "learning_rate": 1.542778365711711e-05, "loss": 0.4902, "step": 7355 }, { "epoch": 0.34, "grad_norm": 0.3996947780335268, "learning_rate": 1.542653392943498e-05, "loss": 0.3127, "step": 7356 }, { "epoch": 0.34, "grad_norm": 0.3450781590464792, "learning_rate": 1.542528408161462e-05, "loss": 0.2014, "step": 7357 }, { "epoch": 0.34, "grad_norm": 0.35199887402860136, "learning_rate": 1.5424034113683697e-05, "loss": 0.2251, "step": 7358 }, { "epoch": 0.34, "grad_norm": 0.8443066001551855, "learning_rate": 1.542278402566989e-05, "loss": 0.461, "step": 7359 }, { "epoch": 0.34, "grad_norm": 0.4544159028718788, "learning_rate": 1.5421533817600868e-05, "loss": 0.2638, "step": 7360 }, { "epoch": 0.34, "grad_norm": 0.4018558336195311, "learning_rate": 1.542028348950431e-05, "loss": 0.2909, "step": 7361 }, { "epoch": 0.34, "grad_norm": 1.2312465536476065, "learning_rate": 1.5419033041407906e-05, "loss": 0.7561, "step": 7362 }, { "epoch": 0.34, "grad_norm": 0.38075197235910907, "learning_rate": 1.5417782473339325e-05, "loss": 0.2523, "step": 7363 }, { "epoch": 0.34, "grad_norm": 0.24227287055955618, "learning_rate": 1.5416531785326267e-05, "loss": 0.1864, "step": 7364 }, { "epoch": 0.34, "grad_norm": 1.0785080266234088, "learning_rate": 1.5415280977396417e-05, "loss": 0.5329, "step": 7365 }, { "epoch": 0.34, "grad_norm": 0.35761441314569825, "learning_rate": 1.5414030049577466e-05, "loss": 0.2959, "step": 7366 }, { "epoch": 0.34, "grad_norm": 0.9020323711900209, "learning_rate": 1.5412779001897105e-05, "loss": 0.3927, "step": 7367 }, { "epoch": 0.34, "grad_norm": 0.40620721788680136, "learning_rate": 1.5411527834383032e-05, "loss": 0.3265, "step": 7368 }, { "epoch": 0.34, "grad_norm": 0.3817695189331422, "learning_rate": 1.5410276547062953e-05, "loss": 0.2895, "step": 7369 }, { "epoch": 0.34, "grad_norm": 0.2760804363651166, "learning_rate": 1.540902513996456e-05, "loss": 0.0976, "step": 7370 }, { "epoch": 0.34, "grad_norm": 1.0737188966455447, "learning_rate": 1.540777361311557e-05, "loss": 0.4769, "step": 7371 }, { "epoch": 0.34, "grad_norm": 0.3319918272140716, "learning_rate": 1.5406521966543682e-05, "loss": 0.2823, "step": 7372 }, { "epoch": 0.34, "grad_norm": 0.5099080476005865, "learning_rate": 1.540527020027661e-05, "loss": 0.3625, "step": 7373 }, { "epoch": 0.34, "grad_norm": 0.8021986789118483, "learning_rate": 1.540401831434206e-05, "loss": 0.498, "step": 7374 }, { "epoch": 0.34, "grad_norm": 0.4089391632910359, "learning_rate": 1.540276630876776e-05, "loss": 0.2847, "step": 7375 }, { "epoch": 0.34, "grad_norm": 0.2714364479928965, "learning_rate": 1.5401514183581418e-05, "loss": 0.2322, "step": 7376 }, { "epoch": 0.34, "grad_norm": 0.49052451952927545, "learning_rate": 1.5400261938810755e-05, "loss": 0.3522, "step": 7377 }, { "epoch": 0.34, "grad_norm": 0.4078628594878965, "learning_rate": 1.5399009574483502e-05, "loss": 0.2679, "step": 7378 }, { "epoch": 0.34, "grad_norm": 0.5789472513464345, "learning_rate": 1.539775709062738e-05, "loss": 0.3932, "step": 7379 }, { "epoch": 0.34, "grad_norm": 0.3841698273356213, "learning_rate": 1.5396504487270118e-05, "loss": 0.2864, "step": 7380 }, { "epoch": 0.34, "grad_norm": 0.39943907862912165, "learning_rate": 1.5395251764439446e-05, "loss": 0.2762, "step": 7381 }, { "epoch": 0.34, "grad_norm": 0.29954546107529756, "learning_rate": 1.53939989221631e-05, "loss": 0.2206, "step": 7382 }, { "epoch": 0.34, "grad_norm": 0.6063921387719042, "learning_rate": 1.539274596046882e-05, "loss": 0.5113, "step": 7383 }, { "epoch": 0.34, "grad_norm": 0.30672464938880906, "learning_rate": 1.539149287938434e-05, "loss": 0.2193, "step": 7384 }, { "epoch": 0.34, "grad_norm": 0.5050504068049764, "learning_rate": 1.5390239678937403e-05, "loss": 0.3965, "step": 7385 }, { "epoch": 0.34, "grad_norm": 1.4887813619116648, "learning_rate": 1.538898635915576e-05, "loss": 0.8324, "step": 7386 }, { "epoch": 0.34, "grad_norm": 0.3421941765892758, "learning_rate": 1.5387732920067146e-05, "loss": 0.194, "step": 7387 }, { "epoch": 0.34, "grad_norm": 0.4664760207787426, "learning_rate": 1.538647936169932e-05, "loss": 0.3651, "step": 7388 }, { "epoch": 0.34, "grad_norm": 0.38854910918919533, "learning_rate": 1.5385225684080032e-05, "loss": 0.3194, "step": 7389 }, { "epoch": 0.34, "grad_norm": 0.2759203334053479, "learning_rate": 1.5383971887237042e-05, "loss": 0.174, "step": 7390 }, { "epoch": 0.34, "grad_norm": 1.1720049999591449, "learning_rate": 1.53827179711981e-05, "loss": 0.6836, "step": 7391 }, { "epoch": 0.34, "grad_norm": 0.3919618407232766, "learning_rate": 1.5381463935990967e-05, "loss": 0.3323, "step": 7392 }, { "epoch": 0.34, "grad_norm": 0.3147333987404266, "learning_rate": 1.538020978164341e-05, "loss": 0.0758, "step": 7393 }, { "epoch": 0.34, "grad_norm": 0.44173726411433945, "learning_rate": 1.53789555081832e-05, "loss": 0.3726, "step": 7394 }, { "epoch": 0.34, "grad_norm": 0.28256007490076435, "learning_rate": 1.5377701115638096e-05, "loss": 0.2302, "step": 7395 }, { "epoch": 0.34, "grad_norm": 0.42893203206038816, "learning_rate": 1.5376446604035874e-05, "loss": 0.273, "step": 7396 }, { "epoch": 0.34, "grad_norm": 0.3847872569536585, "learning_rate": 1.5375191973404303e-05, "loss": 0.2739, "step": 7397 }, { "epoch": 0.34, "grad_norm": 1.0296614026962205, "learning_rate": 1.5373937223771163e-05, "loss": 0.6673, "step": 7398 }, { "epoch": 0.34, "grad_norm": 0.5817193537553901, "learning_rate": 1.5372682355164232e-05, "loss": 0.3482, "step": 7399 }, { "epoch": 0.34, "grad_norm": 0.3272074305825856, "learning_rate": 1.5371427367611293e-05, "loss": 0.2701, "step": 7400 }, { "epoch": 0.34, "grad_norm": 0.7872163492659507, "learning_rate": 1.537017226114013e-05, "loss": 0.5491, "step": 7401 }, { "epoch": 0.34, "grad_norm": 0.28011974507690085, "learning_rate": 1.536891703577853e-05, "loss": 0.1558, "step": 7402 }, { "epoch": 0.34, "grad_norm": 0.38297167108067376, "learning_rate": 1.5367661691554282e-05, "loss": 0.2797, "step": 7403 }, { "epoch": 0.34, "grad_norm": 0.44674679049483046, "learning_rate": 1.5366406228495173e-05, "loss": 0.3293, "step": 7404 }, { "epoch": 0.34, "grad_norm": 0.44660925094256254, "learning_rate": 1.5365150646629004e-05, "loss": 0.3293, "step": 7405 }, { "epoch": 0.34, "grad_norm": 0.4113891216351563, "learning_rate": 1.5363894945983567e-05, "loss": 0.3141, "step": 7406 }, { "epoch": 0.34, "grad_norm": 0.3462404533984853, "learning_rate": 1.5362639126586673e-05, "loss": 0.2135, "step": 7407 }, { "epoch": 0.34, "grad_norm": 0.30868257800846194, "learning_rate": 1.5361383188466113e-05, "loss": 0.2776, "step": 7408 }, { "epoch": 0.34, "grad_norm": 0.5389168960232992, "learning_rate": 1.53601271316497e-05, "loss": 0.3855, "step": 7409 }, { "epoch": 0.34, "grad_norm": 0.6772778026739175, "learning_rate": 1.5358870956165236e-05, "loss": 0.4117, "step": 7410 }, { "epoch": 0.34, "grad_norm": 0.5157806534004208, "learning_rate": 1.5357614662040533e-05, "loss": 0.3433, "step": 7411 }, { "epoch": 0.34, "grad_norm": 0.3463625131416063, "learning_rate": 1.535635824930341e-05, "loss": 0.3019, "step": 7412 }, { "epoch": 0.34, "grad_norm": 0.34017885423262006, "learning_rate": 1.5355101717981668e-05, "loss": 0.2059, "step": 7413 }, { "epoch": 0.34, "grad_norm": 0.45054604936019377, "learning_rate": 1.5353845068103145e-05, "loss": 0.2522, "step": 7414 }, { "epoch": 0.34, "grad_norm": 0.4521110168199353, "learning_rate": 1.535258829969565e-05, "loss": 0.3136, "step": 7415 }, { "epoch": 0.34, "grad_norm": 0.41067475335875075, "learning_rate": 1.5351331412787004e-05, "loss": 0.3066, "step": 7416 }, { "epoch": 0.34, "grad_norm": 0.6088159607865851, "learning_rate": 1.5350074407405046e-05, "loss": 0.3619, "step": 7417 }, { "epoch": 0.34, "grad_norm": 0.42153144735752185, "learning_rate": 1.5348817283577592e-05, "loss": 0.3403, "step": 7418 }, { "epoch": 0.34, "grad_norm": 0.9740823187645358, "learning_rate": 1.534756004133248e-05, "loss": 0.2614, "step": 7419 }, { "epoch": 0.34, "grad_norm": 0.28870250007105147, "learning_rate": 1.534630268069754e-05, "loss": 0.2422, "step": 7420 }, { "epoch": 0.34, "grad_norm": 0.4448792083275207, "learning_rate": 1.5345045201700614e-05, "loss": 0.3507, "step": 7421 }, { "epoch": 0.34, "grad_norm": 0.8636266578613819, "learning_rate": 1.534378760436954e-05, "loss": 0.5364, "step": 7422 }, { "epoch": 0.34, "grad_norm": 0.30035283359769777, "learning_rate": 1.5342529888732152e-05, "loss": 0.2025, "step": 7423 }, { "epoch": 0.34, "grad_norm": 0.3411670466454486, "learning_rate": 1.534127205481631e-05, "loss": 0.309, "step": 7424 }, { "epoch": 0.34, "grad_norm": 1.0763322034249985, "learning_rate": 1.5340014102649853e-05, "loss": 0.5384, "step": 7425 }, { "epoch": 0.34, "grad_norm": 0.2812896741343816, "learning_rate": 1.5338756032260628e-05, "loss": 0.1605, "step": 7426 }, { "epoch": 0.34, "grad_norm": 0.5749663122594008, "learning_rate": 1.5337497843676486e-05, "loss": 0.3857, "step": 7427 }, { "epoch": 0.34, "grad_norm": 0.39827004840795666, "learning_rate": 1.533623953692529e-05, "loss": 0.3244, "step": 7428 }, { "epoch": 0.34, "grad_norm": 0.38405146896738, "learning_rate": 1.533498111203489e-05, "loss": 0.1877, "step": 7429 }, { "epoch": 0.34, "grad_norm": 0.42747989819313825, "learning_rate": 1.5333722569033155e-05, "loss": 0.3116, "step": 7430 }, { "epoch": 0.34, "grad_norm": 0.5631259746134563, "learning_rate": 1.533246390794794e-05, "loss": 0.4154, "step": 7431 }, { "epoch": 0.34, "grad_norm": 0.39337118104131297, "learning_rate": 1.5331205128807115e-05, "loss": 0.216, "step": 7432 }, { "epoch": 0.34, "grad_norm": 0.39615780435011894, "learning_rate": 1.5329946231638547e-05, "loss": 0.3215, "step": 7433 }, { "epoch": 0.34, "grad_norm": 0.5783803326651233, "learning_rate": 1.5328687216470107e-05, "loss": 0.451, "step": 7434 }, { "epoch": 0.34, "grad_norm": 0.28589090781261717, "learning_rate": 1.5327428083329666e-05, "loss": 0.2123, "step": 7435 }, { "epoch": 0.34, "grad_norm": 0.3202665731246141, "learning_rate": 1.5326168832245102e-05, "loss": 0.2074, "step": 7436 }, { "epoch": 0.34, "grad_norm": 1.2945968531203285, "learning_rate": 1.53249094632443e-05, "loss": 0.6154, "step": 7437 }, { "epoch": 0.34, "grad_norm": 0.8224152641369826, "learning_rate": 1.5323649976355123e-05, "loss": 0.4852, "step": 7438 }, { "epoch": 0.34, "grad_norm": 0.3536179545599055, "learning_rate": 1.5322390371605473e-05, "loss": 0.2556, "step": 7439 }, { "epoch": 0.34, "grad_norm": 0.5272178587023302, "learning_rate": 1.532113064902323e-05, "loss": 0.41, "step": 7440 }, { "epoch": 0.34, "grad_norm": 0.3253688956313545, "learning_rate": 1.5319870808636283e-05, "loss": 0.1583, "step": 7441 }, { "epoch": 0.34, "grad_norm": 0.3885265717992272, "learning_rate": 1.531861085047252e-05, "loss": 0.2243, "step": 7442 }, { "epoch": 0.34, "grad_norm": 0.5426092885015029, "learning_rate": 1.5317350774559846e-05, "loss": 0.4036, "step": 7443 }, { "epoch": 0.34, "grad_norm": 0.47425564815526683, "learning_rate": 1.5316090580926142e-05, "loss": 0.3365, "step": 7444 }, { "epoch": 0.34, "grad_norm": 0.3638417196434101, "learning_rate": 1.5314830269599325e-05, "loss": 0.251, "step": 7445 }, { "epoch": 0.34, "grad_norm": 0.6034329735415199, "learning_rate": 1.5313569840607285e-05, "loss": 0.491, "step": 7446 }, { "epoch": 0.34, "grad_norm": 0.2816419348045927, "learning_rate": 1.531230929397793e-05, "loss": 0.2131, "step": 7447 }, { "epoch": 0.34, "grad_norm": 0.37760397647907457, "learning_rate": 1.5311048629739165e-05, "loss": 0.3218, "step": 7448 }, { "epoch": 0.34, "grad_norm": 0.3239141979729318, "learning_rate": 1.5309787847918905e-05, "loss": 0.117, "step": 7449 }, { "epoch": 0.34, "grad_norm": 0.9030421158672063, "learning_rate": 1.530852694854506e-05, "loss": 0.4943, "step": 7450 }, { "epoch": 0.34, "grad_norm": 0.37429477607621975, "learning_rate": 1.530726593164554e-05, "loss": 0.3023, "step": 7451 }, { "epoch": 0.34, "grad_norm": 0.3761822738189655, "learning_rate": 1.5306004797248274e-05, "loss": 0.3101, "step": 7452 }, { "epoch": 0.34, "grad_norm": 0.4680229755743139, "learning_rate": 1.5304743545381167e-05, "loss": 0.2984, "step": 7453 }, { "epoch": 0.34, "grad_norm": 0.27341575051143846, "learning_rate": 1.530348217607216e-05, "loss": 0.2215, "step": 7454 }, { "epoch": 0.34, "grad_norm": 0.46700699220111536, "learning_rate": 1.5302220689349164e-05, "loss": 0.2893, "step": 7455 }, { "epoch": 0.34, "grad_norm": 0.5203159400463176, "learning_rate": 1.5300959085240116e-05, "loss": 0.3764, "step": 7456 }, { "epoch": 0.34, "grad_norm": 0.37016794019196847, "learning_rate": 1.529969736377294e-05, "loss": 0.3002, "step": 7457 }, { "epoch": 0.34, "grad_norm": 0.9185725531214263, "learning_rate": 1.5298435524975572e-05, "loss": 0.5663, "step": 7458 }, { "epoch": 0.34, "grad_norm": 0.3486328200511371, "learning_rate": 1.529717356887595e-05, "loss": 0.3062, "step": 7459 }, { "epoch": 0.34, "grad_norm": 0.29142841504966366, "learning_rate": 1.5295911495502013e-05, "loss": 0.2395, "step": 7460 }, { "epoch": 0.34, "grad_norm": 0.4455480554847195, "learning_rate": 1.529464930488169e-05, "loss": 0.2378, "step": 7461 }, { "epoch": 0.34, "grad_norm": 0.7359637657650808, "learning_rate": 1.5293386997042943e-05, "loss": 0.3277, "step": 7462 }, { "epoch": 0.34, "grad_norm": 0.38545745589505565, "learning_rate": 1.529212457201371e-05, "loss": 0.2857, "step": 7463 }, { "epoch": 0.34, "grad_norm": 0.41056274313660246, "learning_rate": 1.5290862029821935e-05, "loss": 0.3461, "step": 7464 }, { "epoch": 0.34, "grad_norm": 0.2976494354578043, "learning_rate": 1.5289599370495576e-05, "loss": 0.1503, "step": 7465 }, { "epoch": 0.34, "grad_norm": 0.39757251881996175, "learning_rate": 1.5288336594062586e-05, "loss": 0.2953, "step": 7466 }, { "epoch": 0.34, "grad_norm": 0.29660060828542445, "learning_rate": 1.5287073700550923e-05, "loss": 0.2518, "step": 7467 }, { "epoch": 0.34, "grad_norm": 1.2487615456532466, "learning_rate": 1.528581068998854e-05, "loss": 0.3927, "step": 7468 }, { "epoch": 0.34, "grad_norm": 0.341170981655371, "learning_rate": 1.5284547562403403e-05, "loss": 0.2572, "step": 7469 }, { "epoch": 0.34, "grad_norm": 1.0567752346894428, "learning_rate": 1.5283284317823478e-05, "loss": 0.7389, "step": 7470 }, { "epoch": 0.34, "grad_norm": 0.33374293652781567, "learning_rate": 1.528202095627673e-05, "loss": 0.2647, "step": 7471 }, { "epoch": 0.34, "grad_norm": 0.30698538525180963, "learning_rate": 1.528075747779113e-05, "loss": 0.2374, "step": 7472 }, { "epoch": 0.34, "grad_norm": 0.549488764202552, "learning_rate": 1.5279493882394648e-05, "loss": 0.3092, "step": 7473 }, { "epoch": 0.34, "grad_norm": 0.9672870777023324, "learning_rate": 1.527823017011526e-05, "loss": 0.5008, "step": 7474 }, { "epoch": 0.34, "grad_norm": 0.268426252447338, "learning_rate": 1.527696634098094e-05, "loss": 0.2245, "step": 7475 }, { "epoch": 0.34, "grad_norm": 0.5063629176984931, "learning_rate": 1.5275702395019675e-05, "loss": 0.3793, "step": 7476 }, { "epoch": 0.34, "grad_norm": 0.7662958564459699, "learning_rate": 1.5274438332259442e-05, "loss": 0.6107, "step": 7477 }, { "epoch": 0.34, "grad_norm": 0.27659298797488513, "learning_rate": 1.527317415272823e-05, "loss": 0.1717, "step": 7478 }, { "epoch": 0.34, "grad_norm": 0.3390568180645851, "learning_rate": 1.5271909856454024e-05, "loss": 0.2908, "step": 7479 }, { "epoch": 0.34, "grad_norm": 0.8323832038949739, "learning_rate": 1.5270645443464817e-05, "loss": 0.4798, "step": 7480 }, { "epoch": 0.34, "grad_norm": 0.3289274546627059, "learning_rate": 1.526938091378859e-05, "loss": 0.2005, "step": 7481 }, { "epoch": 0.34, "grad_norm": 1.2108898337498082, "learning_rate": 1.5268116267453358e-05, "loss": 0.8383, "step": 7482 }, { "epoch": 0.34, "grad_norm": 0.4080409825021209, "learning_rate": 1.5266851504487105e-05, "loss": 0.3201, "step": 7483 }, { "epoch": 0.34, "grad_norm": 0.32407646629228454, "learning_rate": 1.5265586624917842e-05, "loss": 0.2061, "step": 7484 }, { "epoch": 0.34, "grad_norm": 0.5629160279005104, "learning_rate": 1.526432162877356e-05, "loss": 0.4575, "step": 7485 }, { "epoch": 0.34, "grad_norm": 0.2948531088485579, "learning_rate": 1.5263056516082273e-05, "loss": 0.1963, "step": 7486 }, { "epoch": 0.34, "grad_norm": 0.36735111558375133, "learning_rate": 1.5261791286871986e-05, "loss": 0.3104, "step": 7487 }, { "epoch": 0.34, "grad_norm": 0.47347670733903713, "learning_rate": 1.526052594117071e-05, "loss": 0.2843, "step": 7488 }, { "epoch": 0.34, "grad_norm": 1.8179242295709765, "learning_rate": 1.5259260479006465e-05, "loss": 0.7135, "step": 7489 }, { "epoch": 0.34, "grad_norm": 0.33706820734056187, "learning_rate": 1.525799490040726e-05, "loss": 0.2956, "step": 7490 }, { "epoch": 0.34, "grad_norm": 0.3606023174329244, "learning_rate": 1.5256729205401112e-05, "loss": 0.2892, "step": 7491 }, { "epoch": 0.34, "grad_norm": 0.3100718256304107, "learning_rate": 1.525546339401605e-05, "loss": 0.1919, "step": 7492 }, { "epoch": 0.34, "grad_norm": 0.3816464395095946, "learning_rate": 1.5254197466280091e-05, "loss": 0.2915, "step": 7493 }, { "epoch": 0.34, "grad_norm": 1.0806042282622113, "learning_rate": 1.5252931422221266e-05, "loss": 0.5796, "step": 7494 }, { "epoch": 0.34, "grad_norm": 0.4271866844900743, "learning_rate": 1.5251665261867602e-05, "loss": 0.3301, "step": 7495 }, { "epoch": 0.34, "grad_norm": 0.3831815223534995, "learning_rate": 1.525039898524713e-05, "loss": 0.293, "step": 7496 }, { "epoch": 0.34, "grad_norm": 0.6244372298677882, "learning_rate": 1.5249132592387888e-05, "loss": 0.3659, "step": 7497 }, { "epoch": 0.34, "grad_norm": 0.26633684243163896, "learning_rate": 1.5247866083317907e-05, "loss": 0.2006, "step": 7498 }, { "epoch": 0.34, "grad_norm": 0.41427157436356155, "learning_rate": 1.5246599458065228e-05, "loss": 0.2664, "step": 7499 }, { "epoch": 0.34, "grad_norm": 0.4423490591743375, "learning_rate": 1.5245332716657892e-05, "loss": 0.3273, "step": 7500 }, { "epoch": 0.34, "grad_norm": 0.6046665380529342, "learning_rate": 1.5244065859123949e-05, "loss": 0.3737, "step": 7501 }, { "epoch": 0.34, "grad_norm": 0.4927001615674848, "learning_rate": 1.5242798885491442e-05, "loss": 0.3128, "step": 7502 }, { "epoch": 0.34, "grad_norm": 0.36468526505287974, "learning_rate": 1.5241531795788417e-05, "loss": 0.2972, "step": 7503 }, { "epoch": 0.34, "grad_norm": 0.2542059378937968, "learning_rate": 1.5240264590042935e-05, "loss": 0.0722, "step": 7504 }, { "epoch": 0.34, "grad_norm": 0.45555414330273286, "learning_rate": 1.5238997268283042e-05, "loss": 0.3005, "step": 7505 }, { "epoch": 0.34, "grad_norm": 0.5887346619932107, "learning_rate": 1.5237729830536798e-05, "loss": 0.4516, "step": 7506 }, { "epoch": 0.34, "grad_norm": 0.3578551983361436, "learning_rate": 1.523646227683227e-05, "loss": 0.2811, "step": 7507 }, { "epoch": 0.34, "grad_norm": 0.4550691073689091, "learning_rate": 1.5235194607197508e-05, "loss": 0.3108, "step": 7508 }, { "epoch": 0.34, "grad_norm": 0.5318724983277151, "learning_rate": 1.5233926821660585e-05, "loss": 0.3677, "step": 7509 }, { "epoch": 0.35, "grad_norm": 0.2806655152821678, "learning_rate": 1.5232658920249566e-05, "loss": 0.1812, "step": 7510 }, { "epoch": 0.35, "grad_norm": 0.263792744228147, "learning_rate": 1.5231390902992522e-05, "loss": 0.234, "step": 7511 }, { "epoch": 0.35, "grad_norm": 1.2966600076615102, "learning_rate": 1.5230122769917528e-05, "loss": 0.7995, "step": 7512 }, { "epoch": 0.35, "grad_norm": 0.6890544113894183, "learning_rate": 1.5228854521052655e-05, "loss": 0.454, "step": 7513 }, { "epoch": 0.35, "grad_norm": 0.3170781589725387, "learning_rate": 1.5227586156425982e-05, "loss": 0.2305, "step": 7514 }, { "epoch": 0.35, "grad_norm": 0.38436896926568165, "learning_rate": 1.522631767606559e-05, "loss": 0.3147, "step": 7515 }, { "epoch": 0.35, "grad_norm": 0.4538049040640755, "learning_rate": 1.5225049079999561e-05, "loss": 0.3114, "step": 7516 }, { "epoch": 0.35, "grad_norm": 0.3536241715541373, "learning_rate": 1.522378036825598e-05, "loss": 0.1675, "step": 7517 }, { "epoch": 0.35, "grad_norm": 0.3847510700233104, "learning_rate": 1.5222511540862941e-05, "loss": 0.3244, "step": 7518 }, { "epoch": 0.35, "grad_norm": 0.4314688330144183, "learning_rate": 1.5221242597848527e-05, "loss": 0.3334, "step": 7519 }, { "epoch": 0.35, "grad_norm": 0.22922207842728282, "learning_rate": 1.5219973539240838e-05, "loss": 0.0957, "step": 7520 }, { "epoch": 0.35, "grad_norm": 0.460065962373246, "learning_rate": 1.521870436506796e-05, "loss": 0.3394, "step": 7521 }, { "epoch": 0.35, "grad_norm": 0.33771746726605406, "learning_rate": 1.5217435075358e-05, "loss": 0.2672, "step": 7522 }, { "epoch": 0.35, "grad_norm": 0.4096262723596227, "learning_rate": 1.5216165670139055e-05, "loss": 0.2944, "step": 7523 }, { "epoch": 0.35, "grad_norm": 0.37129980981951305, "learning_rate": 1.521489614943923e-05, "loss": 0.2975, "step": 7524 }, { "epoch": 0.35, "grad_norm": 0.6952761247654623, "learning_rate": 1.5213626513286632e-05, "loss": 0.5164, "step": 7525 }, { "epoch": 0.35, "grad_norm": 0.3666491862133794, "learning_rate": 1.5212356761709368e-05, "loss": 0.1817, "step": 7526 }, { "epoch": 0.35, "grad_norm": 0.3191021051242505, "learning_rate": 1.5211086894735547e-05, "loss": 0.2605, "step": 7527 }, { "epoch": 0.35, "grad_norm": 1.0857125271459127, "learning_rate": 1.5209816912393284e-05, "loss": 0.6784, "step": 7528 }, { "epoch": 0.35, "grad_norm": 0.6088230829130713, "learning_rate": 1.5208546814710701e-05, "loss": 0.3808, "step": 7529 }, { "epoch": 0.35, "grad_norm": 0.3633523535313251, "learning_rate": 1.5207276601715906e-05, "loss": 0.269, "step": 7530 }, { "epoch": 0.35, "grad_norm": 0.38317521185310766, "learning_rate": 1.5206006273437031e-05, "loss": 0.3545, "step": 7531 }, { "epoch": 0.35, "grad_norm": 0.243114007169776, "learning_rate": 1.5204735829902188e-05, "loss": 0.1825, "step": 7532 }, { "epoch": 0.35, "grad_norm": 0.501772504164025, "learning_rate": 1.5203465271139517e-05, "loss": 0.2926, "step": 7533 }, { "epoch": 0.35, "grad_norm": 0.401238621785067, "learning_rate": 1.5202194597177134e-05, "loss": 0.3192, "step": 7534 }, { "epoch": 0.35, "grad_norm": 0.924095619600899, "learning_rate": 1.520092380804318e-05, "loss": 0.5454, "step": 7535 }, { "epoch": 0.35, "grad_norm": 0.37424485973635235, "learning_rate": 1.5199652903765784e-05, "loss": 0.3403, "step": 7536 }, { "epoch": 0.35, "grad_norm": 0.3083202719942609, "learning_rate": 1.5198381884373088e-05, "loss": 0.1979, "step": 7537 }, { "epoch": 0.35, "grad_norm": 0.5007422101297211, "learning_rate": 1.5197110749893225e-05, "loss": 0.3345, "step": 7538 }, { "epoch": 0.35, "grad_norm": 0.3306098922238994, "learning_rate": 1.5195839500354337e-05, "loss": 0.257, "step": 7539 }, { "epoch": 0.35, "grad_norm": 0.7484481808203862, "learning_rate": 1.5194568135784573e-05, "loss": 0.3601, "step": 7540 }, { "epoch": 0.35, "grad_norm": 0.5132107700274147, "learning_rate": 1.5193296656212075e-05, "loss": 0.4012, "step": 7541 }, { "epoch": 0.35, "grad_norm": 0.36927532916218836, "learning_rate": 1.5192025061664994e-05, "loss": 0.2886, "step": 7542 }, { "epoch": 0.35, "grad_norm": 0.37028677569356977, "learning_rate": 1.5190753352171485e-05, "loss": 0.2739, "step": 7543 }, { "epoch": 0.35, "grad_norm": 0.30138740496114497, "learning_rate": 1.51894815277597e-05, "loss": 0.1564, "step": 7544 }, { "epoch": 0.35, "grad_norm": 0.42385233806185363, "learning_rate": 1.5188209588457791e-05, "loss": 0.3134, "step": 7545 }, { "epoch": 0.35, "grad_norm": 0.5152810910436364, "learning_rate": 1.5186937534293926e-05, "loss": 0.2671, "step": 7546 }, { "epoch": 0.35, "grad_norm": 0.43127471855073274, "learning_rate": 1.518566536529626e-05, "loss": 0.3013, "step": 7547 }, { "epoch": 0.35, "grad_norm": 0.4227657429301361, "learning_rate": 1.5184393081492966e-05, "loss": 0.3489, "step": 7548 }, { "epoch": 0.35, "grad_norm": 0.8779681589153319, "learning_rate": 1.5183120682912203e-05, "loss": 0.6061, "step": 7549 }, { "epoch": 0.35, "grad_norm": 0.24829017852142088, "learning_rate": 1.5181848169582143e-05, "loss": 0.1925, "step": 7550 }, { "epoch": 0.35, "grad_norm": 0.3374194623370902, "learning_rate": 1.518057554153096e-05, "loss": 0.2403, "step": 7551 }, { "epoch": 0.35, "grad_norm": 0.8005939118982769, "learning_rate": 1.5179302798786827e-05, "loss": 0.5152, "step": 7552 }, { "epoch": 0.35, "grad_norm": 0.5611860342144768, "learning_rate": 1.5178029941377924e-05, "loss": 0.3069, "step": 7553 }, { "epoch": 0.35, "grad_norm": 0.41647710662864074, "learning_rate": 1.5176756969332428e-05, "loss": 0.3213, "step": 7554 }, { "epoch": 0.35, "grad_norm": 0.38664346700979757, "learning_rate": 1.5175483882678519e-05, "loss": 0.3363, "step": 7555 }, { "epoch": 0.35, "grad_norm": 0.17927402266255998, "learning_rate": 1.5174210681444388e-05, "loss": 0.0927, "step": 7556 }, { "epoch": 0.35, "grad_norm": 0.40567816147288766, "learning_rate": 1.5172937365658217e-05, "loss": 0.2843, "step": 7557 }, { "epoch": 0.35, "grad_norm": 0.5283520845558017, "learning_rate": 1.51716639353482e-05, "loss": 0.348, "step": 7558 }, { "epoch": 0.35, "grad_norm": 0.9836608529368737, "learning_rate": 1.5170390390542529e-05, "loss": 0.3047, "step": 7559 }, { "epoch": 0.35, "grad_norm": 0.3906558081242866, "learning_rate": 1.5169116731269395e-05, "loss": 0.2966, "step": 7560 }, { "epoch": 0.35, "grad_norm": 1.2684109880625636, "learning_rate": 1.5167842957557004e-05, "loss": 0.7875, "step": 7561 }, { "epoch": 0.35, "grad_norm": 0.31159977117813276, "learning_rate": 1.5166569069433545e-05, "loss": 0.2429, "step": 7562 }, { "epoch": 0.35, "grad_norm": 0.2812627631005785, "learning_rate": 1.5165295066927231e-05, "loss": 0.1775, "step": 7563 }, { "epoch": 0.35, "grad_norm": 1.0080329600784457, "learning_rate": 1.5164020950066259e-05, "loss": 0.5499, "step": 7564 }, { "epoch": 0.35, "grad_norm": 0.9775537408268301, "learning_rate": 1.5162746718878843e-05, "loss": 0.4402, "step": 7565 }, { "epoch": 0.35, "grad_norm": 0.34102108756192173, "learning_rate": 1.5161472373393186e-05, "loss": 0.2269, "step": 7566 }, { "epoch": 0.35, "grad_norm": 0.42193899661358886, "learning_rate": 1.516019791363751e-05, "loss": 0.3601, "step": 7567 }, { "epoch": 0.35, "grad_norm": 0.3621361342381857, "learning_rate": 1.5158923339640026e-05, "loss": 0.1865, "step": 7568 }, { "epoch": 0.35, "grad_norm": 0.4256351463922086, "learning_rate": 1.5157648651428948e-05, "loss": 0.2196, "step": 7569 }, { "epoch": 0.35, "grad_norm": 0.7870718793752617, "learning_rate": 1.5156373849032501e-05, "loss": 0.3496, "step": 7570 }, { "epoch": 0.35, "grad_norm": 1.3327880687335067, "learning_rate": 1.5155098932478906e-05, "loss": 0.5292, "step": 7571 }, { "epoch": 0.35, "grad_norm": 0.3405054545020583, "learning_rate": 1.5153823901796395e-05, "loss": 0.2106, "step": 7572 }, { "epoch": 0.35, "grad_norm": 1.0741856026735557, "learning_rate": 1.5152548757013183e-05, "loss": 0.7554, "step": 7573 }, { "epoch": 0.35, "grad_norm": 0.4320639637568891, "learning_rate": 1.5151273498157513e-05, "loss": 0.3321, "step": 7574 }, { "epoch": 0.35, "grad_norm": 0.32477358409682133, "learning_rate": 1.5149998125257608e-05, "loss": 0.2305, "step": 7575 }, { "epoch": 0.35, "grad_norm": 0.8171054108796164, "learning_rate": 1.5148722638341711e-05, "loss": 0.2892, "step": 7576 }, { "epoch": 0.35, "grad_norm": 1.6467837539717225, "learning_rate": 1.5147447037438055e-05, "loss": 0.8094, "step": 7577 }, { "epoch": 0.35, "grad_norm": 0.41701858396939506, "learning_rate": 1.5146171322574885e-05, "loss": 0.2826, "step": 7578 }, { "epoch": 0.35, "grad_norm": 0.5081375639789141, "learning_rate": 1.5144895493780441e-05, "loss": 0.2605, "step": 7579 }, { "epoch": 0.35, "grad_norm": 0.8876155800421572, "learning_rate": 1.5143619551082972e-05, "loss": 0.5119, "step": 7580 }, { "epoch": 0.35, "grad_norm": 0.3537848449426154, "learning_rate": 1.5142343494510718e-05, "loss": 0.2866, "step": 7581 }, { "epoch": 0.35, "grad_norm": 0.42436352312052644, "learning_rate": 1.5141067324091939e-05, "loss": 0.2973, "step": 7582 }, { "epoch": 0.35, "grad_norm": 0.30803405189709004, "learning_rate": 1.5139791039854883e-05, "loss": 0.2091, "step": 7583 }, { "epoch": 0.35, "grad_norm": 0.39497355367382236, "learning_rate": 1.5138514641827809e-05, "loss": 0.2356, "step": 7584 }, { "epoch": 0.35, "grad_norm": 1.3422919264751791, "learning_rate": 1.5137238130038973e-05, "loss": 0.4163, "step": 7585 }, { "epoch": 0.35, "grad_norm": 0.444213110946341, "learning_rate": 1.5135961504516634e-05, "loss": 0.326, "step": 7586 }, { "epoch": 0.35, "grad_norm": 0.3936246885820805, "learning_rate": 1.5134684765289059e-05, "loss": 0.2807, "step": 7587 }, { "epoch": 0.35, "grad_norm": 0.5301838391452397, "learning_rate": 1.5133407912384513e-05, "loss": 0.3674, "step": 7588 }, { "epoch": 0.35, "grad_norm": 0.326394672139095, "learning_rate": 1.5132130945831262e-05, "loss": 0.2131, "step": 7589 }, { "epoch": 0.35, "grad_norm": 0.3666651160668729, "learning_rate": 1.513085386565758e-05, "loss": 0.2636, "step": 7590 }, { "epoch": 0.35, "grad_norm": 0.6096838271605424, "learning_rate": 1.5129576671891737e-05, "loss": 0.4235, "step": 7591 }, { "epoch": 0.35, "grad_norm": 1.0194321285021413, "learning_rate": 1.512829936456201e-05, "loss": 0.3706, "step": 7592 }, { "epoch": 0.35, "grad_norm": 0.3962853125563876, "learning_rate": 1.5127021943696678e-05, "loss": 0.3147, "step": 7593 }, { "epoch": 0.35, "grad_norm": 0.40398912277396476, "learning_rate": 1.5125744409324022e-05, "loss": 0.3557, "step": 7594 }, { "epoch": 0.35, "grad_norm": 0.1944773788288787, "learning_rate": 1.5124466761472326e-05, "loss": 0.1061, "step": 7595 }, { "epoch": 0.35, "grad_norm": 0.3481602901099738, "learning_rate": 1.5123189000169874e-05, "loss": 0.2643, "step": 7596 }, { "epoch": 0.35, "grad_norm": 1.1299204314982574, "learning_rate": 1.5121911125444956e-05, "loss": 0.6227, "step": 7597 }, { "epoch": 0.35, "grad_norm": 0.6183005053027494, "learning_rate": 1.5120633137325861e-05, "loss": 0.3213, "step": 7598 }, { "epoch": 0.35, "grad_norm": 0.40217222536614167, "learning_rate": 1.5119355035840884e-05, "loss": 0.2745, "step": 7599 }, { "epoch": 0.35, "grad_norm": 1.211868304264924, "learning_rate": 1.5118076821018322e-05, "loss": 0.5877, "step": 7600 }, { "epoch": 0.35, "grad_norm": 0.32740997237068087, "learning_rate": 1.5116798492886472e-05, "loss": 0.2256, "step": 7601 }, { "epoch": 0.35, "grad_norm": 0.3351996380454803, "learning_rate": 1.5115520051473636e-05, "loss": 0.2276, "step": 7602 }, { "epoch": 0.35, "grad_norm": 0.5361811550270056, "learning_rate": 1.511424149680811e-05, "loss": 0.385, "step": 7603 }, { "epoch": 0.35, "grad_norm": 0.7855661597264844, "learning_rate": 1.5112962828918214e-05, "loss": 0.4592, "step": 7604 }, { "epoch": 0.35, "grad_norm": 0.34497535939865176, "learning_rate": 1.5111684047832245e-05, "loss": 0.1994, "step": 7605 }, { "epoch": 0.35, "grad_norm": 0.3457506564380581, "learning_rate": 1.5110405153578517e-05, "loss": 0.3025, "step": 7606 }, { "epoch": 0.35, "grad_norm": 0.4824846860815226, "learning_rate": 1.5109126146185347e-05, "loss": 0.3205, "step": 7607 }, { "epoch": 0.35, "grad_norm": 0.3200978072050853, "learning_rate": 1.5107847025681048e-05, "loss": 0.193, "step": 7608 }, { "epoch": 0.35, "grad_norm": 0.5544670726121207, "learning_rate": 1.5106567792093938e-05, "loss": 0.3305, "step": 7609 }, { "epoch": 0.35, "grad_norm": 0.42115896993822066, "learning_rate": 1.510528844545234e-05, "loss": 0.3451, "step": 7610 }, { "epoch": 0.35, "grad_norm": 0.3147934078938412, "learning_rate": 1.5104008985784572e-05, "loss": 0.2052, "step": 7611 }, { "epoch": 0.35, "grad_norm": 1.0450622854914953, "learning_rate": 1.5102729413118971e-05, "loss": 0.5182, "step": 7612 }, { "epoch": 0.35, "grad_norm": 1.2360645630668692, "learning_rate": 1.5101449727483855e-05, "loss": 0.8049, "step": 7613 }, { "epoch": 0.35, "grad_norm": 0.30681133462817733, "learning_rate": 1.5100169928907562e-05, "loss": 0.2734, "step": 7614 }, { "epoch": 0.35, "grad_norm": 0.3237638544247339, "learning_rate": 1.5098890017418419e-05, "loss": 0.2182, "step": 7615 }, { "epoch": 0.35, "grad_norm": 0.4749421238351701, "learning_rate": 1.5097609993044767e-05, "loss": 0.3092, "step": 7616 }, { "epoch": 0.35, "grad_norm": 0.4131216903242655, "learning_rate": 1.5096329855814942e-05, "loss": 0.2847, "step": 7617 }, { "epoch": 0.35, "grad_norm": 0.3839366551721288, "learning_rate": 1.5095049605757285e-05, "loss": 0.2527, "step": 7618 }, { "epoch": 0.35, "grad_norm": 1.0316988508153542, "learning_rate": 1.5093769242900145e-05, "loss": 0.6807, "step": 7619 }, { "epoch": 0.35, "grad_norm": 0.42735204352842515, "learning_rate": 1.5092488767271858e-05, "loss": 0.2907, "step": 7620 }, { "epoch": 0.35, "grad_norm": 0.4392654067677224, "learning_rate": 1.509120817890078e-05, "loss": 0.3263, "step": 7621 }, { "epoch": 0.35, "grad_norm": 0.2729869543529425, "learning_rate": 1.5089927477815258e-05, "loss": 0.2209, "step": 7622 }, { "epoch": 0.35, "grad_norm": 0.8064194296670322, "learning_rate": 1.5088646664043652e-05, "loss": 0.3873, "step": 7623 }, { "epoch": 0.35, "grad_norm": 0.44523674137514896, "learning_rate": 1.5087365737614308e-05, "loss": 0.3043, "step": 7624 }, { "epoch": 0.35, "grad_norm": 0.38740331821457247, "learning_rate": 1.5086084698555594e-05, "loss": 0.2754, "step": 7625 }, { "epoch": 0.35, "grad_norm": 0.4375651543201528, "learning_rate": 1.5084803546895863e-05, "loss": 0.2616, "step": 7626 }, { "epoch": 0.35, "grad_norm": 0.6177360691352405, "learning_rate": 1.5083522282663486e-05, "loss": 0.3665, "step": 7627 }, { "epoch": 0.35, "grad_norm": 0.23558660247991184, "learning_rate": 1.5082240905886825e-05, "loss": 0.1082, "step": 7628 }, { "epoch": 0.35, "grad_norm": 0.422250524836257, "learning_rate": 1.5080959416594246e-05, "loss": 0.2757, "step": 7629 }, { "epoch": 0.35, "grad_norm": 0.3274222668178951, "learning_rate": 1.5079677814814124e-05, "loss": 0.317, "step": 7630 }, { "epoch": 0.35, "grad_norm": 0.9961536923718076, "learning_rate": 1.507839610057483e-05, "loss": 0.491, "step": 7631 }, { "epoch": 0.35, "grad_norm": 0.43286476213631525, "learning_rate": 1.5077114273904743e-05, "loss": 0.3061, "step": 7632 }, { "epoch": 0.35, "grad_norm": 0.616657586769195, "learning_rate": 1.5075832334832239e-05, "loss": 0.3839, "step": 7633 }, { "epoch": 0.35, "grad_norm": 0.2599163145138823, "learning_rate": 1.5074550283385699e-05, "loss": 0.1791, "step": 7634 }, { "epoch": 0.35, "grad_norm": 0.4176350237068114, "learning_rate": 1.5073268119593504e-05, "loss": 0.2739, "step": 7635 }, { "epoch": 0.35, "grad_norm": 0.6147367108104824, "learning_rate": 1.5071985843484047e-05, "loss": 0.3929, "step": 7636 }, { "epoch": 0.35, "grad_norm": 0.48234697499563384, "learning_rate": 1.507070345508571e-05, "loss": 0.3586, "step": 7637 }, { "epoch": 0.35, "grad_norm": 0.2850951220253969, "learning_rate": 1.5069420954426886e-05, "loss": 0.222, "step": 7638 }, { "epoch": 0.35, "grad_norm": 0.5010867255937782, "learning_rate": 1.5068138341535964e-05, "loss": 0.4048, "step": 7639 }, { "epoch": 0.35, "grad_norm": 0.30703609605610654, "learning_rate": 1.506685561644135e-05, "loss": 0.2229, "step": 7640 }, { "epoch": 0.35, "grad_norm": 0.32916150211071815, "learning_rate": 1.506557277917143e-05, "loss": 0.074, "step": 7641 }, { "epoch": 0.35, "grad_norm": 0.34304553881879934, "learning_rate": 1.5064289829754618e-05, "loss": 0.2958, "step": 7642 }, { "epoch": 0.35, "grad_norm": 0.6518775278263368, "learning_rate": 1.5063006768219306e-05, "loss": 0.4409, "step": 7643 }, { "epoch": 0.35, "grad_norm": 0.5026966366970531, "learning_rate": 1.5061723594593903e-05, "loss": 0.2161, "step": 7644 }, { "epoch": 0.35, "grad_norm": 0.37209818630973324, "learning_rate": 1.506044030890682e-05, "loss": 0.3005, "step": 7645 }, { "epoch": 0.35, "grad_norm": 0.5776690749178832, "learning_rate": 1.5059156911186465e-05, "loss": 0.4037, "step": 7646 }, { "epoch": 0.35, "grad_norm": 0.2043797321336634, "learning_rate": 1.5057873401461253e-05, "loss": 0.1303, "step": 7647 }, { "epoch": 0.35, "grad_norm": 0.6148729492844035, "learning_rate": 1.5056589779759599e-05, "loss": 0.3955, "step": 7648 }, { "epoch": 0.35, "grad_norm": 0.4793994118030412, "learning_rate": 1.5055306046109922e-05, "loss": 0.3697, "step": 7649 }, { "epoch": 0.35, "grad_norm": 0.4287614043512806, "learning_rate": 1.5054022200540636e-05, "loss": 0.3156, "step": 7650 }, { "epoch": 0.35, "grad_norm": 0.39242122953709163, "learning_rate": 1.5052738243080173e-05, "loss": 0.2716, "step": 7651 }, { "epoch": 0.35, "grad_norm": 0.42138512885165935, "learning_rate": 1.5051454173756956e-05, "loss": 0.2941, "step": 7652 }, { "epoch": 0.35, "grad_norm": 0.48147549615505125, "learning_rate": 1.5050169992599412e-05, "loss": 0.231, "step": 7653 }, { "epoch": 0.35, "grad_norm": 0.3392201113875556, "learning_rate": 1.5048885699635972e-05, "loss": 0.2527, "step": 7654 }, { "epoch": 0.35, "grad_norm": 0.7620454601538579, "learning_rate": 1.5047601294895069e-05, "loss": 0.4223, "step": 7655 }, { "epoch": 0.35, "grad_norm": 0.6455478219550163, "learning_rate": 1.5046316778405137e-05, "loss": 0.4275, "step": 7656 }, { "epoch": 0.35, "grad_norm": 0.4306450400178657, "learning_rate": 1.5045032150194617e-05, "loss": 0.2519, "step": 7657 }, { "epoch": 0.35, "grad_norm": 0.3788328239620782, "learning_rate": 1.5043747410291945e-05, "loss": 0.3242, "step": 7658 }, { "epoch": 0.35, "grad_norm": 0.2880517418249159, "learning_rate": 1.5042462558725568e-05, "loss": 0.1721, "step": 7659 }, { "epoch": 0.35, "grad_norm": 0.40647069405350517, "learning_rate": 1.504117759552393e-05, "loss": 0.2661, "step": 7660 }, { "epoch": 0.35, "grad_norm": 0.37664810413778893, "learning_rate": 1.503989252071548e-05, "loss": 0.3385, "step": 7661 }, { "epoch": 0.35, "grad_norm": 0.7359037639572519, "learning_rate": 1.5038607334328666e-05, "loss": 0.4013, "step": 7662 }, { "epoch": 0.35, "grad_norm": 0.4041293370839083, "learning_rate": 1.503732203639194e-05, "loss": 0.2908, "step": 7663 }, { "epoch": 0.35, "grad_norm": 0.7867829645627987, "learning_rate": 1.5036036626933763e-05, "loss": 0.2581, "step": 7664 }, { "epoch": 0.35, "grad_norm": 0.25621004984259216, "learning_rate": 1.5034751105982585e-05, "loss": 0.2155, "step": 7665 }, { "epoch": 0.35, "grad_norm": 0.4355670620186746, "learning_rate": 1.5033465473566873e-05, "loss": 0.3089, "step": 7666 }, { "epoch": 0.35, "grad_norm": 0.6308414772533807, "learning_rate": 1.5032179729715087e-05, "loss": 0.3388, "step": 7667 }, { "epoch": 0.35, "grad_norm": 0.6213229663823759, "learning_rate": 1.5030893874455688e-05, "loss": 0.3687, "step": 7668 }, { "epoch": 0.35, "grad_norm": 0.379547874469156, "learning_rate": 1.502960790781715e-05, "loss": 0.2898, "step": 7669 }, { "epoch": 0.35, "grad_norm": 0.408334640795198, "learning_rate": 1.5028321829827942e-05, "loss": 0.2667, "step": 7670 }, { "epoch": 0.35, "grad_norm": 0.40065966203811404, "learning_rate": 1.5027035640516533e-05, "loss": 0.2451, "step": 7671 }, { "epoch": 0.35, "grad_norm": 0.4174767569956399, "learning_rate": 1.5025749339911401e-05, "loss": 0.336, "step": 7672 }, { "epoch": 0.35, "grad_norm": 0.33774688670473024, "learning_rate": 1.5024462928041021e-05, "loss": 0.2526, "step": 7673 }, { "epoch": 0.35, "grad_norm": 0.5522583782308245, "learning_rate": 1.5023176404933875e-05, "loss": 0.3528, "step": 7674 }, { "epoch": 0.35, "grad_norm": 0.45323000335035246, "learning_rate": 1.5021889770618445e-05, "loss": 0.3123, "step": 7675 }, { "epoch": 0.35, "grad_norm": 1.0091943037805404, "learning_rate": 1.5020603025123215e-05, "loss": 0.627, "step": 7676 }, { "epoch": 0.35, "grad_norm": 0.4204407410105205, "learning_rate": 1.5019316168476673e-05, "loss": 0.2827, "step": 7677 }, { "epoch": 0.35, "grad_norm": 0.34255019450096147, "learning_rate": 1.5018029200707312e-05, "loss": 0.2953, "step": 7678 }, { "epoch": 0.35, "grad_norm": 0.3134957641459456, "learning_rate": 1.5016742121843617e-05, "loss": 0.185, "step": 7679 }, { "epoch": 0.35, "grad_norm": 0.4978238301625696, "learning_rate": 1.5015454931914088e-05, "loss": 0.2701, "step": 7680 }, { "epoch": 0.35, "grad_norm": 0.35924586076609377, "learning_rate": 1.501416763094722e-05, "loss": 0.2907, "step": 7681 }, { "epoch": 0.35, "grad_norm": 0.4996443820322406, "learning_rate": 1.5012880218971515e-05, "loss": 0.336, "step": 7682 }, { "epoch": 0.35, "grad_norm": 0.5616092042389272, "learning_rate": 1.5011592696015474e-05, "loss": 0.3253, "step": 7683 }, { "epoch": 0.35, "grad_norm": 0.3920430722015312, "learning_rate": 1.5010305062107598e-05, "loss": 0.3268, "step": 7684 }, { "epoch": 0.35, "grad_norm": 0.28130144604997476, "learning_rate": 1.50090173172764e-05, "loss": 0.2035, "step": 7685 }, { "epoch": 0.35, "grad_norm": 0.4323259333906215, "learning_rate": 1.5007729461550384e-05, "loss": 0.27, "step": 7686 }, { "epoch": 0.35, "grad_norm": 0.3746408540182315, "learning_rate": 1.5006441494958065e-05, "loss": 0.2834, "step": 7687 }, { "epoch": 0.35, "grad_norm": 0.8267041857808203, "learning_rate": 1.5005153417527955e-05, "loss": 0.5503, "step": 7688 }, { "epoch": 0.35, "grad_norm": 0.35781716106855976, "learning_rate": 1.5003865229288576e-05, "loss": 0.3256, "step": 7689 }, { "epoch": 0.35, "grad_norm": 0.35196877169483254, "learning_rate": 1.5002576930268444e-05, "loss": 0.2438, "step": 7690 }, { "epoch": 0.35, "grad_norm": 0.29976794823650044, "learning_rate": 1.5001288520496076e-05, "loss": 0.187, "step": 7691 }, { "epoch": 0.35, "grad_norm": 0.7963967688505865, "learning_rate": 1.5000000000000002e-05, "loss": 0.5699, "step": 7692 }, { "epoch": 0.35, "grad_norm": 0.3045204299775969, "learning_rate": 1.4998711368808748e-05, "loss": 0.2335, "step": 7693 }, { "epoch": 0.35, "grad_norm": 0.46379940345193477, "learning_rate": 1.499742262695084e-05, "loss": 0.3597, "step": 7694 }, { "epoch": 0.35, "grad_norm": 0.7466706018466931, "learning_rate": 1.4996133774454813e-05, "loss": 0.4758, "step": 7695 }, { "epoch": 0.35, "grad_norm": 0.32355291719558715, "learning_rate": 1.49948448113492e-05, "loss": 0.2046, "step": 7696 }, { "epoch": 0.35, "grad_norm": 0.24033206281582406, "learning_rate": 1.4993555737662537e-05, "loss": 0.2273, "step": 7697 }, { "epoch": 0.35, "grad_norm": 1.4812987952261059, "learning_rate": 1.4992266553423363e-05, "loss": 0.7697, "step": 7698 }, { "epoch": 0.35, "grad_norm": 0.31304246170596267, "learning_rate": 1.4990977258660218e-05, "loss": 0.2012, "step": 7699 }, { "epoch": 0.35, "grad_norm": 0.8968750469262925, "learning_rate": 1.4989687853401647e-05, "loss": 0.44, "step": 7700 }, { "epoch": 0.35, "grad_norm": 0.3889300157166741, "learning_rate": 1.4988398337676198e-05, "loss": 0.3494, "step": 7701 }, { "epoch": 0.35, "grad_norm": 0.3392461051254853, "learning_rate": 1.4987108711512417e-05, "loss": 0.2476, "step": 7702 }, { "epoch": 0.35, "grad_norm": 0.29171322683745304, "learning_rate": 1.4985818974938855e-05, "loss": 0.1125, "step": 7703 }, { "epoch": 0.35, "grad_norm": 0.5450539871834215, "learning_rate": 1.4984529127984064e-05, "loss": 0.4419, "step": 7704 }, { "epoch": 0.35, "grad_norm": 0.3283305667759528, "learning_rate": 1.4983239170676606e-05, "loss": 0.2857, "step": 7705 }, { "epoch": 0.35, "grad_norm": 0.7781263567606692, "learning_rate": 1.4981949103045033e-05, "loss": 0.2515, "step": 7706 }, { "epoch": 0.35, "grad_norm": 0.7129518940959256, "learning_rate": 1.498065892511791e-05, "loss": 0.4767, "step": 7707 }, { "epoch": 0.35, "grad_norm": 0.4355172629132053, "learning_rate": 1.4979368636923799e-05, "loss": 0.2423, "step": 7708 }, { "epoch": 0.35, "grad_norm": 0.406451466054222, "learning_rate": 1.4978078238491267e-05, "loss": 0.2506, "step": 7709 }, { "epoch": 0.35, "grad_norm": 0.8510379029456987, "learning_rate": 1.4976787729848876e-05, "loss": 0.5211, "step": 7710 }, { "epoch": 0.35, "grad_norm": 0.43072614021521766, "learning_rate": 1.4975497111025205e-05, "loss": 0.2869, "step": 7711 }, { "epoch": 0.35, "grad_norm": 0.6109401829733558, "learning_rate": 1.4974206382048821e-05, "loss": 0.2221, "step": 7712 }, { "epoch": 0.35, "grad_norm": 0.37449759308013053, "learning_rate": 1.4972915542948307e-05, "loss": 0.3323, "step": 7713 }, { "epoch": 0.35, "grad_norm": 0.36826296259947433, "learning_rate": 1.497162459375223e-05, "loss": 0.2771, "step": 7714 }, { "epoch": 0.35, "grad_norm": 1.000364802798678, "learning_rate": 1.4970333534489179e-05, "loss": 0.6142, "step": 7715 }, { "epoch": 0.35, "grad_norm": 0.49316606147010944, "learning_rate": 1.4969042365187733e-05, "loss": 0.3156, "step": 7716 }, { "epoch": 0.35, "grad_norm": 0.3433967400706558, "learning_rate": 1.4967751085876478e-05, "loss": 0.2627, "step": 7717 }, { "epoch": 0.35, "grad_norm": 0.5341254820092465, "learning_rate": 1.4966459696584003e-05, "loss": 0.3337, "step": 7718 }, { "epoch": 0.35, "grad_norm": 0.28607003233058537, "learning_rate": 1.49651681973389e-05, "loss": 0.1145, "step": 7719 }, { "epoch": 0.35, "grad_norm": 0.3984425535122404, "learning_rate": 1.4963876588169755e-05, "loss": 0.2787, "step": 7720 }, { "epoch": 0.35, "grad_norm": 0.42526240417533256, "learning_rate": 1.4962584869105165e-05, "loss": 0.3325, "step": 7721 }, { "epoch": 0.35, "grad_norm": 0.764820698383291, "learning_rate": 1.4961293040173732e-05, "loss": 0.3994, "step": 7722 }, { "epoch": 0.35, "grad_norm": 0.38728910270694583, "learning_rate": 1.4960001101404049e-05, "loss": 0.3046, "step": 7723 }, { "epoch": 0.35, "grad_norm": 1.1857177378591928, "learning_rate": 1.4958709052824726e-05, "loss": 0.6226, "step": 7724 }, { "epoch": 0.35, "grad_norm": 0.24510107023633101, "learning_rate": 1.4957416894464365e-05, "loss": 0.1817, "step": 7725 }, { "epoch": 0.35, "grad_norm": 0.44042378925609915, "learning_rate": 1.4956124626351569e-05, "loss": 0.294, "step": 7726 }, { "epoch": 0.35, "grad_norm": 0.5992472934075173, "learning_rate": 1.495483224851495e-05, "loss": 0.3541, "step": 7727 }, { "epoch": 0.36, "grad_norm": 0.4858652438128188, "learning_rate": 1.4953539760983123e-05, "loss": 0.328, "step": 7728 }, { "epoch": 0.36, "grad_norm": 0.4153391474138831, "learning_rate": 1.4952247163784699e-05, "loss": 0.221, "step": 7729 }, { "epoch": 0.36, "grad_norm": 0.5950909605946109, "learning_rate": 1.4950954456948294e-05, "loss": 0.4104, "step": 7730 }, { "epoch": 0.36, "grad_norm": 0.2941639226048363, "learning_rate": 1.4949661640502534e-05, "loss": 0.1822, "step": 7731 }, { "epoch": 0.36, "grad_norm": 0.3339222126817955, "learning_rate": 1.4948368714476031e-05, "loss": 0.2084, "step": 7732 }, { "epoch": 0.36, "grad_norm": 0.4458916348847113, "learning_rate": 1.4947075678897417e-05, "loss": 0.3505, "step": 7733 }, { "epoch": 0.36, "grad_norm": 0.827883699899436, "learning_rate": 1.4945782533795312e-05, "loss": 0.5298, "step": 7734 }, { "epoch": 0.36, "grad_norm": 0.34221002409591883, "learning_rate": 1.494448927919835e-05, "loss": 0.2503, "step": 7735 }, { "epoch": 0.36, "grad_norm": 0.4829806350476349, "learning_rate": 1.4943195915135164e-05, "loss": 0.3654, "step": 7736 }, { "epoch": 0.36, "grad_norm": 0.2522977925675003, "learning_rate": 1.4941902441634382e-05, "loss": 0.2067, "step": 7737 }, { "epoch": 0.36, "grad_norm": 0.3248101548722159, "learning_rate": 1.494060885872464e-05, "loss": 0.228, "step": 7738 }, { "epoch": 0.36, "grad_norm": 0.9739493727312746, "learning_rate": 1.4939315166434587e-05, "loss": 0.6741, "step": 7739 }, { "epoch": 0.36, "grad_norm": 0.4600819355638349, "learning_rate": 1.4938021364792849e-05, "loss": 0.4051, "step": 7740 }, { "epoch": 0.36, "grad_norm": 0.37952075397481044, "learning_rate": 1.4936727453828084e-05, "loss": 0.2897, "step": 7741 }, { "epoch": 0.36, "grad_norm": 0.494547835450643, "learning_rate": 1.4935433433568928e-05, "loss": 0.286, "step": 7742 }, { "epoch": 0.36, "grad_norm": 0.2468075657762374, "learning_rate": 1.4934139304044033e-05, "loss": 0.1567, "step": 7743 }, { "epoch": 0.36, "grad_norm": 0.41247573255124825, "learning_rate": 1.4932845065282049e-05, "loss": 0.2699, "step": 7744 }, { "epoch": 0.36, "grad_norm": 0.3027500351803961, "learning_rate": 1.4931550717311631e-05, "loss": 0.2631, "step": 7745 }, { "epoch": 0.36, "grad_norm": 0.6105508173776636, "learning_rate": 1.493025626016143e-05, "loss": 0.4396, "step": 7746 }, { "epoch": 0.36, "grad_norm": 0.6433401918505083, "learning_rate": 1.492896169386011e-05, "loss": 0.3324, "step": 7747 }, { "epoch": 0.36, "grad_norm": 0.3325590133324734, "learning_rate": 1.4927667018436329e-05, "loss": 0.2662, "step": 7748 }, { "epoch": 0.36, "grad_norm": 0.2687559377333157, "learning_rate": 1.4926372233918748e-05, "loss": 0.2029, "step": 7749 }, { "epoch": 0.36, "grad_norm": 0.7050268010839703, "learning_rate": 1.4925077340336037e-05, "loss": 0.3963, "step": 7750 }, { "epoch": 0.36, "grad_norm": 0.46462923554628316, "learning_rate": 1.4923782337716857e-05, "loss": 0.3402, "step": 7751 }, { "epoch": 0.36, "grad_norm": 0.530941931348958, "learning_rate": 1.4922487226089881e-05, "loss": 0.3117, "step": 7752 }, { "epoch": 0.36, "grad_norm": 0.33025233495241746, "learning_rate": 1.4921192005483783e-05, "loss": 0.2743, "step": 7753 }, { "epoch": 0.36, "grad_norm": 0.4935835185317433, "learning_rate": 1.4919896675927238e-05, "loss": 0.3958, "step": 7754 }, { "epoch": 0.36, "grad_norm": 0.21426978207574748, "learning_rate": 1.4918601237448925e-05, "loss": 0.0752, "step": 7755 }, { "epoch": 0.36, "grad_norm": 0.33190740819757114, "learning_rate": 1.4917305690077517e-05, "loss": 0.2542, "step": 7756 }, { "epoch": 0.36, "grad_norm": 0.36897115364795396, "learning_rate": 1.4916010033841702e-05, "loss": 0.3372, "step": 7757 }, { "epoch": 0.36, "grad_norm": 0.8169951342206778, "learning_rate": 1.4914714268770162e-05, "loss": 0.3886, "step": 7758 }, { "epoch": 0.36, "grad_norm": 0.4114012127335106, "learning_rate": 1.4913418394891586e-05, "loss": 0.2869, "step": 7759 }, { "epoch": 0.36, "grad_norm": 0.5771681194701475, "learning_rate": 1.4912122412234665e-05, "loss": 0.3654, "step": 7760 }, { "epoch": 0.36, "grad_norm": 0.24522022364610135, "learning_rate": 1.4910826320828085e-05, "loss": 0.189, "step": 7761 }, { "epoch": 0.36, "grad_norm": 0.6115058390436778, "learning_rate": 1.4909530120700542e-05, "loss": 0.34, "step": 7762 }, { "epoch": 0.36, "grad_norm": 0.4266171602863389, "learning_rate": 1.4908233811880737e-05, "loss": 0.3283, "step": 7763 }, { "epoch": 0.36, "grad_norm": 0.4012109715726487, "learning_rate": 1.4906937394397362e-05, "loss": 0.3491, "step": 7764 }, { "epoch": 0.36, "grad_norm": 0.4084112474140164, "learning_rate": 1.4905640868279128e-05, "loss": 0.1656, "step": 7765 }, { "epoch": 0.36, "grad_norm": 0.4775386224395846, "learning_rate": 1.490434423355473e-05, "loss": 0.3445, "step": 7766 }, { "epoch": 0.36, "grad_norm": 0.43953484370791385, "learning_rate": 1.490304749025288e-05, "loss": 0.2808, "step": 7767 }, { "epoch": 0.36, "grad_norm": 0.42510950162149563, "learning_rate": 1.490175063840228e-05, "loss": 0.2435, "step": 7768 }, { "epoch": 0.36, "grad_norm": 0.3742307647408235, "learning_rate": 1.4900453678031648e-05, "loss": 0.3288, "step": 7769 }, { "epoch": 0.36, "grad_norm": 0.4733216652930082, "learning_rate": 1.4899156609169693e-05, "loss": 0.2591, "step": 7770 }, { "epoch": 0.36, "grad_norm": 0.3275064916987912, "learning_rate": 1.4897859431845135e-05, "loss": 0.235, "step": 7771 }, { "epoch": 0.36, "grad_norm": 0.32275580791758907, "learning_rate": 1.4896562146086688e-05, "loss": 0.2729, "step": 7772 }, { "epoch": 0.36, "grad_norm": 1.0341525979507533, "learning_rate": 1.4895264751923075e-05, "loss": 0.5519, "step": 7773 }, { "epoch": 0.36, "grad_norm": 0.39680009365542435, "learning_rate": 1.4893967249383017e-05, "loss": 0.2581, "step": 7774 }, { "epoch": 0.36, "grad_norm": 0.545285040204286, "learning_rate": 1.4892669638495246e-05, "loss": 0.3783, "step": 7775 }, { "epoch": 0.36, "grad_norm": 0.2774160551576022, "learning_rate": 1.4891371919288478e-05, "loss": 0.2349, "step": 7776 }, { "epoch": 0.36, "grad_norm": 0.6715648090914152, "learning_rate": 1.4890074091791453e-05, "loss": 0.4031, "step": 7777 }, { "epoch": 0.36, "grad_norm": 0.38754611699503205, "learning_rate": 1.4888776156032905e-05, "loss": 0.2495, "step": 7778 }, { "epoch": 0.36, "grad_norm": 0.5739918941718014, "learning_rate": 1.488747811204156e-05, "loss": 0.4566, "step": 7779 }, { "epoch": 0.36, "grad_norm": 0.38215697134655247, "learning_rate": 1.4886179959846161e-05, "loss": 0.3076, "step": 7780 }, { "epoch": 0.36, "grad_norm": 0.36295234732575, "learning_rate": 1.4884881699475444e-05, "loss": 0.217, "step": 7781 }, { "epoch": 0.36, "grad_norm": 0.32531548143010985, "learning_rate": 1.488358333095816e-05, "loss": 0.2034, "step": 7782 }, { "epoch": 0.36, "grad_norm": 1.4258753296914577, "learning_rate": 1.4882284854323046e-05, "loss": 0.7234, "step": 7783 }, { "epoch": 0.36, "grad_norm": 0.25709501174008975, "learning_rate": 1.488098626959885e-05, "loss": 0.2203, "step": 7784 }, { "epoch": 0.36, "grad_norm": 0.671774870135189, "learning_rate": 1.4879687576814321e-05, "loss": 0.4725, "step": 7785 }, { "epoch": 0.36, "grad_norm": 0.7245898679451107, "learning_rate": 1.4878388775998213e-05, "loss": 0.4229, "step": 7786 }, { "epoch": 0.36, "grad_norm": 0.27899485682105696, "learning_rate": 1.4877089867179279e-05, "loss": 0.1771, "step": 7787 }, { "epoch": 0.36, "grad_norm": 0.4215972194127377, "learning_rate": 1.4875790850386278e-05, "loss": 0.3543, "step": 7788 }, { "epoch": 0.36, "grad_norm": 0.45163028722179144, "learning_rate": 1.4874491725647966e-05, "loss": 0.3017, "step": 7789 }, { "epoch": 0.36, "grad_norm": 0.40182115791885586, "learning_rate": 1.4873192492993108e-05, "loss": 0.3093, "step": 7790 }, { "epoch": 0.36, "grad_norm": 1.0182945471476186, "learning_rate": 1.487189315245046e-05, "loss": 0.3651, "step": 7791 }, { "epoch": 0.36, "grad_norm": 0.3433225963607869, "learning_rate": 1.4870593704048797e-05, "loss": 0.3, "step": 7792 }, { "epoch": 0.36, "grad_norm": 0.3855147290128399, "learning_rate": 1.4869294147816882e-05, "loss": 0.3057, "step": 7793 }, { "epoch": 0.36, "grad_norm": 0.3476959585570811, "learning_rate": 1.4867994483783485e-05, "loss": 0.194, "step": 7794 }, { "epoch": 0.36, "grad_norm": 0.3291448556784333, "learning_rate": 1.4866694711977387e-05, "loss": 0.2068, "step": 7795 }, { "epoch": 0.36, "grad_norm": 0.4661788272851446, "learning_rate": 1.4865394832427359e-05, "loss": 0.3505, "step": 7796 }, { "epoch": 0.36, "grad_norm": 0.49707687985821103, "learning_rate": 1.4864094845162176e-05, "loss": 0.3077, "step": 7797 }, { "epoch": 0.36, "grad_norm": 1.0456008003947193, "learning_rate": 1.4862794750210618e-05, "loss": 0.4252, "step": 7798 }, { "epoch": 0.36, "grad_norm": 0.39351834082360715, "learning_rate": 1.4861494547601476e-05, "loss": 0.3077, "step": 7799 }, { "epoch": 0.36, "grad_norm": 0.3359619855240162, "learning_rate": 1.4860194237363529e-05, "loss": 0.2657, "step": 7800 }, { "epoch": 0.36, "grad_norm": 0.36573436025086803, "learning_rate": 1.4858893819525566e-05, "loss": 0.1942, "step": 7801 }, { "epoch": 0.36, "grad_norm": 0.3332011760622113, "learning_rate": 1.4857593294116374e-05, "loss": 0.2754, "step": 7802 }, { "epoch": 0.36, "grad_norm": 0.6039928726613857, "learning_rate": 1.4856292661164752e-05, "loss": 0.4356, "step": 7803 }, { "epoch": 0.36, "grad_norm": 0.3630117179081526, "learning_rate": 1.4854991920699489e-05, "loss": 0.2599, "step": 7804 }, { "epoch": 0.36, "grad_norm": 0.359519962779008, "learning_rate": 1.4853691072749385e-05, "loss": 0.2881, "step": 7805 }, { "epoch": 0.36, "grad_norm": 1.3777172877747461, "learning_rate": 1.4852390117343241e-05, "loss": 0.7615, "step": 7806 }, { "epoch": 0.36, "grad_norm": 0.5887963994130917, "learning_rate": 1.4851089054509852e-05, "loss": 0.3318, "step": 7807 }, { "epoch": 0.36, "grad_norm": 0.3148495586312449, "learning_rate": 1.484978788427803e-05, "loss": 0.2984, "step": 7808 }, { "epoch": 0.36, "grad_norm": 0.4805865433174144, "learning_rate": 1.484848660667658e-05, "loss": 0.2931, "step": 7809 }, { "epoch": 0.36, "grad_norm": 0.3478883102181937, "learning_rate": 1.4847185221734306e-05, "loss": 0.1368, "step": 7810 }, { "epoch": 0.36, "grad_norm": 0.4151252874908043, "learning_rate": 1.4845883729480024e-05, "loss": 0.3202, "step": 7811 }, { "epoch": 0.36, "grad_norm": 0.43112096262158095, "learning_rate": 1.4844582129942546e-05, "loss": 0.3458, "step": 7812 }, { "epoch": 0.36, "grad_norm": 0.6330890758935299, "learning_rate": 1.4843280423150692e-05, "loss": 0.3452, "step": 7813 }, { "epoch": 0.36, "grad_norm": 0.38028321962458783, "learning_rate": 1.4841978609133274e-05, "loss": 0.2931, "step": 7814 }, { "epoch": 0.36, "grad_norm": 0.32305049548798515, "learning_rate": 1.4840676687919117e-05, "loss": 0.1921, "step": 7815 }, { "epoch": 0.36, "grad_norm": 0.35247332815829113, "learning_rate": 1.4839374659537047e-05, "loss": 0.325, "step": 7816 }, { "epoch": 0.36, "grad_norm": 0.36784026367040007, "learning_rate": 1.483807252401588e-05, "loss": 0.2262, "step": 7817 }, { "epoch": 0.36, "grad_norm": 1.0739606124948378, "learning_rate": 1.4836770281384456e-05, "loss": 0.7776, "step": 7818 }, { "epoch": 0.36, "grad_norm": 0.7955495594178713, "learning_rate": 1.4835467931671597e-05, "loss": 0.4915, "step": 7819 }, { "epoch": 0.36, "grad_norm": 0.26523101678262884, "learning_rate": 1.4834165474906139e-05, "loss": 0.2348, "step": 7820 }, { "epoch": 0.36, "grad_norm": 0.2875616605412137, "learning_rate": 1.4832862911116917e-05, "loss": 0.204, "step": 7821 }, { "epoch": 0.36, "grad_norm": 1.8374595968464622, "learning_rate": 1.4831560240332769e-05, "loss": 0.6774, "step": 7822 }, { "epoch": 0.36, "grad_norm": 0.3591524367459379, "learning_rate": 1.4830257462582533e-05, "loss": 0.2252, "step": 7823 }, { "epoch": 0.36, "grad_norm": 0.4348323718918711, "learning_rate": 1.4828954577895051e-05, "loss": 0.3103, "step": 7824 }, { "epoch": 0.36, "grad_norm": 0.9635652804767664, "learning_rate": 1.4827651586299172e-05, "loss": 0.4392, "step": 7825 }, { "epoch": 0.36, "grad_norm": 0.3344395841305085, "learning_rate": 1.4826348487823737e-05, "loss": 0.205, "step": 7826 }, { "epoch": 0.36, "grad_norm": 0.3311737333956951, "learning_rate": 1.4825045282497598e-05, "loss": 0.1857, "step": 7827 }, { "epoch": 0.36, "grad_norm": 0.4317332179036389, "learning_rate": 1.4823741970349608e-05, "loss": 0.33, "step": 7828 }, { "epoch": 0.36, "grad_norm": 0.3901225200296523, "learning_rate": 1.482243855140862e-05, "loss": 0.2666, "step": 7829 }, { "epoch": 0.36, "grad_norm": 1.3315418016923604, "learning_rate": 1.4821135025703491e-05, "loss": 0.3777, "step": 7830 }, { "epoch": 0.36, "grad_norm": 0.4125330967983228, "learning_rate": 1.481983139326308e-05, "loss": 0.3349, "step": 7831 }, { "epoch": 0.36, "grad_norm": 0.43664740143919506, "learning_rate": 1.4818527654116244e-05, "loss": 0.2689, "step": 7832 }, { "epoch": 0.36, "grad_norm": 0.3208479633938048, "learning_rate": 1.4817223808291851e-05, "loss": 0.171, "step": 7833 }, { "epoch": 0.36, "grad_norm": 0.6909219444727045, "learning_rate": 1.4815919855818766e-05, "loss": 0.2952, "step": 7834 }, { "epoch": 0.36, "grad_norm": 0.5140361748698637, "learning_rate": 1.4814615796725858e-05, "loss": 0.2851, "step": 7835 }, { "epoch": 0.36, "grad_norm": 0.6842546017757796, "learning_rate": 1.4813311631041996e-05, "loss": 0.2913, "step": 7836 }, { "epoch": 0.36, "grad_norm": 1.4476760441102245, "learning_rate": 1.4812007358796054e-05, "loss": 0.4931, "step": 7837 }, { "epoch": 0.36, "grad_norm": 0.4069654960558933, "learning_rate": 1.4810702980016909e-05, "loss": 0.278, "step": 7838 }, { "epoch": 0.36, "grad_norm": 0.36574871739631487, "learning_rate": 1.480939849473343e-05, "loss": 0.2569, "step": 7839 }, { "epoch": 0.36, "grad_norm": 0.33657563163694953, "learning_rate": 1.4808093902974512e-05, "loss": 0.1968, "step": 7840 }, { "epoch": 0.36, "grad_norm": 0.4742847426973587, "learning_rate": 1.4806789204769023e-05, "loss": 0.2926, "step": 7841 }, { "epoch": 0.36, "grad_norm": 1.3767674557240077, "learning_rate": 1.4805484400145856e-05, "loss": 0.4907, "step": 7842 }, { "epoch": 0.36, "grad_norm": 0.56125278220848, "learning_rate": 1.4804179489133896e-05, "loss": 0.3001, "step": 7843 }, { "epoch": 0.36, "grad_norm": 0.3708613289363053, "learning_rate": 1.4802874471762034e-05, "loss": 0.297, "step": 7844 }, { "epoch": 0.36, "grad_norm": 1.3387905667086102, "learning_rate": 1.4801569348059158e-05, "loss": 0.7449, "step": 7845 }, { "epoch": 0.36, "grad_norm": 0.24962037965703626, "learning_rate": 1.4800264118054164e-05, "loss": 0.1377, "step": 7846 }, { "epoch": 0.36, "grad_norm": 0.44834685204531427, "learning_rate": 1.4798958781775949e-05, "loss": 0.2869, "step": 7847 }, { "epoch": 0.36, "grad_norm": 0.39958852727916466, "learning_rate": 1.4797653339253418e-05, "loss": 0.3118, "step": 7848 }, { "epoch": 0.36, "grad_norm": 0.9075717803086096, "learning_rate": 1.4796347790515458e-05, "loss": 0.4521, "step": 7849 }, { "epoch": 0.36, "grad_norm": 0.4846194757919043, "learning_rate": 1.4795042135590984e-05, "loss": 0.2819, "step": 7850 }, { "epoch": 0.36, "grad_norm": 0.6310186984864378, "learning_rate": 1.4793736374508898e-05, "loss": 0.3485, "step": 7851 }, { "epoch": 0.36, "grad_norm": 0.27679613071230025, "learning_rate": 1.479243050729811e-05, "loss": 0.195, "step": 7852 }, { "epoch": 0.36, "grad_norm": 0.4883150077277149, "learning_rate": 1.4791124533987529e-05, "loss": 0.2984, "step": 7853 }, { "epoch": 0.36, "grad_norm": 0.5010279218325235, "learning_rate": 1.4789818454606069e-05, "loss": 0.3807, "step": 7854 }, { "epoch": 0.36, "grad_norm": 0.42157385868804226, "learning_rate": 1.4788512269182644e-05, "loss": 0.3327, "step": 7855 }, { "epoch": 0.36, "grad_norm": 0.40346775485871417, "learning_rate": 1.478720597774617e-05, "loss": 0.2016, "step": 7856 }, { "epoch": 0.36, "grad_norm": 0.6244678470236887, "learning_rate": 1.4785899580325575e-05, "loss": 0.3865, "step": 7857 }, { "epoch": 0.36, "grad_norm": 0.3220452116200983, "learning_rate": 1.4784593076949772e-05, "loss": 0.2133, "step": 7858 }, { "epoch": 0.36, "grad_norm": 0.3150546508299406, "learning_rate": 1.4783286467647693e-05, "loss": 0.2465, "step": 7859 }, { "epoch": 0.36, "grad_norm": 0.4247304489678315, "learning_rate": 1.478197975244826e-05, "loss": 0.3519, "step": 7860 }, { "epoch": 0.36, "grad_norm": 1.3221130619484365, "learning_rate": 1.4780672931380404e-05, "loss": 0.8256, "step": 7861 }, { "epoch": 0.36, "grad_norm": 0.35784978331638584, "learning_rate": 1.4779366004473057e-05, "loss": 0.2162, "step": 7862 }, { "epoch": 0.36, "grad_norm": 1.3485660639077333, "learning_rate": 1.4778058971755154e-05, "loss": 0.7468, "step": 7863 }, { "epoch": 0.36, "grad_norm": 0.41328308533914754, "learning_rate": 1.477675183325563e-05, "loss": 0.3547, "step": 7864 }, { "epoch": 0.36, "grad_norm": 0.47410654010358383, "learning_rate": 1.4775444589003423e-05, "loss": 0.3158, "step": 7865 }, { "epoch": 0.36, "grad_norm": 0.26471436289337624, "learning_rate": 1.4774137239027478e-05, "loss": 0.1481, "step": 7866 }, { "epoch": 0.36, "grad_norm": 0.4832334330896569, "learning_rate": 1.4772829783356735e-05, "loss": 0.37, "step": 7867 }, { "epoch": 0.36, "grad_norm": 1.0606737178225087, "learning_rate": 1.477152222202014e-05, "loss": 0.4994, "step": 7868 }, { "epoch": 0.36, "grad_norm": 0.4027624659810944, "learning_rate": 1.4770214555046641e-05, "loss": 0.2399, "step": 7869 }, { "epoch": 0.36, "grad_norm": 0.6097939129384986, "learning_rate": 1.4768906782465191e-05, "loss": 0.441, "step": 7870 }, { "epoch": 0.36, "grad_norm": 0.40794221594144514, "learning_rate": 1.4767598904304738e-05, "loss": 0.3193, "step": 7871 }, { "epoch": 0.36, "grad_norm": 0.27930766864815143, "learning_rate": 1.4766290920594246e-05, "loss": 0.1902, "step": 7872 }, { "epoch": 0.36, "grad_norm": 1.3486073932357248, "learning_rate": 1.4764982831362662e-05, "loss": 0.8363, "step": 7873 }, { "epoch": 0.36, "grad_norm": 0.6144086859802763, "learning_rate": 1.4763674636638953e-05, "loss": 0.3423, "step": 7874 }, { "epoch": 0.36, "grad_norm": 0.2866945476137367, "learning_rate": 1.4762366336452076e-05, "loss": 0.2483, "step": 7875 }, { "epoch": 0.36, "grad_norm": 0.6851916229472477, "learning_rate": 1.4761057930831002e-05, "loss": 0.4337, "step": 7876 }, { "epoch": 0.36, "grad_norm": 0.5971826068425716, "learning_rate": 1.4759749419804695e-05, "loss": 0.3384, "step": 7877 }, { "epoch": 0.36, "grad_norm": 0.2899109982770521, "learning_rate": 1.4758440803402121e-05, "loss": 0.2257, "step": 7878 }, { "epoch": 0.36, "grad_norm": 0.3749389145746594, "learning_rate": 1.4757132081652252e-05, "loss": 0.258, "step": 7879 }, { "epoch": 0.36, "grad_norm": 0.4596507876383496, "learning_rate": 1.4755823254584067e-05, "loss": 0.3191, "step": 7880 }, { "epoch": 0.36, "grad_norm": 0.5316077432732018, "learning_rate": 1.4754514322226536e-05, "loss": 0.3551, "step": 7881 }, { "epoch": 0.36, "grad_norm": 0.7087777964089552, "learning_rate": 1.4753205284608642e-05, "loss": 0.357, "step": 7882 }, { "epoch": 0.36, "grad_norm": 0.32519538989043967, "learning_rate": 1.4751896141759365e-05, "loss": 0.2771, "step": 7883 }, { "epoch": 0.36, "grad_norm": 0.6411091437119668, "learning_rate": 1.4750586893707687e-05, "loss": 0.3432, "step": 7884 }, { "epoch": 0.36, "grad_norm": 0.2674743095274538, "learning_rate": 1.4749277540482594e-05, "loss": 0.1276, "step": 7885 }, { "epoch": 0.36, "grad_norm": 0.9305203339120885, "learning_rate": 1.474796808211307e-05, "loss": 0.4692, "step": 7886 }, { "epoch": 0.36, "grad_norm": 0.32261790119606776, "learning_rate": 1.4746658518628113e-05, "loss": 0.2825, "step": 7887 }, { "epoch": 0.36, "grad_norm": 0.492738703007906, "learning_rate": 1.4745348850056708e-05, "loss": 0.3242, "step": 7888 }, { "epoch": 0.36, "grad_norm": 0.8626728663106363, "learning_rate": 1.4744039076427855e-05, "loss": 0.3639, "step": 7889 }, { "epoch": 0.36, "grad_norm": 0.34171653536367097, "learning_rate": 1.4742729197770551e-05, "loss": 0.2413, "step": 7890 }, { "epoch": 0.36, "grad_norm": 0.3856710106346861, "learning_rate": 1.4741419214113794e-05, "loss": 0.336, "step": 7891 }, { "epoch": 0.36, "grad_norm": 0.29951083161481323, "learning_rate": 1.4740109125486582e-05, "loss": 0.1222, "step": 7892 }, { "epoch": 0.36, "grad_norm": 0.39610128264818445, "learning_rate": 1.4738798931917924e-05, "loss": 0.3514, "step": 7893 }, { "epoch": 0.36, "grad_norm": 0.8783801048081712, "learning_rate": 1.4737488633436825e-05, "loss": 0.5725, "step": 7894 }, { "epoch": 0.36, "grad_norm": 0.30549693646298526, "learning_rate": 1.4736178230072295e-05, "loss": 0.2359, "step": 7895 }, { "epoch": 0.36, "grad_norm": 0.4282791303883486, "learning_rate": 1.4734867721853341e-05, "loss": 0.3233, "step": 7896 }, { "epoch": 0.36, "grad_norm": 1.3717924910965442, "learning_rate": 1.4733557108808983e-05, "loss": 0.837, "step": 7897 }, { "epoch": 0.36, "grad_norm": 0.4023643378076422, "learning_rate": 1.473224639096823e-05, "loss": 0.2132, "step": 7898 }, { "epoch": 0.36, "grad_norm": 0.3448370395064776, "learning_rate": 1.4730935568360103e-05, "loss": 0.2977, "step": 7899 }, { "epoch": 0.36, "grad_norm": 0.3585050401691324, "learning_rate": 1.4729624641013622e-05, "loss": 0.2672, "step": 7900 }, { "epoch": 0.36, "grad_norm": 0.38230463246587654, "learning_rate": 1.4728313608957812e-05, "loss": 0.1898, "step": 7901 }, { "epoch": 0.36, "grad_norm": 0.6300879809765629, "learning_rate": 1.4727002472221695e-05, "loss": 0.3924, "step": 7902 }, { "epoch": 0.36, "grad_norm": 0.39544912362634554, "learning_rate": 1.4725691230834295e-05, "loss": 0.3392, "step": 7903 }, { "epoch": 0.36, "grad_norm": 0.6209054387540943, "learning_rate": 1.472437988482465e-05, "loss": 0.4153, "step": 7904 }, { "epoch": 0.36, "grad_norm": 0.361204160532707, "learning_rate": 1.4723068434221788e-05, "loss": 0.2555, "step": 7905 }, { "epoch": 0.36, "grad_norm": 0.3448970376454606, "learning_rate": 1.4721756879054743e-05, "loss": 0.1952, "step": 7906 }, { "epoch": 0.36, "grad_norm": 0.40748728629967584, "learning_rate": 1.472044521935255e-05, "loss": 0.3115, "step": 7907 }, { "epoch": 0.36, "grad_norm": 0.3186705918019939, "learning_rate": 1.4719133455144252e-05, "loss": 0.1954, "step": 7908 }, { "epoch": 0.36, "grad_norm": 0.8681171058775894, "learning_rate": 1.4717821586458884e-05, "loss": 0.6072, "step": 7909 }, { "epoch": 0.36, "grad_norm": 0.6189510283099342, "learning_rate": 1.4716509613325497e-05, "loss": 0.3696, "step": 7910 }, { "epoch": 0.36, "grad_norm": 0.2997072648652239, "learning_rate": 1.471519753577313e-05, "loss": 0.224, "step": 7911 }, { "epoch": 0.36, "grad_norm": 0.30499374642371, "learning_rate": 1.4713885353830835e-05, "loss": 0.1816, "step": 7912 }, { "epoch": 0.36, "grad_norm": 0.6811118302366835, "learning_rate": 1.4712573067527665e-05, "loss": 0.4057, "step": 7913 }, { "epoch": 0.36, "grad_norm": 0.39607287294458915, "learning_rate": 1.4711260676892664e-05, "loss": 0.2485, "step": 7914 }, { "epoch": 0.36, "grad_norm": 0.363824877807546, "learning_rate": 1.4709948181954894e-05, "loss": 0.308, "step": 7915 }, { "epoch": 0.36, "grad_norm": 0.6801429841160178, "learning_rate": 1.4708635582743412e-05, "loss": 0.396, "step": 7916 }, { "epoch": 0.36, "grad_norm": 0.3852123363715314, "learning_rate": 1.4707322879287277e-05, "loss": 0.2913, "step": 7917 }, { "epoch": 0.36, "grad_norm": 0.18182427487734024, "learning_rate": 1.4706010071615548e-05, "loss": 0.0881, "step": 7918 }, { "epoch": 0.36, "grad_norm": 0.3677885832689667, "learning_rate": 1.4704697159757294e-05, "loss": 0.3203, "step": 7919 }, { "epoch": 0.36, "grad_norm": 0.4127680168204437, "learning_rate": 1.4703384143741578e-05, "loss": 0.2991, "step": 7920 }, { "epoch": 0.36, "grad_norm": 0.9476618834183624, "learning_rate": 1.4702071023597469e-05, "loss": 0.3927, "step": 7921 }, { "epoch": 0.36, "grad_norm": 0.4260168722688486, "learning_rate": 1.470075779935404e-05, "loss": 0.3501, "step": 7922 }, { "epoch": 0.36, "grad_norm": 0.3108313647986387, "learning_rate": 1.4699444471040366e-05, "loss": 0.2775, "step": 7923 }, { "epoch": 0.36, "grad_norm": 0.20386817269294166, "learning_rate": 1.469813103868552e-05, "loss": 0.1156, "step": 7924 }, { "epoch": 0.36, "grad_norm": 1.2270832462906907, "learning_rate": 1.469681750231858e-05, "loss": 0.6753, "step": 7925 }, { "epoch": 0.36, "grad_norm": 0.3622557090887446, "learning_rate": 1.4695503861968627e-05, "loss": 0.2843, "step": 7926 }, { "epoch": 0.36, "grad_norm": 0.5879505062303672, "learning_rate": 1.4694190117664747e-05, "loss": 0.2963, "step": 7927 }, { "epoch": 0.36, "grad_norm": 1.0449068552269294, "learning_rate": 1.4692876269436021e-05, "loss": 0.5348, "step": 7928 }, { "epoch": 0.36, "grad_norm": 0.3716668457941565, "learning_rate": 1.4691562317311533e-05, "loss": 0.2616, "step": 7929 }, { "epoch": 0.36, "grad_norm": 0.31224075015169955, "learning_rate": 1.4690248261320383e-05, "loss": 0.1878, "step": 7930 }, { "epoch": 0.36, "grad_norm": 0.3823552913583326, "learning_rate": 1.4688934101491654e-05, "loss": 0.2697, "step": 7931 }, { "epoch": 0.36, "grad_norm": 0.38544328469743905, "learning_rate": 1.4687619837854446e-05, "loss": 0.2965, "step": 7932 }, { "epoch": 0.36, "grad_norm": 1.0319126120574171, "learning_rate": 1.468630547043785e-05, "loss": 0.456, "step": 7933 }, { "epoch": 0.36, "grad_norm": 0.44713541274359253, "learning_rate": 1.4684990999270967e-05, "loss": 0.2449, "step": 7934 }, { "epoch": 0.36, "grad_norm": 0.41193796182559833, "learning_rate": 1.46836764243829e-05, "loss": 0.3045, "step": 7935 }, { "epoch": 0.36, "grad_norm": 0.3508664970758144, "learning_rate": 1.468236174580275e-05, "loss": 0.2796, "step": 7936 }, { "epoch": 0.36, "grad_norm": 0.3092557933293933, "learning_rate": 1.4681046963559627e-05, "loss": 0.1842, "step": 7937 }, { "epoch": 0.36, "grad_norm": 0.41481532539277405, "learning_rate": 1.4679732077682634e-05, "loss": 0.2982, "step": 7938 }, { "epoch": 0.36, "grad_norm": 0.5574293810218082, "learning_rate": 1.4678417088200883e-05, "loss": 0.3214, "step": 7939 }, { "epoch": 0.36, "grad_norm": 0.95554345530316, "learning_rate": 1.4677101995143485e-05, "loss": 0.4674, "step": 7940 }, { "epoch": 0.36, "grad_norm": 0.33058097614808263, "learning_rate": 1.467578679853956e-05, "loss": 0.2423, "step": 7941 }, { "epoch": 0.36, "grad_norm": 0.26044777945431324, "learning_rate": 1.4674471498418222e-05, "loss": 0.1978, "step": 7942 }, { "epoch": 0.36, "grad_norm": 0.5269813827628365, "learning_rate": 1.467315609480859e-05, "loss": 0.3923, "step": 7943 }, { "epoch": 0.36, "grad_norm": 0.3098751073540968, "learning_rate": 1.4671840587739782e-05, "loss": 0.2437, "step": 7944 }, { "epoch": 0.36, "grad_norm": 0.7314225034006392, "learning_rate": 1.4670524977240929e-05, "loss": 0.5218, "step": 7945 }, { "epoch": 0.37, "grad_norm": 0.5604349577158864, "learning_rate": 1.4669209263341156e-05, "loss": 0.4049, "step": 7946 }, { "epoch": 0.37, "grad_norm": 0.34016908362061904, "learning_rate": 1.4667893446069588e-05, "loss": 0.2359, "step": 7947 }, { "epoch": 0.37, "grad_norm": 1.2806633215675647, "learning_rate": 1.4666577525455359e-05, "loss": 0.7104, "step": 7948 }, { "epoch": 0.37, "grad_norm": 0.3691276647491108, "learning_rate": 1.4665261501527602e-05, "loss": 0.2811, "step": 7949 }, { "epoch": 0.37, "grad_norm": 0.3217065540986312, "learning_rate": 1.4663945374315449e-05, "loss": 0.2361, "step": 7950 }, { "epoch": 0.37, "grad_norm": 0.5234297630236737, "learning_rate": 1.4662629143848045e-05, "loss": 0.3545, "step": 7951 }, { "epoch": 0.37, "grad_norm": 1.0610173942891301, "learning_rate": 1.466131281015452e-05, "loss": 0.6923, "step": 7952 }, { "epoch": 0.37, "grad_norm": 0.4066451580341624, "learning_rate": 1.4659996373264027e-05, "loss": 0.1763, "step": 7953 }, { "epoch": 0.37, "grad_norm": 0.30718121880010313, "learning_rate": 1.4658679833205705e-05, "loss": 0.2561, "step": 7954 }, { "epoch": 0.37, "grad_norm": 0.39582908930815763, "learning_rate": 1.46573631900087e-05, "loss": 0.3028, "step": 7955 }, { "epoch": 0.37, "grad_norm": 0.4308606047808635, "learning_rate": 1.465604644370216e-05, "loss": 0.2918, "step": 7956 }, { "epoch": 0.37, "grad_norm": 0.4949526990915972, "learning_rate": 1.4654729594315245e-05, "loss": 0.3158, "step": 7957 }, { "epoch": 0.37, "grad_norm": 0.4352898980238516, "learning_rate": 1.4653412641877099e-05, "loss": 0.3235, "step": 7958 }, { "epoch": 0.37, "grad_norm": 0.4124904737182277, "learning_rate": 1.4652095586416884e-05, "loss": 0.3006, "step": 7959 }, { "epoch": 0.37, "grad_norm": 0.4549120262659146, "learning_rate": 1.465077842796376e-05, "loss": 0.3107, "step": 7960 }, { "epoch": 0.37, "grad_norm": 0.5637242397746781, "learning_rate": 1.4649461166546877e-05, "loss": 0.4341, "step": 7961 }, { "epoch": 0.37, "grad_norm": 0.28111213130277996, "learning_rate": 1.464814380219541e-05, "loss": 0.1985, "step": 7962 }, { "epoch": 0.37, "grad_norm": 0.3545821316310394, "learning_rate": 1.4646826334938516e-05, "loss": 0.2733, "step": 7963 }, { "epoch": 0.37, "grad_norm": 1.916469835146868, "learning_rate": 1.4645508764805368e-05, "loss": 0.867, "step": 7964 }, { "epoch": 0.37, "grad_norm": 0.4289015987938433, "learning_rate": 1.4644191091825132e-05, "loss": 0.2952, "step": 7965 }, { "epoch": 0.37, "grad_norm": 0.43287004660534717, "learning_rate": 1.4642873316026982e-05, "loss": 0.2844, "step": 7966 }, { "epoch": 0.37, "grad_norm": 0.45822651917734164, "learning_rate": 1.4641555437440088e-05, "loss": 0.3597, "step": 7967 }, { "epoch": 0.37, "grad_norm": 0.35311949847482255, "learning_rate": 1.4640237456093636e-05, "loss": 0.2458, "step": 7968 }, { "epoch": 0.37, "grad_norm": 0.48707537427700254, "learning_rate": 1.4638919372016796e-05, "loss": 0.3445, "step": 7969 }, { "epoch": 0.37, "grad_norm": 0.2933314698164553, "learning_rate": 1.4637601185238753e-05, "loss": 0.2222, "step": 7970 }, { "epoch": 0.37, "grad_norm": 0.47918523378380506, "learning_rate": 1.4636282895788689e-05, "loss": 0.3265, "step": 7971 }, { "epoch": 0.37, "grad_norm": 0.5620082348171247, "learning_rate": 1.4634964503695792e-05, "loss": 0.4098, "step": 7972 }, { "epoch": 0.37, "grad_norm": 0.41554754790742304, "learning_rate": 1.4633646008989245e-05, "loss": 0.2601, "step": 7973 }, { "epoch": 0.37, "grad_norm": 0.4391490946720585, "learning_rate": 1.4632327411698244e-05, "loss": 0.2964, "step": 7974 }, { "epoch": 0.37, "grad_norm": 0.35979452627317526, "learning_rate": 1.4631008711851977e-05, "loss": 0.3149, "step": 7975 }, { "epoch": 0.37, "grad_norm": 0.4462055948579366, "learning_rate": 1.4629689909479641e-05, "loss": 0.166, "step": 7976 }, { "epoch": 0.37, "grad_norm": 0.4650248868492633, "learning_rate": 1.4628371004610434e-05, "loss": 0.2555, "step": 7977 }, { "epoch": 0.37, "grad_norm": 0.3188051069245708, "learning_rate": 1.4627051997273553e-05, "loss": 0.3218, "step": 7978 }, { "epoch": 0.37, "grad_norm": 0.740762007040696, "learning_rate": 1.46257328874982e-05, "loss": 0.349, "step": 7979 }, { "epoch": 0.37, "grad_norm": 0.4354668947724718, "learning_rate": 1.4624413675313577e-05, "loss": 0.3068, "step": 7980 }, { "epoch": 0.37, "grad_norm": 0.545635119385423, "learning_rate": 1.4623094360748895e-05, "loss": 0.4421, "step": 7981 }, { "epoch": 0.37, "grad_norm": 0.28604889240642384, "learning_rate": 1.4621774943833358e-05, "loss": 0.2373, "step": 7982 }, { "epoch": 0.37, "grad_norm": 0.2551424749260242, "learning_rate": 1.462045542459618e-05, "loss": 0.1791, "step": 7983 }, { "epoch": 0.37, "grad_norm": 0.4782568669574651, "learning_rate": 1.4619135803066573e-05, "loss": 0.3854, "step": 7984 }, { "epoch": 0.37, "grad_norm": 0.7936927435364832, "learning_rate": 1.461781607927375e-05, "loss": 0.5208, "step": 7985 }, { "epoch": 0.37, "grad_norm": 0.29132110275660117, "learning_rate": 1.4616496253246931e-05, "loss": 0.2214, "step": 7986 }, { "epoch": 0.37, "grad_norm": 0.5277803275108777, "learning_rate": 1.4615176325015332e-05, "loss": 0.3618, "step": 7987 }, { "epoch": 0.37, "grad_norm": 0.29609419222625566, "learning_rate": 1.4613856294608178e-05, "loss": 0.2035, "step": 7988 }, { "epoch": 0.37, "grad_norm": 0.3965303688114541, "learning_rate": 1.4612536162054694e-05, "loss": 0.2469, "step": 7989 }, { "epoch": 0.37, "grad_norm": 0.3730951390092551, "learning_rate": 1.4611215927384103e-05, "loss": 0.3542, "step": 7990 }, { "epoch": 0.37, "grad_norm": 0.475740104862716, "learning_rate": 1.4609895590625635e-05, "loss": 0.3555, "step": 7991 }, { "epoch": 0.37, "grad_norm": 0.8080030082141457, "learning_rate": 1.4608575151808526e-05, "loss": 0.4257, "step": 7992 }, { "epoch": 0.37, "grad_norm": 0.36773386971342104, "learning_rate": 1.4607254610962001e-05, "loss": 0.2525, "step": 7993 }, { "epoch": 0.37, "grad_norm": 0.30063479440270674, "learning_rate": 1.46059339681153e-05, "loss": 0.2634, "step": 7994 }, { "epoch": 0.37, "grad_norm": 0.5509987118400429, "learning_rate": 1.4604613223297661e-05, "loss": 0.389, "step": 7995 }, { "epoch": 0.37, "grad_norm": 0.2804161171950647, "learning_rate": 1.4603292376538325e-05, "loss": 0.2215, "step": 7996 }, { "epoch": 0.37, "grad_norm": 1.6034458191076457, "learning_rate": 1.4601971427866527e-05, "loss": 0.7563, "step": 7997 }, { "epoch": 0.37, "grad_norm": 0.36151741144295596, "learning_rate": 1.4600650377311523e-05, "loss": 0.2918, "step": 7998 }, { "epoch": 0.37, "grad_norm": 0.3628868281688461, "learning_rate": 1.459932922490255e-05, "loss": 0.281, "step": 7999 }, { "epoch": 0.37, "grad_norm": 0.6754704718248982, "learning_rate": 1.4598007970668864e-05, "loss": 0.5219, "step": 8000 }, { "epoch": 0.37, "grad_norm": 0.4893416568201468, "learning_rate": 1.4596686614639711e-05, "loss": 0.3385, "step": 8001 }, { "epoch": 0.37, "grad_norm": 0.268571154790321, "learning_rate": 1.4595365156844347e-05, "loss": 0.2088, "step": 8002 }, { "epoch": 0.37, "grad_norm": 0.3532852324644539, "learning_rate": 1.4594043597312026e-05, "loss": 0.2828, "step": 8003 }, { "epoch": 0.37, "grad_norm": 0.6471508509849552, "learning_rate": 1.459272193607201e-05, "loss": 0.3422, "step": 8004 }, { "epoch": 0.37, "grad_norm": 0.49560777106379306, "learning_rate": 1.4591400173153555e-05, "loss": 0.3336, "step": 8005 }, { "epoch": 0.37, "grad_norm": 0.3650982076948691, "learning_rate": 1.4590078308585927e-05, "loss": 0.2824, "step": 8006 }, { "epoch": 0.37, "grad_norm": 0.42206199653928783, "learning_rate": 1.4588756342398391e-05, "loss": 0.3005, "step": 8007 }, { "epoch": 0.37, "grad_norm": 0.3229834078313099, "learning_rate": 1.458743427462021e-05, "loss": 0.2155, "step": 8008 }, { "epoch": 0.37, "grad_norm": 0.29701145820685576, "learning_rate": 1.4586112105280656e-05, "loss": 0.2055, "step": 8009 }, { "epoch": 0.37, "grad_norm": 0.4481510108506722, "learning_rate": 1.4584789834408996e-05, "loss": 0.3244, "step": 8010 }, { "epoch": 0.37, "grad_norm": 0.422876623188142, "learning_rate": 1.4583467462034512e-05, "loss": 0.3201, "step": 8011 }, { "epoch": 0.37, "grad_norm": 0.6249250607455239, "learning_rate": 1.4582144988186478e-05, "loss": 0.3641, "step": 8012 }, { "epoch": 0.37, "grad_norm": 0.8859015110191942, "learning_rate": 1.4580822412894168e-05, "loss": 0.4503, "step": 8013 }, { "epoch": 0.37, "grad_norm": 0.31746829573320323, "learning_rate": 1.4579499736186864e-05, "loss": 0.2702, "step": 8014 }, { "epoch": 0.37, "grad_norm": 0.19655667254801149, "learning_rate": 1.457817695809385e-05, "loss": 0.1117, "step": 8015 }, { "epoch": 0.37, "grad_norm": 0.5492147687185173, "learning_rate": 1.457685407864441e-05, "loss": 0.3534, "step": 8016 }, { "epoch": 0.37, "grad_norm": 0.3906783478305109, "learning_rate": 1.4575531097867834e-05, "loss": 0.3238, "step": 8017 }, { "epoch": 0.37, "grad_norm": 0.3992223382717474, "learning_rate": 1.4574208015793407e-05, "loss": 0.3093, "step": 8018 }, { "epoch": 0.37, "grad_norm": 0.44315464009185956, "learning_rate": 1.4572884832450427e-05, "loss": 0.2287, "step": 8019 }, { "epoch": 0.37, "grad_norm": 0.36875858245248444, "learning_rate": 1.4571561547868182e-05, "loss": 0.2836, "step": 8020 }, { "epoch": 0.37, "grad_norm": 0.4653032759768289, "learning_rate": 1.4570238162075969e-05, "loss": 0.2676, "step": 8021 }, { "epoch": 0.37, "grad_norm": 0.32209944375250216, "learning_rate": 1.456891467510309e-05, "loss": 0.2332, "step": 8022 }, { "epoch": 0.37, "grad_norm": 0.441351734883321, "learning_rate": 1.456759108697884e-05, "loss": 0.3106, "step": 8023 }, { "epoch": 0.37, "grad_norm": 0.654544008503864, "learning_rate": 1.456626739773253e-05, "loss": 0.4452, "step": 8024 }, { "epoch": 0.37, "grad_norm": 0.3647808442251664, "learning_rate": 1.456494360739346e-05, "loss": 0.2568, "step": 8025 }, { "epoch": 0.37, "grad_norm": 0.3539861803083868, "learning_rate": 1.4563619715990939e-05, "loss": 0.294, "step": 8026 }, { "epoch": 0.37, "grad_norm": 0.3486686042009448, "learning_rate": 1.4562295723554272e-05, "loss": 0.1726, "step": 8027 }, { "epoch": 0.37, "grad_norm": 0.7341945958988937, "learning_rate": 1.4560971630112779e-05, "loss": 0.286, "step": 8028 }, { "epoch": 0.37, "grad_norm": 0.37637362903680693, "learning_rate": 1.4559647435695768e-05, "loss": 0.3177, "step": 8029 }, { "epoch": 0.37, "grad_norm": 0.405284206088478, "learning_rate": 1.455832314033256e-05, "loss": 0.3565, "step": 8030 }, { "epoch": 0.37, "grad_norm": 1.9913398430753853, "learning_rate": 1.4556998744052466e-05, "loss": 0.7404, "step": 8031 }, { "epoch": 0.37, "grad_norm": 0.3448339217456296, "learning_rate": 1.4555674246884816e-05, "loss": 0.2276, "step": 8032 }, { "epoch": 0.37, "grad_norm": 0.36053317996356626, "learning_rate": 1.4554349648858928e-05, "loss": 0.1867, "step": 8033 }, { "epoch": 0.37, "grad_norm": 0.4726418571400346, "learning_rate": 1.4553024950004129e-05, "loss": 0.3274, "step": 8034 }, { "epoch": 0.37, "grad_norm": 0.376644859459166, "learning_rate": 1.4551700150349746e-05, "loss": 0.2505, "step": 8035 }, { "epoch": 0.37, "grad_norm": 2.175901953623385, "learning_rate": 1.4550375249925106e-05, "loss": 0.8313, "step": 8036 }, { "epoch": 0.37, "grad_norm": 0.4715688639245605, "learning_rate": 1.4549050248759546e-05, "loss": 0.3191, "step": 8037 }, { "epoch": 0.37, "grad_norm": 0.40813108551233657, "learning_rate": 1.4547725146882396e-05, "loss": 0.2419, "step": 8038 }, { "epoch": 0.37, "grad_norm": 0.32460135125273704, "learning_rate": 1.4546399944322998e-05, "loss": 0.1819, "step": 8039 }, { "epoch": 0.37, "grad_norm": 0.6802466804452063, "learning_rate": 1.4545074641110684e-05, "loss": 0.4107, "step": 8040 }, { "epoch": 0.37, "grad_norm": 0.3555828740566606, "learning_rate": 1.4543749237274798e-05, "loss": 0.2484, "step": 8041 }, { "epoch": 0.37, "grad_norm": 0.5293511349965812, "learning_rate": 1.4542423732844684e-05, "loss": 0.3662, "step": 8042 }, { "epoch": 0.37, "grad_norm": 0.9536441657203929, "learning_rate": 1.4541098127849686e-05, "loss": 0.4382, "step": 8043 }, { "epoch": 0.37, "grad_norm": 0.3998095093885355, "learning_rate": 1.4539772422319151e-05, "loss": 0.2806, "step": 8044 }, { "epoch": 0.37, "grad_norm": 0.2599654016165371, "learning_rate": 1.4538446616282435e-05, "loss": 0.1919, "step": 8045 }, { "epoch": 0.37, "grad_norm": 0.5015844075892185, "learning_rate": 1.453712070976888e-05, "loss": 0.3635, "step": 8046 }, { "epoch": 0.37, "grad_norm": 0.36512299858641384, "learning_rate": 1.4535794702807849e-05, "loss": 0.2826, "step": 8047 }, { "epoch": 0.37, "grad_norm": 0.7780282750486254, "learning_rate": 1.4534468595428695e-05, "loss": 0.5078, "step": 8048 }, { "epoch": 0.37, "grad_norm": 0.424220793708698, "learning_rate": 1.4533142387660774e-05, "loss": 0.3505, "step": 8049 }, { "epoch": 0.37, "grad_norm": 0.42625350930980693, "learning_rate": 1.4531816079533452e-05, "loss": 0.3056, "step": 8050 }, { "epoch": 0.37, "grad_norm": 0.40515935262875485, "learning_rate": 1.4530489671076087e-05, "loss": 0.1621, "step": 8051 }, { "epoch": 0.37, "grad_norm": 0.43297166869404236, "learning_rate": 1.4529163162318049e-05, "loss": 0.3214, "step": 8052 }, { "epoch": 0.37, "grad_norm": 0.35201827911462164, "learning_rate": 1.4527836553288701e-05, "loss": 0.2601, "step": 8053 }, { "epoch": 0.37, "grad_norm": 0.4504865676055261, "learning_rate": 1.4526509844017422e-05, "loss": 0.2874, "step": 8054 }, { "epoch": 0.37, "grad_norm": 0.9297956553604682, "learning_rate": 1.4525183034533572e-05, "loss": 0.6048, "step": 8055 }, { "epoch": 0.37, "grad_norm": 0.3730699503991378, "learning_rate": 1.4523856124866536e-05, "loss": 0.2746, "step": 8056 }, { "epoch": 0.37, "grad_norm": 0.8923270077576486, "learning_rate": 1.452252911504568e-05, "loss": 0.5308, "step": 8057 }, { "epoch": 0.37, "grad_norm": 0.2699751089111817, "learning_rate": 1.4521202005100392e-05, "loss": 0.2193, "step": 8058 }, { "epoch": 0.37, "grad_norm": 0.38553112362435155, "learning_rate": 1.4519874795060048e-05, "loss": 0.2894, "step": 8059 }, { "epoch": 0.37, "grad_norm": 0.45159444370752067, "learning_rate": 1.4518547484954033e-05, "loss": 0.3391, "step": 8060 }, { "epoch": 0.37, "grad_norm": 0.3825125598512783, "learning_rate": 1.4517220074811729e-05, "loss": 0.2463, "step": 8061 }, { "epoch": 0.37, "grad_norm": 0.3849553801244855, "learning_rate": 1.451589256466253e-05, "loss": 0.2753, "step": 8062 }, { "epoch": 0.37, "grad_norm": 0.7650084573978162, "learning_rate": 1.4514564954535819e-05, "loss": 0.4657, "step": 8063 }, { "epoch": 0.37, "grad_norm": 0.4333268174377405, "learning_rate": 1.4513237244460995e-05, "loss": 0.198, "step": 8064 }, { "epoch": 0.37, "grad_norm": 0.3649220846152661, "learning_rate": 1.4511909434467444e-05, "loss": 0.2895, "step": 8065 }, { "epoch": 0.37, "grad_norm": 0.415473897899505, "learning_rate": 1.4510581524584571e-05, "loss": 0.3636, "step": 8066 }, { "epoch": 0.37, "grad_norm": 0.30697044659719647, "learning_rate": 1.4509253514841769e-05, "loss": 0.1109, "step": 8067 }, { "epoch": 0.37, "grad_norm": 0.3762822036066887, "learning_rate": 1.4507925405268437e-05, "loss": 0.296, "step": 8068 }, { "epoch": 0.37, "grad_norm": 0.5363757838522297, "learning_rate": 1.4506597195893983e-05, "loss": 0.3862, "step": 8069 }, { "epoch": 0.37, "grad_norm": 0.4813313634789555, "learning_rate": 1.450526888674781e-05, "loss": 0.3489, "step": 8070 }, { "epoch": 0.37, "grad_norm": 0.23232794222403005, "learning_rate": 1.4503940477859329e-05, "loss": 0.1742, "step": 8071 }, { "epoch": 0.37, "grad_norm": 1.2613382524063457, "learning_rate": 1.4502611969257947e-05, "loss": 0.777, "step": 8072 }, { "epoch": 0.37, "grad_norm": 0.3202289642631333, "learning_rate": 1.4501283360973073e-05, "loss": 0.2686, "step": 8073 }, { "epoch": 0.37, "grad_norm": 0.3702647826983844, "learning_rate": 1.4499954653034123e-05, "loss": 0.2357, "step": 8074 }, { "epoch": 0.37, "grad_norm": 0.7259843664084988, "learning_rate": 1.4498625845470518e-05, "loss": 0.4323, "step": 8075 }, { "epoch": 0.37, "grad_norm": 1.0928180213495502, "learning_rate": 1.4497296938311669e-05, "loss": 0.6414, "step": 8076 }, { "epoch": 0.37, "grad_norm": 0.34071987657911107, "learning_rate": 1.4495967931587003e-05, "loss": 0.2116, "step": 8077 }, { "epoch": 0.37, "grad_norm": 0.41112295076863176, "learning_rate": 1.4494638825325938e-05, "loss": 0.328, "step": 8078 }, { "epoch": 0.37, "grad_norm": 0.36314544481567956, "learning_rate": 1.4493309619557903e-05, "loss": 0.2385, "step": 8079 }, { "epoch": 0.37, "grad_norm": 0.3332147159866553, "learning_rate": 1.4491980314312324e-05, "loss": 0.1959, "step": 8080 }, { "epoch": 0.37, "grad_norm": 0.4486453044276179, "learning_rate": 1.4490650909618626e-05, "loss": 0.339, "step": 8081 }, { "epoch": 0.37, "grad_norm": 1.3046603885399826, "learning_rate": 1.4489321405506249e-05, "loss": 0.6039, "step": 8082 }, { "epoch": 0.37, "grad_norm": 0.4638141581238621, "learning_rate": 1.4487991802004625e-05, "loss": 0.3174, "step": 8083 }, { "epoch": 0.37, "grad_norm": 0.22334586381428592, "learning_rate": 1.4486662099143185e-05, "loss": 0.1543, "step": 8084 }, { "epoch": 0.37, "grad_norm": 0.39841130964991045, "learning_rate": 1.4485332296951368e-05, "loss": 0.3155, "step": 8085 }, { "epoch": 0.37, "grad_norm": 0.40266154801848825, "learning_rate": 1.4484002395458624e-05, "loss": 0.2942, "step": 8086 }, { "epoch": 0.37, "grad_norm": 0.4285046575652066, "learning_rate": 1.4482672394694381e-05, "loss": 0.2944, "step": 8087 }, { "epoch": 0.37, "grad_norm": 1.0399857910914847, "learning_rate": 1.4481342294688095e-05, "loss": 0.6765, "step": 8088 }, { "epoch": 0.37, "grad_norm": 0.27826370025961555, "learning_rate": 1.4480012095469212e-05, "loss": 0.2657, "step": 8089 }, { "epoch": 0.37, "grad_norm": 0.47245128907897627, "learning_rate": 1.4478681797067177e-05, "loss": 0.2905, "step": 8090 }, { "epoch": 0.37, "grad_norm": 0.4204030978730879, "learning_rate": 1.4477351399511442e-05, "loss": 0.344, "step": 8091 }, { "epoch": 0.37, "grad_norm": 0.33994301941192795, "learning_rate": 1.4476020902831464e-05, "loss": 0.2802, "step": 8092 }, { "epoch": 0.37, "grad_norm": 0.27278899255742867, "learning_rate": 1.4474690307056695e-05, "loss": 0.2008, "step": 8093 }, { "epoch": 0.37, "grad_norm": 1.0989553363859383, "learning_rate": 1.44733596122166e-05, "loss": 0.6389, "step": 8094 }, { "epoch": 0.37, "grad_norm": 0.7174319425326565, "learning_rate": 1.4472028818340632e-05, "loss": 0.3997, "step": 8095 }, { "epoch": 0.37, "grad_norm": 0.43043280549769275, "learning_rate": 1.4470697925458258e-05, "loss": 0.3359, "step": 8096 }, { "epoch": 0.37, "grad_norm": 0.40546881578014465, "learning_rate": 1.4469366933598938e-05, "loss": 0.2923, "step": 8097 }, { "epoch": 0.37, "grad_norm": 0.7012003848679189, "learning_rate": 1.4468035842792143e-05, "loss": 0.3312, "step": 8098 }, { "epoch": 0.37, "grad_norm": 0.26961485819138026, "learning_rate": 1.446670465306734e-05, "loss": 0.2108, "step": 8099 }, { "epoch": 0.37, "grad_norm": 0.8683246083681054, "learning_rate": 1.4465373364454001e-05, "loss": 0.3428, "step": 8100 }, { "epoch": 0.37, "grad_norm": 0.3765437512663046, "learning_rate": 1.4464041976981602e-05, "loss": 0.2809, "step": 8101 }, { "epoch": 0.37, "grad_norm": 0.4044416045096495, "learning_rate": 1.4462710490679614e-05, "loss": 0.3448, "step": 8102 }, { "epoch": 0.37, "grad_norm": 0.6968941292712909, "learning_rate": 1.4461378905577518e-05, "loss": 0.3526, "step": 8103 }, { "epoch": 0.37, "grad_norm": 0.38395395000015514, "learning_rate": 1.4460047221704791e-05, "loss": 0.2343, "step": 8104 }, { "epoch": 0.37, "grad_norm": 0.2830876233267711, "learning_rate": 1.445871543909092e-05, "loss": 0.2394, "step": 8105 }, { "epoch": 0.37, "grad_norm": 1.7952650045326775, "learning_rate": 1.4457383557765385e-05, "loss": 0.8264, "step": 8106 }, { "epoch": 0.37, "grad_norm": 0.36648782546590747, "learning_rate": 1.4456051577757675e-05, "loss": 0.2232, "step": 8107 }, { "epoch": 0.37, "grad_norm": 0.5557893341907113, "learning_rate": 1.4454719499097274e-05, "loss": 0.3735, "step": 8108 }, { "epoch": 0.37, "grad_norm": 0.44457788567359824, "learning_rate": 1.445338732181368e-05, "loss": 0.3399, "step": 8109 }, { "epoch": 0.37, "grad_norm": 0.377295470876217, "learning_rate": 1.4452055045936384e-05, "loss": 0.2182, "step": 8110 }, { "epoch": 0.37, "grad_norm": 0.27680526318806714, "learning_rate": 1.4450722671494878e-05, "loss": 0.1883, "step": 8111 }, { "epoch": 0.37, "grad_norm": 1.0434715039722036, "learning_rate": 1.4449390198518663e-05, "loss": 0.7002, "step": 8112 }, { "epoch": 0.37, "grad_norm": 0.3046090265375082, "learning_rate": 1.4448057627037238e-05, "loss": 0.2192, "step": 8113 }, { "epoch": 0.37, "grad_norm": 0.3921660931816046, "learning_rate": 1.4446724957080105e-05, "loss": 0.3254, "step": 8114 }, { "epoch": 0.37, "grad_norm": 0.6994491471845978, "learning_rate": 1.4445392188676764e-05, "loss": 0.4547, "step": 8115 }, { "epoch": 0.37, "grad_norm": 0.26871783575947744, "learning_rate": 1.4444059321856725e-05, "loss": 0.0719, "step": 8116 }, { "epoch": 0.37, "grad_norm": 0.2792463394041331, "learning_rate": 1.4442726356649497e-05, "loss": 0.2444, "step": 8117 }, { "epoch": 0.37, "grad_norm": 1.1861117440030566, "learning_rate": 1.4441393293084589e-05, "loss": 0.6726, "step": 8118 }, { "epoch": 0.37, "grad_norm": 0.7797025946871594, "learning_rate": 1.4440060131191515e-05, "loss": 0.4208, "step": 8119 }, { "epoch": 0.37, "grad_norm": 0.3785789840265222, "learning_rate": 1.443872687099979e-05, "loss": 0.2703, "step": 8120 }, { "epoch": 0.37, "grad_norm": 0.4021594988038672, "learning_rate": 1.4437393512538926e-05, "loss": 0.3176, "step": 8121 }, { "epoch": 0.37, "grad_norm": 0.5744238816396605, "learning_rate": 1.443606005583845e-05, "loss": 0.3289, "step": 8122 }, { "epoch": 0.37, "grad_norm": 0.24735474269333255, "learning_rate": 1.4434726500927879e-05, "loss": 0.1651, "step": 8123 }, { "epoch": 0.37, "grad_norm": 1.211791538324062, "learning_rate": 1.443339284783674e-05, "loss": 0.6564, "step": 8124 }, { "epoch": 0.37, "grad_norm": 0.40119978313973204, "learning_rate": 1.4432059096594553e-05, "loss": 0.2828, "step": 8125 }, { "epoch": 0.37, "grad_norm": 0.49271095317703384, "learning_rate": 1.443072524723085e-05, "loss": 0.2801, "step": 8126 }, { "epoch": 0.37, "grad_norm": 0.9114656136689938, "learning_rate": 1.442939129977516e-05, "loss": 0.5515, "step": 8127 }, { "epoch": 0.37, "grad_norm": 0.6944807003011761, "learning_rate": 1.4428057254257014e-05, "loss": 0.2886, "step": 8128 }, { "epoch": 0.37, "grad_norm": 0.3967429256594281, "learning_rate": 1.442672311070595e-05, "loss": 0.2535, "step": 8129 }, { "epoch": 0.37, "grad_norm": 0.3170589010166853, "learning_rate": 1.4425388869151506e-05, "loss": 0.2206, "step": 8130 }, { "epoch": 0.37, "grad_norm": 0.6541096200829364, "learning_rate": 1.4424054529623214e-05, "loss": 0.3823, "step": 8131 }, { "epoch": 0.37, "grad_norm": 0.428923930151477, "learning_rate": 1.4422720092150622e-05, "loss": 0.3368, "step": 8132 }, { "epoch": 0.37, "grad_norm": 0.4077737286229152, "learning_rate": 1.4421385556763268e-05, "loss": 0.2913, "step": 8133 }, { "epoch": 0.37, "grad_norm": 0.780064609035624, "learning_rate": 1.4420050923490696e-05, "loss": 0.377, "step": 8134 }, { "epoch": 0.37, "grad_norm": 0.2901392780899888, "learning_rate": 1.4418716192362458e-05, "loss": 0.2318, "step": 8135 }, { "epoch": 0.37, "grad_norm": 0.30757732191557946, "learning_rate": 1.4417381363408107e-05, "loss": 0.2323, "step": 8136 }, { "epoch": 0.37, "grad_norm": 0.4548606743024571, "learning_rate": 1.4416046436657187e-05, "loss": 0.3587, "step": 8137 }, { "epoch": 0.37, "grad_norm": 0.45732355159535487, "learning_rate": 1.4414711412139254e-05, "loss": 0.303, "step": 8138 }, { "epoch": 0.37, "grad_norm": 1.0822017475316053, "learning_rate": 1.4413376289883868e-05, "loss": 0.3963, "step": 8139 }, { "epoch": 0.37, "grad_norm": 0.45383526659823176, "learning_rate": 1.4412041069920582e-05, "loss": 0.3112, "step": 8140 }, { "epoch": 0.37, "grad_norm": 0.3372666816026221, "learning_rate": 1.4410705752278957e-05, "loss": 0.2814, "step": 8141 }, { "epoch": 0.37, "grad_norm": 0.32524645401580127, "learning_rate": 1.4409370336988564e-05, "loss": 0.169, "step": 8142 }, { "epoch": 0.37, "grad_norm": 0.5685776051589425, "learning_rate": 1.4408034824078959e-05, "loss": 0.3688, "step": 8143 }, { "epoch": 0.37, "grad_norm": 0.4376313230990527, "learning_rate": 1.440669921357971e-05, "loss": 0.3087, "step": 8144 }, { "epoch": 0.37, "grad_norm": 0.4569660570837889, "learning_rate": 1.4405363505520386e-05, "loss": 0.3339, "step": 8145 }, { "epoch": 0.37, "grad_norm": 0.4012077787576056, "learning_rate": 1.4404027699930563e-05, "loss": 0.0717, "step": 8146 }, { "epoch": 0.37, "grad_norm": 0.4509739968710311, "learning_rate": 1.4402691796839812e-05, "loss": 0.3429, "step": 8147 }, { "epoch": 0.37, "grad_norm": 0.377727085417594, "learning_rate": 1.4401355796277707e-05, "loss": 0.2985, "step": 8148 }, { "epoch": 0.37, "grad_norm": 0.32875092038198983, "learning_rate": 1.4400019698273826e-05, "loss": 0.2148, "step": 8149 }, { "epoch": 0.37, "grad_norm": 0.40830849008245623, "learning_rate": 1.4398683502857748e-05, "loss": 0.2902, "step": 8150 }, { "epoch": 0.37, "grad_norm": 1.0769865822653262, "learning_rate": 1.4397347210059059e-05, "loss": 0.5932, "step": 8151 }, { "epoch": 0.37, "grad_norm": 0.38318973891823555, "learning_rate": 1.439601081990734e-05, "loss": 0.2475, "step": 8152 }, { "epoch": 0.37, "grad_norm": 0.3825986230766878, "learning_rate": 1.4394674332432182e-05, "loss": 0.2794, "step": 8153 }, { "epoch": 0.37, "grad_norm": 0.5371953677536965, "learning_rate": 1.4393337747663168e-05, "loss": 0.4304, "step": 8154 }, { "epoch": 0.37, "grad_norm": 0.3018934112398851, "learning_rate": 1.4392001065629888e-05, "loss": 0.1138, "step": 8155 }, { "epoch": 0.37, "grad_norm": 0.420633317378892, "learning_rate": 1.439066428636194e-05, "loss": 0.3172, "step": 8156 }, { "epoch": 0.37, "grad_norm": 0.3274556845005094, "learning_rate": 1.4389327409888916e-05, "loss": 0.2437, "step": 8157 }, { "epoch": 0.37, "grad_norm": 1.016920581456003, "learning_rate": 1.438799043624041e-05, "loss": 0.572, "step": 8158 }, { "epoch": 0.37, "grad_norm": 0.3438425472043183, "learning_rate": 1.438665336544603e-05, "loss": 0.2424, "step": 8159 }, { "epoch": 0.37, "grad_norm": 0.4852958734828313, "learning_rate": 1.4385316197535373e-05, "loss": 0.3629, "step": 8160 }, { "epoch": 0.37, "grad_norm": 0.7604373800181158, "learning_rate": 1.4383978932538038e-05, "loss": 0.3557, "step": 8161 }, { "epoch": 0.37, "grad_norm": 0.36121423116163215, "learning_rate": 1.4382641570483635e-05, "loss": 0.2032, "step": 8162 }, { "epoch": 0.38, "grad_norm": 0.37971718292547935, "learning_rate": 1.4381304111401775e-05, "loss": 0.2528, "step": 8163 }, { "epoch": 0.38, "grad_norm": 0.3247067814956869, "learning_rate": 1.4379966555322061e-05, "loss": 0.2716, "step": 8164 }, { "epoch": 0.38, "grad_norm": 0.370823121370817, "learning_rate": 1.4378628902274112e-05, "loss": 0.2502, "step": 8165 }, { "epoch": 0.38, "grad_norm": 0.5896055000558335, "learning_rate": 1.4377291152287538e-05, "loss": 0.4204, "step": 8166 }, { "epoch": 0.38, "grad_norm": 0.7751249439047946, "learning_rate": 1.4375953305391956e-05, "loss": 0.5046, "step": 8167 }, { "epoch": 0.38, "grad_norm": 0.3569893717916408, "learning_rate": 1.4374615361616985e-05, "loss": 0.2225, "step": 8168 }, { "epoch": 0.38, "grad_norm": 0.3487667059860045, "learning_rate": 1.437327732099225e-05, "loss": 0.2331, "step": 8169 }, { "epoch": 0.38, "grad_norm": 0.8249438071964644, "learning_rate": 1.4371939183547368e-05, "loss": 0.4399, "step": 8170 }, { "epoch": 0.38, "grad_norm": 0.32898537178932663, "learning_rate": 1.4370600949311967e-05, "loss": 0.2841, "step": 8171 }, { "epoch": 0.38, "grad_norm": 0.4560493355111065, "learning_rate": 1.4369262618315672e-05, "loss": 0.2589, "step": 8172 }, { "epoch": 0.38, "grad_norm": 1.152462184301265, "learning_rate": 1.4367924190588114e-05, "loss": 0.631, "step": 8173 }, { "epoch": 0.38, "grad_norm": 0.37560296611791244, "learning_rate": 1.4366585666158924e-05, "loss": 0.2774, "step": 8174 }, { "epoch": 0.38, "grad_norm": 0.1922203290623445, "learning_rate": 1.4365247045057732e-05, "loss": 0.1059, "step": 8175 }, { "epoch": 0.38, "grad_norm": 0.4279193601201062, "learning_rate": 1.4363908327314185e-05, "loss": 0.3036, "step": 8176 }, { "epoch": 0.38, "grad_norm": 0.3777746994396621, "learning_rate": 1.4362569512957912e-05, "loss": 0.289, "step": 8177 }, { "epoch": 0.38, "grad_norm": 0.8190570137297476, "learning_rate": 1.4361230602018551e-05, "loss": 0.2977, "step": 8178 }, { "epoch": 0.38, "grad_norm": 1.2220837858016589, "learning_rate": 1.435989159452575e-05, "loss": 0.8406, "step": 8179 }, { "epoch": 0.38, "grad_norm": 0.3693450586663808, "learning_rate": 1.4358552490509152e-05, "loss": 0.2599, "step": 8180 }, { "epoch": 0.38, "grad_norm": 0.36190938695966485, "learning_rate": 1.4357213289998402e-05, "loss": 0.1645, "step": 8181 }, { "epoch": 0.38, "grad_norm": 0.9532698894067595, "learning_rate": 1.4355873993023149e-05, "loss": 0.4447, "step": 8182 }, { "epoch": 0.38, "grad_norm": 0.476370849568272, "learning_rate": 1.4354534599613045e-05, "loss": 0.2957, "step": 8183 }, { "epoch": 0.38, "grad_norm": 0.38947420523982107, "learning_rate": 1.4353195109797742e-05, "loss": 0.3335, "step": 8184 }, { "epoch": 0.38, "grad_norm": 0.48843814860484114, "learning_rate": 1.4351855523606895e-05, "loss": 0.3199, "step": 8185 }, { "epoch": 0.38, "grad_norm": 0.35179459738077856, "learning_rate": 1.4350515841070159e-05, "loss": 0.2515, "step": 8186 }, { "epoch": 0.38, "grad_norm": 0.8533334882972816, "learning_rate": 1.4349176062217197e-05, "loss": 0.4837, "step": 8187 }, { "epoch": 0.38, "grad_norm": 0.3758362055607333, "learning_rate": 1.4347836187077665e-05, "loss": 0.2903, "step": 8188 }, { "epoch": 0.38, "grad_norm": 0.2955437267555108, "learning_rate": 1.4346496215681237e-05, "loss": 0.2421, "step": 8189 }, { "epoch": 0.38, "grad_norm": 0.48187030322490054, "learning_rate": 1.4345156148057569e-05, "loss": 0.3413, "step": 8190 }, { "epoch": 0.38, "grad_norm": 1.0071674295529929, "learning_rate": 1.434381598423633e-05, "loss": 0.5564, "step": 8191 }, { "epoch": 0.38, "grad_norm": 0.3314808494783778, "learning_rate": 1.4342475724247194e-05, "loss": 0.2826, "step": 8192 }, { "epoch": 0.38, "grad_norm": 0.5616032074099029, "learning_rate": 1.4341135368119829e-05, "loss": 0.3901, "step": 8193 }, { "epoch": 0.38, "grad_norm": 0.7617297336530914, "learning_rate": 1.4339794915883913e-05, "loss": 0.2899, "step": 8194 }, { "epoch": 0.38, "grad_norm": 0.2824971092991806, "learning_rate": 1.4338454367569122e-05, "loss": 0.221, "step": 8195 }, { "epoch": 0.38, "grad_norm": 0.5290168829688746, "learning_rate": 1.4337113723205127e-05, "loss": 0.4032, "step": 8196 }, { "epoch": 0.38, "grad_norm": 0.6235443281168692, "learning_rate": 1.4335772982821618e-05, "loss": 0.3837, "step": 8197 }, { "epoch": 0.38, "grad_norm": 0.3771828397558655, "learning_rate": 1.4334432146448272e-05, "loss": 0.2226, "step": 8198 }, { "epoch": 0.38, "grad_norm": 0.7929446769568814, "learning_rate": 1.4333091214114778e-05, "loss": 0.4607, "step": 8199 }, { "epoch": 0.38, "grad_norm": 0.3515116027312931, "learning_rate": 1.4331750185850821e-05, "loss": 0.3106, "step": 8200 }, { "epoch": 0.38, "grad_norm": 0.2551655177347878, "learning_rate": 1.433040906168609e-05, "loss": 0.1327, "step": 8201 }, { "epoch": 0.38, "grad_norm": 0.33681451534038465, "learning_rate": 1.4329067841650274e-05, "loss": 0.2049, "step": 8202 }, { "epoch": 0.38, "grad_norm": 0.5442830045690067, "learning_rate": 1.4327726525773069e-05, "loss": 0.4134, "step": 8203 }, { "epoch": 0.38, "grad_norm": 0.31528552034531016, "learning_rate": 1.4326385114084172e-05, "loss": 0.2284, "step": 8204 }, { "epoch": 0.38, "grad_norm": 0.6830644309840728, "learning_rate": 1.4325043606613274e-05, "loss": 0.3603, "step": 8205 }, { "epoch": 0.38, "grad_norm": 1.290346700820485, "learning_rate": 1.4323702003390084e-05, "loss": 0.6148, "step": 8206 }, { "epoch": 0.38, "grad_norm": 0.2575142791168545, "learning_rate": 1.4322360304444296e-05, "loss": 0.1823, "step": 8207 }, { "epoch": 0.38, "grad_norm": 0.34372574308053383, "learning_rate": 1.4321018509805617e-05, "loss": 0.2604, "step": 8208 }, { "epoch": 0.38, "grad_norm": 1.3397963864650597, "learning_rate": 1.4319676619503754e-05, "loss": 0.8465, "step": 8209 }, { "epoch": 0.38, "grad_norm": 0.41605963002925217, "learning_rate": 1.4318334633568415e-05, "loss": 0.2905, "step": 8210 }, { "epoch": 0.38, "grad_norm": 0.5057832879966968, "learning_rate": 1.4316992552029305e-05, "loss": 0.3177, "step": 8211 }, { "epoch": 0.38, "grad_norm": 0.44220618867046024, "learning_rate": 1.4315650374916143e-05, "loss": 0.3329, "step": 8212 }, { "epoch": 0.38, "grad_norm": 0.34443428837699497, "learning_rate": 1.4314308102258643e-05, "loss": 0.2111, "step": 8213 }, { "epoch": 0.38, "grad_norm": 0.3342774080954743, "learning_rate": 1.4312965734086519e-05, "loss": 0.1568, "step": 8214 }, { "epoch": 0.38, "grad_norm": 0.5348091614613677, "learning_rate": 1.4311623270429488e-05, "loss": 0.4093, "step": 8215 }, { "epoch": 0.38, "grad_norm": 0.3518870455826157, "learning_rate": 1.4310280711317274e-05, "loss": 0.278, "step": 8216 }, { "epoch": 0.38, "grad_norm": 0.46793460911610657, "learning_rate": 1.4308938056779602e-05, "loss": 0.3051, "step": 8217 }, { "epoch": 0.38, "grad_norm": 1.251438584228942, "learning_rate": 1.4307595306846194e-05, "loss": 0.5727, "step": 8218 }, { "epoch": 0.38, "grad_norm": 0.33538450523095514, "learning_rate": 1.4306252461546777e-05, "loss": 0.1887, "step": 8219 }, { "epoch": 0.38, "grad_norm": 0.33891000885463735, "learning_rate": 1.4304909520911079e-05, "loss": 0.2221, "step": 8220 }, { "epoch": 0.38, "grad_norm": 0.6147435561695751, "learning_rate": 1.4303566484968836e-05, "loss": 0.4614, "step": 8221 }, { "epoch": 0.38, "grad_norm": 0.6739274115676755, "learning_rate": 1.4302223353749776e-05, "loss": 0.3476, "step": 8222 }, { "epoch": 0.38, "grad_norm": 0.500693917552505, "learning_rate": 1.430088012728364e-05, "loss": 0.3466, "step": 8223 }, { "epoch": 0.38, "grad_norm": 0.3785647625340663, "learning_rate": 1.4299536805600162e-05, "loss": 0.2482, "step": 8224 }, { "epoch": 0.38, "grad_norm": 0.6809297901984518, "learning_rate": 1.4298193388729085e-05, "loss": 0.3459, "step": 8225 }, { "epoch": 0.38, "grad_norm": 0.2940116330640593, "learning_rate": 1.4296849876700147e-05, "loss": 0.2244, "step": 8226 }, { "epoch": 0.38, "grad_norm": 0.5055469737966245, "learning_rate": 1.4295506269543094e-05, "loss": 0.3049, "step": 8227 }, { "epoch": 0.38, "grad_norm": 0.32497666957214005, "learning_rate": 1.429416256728767e-05, "loss": 0.2787, "step": 8228 }, { "epoch": 0.38, "grad_norm": 0.5563397653226816, "learning_rate": 1.429281876996363e-05, "loss": 0.4059, "step": 8229 }, { "epoch": 0.38, "grad_norm": 0.8293007549369125, "learning_rate": 1.4291474877600719e-05, "loss": 0.3414, "step": 8230 }, { "epoch": 0.38, "grad_norm": 0.3154983003339873, "learning_rate": 1.4290130890228691e-05, "loss": 0.215, "step": 8231 }, { "epoch": 0.38, "grad_norm": 0.3432024288293899, "learning_rate": 1.42887868078773e-05, "loss": 0.2873, "step": 8232 }, { "epoch": 0.38, "grad_norm": 0.508429358465658, "learning_rate": 1.42874426305763e-05, "loss": 0.3167, "step": 8233 }, { "epoch": 0.38, "grad_norm": 0.36930622311605776, "learning_rate": 1.4286098358355454e-05, "loss": 0.2438, "step": 8234 }, { "epoch": 0.38, "grad_norm": 0.6014374445171841, "learning_rate": 1.428475399124452e-05, "loss": 0.3574, "step": 8235 }, { "epoch": 0.38, "grad_norm": 0.3992719536261348, "learning_rate": 1.4283409529273264e-05, "loss": 0.3409, "step": 8236 }, { "epoch": 0.38, "grad_norm": 0.7516816042485746, "learning_rate": 1.4282064972471447e-05, "loss": 0.1686, "step": 8237 }, { "epoch": 0.38, "grad_norm": 0.32408571371041206, "learning_rate": 1.4280720320868842e-05, "loss": 0.2472, "step": 8238 }, { "epoch": 0.38, "grad_norm": 0.4998774123811883, "learning_rate": 1.4279375574495213e-05, "loss": 0.3805, "step": 8239 }, { "epoch": 0.38, "grad_norm": 0.3312330896032824, "learning_rate": 1.4278030733380335e-05, "loss": 0.2028, "step": 8240 }, { "epoch": 0.38, "grad_norm": 0.3263480030716372, "learning_rate": 1.4276685797553977e-05, "loss": 0.2666, "step": 8241 }, { "epoch": 0.38, "grad_norm": 1.1651884783421904, "learning_rate": 1.427534076704592e-05, "loss": 0.5726, "step": 8242 }, { "epoch": 0.38, "grad_norm": 0.3411234157170078, "learning_rate": 1.4273995641885935e-05, "loss": 0.2167, "step": 8243 }, { "epoch": 0.38, "grad_norm": 0.324672025747547, "learning_rate": 1.427265042210381e-05, "loss": 0.2751, "step": 8244 }, { "epoch": 0.38, "grad_norm": 0.6773576992257967, "learning_rate": 1.4271305107729323e-05, "loss": 0.4632, "step": 8245 }, { "epoch": 0.38, "grad_norm": 0.7871594143109953, "learning_rate": 1.4269959698792254e-05, "loss": 0.6308, "step": 8246 }, { "epoch": 0.38, "grad_norm": 0.27081595429461885, "learning_rate": 1.4268614195322397e-05, "loss": 0.1842, "step": 8247 }, { "epoch": 0.38, "grad_norm": 0.4328919037659009, "learning_rate": 1.4267268597349532e-05, "loss": 0.3012, "step": 8248 }, { "epoch": 0.38, "grad_norm": 0.6474695241528116, "learning_rate": 1.4265922904903455e-05, "loss": 0.3572, "step": 8249 }, { "epoch": 0.38, "grad_norm": 0.3901395456490721, "learning_rate": 1.4264577118013956e-05, "loss": 0.2612, "step": 8250 }, { "epoch": 0.38, "grad_norm": 0.5435741516641559, "learning_rate": 1.426323123671083e-05, "loss": 0.4197, "step": 8251 }, { "epoch": 0.38, "grad_norm": 0.41057205997666657, "learning_rate": 1.4261885261023874e-05, "loss": 0.3085, "step": 8252 }, { "epoch": 0.38, "grad_norm": 0.3208076110916021, "learning_rate": 1.4260539190982885e-05, "loss": 0.2044, "step": 8253 }, { "epoch": 0.38, "grad_norm": 0.39738324868320724, "learning_rate": 1.4259193026617666e-05, "loss": 0.2586, "step": 8254 }, { "epoch": 0.38, "grad_norm": 0.5767641286449069, "learning_rate": 1.4257846767958017e-05, "loss": 0.3441, "step": 8255 }, { "epoch": 0.38, "grad_norm": 0.32950558612937514, "learning_rate": 1.4256500415033743e-05, "loss": 0.2495, "step": 8256 }, { "epoch": 0.38, "grad_norm": 0.8659393031052606, "learning_rate": 1.4255153967874655e-05, "loss": 0.5898, "step": 8257 }, { "epoch": 0.38, "grad_norm": 1.2129542811941005, "learning_rate": 1.4253807426510557e-05, "loss": 0.7702, "step": 8258 }, { "epoch": 0.38, "grad_norm": 0.296508594716759, "learning_rate": 1.4252460790971267e-05, "loss": 0.2256, "step": 8259 }, { "epoch": 0.38, "grad_norm": 0.3024211221431034, "learning_rate": 1.4251114061286591e-05, "loss": 0.2054, "step": 8260 }, { "epoch": 0.38, "grad_norm": 0.629455061463019, "learning_rate": 1.4249767237486344e-05, "loss": 0.404, "step": 8261 }, { "epoch": 0.38, "grad_norm": 0.3592135681012521, "learning_rate": 1.4248420319600351e-05, "loss": 0.3016, "step": 8262 }, { "epoch": 0.38, "grad_norm": 0.42935572212673834, "learning_rate": 1.4247073307658422e-05, "loss": 0.3038, "step": 8263 }, { "epoch": 0.38, "grad_norm": 0.4893976846326565, "learning_rate": 1.4245726201690386e-05, "loss": 0.3563, "step": 8264 }, { "epoch": 0.38, "grad_norm": 0.28904360618292674, "learning_rate": 1.4244379001726065e-05, "loss": 0.2514, "step": 8265 }, { "epoch": 0.38, "grad_norm": 0.3715148170386901, "learning_rate": 1.4243031707795283e-05, "loss": 0.1751, "step": 8266 }, { "epoch": 0.38, "grad_norm": 0.3769245694937176, "learning_rate": 1.4241684319927869e-05, "loss": 0.3347, "step": 8267 }, { "epoch": 0.38, "grad_norm": 0.4310484106575423, "learning_rate": 1.424033683815365e-05, "loss": 0.3046, "step": 8268 }, { "epoch": 0.38, "grad_norm": 0.6512186791516351, "learning_rate": 1.4238989262502461e-05, "loss": 0.3629, "step": 8269 }, { "epoch": 0.38, "grad_norm": 0.5867256433960768, "learning_rate": 1.4237641593004137e-05, "loss": 0.4039, "step": 8270 }, { "epoch": 0.38, "grad_norm": 0.4412019197574116, "learning_rate": 1.4236293829688512e-05, "loss": 0.3168, "step": 8271 }, { "epoch": 0.38, "grad_norm": 0.3316372908240092, "learning_rate": 1.4234945972585425e-05, "loss": 0.2509, "step": 8272 }, { "epoch": 0.38, "grad_norm": 0.8027012524175728, "learning_rate": 1.4233598021724713e-05, "loss": 0.3549, "step": 8273 }, { "epoch": 0.38, "grad_norm": 0.38313271633334883, "learning_rate": 1.4232249977136223e-05, "loss": 0.2873, "step": 8274 }, { "epoch": 0.38, "grad_norm": 0.37618331604683003, "learning_rate": 1.42309018388498e-05, "loss": 0.3376, "step": 8275 }, { "epoch": 0.38, "grad_norm": 1.020892246020979, "learning_rate": 1.4229553606895283e-05, "loss": 0.5781, "step": 8276 }, { "epoch": 0.38, "grad_norm": 0.35412187163161724, "learning_rate": 1.422820528130253e-05, "loss": 0.2652, "step": 8277 }, { "epoch": 0.38, "grad_norm": 0.3469304159858399, "learning_rate": 1.4226856862101386e-05, "loss": 0.2316, "step": 8278 }, { "epoch": 0.38, "grad_norm": 0.40010826622359014, "learning_rate": 1.4225508349321703e-05, "loss": 0.2923, "step": 8279 }, { "epoch": 0.38, "grad_norm": 0.38629007148138467, "learning_rate": 1.422415974299334e-05, "loss": 0.2742, "step": 8280 }, { "epoch": 0.38, "grad_norm": 1.4801700007586074, "learning_rate": 1.4222811043146151e-05, "loss": 0.6828, "step": 8281 }, { "epoch": 0.38, "grad_norm": 0.7985265986567396, "learning_rate": 1.4221462249809995e-05, "loss": 0.3236, "step": 8282 }, { "epoch": 0.38, "grad_norm": 0.3486461059814203, "learning_rate": 1.4220113363014737e-05, "loss": 0.2847, "step": 8283 }, { "epoch": 0.38, "grad_norm": 0.6958512211605081, "learning_rate": 1.4218764382790232e-05, "loss": 0.4726, "step": 8284 }, { "epoch": 0.38, "grad_norm": 0.3062411974786232, "learning_rate": 1.4217415309166351e-05, "loss": 0.2239, "step": 8285 }, { "epoch": 0.38, "grad_norm": 0.3752021628541434, "learning_rate": 1.421606614217296e-05, "loss": 0.212, "step": 8286 }, { "epoch": 0.38, "grad_norm": 0.4256591722831189, "learning_rate": 1.4214716881839927e-05, "loss": 0.3223, "step": 8287 }, { "epoch": 0.38, "grad_norm": 1.286491178139137, "learning_rate": 1.4213367528197127e-05, "loss": 0.8123, "step": 8288 }, { "epoch": 0.38, "grad_norm": 0.3167630392597797, "learning_rate": 1.421201808127443e-05, "loss": 0.1936, "step": 8289 }, { "epoch": 0.38, "grad_norm": 0.6625154944774565, "learning_rate": 1.4210668541101713e-05, "loss": 0.4146, "step": 8290 }, { "epoch": 0.38, "grad_norm": 0.3311778954071521, "learning_rate": 1.4209318907708849e-05, "loss": 0.2565, "step": 8291 }, { "epoch": 0.38, "grad_norm": 0.25608659693305835, "learning_rate": 1.4207969181125724e-05, "loss": 0.1672, "step": 8292 }, { "epoch": 0.38, "grad_norm": 1.042094085495781, "learning_rate": 1.4206619361382217e-05, "loss": 0.5922, "step": 8293 }, { "epoch": 0.38, "grad_norm": 1.1386649402286328, "learning_rate": 1.4205269448508212e-05, "loss": 0.7319, "step": 8294 }, { "epoch": 0.38, "grad_norm": 0.2925751990324721, "learning_rate": 1.4203919442533597e-05, "loss": 0.2352, "step": 8295 }, { "epoch": 0.38, "grad_norm": 0.47186431465199724, "learning_rate": 1.4202569343488252e-05, "loss": 0.3621, "step": 8296 }, { "epoch": 0.38, "grad_norm": 0.28595574622766345, "learning_rate": 1.4201219151402073e-05, "loss": 0.1503, "step": 8297 }, { "epoch": 0.38, "grad_norm": 0.35606121252602985, "learning_rate": 1.4199868866304956e-05, "loss": 0.2787, "step": 8298 }, { "epoch": 0.38, "grad_norm": 0.3502827033922692, "learning_rate": 1.4198518488226784e-05, "loss": 0.2661, "step": 8299 }, { "epoch": 0.38, "grad_norm": 0.8021081853826463, "learning_rate": 1.4197168017197462e-05, "loss": 0.6023, "step": 8300 }, { "epoch": 0.38, "grad_norm": 0.33458848921008755, "learning_rate": 1.4195817453246887e-05, "loss": 0.2604, "step": 8301 }, { "epoch": 0.38, "grad_norm": 0.6828502740801683, "learning_rate": 1.4194466796404958e-05, "loss": 0.3457, "step": 8302 }, { "epoch": 0.38, "grad_norm": 0.3023315998949838, "learning_rate": 1.4193116046701572e-05, "loss": 0.2546, "step": 8303 }, { "epoch": 0.38, "grad_norm": 0.4738507904651978, "learning_rate": 1.4191765204166643e-05, "loss": 0.3487, "step": 8304 }, { "epoch": 0.38, "grad_norm": 0.33281990801805617, "learning_rate": 1.419041426883007e-05, "loss": 0.1806, "step": 8305 }, { "epoch": 0.38, "grad_norm": 0.637455605167356, "learning_rate": 1.4189063240721766e-05, "loss": 0.367, "step": 8306 }, { "epoch": 0.38, "grad_norm": 0.41095119840036776, "learning_rate": 1.418771211987164e-05, "loss": 0.2842, "step": 8307 }, { "epoch": 0.38, "grad_norm": 0.39602692584253674, "learning_rate": 1.41863609063096e-05, "loss": 0.2876, "step": 8308 }, { "epoch": 0.38, "grad_norm": 0.30913304420687365, "learning_rate": 1.4185009600065565e-05, "loss": 0.21, "step": 8309 }, { "epoch": 0.38, "grad_norm": 0.4305256156674565, "learning_rate": 1.418365820116945e-05, "loss": 0.291, "step": 8310 }, { "epoch": 0.38, "grad_norm": 0.3446494696603676, "learning_rate": 1.4182306709651177e-05, "loss": 0.2971, "step": 8311 }, { "epoch": 0.38, "grad_norm": 0.8826954763928107, "learning_rate": 1.4180955125540665e-05, "loss": 0.381, "step": 8312 }, { "epoch": 0.38, "grad_norm": 0.3581151018596223, "learning_rate": 1.4179603448867836e-05, "loss": 0.2849, "step": 8313 }, { "epoch": 0.38, "grad_norm": 0.8359842363230909, "learning_rate": 1.4178251679662614e-05, "loss": 0.566, "step": 8314 }, { "epoch": 0.38, "grad_norm": 0.37843562191127134, "learning_rate": 1.4176899817954928e-05, "loss": 0.2519, "step": 8315 }, { "epoch": 0.38, "grad_norm": 0.30080564416317757, "learning_rate": 1.4175547863774703e-05, "loss": 0.2298, "step": 8316 }, { "epoch": 0.38, "grad_norm": 0.49607200884047337, "learning_rate": 1.4174195817151875e-05, "loss": 0.3165, "step": 8317 }, { "epoch": 0.38, "grad_norm": 0.7897007575682163, "learning_rate": 1.4172843678116375e-05, "loss": 0.3481, "step": 8318 }, { "epoch": 0.38, "grad_norm": 0.3373544346959952, "learning_rate": 1.4171491446698138e-05, "loss": 0.2736, "step": 8319 }, { "epoch": 0.38, "grad_norm": 1.3269886988420891, "learning_rate": 1.4170139122927102e-05, "loss": 0.665, "step": 8320 }, { "epoch": 0.38, "grad_norm": 0.3243934960004902, "learning_rate": 1.4168786706833201e-05, "loss": 0.1458, "step": 8321 }, { "epoch": 0.38, "grad_norm": 0.33387000967091074, "learning_rate": 1.4167434198446385e-05, "loss": 0.1778, "step": 8322 }, { "epoch": 0.38, "grad_norm": 0.43336848222540136, "learning_rate": 1.4166081597796585e-05, "loss": 0.2884, "step": 8323 }, { "epoch": 0.38, "grad_norm": 0.9538955930483495, "learning_rate": 1.4164728904913763e-05, "loss": 0.4561, "step": 8324 }, { "epoch": 0.38, "grad_norm": 0.41910496610832654, "learning_rate": 1.4163376119827852e-05, "loss": 0.1941, "step": 8325 }, { "epoch": 0.38, "grad_norm": 0.5271766647170193, "learning_rate": 1.4162023242568808e-05, "loss": 0.3904, "step": 8326 }, { "epoch": 0.38, "grad_norm": 0.3869840551181553, "learning_rate": 1.4160670273166577e-05, "loss": 0.3283, "step": 8327 }, { "epoch": 0.38, "grad_norm": 0.271840515044541, "learning_rate": 1.4159317211651121e-05, "loss": 0.1418, "step": 8328 }, { "epoch": 0.38, "grad_norm": 0.5354729510812137, "learning_rate": 1.4157964058052386e-05, "loss": 0.2977, "step": 8329 }, { "epoch": 0.38, "grad_norm": 0.9964154731159086, "learning_rate": 1.415661081240034e-05, "loss": 0.4394, "step": 8330 }, { "epoch": 0.38, "grad_norm": 0.3260220703713384, "learning_rate": 1.4155257474724933e-05, "loss": 0.2387, "step": 8331 }, { "epoch": 0.38, "grad_norm": 0.5818842654350443, "learning_rate": 1.4153904045056133e-05, "loss": 0.3696, "step": 8332 }, { "epoch": 0.38, "grad_norm": 1.0454566898057644, "learning_rate": 1.4152550523423899e-05, "loss": 0.6645, "step": 8333 }, { "epoch": 0.38, "grad_norm": 0.25305444840855956, "learning_rate": 1.4151196909858198e-05, "loss": 0.1572, "step": 8334 }, { "epoch": 0.38, "grad_norm": 0.46436510899334427, "learning_rate": 1.4149843204389e-05, "loss": 0.3453, "step": 8335 }, { "epoch": 0.38, "grad_norm": 1.2816850198830303, "learning_rate": 1.4148489407046274e-05, "loss": 0.4429, "step": 8336 }, { "epoch": 0.38, "grad_norm": 0.4543095119203716, "learning_rate": 1.414713551785999e-05, "loss": 0.2968, "step": 8337 }, { "epoch": 0.38, "grad_norm": 0.33814364495431637, "learning_rate": 1.4145781536860121e-05, "loss": 0.157, "step": 8338 }, { "epoch": 0.38, "grad_norm": 0.5369432056711996, "learning_rate": 1.4144427464076646e-05, "loss": 0.3656, "step": 8339 }, { "epoch": 0.38, "grad_norm": 0.6198441021701011, "learning_rate": 1.414307329953954e-05, "loss": 0.3451, "step": 8340 }, { "epoch": 0.38, "grad_norm": 0.6267565658793902, "learning_rate": 1.4141719043278785e-05, "loss": 0.2989, "step": 8341 }, { "epoch": 0.38, "grad_norm": 0.3364937231571777, "learning_rate": 1.4140364695324365e-05, "loss": 0.2504, "step": 8342 }, { "epoch": 0.38, "grad_norm": 0.8974299082522351, "learning_rate": 1.4139010255706259e-05, "loss": 0.5453, "step": 8343 }, { "epoch": 0.38, "grad_norm": 0.2984702575793649, "learning_rate": 1.4137655724454453e-05, "loss": 0.1837, "step": 8344 }, { "epoch": 0.38, "grad_norm": 1.2317971121554436, "learning_rate": 1.4136301101598942e-05, "loss": 0.5802, "step": 8345 }, { "epoch": 0.38, "grad_norm": 0.5169261515583555, "learning_rate": 1.4134946387169705e-05, "loss": 0.3726, "step": 8346 }, { "epoch": 0.38, "grad_norm": 0.37980059124722226, "learning_rate": 1.4133591581196746e-05, "loss": 0.3058, "step": 8347 }, { "epoch": 0.38, "grad_norm": 0.7735793752953474, "learning_rate": 1.413223668371005e-05, "loss": 0.3914, "step": 8348 }, { "epoch": 0.38, "grad_norm": 0.3346244705925037, "learning_rate": 1.4130881694739617e-05, "loss": 0.2256, "step": 8349 }, { "epoch": 0.38, "grad_norm": 0.4122299241979648, "learning_rate": 1.4129526614315448e-05, "loss": 0.2699, "step": 8350 }, { "epoch": 0.38, "grad_norm": 0.4209972973913896, "learning_rate": 1.4128171442467535e-05, "loss": 0.2708, "step": 8351 }, { "epoch": 0.38, "grad_norm": 0.5985139734984927, "learning_rate": 1.4126816179225889e-05, "loss": 0.3478, "step": 8352 }, { "epoch": 0.38, "grad_norm": 0.40030918772224366, "learning_rate": 1.4125460824620509e-05, "loss": 0.3038, "step": 8353 }, { "epoch": 0.38, "grad_norm": 0.387367158986872, "learning_rate": 1.4124105378681403e-05, "loss": 0.2657, "step": 8354 }, { "epoch": 0.38, "grad_norm": 0.4387209029325645, "learning_rate": 1.4122749841438576e-05, "loss": 0.3061, "step": 8355 }, { "epoch": 0.38, "grad_norm": 0.4044172792348891, "learning_rate": 1.4121394212922043e-05, "loss": 0.2619, "step": 8356 }, { "epoch": 0.38, "grad_norm": 0.5365427164354623, "learning_rate": 1.4120038493161814e-05, "loss": 0.2597, "step": 8357 }, { "epoch": 0.38, "grad_norm": 0.3957579301613287, "learning_rate": 1.4118682682187903e-05, "loss": 0.2931, "step": 8358 }, { "epoch": 0.38, "grad_norm": 0.3717424904229399, "learning_rate": 1.411732678003033e-05, "loss": 0.3024, "step": 8359 }, { "epoch": 0.38, "grad_norm": 1.3263915218207296, "learning_rate": 1.4115970786719109e-05, "loss": 0.8239, "step": 8360 }, { "epoch": 0.38, "grad_norm": 0.2742231609470218, "learning_rate": 1.411461470228426e-05, "loss": 0.1239, "step": 8361 }, { "epoch": 0.38, "grad_norm": 0.31242284483508165, "learning_rate": 1.4113258526755808e-05, "loss": 0.2428, "step": 8362 }, { "epoch": 0.38, "grad_norm": 0.4240100975667483, "learning_rate": 1.4111902260163776e-05, "loss": 0.3457, "step": 8363 }, { "epoch": 0.38, "grad_norm": 0.5959482051496351, "learning_rate": 1.4110545902538193e-05, "loss": 0.2535, "step": 8364 }, { "epoch": 0.38, "grad_norm": 0.4177513987410519, "learning_rate": 1.4109189453909085e-05, "loss": 0.3469, "step": 8365 }, { "epoch": 0.38, "grad_norm": 0.5453172208280188, "learning_rate": 1.4107832914306482e-05, "loss": 0.4051, "step": 8366 }, { "epoch": 0.38, "grad_norm": 0.3586084145948304, "learning_rate": 1.410647628376042e-05, "loss": 0.2226, "step": 8367 }, { "epoch": 0.38, "grad_norm": 0.2418591975043847, "learning_rate": 1.4105119562300928e-05, "loss": 0.1918, "step": 8368 }, { "epoch": 0.38, "grad_norm": 0.6902292244709147, "learning_rate": 1.4103762749958048e-05, "loss": 0.455, "step": 8369 }, { "epoch": 0.38, "grad_norm": 0.3351449713623426, "learning_rate": 1.4102405846761813e-05, "loss": 0.2478, "step": 8370 }, { "epoch": 0.38, "grad_norm": 0.49323792866826704, "learning_rate": 1.410104885274227e-05, "loss": 0.3139, "step": 8371 }, { "epoch": 0.38, "grad_norm": 1.0110069175331307, "learning_rate": 1.4099691767929455e-05, "loss": 0.6657, "step": 8372 }, { "epoch": 0.38, "grad_norm": 0.38350253352465036, "learning_rate": 1.4098334592353418e-05, "loss": 0.2799, "step": 8373 }, { "epoch": 0.38, "grad_norm": 0.3809796130037751, "learning_rate": 1.4096977326044199e-05, "loss": 0.2574, "step": 8374 }, { "epoch": 0.38, "grad_norm": 0.38894388978141226, "learning_rate": 1.4095619969031854e-05, "loss": 0.3504, "step": 8375 }, { "epoch": 0.38, "grad_norm": 0.35202368400193174, "learning_rate": 1.4094262521346429e-05, "loss": 0.2049, "step": 8376 }, { "epoch": 0.38, "grad_norm": 0.44750788603893854, "learning_rate": 1.4092904983017981e-05, "loss": 0.2484, "step": 8377 }, { "epoch": 0.38, "grad_norm": 0.3739361510890599, "learning_rate": 1.4091547354076558e-05, "loss": 0.3233, "step": 8378 }, { "epoch": 0.38, "grad_norm": 1.2724970426941196, "learning_rate": 1.4090189634552221e-05, "loss": 0.7023, "step": 8379 }, { "epoch": 0.38, "grad_norm": 0.3268668473401568, "learning_rate": 1.408883182447503e-05, "loss": 0.2206, "step": 8380 }, { "epoch": 0.39, "grad_norm": 0.3779306796608664, "learning_rate": 1.4087473923875039e-05, "loss": 0.2583, "step": 8381 }, { "epoch": 0.39, "grad_norm": 0.395837958980147, "learning_rate": 1.4086115932782316e-05, "loss": 0.2936, "step": 8382 }, { "epoch": 0.39, "grad_norm": 0.3989767050434608, "learning_rate": 1.4084757851226926e-05, "loss": 0.2023, "step": 8383 }, { "epoch": 0.39, "grad_norm": 1.2778752243504499, "learning_rate": 1.4083399679238936e-05, "loss": 0.823, "step": 8384 }, { "epoch": 0.39, "grad_norm": 0.6473078055819477, "learning_rate": 1.4082041416848409e-05, "loss": 0.456, "step": 8385 }, { "epoch": 0.39, "grad_norm": 0.32157155140672566, "learning_rate": 1.408068306408542e-05, "loss": 0.3073, "step": 8386 }, { "epoch": 0.39, "grad_norm": 0.42496594944160104, "learning_rate": 1.4079324620980042e-05, "loss": 0.2947, "step": 8387 }, { "epoch": 0.39, "grad_norm": 0.25490104677522024, "learning_rate": 1.4077966087562348e-05, "loss": 0.1624, "step": 8388 }, { "epoch": 0.39, "grad_norm": 0.41096192040201357, "learning_rate": 1.4076607463862417e-05, "loss": 0.2942, "step": 8389 }, { "epoch": 0.39, "grad_norm": 0.3830051081627851, "learning_rate": 1.4075248749910323e-05, "loss": 0.2854, "step": 8390 }, { "epoch": 0.39, "grad_norm": 0.5900140540608588, "learning_rate": 1.407388994573615e-05, "loss": 0.3997, "step": 8391 }, { "epoch": 0.39, "grad_norm": 0.4188817398600617, "learning_rate": 1.4072531051369983e-05, "loss": 0.3371, "step": 8392 }, { "epoch": 0.39, "grad_norm": 0.3733857060062306, "learning_rate": 1.4071172066841898e-05, "loss": 0.2264, "step": 8393 }, { "epoch": 0.39, "grad_norm": 0.2716939649546834, "learning_rate": 1.4069812992181992e-05, "loss": 0.2271, "step": 8394 }, { "epoch": 0.39, "grad_norm": 0.39234828651491327, "learning_rate": 1.4068453827420352e-05, "loss": 0.2745, "step": 8395 }, { "epoch": 0.39, "grad_norm": 0.7365202622938131, "learning_rate": 1.4067094572587059e-05, "loss": 0.4841, "step": 8396 }, { "epoch": 0.39, "grad_norm": 0.7547279656587319, "learning_rate": 1.4065735227712213e-05, "loss": 0.4704, "step": 8397 }, { "epoch": 0.39, "grad_norm": 0.32114775016531544, "learning_rate": 1.4064375792825909e-05, "loss": 0.2829, "step": 8398 }, { "epoch": 0.39, "grad_norm": 0.46779251729312765, "learning_rate": 1.4063016267958242e-05, "loss": 0.3586, "step": 8399 }, { "epoch": 0.39, "grad_norm": 0.25185112691888867, "learning_rate": 1.4061656653139312e-05, "loss": 0.0957, "step": 8400 }, { "epoch": 0.39, "grad_norm": 0.38763830760633167, "learning_rate": 1.4060296948399219e-05, "loss": 0.297, "step": 8401 }, { "epoch": 0.39, "grad_norm": 0.38427400808556933, "learning_rate": 1.4058937153768062e-05, "loss": 0.3255, "step": 8402 }, { "epoch": 0.39, "grad_norm": 0.817437157993242, "learning_rate": 1.4057577269275951e-05, "loss": 0.3598, "step": 8403 }, { "epoch": 0.39, "grad_norm": 0.357050636439786, "learning_rate": 1.4056217294952985e-05, "loss": 0.2792, "step": 8404 }, { "epoch": 0.39, "grad_norm": 1.079065450734748, "learning_rate": 1.4054857230829284e-05, "loss": 0.6757, "step": 8405 }, { "epoch": 0.39, "grad_norm": 0.2423752338683816, "learning_rate": 1.4053497076934948e-05, "loss": 0.1776, "step": 8406 }, { "epoch": 0.39, "grad_norm": 0.39856406086672413, "learning_rate": 1.4052136833300096e-05, "loss": 0.2933, "step": 8407 }, { "epoch": 0.39, "grad_norm": 0.9826072317811388, "learning_rate": 1.4050776499954836e-05, "loss": 0.5867, "step": 8408 }, { "epoch": 0.39, "grad_norm": 0.5999054965175215, "learning_rate": 1.4049416076929294e-05, "loss": 0.3108, "step": 8409 }, { "epoch": 0.39, "grad_norm": 0.41138789647026763, "learning_rate": 1.404805556425358e-05, "loss": 0.31, "step": 8410 }, { "epoch": 0.39, "grad_norm": 0.48498338479675973, "learning_rate": 1.4046694961957816e-05, "loss": 0.3664, "step": 8411 }, { "epoch": 0.39, "grad_norm": 0.28553114364080506, "learning_rate": 1.4045334270072129e-05, "loss": 0.1605, "step": 8412 }, { "epoch": 0.39, "grad_norm": 0.3395833842939998, "learning_rate": 1.4043973488626638e-05, "loss": 0.2023, "step": 8413 }, { "epoch": 0.39, "grad_norm": 0.470706823202285, "learning_rate": 1.4042612617651475e-05, "loss": 0.3458, "step": 8414 }, { "epoch": 0.39, "grad_norm": 1.024884683962352, "learning_rate": 1.404125165717676e-05, "loss": 0.471, "step": 8415 }, { "epoch": 0.39, "grad_norm": 0.3463040562914271, "learning_rate": 1.4039890607232631e-05, "loss": 0.2106, "step": 8416 }, { "epoch": 0.39, "grad_norm": 0.8753635101713028, "learning_rate": 1.4038529467849216e-05, "loss": 0.5764, "step": 8417 }, { "epoch": 0.39, "grad_norm": 0.40906770315930097, "learning_rate": 1.4037168239056653e-05, "loss": 0.3254, "step": 8418 }, { "epoch": 0.39, "grad_norm": 0.2648254490613112, "learning_rate": 1.4035806920885076e-05, "loss": 0.1844, "step": 8419 }, { "epoch": 0.39, "grad_norm": 0.9510440019615164, "learning_rate": 1.4034445513364623e-05, "loss": 0.4473, "step": 8420 }, { "epoch": 0.39, "grad_norm": 0.5837046686067077, "learning_rate": 1.4033084016525434e-05, "loss": 0.38, "step": 8421 }, { "epoch": 0.39, "grad_norm": 0.29198736947130144, "learning_rate": 1.4031722430397654e-05, "loss": 0.1977, "step": 8422 }, { "epoch": 0.39, "grad_norm": 1.091303170510882, "learning_rate": 1.4030360755011423e-05, "loss": 0.6062, "step": 8423 }, { "epoch": 0.39, "grad_norm": 0.41463392639862046, "learning_rate": 1.4028998990396892e-05, "loss": 0.2174, "step": 8424 }, { "epoch": 0.39, "grad_norm": 0.3820207331337777, "learning_rate": 1.4027637136584204e-05, "loss": 0.2732, "step": 8425 }, { "epoch": 0.39, "grad_norm": 0.39145817235672925, "learning_rate": 1.4026275193603512e-05, "loss": 0.284, "step": 8426 }, { "epoch": 0.39, "grad_norm": 0.4699027382928074, "learning_rate": 1.402491316148497e-05, "loss": 0.3157, "step": 8427 }, { "epoch": 0.39, "grad_norm": 0.44876850117075945, "learning_rate": 1.4023551040258726e-05, "loss": 0.2683, "step": 8428 }, { "epoch": 0.39, "grad_norm": 0.5694311260236654, "learning_rate": 1.4022188829954942e-05, "loss": 0.2653, "step": 8429 }, { "epoch": 0.39, "grad_norm": 0.4293590810229933, "learning_rate": 1.4020826530603775e-05, "loss": 0.3149, "step": 8430 }, { "epoch": 0.39, "grad_norm": 0.41571343746067607, "learning_rate": 1.4019464142235384e-05, "loss": 0.2928, "step": 8431 }, { "epoch": 0.39, "grad_norm": 0.544280640505497, "learning_rate": 1.4018101664879928e-05, "loss": 0.3351, "step": 8432 }, { "epoch": 0.39, "grad_norm": 0.5099393882434021, "learning_rate": 1.4016739098567578e-05, "loss": 0.2912, "step": 8433 }, { "epoch": 0.39, "grad_norm": 0.3061957128159386, "learning_rate": 1.4015376443328492e-05, "loss": 0.2655, "step": 8434 }, { "epoch": 0.39, "grad_norm": 0.3032689568287926, "learning_rate": 1.4014013699192844e-05, "loss": 0.1833, "step": 8435 }, { "epoch": 0.39, "grad_norm": 0.8849504701893046, "learning_rate": 1.40126508661908e-05, "loss": 0.5205, "step": 8436 }, { "epoch": 0.39, "grad_norm": 0.39717313317905134, "learning_rate": 1.4011287944352534e-05, "loss": 0.3223, "step": 8437 }, { "epoch": 0.39, "grad_norm": 0.33860011089813125, "learning_rate": 1.4009924933708216e-05, "loss": 0.3001, "step": 8438 }, { "epoch": 0.39, "grad_norm": 0.3014424498564491, "learning_rate": 1.400856183428803e-05, "loss": 0.1792, "step": 8439 }, { "epoch": 0.39, "grad_norm": 0.3488479813098447, "learning_rate": 1.4007198646122146e-05, "loss": 0.2338, "step": 8440 }, { "epoch": 0.39, "grad_norm": 1.0454745472972045, "learning_rate": 1.4005835369240748e-05, "loss": 0.6543, "step": 8441 }, { "epoch": 0.39, "grad_norm": 0.3697541376870566, "learning_rate": 1.4004472003674015e-05, "loss": 0.3042, "step": 8442 }, { "epoch": 0.39, "grad_norm": 0.3816200180279807, "learning_rate": 1.400310854945213e-05, "loss": 0.3064, "step": 8443 }, { "epoch": 0.39, "grad_norm": 0.9658734496525833, "learning_rate": 1.4001745006605281e-05, "loss": 0.494, "step": 8444 }, { "epoch": 0.39, "grad_norm": 0.37319705630787964, "learning_rate": 1.4000381375163652e-05, "loss": 0.2762, "step": 8445 }, { "epoch": 0.39, "grad_norm": 0.39397362636972316, "learning_rate": 1.3999017655157436e-05, "loss": 0.2403, "step": 8446 }, { "epoch": 0.39, "grad_norm": 0.3135257580097957, "learning_rate": 1.3997653846616825e-05, "loss": 0.2279, "step": 8447 }, { "epoch": 0.39, "grad_norm": 0.6525687511957984, "learning_rate": 1.399628994957201e-05, "loss": 0.3726, "step": 8448 }, { "epoch": 0.39, "grad_norm": 0.4407266394218482, "learning_rate": 1.3994925964053187e-05, "loss": 0.3021, "step": 8449 }, { "epoch": 0.39, "grad_norm": 0.36298166467163834, "learning_rate": 1.3993561890090555e-05, "loss": 0.2882, "step": 8450 }, { "epoch": 0.39, "grad_norm": 0.9384397449173023, "learning_rate": 1.399219772771431e-05, "loss": 0.602, "step": 8451 }, { "epoch": 0.39, "grad_norm": 0.25441765582543197, "learning_rate": 1.3990833476954654e-05, "loss": 0.1476, "step": 8452 }, { "epoch": 0.39, "grad_norm": 0.30130512182544705, "learning_rate": 1.3989469137841796e-05, "loss": 0.2638, "step": 8453 }, { "epoch": 0.39, "grad_norm": 0.5048584270870148, "learning_rate": 1.3988104710405935e-05, "loss": 0.346, "step": 8454 }, { "epoch": 0.39, "grad_norm": 0.36874212917559185, "learning_rate": 1.3986740194677276e-05, "loss": 0.2467, "step": 8455 }, { "epoch": 0.39, "grad_norm": 0.5650466546811083, "learning_rate": 1.3985375590686032e-05, "loss": 0.411, "step": 8456 }, { "epoch": 0.39, "grad_norm": 0.5477315217213206, "learning_rate": 1.3984010898462417e-05, "loss": 0.378, "step": 8457 }, { "epoch": 0.39, "grad_norm": 0.24891331773560105, "learning_rate": 1.3982646118036636e-05, "loss": 0.1791, "step": 8458 }, { "epoch": 0.39, "grad_norm": 0.34171174029909035, "learning_rate": 1.3981281249438912e-05, "loss": 0.256, "step": 8459 }, { "epoch": 0.39, "grad_norm": 0.6123843231220961, "learning_rate": 1.3979916292699459e-05, "loss": 0.4523, "step": 8460 }, { "epoch": 0.39, "grad_norm": 0.35757927094487424, "learning_rate": 1.3978551247848493e-05, "loss": 0.2186, "step": 8461 }, { "epoch": 0.39, "grad_norm": 0.4092818692279441, "learning_rate": 1.3977186114916237e-05, "loss": 0.3525, "step": 8462 }, { "epoch": 0.39, "grad_norm": 1.0717378054467916, "learning_rate": 1.3975820893932915e-05, "loss": 0.6972, "step": 8463 }, { "epoch": 0.39, "grad_norm": 0.4293976133490958, "learning_rate": 1.3974455584928748e-05, "loss": 0.2555, "step": 8464 }, { "epoch": 0.39, "grad_norm": 0.2643967117599458, "learning_rate": 1.397309018793397e-05, "loss": 0.2137, "step": 8465 }, { "epoch": 0.39, "grad_norm": 0.45123197854750835, "learning_rate": 1.3971724702978797e-05, "loss": 0.3619, "step": 8466 }, { "epoch": 0.39, "grad_norm": 0.9838178837311349, "learning_rate": 1.3970359130093472e-05, "loss": 0.556, "step": 8467 }, { "epoch": 0.39, "grad_norm": 0.3711195720528197, "learning_rate": 1.396899346930822e-05, "loss": 0.252, "step": 8468 }, { "epoch": 0.39, "grad_norm": 0.4184727112273616, "learning_rate": 1.396762772065328e-05, "loss": 0.3446, "step": 8469 }, { "epoch": 0.39, "grad_norm": 0.6404714497359931, "learning_rate": 1.3966261884158886e-05, "loss": 0.3513, "step": 8470 }, { "epoch": 0.39, "grad_norm": 0.2883200748739, "learning_rate": 1.396489595985528e-05, "loss": 0.24, "step": 8471 }, { "epoch": 0.39, "grad_norm": 0.4235786280547083, "learning_rate": 1.3963529947772694e-05, "loss": 0.2843, "step": 8472 }, { "epoch": 0.39, "grad_norm": 0.3946153849328818, "learning_rate": 1.3962163847941377e-05, "loss": 0.2833, "step": 8473 }, { "epoch": 0.39, "grad_norm": 0.3555982280832828, "learning_rate": 1.396079766039157e-05, "loss": 0.2972, "step": 8474 }, { "epoch": 0.39, "grad_norm": 0.9023830755959277, "learning_rate": 1.3959431385153518e-05, "loss": 0.4539, "step": 8475 }, { "epoch": 0.39, "grad_norm": 0.4427341069784527, "learning_rate": 1.3958065022257477e-05, "loss": 0.3485, "step": 8476 }, { "epoch": 0.39, "grad_norm": 0.44892885144620304, "learning_rate": 1.395669857173369e-05, "loss": 0.3297, "step": 8477 }, { "epoch": 0.39, "grad_norm": 0.271523710271832, "learning_rate": 1.3955332033612408e-05, "loss": 0.1984, "step": 8478 }, { "epoch": 0.39, "grad_norm": 0.664420495817769, "learning_rate": 1.3953965407923886e-05, "loss": 0.3441, "step": 8479 }, { "epoch": 0.39, "grad_norm": 0.44802949150614396, "learning_rate": 1.3952598694698383e-05, "loss": 0.3437, "step": 8480 }, { "epoch": 0.39, "grad_norm": 0.3887416718120399, "learning_rate": 1.3951231893966153e-05, "loss": 0.2813, "step": 8481 }, { "epoch": 0.39, "grad_norm": 0.762944238041198, "learning_rate": 1.3949865005757457e-05, "loss": 0.4188, "step": 8482 }, { "epoch": 0.39, "grad_norm": 0.41592069846814067, "learning_rate": 1.394849803010256e-05, "loss": 0.345, "step": 8483 }, { "epoch": 0.39, "grad_norm": 0.2950019950437227, "learning_rate": 1.394713096703172e-05, "loss": 0.1558, "step": 8484 }, { "epoch": 0.39, "grad_norm": 0.38871049944651764, "learning_rate": 1.3945763816575201e-05, "loss": 0.2958, "step": 8485 }, { "epoch": 0.39, "grad_norm": 0.3876362689948571, "learning_rate": 1.394439657876328e-05, "loss": 0.2703, "step": 8486 }, { "epoch": 0.39, "grad_norm": 0.847185267272819, "learning_rate": 1.394302925362622e-05, "loss": 0.5585, "step": 8487 }, { "epoch": 0.39, "grad_norm": 0.550811164903522, "learning_rate": 1.3941661841194289e-05, "loss": 0.308, "step": 8488 }, { "epoch": 0.39, "grad_norm": 0.305925136493401, "learning_rate": 1.3940294341497766e-05, "loss": 0.2999, "step": 8489 }, { "epoch": 0.39, "grad_norm": 0.5358463537813835, "learning_rate": 1.3938926754566922e-05, "loss": 0.3734, "step": 8490 }, { "epoch": 0.39, "grad_norm": 0.2133394742991905, "learning_rate": 1.393755908043204e-05, "loss": 0.1076, "step": 8491 }, { "epoch": 0.39, "grad_norm": 0.45524234141582276, "learning_rate": 1.393619131912339e-05, "loss": 0.3638, "step": 8492 }, { "epoch": 0.39, "grad_norm": 0.41980553631650835, "learning_rate": 1.393482347067126e-05, "loss": 0.3377, "step": 8493 }, { "epoch": 0.39, "grad_norm": 0.8103515758561848, "learning_rate": 1.3933455535105932e-05, "loss": 0.3086, "step": 8494 }, { "epoch": 0.39, "grad_norm": 0.42955318250226254, "learning_rate": 1.393208751245769e-05, "loss": 0.3205, "step": 8495 }, { "epoch": 0.39, "grad_norm": 0.5213164709636929, "learning_rate": 1.3930719402756818e-05, "loss": 0.3206, "step": 8496 }, { "epoch": 0.39, "grad_norm": 0.22754755576489064, "learning_rate": 1.3929351206033607e-05, "loss": 0.1973, "step": 8497 }, { "epoch": 0.39, "grad_norm": 0.40256385885824786, "learning_rate": 1.3927982922318349e-05, "loss": 0.299, "step": 8498 }, { "epoch": 0.39, "grad_norm": 0.7297263084128381, "learning_rate": 1.3926614551641335e-05, "loss": 0.4933, "step": 8499 }, { "epoch": 0.39, "grad_norm": 0.7301142126172254, "learning_rate": 1.392524609403286e-05, "loss": 0.4733, "step": 8500 }, { "epoch": 0.39, "grad_norm": 0.3265960954692485, "learning_rate": 1.3923877549523219e-05, "loss": 0.2285, "step": 8501 }, { "epoch": 0.39, "grad_norm": 0.3469803655585575, "learning_rate": 1.392250891814271e-05, "loss": 0.2375, "step": 8502 }, { "epoch": 0.39, "grad_norm": 0.4321739886589292, "learning_rate": 1.3921140199921635e-05, "loss": 0.2752, "step": 8503 }, { "epoch": 0.39, "grad_norm": 0.3460432679318932, "learning_rate": 1.3919771394890293e-05, "loss": 0.223, "step": 8504 }, { "epoch": 0.39, "grad_norm": 0.4348710954226238, "learning_rate": 1.391840250307899e-05, "loss": 0.3391, "step": 8505 }, { "epoch": 0.39, "grad_norm": 0.8265907255909595, "learning_rate": 1.3917033524518035e-05, "loss": 0.5123, "step": 8506 }, { "epoch": 0.39, "grad_norm": 0.3489211464382827, "learning_rate": 1.3915664459237735e-05, "loss": 0.2117, "step": 8507 }, { "epoch": 0.39, "grad_norm": 0.9873339184232549, "learning_rate": 1.3914295307268396e-05, "loss": 0.5917, "step": 8508 }, { "epoch": 0.39, "grad_norm": 0.25125081952338174, "learning_rate": 1.3912926068640326e-05, "loss": 0.2104, "step": 8509 }, { "epoch": 0.39, "grad_norm": 0.392106905491276, "learning_rate": 1.3911556743383852e-05, "loss": 0.2346, "step": 8510 }, { "epoch": 0.39, "grad_norm": 0.8862667865136324, "learning_rate": 1.3910187331529277e-05, "loss": 0.4758, "step": 8511 }, { "epoch": 0.39, "grad_norm": 0.8121733769609694, "learning_rate": 1.3908817833106927e-05, "loss": 0.4823, "step": 8512 }, { "epoch": 0.39, "grad_norm": 0.33753175505956434, "learning_rate": 1.3907448248147112e-05, "loss": 0.2917, "step": 8513 }, { "epoch": 0.39, "grad_norm": 0.4311536816934009, "learning_rate": 1.3906078576680165e-05, "loss": 0.2489, "step": 8514 }, { "epoch": 0.39, "grad_norm": 0.4118459922090234, "learning_rate": 1.3904708818736397e-05, "loss": 0.298, "step": 8515 }, { "epoch": 0.39, "grad_norm": 0.40259324114076384, "learning_rate": 1.3903338974346143e-05, "loss": 0.3133, "step": 8516 }, { "epoch": 0.39, "grad_norm": 0.5330180654191928, "learning_rate": 1.3901969043539727e-05, "loss": 0.3049, "step": 8517 }, { "epoch": 0.39, "grad_norm": 0.4918108665273485, "learning_rate": 1.3900599026347474e-05, "loss": 0.3012, "step": 8518 }, { "epoch": 0.39, "grad_norm": 0.37210821466478, "learning_rate": 1.3899228922799721e-05, "loss": 0.2698, "step": 8519 }, { "epoch": 0.39, "grad_norm": 0.4926874018782073, "learning_rate": 1.3897858732926794e-05, "loss": 0.2857, "step": 8520 }, { "epoch": 0.39, "grad_norm": 0.39544578983252554, "learning_rate": 1.3896488456759034e-05, "loss": 0.2839, "step": 8521 }, { "epoch": 0.39, "grad_norm": 0.3521972793437678, "learning_rate": 1.3895118094326776e-05, "loss": 0.2801, "step": 8522 }, { "epoch": 0.39, "grad_norm": 0.5917044218146319, "learning_rate": 1.3893747645660357e-05, "loss": 0.3738, "step": 8523 }, { "epoch": 0.39, "grad_norm": 0.33872910584403115, "learning_rate": 1.3892377110790117e-05, "loss": 0.24, "step": 8524 }, { "epoch": 0.39, "grad_norm": 0.29950024922731255, "learning_rate": 1.38910064897464e-05, "loss": 0.2535, "step": 8525 }, { "epoch": 0.39, "grad_norm": 1.3657862338424902, "learning_rate": 1.3889635782559548e-05, "loss": 0.7538, "step": 8526 }, { "epoch": 0.39, "grad_norm": 0.6790184248671534, "learning_rate": 1.388826498925991e-05, "loss": 0.354, "step": 8527 }, { "epoch": 0.39, "grad_norm": 0.36538731649848455, "learning_rate": 1.3886894109877832e-05, "loss": 0.2871, "step": 8528 }, { "epoch": 0.39, "grad_norm": 0.38833651903061167, "learning_rate": 1.3885523144443665e-05, "loss": 0.3105, "step": 8529 }, { "epoch": 0.39, "grad_norm": 0.25278595991068525, "learning_rate": 1.3884152092987762e-05, "loss": 0.1321, "step": 8530 }, { "epoch": 0.39, "grad_norm": 0.40154922520378683, "learning_rate": 1.3882780955540474e-05, "loss": 0.255, "step": 8531 }, { "epoch": 0.39, "grad_norm": 0.5363540839521053, "learning_rate": 1.3881409732132159e-05, "loss": 0.3501, "step": 8532 }, { "epoch": 0.39, "grad_norm": 0.3891319896615226, "learning_rate": 1.388003842279317e-05, "loss": 0.2721, "step": 8533 }, { "epoch": 0.39, "grad_norm": 0.3924241639750246, "learning_rate": 1.3878667027553871e-05, "loss": 0.3081, "step": 8534 }, { "epoch": 0.39, "grad_norm": 0.9071713896189254, "learning_rate": 1.3877295546444627e-05, "loss": 0.507, "step": 8535 }, { "epoch": 0.39, "grad_norm": 0.2350539384135555, "learning_rate": 1.3875923979495793e-05, "loss": 0.1817, "step": 8536 }, { "epoch": 0.39, "grad_norm": 0.3756452380790013, "learning_rate": 1.3874552326737736e-05, "loss": 0.2782, "step": 8537 }, { "epoch": 0.39, "grad_norm": 0.7953992082280263, "learning_rate": 1.3873180588200827e-05, "loss": 0.5154, "step": 8538 }, { "epoch": 0.39, "grad_norm": 0.8467157834770388, "learning_rate": 1.3871808763915434e-05, "loss": 0.4623, "step": 8539 }, { "epoch": 0.39, "grad_norm": 0.3635755082421646, "learning_rate": 1.3870436853911924e-05, "loss": 0.2254, "step": 8540 }, { "epoch": 0.39, "grad_norm": 0.39805891412900407, "learning_rate": 1.3869064858220673e-05, "loss": 0.3426, "step": 8541 }, { "epoch": 0.39, "grad_norm": 0.4985862714740669, "learning_rate": 1.3867692776872057e-05, "loss": 0.3133, "step": 8542 }, { "epoch": 0.39, "grad_norm": 0.2543250971402099, "learning_rate": 1.3866320609896449e-05, "loss": 0.1796, "step": 8543 }, { "epoch": 0.39, "grad_norm": 0.8933081037109439, "learning_rate": 1.3864948357324228e-05, "loss": 0.5262, "step": 8544 }, { "epoch": 0.39, "grad_norm": 0.3785326900899198, "learning_rate": 1.3863576019185776e-05, "loss": 0.3467, "step": 8545 }, { "epoch": 0.39, "grad_norm": 0.34074301743836977, "learning_rate": 1.3862203595511476e-05, "loss": 0.2149, "step": 8546 }, { "epoch": 0.39, "grad_norm": 1.4116896385471303, "learning_rate": 1.3860831086331711e-05, "loss": 0.8098, "step": 8547 }, { "epoch": 0.39, "grad_norm": 0.3725347958256613, "learning_rate": 1.3859458491676868e-05, "loss": 0.3533, "step": 8548 }, { "epoch": 0.39, "grad_norm": 0.24237380130949845, "learning_rate": 1.3858085811577333e-05, "loss": 0.136, "step": 8549 }, { "epoch": 0.39, "grad_norm": 0.4108720602231526, "learning_rate": 1.3856713046063494e-05, "loss": 0.2878, "step": 8550 }, { "epoch": 0.39, "grad_norm": 0.7534602379688196, "learning_rate": 1.3855340195165748e-05, "loss": 0.4351, "step": 8551 }, { "epoch": 0.39, "grad_norm": 0.7357473279493222, "learning_rate": 1.3853967258914483e-05, "loss": 0.4393, "step": 8552 }, { "epoch": 0.39, "grad_norm": 0.3476098133818706, "learning_rate": 1.3852594237340103e-05, "loss": 0.2495, "step": 8553 }, { "epoch": 0.39, "grad_norm": 0.3298992903722264, "learning_rate": 1.3851221130472994e-05, "loss": 0.2206, "step": 8554 }, { "epoch": 0.39, "grad_norm": 0.47718717855671533, "learning_rate": 1.3849847938343564e-05, "loss": 0.2811, "step": 8555 }, { "epoch": 0.39, "grad_norm": 0.40029906194730525, "learning_rate": 1.3848474660982208e-05, "loss": 0.3039, "step": 8556 }, { "epoch": 0.39, "grad_norm": 0.4841693401790011, "learning_rate": 1.3847101298419334e-05, "loss": 0.3788, "step": 8557 }, { "epoch": 0.39, "grad_norm": 0.4468751660134071, "learning_rate": 1.3845727850685347e-05, "loss": 0.2963, "step": 8558 }, { "epoch": 0.39, "grad_norm": 0.45348599711579485, "learning_rate": 1.3844354317810651e-05, "loss": 0.3194, "step": 8559 }, { "epoch": 0.39, "grad_norm": 0.3666102181793443, "learning_rate": 1.3842980699825655e-05, "loss": 0.3151, "step": 8560 }, { "epoch": 0.39, "grad_norm": 0.3558561702445882, "learning_rate": 1.384160699676077e-05, "loss": 0.2033, "step": 8561 }, { "epoch": 0.39, "grad_norm": 0.4306755303296113, "learning_rate": 1.384023320864641e-05, "loss": 0.2978, "step": 8562 }, { "epoch": 0.39, "grad_norm": 0.42462646802794346, "learning_rate": 1.3838859335512986e-05, "loss": 0.2937, "step": 8563 }, { "epoch": 0.39, "grad_norm": 0.3206359744488347, "learning_rate": 1.3837485377390918e-05, "loss": 0.2668, "step": 8564 }, { "epoch": 0.39, "grad_norm": 0.4073148246289972, "learning_rate": 1.3836111334310622e-05, "loss": 0.3229, "step": 8565 }, { "epoch": 0.39, "grad_norm": 0.2926870897542749, "learning_rate": 1.3834737206302519e-05, "loss": 0.1226, "step": 8566 }, { "epoch": 0.39, "grad_norm": 0.392373176057603, "learning_rate": 1.3833362993397028e-05, "loss": 0.3063, "step": 8567 }, { "epoch": 0.39, "grad_norm": 0.46194554178905367, "learning_rate": 1.3831988695624576e-05, "loss": 0.352, "step": 8568 }, { "epoch": 0.39, "grad_norm": 0.27144045868036837, "learning_rate": 1.3830614313015587e-05, "loss": 0.2196, "step": 8569 }, { "epoch": 0.39, "grad_norm": 0.6702440454370174, "learning_rate": 1.382923984560049e-05, "loss": 0.3708, "step": 8570 }, { "epoch": 0.39, "grad_norm": 0.43832872911757315, "learning_rate": 1.3827865293409715e-05, "loss": 0.3446, "step": 8571 }, { "epoch": 0.39, "grad_norm": 0.3147556300917032, "learning_rate": 1.3826490656473692e-05, "loss": 0.2626, "step": 8572 }, { "epoch": 0.39, "grad_norm": 0.5029217273093667, "learning_rate": 1.382511593482285e-05, "loss": 0.3283, "step": 8573 }, { "epoch": 0.39, "grad_norm": 0.48749862615751444, "learning_rate": 1.382374112848763e-05, "loss": 0.342, "step": 8574 }, { "epoch": 0.39, "grad_norm": 0.21966296374757854, "learning_rate": 1.3822366237498466e-05, "loss": 0.098, "step": 8575 }, { "epoch": 0.39, "grad_norm": 0.32125786250437016, "learning_rate": 1.3820991261885798e-05, "loss": 0.2688, "step": 8576 }, { "epoch": 0.39, "grad_norm": 0.3841107663733378, "learning_rate": 1.381961620168007e-05, "loss": 0.3168, "step": 8577 }, { "epoch": 0.39, "grad_norm": 0.9182309082448304, "learning_rate": 1.3818241056911715e-05, "loss": 0.5026, "step": 8578 }, { "epoch": 0.39, "grad_norm": 0.41796742487398103, "learning_rate": 1.3816865827611187e-05, "loss": 0.2587, "step": 8579 }, { "epoch": 0.39, "grad_norm": 0.38813807138056006, "learning_rate": 1.3815490513808925e-05, "loss": 0.328, "step": 8580 }, { "epoch": 0.39, "grad_norm": 0.2744726806605093, "learning_rate": 1.3814115115535382e-05, "loss": 0.2007, "step": 8581 }, { "epoch": 0.39, "grad_norm": 0.3476757208034699, "learning_rate": 1.3812739632821006e-05, "loss": 0.2098, "step": 8582 }, { "epoch": 0.39, "grad_norm": 0.5380871886146342, "learning_rate": 1.3811364065696251e-05, "loss": 0.3998, "step": 8583 }, { "epoch": 0.39, "grad_norm": 0.3314041636451239, "learning_rate": 1.3809988414191566e-05, "loss": 0.3139, "step": 8584 }, { "epoch": 0.39, "grad_norm": 0.6697829072305175, "learning_rate": 1.3808612678337415e-05, "loss": 0.2801, "step": 8585 }, { "epoch": 0.39, "grad_norm": 0.3923519072713705, "learning_rate": 1.3807236858164243e-05, "loss": 0.3145, "step": 8586 }, { "epoch": 0.39, "grad_norm": 0.35885279339079024, "learning_rate": 1.3805860953702522e-05, "loss": 0.2019, "step": 8587 }, { "epoch": 0.39, "grad_norm": 0.3960835542833339, "learning_rate": 1.3804484964982705e-05, "loss": 0.3114, "step": 8588 }, { "epoch": 0.39, "grad_norm": 0.3471894063103433, "learning_rate": 1.3803108892035259e-05, "loss": 0.2416, "step": 8589 }, { "epoch": 0.39, "grad_norm": 0.6102921391444078, "learning_rate": 1.3801732734890645e-05, "loss": 0.4456, "step": 8590 }, { "epoch": 0.39, "grad_norm": 0.7977910041127219, "learning_rate": 1.3800356493579336e-05, "loss": 0.4504, "step": 8591 }, { "epoch": 0.39, "grad_norm": 0.27585825792928814, "learning_rate": 1.3798980168131795e-05, "loss": 0.2409, "step": 8592 }, { "epoch": 0.39, "grad_norm": 0.47023750366455985, "learning_rate": 1.3797603758578496e-05, "loss": 0.298, "step": 8593 }, { "epoch": 0.39, "grad_norm": 0.361229902662536, "learning_rate": 1.379622726494991e-05, "loss": 0.245, "step": 8594 }, { "epoch": 0.39, "grad_norm": 0.3450307824261173, "learning_rate": 1.3794850687276508e-05, "loss": 0.274, "step": 8595 }, { "epoch": 0.39, "grad_norm": 0.3480710410329912, "learning_rate": 1.379347402558877e-05, "loss": 0.3059, "step": 8596 }, { "epoch": 0.39, "grad_norm": 0.49593250225412516, "learning_rate": 1.3792097279917175e-05, "loss": 0.3011, "step": 8597 }, { "epoch": 0.39, "grad_norm": 0.41360550626745235, "learning_rate": 1.3790720450292201e-05, "loss": 0.2331, "step": 8598 }, { "epoch": 0.4, "grad_norm": 0.29420693063586745, "learning_rate": 1.3789343536744329e-05, "loss": 0.1491, "step": 8599 }, { "epoch": 0.4, "grad_norm": 0.30848477002234603, "learning_rate": 1.3787966539304046e-05, "loss": 0.2668, "step": 8600 }, { "epoch": 0.4, "grad_norm": 0.4493017563020744, "learning_rate": 1.378658945800183e-05, "loss": 0.3468, "step": 8601 }, { "epoch": 0.4, "grad_norm": 0.6156918714497636, "learning_rate": 1.3785212292868178e-05, "loss": 0.3641, "step": 8602 }, { "epoch": 0.4, "grad_norm": 0.5021558443670983, "learning_rate": 1.3783835043933569e-05, "loss": 0.3243, "step": 8603 }, { "epoch": 0.4, "grad_norm": 0.36670039871869275, "learning_rate": 1.3782457711228503e-05, "loss": 0.3004, "step": 8604 }, { "epoch": 0.4, "grad_norm": 0.3383115096058593, "learning_rate": 1.3781080294783467e-05, "loss": 0.2066, "step": 8605 }, { "epoch": 0.4, "grad_norm": 0.9327065962507903, "learning_rate": 1.377970279462896e-05, "loss": 0.5537, "step": 8606 }, { "epoch": 0.4, "grad_norm": 0.36457560839679215, "learning_rate": 1.3778325210795474e-05, "loss": 0.2849, "step": 8607 }, { "epoch": 0.4, "grad_norm": 0.28168257873170016, "learning_rate": 1.3776947543313508e-05, "loss": 0.2259, "step": 8608 }, { "epoch": 0.4, "grad_norm": 0.7442547475732373, "learning_rate": 1.3775569792213565e-05, "loss": 0.4388, "step": 8609 }, { "epoch": 0.4, "grad_norm": 0.3979109251120208, "learning_rate": 1.3774191957526144e-05, "loss": 0.2649, "step": 8610 }, { "epoch": 0.4, "grad_norm": 0.6701917452710291, "learning_rate": 1.3772814039281754e-05, "loss": 0.2384, "step": 8611 }, { "epoch": 0.4, "grad_norm": 0.3538263325465825, "learning_rate": 1.3771436037510897e-05, "loss": 0.3426, "step": 8612 }, { "epoch": 0.4, "grad_norm": 0.39564838793694423, "learning_rate": 1.377005795224408e-05, "loss": 0.3283, "step": 8613 }, { "epoch": 0.4, "grad_norm": 0.8961983213977719, "learning_rate": 1.3768679783511814e-05, "loss": 0.5772, "step": 8614 }, { "epoch": 0.4, "grad_norm": 0.25300801278634716, "learning_rate": 1.376730153134461e-05, "loss": 0.152, "step": 8615 }, { "epoch": 0.4, "grad_norm": 0.34023061137446936, "learning_rate": 1.376592319577298e-05, "loss": 0.2856, "step": 8616 }, { "epoch": 0.4, "grad_norm": 0.9183032076479685, "learning_rate": 1.376454477682744e-05, "loss": 0.5284, "step": 8617 }, { "epoch": 0.4, "grad_norm": 0.47746295526648286, "learning_rate": 1.3763166274538509e-05, "loss": 0.2674, "step": 8618 }, { "epoch": 0.4, "grad_norm": 0.4164348285000339, "learning_rate": 1.3761787688936701e-05, "loss": 0.3309, "step": 8619 }, { "epoch": 0.4, "grad_norm": 0.3906352575147355, "learning_rate": 1.376040902005254e-05, "loss": 0.3595, "step": 8620 }, { "epoch": 0.4, "grad_norm": 0.20291304617321065, "learning_rate": 1.3759030267916549e-05, "loss": 0.0731, "step": 8621 }, { "epoch": 0.4, "grad_norm": 0.40933179720041696, "learning_rate": 1.375765143255925e-05, "loss": 0.2979, "step": 8622 }, { "epoch": 0.4, "grad_norm": 0.9888147321932022, "learning_rate": 1.3756272514011169e-05, "loss": 0.5189, "step": 8623 }, { "epoch": 0.4, "grad_norm": 0.3854084683881432, "learning_rate": 1.3754893512302838e-05, "loss": 0.3037, "step": 8624 }, { "epoch": 0.4, "grad_norm": 0.3997433447191998, "learning_rate": 1.375351442746478e-05, "loss": 0.2932, "step": 8625 }, { "epoch": 0.4, "grad_norm": 0.41829591931841, "learning_rate": 1.3752135259527533e-05, "loss": 0.2915, "step": 8626 }, { "epoch": 0.4, "grad_norm": 0.4677416379920142, "learning_rate": 1.3750756008521626e-05, "loss": 0.2019, "step": 8627 }, { "epoch": 0.4, "grad_norm": 0.27172381162365083, "learning_rate": 1.3749376674477598e-05, "loss": 0.2302, "step": 8628 }, { "epoch": 0.4, "grad_norm": 0.985807238685192, "learning_rate": 1.3747997257425982e-05, "loss": 0.5732, "step": 8629 }, { "epoch": 0.4, "grad_norm": 0.7186049324915126, "learning_rate": 1.374661775739732e-05, "loss": 0.4207, "step": 8630 }, { "epoch": 0.4, "grad_norm": 0.3186680560885979, "learning_rate": 1.374523817442215e-05, "loss": 0.2202, "step": 8631 }, { "epoch": 0.4, "grad_norm": 0.4006043365775831, "learning_rate": 1.3743858508531018e-05, "loss": 0.3563, "step": 8632 }, { "epoch": 0.4, "grad_norm": 0.28666518186036966, "learning_rate": 1.3742478759754466e-05, "loss": 0.1563, "step": 8633 }, { "epoch": 0.4, "grad_norm": 0.3055597939352883, "learning_rate": 1.3741098928123044e-05, "loss": 0.1989, "step": 8634 }, { "epoch": 0.4, "grad_norm": 1.333643226836793, "learning_rate": 1.3739719013667297e-05, "loss": 0.4676, "step": 8635 }, { "epoch": 0.4, "grad_norm": 0.4556038645961717, "learning_rate": 1.3738339016417774e-05, "loss": 0.3234, "step": 8636 }, { "epoch": 0.4, "grad_norm": 0.3514975195170233, "learning_rate": 1.373695893640503e-05, "loss": 0.2029, "step": 8637 }, { "epoch": 0.4, "grad_norm": 0.8892435360895058, "learning_rate": 1.3735578773659612e-05, "loss": 0.6352, "step": 8638 }, { "epoch": 0.4, "grad_norm": 0.2697349763991779, "learning_rate": 1.3734198528212086e-05, "loss": 0.218, "step": 8639 }, { "epoch": 0.4, "grad_norm": 0.40669278570064593, "learning_rate": 1.3732818200092998e-05, "loss": 0.2968, "step": 8640 }, { "epoch": 0.4, "grad_norm": 0.9426623030004377, "learning_rate": 1.3731437789332917e-05, "loss": 0.3378, "step": 8641 }, { "epoch": 0.4, "grad_norm": 1.1173107795000576, "learning_rate": 1.37300572959624e-05, "loss": 0.5854, "step": 8642 }, { "epoch": 0.4, "grad_norm": 0.3343465471799973, "learning_rate": 1.372867672001201e-05, "loss": 0.2864, "step": 8643 }, { "epoch": 0.4, "grad_norm": 0.37236350398819035, "learning_rate": 1.3727296061512307e-05, "loss": 0.286, "step": 8644 }, { "epoch": 0.4, "grad_norm": 0.2729334026807821, "learning_rate": 1.3725915320493865e-05, "loss": 0.1515, "step": 8645 }, { "epoch": 0.4, "grad_norm": 0.3879874357179999, "learning_rate": 1.3724534496987248e-05, "loss": 0.2687, "step": 8646 }, { "epoch": 0.4, "grad_norm": 0.82792246903581, "learning_rate": 1.372315359102303e-05, "loss": 0.3446, "step": 8647 }, { "epoch": 0.4, "grad_norm": 0.5621181163852842, "learning_rate": 1.3721772602631775e-05, "loss": 0.3575, "step": 8648 }, { "epoch": 0.4, "grad_norm": 0.3461963442451924, "learning_rate": 1.3720391531844066e-05, "loss": 0.2708, "step": 8649 }, { "epoch": 0.4, "grad_norm": 0.939207857358149, "learning_rate": 1.371901037869047e-05, "loss": 0.4102, "step": 8650 }, { "epoch": 0.4, "grad_norm": 0.27595802012328063, "learning_rate": 1.371762914320157e-05, "loss": 0.2277, "step": 8651 }, { "epoch": 0.4, "grad_norm": 0.4058081968038128, "learning_rate": 1.3716247825407947e-05, "loss": 0.2582, "step": 8652 }, { "epoch": 0.4, "grad_norm": 0.5763086174772531, "learning_rate": 1.3714866425340176e-05, "loss": 0.4197, "step": 8653 }, { "epoch": 0.4, "grad_norm": 0.665686817200467, "learning_rate": 1.3713484943028843e-05, "loss": 0.4188, "step": 8654 }, { "epoch": 0.4, "grad_norm": 0.4198170071023023, "learning_rate": 1.3712103378504532e-05, "loss": 0.2519, "step": 8655 }, { "epoch": 0.4, "grad_norm": 0.41561431362723383, "learning_rate": 1.3710721731797831e-05, "loss": 0.3321, "step": 8656 }, { "epoch": 0.4, "grad_norm": 0.44795318829467673, "learning_rate": 1.3709340002939327e-05, "loss": 0.2395, "step": 8657 }, { "epoch": 0.4, "grad_norm": 0.45358081113982307, "learning_rate": 1.3707958191959609e-05, "loss": 0.2958, "step": 8658 }, { "epoch": 0.4, "grad_norm": 0.3314426129950775, "learning_rate": 1.3706576298889273e-05, "loss": 0.2741, "step": 8659 }, { "epoch": 0.4, "grad_norm": 0.5002454360741769, "learning_rate": 1.370519432375891e-05, "loss": 0.2632, "step": 8660 }, { "epoch": 0.4, "grad_norm": 0.3639338605036829, "learning_rate": 1.3703812266599113e-05, "loss": 0.291, "step": 8661 }, { "epoch": 0.4, "grad_norm": 0.8353738319844893, "learning_rate": 1.3702430127440484e-05, "loss": 0.5764, "step": 8662 }, { "epoch": 0.4, "grad_norm": 0.37522208023988485, "learning_rate": 1.3701047906313619e-05, "loss": 0.274, "step": 8663 }, { "epoch": 0.4, "grad_norm": 0.38085539086061726, "learning_rate": 1.3699665603249121e-05, "loss": 0.2747, "step": 8664 }, { "epoch": 0.4, "grad_norm": 0.346566065682707, "learning_rate": 1.3698283218277594e-05, "loss": 0.2474, "step": 8665 }, { "epoch": 0.4, "grad_norm": 1.2668059028589966, "learning_rate": 1.3696900751429642e-05, "loss": 0.6696, "step": 8666 }, { "epoch": 0.4, "grad_norm": 0.3625954250602369, "learning_rate": 1.369551820273587e-05, "loss": 0.2241, "step": 8667 }, { "epoch": 0.4, "grad_norm": 0.43985536436081485, "learning_rate": 1.3694135572226883e-05, "loss": 0.3336, "step": 8668 }, { "epoch": 0.4, "grad_norm": 0.7519150845287341, "learning_rate": 1.3692752859933299e-05, "loss": 0.537, "step": 8669 }, { "epoch": 0.4, "grad_norm": 0.33074245750827436, "learning_rate": 1.3691370065885723e-05, "loss": 0.2453, "step": 8670 }, { "epoch": 0.4, "grad_norm": 0.34771745181015057, "learning_rate": 1.3689987190114775e-05, "loss": 0.2403, "step": 8671 }, { "epoch": 0.4, "grad_norm": 0.30891014395848365, "learning_rate": 1.3688604232651064e-05, "loss": 0.2659, "step": 8672 }, { "epoch": 0.4, "grad_norm": 0.3363764272038372, "learning_rate": 1.3687221193525211e-05, "loss": 0.1941, "step": 8673 }, { "epoch": 0.4, "grad_norm": 0.5653594502781631, "learning_rate": 1.3685838072767832e-05, "loss": 0.4083, "step": 8674 }, { "epoch": 0.4, "grad_norm": 0.4060637346788626, "learning_rate": 1.3684454870409554e-05, "loss": 0.341, "step": 8675 }, { "epoch": 0.4, "grad_norm": 0.3721382384025377, "learning_rate": 1.3683071586480997e-05, "loss": 0.2102, "step": 8676 }, { "epoch": 0.4, "grad_norm": 0.3370288345469713, "learning_rate": 1.3681688221012784e-05, "loss": 0.2007, "step": 8677 }, { "epoch": 0.4, "grad_norm": 0.46616576075335564, "learning_rate": 1.368030477403554e-05, "loss": 0.2926, "step": 8678 }, { "epoch": 0.4, "grad_norm": 0.40971001522562034, "learning_rate": 1.3678921245579898e-05, "loss": 0.3056, "step": 8679 }, { "epoch": 0.4, "grad_norm": 0.3187020563129749, "learning_rate": 1.3677537635676484e-05, "loss": 0.2672, "step": 8680 }, { "epoch": 0.4, "grad_norm": 0.6481375902780272, "learning_rate": 1.367615394435593e-05, "loss": 0.4201, "step": 8681 }, { "epoch": 0.4, "grad_norm": 0.42457878461365256, "learning_rate": 1.3674770171648875e-05, "loss": 0.2884, "step": 8682 }, { "epoch": 0.4, "grad_norm": 0.27816786957335066, "learning_rate": 1.3673386317585946e-05, "loss": 0.1944, "step": 8683 }, { "epoch": 0.4, "grad_norm": 0.3436104352230831, "learning_rate": 1.3672002382197787e-05, "loss": 0.266, "step": 8684 }, { "epoch": 0.4, "grad_norm": 0.4409790981947072, "learning_rate": 1.3670618365515034e-05, "loss": 0.3099, "step": 8685 }, { "epoch": 0.4, "grad_norm": 0.4570934278665918, "learning_rate": 1.3669234267568325e-05, "loss": 0.2897, "step": 8686 }, { "epoch": 0.4, "grad_norm": 0.37401780532354695, "learning_rate": 1.3667850088388308e-05, "loss": 0.3325, "step": 8687 }, { "epoch": 0.4, "grad_norm": 0.46844276185607947, "learning_rate": 1.3666465828005626e-05, "loss": 0.3226, "step": 8688 }, { "epoch": 0.4, "grad_norm": 0.4911838502081208, "learning_rate": 1.3665081486450924e-05, "loss": 0.2571, "step": 8689 }, { "epoch": 0.4, "grad_norm": 0.34333891409734957, "learning_rate": 1.3663697063754853e-05, "loss": 0.2179, "step": 8690 }, { "epoch": 0.4, "grad_norm": 0.4003986229681892, "learning_rate": 1.3662312559948054e-05, "loss": 0.2954, "step": 8691 }, { "epoch": 0.4, "grad_norm": 0.3772491817417094, "learning_rate": 1.3660927975061188e-05, "loss": 0.3221, "step": 8692 }, { "epoch": 0.4, "grad_norm": 0.6132215052412152, "learning_rate": 1.3659543309124906e-05, "loss": 0.3512, "step": 8693 }, { "epoch": 0.4, "grad_norm": 0.59524903827153, "learning_rate": 1.3658158562169862e-05, "loss": 0.3449, "step": 8694 }, { "epoch": 0.4, "grad_norm": 0.38310771066277083, "learning_rate": 1.3656773734226714e-05, "loss": 0.3077, "step": 8695 }, { "epoch": 0.4, "grad_norm": 0.22732684083697202, "learning_rate": 1.3655388825326117e-05, "loss": 0.1623, "step": 8696 }, { "epoch": 0.4, "grad_norm": 0.5544553603461901, "learning_rate": 1.3654003835498737e-05, "loss": 0.3875, "step": 8697 }, { "epoch": 0.4, "grad_norm": 0.4317430241903873, "learning_rate": 1.3652618764775231e-05, "loss": 0.3441, "step": 8698 }, { "epoch": 0.4, "grad_norm": 0.3325118401168639, "learning_rate": 1.365123361318627e-05, "loss": 0.2403, "step": 8699 }, { "epoch": 0.4, "grad_norm": 0.6576025063661516, "learning_rate": 1.3649848380762513e-05, "loss": 0.3405, "step": 8700 }, { "epoch": 0.4, "grad_norm": 0.4256055319216024, "learning_rate": 1.3648463067534632e-05, "loss": 0.3268, "step": 8701 }, { "epoch": 0.4, "grad_norm": 0.32938920931524, "learning_rate": 1.3647077673533294e-05, "loss": 0.1575, "step": 8702 }, { "epoch": 0.4, "grad_norm": 0.30266021076061217, "learning_rate": 1.3645692198789173e-05, "loss": 0.2705, "step": 8703 }, { "epoch": 0.4, "grad_norm": 0.5634310216697219, "learning_rate": 1.3644306643332939e-05, "loss": 0.385, "step": 8704 }, { "epoch": 0.4, "grad_norm": 0.46310496006354945, "learning_rate": 1.3642921007195269e-05, "loss": 0.3233, "step": 8705 }, { "epoch": 0.4, "grad_norm": 0.3478454414447397, "learning_rate": 1.3641535290406837e-05, "loss": 0.2004, "step": 8706 }, { "epoch": 0.4, "grad_norm": 0.4427521005433169, "learning_rate": 1.3640149492998326e-05, "loss": 0.3428, "step": 8707 }, { "epoch": 0.4, "grad_norm": 0.5419645688317273, "learning_rate": 1.3638763615000412e-05, "loss": 0.4006, "step": 8708 }, { "epoch": 0.4, "grad_norm": 0.6814457043018262, "learning_rate": 1.363737765644378e-05, "loss": 0.3025, "step": 8709 }, { "epoch": 0.4, "grad_norm": 0.3978273375443263, "learning_rate": 1.3635991617359111e-05, "loss": 0.3283, "step": 8710 }, { "epoch": 0.4, "grad_norm": 0.2570544564849038, "learning_rate": 1.3634605497777094e-05, "loss": 0.2137, "step": 8711 }, { "epoch": 0.4, "grad_norm": 0.42478702714075917, "learning_rate": 1.3633219297728415e-05, "loss": 0.1695, "step": 8712 }, { "epoch": 0.4, "grad_norm": 0.4460605113562629, "learning_rate": 1.363183301724376e-05, "loss": 0.3134, "step": 8713 }, { "epoch": 0.4, "grad_norm": 0.5812377045420741, "learning_rate": 1.3630446656353823e-05, "loss": 0.3869, "step": 8714 }, { "epoch": 0.4, "grad_norm": 0.4867522575059979, "learning_rate": 1.3629060215089296e-05, "loss": 0.3507, "step": 8715 }, { "epoch": 0.4, "grad_norm": 0.3371382537219159, "learning_rate": 1.3627673693480874e-05, "loss": 0.2492, "step": 8716 }, { "epoch": 0.4, "grad_norm": 0.33416232002628654, "learning_rate": 1.3626287091559254e-05, "loss": 0.1652, "step": 8717 }, { "epoch": 0.4, "grad_norm": 0.6589556116555825, "learning_rate": 1.362490040935513e-05, "loss": 0.3624, "step": 8718 }, { "epoch": 0.4, "grad_norm": 0.2947981378644011, "learning_rate": 1.3623513646899207e-05, "loss": 0.2291, "step": 8719 }, { "epoch": 0.4, "grad_norm": 0.8265033885426144, "learning_rate": 1.3622126804222185e-05, "loss": 0.5417, "step": 8720 }, { "epoch": 0.4, "grad_norm": 0.5095397186311522, "learning_rate": 1.3620739881354763e-05, "loss": 0.3741, "step": 8721 }, { "epoch": 0.4, "grad_norm": 0.4328123526839169, "learning_rate": 1.3619352878327653e-05, "loss": 0.2303, "step": 8722 }, { "epoch": 0.4, "grad_norm": 0.3511611936312261, "learning_rate": 1.3617965795171558e-05, "loss": 0.2616, "step": 8723 }, { "epoch": 0.4, "grad_norm": 0.38555917957038816, "learning_rate": 1.3616578631917186e-05, "loss": 0.2259, "step": 8724 }, { "epoch": 0.4, "grad_norm": 0.3858369579159727, "learning_rate": 1.3615191388595248e-05, "loss": 0.2404, "step": 8725 }, { "epoch": 0.4, "grad_norm": 0.5416035939255519, "learning_rate": 1.361380406523646e-05, "loss": 0.3507, "step": 8726 }, { "epoch": 0.4, "grad_norm": 0.5008441631654945, "learning_rate": 1.3612416661871532e-05, "loss": 0.4134, "step": 8727 }, { "epoch": 0.4, "grad_norm": 0.39276595495173466, "learning_rate": 1.3611029178531179e-05, "loss": 0.3108, "step": 8728 }, { "epoch": 0.4, "grad_norm": 0.2376502035588918, "learning_rate": 1.3609641615246121e-05, "loss": 0.1226, "step": 8729 }, { "epoch": 0.4, "grad_norm": 0.5517193603015693, "learning_rate": 1.3608253972047078e-05, "loss": 0.3564, "step": 8730 }, { "epoch": 0.4, "grad_norm": 0.3222165623987306, "learning_rate": 1.3606866248964771e-05, "loss": 0.2714, "step": 8731 }, { "epoch": 0.4, "grad_norm": 0.5802938614525948, "learning_rate": 1.3605478446029918e-05, "loss": 0.3458, "step": 8732 }, { "epoch": 0.4, "grad_norm": 0.8822950210543202, "learning_rate": 1.3604090563273249e-05, "loss": 0.5559, "step": 8733 }, { "epoch": 0.4, "grad_norm": 0.4154395050784564, "learning_rate": 1.3602702600725488e-05, "loss": 0.2906, "step": 8734 }, { "epoch": 0.4, "grad_norm": 0.3304901320183655, "learning_rate": 1.3601314558417365e-05, "loss": 0.2656, "step": 8735 }, { "epoch": 0.4, "grad_norm": 0.24383654093517726, "learning_rate": 1.3599926436379609e-05, "loss": 0.1377, "step": 8736 }, { "epoch": 0.4, "grad_norm": 0.38804398660356565, "learning_rate": 1.359853823464295e-05, "loss": 0.2966, "step": 8737 }, { "epoch": 0.4, "grad_norm": 0.74164266417967, "learning_rate": 1.3597149953238122e-05, "loss": 0.3563, "step": 8738 }, { "epoch": 0.4, "grad_norm": 0.3693261980991238, "learning_rate": 1.3595761592195861e-05, "loss": 0.3253, "step": 8739 }, { "epoch": 0.4, "grad_norm": 0.3928545254552329, "learning_rate": 1.3594373151546904e-05, "loss": 0.2891, "step": 8740 }, { "epoch": 0.4, "grad_norm": 1.0163685219589829, "learning_rate": 1.3592984631321995e-05, "loss": 0.6771, "step": 8741 }, { "epoch": 0.4, "grad_norm": 0.24996498743501916, "learning_rate": 1.3591596031551865e-05, "loss": 0.1724, "step": 8742 }, { "epoch": 0.4, "grad_norm": 0.3374841038315729, "learning_rate": 1.3590207352267259e-05, "loss": 0.2553, "step": 8743 }, { "epoch": 0.4, "grad_norm": 0.9775017869969177, "learning_rate": 1.3588818593498926e-05, "loss": 0.4144, "step": 8744 }, { "epoch": 0.4, "grad_norm": 0.5228173664923256, "learning_rate": 1.3587429755277604e-05, "loss": 0.3035, "step": 8745 }, { "epoch": 0.4, "grad_norm": 0.40143151656805903, "learning_rate": 1.3586040837634049e-05, "loss": 0.3044, "step": 8746 }, { "epoch": 0.4, "grad_norm": 0.40744837410384854, "learning_rate": 1.3584651840599003e-05, "loss": 0.3194, "step": 8747 }, { "epoch": 0.4, "grad_norm": 0.3273173938903404, "learning_rate": 1.3583262764203222e-05, "loss": 0.1651, "step": 8748 }, { "epoch": 0.4, "grad_norm": 0.36273459660802265, "learning_rate": 1.3581873608477457e-05, "loss": 0.2799, "step": 8749 }, { "epoch": 0.4, "grad_norm": 0.46580172255496183, "learning_rate": 1.3580484373452462e-05, "loss": 0.3351, "step": 8750 }, { "epoch": 0.4, "grad_norm": 0.43703808875263966, "learning_rate": 1.3579095059158993e-05, "loss": 0.2981, "step": 8751 }, { "epoch": 0.4, "grad_norm": 0.3806374779632683, "learning_rate": 1.357770566562781e-05, "loss": 0.3024, "step": 8752 }, { "epoch": 0.4, "grad_norm": 1.2703854131245793, "learning_rate": 1.3576316192889673e-05, "loss": 0.6439, "step": 8753 }, { "epoch": 0.4, "grad_norm": 0.28833614921138023, "learning_rate": 1.3574926640975341e-05, "loss": 0.2472, "step": 8754 }, { "epoch": 0.4, "grad_norm": 0.3237312757462981, "learning_rate": 1.3573537009915579e-05, "loss": 0.2366, "step": 8755 }, { "epoch": 0.4, "grad_norm": 0.43458492915488356, "learning_rate": 1.357214729974115e-05, "loss": 0.2776, "step": 8756 }, { "epoch": 0.4, "grad_norm": 1.160604188342031, "learning_rate": 1.3570757510482827e-05, "loss": 0.6972, "step": 8757 }, { "epoch": 0.4, "grad_norm": 0.37660898340391047, "learning_rate": 1.356936764217137e-05, "loss": 0.2177, "step": 8758 }, { "epoch": 0.4, "grad_norm": 0.4168055795214102, "learning_rate": 1.3567977694837557e-05, "loss": 0.3485, "step": 8759 }, { "epoch": 0.4, "grad_norm": 0.5456244585980462, "learning_rate": 1.3566587668512154e-05, "loss": 0.4018, "step": 8760 }, { "epoch": 0.4, "grad_norm": 0.3181215819371659, "learning_rate": 1.3565197563225937e-05, "loss": 0.1911, "step": 8761 }, { "epoch": 0.4, "grad_norm": 0.2860589771072182, "learning_rate": 1.3563807379009684e-05, "loss": 0.2242, "step": 8762 }, { "epoch": 0.4, "grad_norm": 0.5655925113097174, "learning_rate": 1.356241711589417e-05, "loss": 0.3868, "step": 8763 }, { "epoch": 0.4, "grad_norm": 0.3604462044358751, "learning_rate": 1.3561026773910176e-05, "loss": 0.1971, "step": 8764 }, { "epoch": 0.4, "grad_norm": 0.7971389515926197, "learning_rate": 1.355963635308848e-05, "loss": 0.4281, "step": 8765 }, { "epoch": 0.4, "grad_norm": 0.5015987774431565, "learning_rate": 1.3558245853459864e-05, "loss": 0.359, "step": 8766 }, { "epoch": 0.4, "grad_norm": 0.315408197933237, "learning_rate": 1.3556855275055116e-05, "loss": 0.2694, "step": 8767 }, { "epoch": 0.4, "grad_norm": 0.24332873938752128, "learning_rate": 1.3555464617905018e-05, "loss": 0.1392, "step": 8768 }, { "epoch": 0.4, "grad_norm": 1.295748051023343, "learning_rate": 1.3554073882040366e-05, "loss": 0.7932, "step": 8769 }, { "epoch": 0.4, "grad_norm": 0.3740338536927202, "learning_rate": 1.3552683067491941e-05, "loss": 0.271, "step": 8770 }, { "epoch": 0.4, "grad_norm": 0.3912770601481214, "learning_rate": 1.3551292174290537e-05, "loss": 0.2534, "step": 8771 }, { "epoch": 0.4, "grad_norm": 0.8456408483422618, "learning_rate": 1.3549901202466946e-05, "loss": 0.4322, "step": 8772 }, { "epoch": 0.4, "grad_norm": 0.43363651859754715, "learning_rate": 1.3548510152051963e-05, "loss": 0.2873, "step": 8773 }, { "epoch": 0.4, "grad_norm": 0.25187771510946716, "learning_rate": 1.3547119023076387e-05, "loss": 0.1466, "step": 8774 }, { "epoch": 0.4, "grad_norm": 0.5666171022432636, "learning_rate": 1.3545727815571015e-05, "loss": 0.4353, "step": 8775 }, { "epoch": 0.4, "grad_norm": 0.38116419268481283, "learning_rate": 1.3544336529566645e-05, "loss": 0.2848, "step": 8776 }, { "epoch": 0.4, "grad_norm": 0.7957296759240025, "learning_rate": 1.354294516509408e-05, "loss": 0.3549, "step": 8777 }, { "epoch": 0.4, "grad_norm": 0.3902767055187228, "learning_rate": 1.3541553722184127e-05, "loss": 0.3059, "step": 8778 }, { "epoch": 0.4, "grad_norm": 0.4161552057413789, "learning_rate": 1.3540162200867584e-05, "loss": 0.2948, "step": 8779 }, { "epoch": 0.4, "grad_norm": 0.280184106790731, "learning_rate": 1.3538770601175264e-05, "loss": 0.1938, "step": 8780 }, { "epoch": 0.4, "grad_norm": 0.46962895127779297, "learning_rate": 1.3537378923137973e-05, "loss": 0.3248, "step": 8781 }, { "epoch": 0.4, "grad_norm": 0.4976584257328208, "learning_rate": 1.3535987166786523e-05, "loss": 0.2931, "step": 8782 }, { "epoch": 0.4, "grad_norm": 0.4084182638417186, "learning_rate": 1.3534595332151726e-05, "loss": 0.3388, "step": 8783 }, { "epoch": 0.4, "grad_norm": 0.7730585071991901, "learning_rate": 1.3533203419264393e-05, "loss": 0.3367, "step": 8784 }, { "epoch": 0.4, "grad_norm": 0.4123965169271035, "learning_rate": 1.3531811428155341e-05, "loss": 0.2535, "step": 8785 }, { "epoch": 0.4, "grad_norm": 0.31925579814729654, "learning_rate": 1.3530419358855392e-05, "loss": 0.2513, "step": 8786 }, { "epoch": 0.4, "grad_norm": 0.8571686224642743, "learning_rate": 1.3529027211395355e-05, "loss": 0.5658, "step": 8787 }, { "epoch": 0.4, "grad_norm": 0.2649254092656279, "learning_rate": 1.3527634985806062e-05, "loss": 0.2152, "step": 8788 }, { "epoch": 0.4, "grad_norm": 0.7384508851908317, "learning_rate": 1.3526242682118329e-05, "loss": 0.4227, "step": 8789 }, { "epoch": 0.4, "grad_norm": 0.35304242065128766, "learning_rate": 1.3524850300362982e-05, "loss": 0.2494, "step": 8790 }, { "epoch": 0.4, "grad_norm": 0.3314067479142375, "learning_rate": 1.3523457840570844e-05, "loss": 0.2675, "step": 8791 }, { "epoch": 0.4, "grad_norm": 0.3634245034497393, "learning_rate": 1.3522065302772747e-05, "loss": 0.2311, "step": 8792 }, { "epoch": 0.4, "grad_norm": 0.7662109567161808, "learning_rate": 1.3520672686999519e-05, "loss": 0.5526, "step": 8793 }, { "epoch": 0.4, "grad_norm": 0.3406564499553857, "learning_rate": 1.3519279993281993e-05, "loss": 0.2314, "step": 8794 }, { "epoch": 0.4, "grad_norm": 0.4027767667837165, "learning_rate": 1.3517887221650998e-05, "loss": 0.3502, "step": 8795 }, { "epoch": 0.4, "grad_norm": 0.5168517183996955, "learning_rate": 1.3516494372137368e-05, "loss": 0.2205, "step": 8796 }, { "epoch": 0.4, "grad_norm": 0.36940013599399546, "learning_rate": 1.3515101444771945e-05, "loss": 0.1741, "step": 8797 }, { "epoch": 0.4, "grad_norm": 0.3278759079232815, "learning_rate": 1.3513708439585562e-05, "loss": 0.3069, "step": 8798 }, { "epoch": 0.4, "grad_norm": 0.7914289778572776, "learning_rate": 1.3512315356609062e-05, "loss": 0.4718, "step": 8799 }, { "epoch": 0.4, "grad_norm": 0.3692063948905585, "learning_rate": 1.3510922195873286e-05, "loss": 0.2103, "step": 8800 }, { "epoch": 0.4, "grad_norm": 0.28623811007505745, "learning_rate": 1.3509528957409077e-05, "loss": 0.2289, "step": 8801 }, { "epoch": 0.4, "grad_norm": 0.3508211496165993, "learning_rate": 1.3508135641247278e-05, "loss": 0.3151, "step": 8802 }, { "epoch": 0.4, "grad_norm": 0.3853368375265332, "learning_rate": 1.3506742247418734e-05, "loss": 0.1969, "step": 8803 }, { "epoch": 0.4, "grad_norm": 0.5694448536698385, "learning_rate": 1.3505348775954302e-05, "loss": 0.3991, "step": 8804 }, { "epoch": 0.4, "grad_norm": 1.0583924042259596, "learning_rate": 1.3503955226884822e-05, "loss": 0.4524, "step": 8805 }, { "epoch": 0.4, "grad_norm": 0.3499912837666646, "learning_rate": 1.3502561600241155e-05, "loss": 0.3053, "step": 8806 }, { "epoch": 0.4, "grad_norm": 0.4278409958859237, "learning_rate": 1.3501167896054146e-05, "loss": 0.2694, "step": 8807 }, { "epoch": 0.4, "grad_norm": 0.3394277437513173, "learning_rate": 1.3499774114354655e-05, "loss": 0.1867, "step": 8808 }, { "epoch": 0.4, "grad_norm": 0.39467608906437435, "learning_rate": 1.3498380255173537e-05, "loss": 0.296, "step": 8809 }, { "epoch": 0.4, "grad_norm": 0.30980146303961265, "learning_rate": 1.3496986318541656e-05, "loss": 0.2449, "step": 8810 }, { "epoch": 0.4, "grad_norm": 1.0645314964474912, "learning_rate": 1.3495592304489869e-05, "loss": 0.4639, "step": 8811 }, { "epoch": 0.4, "grad_norm": 0.42701640784780215, "learning_rate": 1.3494198213049035e-05, "loss": 0.3244, "step": 8812 }, { "epoch": 0.4, "grad_norm": 0.2829388090291178, "learning_rate": 1.3492804044250016e-05, "loss": 0.1751, "step": 8813 }, { "epoch": 0.4, "grad_norm": 0.3086710418980984, "learning_rate": 1.3491409798123687e-05, "loss": 0.2762, "step": 8814 }, { "epoch": 0.4, "grad_norm": 0.6855261581233567, "learning_rate": 1.3490015474700908e-05, "loss": 0.4613, "step": 8815 }, { "epoch": 0.4, "grad_norm": 0.3601747557741626, "learning_rate": 1.348862107401255e-05, "loss": 0.2451, "step": 8816 }, { "epoch": 0.41, "grad_norm": 0.5733870734956155, "learning_rate": 1.3487226596089489e-05, "loss": 0.3685, "step": 8817 }, { "epoch": 0.41, "grad_norm": 0.38578006423209626, "learning_rate": 1.3485832040962588e-05, "loss": 0.2633, "step": 8818 }, { "epoch": 0.41, "grad_norm": 0.44872581050769716, "learning_rate": 1.3484437408662725e-05, "loss": 0.3314, "step": 8819 }, { "epoch": 0.41, "grad_norm": 0.26979532585769717, "learning_rate": 1.3483042699220774e-05, "loss": 0.0721, "step": 8820 }, { "epoch": 0.41, "grad_norm": 0.5324370097246657, "learning_rate": 1.348164791266762e-05, "loss": 0.3189, "step": 8821 }, { "epoch": 0.41, "grad_norm": 0.327657570172009, "learning_rate": 1.3480253049034131e-05, "loss": 0.3021, "step": 8822 }, { "epoch": 0.41, "grad_norm": 0.6549123818959309, "learning_rate": 1.3478858108351198e-05, "loss": 0.3519, "step": 8823 }, { "epoch": 0.41, "grad_norm": 0.668904420527574, "learning_rate": 1.3477463090649701e-05, "loss": 0.4421, "step": 8824 }, { "epoch": 0.41, "grad_norm": 0.31253938196621533, "learning_rate": 1.347606799596052e-05, "loss": 0.2224, "step": 8825 }, { "epoch": 0.41, "grad_norm": 0.3038172141773224, "learning_rate": 1.3474672824314541e-05, "loss": 0.2126, "step": 8826 }, { "epoch": 0.41, "grad_norm": 0.589882032785637, "learning_rate": 1.3473277575742659e-05, "loss": 0.2683, "step": 8827 }, { "epoch": 0.41, "grad_norm": 0.4467383661546802, "learning_rate": 1.3471882250275757e-05, "loss": 0.3037, "step": 8828 }, { "epoch": 0.41, "grad_norm": 0.8575324043004919, "learning_rate": 1.347048684794473e-05, "loss": 0.4459, "step": 8829 }, { "epoch": 0.41, "grad_norm": 0.2952587775980895, "learning_rate": 1.3469091368780468e-05, "loss": 0.2321, "step": 8830 }, { "epoch": 0.41, "grad_norm": 0.6272553379159224, "learning_rate": 1.3467695812813866e-05, "loss": 0.4043, "step": 8831 }, { "epoch": 0.41, "grad_norm": 0.30869951284163244, "learning_rate": 1.3466300180075822e-05, "loss": 0.1901, "step": 8832 }, { "epoch": 0.41, "grad_norm": 0.718881542439528, "learning_rate": 1.3464904470597231e-05, "loss": 0.2659, "step": 8833 }, { "epoch": 0.41, "grad_norm": 0.29640618684949604, "learning_rate": 1.3463508684408997e-05, "loss": 0.2882, "step": 8834 }, { "epoch": 0.41, "grad_norm": 0.6868297550384124, "learning_rate": 1.3462112821542016e-05, "loss": 0.4649, "step": 8835 }, { "epoch": 0.41, "grad_norm": 0.5954246100377746, "learning_rate": 1.3460716882027199e-05, "loss": 0.2606, "step": 8836 }, { "epoch": 0.41, "grad_norm": 0.3956742335343249, "learning_rate": 1.345932086589544e-05, "loss": 0.2899, "step": 8837 }, { "epoch": 0.41, "grad_norm": 0.3321499429220548, "learning_rate": 1.3457924773177655e-05, "loss": 0.2794, "step": 8838 }, { "epoch": 0.41, "grad_norm": 0.43358493692564626, "learning_rate": 1.3456528603904746e-05, "loss": 0.1419, "step": 8839 }, { "epoch": 0.41, "grad_norm": 0.5142859662281241, "learning_rate": 1.3455132358107626e-05, "loss": 0.3401, "step": 8840 }, { "epoch": 0.41, "grad_norm": 1.3893144746205917, "learning_rate": 1.3453736035817206e-05, "loss": 0.7966, "step": 8841 }, { "epoch": 0.41, "grad_norm": 0.33691657051846896, "learning_rate": 1.34523396370644e-05, "loss": 0.292, "step": 8842 }, { "epoch": 0.41, "grad_norm": 0.34617687913703377, "learning_rate": 1.3450943161880118e-05, "loss": 0.2001, "step": 8843 }, { "epoch": 0.41, "grad_norm": 0.2700806796461998, "learning_rate": 1.3449546610295285e-05, "loss": 0.1868, "step": 8844 }, { "epoch": 0.41, "grad_norm": 0.4537734020686143, "learning_rate": 1.3448149982340812e-05, "loss": 0.3629, "step": 8845 }, { "epoch": 0.41, "grad_norm": 0.3111853603913905, "learning_rate": 1.3446753278047623e-05, "loss": 0.2312, "step": 8846 }, { "epoch": 0.41, "grad_norm": 0.8188901394909537, "learning_rate": 1.3445356497446637e-05, "loss": 0.5686, "step": 8847 }, { "epoch": 0.41, "grad_norm": 0.7270922676575237, "learning_rate": 1.344395964056878e-05, "loss": 0.4779, "step": 8848 }, { "epoch": 0.41, "grad_norm": 0.3851836188579967, "learning_rate": 1.3442562707444977e-05, "loss": 0.2294, "step": 8849 }, { "epoch": 0.41, "grad_norm": 0.36486827087896073, "learning_rate": 1.3441165698106151e-05, "loss": 0.2903, "step": 8850 }, { "epoch": 0.41, "grad_norm": 0.6918410527416364, "learning_rate": 1.3439768612583235e-05, "loss": 0.4341, "step": 8851 }, { "epoch": 0.41, "grad_norm": 0.20101754153820162, "learning_rate": 1.3438371450907155e-05, "loss": 0.15, "step": 8852 }, { "epoch": 0.41, "grad_norm": 0.36068821915518895, "learning_rate": 1.343697421310885e-05, "loss": 0.3159, "step": 8853 }, { "epoch": 0.41, "grad_norm": 1.1394677951882988, "learning_rate": 1.3435576899219243e-05, "loss": 0.6246, "step": 8854 }, { "epoch": 0.41, "grad_norm": 0.372099713828924, "learning_rate": 1.3434179509269278e-05, "loss": 0.2701, "step": 8855 }, { "epoch": 0.41, "grad_norm": 0.7275006693515084, "learning_rate": 1.3432782043289887e-05, "loss": 0.3347, "step": 8856 }, { "epoch": 0.41, "grad_norm": 0.3983435069008562, "learning_rate": 1.343138450131201e-05, "loss": 0.3555, "step": 8857 }, { "epoch": 0.41, "grad_norm": 0.28979993223117084, "learning_rate": 1.342998688336659e-05, "loss": 0.2268, "step": 8858 }, { "epoch": 0.41, "grad_norm": 0.29095056498331384, "learning_rate": 1.3428589189484564e-05, "loss": 0.1237, "step": 8859 }, { "epoch": 0.41, "grad_norm": 0.6707278199042213, "learning_rate": 1.3427191419696876e-05, "loss": 0.4639, "step": 8860 }, { "epoch": 0.41, "grad_norm": 0.36855426718374623, "learning_rate": 1.3425793574034476e-05, "loss": 0.2795, "step": 8861 }, { "epoch": 0.41, "grad_norm": 0.4087644984246095, "learning_rate": 1.3424395652528308e-05, "loss": 0.2953, "step": 8862 }, { "epoch": 0.41, "grad_norm": 0.82146841635332, "learning_rate": 1.3422997655209318e-05, "loss": 0.3998, "step": 8863 }, { "epoch": 0.41, "grad_norm": 0.40874296998314763, "learning_rate": 1.3421599582108462e-05, "loss": 0.3036, "step": 8864 }, { "epoch": 0.41, "grad_norm": 0.2759018259216273, "learning_rate": 1.342020143325669e-05, "loss": 0.1836, "step": 8865 }, { "epoch": 0.41, "grad_norm": 0.780031928076596, "learning_rate": 1.3418803208684951e-05, "loss": 0.4358, "step": 8866 }, { "epoch": 0.41, "grad_norm": 0.360339998618353, "learning_rate": 1.3417404908424207e-05, "loss": 0.2532, "step": 8867 }, { "epoch": 0.41, "grad_norm": 0.7480806614099489, "learning_rate": 1.341600653250541e-05, "loss": 0.3903, "step": 8868 }, { "epoch": 0.41, "grad_norm": 0.3692848632557834, "learning_rate": 1.3414608080959521e-05, "loss": 0.2725, "step": 8869 }, { "epoch": 0.41, "grad_norm": 0.3499696672606069, "learning_rate": 1.34132095538175e-05, "loss": 0.2732, "step": 8870 }, { "epoch": 0.41, "grad_norm": 0.30073652702981063, "learning_rate": 1.3411810951110311e-05, "loss": 0.2172, "step": 8871 }, { "epoch": 0.41, "grad_norm": 1.0619183971128496, "learning_rate": 1.3410412272868915e-05, "loss": 0.4187, "step": 8872 }, { "epoch": 0.41, "grad_norm": 0.4114597958579147, "learning_rate": 1.340901351912428e-05, "loss": 0.2948, "step": 8873 }, { "epoch": 0.41, "grad_norm": 0.41833466660848806, "learning_rate": 1.3407614689907368e-05, "loss": 0.3419, "step": 8874 }, { "epoch": 0.41, "grad_norm": 0.9584470255701635, "learning_rate": 1.3406215785249153e-05, "loss": 0.2469, "step": 8875 }, { "epoch": 0.41, "grad_norm": 0.28298234149738544, "learning_rate": 1.3404816805180603e-05, "loss": 0.2226, "step": 8876 }, { "epoch": 0.41, "grad_norm": 0.4766201009716676, "learning_rate": 1.3403417749732693e-05, "loss": 0.2939, "step": 8877 }, { "epoch": 0.41, "grad_norm": 0.4622338815087517, "learning_rate": 1.340201861893639e-05, "loss": 0.2867, "step": 8878 }, { "epoch": 0.41, "grad_norm": 0.3794872248387296, "learning_rate": 1.3400619412822675e-05, "loss": 0.2809, "step": 8879 }, { "epoch": 0.41, "grad_norm": 0.6966273280311868, "learning_rate": 1.3399220131422524e-05, "loss": 0.4611, "step": 8880 }, { "epoch": 0.41, "grad_norm": 0.43061467780981344, "learning_rate": 1.3397820774766917e-05, "loss": 0.3098, "step": 8881 }, { "epoch": 0.41, "grad_norm": 0.3274878429252231, "learning_rate": 1.3396421342886832e-05, "loss": 0.1881, "step": 8882 }, { "epoch": 0.41, "grad_norm": 0.4954802742931216, "learning_rate": 1.3395021835813251e-05, "loss": 0.3005, "step": 8883 }, { "epoch": 0.41, "grad_norm": 0.6593296652337535, "learning_rate": 1.3393622253577158e-05, "loss": 0.4505, "step": 8884 }, { "epoch": 0.41, "grad_norm": 0.30096762982435143, "learning_rate": 1.3392222596209541e-05, "loss": 0.195, "step": 8885 }, { "epoch": 0.41, "grad_norm": 0.3007338612402302, "learning_rate": 1.3390822863741384e-05, "loss": 0.2582, "step": 8886 }, { "epoch": 0.41, "grad_norm": 1.0239192141914364, "learning_rate": 1.3389423056203679e-05, "loss": 0.5669, "step": 8887 }, { "epoch": 0.41, "grad_norm": 0.34995993379880824, "learning_rate": 1.3388023173627413e-05, "loss": 0.1901, "step": 8888 }, { "epoch": 0.41, "grad_norm": 0.319735617535886, "learning_rate": 1.338662321604358e-05, "loss": 0.2712, "step": 8889 }, { "epoch": 0.41, "grad_norm": 1.4907212178380154, "learning_rate": 1.3385223183483169e-05, "loss": 0.5113, "step": 8890 }, { "epoch": 0.41, "grad_norm": 0.38749623322000315, "learning_rate": 1.3383823075977185e-05, "loss": 0.2429, "step": 8891 }, { "epoch": 0.41, "grad_norm": 0.45320028208011, "learning_rate": 1.3382422893556617e-05, "loss": 0.3141, "step": 8892 }, { "epoch": 0.41, "grad_norm": 0.4131838699877759, "learning_rate": 1.3381022636252466e-05, "loss": 0.3442, "step": 8893 }, { "epoch": 0.41, "grad_norm": 0.42251298101776746, "learning_rate": 1.3379622304095734e-05, "loss": 0.2637, "step": 8894 }, { "epoch": 0.41, "grad_norm": 1.0288784137594331, "learning_rate": 1.337822189711742e-05, "loss": 0.4013, "step": 8895 }, { "epoch": 0.41, "grad_norm": 0.9357166800909519, "learning_rate": 1.337682141534853e-05, "loss": 0.4471, "step": 8896 }, { "epoch": 0.41, "grad_norm": 0.2971423875894324, "learning_rate": 1.3375420858820067e-05, "loss": 0.2584, "step": 8897 }, { "epoch": 0.41, "grad_norm": 0.23648286514364847, "learning_rate": 1.337402022756304e-05, "loss": 0.1466, "step": 8898 }, { "epoch": 0.41, "grad_norm": 1.3099631984023072, "learning_rate": 1.3372619521608459e-05, "loss": 0.5497, "step": 8899 }, { "epoch": 0.41, "grad_norm": 0.44831279957872217, "learning_rate": 1.3371218740987334e-05, "loss": 0.2918, "step": 8900 }, { "epoch": 0.41, "grad_norm": 0.344533256630212, "learning_rate": 1.3369817885730667e-05, "loss": 0.2547, "step": 8901 }, { "epoch": 0.41, "grad_norm": 0.9313573102771588, "learning_rate": 1.3368416955869487e-05, "loss": 0.4577, "step": 8902 }, { "epoch": 0.41, "grad_norm": 0.4081045282033635, "learning_rate": 1.3367015951434798e-05, "loss": 0.2677, "step": 8903 }, { "epoch": 0.41, "grad_norm": 0.2693074964926061, "learning_rate": 1.3365614872457627e-05, "loss": 0.1497, "step": 8904 }, { "epoch": 0.41, "grad_norm": 0.4084497218868415, "learning_rate": 1.3364213718968981e-05, "loss": 0.3142, "step": 8905 }, { "epoch": 0.41, "grad_norm": 0.45328938099614763, "learning_rate": 1.3362812490999888e-05, "loss": 0.3199, "step": 8906 }, { "epoch": 0.41, "grad_norm": 0.5529362532730133, "learning_rate": 1.3361411188581368e-05, "loss": 0.3945, "step": 8907 }, { "epoch": 0.41, "grad_norm": 0.9329231184712226, "learning_rate": 1.3360009811744444e-05, "loss": 0.3928, "step": 8908 }, { "epoch": 0.41, "grad_norm": 0.3042853210492065, "learning_rate": 1.3358608360520138e-05, "loss": 0.2768, "step": 8909 }, { "epoch": 0.41, "grad_norm": 0.25993578280891794, "learning_rate": 1.3357206834939483e-05, "loss": 0.2158, "step": 8910 }, { "epoch": 0.41, "grad_norm": 1.1308517700880316, "learning_rate": 1.3355805235033503e-05, "loss": 0.2575, "step": 8911 }, { "epoch": 0.41, "grad_norm": 0.42482511897671343, "learning_rate": 1.3354403560833232e-05, "loss": 0.305, "step": 8912 }, { "epoch": 0.41, "grad_norm": 0.4338363460367922, "learning_rate": 1.3353001812369696e-05, "loss": 0.317, "step": 8913 }, { "epoch": 0.41, "grad_norm": 0.7129310475429949, "learning_rate": 1.3351599989673934e-05, "loss": 0.333, "step": 8914 }, { "epoch": 0.41, "grad_norm": 0.35652947805842033, "learning_rate": 1.3350198092776977e-05, "loss": 0.2611, "step": 8915 }, { "epoch": 0.41, "grad_norm": 0.43798474629042206, "learning_rate": 1.3348796121709862e-05, "loss": 0.2925, "step": 8916 }, { "epoch": 0.41, "grad_norm": 0.3153409776203602, "learning_rate": 1.334739407650363e-05, "loss": 0.2183, "step": 8917 }, { "epoch": 0.41, "grad_norm": 0.40619524857895895, "learning_rate": 1.3345991957189322e-05, "loss": 0.2589, "step": 8918 }, { "epoch": 0.41, "grad_norm": 0.5089885280562814, "learning_rate": 1.3344589763797973e-05, "loss": 0.3426, "step": 8919 }, { "epoch": 0.41, "grad_norm": 0.5057943231440046, "learning_rate": 1.3343187496360632e-05, "loss": 0.3982, "step": 8920 }, { "epoch": 0.41, "grad_norm": 0.3578569427712817, "learning_rate": 1.3341785154908342e-05, "loss": 0.2208, "step": 8921 }, { "epoch": 0.41, "grad_norm": 0.33046451908412405, "learning_rate": 1.334038273947215e-05, "loss": 0.2296, "step": 8922 }, { "epoch": 0.41, "grad_norm": 0.4452025283139508, "learning_rate": 1.3338980250083102e-05, "loss": 0.294, "step": 8923 }, { "epoch": 0.41, "grad_norm": 0.3848990428093696, "learning_rate": 1.3337577686772252e-05, "loss": 0.215, "step": 8924 }, { "epoch": 0.41, "grad_norm": 0.2971459010138684, "learning_rate": 1.3336175049570646e-05, "loss": 0.3198, "step": 8925 }, { "epoch": 0.41, "grad_norm": 0.8949721688818129, "learning_rate": 1.3334772338509341e-05, "loss": 0.5932, "step": 8926 }, { "epoch": 0.41, "grad_norm": 0.3556011302631098, "learning_rate": 1.3333369553619388e-05, "loss": 0.2007, "step": 8927 }, { "epoch": 0.41, "grad_norm": 0.2767542376674371, "learning_rate": 1.333196669493185e-05, "loss": 0.2055, "step": 8928 }, { "epoch": 0.41, "grad_norm": 0.3832328229036367, "learning_rate": 1.333056376247778e-05, "loss": 0.3078, "step": 8929 }, { "epoch": 0.41, "grad_norm": 0.3833514183119835, "learning_rate": 1.3329160756288237e-05, "loss": 0.2567, "step": 8930 }, { "epoch": 0.41, "grad_norm": 0.4874578743272871, "learning_rate": 1.3327757676394284e-05, "loss": 0.3634, "step": 8931 }, { "epoch": 0.41, "grad_norm": 1.1772221293806329, "learning_rate": 1.3326354522826983e-05, "loss": 0.7295, "step": 8932 }, { "epoch": 0.41, "grad_norm": 0.3060047758006496, "learning_rate": 1.3324951295617398e-05, "loss": 0.2648, "step": 8933 }, { "epoch": 0.41, "grad_norm": 0.4279310065372146, "learning_rate": 1.3323547994796597e-05, "loss": 0.2496, "step": 8934 }, { "epoch": 0.41, "grad_norm": 0.4266465051863254, "learning_rate": 1.3322144620395648e-05, "loss": 0.2212, "step": 8935 }, { "epoch": 0.41, "grad_norm": 0.5185478312006149, "learning_rate": 1.3320741172445616e-05, "loss": 0.3454, "step": 8936 }, { "epoch": 0.41, "grad_norm": 0.3129177121334974, "learning_rate": 1.3319337650977579e-05, "loss": 0.2555, "step": 8937 }, { "epoch": 0.41, "grad_norm": 1.3915010247032102, "learning_rate": 1.3317934056022603e-05, "loss": 0.8919, "step": 8938 }, { "epoch": 0.41, "grad_norm": 0.8352484302333893, "learning_rate": 1.3316530387611766e-05, "loss": 0.4441, "step": 8939 }, { "epoch": 0.41, "grad_norm": 0.2898286228083498, "learning_rate": 1.331512664577614e-05, "loss": 0.1757, "step": 8940 }, { "epoch": 0.41, "grad_norm": 0.3249272638365113, "learning_rate": 1.331372283054681e-05, "loss": 0.2975, "step": 8941 }, { "epoch": 0.41, "grad_norm": 0.6108816346014615, "learning_rate": 1.3312318941954846e-05, "loss": 0.3701, "step": 8942 }, { "epoch": 0.41, "grad_norm": 0.3740221654399229, "learning_rate": 1.3310914980031335e-05, "loss": 0.234, "step": 8943 }, { "epoch": 0.41, "grad_norm": 1.3205964361097111, "learning_rate": 1.3309510944807355e-05, "loss": 0.8254, "step": 8944 }, { "epoch": 0.41, "grad_norm": 0.341928256271378, "learning_rate": 1.3308106836313996e-05, "loss": 0.2684, "step": 8945 }, { "epoch": 0.41, "grad_norm": 0.4637535038594764, "learning_rate": 1.330670265458234e-05, "loss": 0.3169, "step": 8946 }, { "epoch": 0.41, "grad_norm": 0.8777589709084394, "learning_rate": 1.3305298399643474e-05, "loss": 0.362, "step": 8947 }, { "epoch": 0.41, "grad_norm": 0.28814130025307433, "learning_rate": 1.3303894071528485e-05, "loss": 0.2245, "step": 8948 }, { "epoch": 0.41, "grad_norm": 0.31895025537374105, "learning_rate": 1.3302489670268466e-05, "loss": 0.2438, "step": 8949 }, { "epoch": 0.41, "grad_norm": 0.9849895134683403, "learning_rate": 1.3301085195894507e-05, "loss": 0.5536, "step": 8950 }, { "epoch": 0.41, "grad_norm": 0.6518251820710435, "learning_rate": 1.3299680648437707e-05, "loss": 0.3751, "step": 8951 }, { "epoch": 0.41, "grad_norm": 0.4527310430043478, "learning_rate": 1.3298276027929158e-05, "loss": 0.3278, "step": 8952 }, { "epoch": 0.41, "grad_norm": 0.5137272781195599, "learning_rate": 1.3296871334399955e-05, "loss": 0.2968, "step": 8953 }, { "epoch": 0.41, "grad_norm": 0.2697688705554102, "learning_rate": 1.3295466567881198e-05, "loss": 0.169, "step": 8954 }, { "epoch": 0.41, "grad_norm": 0.40870246536529564, "learning_rate": 1.3294061728403986e-05, "loss": 0.2903, "step": 8955 }, { "epoch": 0.41, "grad_norm": 0.6238460702430969, "learning_rate": 1.3292656815999426e-05, "loss": 0.4113, "step": 8956 }, { "epoch": 0.41, "grad_norm": 0.4140332092046376, "learning_rate": 1.3291251830698615e-05, "loss": 0.242, "step": 8957 }, { "epoch": 0.41, "grad_norm": 0.41323845411892096, "learning_rate": 1.3289846772532663e-05, "loss": 0.3406, "step": 8958 }, { "epoch": 0.41, "grad_norm": 0.6255248437792479, "learning_rate": 1.328844164153267e-05, "loss": 0.4566, "step": 8959 }, { "epoch": 0.41, "grad_norm": 0.2921177105939241, "learning_rate": 1.3287036437729753e-05, "loss": 0.1713, "step": 8960 }, { "epoch": 0.41, "grad_norm": 0.27839447788952537, "learning_rate": 1.3285631161155013e-05, "loss": 0.2628, "step": 8961 }, { "epoch": 0.41, "grad_norm": 1.2975321286625572, "learning_rate": 1.3284225811839568e-05, "loss": 0.8504, "step": 8962 }, { "epoch": 0.41, "grad_norm": 0.521140196841499, "learning_rate": 1.3282820389814527e-05, "loss": 0.2734, "step": 8963 }, { "epoch": 0.41, "grad_norm": 0.3842503400147209, "learning_rate": 1.3281414895111011e-05, "loss": 0.2814, "step": 8964 }, { "epoch": 0.41, "grad_norm": 0.5213603419034515, "learning_rate": 1.3280009327760129e-05, "loss": 0.3707, "step": 8965 }, { "epoch": 0.41, "grad_norm": 0.4221994415334029, "learning_rate": 1.3278603687793003e-05, "loss": 0.1708, "step": 8966 }, { "epoch": 0.41, "grad_norm": 0.3107497415006731, "learning_rate": 1.327719797524075e-05, "loss": 0.2412, "step": 8967 }, { "epoch": 0.41, "grad_norm": 0.5167556919738907, "learning_rate": 1.3275792190134493e-05, "loss": 0.389, "step": 8968 }, { "epoch": 0.41, "grad_norm": 0.35225293364639143, "learning_rate": 1.3274386332505353e-05, "loss": 0.2446, "step": 8969 }, { "epoch": 0.41, "grad_norm": 0.3991350944388709, "learning_rate": 1.3272980402384459e-05, "loss": 0.2457, "step": 8970 }, { "epoch": 0.41, "grad_norm": 0.6628228444375425, "learning_rate": 1.3271574399802931e-05, "loss": 0.4298, "step": 8971 }, { "epoch": 0.41, "grad_norm": 0.3757419282214292, "learning_rate": 1.3270168324791896e-05, "loss": 0.3108, "step": 8972 }, { "epoch": 0.41, "grad_norm": 0.25862282789655244, "learning_rate": 1.3268762177382492e-05, "loss": 0.1749, "step": 8973 }, { "epoch": 0.41, "grad_norm": 0.5382103070163592, "learning_rate": 1.3267355957605839e-05, "loss": 0.3864, "step": 8974 }, { "epoch": 0.41, "grad_norm": 0.9606969651938579, "learning_rate": 1.3265949665493077e-05, "loss": 0.4271, "step": 8975 }, { "epoch": 0.41, "grad_norm": 0.30954799608278394, "learning_rate": 1.3264543301075336e-05, "loss": 0.2379, "step": 8976 }, { "epoch": 0.41, "grad_norm": 0.4569552160956904, "learning_rate": 1.3263136864383755e-05, "loss": 0.3555, "step": 8977 }, { "epoch": 0.41, "grad_norm": 1.2361221481596918, "learning_rate": 1.3261730355449464e-05, "loss": 0.6905, "step": 8978 }, { "epoch": 0.41, "grad_norm": 0.32699749434924574, "learning_rate": 1.3260323774303612e-05, "loss": 0.225, "step": 8979 }, { "epoch": 0.41, "grad_norm": 0.5436810652585612, "learning_rate": 1.3258917120977327e-05, "loss": 0.4361, "step": 8980 }, { "epoch": 0.41, "grad_norm": 0.41270259371223267, "learning_rate": 1.3257510395501766e-05, "loss": 0.2856, "step": 8981 }, { "epoch": 0.41, "grad_norm": 0.328897229364264, "learning_rate": 1.325610359790806e-05, "loss": 0.2705, "step": 8982 }, { "epoch": 0.41, "grad_norm": 0.31606888868494354, "learning_rate": 1.325469672822736e-05, "loss": 0.1124, "step": 8983 }, { "epoch": 0.41, "grad_norm": 0.4166250327652007, "learning_rate": 1.3253289786490812e-05, "loss": 0.3023, "step": 8984 }, { "epoch": 0.41, "grad_norm": 0.3704296370329955, "learning_rate": 1.325188277272956e-05, "loss": 0.2855, "step": 8985 }, { "epoch": 0.41, "grad_norm": 0.8221563950085035, "learning_rate": 1.3250475686974762e-05, "loss": 0.3869, "step": 8986 }, { "epoch": 0.41, "grad_norm": 0.4896714148057406, "learning_rate": 1.3249068529257562e-05, "loss": 0.295, "step": 8987 }, { "epoch": 0.41, "grad_norm": 0.38847287241181544, "learning_rate": 1.324766129960912e-05, "loss": 0.3066, "step": 8988 }, { "epoch": 0.41, "grad_norm": 0.26426543681400977, "learning_rate": 1.3246253998060584e-05, "loss": 0.2125, "step": 8989 }, { "epoch": 0.41, "grad_norm": 0.7794595006630068, "learning_rate": 1.3244846624643115e-05, "loss": 0.4922, "step": 8990 }, { "epoch": 0.41, "grad_norm": 0.3946328849607798, "learning_rate": 1.3243439179387867e-05, "loss": 0.2861, "step": 8991 }, { "epoch": 0.41, "grad_norm": 0.4065140732351681, "learning_rate": 1.3242031662326003e-05, "loss": 0.2953, "step": 8992 }, { "epoch": 0.41, "grad_norm": 0.7482514676758332, "learning_rate": 1.324062407348868e-05, "loss": 0.427, "step": 8993 }, { "epoch": 0.41, "grad_norm": 0.3422742743190214, "learning_rate": 1.3239216412907068e-05, "loss": 0.2555, "step": 8994 }, { "epoch": 0.41, "grad_norm": 0.3789458853092186, "learning_rate": 1.3237808680612323e-05, "loss": 0.2641, "step": 8995 }, { "epoch": 0.41, "grad_norm": 0.3639989799434384, "learning_rate": 1.3236400876635616e-05, "loss": 0.2484, "step": 8996 }, { "epoch": 0.41, "grad_norm": 0.35703365534003695, "learning_rate": 1.3234993001008112e-05, "loss": 0.27, "step": 8997 }, { "epoch": 0.41, "grad_norm": 0.8437915356119837, "learning_rate": 1.3233585053760982e-05, "loss": 0.4841, "step": 8998 }, { "epoch": 0.41, "grad_norm": 0.7005382855928975, "learning_rate": 1.3232177034925395e-05, "loss": 0.3746, "step": 8999 }, { "epoch": 0.41, "grad_norm": 0.3236407265419388, "learning_rate": 1.3230768944532526e-05, "loss": 0.2606, "step": 9000 }, { "epoch": 0.41, "grad_norm": 0.28562255660616803, "learning_rate": 1.3229360782613543e-05, "loss": 0.2207, "step": 9001 }, { "epoch": 0.41, "grad_norm": 0.8322587227320745, "learning_rate": 1.3227952549199625e-05, "loss": 0.41, "step": 9002 }, { "epoch": 0.41, "grad_norm": 0.4007713361278303, "learning_rate": 1.322654424432195e-05, "loss": 0.2708, "step": 9003 }, { "epoch": 0.41, "grad_norm": 0.4753209483485224, "learning_rate": 1.3225135868011694e-05, "loss": 0.3227, "step": 9004 }, { "epoch": 0.41, "grad_norm": 0.9340170792592087, "learning_rate": 1.322372742030004e-05, "loss": 0.395, "step": 9005 }, { "epoch": 0.41, "grad_norm": 0.42288668978760635, "learning_rate": 1.3222318901218168e-05, "loss": 0.2859, "step": 9006 }, { "epoch": 0.41, "grad_norm": 0.2874551564156162, "learning_rate": 1.3220910310797259e-05, "loss": 0.1942, "step": 9007 }, { "epoch": 0.41, "grad_norm": 0.42457515340392443, "learning_rate": 1.3219501649068502e-05, "loss": 0.3277, "step": 9008 }, { "epoch": 0.41, "grad_norm": 0.3611574555598921, "learning_rate": 1.3218092916063081e-05, "loss": 0.1983, "step": 9009 }, { "epoch": 0.41, "grad_norm": 1.037041625603372, "learning_rate": 1.3216684111812184e-05, "loss": 0.4415, "step": 9010 }, { "epoch": 0.41, "grad_norm": 1.1366824801234976, "learning_rate": 1.3215275236347002e-05, "loss": 0.6449, "step": 9011 }, { "epoch": 0.41, "grad_norm": 0.30405615441726225, "learning_rate": 1.3213866289698725e-05, "loss": 0.2213, "step": 9012 }, { "epoch": 0.41, "grad_norm": 0.2733217459002581, "learning_rate": 1.3212457271898545e-05, "loss": 0.2317, "step": 9013 }, { "epoch": 0.41, "grad_norm": 1.6052998956694644, "learning_rate": 1.3211048182977657e-05, "loss": 0.7396, "step": 9014 }, { "epoch": 0.41, "grad_norm": 0.47472761337952085, "learning_rate": 1.3209639022967257e-05, "loss": 0.2349, "step": 9015 }, { "epoch": 0.41, "grad_norm": 0.46122250480679783, "learning_rate": 1.3208229791898544e-05, "loss": 0.3561, "step": 9016 }, { "epoch": 0.41, "grad_norm": 1.212655520943246, "learning_rate": 1.3206820489802716e-05, "loss": 0.7041, "step": 9017 }, { "epoch": 0.41, "grad_norm": 0.35926243825252124, "learning_rate": 1.3205411116710973e-05, "loss": 0.1941, "step": 9018 }, { "epoch": 0.41, "grad_norm": 0.3021669973641154, "learning_rate": 1.3204001672654514e-05, "loss": 0.183, "step": 9019 }, { "epoch": 0.41, "grad_norm": 0.3955366758126534, "learning_rate": 1.3202592157664549e-05, "loss": 0.3261, "step": 9020 }, { "epoch": 0.41, "grad_norm": 0.3686715990911606, "learning_rate": 1.320118257177228e-05, "loss": 0.2779, "step": 9021 }, { "epoch": 0.41, "grad_norm": 0.9016014570712456, "learning_rate": 1.3199772915008912e-05, "loss": 0.3908, "step": 9022 }, { "epoch": 0.41, "grad_norm": 0.511404004774544, "learning_rate": 1.3198363187405661e-05, "loss": 0.4023, "step": 9023 }, { "epoch": 0.41, "grad_norm": 0.42377473910471614, "learning_rate": 1.3196953388993727e-05, "loss": 0.3006, "step": 9024 }, { "epoch": 0.41, "grad_norm": 0.22634842551984938, "learning_rate": 1.3195543519804327e-05, "loss": 0.1648, "step": 9025 }, { "epoch": 0.41, "grad_norm": 0.6971480296711122, "learning_rate": 1.3194133579868672e-05, "loss": 0.4696, "step": 9026 }, { "epoch": 0.41, "grad_norm": 0.5921164469941854, "learning_rate": 1.319272356921798e-05, "loss": 0.3463, "step": 9027 }, { "epoch": 0.41, "grad_norm": 0.31370138563369404, "learning_rate": 1.3191313487883465e-05, "loss": 0.2473, "step": 9028 }, { "epoch": 0.41, "grad_norm": 1.1788590339276992, "learning_rate": 1.3189903335896345e-05, "loss": 0.6487, "step": 9029 }, { "epoch": 0.41, "grad_norm": 0.44322783386724995, "learning_rate": 1.3188493113287841e-05, "loss": 0.2771, "step": 9030 }, { "epoch": 0.41, "grad_norm": 0.25340010044512534, "learning_rate": 1.3187082820089172e-05, "loss": 0.1785, "step": 9031 }, { "epoch": 0.41, "grad_norm": 0.48904608267518923, "learning_rate": 1.318567245633156e-05, "loss": 0.3818, "step": 9032 }, { "epoch": 0.41, "grad_norm": 0.40949237155104234, "learning_rate": 1.3184262022046233e-05, "loss": 0.3392, "step": 9033 }, { "epoch": 0.42, "grad_norm": 0.5395896984840701, "learning_rate": 1.318285151726441e-05, "loss": 0.4071, "step": 9034 }, { "epoch": 0.42, "grad_norm": 0.49329290235317624, "learning_rate": 1.3181440942017325e-05, "loss": 0.289, "step": 9035 }, { "epoch": 0.42, "grad_norm": 0.3437047905679964, "learning_rate": 1.31800302963362e-05, "loss": 0.2698, "step": 9036 }, { "epoch": 0.42, "grad_norm": 0.6136823663012815, "learning_rate": 1.3178619580252275e-05, "loss": 0.3834, "step": 9037 }, { "epoch": 0.42, "grad_norm": 0.3333903952554828, "learning_rate": 1.3177208793796772e-05, "loss": 0.2159, "step": 9038 }, { "epoch": 0.42, "grad_norm": 0.4208993055674658, "learning_rate": 1.317579793700093e-05, "loss": 0.2742, "step": 9039 }, { "epoch": 0.42, "grad_norm": 0.45215343136563835, "learning_rate": 1.317438700989598e-05, "loss": 0.297, "step": 9040 }, { "epoch": 0.42, "grad_norm": 0.7606301033542168, "learning_rate": 1.3172976012513165e-05, "loss": 0.4487, "step": 9041 }, { "epoch": 0.42, "grad_norm": 0.6217067865690995, "learning_rate": 1.3171564944883717e-05, "loss": 0.336, "step": 9042 }, { "epoch": 0.42, "grad_norm": 0.3856566763331818, "learning_rate": 1.3170153807038878e-05, "loss": 0.3241, "step": 9043 }, { "epoch": 0.42, "grad_norm": 0.2831202508617231, "learning_rate": 1.3168742599009892e-05, "loss": 0.2465, "step": 9044 }, { "epoch": 0.42, "grad_norm": 0.3173145897046266, "learning_rate": 1.3167331320827994e-05, "loss": 0.1695, "step": 9045 }, { "epoch": 0.42, "grad_norm": 0.44482702383221334, "learning_rate": 1.3165919972524437e-05, "loss": 0.3378, "step": 9046 }, { "epoch": 0.42, "grad_norm": 0.5090139106707258, "learning_rate": 1.3164508554130461e-05, "loss": 0.3617, "step": 9047 }, { "epoch": 0.42, "grad_norm": 0.38552243847284595, "learning_rate": 1.3163097065677316e-05, "loss": 0.2134, "step": 9048 }, { "epoch": 0.42, "grad_norm": 0.41395654200245086, "learning_rate": 1.3161685507196251e-05, "loss": 0.3414, "step": 9049 }, { "epoch": 0.42, "grad_norm": 0.6687275341297567, "learning_rate": 1.3160273878718516e-05, "loss": 0.4311, "step": 9050 }, { "epoch": 0.42, "grad_norm": 0.20731555804597276, "learning_rate": 1.3158862180275362e-05, "loss": 0.1229, "step": 9051 }, { "epoch": 0.42, "grad_norm": 0.3201130342557867, "learning_rate": 1.3157450411898047e-05, "loss": 0.2847, "step": 9052 }, { "epoch": 0.42, "grad_norm": 1.2516016246283954, "learning_rate": 1.3156038573617822e-05, "loss": 0.6587, "step": 9053 }, { "epoch": 0.42, "grad_norm": 0.33043805948141686, "learning_rate": 1.3154626665465947e-05, "loss": 0.2204, "step": 9054 }, { "epoch": 0.42, "grad_norm": 0.5255089380781177, "learning_rate": 1.3153214687473673e-05, "loss": 0.3812, "step": 9055 }, { "epoch": 0.42, "grad_norm": 0.3916441191377418, "learning_rate": 1.3151802639672267e-05, "loss": 0.3433, "step": 9056 }, { "epoch": 0.42, "grad_norm": 0.24695801472687676, "learning_rate": 1.3150390522092987e-05, "loss": 0.1647, "step": 9057 }, { "epoch": 0.42, "grad_norm": 0.5439125914026678, "learning_rate": 1.3148978334767101e-05, "loss": 0.3825, "step": 9058 }, { "epoch": 0.42, "grad_norm": 0.30476705087573086, "learning_rate": 1.3147566077725869e-05, "loss": 0.274, "step": 9059 }, { "epoch": 0.42, "grad_norm": 0.6579463224877468, "learning_rate": 1.3146153751000554e-05, "loss": 0.3945, "step": 9060 }, { "epoch": 0.42, "grad_norm": 0.3498031954752658, "learning_rate": 1.314474135462243e-05, "loss": 0.2511, "step": 9061 }, { "epoch": 0.42, "grad_norm": 0.914907492923861, "learning_rate": 1.3143328888622761e-05, "loss": 0.5218, "step": 9062 }, { "epoch": 0.42, "grad_norm": 0.45569315747089423, "learning_rate": 1.3141916353032822e-05, "loss": 0.3481, "step": 9063 }, { "epoch": 0.42, "grad_norm": 0.2901017738049763, "learning_rate": 1.3140503747883884e-05, "loss": 0.2382, "step": 9064 }, { "epoch": 0.42, "grad_norm": 0.2517876353948198, "learning_rate": 1.313909107320722e-05, "loss": 0.1549, "step": 9065 }, { "epoch": 0.42, "grad_norm": 0.7543588272489495, "learning_rate": 1.3137678329034103e-05, "loss": 0.3788, "step": 9066 }, { "epoch": 0.42, "grad_norm": 0.30092046058393357, "learning_rate": 1.3136265515395812e-05, "loss": 0.2467, "step": 9067 }, { "epoch": 0.42, "grad_norm": 0.4747967146662547, "learning_rate": 1.3134852632323625e-05, "loss": 0.3632, "step": 9068 }, { "epoch": 0.42, "grad_norm": 0.8241360203770558, "learning_rate": 1.3133439679848824e-05, "loss": 0.4519, "step": 9069 }, { "epoch": 0.42, "grad_norm": 0.27199366746236864, "learning_rate": 1.3132026658002688e-05, "loss": 0.1864, "step": 9070 }, { "epoch": 0.42, "grad_norm": 0.3546102812268334, "learning_rate": 1.3130613566816501e-05, "loss": 0.2332, "step": 9071 }, { "epoch": 0.42, "grad_norm": 0.5070129459303292, "learning_rate": 1.3129200406321545e-05, "loss": 0.2983, "step": 9072 }, { "epoch": 0.42, "grad_norm": 0.4609219149436638, "learning_rate": 1.312778717654911e-05, "loss": 0.3297, "step": 9073 }, { "epoch": 0.42, "grad_norm": 0.875544674467338, "learning_rate": 1.312637387753048e-05, "loss": 0.3444, "step": 9074 }, { "epoch": 0.42, "grad_norm": 0.3049291187219597, "learning_rate": 1.3124960509296945e-05, "loss": 0.2663, "step": 9075 }, { "epoch": 0.42, "grad_norm": 0.42384806756131305, "learning_rate": 1.3123547071879801e-05, "loss": 0.3256, "step": 9076 }, { "epoch": 0.42, "grad_norm": 0.44608303538832583, "learning_rate": 1.312213356531033e-05, "loss": 0.2501, "step": 9077 }, { "epoch": 0.42, "grad_norm": 0.4496466565786788, "learning_rate": 1.3120719989619832e-05, "loss": 0.2731, "step": 9078 }, { "epoch": 0.42, "grad_norm": 0.3987297095840659, "learning_rate": 1.3119306344839601e-05, "loss": 0.2838, "step": 9079 }, { "epoch": 0.42, "grad_norm": 0.3903273144247722, "learning_rate": 1.3117892631000936e-05, "loss": 0.2726, "step": 9080 }, { "epoch": 0.42, "grad_norm": 0.7691705027011358, "learning_rate": 1.311647884813513e-05, "loss": 0.4326, "step": 9081 }, { "epoch": 0.42, "grad_norm": 0.39006601073096353, "learning_rate": 1.3115064996273492e-05, "loss": 0.3233, "step": 9082 }, { "epoch": 0.42, "grad_norm": 0.38396612125624047, "learning_rate": 1.311365107544731e-05, "loss": 0.3196, "step": 9083 }, { "epoch": 0.42, "grad_norm": 0.3762054034873624, "learning_rate": 1.31122370856879e-05, "loss": 0.1806, "step": 9084 }, { "epoch": 0.42, "grad_norm": 0.30564800900531475, "learning_rate": 1.3110823027026558e-05, "loss": 0.2373, "step": 9085 }, { "epoch": 0.42, "grad_norm": 1.718601985082989, "learning_rate": 1.310940889949459e-05, "loss": 0.7979, "step": 9086 }, { "epoch": 0.42, "grad_norm": 0.3381244216576654, "learning_rate": 1.3107994703123312e-05, "loss": 0.2444, "step": 9087 }, { "epoch": 0.42, "grad_norm": 0.42401327681745565, "learning_rate": 1.3106580437944023e-05, "loss": 0.3151, "step": 9088 }, { "epoch": 0.42, "grad_norm": 0.6727120921260503, "learning_rate": 1.310516610398804e-05, "loss": 0.441, "step": 9089 }, { "epoch": 0.42, "grad_norm": 0.26345175681186583, "learning_rate": 1.3103751701286667e-05, "loss": 0.179, "step": 9090 }, { "epoch": 0.42, "grad_norm": 0.4211460415658565, "learning_rate": 1.3102337229871224e-05, "loss": 0.2995, "step": 9091 }, { "epoch": 0.42, "grad_norm": 0.44217693251947277, "learning_rate": 1.3100922689773028e-05, "loss": 0.3232, "step": 9092 }, { "epoch": 0.42, "grad_norm": 0.36679176236124206, "learning_rate": 1.3099508081023391e-05, "loss": 0.1986, "step": 9093 }, { "epoch": 0.42, "grad_norm": 0.4568985063735754, "learning_rate": 1.309809340365363e-05, "loss": 0.3479, "step": 9094 }, { "epoch": 0.42, "grad_norm": 0.3707018228039586, "learning_rate": 1.3096678657695072e-05, "loss": 0.3172, "step": 9095 }, { "epoch": 0.42, "grad_norm": 0.8199785924300981, "learning_rate": 1.3095263843179029e-05, "loss": 0.5059, "step": 9096 }, { "epoch": 0.42, "grad_norm": 0.24773077829773296, "learning_rate": 1.309384896013683e-05, "loss": 0.1649, "step": 9097 }, { "epoch": 0.42, "grad_norm": 0.49796003383230464, "learning_rate": 1.3092434008599795e-05, "loss": 0.2792, "step": 9098 }, { "epoch": 0.42, "grad_norm": 0.4055082678909088, "learning_rate": 1.3091018988599254e-05, "loss": 0.3154, "step": 9099 }, { "epoch": 0.42, "grad_norm": 0.38677445096568425, "learning_rate": 1.308960390016653e-05, "loss": 0.232, "step": 9100 }, { "epoch": 0.42, "grad_norm": 0.8059031550255847, "learning_rate": 1.3088188743332955e-05, "loss": 0.4787, "step": 9101 }, { "epoch": 0.42, "grad_norm": 0.4813228173866095, "learning_rate": 1.3086773518129853e-05, "loss": 0.266, "step": 9102 }, { "epoch": 0.42, "grad_norm": 0.2710552364501726, "learning_rate": 1.3085358224588565e-05, "loss": 0.2299, "step": 9103 }, { "epoch": 0.42, "grad_norm": 0.5454247305922598, "learning_rate": 1.308394286274042e-05, "loss": 0.2701, "step": 9104 }, { "epoch": 0.42, "grad_norm": 0.8539115277521903, "learning_rate": 1.308252743261675e-05, "loss": 0.528, "step": 9105 }, { "epoch": 0.42, "grad_norm": 0.2960486453416966, "learning_rate": 1.3081111934248895e-05, "loss": 0.2383, "step": 9106 }, { "epoch": 0.42, "grad_norm": 0.3864224262719897, "learning_rate": 1.3079696367668192e-05, "loss": 0.3131, "step": 9107 }, { "epoch": 0.42, "grad_norm": 0.9678461363381875, "learning_rate": 1.3078280732905976e-05, "loss": 0.6467, "step": 9108 }, { "epoch": 0.42, "grad_norm": 0.3844811970569384, "learning_rate": 1.3076865029993595e-05, "loss": 0.2839, "step": 9109 }, { "epoch": 0.42, "grad_norm": 0.21323739682973292, "learning_rate": 1.3075449258962384e-05, "loss": 0.091, "step": 9110 }, { "epoch": 0.42, "grad_norm": 0.3934755159192334, "learning_rate": 1.3074033419843697e-05, "loss": 0.3263, "step": 9111 }, { "epoch": 0.42, "grad_norm": 0.41849630898208734, "learning_rate": 1.3072617512668869e-05, "loss": 0.2617, "step": 9112 }, { "epoch": 0.42, "grad_norm": 0.5321544990901166, "learning_rate": 1.307120153746925e-05, "loss": 0.3266, "step": 9113 }, { "epoch": 0.42, "grad_norm": 0.4980210889786207, "learning_rate": 1.306978549427619e-05, "loss": 0.3316, "step": 9114 }, { "epoch": 0.42, "grad_norm": 0.34847259181197665, "learning_rate": 1.3068369383121036e-05, "loss": 0.2571, "step": 9115 }, { "epoch": 0.42, "grad_norm": 0.24092964261768782, "learning_rate": 1.3066953204035145e-05, "loss": 0.1592, "step": 9116 }, { "epoch": 0.42, "grad_norm": 0.6700819670552765, "learning_rate": 1.3065536957049863e-05, "loss": 0.4273, "step": 9117 }, { "epoch": 0.42, "grad_norm": 0.3582286079732149, "learning_rate": 1.3064120642196549e-05, "loss": 0.2809, "step": 9118 }, { "epoch": 0.42, "grad_norm": 0.3687328280600011, "learning_rate": 1.3062704259506559e-05, "loss": 0.288, "step": 9119 }, { "epoch": 0.42, "grad_norm": 1.6245068934609712, "learning_rate": 1.3061287809011243e-05, "loss": 0.6703, "step": 9120 }, { "epoch": 0.42, "grad_norm": 0.3897479416867728, "learning_rate": 1.3059871290741968e-05, "loss": 0.2855, "step": 9121 }, { "epoch": 0.42, "grad_norm": 0.3384079666272606, "learning_rate": 1.3058454704730092e-05, "loss": 0.1815, "step": 9122 }, { "epoch": 0.42, "grad_norm": 0.40188875530537144, "learning_rate": 1.305703805100698e-05, "loss": 0.292, "step": 9123 }, { "epoch": 0.42, "grad_norm": 0.37598848497130544, "learning_rate": 1.3055621329603988e-05, "loss": 0.269, "step": 9124 }, { "epoch": 0.42, "grad_norm": 1.4317095788728902, "learning_rate": 1.3054204540552483e-05, "loss": 0.8335, "step": 9125 }, { "epoch": 0.42, "grad_norm": 0.36952538033272525, "learning_rate": 1.3052787683883837e-05, "loss": 0.2457, "step": 9126 }, { "epoch": 0.42, "grad_norm": 0.3904765445753142, "learning_rate": 1.3051370759629411e-05, "loss": 0.2879, "step": 9127 }, { "epoch": 0.42, "grad_norm": 0.3172336014665754, "learning_rate": 1.3049953767820583e-05, "loss": 0.18, "step": 9128 }, { "epoch": 0.42, "grad_norm": 0.6173133042736832, "learning_rate": 1.3048536708488712e-05, "loss": 0.3628, "step": 9129 }, { "epoch": 0.42, "grad_norm": 0.4559953869090191, "learning_rate": 1.304711958166518e-05, "loss": 0.2933, "step": 9130 }, { "epoch": 0.42, "grad_norm": 0.3590272121156264, "learning_rate": 1.3045702387381355e-05, "loss": 0.3395, "step": 9131 }, { "epoch": 0.42, "grad_norm": 0.9009812412257739, "learning_rate": 1.3044285125668614e-05, "loss": 0.3408, "step": 9132 }, { "epoch": 0.42, "grad_norm": 0.47319866934921134, "learning_rate": 1.3042867796558338e-05, "loss": 0.298, "step": 9133 }, { "epoch": 0.42, "grad_norm": 0.4806836273199474, "learning_rate": 1.3041450400081901e-05, "loss": 0.2924, "step": 9134 }, { "epoch": 0.42, "grad_norm": 0.3712659723457226, "learning_rate": 1.3040032936270683e-05, "loss": 0.309, "step": 9135 }, { "epoch": 0.42, "grad_norm": 0.23668364509236967, "learning_rate": 1.3038615405156066e-05, "loss": 0.1738, "step": 9136 }, { "epoch": 0.42, "grad_norm": 1.3518357107034507, "learning_rate": 1.3037197806769429e-05, "loss": 0.7925, "step": 9137 }, { "epoch": 0.42, "grad_norm": 1.1133863622803866, "learning_rate": 1.3035780141142164e-05, "loss": 0.6159, "step": 9138 }, { "epoch": 0.42, "grad_norm": 0.2867399540529521, "learning_rate": 1.303436240830565e-05, "loss": 0.2244, "step": 9139 }, { "epoch": 0.42, "grad_norm": 0.7665825500675789, "learning_rate": 1.3032944608291279e-05, "loss": 0.4874, "step": 9140 }, { "epoch": 0.42, "grad_norm": 0.34021334321100616, "learning_rate": 1.3031526741130435e-05, "loss": 0.2393, "step": 9141 }, { "epoch": 0.42, "grad_norm": 0.3753465199102772, "learning_rate": 1.3030108806854516e-05, "loss": 0.2207, "step": 9142 }, { "epoch": 0.42, "grad_norm": 0.41536192194990573, "learning_rate": 1.3028690805494901e-05, "loss": 0.3207, "step": 9143 }, { "epoch": 0.42, "grad_norm": 0.9797439368270568, "learning_rate": 1.3027272737082997e-05, "loss": 0.6143, "step": 9144 }, { "epoch": 0.42, "grad_norm": 0.327025625133217, "learning_rate": 1.3025854601650187e-05, "loss": 0.2013, "step": 9145 }, { "epoch": 0.42, "grad_norm": 0.6735357941268292, "learning_rate": 1.3024436399227877e-05, "loss": 0.4301, "step": 9146 }, { "epoch": 0.42, "grad_norm": 0.2586462509406168, "learning_rate": 1.3023018129847459e-05, "loss": 0.1995, "step": 9147 }, { "epoch": 0.42, "grad_norm": 0.6068403340114009, "learning_rate": 1.3021599793540335e-05, "loss": 0.3291, "step": 9148 }, { "epoch": 0.42, "grad_norm": 0.4117029347481617, "learning_rate": 1.30201813903379e-05, "loss": 0.2746, "step": 9149 }, { "epoch": 0.42, "grad_norm": 0.39771834131571226, "learning_rate": 1.3018762920271559e-05, "loss": 0.3067, "step": 9150 }, { "epoch": 0.42, "grad_norm": 0.5819094996013092, "learning_rate": 1.3017344383372721e-05, "loss": 0.3505, "step": 9151 }, { "epoch": 0.42, "grad_norm": 0.39513389438569263, "learning_rate": 1.3015925779672784e-05, "loss": 0.2916, "step": 9152 }, { "epoch": 0.42, "grad_norm": 0.5005286404328148, "learning_rate": 1.301450710920316e-05, "loss": 0.2897, "step": 9153 }, { "epoch": 0.42, "grad_norm": 0.424679186075288, "learning_rate": 1.301308837199525e-05, "loss": 0.3035, "step": 9154 }, { "epoch": 0.42, "grad_norm": 0.27621476396891415, "learning_rate": 1.3011669568080469e-05, "loss": 0.2136, "step": 9155 }, { "epoch": 0.42, "grad_norm": 1.309873643197906, "learning_rate": 1.3010250697490225e-05, "loss": 0.7549, "step": 9156 }, { "epoch": 0.42, "grad_norm": 0.524616900068898, "learning_rate": 1.3008831760255933e-05, "loss": 0.3012, "step": 9157 }, { "epoch": 0.42, "grad_norm": 0.49302414551542845, "learning_rate": 1.3007412756409009e-05, "loss": 0.285, "step": 9158 }, { "epoch": 0.42, "grad_norm": 0.4065408395581251, "learning_rate": 1.3005993685980862e-05, "loss": 0.3263, "step": 9159 }, { "epoch": 0.42, "grad_norm": 0.4202475079823806, "learning_rate": 1.300457454900291e-05, "loss": 0.2773, "step": 9160 }, { "epoch": 0.42, "grad_norm": 0.5290986590331028, "learning_rate": 1.3003155345506575e-05, "loss": 0.3457, "step": 9161 }, { "epoch": 0.42, "grad_norm": 0.2791501826422814, "learning_rate": 1.3001736075523277e-05, "loss": 0.1966, "step": 9162 }, { "epoch": 0.42, "grad_norm": 0.5913920665516836, "learning_rate": 1.3000316739084433e-05, "loss": 0.3711, "step": 9163 }, { "epoch": 0.42, "grad_norm": 0.43271288303366157, "learning_rate": 1.299889733622147e-05, "loss": 0.3207, "step": 9164 }, { "epoch": 0.42, "grad_norm": 0.895806232506536, "learning_rate": 1.299747786696581e-05, "loss": 0.4164, "step": 9165 }, { "epoch": 0.42, "grad_norm": 0.7309719822108601, "learning_rate": 1.299605833134888e-05, "loss": 0.3862, "step": 9166 }, { "epoch": 0.42, "grad_norm": 0.3249870916905667, "learning_rate": 1.2994638729402102e-05, "loss": 0.3045, "step": 9167 }, { "epoch": 0.42, "grad_norm": 0.35519963442672736, "learning_rate": 1.2993219061156914e-05, "loss": 0.2087, "step": 9168 }, { "epoch": 0.42, "grad_norm": 0.6788800371377359, "learning_rate": 1.2991799326644736e-05, "loss": 0.3557, "step": 9169 }, { "epoch": 0.42, "grad_norm": 0.38044301789460366, "learning_rate": 1.299037952589701e-05, "loss": 0.2897, "step": 9170 }, { "epoch": 0.42, "grad_norm": 0.5125383907599383, "learning_rate": 1.298895965894516e-05, "loss": 0.2873, "step": 9171 }, { "epoch": 0.42, "grad_norm": 0.6207478436073067, "learning_rate": 1.2987539725820624e-05, "loss": 0.3831, "step": 9172 }, { "epoch": 0.42, "grad_norm": 0.3220543021453989, "learning_rate": 1.2986119726554836e-05, "loss": 0.2414, "step": 9173 }, { "epoch": 0.42, "grad_norm": 0.5422675052890263, "learning_rate": 1.2984699661179238e-05, "loss": 0.3747, "step": 9174 }, { "epoch": 0.42, "grad_norm": 0.3314111570386824, "learning_rate": 1.2983279529725268e-05, "loss": 0.2078, "step": 9175 }, { "epoch": 0.42, "grad_norm": 0.4257128289906499, "learning_rate": 1.2981859332224362e-05, "loss": 0.3167, "step": 9176 }, { "epoch": 0.42, "grad_norm": 0.9338535350838162, "learning_rate": 1.2980439068707964e-05, "loss": 0.5831, "step": 9177 }, { "epoch": 0.42, "grad_norm": 0.321498461574755, "learning_rate": 1.2979018739207518e-05, "loss": 0.2257, "step": 9178 }, { "epoch": 0.42, "grad_norm": 0.39523796936286, "learning_rate": 1.297759834375447e-05, "loss": 0.3283, "step": 9179 }, { "epoch": 0.42, "grad_norm": 0.6338557855334457, "learning_rate": 1.297617788238026e-05, "loss": 0.4115, "step": 9180 }, { "epoch": 0.42, "grad_norm": 0.21045759130221833, "learning_rate": 1.2974757355116344e-05, "loss": 0.0737, "step": 9181 }, { "epoch": 0.42, "grad_norm": 0.3887189494908164, "learning_rate": 1.2973336761994168e-05, "loss": 0.2888, "step": 9182 }, { "epoch": 0.42, "grad_norm": 0.5567940460733649, "learning_rate": 1.297191610304518e-05, "loss": 0.3766, "step": 9183 }, { "epoch": 0.42, "grad_norm": 0.4884782477352658, "learning_rate": 1.2970495378300834e-05, "loss": 0.2632, "step": 9184 }, { "epoch": 0.42, "grad_norm": 0.38951540177713306, "learning_rate": 1.2969074587792583e-05, "loss": 0.3208, "step": 9185 }, { "epoch": 0.42, "grad_norm": 0.3699008799668862, "learning_rate": 1.2967653731551881e-05, "loss": 0.3473, "step": 9186 }, { "epoch": 0.42, "grad_norm": 0.4470966365802915, "learning_rate": 1.2966232809610189e-05, "loss": 0.2744, "step": 9187 }, { "epoch": 0.42, "grad_norm": 0.26081461929945565, "learning_rate": 1.2964811821998961e-05, "loss": 0.1676, "step": 9188 }, { "epoch": 0.42, "grad_norm": 1.8002601435944738, "learning_rate": 1.2963390768749655e-05, "loss": 0.8674, "step": 9189 }, { "epoch": 0.42, "grad_norm": 0.3572184913664882, "learning_rate": 1.2961969649893732e-05, "loss": 0.3244, "step": 9190 }, { "epoch": 0.42, "grad_norm": 0.35352476648188086, "learning_rate": 1.2960548465462658e-05, "loss": 0.2573, "step": 9191 }, { "epoch": 0.42, "grad_norm": 0.7518776254676474, "learning_rate": 1.2959127215487894e-05, "loss": 0.5121, "step": 9192 }, { "epoch": 0.42, "grad_norm": 0.28183382216949426, "learning_rate": 1.2957705900000907e-05, "loss": 0.1672, "step": 9193 }, { "epoch": 0.42, "grad_norm": 0.3042201610839729, "learning_rate": 1.2956284519033165e-05, "loss": 0.2327, "step": 9194 }, { "epoch": 0.42, "grad_norm": 0.4945094068591413, "learning_rate": 1.2954863072616127e-05, "loss": 0.4178, "step": 9195 }, { "epoch": 0.42, "grad_norm": 0.5972086658647482, "learning_rate": 1.295344156078127e-05, "loss": 0.4252, "step": 9196 }, { "epoch": 0.42, "grad_norm": 0.38583420616495356, "learning_rate": 1.2952019983560062e-05, "loss": 0.2876, "step": 9197 }, { "epoch": 0.42, "grad_norm": 0.34613346335715556, "learning_rate": 1.295059834098398e-05, "loss": 0.2926, "step": 9198 }, { "epoch": 0.42, "grad_norm": 0.3269169500766363, "learning_rate": 1.2949176633084494e-05, "loss": 0.1794, "step": 9199 }, { "epoch": 0.42, "grad_norm": 0.3320053570230296, "learning_rate": 1.294775485989308e-05, "loss": 0.2315, "step": 9200 }, { "epoch": 0.42, "grad_norm": 0.9209511929468019, "learning_rate": 1.2946333021441211e-05, "loss": 0.3717, "step": 9201 }, { "epoch": 0.42, "grad_norm": 0.35655274450370195, "learning_rate": 1.2944911117760372e-05, "loss": 0.2979, "step": 9202 }, { "epoch": 0.42, "grad_norm": 0.33188512255103086, "learning_rate": 1.2943489148882038e-05, "loss": 0.277, "step": 9203 }, { "epoch": 0.42, "grad_norm": 0.6369611405567255, "learning_rate": 1.294206711483769e-05, "loss": 0.3754, "step": 9204 }, { "epoch": 0.42, "grad_norm": 0.26858314289529717, "learning_rate": 1.2940645015658814e-05, "loss": 0.1571, "step": 9205 }, { "epoch": 0.42, "grad_norm": 0.459168709094042, "learning_rate": 1.2939222851376891e-05, "loss": 0.2715, "step": 9206 }, { "epoch": 0.42, "grad_norm": 0.435507454972667, "learning_rate": 1.2937800622023407e-05, "loss": 0.2786, "step": 9207 }, { "epoch": 0.42, "grad_norm": 0.6383914352932758, "learning_rate": 1.2936378327629849e-05, "loss": 0.4205, "step": 9208 }, { "epoch": 0.42, "grad_norm": 0.3834138532187214, "learning_rate": 1.2934955968227705e-05, "loss": 0.335, "step": 9209 }, { "epoch": 0.42, "grad_norm": 0.37856879915810826, "learning_rate": 1.2933533543848462e-05, "loss": 0.3254, "step": 9210 }, { "epoch": 0.42, "grad_norm": 0.2764526077080524, "learning_rate": 1.2932111054523615e-05, "loss": 0.1516, "step": 9211 }, { "epoch": 0.42, "grad_norm": 0.28489638212999746, "learning_rate": 1.2930688500284659e-05, "loss": 0.2403, "step": 9212 }, { "epoch": 0.42, "grad_norm": 0.8867247900480723, "learning_rate": 1.292926588116308e-05, "loss": 0.5108, "step": 9213 }, { "epoch": 0.42, "grad_norm": 0.3365305180228177, "learning_rate": 1.2927843197190377e-05, "loss": 0.2747, "step": 9214 }, { "epoch": 0.42, "grad_norm": 0.38319802896539457, "learning_rate": 1.2926420448398051e-05, "loss": 0.2953, "step": 9215 }, { "epoch": 0.42, "grad_norm": 1.0872457181583672, "learning_rate": 1.2924997634817593e-05, "loss": 0.7089, "step": 9216 }, { "epoch": 0.42, "grad_norm": 0.27469806576565337, "learning_rate": 1.2923574756480512e-05, "loss": 0.1937, "step": 9217 }, { "epoch": 0.42, "grad_norm": 0.36918583653454734, "learning_rate": 1.2922151813418298e-05, "loss": 0.2883, "step": 9218 }, { "epoch": 0.42, "grad_norm": 0.4498237502137872, "learning_rate": 1.2920728805662462e-05, "loss": 0.2928, "step": 9219 }, { "epoch": 0.42, "grad_norm": 1.0715695875812654, "learning_rate": 1.2919305733244503e-05, "loss": 0.3994, "step": 9220 }, { "epoch": 0.42, "grad_norm": 0.36717051230277065, "learning_rate": 1.2917882596195932e-05, "loss": 0.2651, "step": 9221 }, { "epoch": 0.42, "grad_norm": 0.38108542109729615, "learning_rate": 1.291645939454825e-05, "loss": 0.3117, "step": 9222 }, { "epoch": 0.42, "grad_norm": 1.3081138690257212, "learning_rate": 1.2915036128332972e-05, "loss": 0.6199, "step": 9223 }, { "epoch": 0.42, "grad_norm": 0.28556975276899965, "learning_rate": 1.29136127975816e-05, "loss": 0.1705, "step": 9224 }, { "epoch": 0.42, "grad_norm": 0.7478664881648842, "learning_rate": 1.2912189402325647e-05, "loss": 0.3451, "step": 9225 }, { "epoch": 0.42, "grad_norm": 0.40576814872766537, "learning_rate": 1.2910765942596632e-05, "loss": 0.3509, "step": 9226 }, { "epoch": 0.42, "grad_norm": 0.31359612060460024, "learning_rate": 1.2909342418426062e-05, "loss": 0.2196, "step": 9227 }, { "epoch": 0.42, "grad_norm": 0.9812326793123388, "learning_rate": 1.2907918829845456e-05, "loss": 0.6035, "step": 9228 }, { "epoch": 0.42, "grad_norm": 0.49906772845363845, "learning_rate": 1.290649517688633e-05, "loss": 0.3985, "step": 9229 }, { "epoch": 0.42, "grad_norm": 0.2602044477218655, "learning_rate": 1.2905071459580201e-05, "loss": 0.1951, "step": 9230 }, { "epoch": 0.42, "grad_norm": 0.7006466237762627, "learning_rate": 1.2903647677958588e-05, "loss": 0.4565, "step": 9231 }, { "epoch": 0.42, "grad_norm": 0.42734410659168814, "learning_rate": 1.2902223832053018e-05, "loss": 0.3224, "step": 9232 }, { "epoch": 0.42, "grad_norm": 0.36413259084467886, "learning_rate": 1.2900799921895004e-05, "loss": 0.1991, "step": 9233 }, { "epoch": 0.42, "grad_norm": 0.4079955549655896, "learning_rate": 1.2899375947516082e-05, "loss": 0.3517, "step": 9234 }, { "epoch": 0.42, "grad_norm": 0.9609102035568737, "learning_rate": 1.2897951908947768e-05, "loss": 0.5279, "step": 9235 }, { "epoch": 0.42, "grad_norm": 0.44847967024400237, "learning_rate": 1.2896527806221592e-05, "loss": 0.2955, "step": 9236 }, { "epoch": 0.42, "grad_norm": 0.341410058789977, "learning_rate": 1.2895103639369083e-05, "loss": 0.2861, "step": 9237 }, { "epoch": 0.42, "grad_norm": 0.33387151505557394, "learning_rate": 1.2893679408421766e-05, "loss": 0.2516, "step": 9238 }, { "epoch": 0.42, "grad_norm": 0.4938747131041917, "learning_rate": 1.2892255113411181e-05, "loss": 0.3239, "step": 9239 }, { "epoch": 0.42, "grad_norm": 0.35092957640965905, "learning_rate": 1.2890830754368855e-05, "loss": 0.1915, "step": 9240 }, { "epoch": 0.42, "grad_norm": 0.4552596237003159, "learning_rate": 1.288940633132632e-05, "loss": 0.3607, "step": 9241 }, { "epoch": 0.42, "grad_norm": 0.3384308263839799, "learning_rate": 1.2887981844315114e-05, "loss": 0.2652, "step": 9242 }, { "epoch": 0.42, "grad_norm": 0.7182167582623974, "learning_rate": 1.2886557293366773e-05, "loss": 0.3612, "step": 9243 }, { "epoch": 0.42, "grad_norm": 0.42771286647029166, "learning_rate": 1.2885132678512834e-05, "loss": 0.3032, "step": 9244 }, { "epoch": 0.42, "grad_norm": 0.35597363157577383, "learning_rate": 1.288370799978484e-05, "loss": 0.279, "step": 9245 }, { "epoch": 0.42, "grad_norm": 0.2781024329452538, "learning_rate": 1.2882283257214332e-05, "loss": 0.2105, "step": 9246 }, { "epoch": 0.42, "grad_norm": 0.6965117482475031, "learning_rate": 1.288085845083285e-05, "loss": 0.4606, "step": 9247 }, { "epoch": 0.42, "grad_norm": 0.31677890119118707, "learning_rate": 1.2879433580671937e-05, "loss": 0.287, "step": 9248 }, { "epoch": 0.42, "grad_norm": 0.7265120111951784, "learning_rate": 1.287800864676314e-05, "loss": 0.4457, "step": 9249 }, { "epoch": 0.42, "grad_norm": 0.3513048664155185, "learning_rate": 1.2876583649138005e-05, "loss": 0.2815, "step": 9250 }, { "epoch": 0.42, "grad_norm": 0.4569076711507936, "learning_rate": 1.2875158587828082e-05, "loss": 0.2611, "step": 9251 }, { "epoch": 0.43, "grad_norm": 0.2718861630962785, "learning_rate": 1.2873733462864919e-05, "loss": 0.2011, "step": 9252 }, { "epoch": 0.43, "grad_norm": 0.41604911877923323, "learning_rate": 1.2872308274280067e-05, "loss": 0.2941, "step": 9253 }, { "epoch": 0.43, "grad_norm": 0.4634007956067311, "learning_rate": 1.2870883022105079e-05, "loss": 0.3283, "step": 9254 }, { "epoch": 0.43, "grad_norm": 0.5334835499328268, "learning_rate": 1.2869457706371503e-05, "loss": 0.4032, "step": 9255 }, { "epoch": 0.43, "grad_norm": 1.0337936929917264, "learning_rate": 1.2868032327110904e-05, "loss": 0.5082, "step": 9256 }, { "epoch": 0.43, "grad_norm": 0.4497371917837551, "learning_rate": 1.2866606884354831e-05, "loss": 0.3157, "step": 9257 }, { "epoch": 0.43, "grad_norm": 0.2698418805435096, "learning_rate": 1.2865181378134845e-05, "loss": 0.2238, "step": 9258 }, { "epoch": 0.43, "grad_norm": 0.6420644624710392, "learning_rate": 1.2863755808482505e-05, "loss": 0.3353, "step": 9259 }, { "epoch": 0.43, "grad_norm": 0.4271061651692161, "learning_rate": 1.2862330175429374e-05, "loss": 0.298, "step": 9260 }, { "epoch": 0.43, "grad_norm": 0.4165571392190505, "learning_rate": 1.2860904479007008e-05, "loss": 0.3478, "step": 9261 }, { "epoch": 0.43, "grad_norm": 0.5816711263495682, "learning_rate": 1.2859478719246976e-05, "loss": 0.4165, "step": 9262 }, { "epoch": 0.43, "grad_norm": 0.3272428994892283, "learning_rate": 1.285805289618084e-05, "loss": 0.2162, "step": 9263 }, { "epoch": 0.43, "grad_norm": 0.3898187144407041, "learning_rate": 1.285662700984017e-05, "loss": 0.2356, "step": 9264 }, { "epoch": 0.43, "grad_norm": 0.43108173504372005, "learning_rate": 1.2855201060256528e-05, "loss": 0.3388, "step": 9265 }, { "epoch": 0.43, "grad_norm": 0.3311893827795691, "learning_rate": 1.285377504746149e-05, "loss": 0.231, "step": 9266 }, { "epoch": 0.43, "grad_norm": 1.2658118343812825, "learning_rate": 1.2852348971486618e-05, "loss": 0.5285, "step": 9267 }, { "epoch": 0.43, "grad_norm": 1.3165124603576754, "learning_rate": 1.2850922832363493e-05, "loss": 0.8713, "step": 9268 }, { "epoch": 0.43, "grad_norm": 0.4208343047056562, "learning_rate": 1.2849496630123683e-05, "loss": 0.2025, "step": 9269 }, { "epoch": 0.43, "grad_norm": 0.38286662246203435, "learning_rate": 1.2848070364798763e-05, "loss": 0.3088, "step": 9270 }, { "epoch": 0.43, "grad_norm": 0.34771023238961446, "learning_rate": 1.2846644036420313e-05, "loss": 0.2501, "step": 9271 }, { "epoch": 0.43, "grad_norm": 0.38705870108181156, "learning_rate": 1.2845217645019906e-05, "loss": 0.1998, "step": 9272 }, { "epoch": 0.43, "grad_norm": 0.4198386033394339, "learning_rate": 1.284379119062912e-05, "loss": 0.3412, "step": 9273 }, { "epoch": 0.43, "grad_norm": 0.5441535957920729, "learning_rate": 1.284236467327954e-05, "loss": 0.3874, "step": 9274 }, { "epoch": 0.43, "grad_norm": 0.704644004348439, "learning_rate": 1.2840938093002745e-05, "loss": 0.4272, "step": 9275 }, { "epoch": 0.43, "grad_norm": 0.3921787667686933, "learning_rate": 1.2839511449830323e-05, "loss": 0.2941, "step": 9276 }, { "epoch": 0.43, "grad_norm": 0.3507548039206577, "learning_rate": 1.283808474379385e-05, "loss": 0.2594, "step": 9277 }, { "epoch": 0.43, "grad_norm": 0.3166677398320318, "learning_rate": 1.2836657974924915e-05, "loss": 0.2091, "step": 9278 }, { "epoch": 0.43, "grad_norm": 0.41400884141288585, "learning_rate": 1.283523114325511e-05, "loss": 0.2528, "step": 9279 }, { "epoch": 0.43, "grad_norm": 1.3707460866928427, "learning_rate": 1.2833804248816018e-05, "loss": 0.8134, "step": 9280 }, { "epoch": 0.43, "grad_norm": 0.38484701445474057, "learning_rate": 1.283237729163923e-05, "loss": 0.2783, "step": 9281 }, { "epoch": 0.43, "grad_norm": 0.41817841082458845, "learning_rate": 1.2830950271756341e-05, "loss": 0.2583, "step": 9282 }, { "epoch": 0.43, "grad_norm": 0.4407431743626982, "learning_rate": 1.2829523189198942e-05, "loss": 0.2824, "step": 9283 }, { "epoch": 0.43, "grad_norm": 0.3176283830955911, "learning_rate": 1.2828096043998627e-05, "loss": 0.1914, "step": 9284 }, { "epoch": 0.43, "grad_norm": 0.363969820657224, "learning_rate": 1.2826668836186988e-05, "loss": 0.2561, "step": 9285 }, { "epoch": 0.43, "grad_norm": 0.5237257890566737, "learning_rate": 1.2825241565795628e-05, "loss": 0.4138, "step": 9286 }, { "epoch": 0.43, "grad_norm": 0.48437039925801284, "learning_rate": 1.2823814232856143e-05, "loss": 0.3118, "step": 9287 }, { "epoch": 0.43, "grad_norm": 0.47353915829006527, "learning_rate": 1.2822386837400132e-05, "loss": 0.3333, "step": 9288 }, { "epoch": 0.43, "grad_norm": 0.414526081073365, "learning_rate": 1.2820959379459194e-05, "loss": 0.2603, "step": 9289 }, { "epoch": 0.43, "grad_norm": 0.24897793223696746, "learning_rate": 1.281953185906494e-05, "loss": 0.1359, "step": 9290 }, { "epoch": 0.43, "grad_norm": 0.36089582952149185, "learning_rate": 1.2818104276248962e-05, "loss": 0.2842, "step": 9291 }, { "epoch": 0.43, "grad_norm": 0.8192955616657643, "learning_rate": 1.2816676631042874e-05, "loss": 0.4148, "step": 9292 }, { "epoch": 0.43, "grad_norm": 0.3524332559756306, "learning_rate": 1.281524892347828e-05, "loss": 0.2896, "step": 9293 }, { "epoch": 0.43, "grad_norm": 0.3625218614291083, "learning_rate": 1.2813821153586789e-05, "loss": 0.3285, "step": 9294 }, { "epoch": 0.43, "grad_norm": 0.9369178190642404, "learning_rate": 1.2812393321400008e-05, "loss": 0.3677, "step": 9295 }, { "epoch": 0.43, "grad_norm": 0.2696797954693466, "learning_rate": 1.2810965426949551e-05, "loss": 0.1501, "step": 9296 }, { "epoch": 0.43, "grad_norm": 0.2890507106504111, "learning_rate": 1.2809537470267029e-05, "loss": 0.2707, "step": 9297 }, { "epoch": 0.43, "grad_norm": 1.001910033546983, "learning_rate": 1.2808109451384054e-05, "loss": 0.3726, "step": 9298 }, { "epoch": 0.43, "grad_norm": 0.4942897489319573, "learning_rate": 1.2806681370332244e-05, "loss": 0.3285, "step": 9299 }, { "epoch": 0.43, "grad_norm": 0.41120020767750787, "learning_rate": 1.2805253227143214e-05, "loss": 0.3094, "step": 9300 }, { "epoch": 0.43, "grad_norm": 0.3610766477986101, "learning_rate": 1.2803825021848577e-05, "loss": 0.3106, "step": 9301 }, { "epoch": 0.43, "grad_norm": 0.1848146151695569, "learning_rate": 1.2802396754479958e-05, "loss": 0.0699, "step": 9302 }, { "epoch": 0.43, "grad_norm": 0.4052299901142605, "learning_rate": 1.2800968425068977e-05, "loss": 0.2971, "step": 9303 }, { "epoch": 0.43, "grad_norm": 1.1993166168178706, "learning_rate": 1.2799540033647255e-05, "loss": 0.4536, "step": 9304 }, { "epoch": 0.43, "grad_norm": 0.32194111221947364, "learning_rate": 1.2798111580246416e-05, "loss": 0.237, "step": 9305 }, { "epoch": 0.43, "grad_norm": 0.3961794664125094, "learning_rate": 1.2796683064898081e-05, "loss": 0.3093, "step": 9306 }, { "epoch": 0.43, "grad_norm": 1.5462192707150104, "learning_rate": 1.279525448763388e-05, "loss": 0.9516, "step": 9307 }, { "epoch": 0.43, "grad_norm": 0.18914600244791074, "learning_rate": 1.2793825848485435e-05, "loss": 0.0959, "step": 9308 }, { "epoch": 0.43, "grad_norm": 0.3096574501319118, "learning_rate": 1.2792397147484384e-05, "loss": 0.2649, "step": 9309 }, { "epoch": 0.43, "grad_norm": 1.2853785205518868, "learning_rate": 1.2790968384662348e-05, "loss": 0.4844, "step": 9310 }, { "epoch": 0.43, "grad_norm": 0.8248647768908512, "learning_rate": 1.2789539560050965e-05, "loss": 0.3154, "step": 9311 }, { "epoch": 0.43, "grad_norm": 0.3658096888201, "learning_rate": 1.2788110673681859e-05, "loss": 0.2902, "step": 9312 }, { "epoch": 0.43, "grad_norm": 0.4915760628579432, "learning_rate": 1.2786681725586677e-05, "loss": 0.3642, "step": 9313 }, { "epoch": 0.43, "grad_norm": 0.3716185062901843, "learning_rate": 1.2785252715797044e-05, "loss": 0.2299, "step": 9314 }, { "epoch": 0.43, "grad_norm": 0.36733762789403324, "learning_rate": 1.2783823644344598e-05, "loss": 0.2022, "step": 9315 }, { "epoch": 0.43, "grad_norm": 1.2950186586496824, "learning_rate": 1.2782394511260983e-05, "loss": 0.4518, "step": 9316 }, { "epoch": 0.43, "grad_norm": 0.4012955608603009, "learning_rate": 1.2780965316577833e-05, "loss": 0.3106, "step": 9317 }, { "epoch": 0.43, "grad_norm": 0.3445466186469441, "learning_rate": 1.2779536060326793e-05, "loss": 0.2222, "step": 9318 }, { "epoch": 0.43, "grad_norm": 0.8184373286714502, "learning_rate": 1.2778106742539502e-05, "loss": 0.5672, "step": 9319 }, { "epoch": 0.43, "grad_norm": 0.2989657090615629, "learning_rate": 1.2776677363247607e-05, "loss": 0.2237, "step": 9320 }, { "epoch": 0.43, "grad_norm": 0.30900918197882027, "learning_rate": 1.277524792248275e-05, "loss": 0.2249, "step": 9321 }, { "epoch": 0.43, "grad_norm": 0.544796978332322, "learning_rate": 1.277381842027658e-05, "loss": 0.332, "step": 9322 }, { "epoch": 0.43, "grad_norm": 0.8295120421820827, "learning_rate": 1.2772388856660744e-05, "loss": 0.5224, "step": 9323 }, { "epoch": 0.43, "grad_norm": 0.4316388336314948, "learning_rate": 1.277095923166689e-05, "loss": 0.3021, "step": 9324 }, { "epoch": 0.43, "grad_norm": 0.3799684844825383, "learning_rate": 1.2769529545326669e-05, "loss": 0.2436, "step": 9325 }, { "epoch": 0.43, "grad_norm": 0.4850281374257871, "learning_rate": 1.2768099797671734e-05, "loss": 0.2688, "step": 9326 }, { "epoch": 0.43, "grad_norm": 0.3431443769690651, "learning_rate": 1.2766669988733734e-05, "loss": 0.2871, "step": 9327 }, { "epoch": 0.43, "grad_norm": 0.3566606445597884, "learning_rate": 1.2765240118544328e-05, "loss": 0.2492, "step": 9328 }, { "epoch": 0.43, "grad_norm": 0.5097445652037658, "learning_rate": 1.2763810187135177e-05, "loss": 0.3977, "step": 9329 }, { "epoch": 0.43, "grad_norm": 0.3757814362775745, "learning_rate": 1.2762380194537927e-05, "loss": 0.276, "step": 9330 }, { "epoch": 0.43, "grad_norm": 0.8006749527216803, "learning_rate": 1.2760950140784244e-05, "loss": 0.2923, "step": 9331 }, { "epoch": 0.43, "grad_norm": 0.46078463533789227, "learning_rate": 1.2759520025905783e-05, "loss": 0.3487, "step": 9332 }, { "epoch": 0.43, "grad_norm": 0.31828217656758173, "learning_rate": 1.275808984993421e-05, "loss": 0.2799, "step": 9333 }, { "epoch": 0.43, "grad_norm": 0.44326460259013684, "learning_rate": 1.2756659612901188e-05, "loss": 0.2287, "step": 9334 }, { "epoch": 0.43, "grad_norm": 0.8391256229892663, "learning_rate": 1.2755229314838376e-05, "loss": 0.5699, "step": 9335 }, { "epoch": 0.43, "grad_norm": 0.3128622031074628, "learning_rate": 1.2753798955777442e-05, "loss": 0.2325, "step": 9336 }, { "epoch": 0.43, "grad_norm": 0.4011887162391267, "learning_rate": 1.2752368535750054e-05, "loss": 0.3336, "step": 9337 }, { "epoch": 0.43, "grad_norm": 0.5089257739353394, "learning_rate": 1.275093805478788e-05, "loss": 0.3208, "step": 9338 }, { "epoch": 0.43, "grad_norm": 0.37985286666620316, "learning_rate": 1.274950751292259e-05, "loss": 0.3044, "step": 9339 }, { "epoch": 0.43, "grad_norm": 0.47106033402320757, "learning_rate": 1.2748076910185854e-05, "loss": 0.336, "step": 9340 }, { "epoch": 0.43, "grad_norm": 0.33114496840275004, "learning_rate": 1.2746646246609341e-05, "loss": 0.2746, "step": 9341 }, { "epoch": 0.43, "grad_norm": 0.38078784490602813, "learning_rate": 1.274521552222473e-05, "loss": 0.2677, "step": 9342 }, { "epoch": 0.43, "grad_norm": 0.44074706887799464, "learning_rate": 1.274378473706369e-05, "loss": 0.3018, "step": 9343 }, { "epoch": 0.43, "grad_norm": 0.4188651720631131, "learning_rate": 1.2742353891157905e-05, "loss": 0.297, "step": 9344 }, { "epoch": 0.43, "grad_norm": 0.3087815084045747, "learning_rate": 1.2740922984539043e-05, "loss": 0.2589, "step": 9345 }, { "epoch": 0.43, "grad_norm": 0.9511424435288572, "learning_rate": 1.2739492017238793e-05, "loss": 0.5156, "step": 9346 }, { "epoch": 0.43, "grad_norm": 0.2804427348362116, "learning_rate": 1.2738060989288827e-05, "loss": 0.142, "step": 9347 }, { "epoch": 0.43, "grad_norm": 0.27217675731479063, "learning_rate": 1.2736629900720832e-05, "loss": 0.221, "step": 9348 }, { "epoch": 0.43, "grad_norm": 0.3688374112719879, "learning_rate": 1.2735198751566484e-05, "loss": 0.2931, "step": 9349 }, { "epoch": 0.43, "grad_norm": 0.645820516888122, "learning_rate": 1.2733767541857476e-05, "loss": 0.428, "step": 9350 }, { "epoch": 0.43, "grad_norm": 0.3012673018601823, "learning_rate": 1.2732336271625486e-05, "loss": 0.2065, "step": 9351 }, { "epoch": 0.43, "grad_norm": 1.0196639318946006, "learning_rate": 1.2730904940902209e-05, "loss": 0.6425, "step": 9352 }, { "epoch": 0.43, "grad_norm": 0.3787475509752184, "learning_rate": 1.2729473549719324e-05, "loss": 0.3297, "step": 9353 }, { "epoch": 0.43, "grad_norm": 0.36313741274140404, "learning_rate": 1.2728042098108529e-05, "loss": 0.2075, "step": 9354 }, { "epoch": 0.43, "grad_norm": 0.3425708528290884, "learning_rate": 1.2726610586101509e-05, "loss": 0.2159, "step": 9355 }, { "epoch": 0.43, "grad_norm": 0.38054785969483185, "learning_rate": 1.2725179013729961e-05, "loss": 0.3202, "step": 9356 }, { "epoch": 0.43, "grad_norm": 0.3693017945803192, "learning_rate": 1.2723747381025572e-05, "loss": 0.1998, "step": 9357 }, { "epoch": 0.43, "grad_norm": 1.1594884823754317, "learning_rate": 1.2722315688020046e-05, "loss": 0.6269, "step": 9358 }, { "epoch": 0.43, "grad_norm": 0.4299145292575415, "learning_rate": 1.2720883934745071e-05, "loss": 0.3057, "step": 9359 }, { "epoch": 0.43, "grad_norm": 0.3201252041069684, "learning_rate": 1.2719452121232349e-05, "loss": 0.1987, "step": 9360 }, { "epoch": 0.43, "grad_norm": 0.2828518438157333, "learning_rate": 1.271802024751358e-05, "loss": 0.2772, "step": 9361 }, { "epoch": 0.43, "grad_norm": 0.6496512794784586, "learning_rate": 1.2716588313620459e-05, "loss": 0.4307, "step": 9362 }, { "epoch": 0.43, "grad_norm": 0.45488374724617453, "learning_rate": 1.2715156319584692e-05, "loss": 0.2921, "step": 9363 }, { "epoch": 0.43, "grad_norm": 0.33503804431897677, "learning_rate": 1.2713724265437983e-05, "loss": 0.2795, "step": 9364 }, { "epoch": 0.43, "grad_norm": 1.3396300894241442, "learning_rate": 1.2712292151212034e-05, "loss": 0.7817, "step": 9365 }, { "epoch": 0.43, "grad_norm": 0.4034591287152584, "learning_rate": 1.2710859976938548e-05, "loss": 0.2992, "step": 9366 }, { "epoch": 0.43, "grad_norm": 0.5122151056418088, "learning_rate": 1.2709427742649238e-05, "loss": 0.2966, "step": 9367 }, { "epoch": 0.43, "grad_norm": 0.25821475837525226, "learning_rate": 1.2707995448375807e-05, "loss": 0.235, "step": 9368 }, { "epoch": 0.43, "grad_norm": 0.4034461318359468, "learning_rate": 1.2706563094149967e-05, "loss": 0.2725, "step": 9369 }, { "epoch": 0.43, "grad_norm": 1.004871897039284, "learning_rate": 1.270513068000343e-05, "loss": 0.3679, "step": 9370 }, { "epoch": 0.43, "grad_norm": 0.7881206852937602, "learning_rate": 1.2703698205967907e-05, "loss": 0.5385, "step": 9371 }, { "epoch": 0.43, "grad_norm": 0.35178470040104903, "learning_rate": 1.2702265672075108e-05, "loss": 0.2788, "step": 9372 }, { "epoch": 0.43, "grad_norm": 0.34987363199215055, "learning_rate": 1.2700833078356759e-05, "loss": 0.2815, "step": 9373 }, { "epoch": 0.43, "grad_norm": 0.27874537305099073, "learning_rate": 1.2699400424844563e-05, "loss": 0.1939, "step": 9374 }, { "epoch": 0.43, "grad_norm": 0.6675182692172947, "learning_rate": 1.2697967711570243e-05, "loss": 0.3747, "step": 9375 }, { "epoch": 0.43, "grad_norm": 0.33233795917513964, "learning_rate": 1.2696534938565524e-05, "loss": 0.2921, "step": 9376 }, { "epoch": 0.43, "grad_norm": 0.5763156229044814, "learning_rate": 1.2695102105862114e-05, "loss": 0.3211, "step": 9377 }, { "epoch": 0.43, "grad_norm": 0.4269783880724685, "learning_rate": 1.2693669213491741e-05, "loss": 0.2767, "step": 9378 }, { "epoch": 0.43, "grad_norm": 0.4655623224224928, "learning_rate": 1.269223626148613e-05, "loss": 0.3596, "step": 9379 }, { "epoch": 0.43, "grad_norm": 0.2605431209672585, "learning_rate": 1.2690803249877003e-05, "loss": 0.1899, "step": 9380 }, { "epoch": 0.43, "grad_norm": 0.3014886577535861, "learning_rate": 1.2689370178696088e-05, "loss": 0.2006, "step": 9381 }, { "epoch": 0.43, "grad_norm": 0.5630434249393794, "learning_rate": 1.2687937047975108e-05, "loss": 0.4216, "step": 9382 }, { "epoch": 0.43, "grad_norm": 0.7273132245068582, "learning_rate": 1.2686503857745787e-05, "loss": 0.3639, "step": 9383 }, { "epoch": 0.43, "grad_norm": 0.34119290798276586, "learning_rate": 1.2685070608039865e-05, "loss": 0.2836, "step": 9384 }, { "epoch": 0.43, "grad_norm": 0.3791565190202857, "learning_rate": 1.2683637298889067e-05, "loss": 0.3171, "step": 9385 }, { "epoch": 0.43, "grad_norm": 0.2618065671854353, "learning_rate": 1.2682203930325123e-05, "loss": 0.0977, "step": 9386 }, { "epoch": 0.43, "grad_norm": 0.4073330570746324, "learning_rate": 1.2680770502379773e-05, "loss": 0.2627, "step": 9387 }, { "epoch": 0.43, "grad_norm": 0.5760195819526027, "learning_rate": 1.2679337015084747e-05, "loss": 0.3774, "step": 9388 }, { "epoch": 0.43, "grad_norm": 0.3959896687954381, "learning_rate": 1.2677903468471781e-05, "loss": 0.3224, "step": 9389 }, { "epoch": 0.43, "grad_norm": 0.3604525759897088, "learning_rate": 1.2676469862572614e-05, "loss": 0.2447, "step": 9390 }, { "epoch": 0.43, "grad_norm": 1.1364314143266312, "learning_rate": 1.2675036197418984e-05, "loss": 0.5899, "step": 9391 }, { "epoch": 0.43, "grad_norm": 0.28841422983357917, "learning_rate": 1.2673602473042628e-05, "loss": 0.2655, "step": 9392 }, { "epoch": 0.43, "grad_norm": 0.22556161400212862, "learning_rate": 1.2672168689475293e-05, "loss": 0.0978, "step": 9393 }, { "epoch": 0.43, "grad_norm": 0.437416348480585, "learning_rate": 1.2670734846748717e-05, "loss": 0.3243, "step": 9394 }, { "epoch": 0.43, "grad_norm": 0.6683355974541376, "learning_rate": 1.2669300944894647e-05, "loss": 0.4078, "step": 9395 }, { "epoch": 0.43, "grad_norm": 0.3017049616391861, "learning_rate": 1.2667866983944825e-05, "loss": 0.2194, "step": 9396 }, { "epoch": 0.43, "grad_norm": 0.45566119415040446, "learning_rate": 1.2666432963931e-05, "loss": 0.3404, "step": 9397 }, { "epoch": 0.43, "grad_norm": 0.4276431915377918, "learning_rate": 1.266499888488492e-05, "loss": 0.203, "step": 9398 }, { "epoch": 0.43, "grad_norm": 0.2626220352638622, "learning_rate": 1.2663564746838335e-05, "loss": 0.1448, "step": 9399 }, { "epoch": 0.43, "grad_norm": 0.35847557019536114, "learning_rate": 1.266213054982299e-05, "loss": 0.3096, "step": 9400 }, { "epoch": 0.43, "grad_norm": 0.9299157735124534, "learning_rate": 1.2660696293870642e-05, "loss": 0.4168, "step": 9401 }, { "epoch": 0.43, "grad_norm": 0.5441320061557557, "learning_rate": 1.2659261979013043e-05, "loss": 0.3712, "step": 9402 }, { "epoch": 0.43, "grad_norm": 0.3747397797899474, "learning_rate": 1.2657827605281944e-05, "loss": 0.2215, "step": 9403 }, { "epoch": 0.43, "grad_norm": 0.4249229705858929, "learning_rate": 1.2656393172709107e-05, "loss": 0.3287, "step": 9404 }, { "epoch": 0.43, "grad_norm": 0.2344257536169026, "learning_rate": 1.2654958681326286e-05, "loss": 0.145, "step": 9405 }, { "epoch": 0.43, "grad_norm": 0.4224690329888101, "learning_rate": 1.2653524131165238e-05, "loss": 0.2686, "step": 9406 }, { "epoch": 0.43, "grad_norm": 1.1100567522037954, "learning_rate": 1.265208952225772e-05, "loss": 0.4848, "step": 9407 }, { "epoch": 0.43, "grad_norm": 0.36267938912720626, "learning_rate": 1.2650654854635498e-05, "loss": 0.3018, "step": 9408 }, { "epoch": 0.43, "grad_norm": 0.41927720920381195, "learning_rate": 1.264922012833033e-05, "loss": 0.2426, "step": 9409 }, { "epoch": 0.43, "grad_norm": 0.5038614684791575, "learning_rate": 1.2647785343373986e-05, "loss": 0.3012, "step": 9410 }, { "epoch": 0.43, "grad_norm": 0.4140824423670641, "learning_rate": 1.2646350499798226e-05, "loss": 0.231, "step": 9411 }, { "epoch": 0.43, "grad_norm": 0.3321038828553742, "learning_rate": 1.2644915597634815e-05, "loss": 0.2473, "step": 9412 }, { "epoch": 0.43, "grad_norm": 1.1892279437893527, "learning_rate": 1.2643480636915522e-05, "loss": 0.4599, "step": 9413 }, { "epoch": 0.43, "grad_norm": 0.9127410702927927, "learning_rate": 1.2642045617672114e-05, "loss": 0.5234, "step": 9414 }, { "epoch": 0.43, "grad_norm": 0.4284539268960658, "learning_rate": 1.2640610539936363e-05, "loss": 0.3135, "step": 9415 }, { "epoch": 0.43, "grad_norm": 0.40918361329265673, "learning_rate": 1.263917540374004e-05, "loss": 0.2482, "step": 9416 }, { "epoch": 0.43, "grad_norm": 0.4202514850170202, "learning_rate": 1.2637740209114918e-05, "loss": 0.259, "step": 9417 }, { "epoch": 0.43, "grad_norm": 0.4318377129770745, "learning_rate": 1.2636304956092773e-05, "loss": 0.3213, "step": 9418 }, { "epoch": 0.43, "grad_norm": 1.1940034487157207, "learning_rate": 1.2634869644705374e-05, "loss": 0.409, "step": 9419 }, { "epoch": 0.43, "grad_norm": 0.3978571900016964, "learning_rate": 1.26334342749845e-05, "loss": 0.3133, "step": 9420 }, { "epoch": 0.43, "grad_norm": 0.31418940102939463, "learning_rate": 1.263199884696193e-05, "loss": 0.2404, "step": 9421 }, { "epoch": 0.43, "grad_norm": 0.7516631652311326, "learning_rate": 1.2630563360669444e-05, "loss": 0.304, "step": 9422 }, { "epoch": 0.43, "grad_norm": 0.6254121282941459, "learning_rate": 1.2629127816138818e-05, "loss": 0.3666, "step": 9423 }, { "epoch": 0.43, "grad_norm": 0.41268360849468866, "learning_rate": 1.2627692213401836e-05, "loss": 0.2876, "step": 9424 }, { "epoch": 0.43, "grad_norm": 0.33417791235787325, "learning_rate": 1.2626256552490283e-05, "loss": 0.2339, "step": 9425 }, { "epoch": 0.43, "grad_norm": 0.39089160640967713, "learning_rate": 1.2624820833435939e-05, "loss": 0.246, "step": 9426 }, { "epoch": 0.43, "grad_norm": 0.4087341467962649, "learning_rate": 1.2623385056270592e-05, "loss": 0.3029, "step": 9427 }, { "epoch": 0.43, "grad_norm": 0.4454686000017699, "learning_rate": 1.2621949221026028e-05, "loss": 0.3529, "step": 9428 }, { "epoch": 0.43, "grad_norm": 0.717996982393554, "learning_rate": 1.2620513327734038e-05, "loss": 0.3451, "step": 9429 }, { "epoch": 0.43, "grad_norm": 0.361365982504197, "learning_rate": 1.2619077376426407e-05, "loss": 0.2819, "step": 9430 }, { "epoch": 0.43, "grad_norm": 0.54715594172966, "learning_rate": 1.2617641367134928e-05, "loss": 0.4256, "step": 9431 }, { "epoch": 0.43, "grad_norm": 0.2926911651619761, "learning_rate": 1.2616205299891388e-05, "loss": 0.1709, "step": 9432 }, { "epoch": 0.43, "grad_norm": 0.28034504306147434, "learning_rate": 1.2614769174727588e-05, "loss": 0.2356, "step": 9433 }, { "epoch": 0.43, "grad_norm": 0.8988749745559306, "learning_rate": 1.2613332991675318e-05, "loss": 0.4677, "step": 9434 }, { "epoch": 0.43, "grad_norm": 0.38634978397743036, "learning_rate": 1.2611896750766377e-05, "loss": 0.2878, "step": 9435 }, { "epoch": 0.43, "grad_norm": 0.3418576000897716, "learning_rate": 1.2610460452032556e-05, "loss": 0.2742, "step": 9436 }, { "epoch": 0.43, "grad_norm": 1.1357984964185623, "learning_rate": 1.2609024095505655e-05, "loss": 0.6399, "step": 9437 }, { "epoch": 0.43, "grad_norm": 0.2963124518074352, "learning_rate": 1.260758768121748e-05, "loss": 0.1909, "step": 9438 }, { "epoch": 0.43, "grad_norm": 0.29919215416021205, "learning_rate": 1.2606151209199822e-05, "loss": 0.2139, "step": 9439 }, { "epoch": 0.43, "grad_norm": 0.3691023082466476, "learning_rate": 1.260471467948449e-05, "loss": 0.3199, "step": 9440 }, { "epoch": 0.43, "grad_norm": 0.6727305416223096, "learning_rate": 1.2603278092103288e-05, "loss": 0.4493, "step": 9441 }, { "epoch": 0.43, "grad_norm": 0.32917621419629844, "learning_rate": 1.2601841447088017e-05, "loss": 0.2165, "step": 9442 }, { "epoch": 0.43, "grad_norm": 1.170248146272205, "learning_rate": 1.2600404744470481e-05, "loss": 0.7662, "step": 9443 }, { "epoch": 0.43, "grad_norm": 0.2744027059196429, "learning_rate": 1.2598967984282494e-05, "loss": 0.2388, "step": 9444 }, { "epoch": 0.43, "grad_norm": 0.25818215850327236, "learning_rate": 1.2597531166555857e-05, "loss": 0.1647, "step": 9445 }, { "epoch": 0.43, "grad_norm": 0.7190296411178465, "learning_rate": 1.2596094291322388e-05, "loss": 0.4211, "step": 9446 }, { "epoch": 0.43, "grad_norm": 0.3740443930494949, "learning_rate": 1.259465735861389e-05, "loss": 0.3133, "step": 9447 }, { "epoch": 0.43, "grad_norm": 0.3334026807035171, "learning_rate": 1.2593220368462178e-05, "loss": 0.2047, "step": 9448 }, { "epoch": 0.43, "grad_norm": 1.2744951776544007, "learning_rate": 1.259178332089907e-05, "loss": 0.7327, "step": 9449 }, { "epoch": 0.43, "grad_norm": 0.497689882630781, "learning_rate": 1.2590346215956372e-05, "loss": 0.291, "step": 9450 }, { "epoch": 0.43, "grad_norm": 0.30007265761149987, "learning_rate": 1.2588909053665912e-05, "loss": 0.2332, "step": 9451 }, { "epoch": 0.43, "grad_norm": 0.4947529682315777, "learning_rate": 1.2587471834059498e-05, "loss": 0.3003, "step": 9452 }, { "epoch": 0.43, "grad_norm": 0.8107848461877484, "learning_rate": 1.2586034557168951e-05, "loss": 0.5135, "step": 9453 }, { "epoch": 0.43, "grad_norm": 0.3751737978543826, "learning_rate": 1.2584597223026092e-05, "loss": 0.266, "step": 9454 }, { "epoch": 0.43, "grad_norm": 1.0233463136645495, "learning_rate": 1.258315983166274e-05, "loss": 0.566, "step": 9455 }, { "epoch": 0.43, "grad_norm": 0.3377751533254102, "learning_rate": 1.2581722383110719e-05, "loss": 0.2749, "step": 9456 }, { "epoch": 0.43, "grad_norm": 0.3419054223813326, "learning_rate": 1.2580284877401853e-05, "loss": 0.2623, "step": 9457 }, { "epoch": 0.43, "grad_norm": 0.6575813959770792, "learning_rate": 1.2578847314567968e-05, "loss": 0.2501, "step": 9458 }, { "epoch": 0.43, "grad_norm": 0.38777597362819477, "learning_rate": 1.257740969464089e-05, "loss": 0.2975, "step": 9459 }, { "epoch": 0.43, "grad_norm": 0.43275220632395844, "learning_rate": 1.2575972017652442e-05, "loss": 0.3012, "step": 9460 }, { "epoch": 0.43, "grad_norm": 0.49163558230656573, "learning_rate": 1.257453428363446e-05, "loss": 0.3028, "step": 9461 }, { "epoch": 0.43, "grad_norm": 0.6275284392053907, "learning_rate": 1.2573096492618766e-05, "loss": 0.3684, "step": 9462 }, { "epoch": 0.43, "grad_norm": 0.3776346850865423, "learning_rate": 1.25716586446372e-05, "loss": 0.2854, "step": 9463 }, { "epoch": 0.43, "grad_norm": 0.3588057533578193, "learning_rate": 1.2570220739721588e-05, "loss": 0.3425, "step": 9464 }, { "epoch": 0.43, "grad_norm": 0.1714204211179907, "learning_rate": 1.2568782777903768e-05, "loss": 0.0656, "step": 9465 }, { "epoch": 0.43, "grad_norm": 0.3560142739096228, "learning_rate": 1.2567344759215571e-05, "loss": 0.2507, "step": 9466 }, { "epoch": 0.43, "grad_norm": 0.5442199859267705, "learning_rate": 1.2565906683688836e-05, "loss": 0.4249, "step": 9467 }, { "epoch": 0.43, "grad_norm": 0.4741256619713081, "learning_rate": 1.25644685513554e-05, "loss": 0.2616, "step": 9468 }, { "epoch": 0.43, "grad_norm": 0.34331080104918166, "learning_rate": 1.2563030362247105e-05, "loss": 0.2826, "step": 9469 }, { "epoch": 0.44, "grad_norm": 0.3244728122383523, "learning_rate": 1.2561592116395785e-05, "loss": 0.227, "step": 9470 }, { "epoch": 0.44, "grad_norm": 0.34244100844683467, "learning_rate": 1.2560153813833283e-05, "loss": 0.2656, "step": 9471 }, { "epoch": 0.44, "grad_norm": 0.3455335480342148, "learning_rate": 1.2558715454591444e-05, "loss": 0.2815, "step": 9472 }, { "epoch": 0.44, "grad_norm": 1.433864542368922, "learning_rate": 1.2557277038702109e-05, "loss": 0.8198, "step": 9473 }, { "epoch": 0.44, "grad_norm": 0.6082788976385156, "learning_rate": 1.2555838566197129e-05, "loss": 0.3512, "step": 9474 }, { "epoch": 0.44, "grad_norm": 0.3207895346819442, "learning_rate": 1.2554400037108345e-05, "loss": 0.2582, "step": 9475 }, { "epoch": 0.44, "grad_norm": 0.5110578991809333, "learning_rate": 1.2552961451467609e-05, "loss": 0.3536, "step": 9476 }, { "epoch": 0.44, "grad_norm": 0.3064113085637141, "learning_rate": 1.2551522809306762e-05, "loss": 0.1849, "step": 9477 }, { "epoch": 0.44, "grad_norm": 0.3529613272022501, "learning_rate": 1.2550084110657663e-05, "loss": 0.189, "step": 9478 }, { "epoch": 0.44, "grad_norm": 0.41888908145484505, "learning_rate": 1.2548645355552156e-05, "loss": 0.3396, "step": 9479 }, { "epoch": 0.44, "grad_norm": 0.5902939809583563, "learning_rate": 1.2547206544022102e-05, "loss": 0.3784, "step": 9480 }, { "epoch": 0.44, "grad_norm": 0.3936840695842908, "learning_rate": 1.2545767676099345e-05, "loss": 0.2183, "step": 9481 }, { "epoch": 0.44, "grad_norm": 0.5497509177495692, "learning_rate": 1.2544328751815749e-05, "loss": 0.3452, "step": 9482 }, { "epoch": 0.44, "grad_norm": 0.2714434631891859, "learning_rate": 1.2542889771203166e-05, "loss": 0.2326, "step": 9483 }, { "epoch": 0.44, "grad_norm": 0.3131408589993388, "learning_rate": 1.2541450734293452e-05, "loss": 0.1892, "step": 9484 }, { "epoch": 0.44, "grad_norm": 0.9590983860750499, "learning_rate": 1.2540011641118472e-05, "loss": 0.5866, "step": 9485 }, { "epoch": 0.44, "grad_norm": 0.6811746208258505, "learning_rate": 1.2538572491710079e-05, "loss": 0.415, "step": 9486 }, { "epoch": 0.44, "grad_norm": 0.2845844611578793, "learning_rate": 1.2537133286100141e-05, "loss": 0.2226, "step": 9487 }, { "epoch": 0.44, "grad_norm": 0.619186101438695, "learning_rate": 1.2535694024320514e-05, "loss": 0.3727, "step": 9488 }, { "epoch": 0.44, "grad_norm": 0.3795551826258032, "learning_rate": 1.2534254706403068e-05, "loss": 0.2372, "step": 9489 }, { "epoch": 0.44, "grad_norm": 0.44608843674598136, "learning_rate": 1.2532815332379661e-05, "loss": 0.2593, "step": 9490 }, { "epoch": 0.44, "grad_norm": 0.4387537115416498, "learning_rate": 1.253137590228217e-05, "loss": 0.274, "step": 9491 }, { "epoch": 0.44, "grad_norm": 0.5750553798334395, "learning_rate": 1.2529936416142452e-05, "loss": 0.3689, "step": 9492 }, { "epoch": 0.44, "grad_norm": 0.3786398037234063, "learning_rate": 1.2528496873992384e-05, "loss": 0.2763, "step": 9493 }, { "epoch": 0.44, "grad_norm": 0.7242925874880445, "learning_rate": 1.2527057275863828e-05, "loss": 0.3485, "step": 9494 }, { "epoch": 0.44, "grad_norm": 0.2569446270666328, "learning_rate": 1.252561762178866e-05, "loss": 0.2235, "step": 9495 }, { "epoch": 0.44, "grad_norm": 0.48866000515209407, "learning_rate": 1.2524177911798753e-05, "loss": 0.3191, "step": 9496 }, { "epoch": 0.44, "grad_norm": 0.5841411511787737, "learning_rate": 1.252273814592598e-05, "loss": 0.3102, "step": 9497 }, { "epoch": 0.44, "grad_norm": 0.8300876382045795, "learning_rate": 1.2521298324202217e-05, "loss": 0.5125, "step": 9498 }, { "epoch": 0.44, "grad_norm": 0.3994239912705845, "learning_rate": 1.2519858446659339e-05, "loss": 0.299, "step": 9499 }, { "epoch": 0.44, "grad_norm": 0.3263345393824156, "learning_rate": 1.2518418513329223e-05, "loss": 0.2377, "step": 9500 }, { "epoch": 0.44, "grad_norm": 0.31140039536698183, "learning_rate": 1.2516978524243747e-05, "loss": 0.1856, "step": 9501 }, { "epoch": 0.44, "grad_norm": 0.6278373526893891, "learning_rate": 1.2515538479434795e-05, "loss": 0.3373, "step": 9502 }, { "epoch": 0.44, "grad_norm": 0.3079186405326252, "learning_rate": 1.251409837893424e-05, "loss": 0.2746, "step": 9503 }, { "epoch": 0.44, "grad_norm": 0.9697008656036147, "learning_rate": 1.2512658222773975e-05, "loss": 0.382, "step": 9504 }, { "epoch": 0.44, "grad_norm": 0.4282804534788201, "learning_rate": 1.2511218010985879e-05, "loss": 0.3342, "step": 9505 }, { "epoch": 0.44, "grad_norm": 0.52761505470986, "learning_rate": 1.2509777743601834e-05, "loss": 0.3429, "step": 9506 }, { "epoch": 0.44, "grad_norm": 0.2835547740002297, "learning_rate": 1.2508337420653729e-05, "loss": 0.2025, "step": 9507 }, { "epoch": 0.44, "grad_norm": 0.3496414928349116, "learning_rate": 1.2506897042173454e-05, "loss": 0.23, "step": 9508 }, { "epoch": 0.44, "grad_norm": 0.6993862738815619, "learning_rate": 1.2505456608192889e-05, "loss": 0.351, "step": 9509 }, { "epoch": 0.44, "grad_norm": 0.9001929853737437, "learning_rate": 1.2504016118743936e-05, "loss": 0.5513, "step": 9510 }, { "epoch": 0.44, "grad_norm": 0.2821800165107059, "learning_rate": 1.250257557385848e-05, "loss": 0.2676, "step": 9511 }, { "epoch": 0.44, "grad_norm": 0.5643722533773718, "learning_rate": 1.2501134973568407e-05, "loss": 0.3817, "step": 9512 }, { "epoch": 0.44, "grad_norm": 0.4061162839403097, "learning_rate": 1.249969431790562e-05, "loss": 0.19, "step": 9513 }, { "epoch": 0.44, "grad_norm": 0.44805134819715936, "learning_rate": 1.2498253606902007e-05, "loss": 0.306, "step": 9514 }, { "epoch": 0.44, "grad_norm": 0.35436118099807895, "learning_rate": 1.249681284058947e-05, "loss": 0.3191, "step": 9515 }, { "epoch": 0.44, "grad_norm": 0.7658228637031458, "learning_rate": 1.2495372018999904e-05, "loss": 0.5499, "step": 9516 }, { "epoch": 0.44, "grad_norm": 0.23863211714930668, "learning_rate": 1.2493931142165202e-05, "loss": 0.0986, "step": 9517 }, { "epoch": 0.44, "grad_norm": 0.450162260873162, "learning_rate": 1.2492490210117272e-05, "loss": 0.2862, "step": 9518 }, { "epoch": 0.44, "grad_norm": 0.3795813389571156, "learning_rate": 1.249104922288801e-05, "loss": 0.3352, "step": 9519 }, { "epoch": 0.44, "grad_norm": 0.549055750396541, "learning_rate": 1.2489608180509316e-05, "loss": 0.2412, "step": 9520 }, { "epoch": 0.44, "grad_norm": 0.335807905986383, "learning_rate": 1.2488167083013101e-05, "loss": 0.2777, "step": 9521 }, { "epoch": 0.44, "grad_norm": 1.2033157174734814, "learning_rate": 1.2486725930431263e-05, "loss": 0.7831, "step": 9522 }, { "epoch": 0.44, "grad_norm": 0.26491861254859733, "learning_rate": 1.2485284722795711e-05, "loss": 0.1878, "step": 9523 }, { "epoch": 0.44, "grad_norm": 0.47528350940347347, "learning_rate": 1.2483843460138348e-05, "loss": 0.3625, "step": 9524 }, { "epoch": 0.44, "grad_norm": 0.6774648563052934, "learning_rate": 1.2482402142491087e-05, "loss": 0.4422, "step": 9525 }, { "epoch": 0.44, "grad_norm": 0.3881143480506948, "learning_rate": 1.2480960769885835e-05, "loss": 0.2559, "step": 9526 }, { "epoch": 0.44, "grad_norm": 0.3980774464370524, "learning_rate": 1.24795193423545e-05, "loss": 0.3456, "step": 9527 }, { "epoch": 0.44, "grad_norm": 0.525139313913425, "learning_rate": 1.2478077859929e-05, "loss": 0.3824, "step": 9528 }, { "epoch": 0.44, "grad_norm": 0.23809505073485704, "learning_rate": 1.2476636322641245e-05, "loss": 0.1483, "step": 9529 }, { "epoch": 0.44, "grad_norm": 0.4016254269233144, "learning_rate": 1.2475194730523148e-05, "loss": 0.2266, "step": 9530 }, { "epoch": 0.44, "grad_norm": 0.4150109764932381, "learning_rate": 1.2473753083606621e-05, "loss": 0.3114, "step": 9531 }, { "epoch": 0.44, "grad_norm": 0.6666840336836596, "learning_rate": 1.247231138192359e-05, "loss": 0.3405, "step": 9532 }, { "epoch": 0.44, "grad_norm": 0.3622340205516867, "learning_rate": 1.2470869625505964e-05, "loss": 0.2465, "step": 9533 }, { "epoch": 0.44, "grad_norm": 0.3208603394078894, "learning_rate": 1.246942781438567e-05, "loss": 0.2606, "step": 9534 }, { "epoch": 0.44, "grad_norm": 0.28379015156960213, "learning_rate": 1.2467985948594622e-05, "loss": 0.2245, "step": 9535 }, { "epoch": 0.44, "grad_norm": 0.31982995306285394, "learning_rate": 1.2466544028164744e-05, "loss": 0.2178, "step": 9536 }, { "epoch": 0.44, "grad_norm": 0.6717124978923377, "learning_rate": 1.2465102053127957e-05, "loss": 0.426, "step": 9537 }, { "epoch": 0.44, "grad_norm": 0.862258560502964, "learning_rate": 1.246366002351619e-05, "loss": 0.5074, "step": 9538 }, { "epoch": 0.44, "grad_norm": 0.27785369165041746, "learning_rate": 1.246221793936136e-05, "loss": 0.2453, "step": 9539 }, { "epoch": 0.44, "grad_norm": 1.3446003332045038, "learning_rate": 1.2460775800695404e-05, "loss": 0.7774, "step": 9540 }, { "epoch": 0.44, "grad_norm": 0.2611312429197009, "learning_rate": 1.2459333607550241e-05, "loss": 0.1425, "step": 9541 }, { "epoch": 0.44, "grad_norm": 0.41386162811816574, "learning_rate": 1.2457891359957801e-05, "loss": 0.3185, "step": 9542 }, { "epoch": 0.44, "grad_norm": 0.3634473264801504, "learning_rate": 1.2456449057950015e-05, "loss": 0.2814, "step": 9543 }, { "epoch": 0.44, "grad_norm": 0.8451981203061738, "learning_rate": 1.2455006701558815e-05, "loss": 0.3914, "step": 9544 }, { "epoch": 0.44, "grad_norm": 0.37556247253808395, "learning_rate": 1.2453564290816132e-05, "loss": 0.2961, "step": 9545 }, { "epoch": 0.44, "grad_norm": 0.4222578598088155, "learning_rate": 1.2452121825753902e-05, "loss": 0.3013, "step": 9546 }, { "epoch": 0.44, "grad_norm": 0.2842831040712511, "learning_rate": 1.2450679306404059e-05, "loss": 0.2382, "step": 9547 }, { "epoch": 0.44, "grad_norm": 0.3344938558149986, "learning_rate": 1.2449236732798536e-05, "loss": 0.2449, "step": 9548 }, { "epoch": 0.44, "grad_norm": 0.5414165756598206, "learning_rate": 1.2447794104969271e-05, "loss": 0.3516, "step": 9549 }, { "epoch": 0.44, "grad_norm": 0.3832912520369787, "learning_rate": 1.2446351422948207e-05, "loss": 0.2918, "step": 9550 }, { "epoch": 0.44, "grad_norm": 0.3360145377394796, "learning_rate": 1.2444908686767278e-05, "loss": 0.2925, "step": 9551 }, { "epoch": 0.44, "grad_norm": 0.8557389540022056, "learning_rate": 1.2443465896458429e-05, "loss": 0.544, "step": 9552 }, { "epoch": 0.44, "grad_norm": 0.447660407622772, "learning_rate": 1.24420230520536e-05, "loss": 0.2699, "step": 9553 }, { "epoch": 0.44, "grad_norm": 0.35897826002603556, "learning_rate": 1.2440580153584732e-05, "loss": 0.2753, "step": 9554 }, { "epoch": 0.44, "grad_norm": 0.29429391263681925, "learning_rate": 1.2439137201083772e-05, "loss": 0.2608, "step": 9555 }, { "epoch": 0.44, "grad_norm": 0.4911344184125994, "learning_rate": 1.2437694194582668e-05, "loss": 0.1434, "step": 9556 }, { "epoch": 0.44, "grad_norm": 0.40721635466757367, "learning_rate": 1.243625113411336e-05, "loss": 0.3257, "step": 9557 }, { "epoch": 0.44, "grad_norm": 0.5311028757325035, "learning_rate": 1.2434808019707804e-05, "loss": 0.4216, "step": 9558 }, { "epoch": 0.44, "grad_norm": 0.3743964493118494, "learning_rate": 1.2433364851397944e-05, "loss": 0.263, "step": 9559 }, { "epoch": 0.44, "grad_norm": 0.376240469157283, "learning_rate": 1.243192162921573e-05, "loss": 0.3259, "step": 9560 }, { "epoch": 0.44, "grad_norm": 0.2862136385513368, "learning_rate": 1.2430478353193115e-05, "loss": 0.15, "step": 9561 }, { "epoch": 0.44, "grad_norm": 0.32892232958813616, "learning_rate": 1.2429035023362055e-05, "loss": 0.2555, "step": 9562 }, { "epoch": 0.44, "grad_norm": 0.3940948777682774, "learning_rate": 1.2427591639754496e-05, "loss": 0.2998, "step": 9563 }, { "epoch": 0.44, "grad_norm": 1.2919151231278225, "learning_rate": 1.2426148202402405e-05, "loss": 0.8189, "step": 9564 }, { "epoch": 0.44, "grad_norm": 0.6080089539265596, "learning_rate": 1.2424704711337723e-05, "loss": 0.4203, "step": 9565 }, { "epoch": 0.44, "grad_norm": 0.33166402354490326, "learning_rate": 1.242326116659242e-05, "loss": 0.2008, "step": 9566 }, { "epoch": 0.44, "grad_norm": 0.2381784833668846, "learning_rate": 1.2421817568198446e-05, "loss": 0.2185, "step": 9567 }, { "epoch": 0.44, "grad_norm": 0.8431144710530735, "learning_rate": 1.2420373916187771e-05, "loss": 0.4615, "step": 9568 }, { "epoch": 0.44, "grad_norm": 0.38840062715512647, "learning_rate": 1.2418930210592348e-05, "loss": 0.218, "step": 9569 }, { "epoch": 0.44, "grad_norm": 0.4073936431648435, "learning_rate": 1.2417486451444144e-05, "loss": 0.3462, "step": 9570 }, { "epoch": 0.44, "grad_norm": 0.7888506067044139, "learning_rate": 1.2416042638775116e-05, "loss": 0.4172, "step": 9571 }, { "epoch": 0.44, "grad_norm": 0.3186256468721699, "learning_rate": 1.2414598772617233e-05, "loss": 0.2345, "step": 9572 }, { "epoch": 0.44, "grad_norm": 0.2938581933553553, "learning_rate": 1.2413154853002462e-05, "loss": 0.1918, "step": 9573 }, { "epoch": 0.44, "grad_norm": 0.36637135558619166, "learning_rate": 1.2411710879962767e-05, "loss": 0.2848, "step": 9574 }, { "epoch": 0.44, "grad_norm": 0.3728737557494692, "learning_rate": 1.241026685353012e-05, "loss": 0.2203, "step": 9575 }, { "epoch": 0.44, "grad_norm": 0.8934077931088511, "learning_rate": 1.2408822773736487e-05, "loss": 0.4695, "step": 9576 }, { "epoch": 0.44, "grad_norm": 0.6666598305683288, "learning_rate": 1.240737864061384e-05, "loss": 0.4639, "step": 9577 }, { "epoch": 0.44, "grad_norm": 0.3579599228603201, "learning_rate": 1.2405934454194146e-05, "loss": 0.2823, "step": 9578 }, { "epoch": 0.44, "grad_norm": 0.37676047320879386, "learning_rate": 1.2404490214509385e-05, "loss": 0.2755, "step": 9579 }, { "epoch": 0.44, "grad_norm": 0.30817480657786145, "learning_rate": 1.2403045921591528e-05, "loss": 0.2043, "step": 9580 }, { "epoch": 0.44, "grad_norm": 0.3493268863369354, "learning_rate": 1.2401601575472552e-05, "loss": 0.2614, "step": 9581 }, { "epoch": 0.44, "grad_norm": 0.6789197777552087, "learning_rate": 1.2400157176184428e-05, "loss": 0.3293, "step": 9582 }, { "epoch": 0.44, "grad_norm": 0.5807957038525572, "learning_rate": 1.2398712723759141e-05, "loss": 0.3912, "step": 9583 }, { "epoch": 0.44, "grad_norm": 0.47637302397872444, "learning_rate": 1.2397268218228664e-05, "loss": 0.2932, "step": 9584 }, { "epoch": 0.44, "grad_norm": 0.2510626410695625, "learning_rate": 1.2395823659624982e-05, "loss": 0.1329, "step": 9585 }, { "epoch": 0.44, "grad_norm": 0.39358706924813713, "learning_rate": 1.239437904798007e-05, "loss": 0.3275, "step": 9586 }, { "epoch": 0.44, "grad_norm": 0.4331028826493053, "learning_rate": 1.2392934383325917e-05, "loss": 0.2577, "step": 9587 }, { "epoch": 0.44, "grad_norm": 0.603658254894565, "learning_rate": 1.2391489665694501e-05, "loss": 0.3197, "step": 9588 }, { "epoch": 0.44, "grad_norm": 1.294903917659896, "learning_rate": 1.2390044895117807e-05, "loss": 0.6753, "step": 9589 }, { "epoch": 0.44, "grad_norm": 0.3628923058748068, "learning_rate": 1.2388600071627825e-05, "loss": 0.2377, "step": 9590 }, { "epoch": 0.44, "grad_norm": 0.3904723514891969, "learning_rate": 1.2387155195256537e-05, "loss": 0.3198, "step": 9591 }, { "epoch": 0.44, "grad_norm": 0.2103867084893919, "learning_rate": 1.2385710266035937e-05, "loss": 0.0967, "step": 9592 }, { "epoch": 0.44, "grad_norm": 0.32897157379292796, "learning_rate": 1.238426528399801e-05, "loss": 0.28, "step": 9593 }, { "epoch": 0.44, "grad_norm": 0.447313185224507, "learning_rate": 1.2382820249174747e-05, "loss": 0.3432, "step": 9594 }, { "epoch": 0.44, "grad_norm": 0.4637941626461378, "learning_rate": 1.2381375161598141e-05, "loss": 0.283, "step": 9595 }, { "epoch": 0.44, "grad_norm": 0.37435580844485417, "learning_rate": 1.2379930021300184e-05, "loss": 0.2708, "step": 9596 }, { "epoch": 0.44, "grad_norm": 1.4307387727731835, "learning_rate": 1.2378484828312868e-05, "loss": 0.689, "step": 9597 }, { "epoch": 0.44, "grad_norm": 0.24653470870973798, "learning_rate": 1.2377039582668193e-05, "loss": 0.1887, "step": 9598 }, { "epoch": 0.44, "grad_norm": 0.33488964789200004, "learning_rate": 1.2375594284398154e-05, "loss": 0.2924, "step": 9599 }, { "epoch": 0.44, "grad_norm": 0.649248743732412, "learning_rate": 1.2374148933534744e-05, "loss": 0.4185, "step": 9600 }, { "epoch": 0.44, "grad_norm": 0.4971778169313545, "learning_rate": 1.2372703530109967e-05, "loss": 0.287, "step": 9601 }, { "epoch": 0.44, "grad_norm": 0.467947894336131, "learning_rate": 1.2371258074155818e-05, "loss": 0.296, "step": 9602 }, { "epoch": 0.44, "grad_norm": 0.37579042849010924, "learning_rate": 1.2369812565704302e-05, "loss": 0.3226, "step": 9603 }, { "epoch": 0.44, "grad_norm": 0.4307594614227347, "learning_rate": 1.236836700478742e-05, "loss": 0.3097, "step": 9604 }, { "epoch": 0.44, "grad_norm": 0.36834214981869845, "learning_rate": 1.2366921391437179e-05, "loss": 0.2087, "step": 9605 }, { "epoch": 0.44, "grad_norm": 0.3501598262689063, "learning_rate": 1.2365475725685574e-05, "loss": 0.2913, "step": 9606 }, { "epoch": 0.44, "grad_norm": 0.5125241091625219, "learning_rate": 1.2364030007564618e-05, "loss": 0.3171, "step": 9607 }, { "epoch": 0.44, "grad_norm": 0.3555258453868249, "learning_rate": 1.2362584237106315e-05, "loss": 0.1781, "step": 9608 }, { "epoch": 0.44, "grad_norm": 0.5361810015285421, "learning_rate": 1.2361138414342676e-05, "loss": 0.3812, "step": 9609 }, { "epoch": 0.44, "grad_norm": 0.4307069918427517, "learning_rate": 1.2359692539305707e-05, "loss": 0.3552, "step": 9610 }, { "epoch": 0.44, "grad_norm": 0.35790148179259906, "learning_rate": 1.2358246612027422e-05, "loss": 0.2498, "step": 9611 }, { "epoch": 0.44, "grad_norm": 0.9053816119011324, "learning_rate": 1.2356800632539827e-05, "loss": 0.5611, "step": 9612 }, { "epoch": 0.44, "grad_norm": 0.3088654507360139, "learning_rate": 1.235535460087494e-05, "loss": 0.2003, "step": 9613 }, { "epoch": 0.44, "grad_norm": 0.2871338908174627, "learning_rate": 1.2353908517064768e-05, "loss": 0.2236, "step": 9614 }, { "epoch": 0.44, "grad_norm": 1.0277085790305298, "learning_rate": 1.2352462381141335e-05, "loss": 0.5663, "step": 9615 }, { "epoch": 0.44, "grad_norm": 0.7203127520736397, "learning_rate": 1.235101619313665e-05, "loss": 0.412, "step": 9616 }, { "epoch": 0.44, "grad_norm": 0.3995772490589311, "learning_rate": 1.2349569953082734e-05, "loss": 0.2818, "step": 9617 }, { "epoch": 0.44, "grad_norm": 0.33827905472787684, "learning_rate": 1.2348123661011602e-05, "loss": 0.2514, "step": 9618 }, { "epoch": 0.44, "grad_norm": 0.31993889063108794, "learning_rate": 1.2346677316955275e-05, "loss": 0.1938, "step": 9619 }, { "epoch": 0.44, "grad_norm": 0.638384747400576, "learning_rate": 1.2345230920945774e-05, "loss": 0.3077, "step": 9620 }, { "epoch": 0.44, "grad_norm": 0.40529294768659263, "learning_rate": 1.234378447301512e-05, "loss": 0.2734, "step": 9621 }, { "epoch": 0.44, "grad_norm": 0.40724537667499666, "learning_rate": 1.234233797319534e-05, "loss": 0.3284, "step": 9622 }, { "epoch": 0.44, "grad_norm": 0.5813297011056503, "learning_rate": 1.2340891421518453e-05, "loss": 0.2169, "step": 9623 }, { "epoch": 0.44, "grad_norm": 0.363407278448993, "learning_rate": 1.2339444818016488e-05, "loss": 0.2774, "step": 9624 }, { "epoch": 0.44, "grad_norm": 0.2528992805256744, "learning_rate": 1.2337998162721466e-05, "loss": 0.2131, "step": 9625 }, { "epoch": 0.44, "grad_norm": 0.39581694018725494, "learning_rate": 1.233655145566542e-05, "loss": 0.281, "step": 9626 }, { "epoch": 0.44, "grad_norm": 0.40460639478854604, "learning_rate": 1.2335104696880376e-05, "loss": 0.2885, "step": 9627 }, { "epoch": 0.44, "grad_norm": 0.7825841157818485, "learning_rate": 1.2333657886398367e-05, "loss": 0.4903, "step": 9628 }, { "epoch": 0.44, "grad_norm": 0.6105665996378035, "learning_rate": 1.2332211024251418e-05, "loss": 0.3968, "step": 9629 }, { "epoch": 0.44, "grad_norm": 0.2927046306155492, "learning_rate": 1.2330764110471567e-05, "loss": 0.2887, "step": 9630 }, { "epoch": 0.44, "grad_norm": 0.2497398390452131, "learning_rate": 1.2329317145090844e-05, "loss": 0.1103, "step": 9631 }, { "epoch": 0.44, "grad_norm": 0.4189561970016693, "learning_rate": 1.2327870128141284e-05, "loss": 0.2483, "step": 9632 }, { "epoch": 0.44, "grad_norm": 0.6527335113034519, "learning_rate": 1.2326423059654927e-05, "loss": 0.3651, "step": 9633 }, { "epoch": 0.44, "grad_norm": 0.4082084727137672, "learning_rate": 1.2324975939663801e-05, "loss": 0.2906, "step": 9634 }, { "epoch": 0.44, "grad_norm": 0.42908281375043117, "learning_rate": 1.232352876819995e-05, "loss": 0.3115, "step": 9635 }, { "epoch": 0.44, "grad_norm": 0.5956308543988919, "learning_rate": 1.2322081545295412e-05, "loss": 0.3898, "step": 9636 }, { "epoch": 0.44, "grad_norm": 0.23004935655931547, "learning_rate": 1.2320634270982226e-05, "loss": 0.1604, "step": 9637 }, { "epoch": 0.44, "grad_norm": 0.4620577546254248, "learning_rate": 1.2319186945292434e-05, "loss": 0.3208, "step": 9638 }, { "epoch": 0.44, "grad_norm": 0.38688860714694545, "learning_rate": 1.2317739568258078e-05, "loss": 0.294, "step": 9639 }, { "epoch": 0.44, "grad_norm": 0.766648741152106, "learning_rate": 1.2316292139911204e-05, "loss": 0.3594, "step": 9640 }, { "epoch": 0.44, "grad_norm": 0.63975493027773, "learning_rate": 1.2314844660283853e-05, "loss": 0.361, "step": 9641 }, { "epoch": 0.44, "grad_norm": 0.33159347563531844, "learning_rate": 1.231339712940807e-05, "loss": 0.3052, "step": 9642 }, { "epoch": 0.44, "grad_norm": 0.4428056012950506, "learning_rate": 1.2311949547315906e-05, "loss": 0.2905, "step": 9643 }, { "epoch": 0.44, "grad_norm": 0.5629543441522619, "learning_rate": 1.2310501914039407e-05, "loss": 0.2437, "step": 9644 }, { "epoch": 0.44, "grad_norm": 0.3593639546762421, "learning_rate": 1.2309054229610625e-05, "loss": 0.2855, "step": 9645 }, { "epoch": 0.44, "grad_norm": 0.3592582670912627, "learning_rate": 1.2307606494061608e-05, "loss": 0.292, "step": 9646 }, { "epoch": 0.44, "grad_norm": 0.4170304508839579, "learning_rate": 1.2306158707424402e-05, "loss": 0.2035, "step": 9647 }, { "epoch": 0.44, "grad_norm": 0.43586187249573893, "learning_rate": 1.2304710869731072e-05, "loss": 0.346, "step": 9648 }, { "epoch": 0.44, "grad_norm": 0.437043393776642, "learning_rate": 1.2303262981013657e-05, "loss": 0.2845, "step": 9649 }, { "epoch": 0.44, "grad_norm": 0.36173202652707226, "learning_rate": 1.2301815041304226e-05, "loss": 0.2572, "step": 9650 }, { "epoch": 0.44, "grad_norm": 0.42077006517873183, "learning_rate": 1.2300367050634825e-05, "loss": 0.3072, "step": 9651 }, { "epoch": 0.44, "grad_norm": 0.5205610596943908, "learning_rate": 1.2298919009037518e-05, "loss": 0.3335, "step": 9652 }, { "epoch": 0.44, "grad_norm": 0.3147560017699276, "learning_rate": 1.2297470916544354e-05, "loss": 0.2309, "step": 9653 }, { "epoch": 0.44, "grad_norm": 0.37854837728116186, "learning_rate": 1.2296022773187404e-05, "loss": 0.2977, "step": 9654 }, { "epoch": 0.44, "grad_norm": 0.9365465464747741, "learning_rate": 1.2294574578998717e-05, "loss": 0.5633, "step": 9655 }, { "epoch": 0.44, "grad_norm": 0.5488220345811485, "learning_rate": 1.2293126334010365e-05, "loss": 0.3653, "step": 9656 }, { "epoch": 0.44, "grad_norm": 0.2627226276630762, "learning_rate": 1.2291678038254406e-05, "loss": 0.1614, "step": 9657 }, { "epoch": 0.44, "grad_norm": 0.32992006937915863, "learning_rate": 1.2290229691762903e-05, "loss": 0.2584, "step": 9658 }, { "epoch": 0.44, "grad_norm": 1.3531818343867195, "learning_rate": 1.228878129456792e-05, "loss": 0.8383, "step": 9659 }, { "epoch": 0.44, "grad_norm": 0.34147047026405486, "learning_rate": 1.2287332846701528e-05, "loss": 0.2165, "step": 9660 }, { "epoch": 0.44, "grad_norm": 0.47166849528965465, "learning_rate": 1.2285884348195792e-05, "loss": 0.3452, "step": 9661 }, { "epoch": 0.44, "grad_norm": 0.46533522669274496, "learning_rate": 1.2284435799082774e-05, "loss": 0.3235, "step": 9662 }, { "epoch": 0.44, "grad_norm": 0.3053715938859802, "learning_rate": 1.2282987199394556e-05, "loss": 0.1649, "step": 9663 }, { "epoch": 0.44, "grad_norm": 0.5211688582173295, "learning_rate": 1.22815385491632e-05, "loss": 0.2321, "step": 9664 }, { "epoch": 0.44, "grad_norm": 0.43790209331438057, "learning_rate": 1.2280089848420778e-05, "loss": 0.3001, "step": 9665 }, { "epoch": 0.44, "grad_norm": 0.34836144457036783, "learning_rate": 1.2278641097199362e-05, "loss": 0.241, "step": 9666 }, { "epoch": 0.44, "grad_norm": 0.9077883607530999, "learning_rate": 1.2277192295531033e-05, "loss": 0.4616, "step": 9667 }, { "epoch": 0.44, "grad_norm": 0.6257765567985331, "learning_rate": 1.2275743443447858e-05, "loss": 0.3713, "step": 9668 }, { "epoch": 0.44, "grad_norm": 0.369322750500985, "learning_rate": 1.2274294540981917e-05, "loss": 0.2782, "step": 9669 }, { "epoch": 0.44, "grad_norm": 0.22909056633402966, "learning_rate": 1.227284558816529e-05, "loss": 0.1752, "step": 9670 }, { "epoch": 0.44, "grad_norm": 0.9120830419115059, "learning_rate": 1.2271396585030049e-05, "loss": 0.5374, "step": 9671 }, { "epoch": 0.44, "grad_norm": 0.4000589686572815, "learning_rate": 1.2269947531608277e-05, "loss": 0.3016, "step": 9672 }, { "epoch": 0.44, "grad_norm": 0.48044750546188886, "learning_rate": 1.2268498427932055e-05, "loss": 0.3028, "step": 9673 }, { "epoch": 0.44, "grad_norm": 0.7377510286532568, "learning_rate": 1.2267049274033465e-05, "loss": 0.415, "step": 9674 }, { "epoch": 0.44, "grad_norm": 0.3928056313699874, "learning_rate": 1.226560006994459e-05, "loss": 0.2965, "step": 9675 }, { "epoch": 0.44, "grad_norm": 0.2211153595281202, "learning_rate": 1.2264150815697512e-05, "loss": 0.1213, "step": 9676 }, { "epoch": 0.44, "grad_norm": 0.3989834535149172, "learning_rate": 1.2262701511324315e-05, "loss": 0.3244, "step": 9677 }, { "epoch": 0.44, "grad_norm": 0.3654257044198351, "learning_rate": 1.2261252156857091e-05, "loss": 0.2725, "step": 9678 }, { "epoch": 0.44, "grad_norm": 0.7791841085327847, "learning_rate": 1.2259802752327921e-05, "loss": 0.4339, "step": 9679 }, { "epoch": 0.44, "grad_norm": 0.8153146536859531, "learning_rate": 1.2258353297768897e-05, "loss": 0.2951, "step": 9680 }, { "epoch": 0.44, "grad_norm": 0.3531856259134076, "learning_rate": 1.2256903793212107e-05, "loss": 0.2708, "step": 9681 }, { "epoch": 0.44, "grad_norm": 0.34553279678939053, "learning_rate": 1.2255454238689643e-05, "loss": 0.25, "step": 9682 }, { "epoch": 0.44, "grad_norm": 0.32491390701370587, "learning_rate": 1.2254004634233596e-05, "loss": 0.1675, "step": 9683 }, { "epoch": 0.44, "grad_norm": 0.4298912036748761, "learning_rate": 1.225255497987606e-05, "loss": 0.3139, "step": 9684 }, { "epoch": 0.44, "grad_norm": 0.545577302055996, "learning_rate": 1.2251105275649125e-05, "loss": 0.3838, "step": 9685 }, { "epoch": 0.44, "grad_norm": 0.435414894881538, "learning_rate": 1.224965552158489e-05, "loss": 0.2543, "step": 9686 }, { "epoch": 0.45, "grad_norm": 0.38494241083985226, "learning_rate": 1.224820571771545e-05, "loss": 0.2814, "step": 9687 }, { "epoch": 0.45, "grad_norm": 0.31640813574011656, "learning_rate": 1.2246755864072903e-05, "loss": 0.1806, "step": 9688 }, { "epoch": 0.45, "grad_norm": 0.31102503739315834, "learning_rate": 1.2245305960689346e-05, "loss": 0.2644, "step": 9689 }, { "epoch": 0.45, "grad_norm": 0.37351233648265275, "learning_rate": 1.2243856007596879e-05, "loss": 0.2678, "step": 9690 }, { "epoch": 0.45, "grad_norm": 0.5664346786615366, "learning_rate": 1.2242406004827605e-05, "loss": 0.4753, "step": 9691 }, { "epoch": 0.45, "grad_norm": 1.163119450270987, "learning_rate": 1.2240955952413618e-05, "loss": 0.6101, "step": 9692 }, { "epoch": 0.45, "grad_norm": 0.33797792996651504, "learning_rate": 1.2239505850387032e-05, "loss": 0.1908, "step": 9693 }, { "epoch": 0.45, "grad_norm": 0.2624590551502283, "learning_rate": 1.2238055698779943e-05, "loss": 0.2089, "step": 9694 }, { "epoch": 0.45, "grad_norm": 0.6643778601093144, "learning_rate": 1.2236605497624456e-05, "loss": 0.4655, "step": 9695 }, { "epoch": 0.45, "grad_norm": 0.2935776948767263, "learning_rate": 1.2235155246952679e-05, "loss": 0.2358, "step": 9696 }, { "epoch": 0.45, "grad_norm": 0.3863133014200058, "learning_rate": 1.223370494679672e-05, "loss": 0.3187, "step": 9697 }, { "epoch": 0.45, "grad_norm": 1.2816111024089614, "learning_rate": 1.223225459718869e-05, "loss": 0.6438, "step": 9698 }, { "epoch": 0.45, "grad_norm": 0.3301988037120032, "learning_rate": 1.223080419816069e-05, "loss": 0.2184, "step": 9699 }, { "epoch": 0.45, "grad_norm": 0.4745226708584871, "learning_rate": 1.2229353749744835e-05, "loss": 0.2643, "step": 9700 }, { "epoch": 0.45, "grad_norm": 0.3999480638430245, "learning_rate": 1.2227903251973239e-05, "loss": 0.3165, "step": 9701 }, { "epoch": 0.45, "grad_norm": 0.36929209564575416, "learning_rate": 1.2226452704878009e-05, "loss": 0.2425, "step": 9702 }, { "epoch": 0.45, "grad_norm": 1.3582211691518813, "learning_rate": 1.2225002108491264e-05, "loss": 0.811, "step": 9703 }, { "epoch": 0.45, "grad_norm": 0.48611364316709277, "learning_rate": 1.2223551462845118e-05, "loss": 0.3014, "step": 9704 }, { "epoch": 0.45, "grad_norm": 0.37539057064334597, "learning_rate": 1.2222100767971686e-05, "loss": 0.2842, "step": 9705 }, { "epoch": 0.45, "grad_norm": 0.33383080871257836, "learning_rate": 1.2220650023903085e-05, "loss": 0.2446, "step": 9706 }, { "epoch": 0.45, "grad_norm": 0.654155837491421, "learning_rate": 1.2219199230671428e-05, "loss": 0.3976, "step": 9707 }, { "epoch": 0.45, "grad_norm": 0.4316797456072796, "learning_rate": 1.2217748388308844e-05, "loss": 0.2912, "step": 9708 }, { "epoch": 0.45, "grad_norm": 0.3113384635270941, "learning_rate": 1.2216297496847445e-05, "loss": 0.2596, "step": 9709 }, { "epoch": 0.45, "grad_norm": 0.31844514497911575, "learning_rate": 1.2214846556319357e-05, "loss": 0.1936, "step": 9710 }, { "epoch": 0.45, "grad_norm": 0.43440287691212437, "learning_rate": 1.2213395566756701e-05, "loss": 0.2897, "step": 9711 }, { "epoch": 0.45, "grad_norm": 0.4958796569335049, "learning_rate": 1.2211944528191602e-05, "loss": 0.2987, "step": 9712 }, { "epoch": 0.45, "grad_norm": 0.4126437930230702, "learning_rate": 1.2210493440656179e-05, "loss": 0.3226, "step": 9713 }, { "epoch": 0.45, "grad_norm": 0.4094317950000837, "learning_rate": 1.2209042304182565e-05, "loss": 0.3074, "step": 9714 }, { "epoch": 0.45, "grad_norm": 0.2250550662466489, "learning_rate": 1.220759111880288e-05, "loss": 0.1588, "step": 9715 }, { "epoch": 0.45, "grad_norm": 1.244877091671976, "learning_rate": 1.220613988454926e-05, "loss": 0.7579, "step": 9716 }, { "epoch": 0.45, "grad_norm": 0.3036630664209244, "learning_rate": 1.2204688601453827e-05, "loss": 0.2693, "step": 9717 }, { "epoch": 0.45, "grad_norm": 0.525547726921368, "learning_rate": 1.2203237269548713e-05, "loss": 0.3557, "step": 9718 }, { "epoch": 0.45, "grad_norm": 0.6155026243694358, "learning_rate": 1.2201785888866049e-05, "loss": 0.3545, "step": 9719 }, { "epoch": 0.45, "grad_norm": 0.3570954748087456, "learning_rate": 1.2200334459437967e-05, "loss": 0.2876, "step": 9720 }, { "epoch": 0.45, "grad_norm": 0.5068079239030496, "learning_rate": 1.2198882981296604e-05, "loss": 0.3732, "step": 9721 }, { "epoch": 0.45, "grad_norm": 0.2678773984369712, "learning_rate": 1.219743145447409e-05, "loss": 0.1779, "step": 9722 }, { "epoch": 0.45, "grad_norm": 0.4195719551579831, "learning_rate": 1.2195979879002562e-05, "loss": 0.2813, "step": 9723 }, { "epoch": 0.45, "grad_norm": 0.4683178673355252, "learning_rate": 1.2194528254914154e-05, "loss": 0.3275, "step": 9724 }, { "epoch": 0.45, "grad_norm": 0.36522845933001624, "learning_rate": 1.2193076582241006e-05, "loss": 0.2769, "step": 9725 }, { "epoch": 0.45, "grad_norm": 0.45219249121897936, "learning_rate": 1.2191624861015255e-05, "loss": 0.2922, "step": 9726 }, { "epoch": 0.45, "grad_norm": 0.539546942913735, "learning_rate": 1.2190173091269042e-05, "loss": 0.3785, "step": 9727 }, { "epoch": 0.45, "grad_norm": 0.2338016503542038, "learning_rate": 1.2188721273034511e-05, "loss": 0.1728, "step": 9728 }, { "epoch": 0.45, "grad_norm": 0.4390010563633753, "learning_rate": 1.2187269406343798e-05, "loss": 0.3097, "step": 9729 }, { "epoch": 0.45, "grad_norm": 0.4913244430598165, "learning_rate": 1.2185817491229049e-05, "loss": 0.3738, "step": 9730 }, { "epoch": 0.45, "grad_norm": 1.0648646722406994, "learning_rate": 1.2184365527722406e-05, "loss": 0.6025, "step": 9731 }, { "epoch": 0.45, "grad_norm": 0.36604230013530015, "learning_rate": 1.2182913515856016e-05, "loss": 0.2161, "step": 9732 }, { "epoch": 0.45, "grad_norm": 0.3736570465282811, "learning_rate": 1.2181461455662026e-05, "loss": 0.3012, "step": 9733 }, { "epoch": 0.45, "grad_norm": 0.3401483893757466, "learning_rate": 1.2180009347172583e-05, "loss": 0.2062, "step": 9734 }, { "epoch": 0.45, "grad_norm": 0.3670483470021049, "learning_rate": 1.2178557190419833e-05, "loss": 0.2276, "step": 9735 }, { "epoch": 0.45, "grad_norm": 0.4891308733721713, "learning_rate": 1.2177104985435929e-05, "loss": 0.3528, "step": 9736 }, { "epoch": 0.45, "grad_norm": 0.37094413377213675, "learning_rate": 1.2175652732253012e-05, "loss": 0.3165, "step": 9737 }, { "epoch": 0.45, "grad_norm": 0.35858412828135794, "learning_rate": 1.2174200430903244e-05, "loss": 0.2138, "step": 9738 }, { "epoch": 0.45, "grad_norm": 1.3779449738366647, "learning_rate": 1.2172748081418775e-05, "loss": 0.799, "step": 9739 }, { "epoch": 0.45, "grad_norm": 0.33925978883796937, "learning_rate": 1.2171295683831761e-05, "loss": 0.299, "step": 9740 }, { "epoch": 0.45, "grad_norm": 0.27037012395734517, "learning_rate": 1.2169843238174346e-05, "loss": 0.222, "step": 9741 }, { "epoch": 0.45, "grad_norm": 0.37367918505594144, "learning_rate": 1.2168390744478697e-05, "loss": 0.2903, "step": 9742 }, { "epoch": 0.45, "grad_norm": 1.1503831981016803, "learning_rate": 1.2166938202776966e-05, "loss": 0.5653, "step": 9743 }, { "epoch": 0.45, "grad_norm": 0.8689418238862675, "learning_rate": 1.2165485613101314e-05, "loss": 0.4739, "step": 9744 }, { "epoch": 0.45, "grad_norm": 0.2835147128858741, "learning_rate": 1.2164032975483894e-05, "loss": 0.2395, "step": 9745 }, { "epoch": 0.45, "grad_norm": 0.6140801373803363, "learning_rate": 1.216258028995687e-05, "loss": 0.4019, "step": 9746 }, { "epoch": 0.45, "grad_norm": 0.7213564285035652, "learning_rate": 1.2161127556552405e-05, "loss": 0.3406, "step": 9747 }, { "epoch": 0.45, "grad_norm": 0.22960229144515007, "learning_rate": 1.2159674775302659e-05, "loss": 0.1588, "step": 9748 }, { "epoch": 0.45, "grad_norm": 0.41566153736692524, "learning_rate": 1.2158221946239791e-05, "loss": 0.2962, "step": 9749 }, { "epoch": 0.45, "grad_norm": 0.6512553975021785, "learning_rate": 1.2156769069395973e-05, "loss": 0.3645, "step": 9750 }, { "epoch": 0.45, "grad_norm": 0.4059432971407028, "learning_rate": 1.2155316144803366e-05, "loss": 0.2608, "step": 9751 }, { "epoch": 0.45, "grad_norm": 0.5295887338838029, "learning_rate": 1.2153863172494137e-05, "loss": 0.3682, "step": 9752 }, { "epoch": 0.45, "grad_norm": 0.4306915664495503, "learning_rate": 1.2152410152500454e-05, "loss": 0.3252, "step": 9753 }, { "epoch": 0.45, "grad_norm": 0.3088914041703925, "learning_rate": 1.2150957084854482e-05, "loss": 0.1925, "step": 9754 }, { "epoch": 0.45, "grad_norm": 0.45589358501535854, "learning_rate": 1.2149503969588397e-05, "loss": 0.3086, "step": 9755 }, { "epoch": 0.45, "grad_norm": 0.3760141854306172, "learning_rate": 1.214805080673436e-05, "loss": 0.2941, "step": 9756 }, { "epoch": 0.45, "grad_norm": 0.35035999799741574, "learning_rate": 1.2146597596324554e-05, "loss": 0.2815, "step": 9757 }, { "epoch": 0.45, "grad_norm": 0.6224921295160596, "learning_rate": 1.2145144338391143e-05, "loss": 0.3504, "step": 9758 }, { "epoch": 0.45, "grad_norm": 0.42800079302071287, "learning_rate": 1.2143691032966308e-05, "loss": 0.2637, "step": 9759 }, { "epoch": 0.45, "grad_norm": 0.25494414624199474, "learning_rate": 1.2142237680082215e-05, "loss": 0.1951, "step": 9760 }, { "epoch": 0.45, "grad_norm": 0.3359462720547074, "learning_rate": 1.2140784279771046e-05, "loss": 0.273, "step": 9761 }, { "epoch": 0.45, "grad_norm": 0.9511380781922887, "learning_rate": 1.2139330832064975e-05, "loss": 0.5621, "step": 9762 }, { "epoch": 0.45, "grad_norm": 0.3903353731323217, "learning_rate": 1.2137877336996185e-05, "loss": 0.3151, "step": 9763 }, { "epoch": 0.45, "grad_norm": 0.33316748092213944, "learning_rate": 1.2136423794596848e-05, "loss": 0.2795, "step": 9764 }, { "epoch": 0.45, "grad_norm": 0.7749806065546372, "learning_rate": 1.2134970204899148e-05, "loss": 0.3924, "step": 9765 }, { "epoch": 0.45, "grad_norm": 0.31004463128406, "learning_rate": 1.2133516567935266e-05, "loss": 0.2179, "step": 9766 }, { "epoch": 0.45, "grad_norm": 0.27513628064909423, "learning_rate": 1.2132062883737383e-05, "loss": 0.1274, "step": 9767 }, { "epoch": 0.45, "grad_norm": 0.4523267302554366, "learning_rate": 1.2130609152337683e-05, "loss": 0.3202, "step": 9768 }, { "epoch": 0.45, "grad_norm": 0.37371469218567344, "learning_rate": 1.2129155373768351e-05, "loss": 0.3105, "step": 9769 }, { "epoch": 0.45, "grad_norm": 0.6639688486753837, "learning_rate": 1.2127701548061571e-05, "loss": 0.4584, "step": 9770 }, { "epoch": 0.45, "grad_norm": 0.5043326619893206, "learning_rate": 1.2126247675249525e-05, "loss": 0.1665, "step": 9771 }, { "epoch": 0.45, "grad_norm": 0.29739973373773143, "learning_rate": 1.212479375536441e-05, "loss": 0.2397, "step": 9772 }, { "epoch": 0.45, "grad_norm": 0.3276105582455289, "learning_rate": 1.2123339788438404e-05, "loss": 0.2624, "step": 9773 }, { "epoch": 0.45, "grad_norm": 0.5537175826070679, "learning_rate": 1.2121885774503707e-05, "loss": 0.2522, "step": 9774 }, { "epoch": 0.45, "grad_norm": 0.4160929478233597, "learning_rate": 1.2120431713592501e-05, "loss": 0.3199, "step": 9775 }, { "epoch": 0.45, "grad_norm": 0.46208741405327036, "learning_rate": 1.2118977605736983e-05, "loss": 0.3689, "step": 9776 }, { "epoch": 0.45, "grad_norm": 0.3334562461233583, "learning_rate": 1.211752345096934e-05, "loss": 0.2055, "step": 9777 }, { "epoch": 0.45, "grad_norm": 0.3984728491694812, "learning_rate": 1.211606924932177e-05, "loss": 0.3037, "step": 9778 }, { "epoch": 0.45, "grad_norm": 0.3081280490235535, "learning_rate": 1.2114615000826466e-05, "loss": 0.2178, "step": 9779 }, { "epoch": 0.45, "grad_norm": 0.34121178701248567, "learning_rate": 1.2113160705515626e-05, "loss": 0.2503, "step": 9780 }, { "epoch": 0.45, "grad_norm": 0.3645815776055045, "learning_rate": 1.2111706363421442e-05, "loss": 0.287, "step": 9781 }, { "epoch": 0.45, "grad_norm": 0.7007580616821898, "learning_rate": 1.2110251974576117e-05, "loss": 0.4935, "step": 9782 }, { "epoch": 0.45, "grad_norm": 1.6116781110259237, "learning_rate": 1.2108797539011847e-05, "loss": 0.6702, "step": 9783 }, { "epoch": 0.45, "grad_norm": 0.286998580309271, "learning_rate": 1.2107343056760829e-05, "loss": 0.2225, "step": 9784 }, { "epoch": 0.45, "grad_norm": 0.3631676797283083, "learning_rate": 1.210588852785527e-05, "loss": 0.2818, "step": 9785 }, { "epoch": 0.45, "grad_norm": 0.6485406394530175, "learning_rate": 1.2104433952327366e-05, "loss": 0.4345, "step": 9786 }, { "epoch": 0.45, "grad_norm": 0.35574802495082974, "learning_rate": 1.2102979330209325e-05, "loss": 0.2388, "step": 9787 }, { "epoch": 0.45, "grad_norm": 0.3424847531270152, "learning_rate": 1.2101524661533347e-05, "loss": 0.2634, "step": 9788 }, { "epoch": 0.45, "grad_norm": 0.40872203304728333, "learning_rate": 1.210006994633164e-05, "loss": 0.3069, "step": 9789 }, { "epoch": 0.45, "grad_norm": 0.3378375924408665, "learning_rate": 1.2098615184636403e-05, "loss": 0.2169, "step": 9790 }, { "epoch": 0.45, "grad_norm": 0.4240667148873578, "learning_rate": 1.2097160376479855e-05, "loss": 0.3078, "step": 9791 }, { "epoch": 0.45, "grad_norm": 0.3416360335363821, "learning_rate": 1.2095705521894196e-05, "loss": 0.3078, "step": 9792 }, { "epoch": 0.45, "grad_norm": 0.34859788217475013, "learning_rate": 1.2094250620911636e-05, "loss": 0.2308, "step": 9793 }, { "epoch": 0.45, "grad_norm": 0.3999443815089186, "learning_rate": 1.2092795673564384e-05, "loss": 0.2647, "step": 9794 }, { "epoch": 0.45, "grad_norm": 0.7802817232056284, "learning_rate": 1.2091340679884656e-05, "loss": 0.4076, "step": 9795 }, { "epoch": 0.45, "grad_norm": 0.39252456727000395, "learning_rate": 1.208988563990466e-05, "loss": 0.2851, "step": 9796 }, { "epoch": 0.45, "grad_norm": 0.33136039596870226, "learning_rate": 1.208843055365661e-05, "loss": 0.2858, "step": 9797 }, { "epoch": 0.45, "grad_norm": 0.7234533770186289, "learning_rate": 1.208697542117272e-05, "loss": 0.5125, "step": 9798 }, { "epoch": 0.45, "grad_norm": 0.400788613151131, "learning_rate": 1.2085520242485205e-05, "loss": 0.2743, "step": 9799 }, { "epoch": 0.45, "grad_norm": 0.22915453909584654, "learning_rate": 1.2084065017626282e-05, "loss": 0.1778, "step": 9800 }, { "epoch": 0.45, "grad_norm": 0.9230868217101192, "learning_rate": 1.2082609746628169e-05, "loss": 0.5609, "step": 9801 }, { "epoch": 0.45, "grad_norm": 0.3455153426624651, "learning_rate": 1.2081154429523084e-05, "loss": 0.289, "step": 9802 }, { "epoch": 0.45, "grad_norm": 0.904840523925394, "learning_rate": 1.2079699066343242e-05, "loss": 0.3584, "step": 9803 }, { "epoch": 0.45, "grad_norm": 0.3680123567835089, "learning_rate": 1.2078243657120871e-05, "loss": 0.3227, "step": 9804 }, { "epoch": 0.45, "grad_norm": 0.3580933864450864, "learning_rate": 1.2076788201888187e-05, "loss": 0.2752, "step": 9805 }, { "epoch": 0.45, "grad_norm": 0.49760555510209187, "learning_rate": 1.2075332700677418e-05, "loss": 0.3219, "step": 9806 }, { "epoch": 0.45, "grad_norm": 0.32365328037705554, "learning_rate": 1.2073877153520776e-05, "loss": 0.1982, "step": 9807 }, { "epoch": 0.45, "grad_norm": 0.2988868872201726, "learning_rate": 1.2072421560450497e-05, "loss": 0.2504, "step": 9808 }, { "epoch": 0.45, "grad_norm": 1.4911225225553888, "learning_rate": 1.2070965921498801e-05, "loss": 0.457, "step": 9809 }, { "epoch": 0.45, "grad_norm": 0.713714900647917, "learning_rate": 1.2069510236697918e-05, "loss": 0.335, "step": 9810 }, { "epoch": 0.45, "grad_norm": 0.3976964988004283, "learning_rate": 1.2068054506080071e-05, "loss": 0.2771, "step": 9811 }, { "epoch": 0.45, "grad_norm": 0.3511786209754486, "learning_rate": 1.206659872967749e-05, "loss": 0.2752, "step": 9812 }, { "epoch": 0.45, "grad_norm": 0.2573427025749381, "learning_rate": 1.2065142907522405e-05, "loss": 0.1016, "step": 9813 }, { "epoch": 0.45, "grad_norm": 0.4215648233180496, "learning_rate": 1.2063687039647045e-05, "loss": 0.2876, "step": 9814 }, { "epoch": 0.45, "grad_norm": 2.169463531768768, "learning_rate": 1.2062231126083645e-05, "loss": 0.4939, "step": 9815 }, { "epoch": 0.45, "grad_norm": 0.3767853167882123, "learning_rate": 1.2060775166864435e-05, "loss": 0.2606, "step": 9816 }, { "epoch": 0.45, "grad_norm": 0.3675148869718269, "learning_rate": 1.205931916202165e-05, "loss": 0.2465, "step": 9817 }, { "epoch": 0.45, "grad_norm": 0.24739530758944192, "learning_rate": 1.2057863111587521e-05, "loss": 0.158, "step": 9818 }, { "epoch": 0.45, "grad_norm": 0.9310876030736751, "learning_rate": 1.205640701559429e-05, "loss": 0.5798, "step": 9819 }, { "epoch": 0.45, "grad_norm": 0.3587609416274913, "learning_rate": 1.2054950874074185e-05, "loss": 0.2299, "step": 9820 }, { "epoch": 0.45, "grad_norm": 1.0910233222151184, "learning_rate": 1.2053494687059453e-05, "loss": 0.4773, "step": 9821 }, { "epoch": 0.45, "grad_norm": 1.402513700023617, "learning_rate": 1.2052038454582325e-05, "loss": 0.8226, "step": 9822 }, { "epoch": 0.45, "grad_norm": 0.3847890491427266, "learning_rate": 1.2050582176675045e-05, "loss": 0.211, "step": 9823 }, { "epoch": 0.45, "grad_norm": 0.4451760185603885, "learning_rate": 1.204912585336985e-05, "loss": 0.3247, "step": 9824 }, { "epoch": 0.45, "grad_norm": 0.28257036452890893, "learning_rate": 1.2047669484698985e-05, "loss": 0.2049, "step": 9825 }, { "epoch": 0.45, "grad_norm": 0.3477096002316174, "learning_rate": 1.204621307069469e-05, "loss": 0.2494, "step": 9826 }, { "epoch": 0.45, "grad_norm": 1.0404416534333325, "learning_rate": 1.204475661138921e-05, "loss": 0.5631, "step": 9827 }, { "epoch": 0.45, "grad_norm": 0.4775966066776573, "learning_rate": 1.204330010681479e-05, "loss": 0.3728, "step": 9828 }, { "epoch": 0.45, "grad_norm": 0.40197001356967377, "learning_rate": 1.2041843557003674e-05, "loss": 0.1955, "step": 9829 }, { "epoch": 0.45, "grad_norm": 0.3493036876943971, "learning_rate": 1.2040386961988111e-05, "loss": 0.2356, "step": 9830 }, { "epoch": 0.45, "grad_norm": 0.4340541695765449, "learning_rate": 1.2038930321800346e-05, "loss": 0.2801, "step": 9831 }, { "epoch": 0.45, "grad_norm": 0.42660088999761014, "learning_rate": 1.203747363647263e-05, "loss": 0.2919, "step": 9832 }, { "epoch": 0.45, "grad_norm": 0.45173310947220213, "learning_rate": 1.2036016906037208e-05, "loss": 0.277, "step": 9833 }, { "epoch": 0.45, "grad_norm": 1.5439288365132544, "learning_rate": 1.2034560130526341e-05, "loss": 0.8163, "step": 9834 }, { "epoch": 0.45, "grad_norm": 0.44370932924822437, "learning_rate": 1.2033103309972268e-05, "loss": 0.2872, "step": 9835 }, { "epoch": 0.45, "grad_norm": 0.3395466984360209, "learning_rate": 1.203164644440725e-05, "loss": 0.2456, "step": 9836 }, { "epoch": 0.45, "grad_norm": 0.5640636664426414, "learning_rate": 1.2030189533863534e-05, "loss": 0.2837, "step": 9837 }, { "epoch": 0.45, "grad_norm": 0.4929367045310672, "learning_rate": 1.2028732578373381e-05, "loss": 0.3143, "step": 9838 }, { "epoch": 0.45, "grad_norm": 0.314763231174707, "learning_rate": 1.2027275577969046e-05, "loss": 0.1592, "step": 9839 }, { "epoch": 0.45, "grad_norm": 0.45987812327961775, "learning_rate": 1.2025818532682783e-05, "loss": 0.3678, "step": 9840 }, { "epoch": 0.45, "grad_norm": 0.3935448290689008, "learning_rate": 1.2024361442546849e-05, "loss": 0.2689, "step": 9841 }, { "epoch": 0.45, "grad_norm": 0.8941428513169435, "learning_rate": 1.2022904307593502e-05, "loss": 0.3277, "step": 9842 }, { "epoch": 0.45, "grad_norm": 0.4541836223235423, "learning_rate": 1.2021447127855005e-05, "loss": 0.3599, "step": 9843 }, { "epoch": 0.45, "grad_norm": 0.30117761435340923, "learning_rate": 1.2019989903363616e-05, "loss": 0.2526, "step": 9844 }, { "epoch": 0.45, "grad_norm": 0.31439134566434324, "learning_rate": 1.2018532634151598e-05, "loss": 0.1847, "step": 9845 }, { "epoch": 0.45, "grad_norm": 1.320955756950874, "learning_rate": 1.2017075320251215e-05, "loss": 0.5562, "step": 9846 }, { "epoch": 0.45, "grad_norm": 0.6641414379747985, "learning_rate": 1.2015617961694727e-05, "loss": 0.4212, "step": 9847 }, { "epoch": 0.45, "grad_norm": 0.3322556877763977, "learning_rate": 1.2014160558514398e-05, "loss": 0.2817, "step": 9848 }, { "epoch": 0.45, "grad_norm": 0.8939247092135392, "learning_rate": 1.2012703110742498e-05, "loss": 0.3594, "step": 9849 }, { "epoch": 0.45, "grad_norm": 0.3274390566188609, "learning_rate": 1.201124561841129e-05, "loss": 0.1882, "step": 9850 }, { "epoch": 0.45, "grad_norm": 0.3426358085861394, "learning_rate": 1.2009788081553042e-05, "loss": 0.2647, "step": 9851 }, { "epoch": 0.45, "grad_norm": 0.40052228770882586, "learning_rate": 1.2008330500200025e-05, "loss": 0.2815, "step": 9852 }, { "epoch": 0.45, "grad_norm": 0.44757964211627904, "learning_rate": 1.2006872874384505e-05, "loss": 0.3171, "step": 9853 }, { "epoch": 0.45, "grad_norm": 0.5155837001346982, "learning_rate": 1.2005415204138753e-05, "loss": 0.3767, "step": 9854 }, { "epoch": 0.45, "grad_norm": 0.49261191226632833, "learning_rate": 1.2003957489495043e-05, "loss": 0.2855, "step": 9855 }, { "epoch": 0.45, "grad_norm": 0.35595148383086916, "learning_rate": 1.2002499730485643e-05, "loss": 0.2745, "step": 9856 }, { "epoch": 0.45, "grad_norm": 0.2824074841956882, "learning_rate": 1.2001041927142833e-05, "loss": 0.1969, "step": 9857 }, { "epoch": 0.45, "grad_norm": 1.176496021438052, "learning_rate": 1.1999584079498883e-05, "loss": 0.7641, "step": 9858 }, { "epoch": 0.45, "grad_norm": 0.38426625399968406, "learning_rate": 1.1998126187586064e-05, "loss": 0.2196, "step": 9859 }, { "epoch": 0.45, "grad_norm": 0.31508060860118714, "learning_rate": 1.199666825143666e-05, "loss": 0.2836, "step": 9860 }, { "epoch": 0.45, "grad_norm": 0.6994740764647053, "learning_rate": 1.1995210271082944e-05, "loss": 0.4165, "step": 9861 }, { "epoch": 0.45, "grad_norm": 0.3213200341219081, "learning_rate": 1.1993752246557197e-05, "loss": 0.2073, "step": 9862 }, { "epoch": 0.45, "grad_norm": 0.31620948065138504, "learning_rate": 1.1992294177891697e-05, "loss": 0.1869, "step": 9863 }, { "epoch": 0.45, "grad_norm": 0.37732292842176324, "learning_rate": 1.1990836065118725e-05, "loss": 0.3314, "step": 9864 }, { "epoch": 0.45, "grad_norm": 0.4371786052422833, "learning_rate": 1.1989377908270559e-05, "loss": 0.2103, "step": 9865 }, { "epoch": 0.45, "grad_norm": 0.4273489372030156, "learning_rate": 1.1987919707379486e-05, "loss": 0.3445, "step": 9866 }, { "epoch": 0.45, "grad_norm": 0.3803194276353355, "learning_rate": 1.1986461462477783e-05, "loss": 0.3278, "step": 9867 }, { "epoch": 0.45, "grad_norm": 0.3992800754820834, "learning_rate": 1.198500317359774e-05, "loss": 0.1265, "step": 9868 }, { "epoch": 0.45, "grad_norm": 0.3167209282769644, "learning_rate": 1.1983544840771639e-05, "loss": 0.2525, "step": 9869 }, { "epoch": 0.45, "grad_norm": 0.4433473920545439, "learning_rate": 1.198208646403177e-05, "loss": 0.2906, "step": 9870 }, { "epoch": 0.45, "grad_norm": 0.4587377413914146, "learning_rate": 1.1980628043410417e-05, "loss": 0.3146, "step": 9871 }, { "epoch": 0.45, "grad_norm": 0.31101859211616145, "learning_rate": 1.1979169578939863e-05, "loss": 0.2589, "step": 9872 }, { "epoch": 0.45, "grad_norm": 1.1648152548915247, "learning_rate": 1.1977711070652405e-05, "loss": 0.5716, "step": 9873 }, { "epoch": 0.45, "grad_norm": 0.5591310212453935, "learning_rate": 1.197625251858033e-05, "loss": 0.3483, "step": 9874 }, { "epoch": 0.45, "grad_norm": 0.24482419041979983, "learning_rate": 1.1974793922755931e-05, "loss": 0.2116, "step": 9875 }, { "epoch": 0.45, "grad_norm": 1.3333560926228034, "learning_rate": 1.1973335283211494e-05, "loss": 0.8547, "step": 9876 }, { "epoch": 0.45, "grad_norm": 0.5544080391728592, "learning_rate": 1.197187659997932e-05, "loss": 0.3456, "step": 9877 }, { "epoch": 0.45, "grad_norm": 0.30995905476407815, "learning_rate": 1.1970417873091694e-05, "loss": 0.2078, "step": 9878 }, { "epoch": 0.45, "grad_norm": 0.38920138673578636, "learning_rate": 1.1968959102580917e-05, "loss": 0.3105, "step": 9879 }, { "epoch": 0.45, "grad_norm": 0.5877853632541026, "learning_rate": 1.1967500288479286e-05, "loss": 0.2957, "step": 9880 }, { "epoch": 0.45, "grad_norm": 0.299097380123809, "learning_rate": 1.1966041430819093e-05, "loss": 0.174, "step": 9881 }, { "epoch": 0.45, "grad_norm": 0.6740685784022121, "learning_rate": 1.1964582529632636e-05, "loss": 0.4685, "step": 9882 }, { "epoch": 0.45, "grad_norm": 0.3936863531525683, "learning_rate": 1.1963123584952216e-05, "loss": 0.3088, "step": 9883 }, { "epoch": 0.45, "grad_norm": 0.3230673172312626, "learning_rate": 1.1961664596810132e-05, "loss": 0.3076, "step": 9884 }, { "epoch": 0.45, "grad_norm": 0.3079285530118613, "learning_rate": 1.1960205565238685e-05, "loss": 0.1353, "step": 9885 }, { "epoch": 0.45, "grad_norm": 0.6791099530219162, "learning_rate": 1.1958746490270178e-05, "loss": 0.4013, "step": 9886 }, { "epoch": 0.45, "grad_norm": 0.27475832200133365, "learning_rate": 1.195728737193691e-05, "loss": 0.2418, "step": 9887 }, { "epoch": 0.45, "grad_norm": 0.32133589862570605, "learning_rate": 1.1955828210271187e-05, "loss": 0.2818, "step": 9888 }, { "epoch": 0.45, "grad_norm": 0.7307020481778115, "learning_rate": 1.1954369005305308e-05, "loss": 0.4087, "step": 9889 }, { "epoch": 0.45, "grad_norm": 0.27730206076175884, "learning_rate": 1.1952909757071587e-05, "loss": 0.2384, "step": 9890 }, { "epoch": 0.45, "grad_norm": 0.3371990170578978, "learning_rate": 1.1951450465602326e-05, "loss": 0.259, "step": 9891 }, { "epoch": 0.45, "grad_norm": 0.8661464633744825, "learning_rate": 1.1949991130929833e-05, "loss": 0.3902, "step": 9892 }, { "epoch": 0.45, "grad_norm": 0.39570760398043525, "learning_rate": 1.1948531753086415e-05, "loss": 0.3243, "step": 9893 }, { "epoch": 0.45, "grad_norm": 0.7596920450160195, "learning_rate": 1.1947072332104381e-05, "loss": 0.3484, "step": 9894 }, { "epoch": 0.45, "grad_norm": 0.321648116380903, "learning_rate": 1.1945612868016041e-05, "loss": 0.2655, "step": 9895 }, { "epoch": 0.45, "grad_norm": 0.4545342733738934, "learning_rate": 1.194415336085371e-05, "loss": 0.3285, "step": 9896 }, { "epoch": 0.45, "grad_norm": 0.29334201039261004, "learning_rate": 1.1942693810649695e-05, "loss": 0.1654, "step": 9897 }, { "epoch": 0.45, "grad_norm": 0.3847209842026497, "learning_rate": 1.1941234217436315e-05, "loss": 0.1227, "step": 9898 }, { "epoch": 0.45, "grad_norm": 0.4135355616341419, "learning_rate": 1.1939774581245878e-05, "loss": 0.3212, "step": 9899 }, { "epoch": 0.45, "grad_norm": 0.4250930230310954, "learning_rate": 1.1938314902110701e-05, "loss": 0.3181, "step": 9900 }, { "epoch": 0.45, "grad_norm": 0.5455250109658105, "learning_rate": 1.1936855180063102e-05, "loss": 0.3129, "step": 9901 }, { "epoch": 0.45, "grad_norm": 0.4059397815569489, "learning_rate": 1.1935395415135393e-05, "loss": 0.3016, "step": 9902 }, { "epoch": 0.45, "grad_norm": 0.2617455222505545, "learning_rate": 1.19339356073599e-05, "loss": 0.216, "step": 9903 }, { "epoch": 0.45, "grad_norm": 0.5803570776410119, "learning_rate": 1.1932475756768933e-05, "loss": 0.1574, "step": 9904 }, { "epoch": 0.46, "grad_norm": 0.3936338782340999, "learning_rate": 1.1931015863394818e-05, "loss": 0.2918, "step": 9905 }, { "epoch": 0.46, "grad_norm": 0.6880842529572427, "learning_rate": 1.1929555927269866e-05, "loss": 0.4387, "step": 9906 }, { "epoch": 0.46, "grad_norm": 0.3303837993545562, "learning_rate": 1.1928095948426413e-05, "loss": 0.2291, "step": 9907 }, { "epoch": 0.46, "grad_norm": 0.3670635646926979, "learning_rate": 1.192663592689677e-05, "loss": 0.295, "step": 9908 }, { "epoch": 0.46, "grad_norm": 0.3077128779601124, "learning_rate": 1.1925175862713266e-05, "loss": 0.1725, "step": 9909 }, { "epoch": 0.46, "grad_norm": 0.6334251586941848, "learning_rate": 1.1923715755908223e-05, "loss": 0.4326, "step": 9910 }, { "epoch": 0.46, "grad_norm": 0.28847088873818116, "learning_rate": 1.192225560651397e-05, "loss": 0.2265, "step": 9911 }, { "epoch": 0.46, "grad_norm": 0.7631666592526921, "learning_rate": 1.1920795414562826e-05, "loss": 0.5227, "step": 9912 }, { "epoch": 0.46, "grad_norm": 1.1749461419021119, "learning_rate": 1.1919335180087126e-05, "loss": 0.847, "step": 9913 }, { "epoch": 0.46, "grad_norm": 0.311823246546308, "learning_rate": 1.1917874903119194e-05, "loss": 0.1948, "step": 9914 }, { "epoch": 0.46, "grad_norm": 0.2472200517274404, "learning_rate": 1.1916414583691361e-05, "loss": 0.2141, "step": 9915 }, { "epoch": 0.46, "grad_norm": 0.7626581093143945, "learning_rate": 1.1914954221835955e-05, "loss": 0.3537, "step": 9916 }, { "epoch": 0.46, "grad_norm": 0.3481459782528476, "learning_rate": 1.191349381758531e-05, "loss": 0.2311, "step": 9917 }, { "epoch": 0.46, "grad_norm": 0.7331413037321006, "learning_rate": 1.1912033370971756e-05, "loss": 0.5156, "step": 9918 }, { "epoch": 0.46, "grad_norm": 0.35566066218461456, "learning_rate": 1.1910572882027623e-05, "loss": 0.3039, "step": 9919 }, { "epoch": 0.46, "grad_norm": 0.3795137161497298, "learning_rate": 1.190911235078525e-05, "loss": 0.296, "step": 9920 }, { "epoch": 0.46, "grad_norm": 0.2535394418761758, "learning_rate": 1.1907651777276967e-05, "loss": 0.1216, "step": 9921 }, { "epoch": 0.46, "grad_norm": 0.7488300069094642, "learning_rate": 1.1906191161535119e-05, "loss": 0.4561, "step": 9922 }, { "epoch": 0.46, "grad_norm": 0.2746399923359979, "learning_rate": 1.190473050359203e-05, "loss": 0.2527, "step": 9923 }, { "epoch": 0.46, "grad_norm": 0.46329624575458994, "learning_rate": 1.1903269803480045e-05, "loss": 0.3201, "step": 9924 }, { "epoch": 0.46, "grad_norm": 1.4276736312772949, "learning_rate": 1.19018090612315e-05, "loss": 0.7823, "step": 9925 }, { "epoch": 0.46, "grad_norm": 0.32784192284998426, "learning_rate": 1.1900348276878738e-05, "loss": 0.2365, "step": 9926 }, { "epoch": 0.46, "grad_norm": 0.26209183107872597, "learning_rate": 1.1898887450454093e-05, "loss": 0.2043, "step": 9927 }, { "epoch": 0.46, "grad_norm": 0.5636331979362261, "learning_rate": 1.1897426581989913e-05, "loss": 0.3169, "step": 9928 }, { "epoch": 0.46, "grad_norm": 0.3531844670292111, "learning_rate": 1.1895965671518534e-05, "loss": 0.2807, "step": 9929 }, { "epoch": 0.46, "grad_norm": 0.9268695413847821, "learning_rate": 1.1894504719072307e-05, "loss": 0.408, "step": 9930 }, { "epoch": 0.46, "grad_norm": 0.37456140485854067, "learning_rate": 1.1893043724683568e-05, "loss": 0.3252, "step": 9931 }, { "epoch": 0.46, "grad_norm": 0.3337654133468087, "learning_rate": 1.1891582688384666e-05, "loss": 0.2732, "step": 9932 }, { "epoch": 0.46, "grad_norm": 1.07336037739983, "learning_rate": 1.1890121610207947e-05, "loss": 0.5646, "step": 9933 }, { "epoch": 0.46, "grad_norm": 0.25964141165240134, "learning_rate": 1.188866049018576e-05, "loss": 0.2528, "step": 9934 }, { "epoch": 0.46, "grad_norm": 0.4386058924901409, "learning_rate": 1.1887199328350448e-05, "loss": 0.2648, "step": 9935 }, { "epoch": 0.46, "grad_norm": 0.398351022470657, "learning_rate": 1.1885738124734359e-05, "loss": 0.2892, "step": 9936 }, { "epoch": 0.46, "grad_norm": 0.4536883075573066, "learning_rate": 1.1884276879369846e-05, "loss": 0.2626, "step": 9937 }, { "epoch": 0.46, "grad_norm": 0.38837597515236055, "learning_rate": 1.188281559228926e-05, "loss": 0.2737, "step": 9938 }, { "epoch": 0.46, "grad_norm": 0.39877837219033074, "learning_rate": 1.1881354263524954e-05, "loss": 0.3366, "step": 9939 }, { "epoch": 0.46, "grad_norm": 0.49402524934098146, "learning_rate": 1.1879892893109276e-05, "loss": 0.2327, "step": 9940 }, { "epoch": 0.46, "grad_norm": 0.29265428000924, "learning_rate": 1.1878431481074581e-05, "loss": 0.2187, "step": 9941 }, { "epoch": 0.46, "grad_norm": 1.0914368040113336, "learning_rate": 1.1876970027453223e-05, "loss": 0.6868, "step": 9942 }, { "epoch": 0.46, "grad_norm": 0.3902454261858537, "learning_rate": 1.1875508532277558e-05, "loss": 0.2819, "step": 9943 }, { "epoch": 0.46, "grad_norm": 0.35081192657451765, "learning_rate": 1.187404699557994e-05, "loss": 0.2649, "step": 9944 }, { "epoch": 0.46, "grad_norm": 0.6471659102116764, "learning_rate": 1.187258541739273e-05, "loss": 0.4158, "step": 9945 }, { "epoch": 0.46, "grad_norm": 0.4613188426994112, "learning_rate": 1.1871123797748285e-05, "loss": 0.3479, "step": 9946 }, { "epoch": 0.46, "grad_norm": 0.22975847548854253, "learning_rate": 1.1869662136678961e-05, "loss": 0.184, "step": 9947 }, { "epoch": 0.46, "grad_norm": 0.3790048834431771, "learning_rate": 1.1868200434217118e-05, "loss": 0.2082, "step": 9948 }, { "epoch": 0.46, "grad_norm": 1.3551484766539017, "learning_rate": 1.1866738690395119e-05, "loss": 0.8304, "step": 9949 }, { "epoch": 0.46, "grad_norm": 0.32535832966142647, "learning_rate": 1.1865276905245325e-05, "loss": 0.2242, "step": 9950 }, { "epoch": 0.46, "grad_norm": 0.3770566395613993, "learning_rate": 1.1863815078800098e-05, "loss": 0.3251, "step": 9951 }, { "epoch": 0.46, "grad_norm": 0.4496230494640166, "learning_rate": 1.1862353211091801e-05, "loss": 0.3407, "step": 9952 }, { "epoch": 0.46, "grad_norm": 0.23653133617366773, "learning_rate": 1.1860891302152799e-05, "loss": 0.1268, "step": 9953 }, { "epoch": 0.46, "grad_norm": 0.596460487512924, "learning_rate": 1.185942935201546e-05, "loss": 0.4363, "step": 9954 }, { "epoch": 0.46, "grad_norm": 0.3862863752900431, "learning_rate": 1.1857967360712142e-05, "loss": 0.3503, "step": 9955 }, { "epoch": 0.46, "grad_norm": 0.33660560748983254, "learning_rate": 1.1856505328275221e-05, "loss": 0.2113, "step": 9956 }, { "epoch": 0.46, "grad_norm": 0.4269648453307695, "learning_rate": 1.185504325473706e-05, "loss": 0.3691, "step": 9957 }, { "epoch": 0.46, "grad_norm": 0.45667565812390193, "learning_rate": 1.185358114013003e-05, "loss": 0.3255, "step": 9958 }, { "epoch": 0.46, "grad_norm": 0.351376706289423, "learning_rate": 1.1852118984486498e-05, "loss": 0.2915, "step": 9959 }, { "epoch": 0.46, "grad_norm": 0.21055874523444296, "learning_rate": 1.1850656787838839e-05, "loss": 0.1509, "step": 9960 }, { "epoch": 0.46, "grad_norm": 0.720376522029616, "learning_rate": 1.1849194550219421e-05, "loss": 0.5095, "step": 9961 }, { "epoch": 0.46, "grad_norm": 0.38867734529433834, "learning_rate": 1.184773227166062e-05, "loss": 0.3024, "step": 9962 }, { "epoch": 0.46, "grad_norm": 0.335671074685415, "learning_rate": 1.1846269952194804e-05, "loss": 0.2538, "step": 9963 }, { "epoch": 0.46, "grad_norm": 1.0600234296015851, "learning_rate": 1.1844807591854354e-05, "loss": 0.5572, "step": 9964 }, { "epoch": 0.46, "grad_norm": 0.30521236993003353, "learning_rate": 1.1843345190671642e-05, "loss": 0.2294, "step": 9965 }, { "epoch": 0.46, "grad_norm": 0.5079665718404314, "learning_rate": 1.184188274867904e-05, "loss": 0.2849, "step": 9966 }, { "epoch": 0.46, "grad_norm": 0.4317216784124042, "learning_rate": 1.1840420265908934e-05, "loss": 0.3274, "step": 9967 }, { "epoch": 0.46, "grad_norm": 0.343065309880247, "learning_rate": 1.1838957742393692e-05, "loss": 0.3103, "step": 9968 }, { "epoch": 0.46, "grad_norm": 0.3980142194212502, "learning_rate": 1.1837495178165706e-05, "loss": 0.1821, "step": 9969 }, { "epoch": 0.46, "grad_norm": 0.35446937461373595, "learning_rate": 1.183603257325734e-05, "loss": 0.295, "step": 9970 }, { "epoch": 0.46, "grad_norm": 0.6911963616725106, "learning_rate": 1.1834569927700988e-05, "loss": 0.3232, "step": 9971 }, { "epoch": 0.46, "grad_norm": 0.3668883650477848, "learning_rate": 1.1833107241529023e-05, "loss": 0.2728, "step": 9972 }, { "epoch": 0.46, "grad_norm": 0.7049872039823518, "learning_rate": 1.183164451477383e-05, "loss": 0.3494, "step": 9973 }, { "epoch": 0.46, "grad_norm": 0.4128469845793274, "learning_rate": 1.1830181747467794e-05, "loss": 0.2879, "step": 9974 }, { "epoch": 0.46, "grad_norm": 0.29210906609087, "learning_rate": 1.1828718939643298e-05, "loss": 0.2432, "step": 9975 }, { "epoch": 0.46, "grad_norm": 1.0282089677745876, "learning_rate": 1.1827256091332726e-05, "loss": 0.336, "step": 9976 }, { "epoch": 0.46, "grad_norm": 0.5862158510123182, "learning_rate": 1.1825793202568467e-05, "loss": 0.3543, "step": 9977 }, { "epoch": 0.46, "grad_norm": 0.3615488792859387, "learning_rate": 1.1824330273382904e-05, "loss": 0.3005, "step": 9978 }, { "epoch": 0.46, "grad_norm": 0.5779046108189536, "learning_rate": 1.1822867303808429e-05, "loss": 0.3073, "step": 9979 }, { "epoch": 0.46, "grad_norm": 0.4123805359267903, "learning_rate": 1.1821404293877428e-05, "loss": 0.295, "step": 9980 }, { "epoch": 0.46, "grad_norm": 0.3219027977000585, "learning_rate": 1.1819941243622292e-05, "loss": 0.2092, "step": 9981 }, { "epoch": 0.46, "grad_norm": 0.3632084879187309, "learning_rate": 1.1818478153075412e-05, "loss": 0.2638, "step": 9982 }, { "epoch": 0.46, "grad_norm": 0.4333576162175848, "learning_rate": 1.1817015022269175e-05, "loss": 0.2589, "step": 9983 }, { "epoch": 0.46, "grad_norm": 0.5388033499749486, "learning_rate": 1.181555185123598e-05, "loss": 0.3673, "step": 9984 }, { "epoch": 0.46, "grad_norm": 0.720768040349031, "learning_rate": 1.1814088640008215e-05, "loss": 0.4269, "step": 9985 }, { "epoch": 0.46, "grad_norm": 0.2866352493752822, "learning_rate": 1.1812625388618275e-05, "loss": 0.2332, "step": 9986 }, { "epoch": 0.46, "grad_norm": 0.2628642338439257, "learning_rate": 1.1811162097098559e-05, "loss": 0.1872, "step": 9987 }, { "epoch": 0.46, "grad_norm": 1.1211379493555538, "learning_rate": 1.1809698765481458e-05, "loss": 0.5937, "step": 9988 }, { "epoch": 0.46, "grad_norm": 0.278649613867333, "learning_rate": 1.180823539379937e-05, "loss": 0.1206, "step": 9989 }, { "epoch": 0.46, "grad_norm": 0.45990426333653806, "learning_rate": 1.1806771982084694e-05, "loss": 0.3134, "step": 9990 }, { "epoch": 0.46, "grad_norm": 0.8295897500802334, "learning_rate": 1.1805308530369826e-05, "loss": 0.3658, "step": 9991 }, { "epoch": 0.46, "grad_norm": 0.37033929569541063, "learning_rate": 1.1803845038687171e-05, "loss": 0.1199, "step": 9992 }, { "epoch": 0.46, "grad_norm": 0.28367780366899964, "learning_rate": 1.1802381507069125e-05, "loss": 0.2148, "step": 9993 }, { "epoch": 0.46, "grad_norm": 0.3966903362216353, "learning_rate": 1.1800917935548086e-05, "loss": 0.311, "step": 9994 }, { "epoch": 0.46, "grad_norm": 0.3058701451982702, "learning_rate": 1.1799454324156463e-05, "loss": 0.1153, "step": 9995 }, { "epoch": 0.46, "grad_norm": 0.4138375822555433, "learning_rate": 1.1797990672926652e-05, "loss": 0.3425, "step": 9996 }, { "epoch": 0.46, "grad_norm": 1.0261541459760841, "learning_rate": 1.1796526981891063e-05, "loss": 0.5243, "step": 9997 }, { "epoch": 0.46, "grad_norm": 0.3397860160358067, "learning_rate": 1.1795063251082098e-05, "loss": 0.2942, "step": 9998 }, { "epoch": 0.46, "grad_norm": 0.2875764422949156, "learning_rate": 1.1793599480532163e-05, "loss": 0.1837, "step": 9999 }, { "epoch": 0.46, "grad_norm": 0.48693126612691257, "learning_rate": 1.179213567027366e-05, "loss": 0.2685, "step": 10000 }, { "epoch": 0.46, "grad_norm": 0.5262746196859454, "learning_rate": 1.1790671820339007e-05, "loss": 0.3271, "step": 10001 }, { "epoch": 0.46, "grad_norm": 0.331191491793911, "learning_rate": 1.17892079307606e-05, "loss": 0.256, "step": 10002 }, { "epoch": 0.46, "grad_norm": 0.47533247846589405, "learning_rate": 1.1787744001570858e-05, "loss": 0.3955, "step": 10003 }, { "epoch": 0.46, "grad_norm": 0.634528387425142, "learning_rate": 1.1786280032802186e-05, "loss": 0.3471, "step": 10004 }, { "epoch": 0.46, "grad_norm": 0.23419412428601935, "learning_rate": 1.1784816024486996e-05, "loss": 0.1506, "step": 10005 }, { "epoch": 0.46, "grad_norm": 0.3628402176831377, "learning_rate": 1.1783351976657698e-05, "loss": 0.3049, "step": 10006 }, { "epoch": 0.46, "grad_norm": 0.7129072502403367, "learning_rate": 1.1781887889346706e-05, "loss": 0.4136, "step": 10007 }, { "epoch": 0.46, "grad_norm": 0.34800764173189175, "learning_rate": 1.1780423762586435e-05, "loss": 0.2354, "step": 10008 }, { "epoch": 0.46, "grad_norm": 1.0608697482293123, "learning_rate": 1.1778959596409296e-05, "loss": 0.6971, "step": 10009 }, { "epoch": 0.46, "grad_norm": 0.35001696954576883, "learning_rate": 1.1777495390847709e-05, "loss": 0.2668, "step": 10010 }, { "epoch": 0.46, "grad_norm": 0.3899176525881227, "learning_rate": 1.1776031145934085e-05, "loss": 0.3123, "step": 10011 }, { "epoch": 0.46, "grad_norm": 0.33632989599035634, "learning_rate": 1.1774566861700845e-05, "loss": 0.1211, "step": 10012 }, { "epoch": 0.46, "grad_norm": 0.7870621695475499, "learning_rate": 1.17731025381804e-05, "loss": 0.3774, "step": 10013 }, { "epoch": 0.46, "grad_norm": 0.3232130074405032, "learning_rate": 1.177163817540518e-05, "loss": 0.2801, "step": 10014 }, { "epoch": 0.46, "grad_norm": 1.0085214397916038, "learning_rate": 1.1770173773407594e-05, "loss": 0.4783, "step": 10015 }, { "epoch": 0.46, "grad_norm": 0.6170896757165163, "learning_rate": 1.1768709332220072e-05, "loss": 0.4236, "step": 10016 }, { "epoch": 0.46, "grad_norm": 0.24345223369578334, "learning_rate": 1.1767244851875023e-05, "loss": 0.1897, "step": 10017 }, { "epoch": 0.46, "grad_norm": 0.45952341848879974, "learning_rate": 1.1765780332404882e-05, "loss": 0.2689, "step": 10018 }, { "epoch": 0.46, "grad_norm": 0.983075136703261, "learning_rate": 1.1764315773842063e-05, "loss": 0.3733, "step": 10019 }, { "epoch": 0.46, "grad_norm": 0.39421875605177625, "learning_rate": 1.1762851176218994e-05, "loss": 0.289, "step": 10020 }, { "epoch": 0.46, "grad_norm": 0.942509375955075, "learning_rate": 1.1761386539568101e-05, "loss": 0.5045, "step": 10021 }, { "epoch": 0.46, "grad_norm": 0.36049146647764785, "learning_rate": 1.1759921863921807e-05, "loss": 0.2921, "step": 10022 }, { "epoch": 0.46, "grad_norm": 0.4522278003230106, "learning_rate": 1.1758457149312539e-05, "loss": 0.3353, "step": 10023 }, { "epoch": 0.46, "grad_norm": 0.387448627058914, "learning_rate": 1.1756992395772722e-05, "loss": 0.2124, "step": 10024 }, { "epoch": 0.46, "grad_norm": 1.0168979438401233, "learning_rate": 1.1755527603334789e-05, "loss": 0.3412, "step": 10025 }, { "epoch": 0.46, "grad_norm": 0.29098794628053565, "learning_rate": 1.1754062772031166e-05, "loss": 0.2588, "step": 10026 }, { "epoch": 0.46, "grad_norm": 0.49414261762359024, "learning_rate": 1.1752597901894285e-05, "loss": 0.4067, "step": 10027 }, { "epoch": 0.46, "grad_norm": 0.8832224154941617, "learning_rate": 1.1751132992956576e-05, "loss": 0.2319, "step": 10028 }, { "epoch": 0.46, "grad_norm": 0.3452635533653455, "learning_rate": 1.1749668045250468e-05, "loss": 0.2581, "step": 10029 }, { "epoch": 0.46, "grad_norm": 0.4524947484214568, "learning_rate": 1.1748203058808397e-05, "loss": 0.329, "step": 10030 }, { "epoch": 0.46, "grad_norm": 0.24481683816584915, "learning_rate": 1.1746738033662795e-05, "loss": 0.1296, "step": 10031 }, { "epoch": 0.46, "grad_norm": 0.4237164562582197, "learning_rate": 1.1745272969846095e-05, "loss": 0.3036, "step": 10032 }, { "epoch": 0.46, "grad_norm": 1.2774864905348657, "learning_rate": 1.1743807867390735e-05, "loss": 0.773, "step": 10033 }, { "epoch": 0.46, "grad_norm": 0.3426542215106476, "learning_rate": 1.1742342726329152e-05, "loss": 0.2422, "step": 10034 }, { "epoch": 0.46, "grad_norm": 0.37401341473111255, "learning_rate": 1.1740877546693779e-05, "loss": 0.2992, "step": 10035 }, { "epoch": 0.46, "grad_norm": 0.7844990791731473, "learning_rate": 1.1739412328517052e-05, "loss": 0.4441, "step": 10036 }, { "epoch": 0.46, "grad_norm": 0.26717972784740507, "learning_rate": 1.1737947071831415e-05, "loss": 0.1982, "step": 10037 }, { "epoch": 0.46, "grad_norm": 0.3694765856263234, "learning_rate": 1.1736481776669307e-05, "loss": 0.199, "step": 10038 }, { "epoch": 0.46, "grad_norm": 0.5577101472482502, "learning_rate": 1.1735016443063162e-05, "loss": 0.3804, "step": 10039 }, { "epoch": 0.46, "grad_norm": 0.8761907294366034, "learning_rate": 1.1733551071045429e-05, "loss": 0.5249, "step": 10040 }, { "epoch": 0.46, "grad_norm": 0.3492945697820342, "learning_rate": 1.1732085660648543e-05, "loss": 0.1985, "step": 10041 }, { "epoch": 0.46, "grad_norm": 0.4172582836194634, "learning_rate": 1.173062021190495e-05, "loss": 0.3381, "step": 10042 }, { "epoch": 0.46, "grad_norm": 0.2718675904894732, "learning_rate": 1.1729154724847093e-05, "loss": 0.1515, "step": 10043 }, { "epoch": 0.46, "grad_norm": 0.3417266288523314, "learning_rate": 1.172768919950742e-05, "loss": 0.2119, "step": 10044 }, { "epoch": 0.46, "grad_norm": 0.5379451675445792, "learning_rate": 1.172622363591837e-05, "loss": 0.4336, "step": 10045 }, { "epoch": 0.46, "grad_norm": 0.5106053664108272, "learning_rate": 1.1724758034112395e-05, "loss": 0.346, "step": 10046 }, { "epoch": 0.46, "grad_norm": 0.34315683299857874, "learning_rate": 1.1723292394121937e-05, "loss": 0.2597, "step": 10047 }, { "epoch": 0.46, "grad_norm": 0.6462392634525331, "learning_rate": 1.172182671597945e-05, "loss": 0.3431, "step": 10048 }, { "epoch": 0.46, "grad_norm": 0.3445759201366232, "learning_rate": 1.1720360999717374e-05, "loss": 0.1953, "step": 10049 }, { "epoch": 0.46, "grad_norm": 0.29286083060104, "learning_rate": 1.1718895245368167e-05, "loss": 0.2606, "step": 10050 }, { "epoch": 0.46, "grad_norm": 1.064674360482196, "learning_rate": 1.1717429452964275e-05, "loss": 0.5577, "step": 10051 }, { "epoch": 0.46, "grad_norm": 0.6968155836386715, "learning_rate": 1.1715963622538154e-05, "loss": 0.4205, "step": 10052 }, { "epoch": 0.46, "grad_norm": 0.3449299380611466, "learning_rate": 1.1714497754122247e-05, "loss": 0.2838, "step": 10053 }, { "epoch": 0.46, "grad_norm": 0.3722150608048308, "learning_rate": 1.1713031847749013e-05, "loss": 0.2586, "step": 10054 }, { "epoch": 0.46, "grad_norm": 0.5082005346233696, "learning_rate": 1.1711565903450907e-05, "loss": 0.3316, "step": 10055 }, { "epoch": 0.46, "grad_norm": 0.38249320169277673, "learning_rate": 1.1710099921260378e-05, "loss": 0.3035, "step": 10056 }, { "epoch": 0.46, "grad_norm": 0.37052627275505423, "learning_rate": 1.1708633901209891e-05, "loss": 0.2155, "step": 10057 }, { "epoch": 0.46, "grad_norm": 0.5639801095558974, "learning_rate": 1.1707167843331893e-05, "loss": 0.3629, "step": 10058 }, { "epoch": 0.46, "grad_norm": 0.42879980352996244, "learning_rate": 1.1705701747658842e-05, "loss": 0.2854, "step": 10059 }, { "epoch": 0.46, "grad_norm": 0.46889373778146665, "learning_rate": 1.17042356142232e-05, "loss": 0.3393, "step": 10060 }, { "epoch": 0.46, "grad_norm": 0.3467921033670613, "learning_rate": 1.1702769443057425e-05, "loss": 0.2681, "step": 10061 }, { "epoch": 0.46, "grad_norm": 0.43823880404190485, "learning_rate": 1.1701303234193972e-05, "loss": 0.3282, "step": 10062 }, { "epoch": 0.46, "grad_norm": 0.36819436456423654, "learning_rate": 1.1699836987665312e-05, "loss": 0.2773, "step": 10063 }, { "epoch": 0.46, "grad_norm": 0.6810566865838882, "learning_rate": 1.1698370703503895e-05, "loss": 0.3255, "step": 10064 }, { "epoch": 0.46, "grad_norm": 0.2813410771509748, "learning_rate": 1.1696904381742188e-05, "loss": 0.2148, "step": 10065 }, { "epoch": 0.46, "grad_norm": 0.38198903431816406, "learning_rate": 1.1695438022412653e-05, "loss": 0.3306, "step": 10066 }, { "epoch": 0.46, "grad_norm": 0.9591736329880927, "learning_rate": 1.1693971625547756e-05, "loss": 0.2603, "step": 10067 }, { "epoch": 0.46, "grad_norm": 0.38755000727143635, "learning_rate": 1.1692505191179957e-05, "loss": 0.2854, "step": 10068 }, { "epoch": 0.46, "grad_norm": 0.9468788599281269, "learning_rate": 1.1691038719341727e-05, "loss": 0.5708, "step": 10069 }, { "epoch": 0.46, "grad_norm": 0.28448224741662526, "learning_rate": 1.168957221006553e-05, "loss": 0.2396, "step": 10070 }, { "epoch": 0.46, "grad_norm": 0.27914646512412794, "learning_rate": 1.168810566338383e-05, "loss": 0.2059, "step": 10071 }, { "epoch": 0.46, "grad_norm": 1.1471242556890526, "learning_rate": 1.1686639079329099e-05, "loss": 0.6389, "step": 10072 }, { "epoch": 0.46, "grad_norm": 0.38827093268989066, "learning_rate": 1.1685172457933804e-05, "loss": 0.3276, "step": 10073 }, { "epoch": 0.46, "grad_norm": 0.31971591487797246, "learning_rate": 1.1683705799230416e-05, "loss": 0.214, "step": 10074 }, { "epoch": 0.46, "grad_norm": 0.834221192586116, "learning_rate": 1.1682239103251405e-05, "loss": 0.5776, "step": 10075 }, { "epoch": 0.46, "grad_norm": 0.4418135536538834, "learning_rate": 1.168077237002924e-05, "loss": 0.3399, "step": 10076 }, { "epoch": 0.46, "grad_norm": 0.2433392767901945, "learning_rate": 1.1679305599596394e-05, "loss": 0.139, "step": 10077 }, { "epoch": 0.46, "grad_norm": 0.35031849479653154, "learning_rate": 1.167783879198534e-05, "loss": 0.2985, "step": 10078 }, { "epoch": 0.46, "grad_norm": 1.0479222544180729, "learning_rate": 1.1676371947228554e-05, "loss": 0.6134, "step": 10079 }, { "epoch": 0.46, "grad_norm": 0.35472402349721904, "learning_rate": 1.1674905065358508e-05, "loss": 0.1908, "step": 10080 }, { "epoch": 0.46, "grad_norm": 0.3950027183429008, "learning_rate": 1.1673438146407681e-05, "loss": 0.3295, "step": 10081 }, { "epoch": 0.46, "grad_norm": 0.5189445655197306, "learning_rate": 1.1671971190408544e-05, "loss": 0.4005, "step": 10082 }, { "epoch": 0.46, "grad_norm": 0.19542021191802572, "learning_rate": 1.1670504197393577e-05, "loss": 0.1261, "step": 10083 }, { "epoch": 0.46, "grad_norm": 0.5282637306389921, "learning_rate": 1.1669037167395256e-05, "loss": 0.3552, "step": 10084 }, { "epoch": 0.46, "grad_norm": 0.37666133888041864, "learning_rate": 1.1667570100446062e-05, "loss": 0.3314, "step": 10085 }, { "epoch": 0.46, "grad_norm": 0.4183750925885515, "learning_rate": 1.1666102996578473e-05, "loss": 0.2925, "step": 10086 }, { "epoch": 0.46, "grad_norm": 0.43029546417603587, "learning_rate": 1.1664635855824969e-05, "loss": 0.2905, "step": 10087 }, { "epoch": 0.46, "grad_norm": 0.7582523291449792, "learning_rate": 1.166316867821803e-05, "loss": 0.5447, "step": 10088 }, { "epoch": 0.46, "grad_norm": 0.30026263312907653, "learning_rate": 1.1661701463790142e-05, "loss": 0.2335, "step": 10089 }, { "epoch": 0.46, "grad_norm": 0.27797907014048207, "learning_rate": 1.1660234212573782e-05, "loss": 0.1912, "step": 10090 }, { "epoch": 0.46, "grad_norm": 0.9879002182889989, "learning_rate": 1.1658766924601439e-05, "loss": 0.5006, "step": 10091 }, { "epoch": 0.46, "grad_norm": 0.5551307056286501, "learning_rate": 1.1657299599905596e-05, "loss": 0.3847, "step": 10092 }, { "epoch": 0.46, "grad_norm": 0.38600070831419114, "learning_rate": 1.1655832238518739e-05, "loss": 0.2488, "step": 10093 }, { "epoch": 0.46, "grad_norm": 0.5049749729702442, "learning_rate": 1.1654364840473348e-05, "loss": 0.401, "step": 10094 }, { "epoch": 0.46, "grad_norm": 0.5878774863194353, "learning_rate": 1.1652897405801913e-05, "loss": 0.3225, "step": 10095 }, { "epoch": 0.46, "grad_norm": 0.24560335841127917, "learning_rate": 1.1651429934536923e-05, "loss": 0.1602, "step": 10096 }, { "epoch": 0.46, "grad_norm": 0.399986882390551, "learning_rate": 1.1649962426710868e-05, "loss": 0.3273, "step": 10097 }, { "epoch": 0.46, "grad_norm": 0.6193514136707808, "learning_rate": 1.1648494882356236e-05, "loss": 0.3437, "step": 10098 }, { "epoch": 0.46, "grad_norm": 0.3940600291985681, "learning_rate": 1.1647027301505515e-05, "loss": 0.3417, "step": 10099 }, { "epoch": 0.46, "grad_norm": 1.0711245132108864, "learning_rate": 1.1645559684191199e-05, "loss": 0.4641, "step": 10100 }, { "epoch": 0.46, "grad_norm": 0.3375833101781181, "learning_rate": 1.1644092030445773e-05, "loss": 0.3003, "step": 10101 }, { "epoch": 0.46, "grad_norm": 0.25513721523809546, "learning_rate": 1.1642624340301738e-05, "loss": 0.2068, "step": 10102 }, { "epoch": 0.46, "grad_norm": 0.7934123075779773, "learning_rate": 1.164115661379158e-05, "loss": 0.3336, "step": 10103 }, { "epoch": 0.46, "grad_norm": 0.5065106547257751, "learning_rate": 1.1639688850947798e-05, "loss": 0.3483, "step": 10104 }, { "epoch": 0.46, "grad_norm": 0.3577604358511736, "learning_rate": 1.1638221051802887e-05, "loss": 0.2941, "step": 10105 }, { "epoch": 0.46, "grad_norm": 0.4589062341799034, "learning_rate": 1.1636753216389339e-05, "loss": 0.2771, "step": 10106 }, { "epoch": 0.46, "grad_norm": 0.4281054672833545, "learning_rate": 1.163528534473965e-05, "loss": 0.3081, "step": 10107 }, { "epoch": 0.46, "grad_norm": 0.2824307114296, "learning_rate": 1.1633817436886323e-05, "loss": 0.2066, "step": 10108 }, { "epoch": 0.46, "grad_norm": 0.44703723851206306, "learning_rate": 1.1632349492861853e-05, "loss": 0.2768, "step": 10109 }, { "epoch": 0.46, "grad_norm": 0.6647260680728823, "learning_rate": 1.1630881512698737e-05, "loss": 0.3909, "step": 10110 }, { "epoch": 0.46, "grad_norm": 0.44158533127606425, "learning_rate": 1.1629413496429476e-05, "loss": 0.3167, "step": 10111 }, { "epoch": 0.46, "grad_norm": 1.074768989574492, "learning_rate": 1.162794544408657e-05, "loss": 0.7815, "step": 10112 }, { "epoch": 0.46, "grad_norm": 0.32357173280404755, "learning_rate": 1.1626477355702523e-05, "loss": 0.2302, "step": 10113 }, { "epoch": 0.46, "grad_norm": 0.23911827121319967, "learning_rate": 1.1625009231309832e-05, "loss": 0.199, "step": 10114 }, { "epoch": 0.46, "grad_norm": 0.8236922628410613, "learning_rate": 1.1623541070941005e-05, "loss": 0.4067, "step": 10115 }, { "epoch": 0.46, "grad_norm": 0.4590700241122029, "learning_rate": 1.1622072874628546e-05, "loss": 0.1673, "step": 10116 }, { "epoch": 0.46, "grad_norm": 0.2901775791358577, "learning_rate": 1.1620604642404954e-05, "loss": 0.2678, "step": 10117 }, { "epoch": 0.46, "grad_norm": 1.1149126751571659, "learning_rate": 1.1619136374302735e-05, "loss": 0.7265, "step": 10118 }, { "epoch": 0.46, "grad_norm": 0.5095702673853327, "learning_rate": 1.16176680703544e-05, "loss": 0.1956, "step": 10119 }, { "epoch": 0.46, "grad_norm": 0.36228373996312824, "learning_rate": 1.161619973059245e-05, "loss": 0.2735, "step": 10120 }, { "epoch": 0.46, "grad_norm": 0.37977997556040965, "learning_rate": 1.16147313550494e-05, "loss": 0.2926, "step": 10121 }, { "epoch": 0.46, "grad_norm": 0.29049154264179633, "learning_rate": 1.1613262943757752e-05, "loss": 0.1378, "step": 10122 }, { "epoch": 0.47, "grad_norm": 0.40635621024717733, "learning_rate": 1.1611794496750019e-05, "loss": 0.3375, "step": 10123 }, { "epoch": 0.47, "grad_norm": 1.285520796142388, "learning_rate": 1.1610326014058706e-05, "loss": 0.6434, "step": 10124 }, { "epoch": 0.47, "grad_norm": 0.3871601265255735, "learning_rate": 1.160885749571633e-05, "loss": 0.3031, "step": 10125 }, { "epoch": 0.47, "grad_norm": 0.3585881971646937, "learning_rate": 1.1607388941755397e-05, "loss": 0.2273, "step": 10126 }, { "epoch": 0.47, "grad_norm": 0.49237165609772104, "learning_rate": 1.1605920352208424e-05, "loss": 0.3163, "step": 10127 }, { "epoch": 0.47, "grad_norm": 0.3113632537840092, "learning_rate": 1.1604451727107927e-05, "loss": 0.2083, "step": 10128 }, { "epoch": 0.47, "grad_norm": 0.2899162748184306, "learning_rate": 1.1602983066486407e-05, "loss": 0.2368, "step": 10129 }, { "epoch": 0.47, "grad_norm": 1.3046450864715353, "learning_rate": 1.160151437037639e-05, "loss": 0.7733, "step": 10130 }, { "epoch": 0.47, "grad_norm": 0.7221710243487537, "learning_rate": 1.1600045638810387e-05, "loss": 0.5441, "step": 10131 }, { "epoch": 0.47, "grad_norm": 0.31307580511611083, "learning_rate": 1.159857687182092e-05, "loss": 0.2396, "step": 10132 }, { "epoch": 0.47, "grad_norm": 0.28238246997415495, "learning_rate": 1.1597108069440498e-05, "loss": 0.2605, "step": 10133 }, { "epoch": 0.47, "grad_norm": 0.5083964742205858, "learning_rate": 1.1595639231701642e-05, "loss": 0.2701, "step": 10134 }, { "epoch": 0.47, "grad_norm": 0.32711623583178534, "learning_rate": 1.1594170358636873e-05, "loss": 0.2183, "step": 10135 }, { "epoch": 0.47, "grad_norm": 1.0660538605992356, "learning_rate": 1.159270145027871e-05, "loss": 0.64, "step": 10136 }, { "epoch": 0.47, "grad_norm": 0.3637501740379312, "learning_rate": 1.1591232506659668e-05, "loss": 0.3231, "step": 10137 }, { "epoch": 0.47, "grad_norm": 0.3288145824895556, "learning_rate": 1.1589763527812275e-05, "loss": 0.2928, "step": 10138 }, { "epoch": 0.47, "grad_norm": 0.7209188924215945, "learning_rate": 1.158829451376905e-05, "loss": 0.3338, "step": 10139 }, { "epoch": 0.47, "grad_norm": 0.2515148906195833, "learning_rate": 1.1586825464562515e-05, "loss": 0.2012, "step": 10140 }, { "epoch": 0.47, "grad_norm": 0.32950260892852024, "learning_rate": 1.1585356380225193e-05, "loss": 0.2841, "step": 10141 }, { "epoch": 0.47, "grad_norm": 0.9641189761444086, "learning_rate": 1.1583887260789608e-05, "loss": 0.3995, "step": 10142 }, { "epoch": 0.47, "grad_norm": 0.6865735991543428, "learning_rate": 1.1582418106288286e-05, "loss": 0.3578, "step": 10143 }, { "epoch": 0.47, "grad_norm": 0.3752926440043566, "learning_rate": 1.1580948916753751e-05, "loss": 0.2857, "step": 10144 }, { "epoch": 0.47, "grad_norm": 0.34351153336140117, "learning_rate": 1.1579479692218534e-05, "loss": 0.2534, "step": 10145 }, { "epoch": 0.47, "grad_norm": 0.22869140603523305, "learning_rate": 1.1578010432715159e-05, "loss": 0.1466, "step": 10146 }, { "epoch": 0.47, "grad_norm": 0.384761339676635, "learning_rate": 1.1576541138276154e-05, "loss": 0.2821, "step": 10147 }, { "epoch": 0.47, "grad_norm": 0.5105976001308454, "learning_rate": 1.1575071808934042e-05, "loss": 0.2987, "step": 10148 }, { "epoch": 0.47, "grad_norm": 0.5901954839109667, "learning_rate": 1.1573602444721363e-05, "loss": 0.3622, "step": 10149 }, { "epoch": 0.47, "grad_norm": 0.33350458793944854, "learning_rate": 1.1572133045670642e-05, "loss": 0.2823, "step": 10150 }, { "epoch": 0.47, "grad_norm": 1.323818656901192, "learning_rate": 1.1570663611814411e-05, "loss": 0.5051, "step": 10151 }, { "epoch": 0.47, "grad_norm": 0.28338214582361987, "learning_rate": 1.15691941431852e-05, "loss": 0.2243, "step": 10152 }, { "epoch": 0.47, "grad_norm": 0.4077989680410675, "learning_rate": 1.1567724639815546e-05, "loss": 0.2665, "step": 10153 }, { "epoch": 0.47, "grad_norm": 0.554358578283371, "learning_rate": 1.1566255101737976e-05, "loss": 0.3248, "step": 10154 }, { "epoch": 0.47, "grad_norm": 0.9698303720443032, "learning_rate": 1.156478552898503e-05, "loss": 0.342, "step": 10155 }, { "epoch": 0.47, "grad_norm": 0.3800940413104027, "learning_rate": 1.156331592158924e-05, "loss": 0.2768, "step": 10156 }, { "epoch": 0.47, "grad_norm": 0.44187168502477925, "learning_rate": 1.1561846279583142e-05, "loss": 0.331, "step": 10157 }, { "epoch": 0.47, "grad_norm": 0.7720060610582207, "learning_rate": 1.1560376602999272e-05, "loss": 0.3012, "step": 10158 }, { "epoch": 0.47, "grad_norm": 0.3809622337637473, "learning_rate": 1.1558906891870167e-05, "loss": 0.2872, "step": 10159 }, { "epoch": 0.47, "grad_norm": 0.5929966023069846, "learning_rate": 1.1557437146228368e-05, "loss": 0.2618, "step": 10160 }, { "epoch": 0.47, "grad_norm": 0.3767094534754204, "learning_rate": 1.155596736610641e-05, "loss": 0.264, "step": 10161 }, { "epoch": 0.47, "grad_norm": 0.3305448337736627, "learning_rate": 1.1554497551536836e-05, "loss": 0.2001, "step": 10162 }, { "epoch": 0.47, "grad_norm": 1.0091538655296923, "learning_rate": 1.1553027702552184e-05, "loss": 0.5113, "step": 10163 }, { "epoch": 0.47, "grad_norm": 0.37395529881074846, "learning_rate": 1.1551557819184995e-05, "loss": 0.3261, "step": 10164 }, { "epoch": 0.47, "grad_norm": 0.3275817512209674, "learning_rate": 1.155008790146781e-05, "loss": 0.194, "step": 10165 }, { "epoch": 0.47, "grad_norm": 0.643705748089485, "learning_rate": 1.1548617949433174e-05, "loss": 0.4454, "step": 10166 }, { "epoch": 0.47, "grad_norm": 0.3176775556513125, "learning_rate": 1.1547147963113629e-05, "loss": 0.2036, "step": 10167 }, { "epoch": 0.47, "grad_norm": 0.38662721924187926, "learning_rate": 1.154567794254172e-05, "loss": 0.2001, "step": 10168 }, { "epoch": 0.47, "grad_norm": 0.42723856561523843, "learning_rate": 1.1544207887749992e-05, "loss": 0.3135, "step": 10169 }, { "epoch": 0.47, "grad_norm": 1.1742859992214958, "learning_rate": 1.1542737798770989e-05, "loss": 0.6796, "step": 10170 }, { "epoch": 0.47, "grad_norm": 0.3351483656920204, "learning_rate": 1.1541267675637256e-05, "loss": 0.2276, "step": 10171 }, { "epoch": 0.47, "grad_norm": 0.4592562570752391, "learning_rate": 1.1539797518381344e-05, "loss": 0.3451, "step": 10172 }, { "epoch": 0.47, "grad_norm": 0.3768267462197124, "learning_rate": 1.1538327327035799e-05, "loss": 0.2628, "step": 10173 }, { "epoch": 0.47, "grad_norm": 0.3028772591952371, "learning_rate": 1.1536857101633168e-05, "loss": 0.2216, "step": 10174 }, { "epoch": 0.47, "grad_norm": 0.8891457891304007, "learning_rate": 1.1535386842206006e-05, "loss": 0.3362, "step": 10175 }, { "epoch": 0.47, "grad_norm": 0.38591006043636733, "learning_rate": 1.1533916548786856e-05, "loss": 0.3243, "step": 10176 }, { "epoch": 0.47, "grad_norm": 0.38706503699571043, "learning_rate": 1.1532446221408274e-05, "loss": 0.2729, "step": 10177 }, { "epoch": 0.47, "grad_norm": 0.6405423489008124, "learning_rate": 1.1530975860102805e-05, "loss": 0.3426, "step": 10178 }, { "epoch": 0.47, "grad_norm": 0.32671437326514413, "learning_rate": 1.152950546490301e-05, "loss": 0.1948, "step": 10179 }, { "epoch": 0.47, "grad_norm": 0.4362351742549119, "learning_rate": 1.1528035035841438e-05, "loss": 0.2797, "step": 10180 }, { "epoch": 0.47, "grad_norm": 0.3217913873289673, "learning_rate": 1.1526564572950643e-05, "loss": 0.2483, "step": 10181 }, { "epoch": 0.47, "grad_norm": 0.6902061746318561, "learning_rate": 1.1525094076263177e-05, "loss": 0.4961, "step": 10182 }, { "epoch": 0.47, "grad_norm": 0.42200558154878137, "learning_rate": 1.1523623545811603e-05, "loss": 0.2739, "step": 10183 }, { "epoch": 0.47, "grad_norm": 0.31679117784970634, "learning_rate": 1.1522152981628465e-05, "loss": 0.2676, "step": 10184 }, { "epoch": 0.47, "grad_norm": 1.2663529047614424, "learning_rate": 1.1520682383746334e-05, "loss": 0.651, "step": 10185 }, { "epoch": 0.47, "grad_norm": 0.24770425953787112, "learning_rate": 1.151921175219776e-05, "loss": 0.1583, "step": 10186 }, { "epoch": 0.47, "grad_norm": 0.5926337662263135, "learning_rate": 1.1517741087015297e-05, "loss": 0.3936, "step": 10187 }, { "epoch": 0.47, "grad_norm": 0.36496979234608184, "learning_rate": 1.1516270388231513e-05, "loss": 0.2693, "step": 10188 }, { "epoch": 0.47, "grad_norm": 0.3565303636247562, "learning_rate": 1.1514799655878964e-05, "loss": 0.3038, "step": 10189 }, { "epoch": 0.47, "grad_norm": 0.7211374844523986, "learning_rate": 1.151332888999021e-05, "loss": 0.5078, "step": 10190 }, { "epoch": 0.47, "grad_norm": 0.29169914698755656, "learning_rate": 1.151185809059781e-05, "loss": 0.1417, "step": 10191 }, { "epoch": 0.47, "grad_norm": 0.2848598452822274, "learning_rate": 1.1510387257734332e-05, "loss": 0.2144, "step": 10192 }, { "epoch": 0.47, "grad_norm": 0.4000164331422373, "learning_rate": 1.1508916391432337e-05, "loss": 0.3232, "step": 10193 }, { "epoch": 0.47, "grad_norm": 0.6592543777555149, "learning_rate": 1.1507445491724387e-05, "loss": 0.3274, "step": 10194 }, { "epoch": 0.47, "grad_norm": 0.3744945105204196, "learning_rate": 1.1505974558643045e-05, "loss": 0.2813, "step": 10195 }, { "epoch": 0.47, "grad_norm": 0.48982917350955646, "learning_rate": 1.1504503592220879e-05, "loss": 0.3489, "step": 10196 }, { "epoch": 0.47, "grad_norm": 0.34494703773002466, "learning_rate": 1.150303259249045e-05, "loss": 0.2195, "step": 10197 }, { "epoch": 0.47, "grad_norm": 0.3435925037013483, "learning_rate": 1.1501561559484334e-05, "loss": 0.1931, "step": 10198 }, { "epoch": 0.47, "grad_norm": 0.4365245082354594, "learning_rate": 1.1500090493235088e-05, "loss": 0.3136, "step": 10199 }, { "epoch": 0.47, "grad_norm": 0.491218393856028, "learning_rate": 1.1498619393775287e-05, "loss": 0.3142, "step": 10200 }, { "epoch": 0.47, "grad_norm": 0.3744755480645695, "learning_rate": 1.1497148261137495e-05, "loss": 0.2145, "step": 10201 }, { "epoch": 0.47, "grad_norm": 0.5684253867297732, "learning_rate": 1.1495677095354283e-05, "loss": 0.4221, "step": 10202 }, { "epoch": 0.47, "grad_norm": 0.8930526729422205, "learning_rate": 1.1494205896458224e-05, "loss": 0.5626, "step": 10203 }, { "epoch": 0.47, "grad_norm": 0.23963355279530044, "learning_rate": 1.1492734664481886e-05, "loss": 0.1649, "step": 10204 }, { "epoch": 0.47, "grad_norm": 0.29581266240009396, "learning_rate": 1.1491263399457841e-05, "loss": 0.2571, "step": 10205 }, { "epoch": 0.47, "grad_norm": 0.7106020016739378, "learning_rate": 1.148979210141866e-05, "loss": 0.4159, "step": 10206 }, { "epoch": 0.47, "grad_norm": 0.36174752534838955, "learning_rate": 1.1488320770396919e-05, "loss": 0.2142, "step": 10207 }, { "epoch": 0.47, "grad_norm": 0.3289790523624033, "learning_rate": 1.1486849406425189e-05, "loss": 0.2841, "step": 10208 }, { "epoch": 0.47, "grad_norm": 1.1218822824462256, "learning_rate": 1.148537800953605e-05, "loss": 0.474, "step": 10209 }, { "epoch": 0.47, "grad_norm": 0.2687911136569279, "learning_rate": 1.1483906579762072e-05, "loss": 0.1649, "step": 10210 }, { "epoch": 0.47, "grad_norm": 0.5015248941606871, "learning_rate": 1.148243511713583e-05, "loss": 0.295, "step": 10211 }, { "epoch": 0.47, "grad_norm": 0.37488584104066136, "learning_rate": 1.1480963621689904e-05, "loss": 0.3237, "step": 10212 }, { "epoch": 0.47, "grad_norm": 0.43109118287211484, "learning_rate": 1.1479492093456874e-05, "loss": 0.3259, "step": 10213 }, { "epoch": 0.47, "grad_norm": 0.5191117791983719, "learning_rate": 1.1478020532469311e-05, "loss": 0.285, "step": 10214 }, { "epoch": 0.47, "grad_norm": 1.648772730083126, "learning_rate": 1.1476548938759803e-05, "loss": 0.6368, "step": 10215 }, { "epoch": 0.47, "grad_norm": 0.33326809680769903, "learning_rate": 1.1475077312360921e-05, "loss": 0.2618, "step": 10216 }, { "epoch": 0.47, "grad_norm": 0.33912314075181815, "learning_rate": 1.1473605653305248e-05, "loss": 0.209, "step": 10217 }, { "epoch": 0.47, "grad_norm": 0.40825646366260115, "learning_rate": 1.1472133961625368e-05, "loss": 0.3281, "step": 10218 }, { "epoch": 0.47, "grad_norm": 0.9115765089233979, "learning_rate": 1.147066223735386e-05, "loss": 0.5649, "step": 10219 }, { "epoch": 0.47, "grad_norm": 0.2647785821512999, "learning_rate": 1.1469190480523308e-05, "loss": 0.234, "step": 10220 }, { "epoch": 0.47, "grad_norm": 1.0759309611842651, "learning_rate": 1.1467718691166296e-05, "loss": 0.6602, "step": 10221 }, { "epoch": 0.47, "grad_norm": 0.58331837780062, "learning_rate": 1.1466246869315407e-05, "loss": 0.3291, "step": 10222 }, { "epoch": 0.47, "grad_norm": 0.3630081565728696, "learning_rate": 1.1464775015003223e-05, "loss": 0.2737, "step": 10223 }, { "epoch": 0.47, "grad_norm": 0.3375725919254272, "learning_rate": 1.1463303128262332e-05, "loss": 0.2627, "step": 10224 }, { "epoch": 0.47, "grad_norm": 0.3891784965956488, "learning_rate": 1.146183120912532e-05, "loss": 0.2516, "step": 10225 }, { "epoch": 0.47, "grad_norm": 0.4823553011029802, "learning_rate": 1.1460359257624775e-05, "loss": 0.3168, "step": 10226 }, { "epoch": 0.47, "grad_norm": 0.8738341770979363, "learning_rate": 1.1458887273793284e-05, "loss": 0.3351, "step": 10227 }, { "epoch": 0.47, "grad_norm": 0.33517463305405243, "learning_rate": 1.1457415257663436e-05, "loss": 0.2987, "step": 10228 }, { "epoch": 0.47, "grad_norm": 0.40067792989432044, "learning_rate": 1.1455943209267817e-05, "loss": 0.292, "step": 10229 }, { "epoch": 0.47, "grad_norm": 0.29907360793259075, "learning_rate": 1.145447112863902e-05, "loss": 0.1861, "step": 10230 }, { "epoch": 0.47, "grad_norm": 0.3974779189034825, "learning_rate": 1.1452999015809629e-05, "loss": 0.2666, "step": 10231 }, { "epoch": 0.47, "grad_norm": 0.4168640175920408, "learning_rate": 1.1451526870812247e-05, "loss": 0.3396, "step": 10232 }, { "epoch": 0.47, "grad_norm": 0.4938468291372108, "learning_rate": 1.1450054693679455e-05, "loss": 0.2815, "step": 10233 }, { "epoch": 0.47, "grad_norm": 0.49496518119075433, "learning_rate": 1.1448582484443853e-05, "loss": 0.3112, "step": 10234 }, { "epoch": 0.47, "grad_norm": 0.40971457986124626, "learning_rate": 1.1447110243138029e-05, "loss": 0.3512, "step": 10235 }, { "epoch": 0.47, "grad_norm": 0.28601452017274603, "learning_rate": 1.1445637969794578e-05, "loss": 0.2, "step": 10236 }, { "epoch": 0.47, "grad_norm": 0.8234833694499625, "learning_rate": 1.1444165664446097e-05, "loss": 0.5126, "step": 10237 }, { "epoch": 0.47, "grad_norm": 0.2910269256410644, "learning_rate": 1.1442693327125179e-05, "loss": 0.2373, "step": 10238 }, { "epoch": 0.47, "grad_norm": 0.445956859376595, "learning_rate": 1.1441220957864421e-05, "loss": 0.3613, "step": 10239 }, { "epoch": 0.47, "grad_norm": 0.4212051157676677, "learning_rate": 1.1439748556696422e-05, "loss": 0.2633, "step": 10240 }, { "epoch": 0.47, "grad_norm": 0.4105376894196895, "learning_rate": 1.1438276123653777e-05, "loss": 0.2974, "step": 10241 }, { "epoch": 0.47, "grad_norm": 0.44955678308112823, "learning_rate": 1.1436803658769082e-05, "loss": 0.25, "step": 10242 }, { "epoch": 0.47, "grad_norm": 0.4034492310112792, "learning_rate": 1.1435331162074944e-05, "loss": 0.2406, "step": 10243 }, { "epoch": 0.47, "grad_norm": 0.2614146797009579, "learning_rate": 1.143385863360395e-05, "loss": 0.2495, "step": 10244 }, { "epoch": 0.47, "grad_norm": 0.9287387766195588, "learning_rate": 1.1432386073388718e-05, "loss": 0.5735, "step": 10245 }, { "epoch": 0.47, "grad_norm": 0.4761256895277955, "learning_rate": 1.1430913481461831e-05, "loss": 0.2654, "step": 10246 }, { "epoch": 0.47, "grad_norm": 0.41709491754546196, "learning_rate": 1.1429440857855903e-05, "loss": 0.3101, "step": 10247 }, { "epoch": 0.47, "grad_norm": 0.39695366596301934, "learning_rate": 1.142796820260353e-05, "loss": 0.3024, "step": 10248 }, { "epoch": 0.47, "grad_norm": 0.42567568059294664, "learning_rate": 1.1426495515737314e-05, "loss": 0.2345, "step": 10249 }, { "epoch": 0.47, "grad_norm": 0.3062563473505486, "learning_rate": 1.1425022797289869e-05, "loss": 0.2184, "step": 10250 }, { "epoch": 0.47, "grad_norm": 0.4846357718123354, "learning_rate": 1.142355004729379e-05, "loss": 0.2796, "step": 10251 }, { "epoch": 0.47, "grad_norm": 0.4793455563062799, "learning_rate": 1.1422077265781684e-05, "loss": 0.3047, "step": 10252 }, { "epoch": 0.47, "grad_norm": 0.37087493666425386, "learning_rate": 1.1420604452786158e-05, "loss": 0.2371, "step": 10253 }, { "epoch": 0.47, "grad_norm": 1.2210868350158257, "learning_rate": 1.1419131608339817e-05, "loss": 0.6439, "step": 10254 }, { "epoch": 0.47, "grad_norm": 0.4855698945279773, "learning_rate": 1.1417658732475272e-05, "loss": 0.3809, "step": 10255 }, { "epoch": 0.47, "grad_norm": 0.2637302181082653, "learning_rate": 1.1416185825225128e-05, "loss": 0.2055, "step": 10256 }, { "epoch": 0.47, "grad_norm": 0.4818568528380723, "learning_rate": 1.1414712886621997e-05, "loss": 0.3007, "step": 10257 }, { "epoch": 0.47, "grad_norm": 0.4345055090756826, "learning_rate": 1.1413239916698486e-05, "loss": 0.2686, "step": 10258 }, { "epoch": 0.47, "grad_norm": 0.38136010183540703, "learning_rate": 1.14117669154872e-05, "loss": 0.2156, "step": 10259 }, { "epoch": 0.47, "grad_norm": 0.3898164021292159, "learning_rate": 1.1410293883020762e-05, "loss": 0.3046, "step": 10260 }, { "epoch": 0.47, "grad_norm": 1.0238676149240145, "learning_rate": 1.1408820819331771e-05, "loss": 0.663, "step": 10261 }, { "epoch": 0.47, "grad_norm": 0.33782345567894206, "learning_rate": 1.140734772445285e-05, "loss": 0.2364, "step": 10262 }, { "epoch": 0.47, "grad_norm": 0.4340291922830695, "learning_rate": 1.1405874598416608e-05, "loss": 0.3574, "step": 10263 }, { "epoch": 0.47, "grad_norm": 0.27849993533083767, "learning_rate": 1.1404401441255652e-05, "loss": 0.1976, "step": 10264 }, { "epoch": 0.47, "grad_norm": 0.4449873625792461, "learning_rate": 1.1402928253002606e-05, "loss": 0.2765, "step": 10265 }, { "epoch": 0.47, "grad_norm": 1.0691891015940598, "learning_rate": 1.1401455033690076e-05, "loss": 0.3985, "step": 10266 }, { "epoch": 0.47, "grad_norm": 0.34771383805519146, "learning_rate": 1.1399981783350685e-05, "loss": 0.3077, "step": 10267 }, { "epoch": 0.47, "grad_norm": 0.39605919001193546, "learning_rate": 1.1398508502017047e-05, "loss": 0.2845, "step": 10268 }, { "epoch": 0.47, "grad_norm": 0.6816193445514331, "learning_rate": 1.1397035189721779e-05, "loss": 0.3356, "step": 10269 }, { "epoch": 0.47, "grad_norm": 0.31991346895367145, "learning_rate": 1.1395561846497495e-05, "loss": 0.2039, "step": 10270 }, { "epoch": 0.47, "grad_norm": 0.35895653795127924, "learning_rate": 1.139408847237682e-05, "loss": 0.248, "step": 10271 }, { "epoch": 0.47, "grad_norm": 0.3809574960695648, "learning_rate": 1.1392615067392368e-05, "loss": 0.2787, "step": 10272 }, { "epoch": 0.47, "grad_norm": 0.6639648479341921, "learning_rate": 1.1391141631576762e-05, "loss": 0.4803, "step": 10273 }, { "epoch": 0.47, "grad_norm": 0.3423545991517534, "learning_rate": 1.1389668164962622e-05, "loss": 0.2647, "step": 10274 }, { "epoch": 0.47, "grad_norm": 0.31776632972741903, "learning_rate": 1.1388194667582565e-05, "loss": 0.2879, "step": 10275 }, { "epoch": 0.47, "grad_norm": 0.2779867389835172, "learning_rate": 1.138672113946922e-05, "loss": 0.161, "step": 10276 }, { "epoch": 0.47, "grad_norm": 0.36519260585574503, "learning_rate": 1.1385247580655203e-05, "loss": 0.2814, "step": 10277 }, { "epoch": 0.47, "grad_norm": 0.9885177349014758, "learning_rate": 1.138377399117314e-05, "loss": 0.6528, "step": 10278 }, { "epoch": 0.47, "grad_norm": 0.43579339112583904, "learning_rate": 1.1382300371055655e-05, "loss": 0.2766, "step": 10279 }, { "epoch": 0.47, "grad_norm": 0.39417927095653055, "learning_rate": 1.1380826720335372e-05, "loss": 0.2855, "step": 10280 }, { "epoch": 0.47, "grad_norm": 0.8743122534537983, "learning_rate": 1.1379353039044915e-05, "loss": 0.6, "step": 10281 }, { "epoch": 0.47, "grad_norm": 0.2000181944542249, "learning_rate": 1.1377879327216914e-05, "loss": 0.0901, "step": 10282 }, { "epoch": 0.47, "grad_norm": 0.3458166170039005, "learning_rate": 1.1376405584883989e-05, "loss": 0.2575, "step": 10283 }, { "epoch": 0.47, "grad_norm": 0.39648603757402784, "learning_rate": 1.1374931812078774e-05, "loss": 0.3212, "step": 10284 }, { "epoch": 0.47, "grad_norm": 0.6135255567182756, "learning_rate": 1.137345800883389e-05, "loss": 0.3187, "step": 10285 }, { "epoch": 0.47, "grad_norm": 0.3716330568238397, "learning_rate": 1.1371984175181975e-05, "loss": 0.2929, "step": 10286 }, { "epoch": 0.47, "grad_norm": 0.5060002001710548, "learning_rate": 1.1370510311155649e-05, "loss": 0.3895, "step": 10287 }, { "epoch": 0.47, "grad_norm": 0.25080205312299664, "learning_rate": 1.1369036416787547e-05, "loss": 0.1821, "step": 10288 }, { "epoch": 0.47, "grad_norm": 0.3124648326795265, "learning_rate": 1.1367562492110295e-05, "loss": 0.1889, "step": 10289 }, { "epoch": 0.47, "grad_norm": 0.69978826119039, "learning_rate": 1.1366088537156531e-05, "loss": 0.46, "step": 10290 }, { "epoch": 0.47, "grad_norm": 0.37295039236498245, "learning_rate": 1.136461455195888e-05, "loss": 0.3156, "step": 10291 }, { "epoch": 0.47, "grad_norm": 0.33243915659654566, "learning_rate": 1.1363140536549985e-05, "loss": 0.2319, "step": 10292 }, { "epoch": 0.47, "grad_norm": 0.9001388826056123, "learning_rate": 1.1361666490962468e-05, "loss": 0.6388, "step": 10293 }, { "epoch": 0.47, "grad_norm": 0.297744693106454, "learning_rate": 1.1360192415228966e-05, "loss": 0.1802, "step": 10294 }, { "epoch": 0.47, "grad_norm": 0.2961885815447325, "learning_rate": 1.1358718309382117e-05, "loss": 0.2233, "step": 10295 }, { "epoch": 0.47, "grad_norm": 0.5072913810635437, "learning_rate": 1.1357244173454554e-05, "loss": 0.3217, "step": 10296 }, { "epoch": 0.47, "grad_norm": 0.7473094718037766, "learning_rate": 1.1355770007478911e-05, "loss": 0.4135, "step": 10297 }, { "epoch": 0.47, "grad_norm": 0.3205890023637088, "learning_rate": 1.135429581148783e-05, "loss": 0.2159, "step": 10298 }, { "epoch": 0.47, "grad_norm": 0.48876937748996746, "learning_rate": 1.1352821585513944e-05, "loss": 0.3569, "step": 10299 }, { "epoch": 0.47, "grad_norm": 0.3382032736580527, "learning_rate": 1.135134732958989e-05, "loss": 0.2527, "step": 10300 }, { "epoch": 0.47, "grad_norm": 0.28534189833192425, "learning_rate": 1.1349873043748309e-05, "loss": 0.2197, "step": 10301 }, { "epoch": 0.47, "grad_norm": 0.8703848331355342, "learning_rate": 1.1348398728021839e-05, "loss": 0.3579, "step": 10302 }, { "epoch": 0.47, "grad_norm": 0.3884191793569075, "learning_rate": 1.1346924382443123e-05, "loss": 0.3269, "step": 10303 }, { "epoch": 0.47, "grad_norm": 0.6195843442666087, "learning_rate": 1.13454500070448e-05, "loss": 0.3559, "step": 10304 }, { "epoch": 0.47, "grad_norm": 0.4020835947342085, "learning_rate": 1.134397560185951e-05, "loss": 0.264, "step": 10305 }, { "epoch": 0.47, "grad_norm": 0.31855783027763895, "learning_rate": 1.1342501166919892e-05, "loss": 0.2141, "step": 10306 }, { "epoch": 0.47, "grad_norm": 0.4312139279807672, "learning_rate": 1.1341026702258597e-05, "loss": 0.2995, "step": 10307 }, { "epoch": 0.47, "grad_norm": 0.7585701943419573, "learning_rate": 1.1339552207908261e-05, "loss": 0.2959, "step": 10308 }, { "epoch": 0.47, "grad_norm": 1.5999188780741584, "learning_rate": 1.133807768390153e-05, "loss": 0.8873, "step": 10309 }, { "epoch": 0.47, "grad_norm": 0.42757841261243795, "learning_rate": 1.133660313027105e-05, "loss": 0.3068, "step": 10310 }, { "epoch": 0.47, "grad_norm": 0.2867664246976474, "learning_rate": 1.1335128547049465e-05, "loss": 0.2456, "step": 10311 }, { "epoch": 0.47, "grad_norm": 1.1522713996381815, "learning_rate": 1.133365393426942e-05, "loss": 0.6862, "step": 10312 }, { "epoch": 0.47, "grad_norm": 0.2802975675512129, "learning_rate": 1.133217929196356e-05, "loss": 0.225, "step": 10313 }, { "epoch": 0.47, "grad_norm": 0.5019230593365495, "learning_rate": 1.133070462016454e-05, "loss": 0.3705, "step": 10314 }, { "epoch": 0.47, "grad_norm": 0.27335855591609165, "learning_rate": 1.1329229918904998e-05, "loss": 0.2194, "step": 10315 }, { "epoch": 0.47, "grad_norm": 0.35374739141694717, "learning_rate": 1.1327755188217589e-05, "loss": 0.2562, "step": 10316 }, { "epoch": 0.47, "grad_norm": 1.1337975192663041, "learning_rate": 1.1326280428134955e-05, "loss": 0.7098, "step": 10317 }, { "epoch": 0.47, "grad_norm": 0.43959187821650814, "learning_rate": 1.1324805638689755e-05, "loss": 0.2591, "step": 10318 }, { "epoch": 0.47, "grad_norm": 0.31528131134359977, "learning_rate": 1.1323330819914633e-05, "loss": 0.2631, "step": 10319 }, { "epoch": 0.47, "grad_norm": 0.4423086109707012, "learning_rate": 1.1321855971842243e-05, "loss": 0.3337, "step": 10320 }, { "epoch": 0.47, "grad_norm": 0.18445951665694407, "learning_rate": 1.1320381094505237e-05, "loss": 0.1166, "step": 10321 }, { "epoch": 0.47, "grad_norm": 0.6696726318044355, "learning_rate": 1.1318906187936264e-05, "loss": 0.3999, "step": 10322 }, { "epoch": 0.47, "grad_norm": 0.33117515248752394, "learning_rate": 1.1317431252167982e-05, "loss": 0.2855, "step": 10323 }, { "epoch": 0.47, "grad_norm": 0.5654776779593655, "learning_rate": 1.1315956287233041e-05, "loss": 0.3441, "step": 10324 }, { "epoch": 0.47, "grad_norm": 0.5920476639011267, "learning_rate": 1.1314481293164094e-05, "loss": 0.3681, "step": 10325 }, { "epoch": 0.47, "grad_norm": 0.3288642528875592, "learning_rate": 1.1313006269993798e-05, "loss": 0.2635, "step": 10326 }, { "epoch": 0.47, "grad_norm": 0.30848342236159026, "learning_rate": 1.1311531217754811e-05, "loss": 0.2537, "step": 10327 }, { "epoch": 0.47, "grad_norm": 0.35074493176966903, "learning_rate": 1.1310056136479788e-05, "loss": 0.2037, "step": 10328 }, { "epoch": 0.47, "grad_norm": 0.5465964774385761, "learning_rate": 1.1308581026201382e-05, "loss": 0.379, "step": 10329 }, { "epoch": 0.47, "grad_norm": 0.7923109677071862, "learning_rate": 1.1307105886952252e-05, "loss": 0.4294, "step": 10330 }, { "epoch": 0.47, "grad_norm": 0.2705009215670562, "learning_rate": 1.1305630718765062e-05, "loss": 0.2311, "step": 10331 }, { "epoch": 0.47, "grad_norm": 0.52122179232924, "learning_rate": 1.1304155521672462e-05, "loss": 0.3922, "step": 10332 }, { "epoch": 0.47, "grad_norm": 0.4841463485034858, "learning_rate": 1.130268029570712e-05, "loss": 0.3063, "step": 10333 }, { "epoch": 0.47, "grad_norm": 0.2555582088417973, "learning_rate": 1.1301205040901688e-05, "loss": 0.1522, "step": 10334 }, { "epoch": 0.47, "grad_norm": 0.34725897140016204, "learning_rate": 1.1299729757288831e-05, "loss": 0.283, "step": 10335 }, { "epoch": 0.47, "grad_norm": 0.7768461671055017, "learning_rate": 1.129825444490121e-05, "loss": 0.3859, "step": 10336 }, { "epoch": 0.47, "grad_norm": 0.3524429920000527, "learning_rate": 1.129677910377149e-05, "loss": 0.2158, "step": 10337 }, { "epoch": 0.47, "grad_norm": 0.5036851506241941, "learning_rate": 1.1295303733932327e-05, "loss": 0.3666, "step": 10338 }, { "epoch": 0.47, "grad_norm": 0.2857278056397454, "learning_rate": 1.129382833541639e-05, "loss": 0.2792, "step": 10339 }, { "epoch": 0.48, "grad_norm": 0.8514205573203432, "learning_rate": 1.1292352908256339e-05, "loss": 0.5409, "step": 10340 }, { "epoch": 0.48, "grad_norm": 0.3151899311681576, "learning_rate": 1.1290877452484839e-05, "loss": 0.2029, "step": 10341 }, { "epoch": 0.48, "grad_norm": 0.6038445200055288, "learning_rate": 1.1289401968134559e-05, "loss": 0.3361, "step": 10342 }, { "epoch": 0.48, "grad_norm": 0.47885151197512227, "learning_rate": 1.1287926455238161e-05, "loss": 0.3372, "step": 10343 }, { "epoch": 0.48, "grad_norm": 0.3287649613145602, "learning_rate": 1.1286450913828313e-05, "loss": 0.2238, "step": 10344 }, { "epoch": 0.48, "grad_norm": 1.0318625080307138, "learning_rate": 1.128497534393768e-05, "loss": 0.6484, "step": 10345 }, { "epoch": 0.48, "grad_norm": 0.3484331906177394, "learning_rate": 1.1283499745598934e-05, "loss": 0.224, "step": 10346 }, { "epoch": 0.48, "grad_norm": 0.2508115041356779, "learning_rate": 1.1282024118844739e-05, "loss": 0.2142, "step": 10347 }, { "epoch": 0.48, "grad_norm": 0.8771639175612558, "learning_rate": 1.1280548463707768e-05, "loss": 0.4832, "step": 10348 }, { "epoch": 0.48, "grad_norm": 0.5115356403139549, "learning_rate": 1.1279072780220683e-05, "loss": 0.305, "step": 10349 }, { "epoch": 0.48, "grad_norm": 0.4584300789461833, "learning_rate": 1.1277597068416164e-05, "loss": 0.2214, "step": 10350 }, { "epoch": 0.48, "grad_norm": 0.42663526425557635, "learning_rate": 1.1276121328326877e-05, "loss": 0.3658, "step": 10351 }, { "epoch": 0.48, "grad_norm": 0.6576434355627522, "learning_rate": 1.1274645559985493e-05, "loss": 0.3718, "step": 10352 }, { "epoch": 0.48, "grad_norm": 0.4721620815114536, "learning_rate": 1.1273169763424686e-05, "loss": 0.3281, "step": 10353 }, { "epoch": 0.48, "grad_norm": 0.4333122680678041, "learning_rate": 1.1271693938677125e-05, "loss": 0.2161, "step": 10354 }, { "epoch": 0.48, "grad_norm": 0.3954661912479092, "learning_rate": 1.1270218085775486e-05, "loss": 0.2726, "step": 10355 }, { "epoch": 0.48, "grad_norm": 0.3980889572487219, "learning_rate": 1.1268742204752443e-05, "loss": 0.3231, "step": 10356 }, { "epoch": 0.48, "grad_norm": 0.8014019329086285, "learning_rate": 1.1267266295640674e-05, "loss": 0.465, "step": 10357 }, { "epoch": 0.48, "grad_norm": 0.5926428564063171, "learning_rate": 1.1265790358472845e-05, "loss": 0.3616, "step": 10358 }, { "epoch": 0.48, "grad_norm": 0.28229545465090206, "learning_rate": 1.1264314393281644e-05, "loss": 0.2734, "step": 10359 }, { "epoch": 0.48, "grad_norm": 0.2784193430339106, "learning_rate": 1.1262838400099733e-05, "loss": 0.1592, "step": 10360 }, { "epoch": 0.48, "grad_norm": 0.7779545616168673, "learning_rate": 1.1261362378959804e-05, "loss": 0.3976, "step": 10361 }, { "epoch": 0.48, "grad_norm": 0.36453024874176576, "learning_rate": 1.1259886329894525e-05, "loss": 0.272, "step": 10362 }, { "epoch": 0.48, "grad_norm": 0.379909447301651, "learning_rate": 1.1258410252936579e-05, "loss": 0.2593, "step": 10363 }, { "epoch": 0.48, "grad_norm": 0.8047358860883154, "learning_rate": 1.1256934148118638e-05, "loss": 0.3858, "step": 10364 }, { "epoch": 0.48, "grad_norm": 0.37764827946750706, "learning_rate": 1.1255458015473389e-05, "loss": 0.3089, "step": 10365 }, { "epoch": 0.48, "grad_norm": 0.4906176457167061, "learning_rate": 1.1253981855033506e-05, "loss": 0.2446, "step": 10366 }, { "epoch": 0.48, "grad_norm": 0.28792063780380467, "learning_rate": 1.1252505666831678e-05, "loss": 0.2103, "step": 10367 }, { "epoch": 0.48, "grad_norm": 0.42098664105199396, "learning_rate": 1.1251029450900583e-05, "loss": 0.3091, "step": 10368 }, { "epoch": 0.48, "grad_norm": 1.316902525492053, "learning_rate": 1.12495532072729e-05, "loss": 0.7238, "step": 10369 }, { "epoch": 0.48, "grad_norm": 0.5179790073499815, "learning_rate": 1.1248076935981313e-05, "loss": 0.2572, "step": 10370 }, { "epoch": 0.48, "grad_norm": 0.340750577520889, "learning_rate": 1.1246600637058504e-05, "loss": 0.2853, "step": 10371 }, { "epoch": 0.48, "grad_norm": 0.42242178433156535, "learning_rate": 1.1245124310537162e-05, "loss": 0.3107, "step": 10372 }, { "epoch": 0.48, "grad_norm": 0.22737409600273112, "learning_rate": 1.1243647956449962e-05, "loss": 0.0977, "step": 10373 }, { "epoch": 0.48, "grad_norm": 0.4625992143785026, "learning_rate": 1.1242171574829599e-05, "loss": 0.3135, "step": 10374 }, { "epoch": 0.48, "grad_norm": 0.5067547115062119, "learning_rate": 1.1240695165708756e-05, "loss": 0.3233, "step": 10375 }, { "epoch": 0.48, "grad_norm": 0.873340469094776, "learning_rate": 1.1239218729120116e-05, "loss": 0.3334, "step": 10376 }, { "epoch": 0.48, "grad_norm": 0.3766129159465697, "learning_rate": 1.1237742265096365e-05, "loss": 0.2573, "step": 10377 }, { "epoch": 0.48, "grad_norm": 0.38241036402717354, "learning_rate": 1.1236265773670196e-05, "loss": 0.3241, "step": 10378 }, { "epoch": 0.48, "grad_norm": 0.27457262063928434, "learning_rate": 1.1234789254874292e-05, "loss": 0.1985, "step": 10379 }, { "epoch": 0.48, "grad_norm": 0.34112933437618415, "learning_rate": 1.123331270874135e-05, "loss": 0.2137, "step": 10380 }, { "epoch": 0.48, "grad_norm": 0.9560648293149798, "learning_rate": 1.1231836135304048e-05, "loss": 0.4868, "step": 10381 }, { "epoch": 0.48, "grad_norm": 0.42202100817100363, "learning_rate": 1.1230359534595083e-05, "loss": 0.3213, "step": 10382 }, { "epoch": 0.48, "grad_norm": 0.3286914144069076, "learning_rate": 1.1228882906647142e-05, "loss": 0.2192, "step": 10383 }, { "epoch": 0.48, "grad_norm": 1.1711469445351945, "learning_rate": 1.1227406251492918e-05, "loss": 0.6999, "step": 10384 }, { "epoch": 0.48, "grad_norm": 0.3110423390905839, "learning_rate": 1.1225929569165107e-05, "loss": 0.164, "step": 10385 }, { "epoch": 0.48, "grad_norm": 0.2548628792637335, "learning_rate": 1.1224452859696392e-05, "loss": 0.2194, "step": 10386 }, { "epoch": 0.48, "grad_norm": 0.6524479101520946, "learning_rate": 1.1222976123119473e-05, "loss": 0.35, "step": 10387 }, { "epoch": 0.48, "grad_norm": 0.8857400083685697, "learning_rate": 1.122149935946704e-05, "loss": 0.5202, "step": 10388 }, { "epoch": 0.48, "grad_norm": 0.35600713296597736, "learning_rate": 1.1220022568771788e-05, "loss": 0.1922, "step": 10389 }, { "epoch": 0.48, "grad_norm": 0.38658788927666876, "learning_rate": 1.1218545751066414e-05, "loss": 0.3249, "step": 10390 }, { "epoch": 0.48, "grad_norm": 0.2527668790943352, "learning_rate": 1.1217068906383613e-05, "loss": 0.1617, "step": 10391 }, { "epoch": 0.48, "grad_norm": 0.3834348457702718, "learning_rate": 1.1215592034756077e-05, "loss": 0.2592, "step": 10392 }, { "epoch": 0.48, "grad_norm": 0.9949861829669839, "learning_rate": 1.1214115136216506e-05, "loss": 0.3621, "step": 10393 }, { "epoch": 0.48, "grad_norm": 0.3859834539232413, "learning_rate": 1.1212638210797594e-05, "loss": 0.3407, "step": 10394 }, { "epoch": 0.48, "grad_norm": 0.30591351181056525, "learning_rate": 1.1211161258532042e-05, "loss": 0.24, "step": 10395 }, { "epoch": 0.48, "grad_norm": 0.742687768117351, "learning_rate": 1.1209684279452546e-05, "loss": 0.3602, "step": 10396 }, { "epoch": 0.48, "grad_norm": 0.30733927886895707, "learning_rate": 1.120820727359181e-05, "loss": 0.1833, "step": 10397 }, { "epoch": 0.48, "grad_norm": 0.372134978281447, "learning_rate": 1.1206730240982528e-05, "loss": 0.2581, "step": 10398 }, { "epoch": 0.48, "grad_norm": 0.46219096621048067, "learning_rate": 1.1205253181657399e-05, "loss": 0.3009, "step": 10399 }, { "epoch": 0.48, "grad_norm": 1.134721235434766, "learning_rate": 1.1203776095649128e-05, "loss": 0.7256, "step": 10400 }, { "epoch": 0.48, "grad_norm": 0.3412777286918592, "learning_rate": 1.1202298982990411e-05, "loss": 0.2431, "step": 10401 }, { "epoch": 0.48, "grad_norm": 0.5081468439957402, "learning_rate": 1.1200821843713957e-05, "loss": 0.3602, "step": 10402 }, { "epoch": 0.48, "grad_norm": 0.2477793485426281, "learning_rate": 1.1199344677852466e-05, "loss": 0.172, "step": 10403 }, { "epoch": 0.48, "grad_norm": 0.3075261009742982, "learning_rate": 1.1197867485438639e-05, "loss": 0.2802, "step": 10404 }, { "epoch": 0.48, "grad_norm": 0.6733694983512934, "learning_rate": 1.1196390266505177e-05, "loss": 0.4787, "step": 10405 }, { "epoch": 0.48, "grad_norm": 0.3575938386729694, "learning_rate": 1.119491302108479e-05, "loss": 0.2811, "step": 10406 }, { "epoch": 0.48, "grad_norm": 0.4654503493358451, "learning_rate": 1.1193435749210177e-05, "loss": 0.287, "step": 10407 }, { "epoch": 0.48, "grad_norm": 0.534033289151677, "learning_rate": 1.1191958450914051e-05, "loss": 0.3269, "step": 10408 }, { "epoch": 0.48, "grad_norm": 0.4256576207131305, "learning_rate": 1.1190481126229114e-05, "loss": 0.3059, "step": 10409 }, { "epoch": 0.48, "grad_norm": 0.453286447449527, "learning_rate": 1.1189003775188072e-05, "loss": 0.2999, "step": 10410 }, { "epoch": 0.48, "grad_norm": 0.25140840652982527, "learning_rate": 1.1187526397823629e-05, "loss": 0.2267, "step": 10411 }, { "epoch": 0.48, "grad_norm": 1.0343000981659312, "learning_rate": 1.1186048994168498e-05, "loss": 0.5581, "step": 10412 }, { "epoch": 0.48, "grad_norm": 0.3732133320214956, "learning_rate": 1.1184571564255386e-05, "loss": 0.2902, "step": 10413 }, { "epoch": 0.48, "grad_norm": 0.395518793607198, "learning_rate": 1.1183094108117001e-05, "loss": 0.3347, "step": 10414 }, { "epoch": 0.48, "grad_norm": 0.9108237040742793, "learning_rate": 1.1181616625786054e-05, "loss": 0.504, "step": 10415 }, { "epoch": 0.48, "grad_norm": 0.32794292198632086, "learning_rate": 1.1180139117295252e-05, "loss": 0.2453, "step": 10416 }, { "epoch": 0.48, "grad_norm": 0.4609480277122226, "learning_rate": 1.1178661582677309e-05, "loss": 0.3029, "step": 10417 }, { "epoch": 0.48, "grad_norm": 0.38068689510527065, "learning_rate": 1.1177184021964931e-05, "loss": 0.3185, "step": 10418 }, { "epoch": 0.48, "grad_norm": 0.2548184733522785, "learning_rate": 1.1175706435190836e-05, "loss": 0.1718, "step": 10419 }, { "epoch": 0.48, "grad_norm": 1.385584649017272, "learning_rate": 1.1174228822387731e-05, "loss": 0.6268, "step": 10420 }, { "epoch": 0.48, "grad_norm": 0.7638754660959325, "learning_rate": 1.1172751183588337e-05, "loss": 0.4243, "step": 10421 }, { "epoch": 0.48, "grad_norm": 0.26955637367568647, "learning_rate": 1.1171273518825358e-05, "loss": 0.2323, "step": 10422 }, { "epoch": 0.48, "grad_norm": 0.41421620458973796, "learning_rate": 1.1169795828131516e-05, "loss": 0.1927, "step": 10423 }, { "epoch": 0.48, "grad_norm": 0.4002641911615186, "learning_rate": 1.1168318111539518e-05, "loss": 0.2619, "step": 10424 }, { "epoch": 0.48, "grad_norm": 0.3372361609461174, "learning_rate": 1.1166840369082083e-05, "loss": 0.1617, "step": 10425 }, { "epoch": 0.48, "grad_norm": 0.3318177550830705, "learning_rate": 1.1165362600791927e-05, "loss": 0.3035, "step": 10426 }, { "epoch": 0.48, "grad_norm": 0.7190438087088021, "learning_rate": 1.116388480670177e-05, "loss": 0.4008, "step": 10427 }, { "epoch": 0.48, "grad_norm": 0.616686482761905, "learning_rate": 1.1162406986844322e-05, "loss": 0.277, "step": 10428 }, { "epoch": 0.48, "grad_norm": 0.38363452276997356, "learning_rate": 1.1160929141252303e-05, "loss": 0.2536, "step": 10429 }, { "epoch": 0.48, "grad_norm": 0.40029414295024074, "learning_rate": 1.1159451269958434e-05, "loss": 0.3627, "step": 10430 }, { "epoch": 0.48, "grad_norm": 0.2748184490233543, "learning_rate": 1.1157973372995429e-05, "loss": 0.1667, "step": 10431 }, { "epoch": 0.48, "grad_norm": 0.3509352153472403, "learning_rate": 1.1156495450396013e-05, "loss": 0.2693, "step": 10432 }, { "epoch": 0.48, "grad_norm": 0.7587009043983467, "learning_rate": 1.1155017502192899e-05, "loss": 0.4336, "step": 10433 }, { "epoch": 0.48, "grad_norm": 0.32325222741894793, "learning_rate": 1.1153539528418813e-05, "loss": 0.2756, "step": 10434 }, { "epoch": 0.48, "grad_norm": 0.4938881979964008, "learning_rate": 1.115206152910647e-05, "loss": 0.272, "step": 10435 }, { "epoch": 0.48, "grad_norm": 0.405804646060389, "learning_rate": 1.1150583504288598e-05, "loss": 0.25, "step": 10436 }, { "epoch": 0.48, "grad_norm": 0.2806747439880976, "learning_rate": 1.1149105453997915e-05, "loss": 0.2208, "step": 10437 }, { "epoch": 0.48, "grad_norm": 0.3283333334967406, "learning_rate": 1.1147627378267147e-05, "loss": 0.2728, "step": 10438 }, { "epoch": 0.48, "grad_norm": 0.9404324281567027, "learning_rate": 1.1146149277129013e-05, "loss": 0.4163, "step": 10439 }, { "epoch": 0.48, "grad_norm": 0.4173360998227774, "learning_rate": 1.114467115061624e-05, "loss": 0.2693, "step": 10440 }, { "epoch": 0.48, "grad_norm": 0.5734416808103949, "learning_rate": 1.114319299876155e-05, "loss": 0.3988, "step": 10441 }, { "epoch": 0.48, "grad_norm": 0.3347746686570038, "learning_rate": 1.114171482159767e-05, "loss": 0.2858, "step": 10442 }, { "epoch": 0.48, "grad_norm": 0.2695008301412517, "learning_rate": 1.1140236619157322e-05, "loss": 0.185, "step": 10443 }, { "epoch": 0.48, "grad_norm": 0.6402666690788119, "learning_rate": 1.1138758391473235e-05, "loss": 0.3683, "step": 10444 }, { "epoch": 0.48, "grad_norm": 0.2789258281802634, "learning_rate": 1.1137280138578137e-05, "loss": 0.2223, "step": 10445 }, { "epoch": 0.48, "grad_norm": 0.6394255792266542, "learning_rate": 1.113580186050475e-05, "loss": 0.3373, "step": 10446 }, { "epoch": 0.48, "grad_norm": 0.4030214831218338, "learning_rate": 1.1134323557285806e-05, "loss": 0.3375, "step": 10447 }, { "epoch": 0.48, "grad_norm": 0.7383010094661877, "learning_rate": 1.113284522895403e-05, "loss": 0.4267, "step": 10448 }, { "epoch": 0.48, "grad_norm": 0.5460729461816785, "learning_rate": 1.1131366875542154e-05, "loss": 0.3546, "step": 10449 }, { "epoch": 0.48, "grad_norm": 0.30763357299544747, "learning_rate": 1.1129888497082905e-05, "loss": 0.2865, "step": 10450 }, { "epoch": 0.48, "grad_norm": 0.22325843858625022, "learning_rate": 1.1128410093609017e-05, "loss": 0.1183, "step": 10451 }, { "epoch": 0.48, "grad_norm": 0.6126246244005373, "learning_rate": 1.1126931665153213e-05, "loss": 0.3401, "step": 10452 }, { "epoch": 0.48, "grad_norm": 0.4676837632395992, "learning_rate": 1.112545321174823e-05, "loss": 0.3529, "step": 10453 }, { "epoch": 0.48, "grad_norm": 0.3892445801631183, "learning_rate": 1.1123974733426794e-05, "loss": 0.3265, "step": 10454 }, { "epoch": 0.48, "grad_norm": 0.4302163898061417, "learning_rate": 1.1122496230221644e-05, "loss": 0.2371, "step": 10455 }, { "epoch": 0.48, "grad_norm": 0.49071261732353244, "learning_rate": 1.1121017702165511e-05, "loss": 0.3573, "step": 10456 }, { "epoch": 0.48, "grad_norm": 0.2751448161761606, "learning_rate": 1.1119539149291125e-05, "loss": 0.2085, "step": 10457 }, { "epoch": 0.48, "grad_norm": 0.2787839345259726, "learning_rate": 1.111806057163122e-05, "loss": 0.1997, "step": 10458 }, { "epoch": 0.48, "grad_norm": 0.5340163881751548, "learning_rate": 1.1116581969218533e-05, "loss": 0.3978, "step": 10459 }, { "epoch": 0.48, "grad_norm": 0.6928228228360358, "learning_rate": 1.1115103342085799e-05, "loss": 0.4762, "step": 10460 }, { "epoch": 0.48, "grad_norm": 0.380686297683117, "learning_rate": 1.1113624690265747e-05, "loss": 0.2457, "step": 10461 }, { "epoch": 0.48, "grad_norm": 0.34721988564697875, "learning_rate": 1.111214601379112e-05, "loss": 0.3077, "step": 10462 }, { "epoch": 0.48, "grad_norm": 0.28904915072329407, "learning_rate": 1.1110667312694654e-05, "loss": 0.1434, "step": 10463 }, { "epoch": 0.48, "grad_norm": 0.4132329147682992, "learning_rate": 1.1109188587009083e-05, "loss": 0.1284, "step": 10464 }, { "epoch": 0.48, "grad_norm": 0.40050171431398346, "learning_rate": 1.1107709836767145e-05, "loss": 0.3495, "step": 10465 }, { "epoch": 0.48, "grad_norm": 0.40062463100196644, "learning_rate": 1.1106231062001577e-05, "loss": 0.3264, "step": 10466 }, { "epoch": 0.48, "grad_norm": 0.6595916567624143, "learning_rate": 1.1104752262745122e-05, "loss": 0.3891, "step": 10467 }, { "epoch": 0.48, "grad_norm": 0.3393951274548719, "learning_rate": 1.1103273439030516e-05, "loss": 0.2405, "step": 10468 }, { "epoch": 0.48, "grad_norm": 0.25121963057674984, "learning_rate": 1.11017945908905e-05, "loss": 0.1368, "step": 10469 }, { "epoch": 0.48, "grad_norm": 0.3887917716849789, "learning_rate": 1.1100315718357815e-05, "loss": 0.297, "step": 10470 }, { "epoch": 0.48, "grad_norm": 0.362258615457602, "learning_rate": 1.1098836821465198e-05, "loss": 0.2399, "step": 10471 }, { "epoch": 0.48, "grad_norm": 0.836081355682549, "learning_rate": 1.1097357900245394e-05, "loss": 0.443, "step": 10472 }, { "epoch": 0.48, "grad_norm": 0.3868486433914077, "learning_rate": 1.1095878954731142e-05, "loss": 0.3141, "step": 10473 }, { "epoch": 0.48, "grad_norm": 0.3521297342992916, "learning_rate": 1.1094399984955189e-05, "loss": 0.213, "step": 10474 }, { "epoch": 0.48, "grad_norm": 0.3000543897222317, "learning_rate": 1.1092920990950276e-05, "loss": 0.1672, "step": 10475 }, { "epoch": 0.48, "grad_norm": 0.5572693038545837, "learning_rate": 1.1091441972749143e-05, "loss": 0.3156, "step": 10476 }, { "epoch": 0.48, "grad_norm": 0.4057833305067021, "learning_rate": 1.1089962930384535e-05, "loss": 0.2602, "step": 10477 }, { "epoch": 0.48, "grad_norm": 0.4324789851934438, "learning_rate": 1.1088483863889198e-05, "loss": 0.3499, "step": 10478 }, { "epoch": 0.48, "grad_norm": 0.6105258851223422, "learning_rate": 1.1087004773295881e-05, "loss": 0.3558, "step": 10479 }, { "epoch": 0.48, "grad_norm": 0.44911456048286996, "learning_rate": 1.1085525658637327e-05, "loss": 0.3338, "step": 10480 }, { "epoch": 0.48, "grad_norm": 0.2743707012878145, "learning_rate": 1.1084046519946279e-05, "loss": 0.2263, "step": 10481 }, { "epoch": 0.48, "grad_norm": 0.4518151786986908, "learning_rate": 1.1082567357255484e-05, "loss": 0.2925, "step": 10482 }, { "epoch": 0.48, "grad_norm": 0.36213720835369695, "learning_rate": 1.1081088170597694e-05, "loss": 0.2738, "step": 10483 }, { "epoch": 0.48, "grad_norm": 0.6329368495177247, "learning_rate": 1.107960896000565e-05, "loss": 0.3422, "step": 10484 }, { "epoch": 0.48, "grad_norm": 0.3359221937165981, "learning_rate": 1.1078129725512108e-05, "loss": 0.2858, "step": 10485 }, { "epoch": 0.48, "grad_norm": 0.36158589568386224, "learning_rate": 1.1076650467149812e-05, "loss": 0.2786, "step": 10486 }, { "epoch": 0.48, "grad_norm": 0.28435259863671286, "learning_rate": 1.1075171184951512e-05, "loss": 0.0954, "step": 10487 }, { "epoch": 0.48, "grad_norm": 0.37313586731579, "learning_rate": 1.1073691878949958e-05, "loss": 0.2526, "step": 10488 }, { "epoch": 0.48, "grad_norm": 0.3548702928360621, "learning_rate": 1.10722125491779e-05, "loss": 0.2932, "step": 10489 }, { "epoch": 0.48, "grad_norm": 0.46231028756359355, "learning_rate": 1.1070733195668093e-05, "loss": 0.3072, "step": 10490 }, { "epoch": 0.48, "grad_norm": 0.8279114477556175, "learning_rate": 1.106925381845328e-05, "loss": 0.4493, "step": 10491 }, { "epoch": 0.48, "grad_norm": 0.3998047182720395, "learning_rate": 1.1067774417566225e-05, "loss": 0.2833, "step": 10492 }, { "epoch": 0.48, "grad_norm": 0.5277023979282565, "learning_rate": 1.1066294993039668e-05, "loss": 0.335, "step": 10493 }, { "epoch": 0.48, "grad_norm": 0.24838392954017763, "learning_rate": 1.1064815544906368e-05, "loss": 0.1847, "step": 10494 }, { "epoch": 0.48, "grad_norm": 0.3825003401325567, "learning_rate": 1.1063336073199078e-05, "loss": 0.2771, "step": 10495 }, { "epoch": 0.48, "grad_norm": 0.6841718039066883, "learning_rate": 1.1061856577950554e-05, "loss": 0.4698, "step": 10496 }, { "epoch": 0.48, "grad_norm": 0.35380129930150256, "learning_rate": 1.1060377059193548e-05, "loss": 0.2558, "step": 10497 }, { "epoch": 0.48, "grad_norm": 0.4199770391428978, "learning_rate": 1.1058897516960817e-05, "loss": 0.3034, "step": 10498 }, { "epoch": 0.48, "grad_norm": 0.4474972150871595, "learning_rate": 1.1057417951285112e-05, "loss": 0.3073, "step": 10499 }, { "epoch": 0.48, "grad_norm": 0.49897879676509915, "learning_rate": 1.1055938362199194e-05, "loss": 0.2342, "step": 10500 }, { "epoch": 0.48, "grad_norm": 0.3172087798784572, "learning_rate": 1.1054458749735818e-05, "loss": 0.2517, "step": 10501 }, { "epoch": 0.48, "grad_norm": 0.46509604767043045, "learning_rate": 1.1052979113927744e-05, "loss": 0.361, "step": 10502 }, { "epoch": 0.48, "grad_norm": 0.9198672748557136, "learning_rate": 1.1051499454807729e-05, "loss": 0.4044, "step": 10503 }, { "epoch": 0.48, "grad_norm": 0.37011417945137387, "learning_rate": 1.1050019772408526e-05, "loss": 0.2976, "step": 10504 }, { "epoch": 0.48, "grad_norm": 0.5606465573241878, "learning_rate": 1.1048540066762898e-05, "loss": 0.3638, "step": 10505 }, { "epoch": 0.48, "grad_norm": 0.4762704360096836, "learning_rate": 1.1047060337903603e-05, "loss": 0.3391, "step": 10506 }, { "epoch": 0.48, "grad_norm": 0.28192907145802976, "learning_rate": 1.1045580585863403e-05, "loss": 0.1841, "step": 10507 }, { "epoch": 0.48, "grad_norm": 0.42639506962391144, "learning_rate": 1.1044100810675054e-05, "loss": 0.2773, "step": 10508 }, { "epoch": 0.48, "grad_norm": 0.4012117677692907, "learning_rate": 1.1042621012371322e-05, "loss": 0.3397, "step": 10509 }, { "epoch": 0.48, "grad_norm": 0.3403334699642882, "learning_rate": 1.1041141190984966e-05, "loss": 0.1884, "step": 10510 }, { "epoch": 0.48, "grad_norm": 0.9449126721424425, "learning_rate": 1.1039661346548745e-05, "loss": 0.4054, "step": 10511 }, { "epoch": 0.48, "grad_norm": 0.4581157169501, "learning_rate": 1.1038181479095422e-05, "loss": 0.3594, "step": 10512 }, { "epoch": 0.48, "grad_norm": 0.35248765887577094, "learning_rate": 1.1036701588657766e-05, "loss": 0.1952, "step": 10513 }, { "epoch": 0.48, "grad_norm": 0.35938071392328425, "learning_rate": 1.1035221675268533e-05, "loss": 0.2798, "step": 10514 }, { "epoch": 0.48, "grad_norm": 0.45964199401677747, "learning_rate": 1.1033741738960496e-05, "loss": 0.3127, "step": 10515 }, { "epoch": 0.48, "grad_norm": 0.314595576041925, "learning_rate": 1.1032261779766404e-05, "loss": 0.2003, "step": 10516 }, { "epoch": 0.48, "grad_norm": 0.4109678016229127, "learning_rate": 1.1030781797719037e-05, "loss": 0.3061, "step": 10517 }, { "epoch": 0.48, "grad_norm": 1.019781075819112, "learning_rate": 1.1029301792851152e-05, "loss": 0.4227, "step": 10518 }, { "epoch": 0.48, "grad_norm": 0.37620617565730624, "learning_rate": 1.102782176519552e-05, "loss": 0.2765, "step": 10519 }, { "epoch": 0.48, "grad_norm": 0.27311145885174176, "learning_rate": 1.1026341714784902e-05, "loss": 0.1645, "step": 10520 }, { "epoch": 0.48, "grad_norm": 0.30181095603023833, "learning_rate": 1.102486164165207e-05, "loss": 0.2375, "step": 10521 }, { "epoch": 0.48, "grad_norm": 0.33833045992181804, "learning_rate": 1.102338154582979e-05, "loss": 0.2686, "step": 10522 }, { "epoch": 0.48, "grad_norm": 0.7750944009035459, "learning_rate": 1.1021901427350825e-05, "loss": 0.3324, "step": 10523 }, { "epoch": 0.48, "grad_norm": 0.7863860443154413, "learning_rate": 1.1020421286247948e-05, "loss": 0.4526, "step": 10524 }, { "epoch": 0.48, "grad_norm": 0.3178439958999361, "learning_rate": 1.1018941122553929e-05, "loss": 0.2569, "step": 10525 }, { "epoch": 0.48, "grad_norm": 0.4358999354861604, "learning_rate": 1.1017460936301536e-05, "loss": 0.2602, "step": 10526 }, { "epoch": 0.48, "grad_norm": 0.35421558713601325, "learning_rate": 1.1015980727523537e-05, "loss": 0.2384, "step": 10527 }, { "epoch": 0.48, "grad_norm": 0.43305797235979415, "learning_rate": 1.1014500496252705e-05, "loss": 0.2697, "step": 10528 }, { "epoch": 0.48, "grad_norm": 0.45587729597697507, "learning_rate": 1.1013020242521809e-05, "loss": 0.3329, "step": 10529 }, { "epoch": 0.48, "grad_norm": 1.3164447007635103, "learning_rate": 1.1011539966363623e-05, "loss": 0.3682, "step": 10530 }, { "epoch": 0.48, "grad_norm": 0.47031391207400264, "learning_rate": 1.1010059667810912e-05, "loss": 0.3183, "step": 10531 }, { "epoch": 0.48, "grad_norm": 0.541001824597019, "learning_rate": 1.1008579346896458e-05, "loss": 0.3847, "step": 10532 }, { "epoch": 0.48, "grad_norm": 0.2551189672906195, "learning_rate": 1.100709900365303e-05, "loss": 0.1855, "step": 10533 }, { "epoch": 0.48, "grad_norm": 0.4412149642273209, "learning_rate": 1.1005618638113398e-05, "loss": 0.271, "step": 10534 }, { "epoch": 0.48, "grad_norm": 0.8296472941408071, "learning_rate": 1.1004138250310341e-05, "loss": 0.3771, "step": 10535 }, { "epoch": 0.48, "grad_norm": 0.9559094775531285, "learning_rate": 1.1002657840276627e-05, "loss": 0.3384, "step": 10536 }, { "epoch": 0.48, "grad_norm": 0.31850810093317444, "learning_rate": 1.1001177408045038e-05, "loss": 0.2703, "step": 10537 }, { "epoch": 0.48, "grad_norm": 0.5227635775065415, "learning_rate": 1.0999696953648344e-05, "loss": 0.3825, "step": 10538 }, { "epoch": 0.48, "grad_norm": 0.4110410218746679, "learning_rate": 1.0998216477119327e-05, "loss": 0.1906, "step": 10539 }, { "epoch": 0.48, "grad_norm": 0.35454569312027584, "learning_rate": 1.0996735978490756e-05, "loss": 0.2947, "step": 10540 }, { "epoch": 0.48, "grad_norm": 0.3566371808788487, "learning_rate": 1.0995255457795412e-05, "loss": 0.3369, "step": 10541 }, { "epoch": 0.48, "grad_norm": 0.4737570145416877, "learning_rate": 1.0993774915066071e-05, "loss": 0.275, "step": 10542 }, { "epoch": 0.48, "grad_norm": 0.41171487031609777, "learning_rate": 1.099229435033551e-05, "loss": 0.2055, "step": 10543 }, { "epoch": 0.48, "grad_norm": 1.3289081313675228, "learning_rate": 1.0990813763636511e-05, "loss": 0.8209, "step": 10544 }, { "epoch": 0.48, "grad_norm": 0.433228971571981, "learning_rate": 1.098933315500185e-05, "loss": 0.3369, "step": 10545 }, { "epoch": 0.48, "grad_norm": 0.36195300256043683, "learning_rate": 1.0987852524464304e-05, "loss": 0.2461, "step": 10546 }, { "epoch": 0.48, "grad_norm": 0.4460752123238476, "learning_rate": 1.0986371872056658e-05, "loss": 0.3054, "step": 10547 }, { "epoch": 0.48, "grad_norm": 0.4145390648947652, "learning_rate": 1.0984891197811686e-05, "loss": 0.2768, "step": 10548 }, { "epoch": 0.48, "grad_norm": 0.30891539996897377, "learning_rate": 1.0983410501762175e-05, "loss": 0.2065, "step": 10549 }, { "epoch": 0.48, "grad_norm": 0.5882592365379976, "learning_rate": 1.0981929783940904e-05, "loss": 0.4163, "step": 10550 }, { "epoch": 0.48, "grad_norm": 1.3073025517397505, "learning_rate": 1.0980449044380654e-05, "loss": 0.4083, "step": 10551 }, { "epoch": 0.48, "grad_norm": 0.4176127926343007, "learning_rate": 1.0978968283114207e-05, "loss": 0.2156, "step": 10552 }, { "epoch": 0.48, "grad_norm": 0.30070549285084547, "learning_rate": 1.0977487500174342e-05, "loss": 0.2571, "step": 10553 }, { "epoch": 0.48, "grad_norm": 0.4488736716399582, "learning_rate": 1.0976006695593849e-05, "loss": 0.2828, "step": 10554 }, { "epoch": 0.48, "grad_norm": 0.6858772807518426, "learning_rate": 1.0974525869405506e-05, "loss": 0.3399, "step": 10555 }, { "epoch": 0.48, "grad_norm": 0.3560952445689576, "learning_rate": 1.0973045021642103e-05, "loss": 0.266, "step": 10556 }, { "epoch": 0.48, "grad_norm": 0.518651057955851, "learning_rate": 1.097156415233642e-05, "loss": 0.3857, "step": 10557 }, { "epoch": 0.49, "grad_norm": 0.476061161862377, "learning_rate": 1.0970083261521243e-05, "loss": 0.3056, "step": 10558 }, { "epoch": 0.49, "grad_norm": 0.29437660462027354, "learning_rate": 1.0968602349229356e-05, "loss": 0.177, "step": 10559 }, { "epoch": 0.49, "grad_norm": 0.3670634193355905, "learning_rate": 1.0967121415493546e-05, "loss": 0.2626, "step": 10560 }, { "epoch": 0.49, "grad_norm": 0.3399862421465084, "learning_rate": 1.0965640460346603e-05, "loss": 0.2473, "step": 10561 }, { "epoch": 0.49, "grad_norm": 0.4786808125648303, "learning_rate": 1.096415948382131e-05, "loss": 0.2723, "step": 10562 }, { "epoch": 0.49, "grad_norm": 0.5972191172481575, "learning_rate": 1.0962678485950455e-05, "loss": 0.4096, "step": 10563 }, { "epoch": 0.49, "grad_norm": 0.3976040684743492, "learning_rate": 1.0961197466766826e-05, "loss": 0.2799, "step": 10564 }, { "epoch": 0.49, "grad_norm": 0.33488019046954265, "learning_rate": 1.0959716426303214e-05, "loss": 0.2375, "step": 10565 }, { "epoch": 0.49, "grad_norm": 0.30337444480723963, "learning_rate": 1.09582353645924e-05, "loss": 0.2168, "step": 10566 }, { "epoch": 0.49, "grad_norm": 0.9142107611906051, "learning_rate": 1.0956754281667182e-05, "loss": 0.4948, "step": 10567 }, { "epoch": 0.49, "grad_norm": 0.36824464406337265, "learning_rate": 1.0955273177560347e-05, "loss": 0.3096, "step": 10568 }, { "epoch": 0.49, "grad_norm": 0.3660790486324102, "learning_rate": 1.095379205230468e-05, "loss": 0.2971, "step": 10569 }, { "epoch": 0.49, "grad_norm": 0.5826729897583479, "learning_rate": 1.0952310905932982e-05, "loss": 0.2965, "step": 10570 }, { "epoch": 0.49, "grad_norm": 0.43161789614674323, "learning_rate": 1.0950829738478034e-05, "loss": 0.3266, "step": 10571 }, { "epoch": 0.49, "grad_norm": 0.24840161677097314, "learning_rate": 1.0949348549972635e-05, "loss": 0.1779, "step": 10572 }, { "epoch": 0.49, "grad_norm": 0.7086796526919262, "learning_rate": 1.0947867340449572e-05, "loss": 0.425, "step": 10573 }, { "epoch": 0.49, "grad_norm": 0.3755966136282917, "learning_rate": 1.094638610994164e-05, "loss": 0.3242, "step": 10574 }, { "epoch": 0.49, "grad_norm": 0.8603995071148285, "learning_rate": 1.0944904858481636e-05, "loss": 0.3633, "step": 10575 }, { "epoch": 0.49, "grad_norm": 0.38640523064264576, "learning_rate": 1.0943423586102343e-05, "loss": 0.3018, "step": 10576 }, { "epoch": 0.49, "grad_norm": 0.3417474325186601, "learning_rate": 1.0941942292836562e-05, "loss": 0.2926, "step": 10577 }, { "epoch": 0.49, "grad_norm": 0.2157028748813911, "learning_rate": 1.0940460978717087e-05, "loss": 0.1095, "step": 10578 }, { "epoch": 0.49, "grad_norm": 0.4074903364256941, "learning_rate": 1.0938979643776715e-05, "loss": 0.3048, "step": 10579 }, { "epoch": 0.49, "grad_norm": 0.5207802912047126, "learning_rate": 1.0937498288048239e-05, "loss": 0.3688, "step": 10580 }, { "epoch": 0.49, "grad_norm": 0.32101012214923214, "learning_rate": 1.0936016911564451e-05, "loss": 0.3113, "step": 10581 }, { "epoch": 0.49, "grad_norm": 0.36960418388080785, "learning_rate": 1.0934535514358153e-05, "loss": 0.1866, "step": 10582 }, { "epoch": 0.49, "grad_norm": 0.5318717651146868, "learning_rate": 1.0933054096462136e-05, "loss": 0.3415, "step": 10583 }, { "epoch": 0.49, "grad_norm": 0.2521014887560583, "learning_rate": 1.0931572657909207e-05, "loss": 0.2116, "step": 10584 }, { "epoch": 0.49, "grad_norm": 0.5267893329999455, "learning_rate": 1.0930091198732152e-05, "loss": 0.2639, "step": 10585 }, { "epoch": 0.49, "grad_norm": 0.3785844060009913, "learning_rate": 1.0928609718963777e-05, "loss": 0.3229, "step": 10586 }, { "epoch": 0.49, "grad_norm": 0.8488818690686816, "learning_rate": 1.0927128218636875e-05, "loss": 0.545, "step": 10587 }, { "epoch": 0.49, "grad_norm": 0.32838103523500994, "learning_rate": 1.0925646697784251e-05, "loss": 0.2502, "step": 10588 }, { "epoch": 0.49, "grad_norm": 0.4331605240552264, "learning_rate": 1.0924165156438697e-05, "loss": 0.308, "step": 10589 }, { "epoch": 0.49, "grad_norm": 0.2857572823818981, "learning_rate": 1.092268359463302e-05, "loss": 0.1849, "step": 10590 }, { "epoch": 0.49, "grad_norm": 0.6646503165177715, "learning_rate": 1.0921202012400019e-05, "loss": 0.2715, "step": 10591 }, { "epoch": 0.49, "grad_norm": 0.38230972442953065, "learning_rate": 1.0919720409772491e-05, "loss": 0.3174, "step": 10592 }, { "epoch": 0.49, "grad_norm": 0.5585228848785955, "learning_rate": 1.091823878678324e-05, "loss": 0.3986, "step": 10593 }, { "epoch": 0.49, "grad_norm": 0.848493367272372, "learning_rate": 1.0916757143465068e-05, "loss": 0.4971, "step": 10594 }, { "epoch": 0.49, "grad_norm": 0.3418417369915704, "learning_rate": 1.0915275479850777e-05, "loss": 0.2279, "step": 10595 }, { "epoch": 0.49, "grad_norm": 0.2951115298218996, "learning_rate": 1.0913793795973167e-05, "loss": 0.2784, "step": 10596 }, { "epoch": 0.49, "grad_norm": 0.5572261579387174, "learning_rate": 1.0912312091865045e-05, "loss": 0.3834, "step": 10597 }, { "epoch": 0.49, "grad_norm": 0.26349140837445534, "learning_rate": 1.0910830367559212e-05, "loss": 0.1709, "step": 10598 }, { "epoch": 0.49, "grad_norm": 1.252730453767162, "learning_rate": 1.0909348623088472e-05, "loss": 0.7936, "step": 10599 }, { "epoch": 0.49, "grad_norm": 0.3223358149457774, "learning_rate": 1.090786685848563e-05, "loss": 0.2738, "step": 10600 }, { "epoch": 0.49, "grad_norm": 0.39213706291046607, "learning_rate": 1.090638507378349e-05, "loss": 0.2496, "step": 10601 }, { "epoch": 0.49, "grad_norm": 0.6880017009555858, "learning_rate": 1.0904903269014856e-05, "loss": 0.4372, "step": 10602 }, { "epoch": 0.49, "grad_norm": 0.3748487966853121, "learning_rate": 1.090342144421254e-05, "loss": 0.2548, "step": 10603 }, { "epoch": 0.49, "grad_norm": 0.2973458648932974, "learning_rate": 1.0901939599409343e-05, "loss": 0.2278, "step": 10604 }, { "epoch": 0.49, "grad_norm": 0.37848135021478935, "learning_rate": 1.0900457734638074e-05, "loss": 0.3283, "step": 10605 }, { "epoch": 0.49, "grad_norm": 1.6364873597114724, "learning_rate": 1.0898975849931535e-05, "loss": 0.8297, "step": 10606 }, { "epoch": 0.49, "grad_norm": 0.3610541667210801, "learning_rate": 1.089749394532254e-05, "loss": 0.2947, "step": 10607 }, { "epoch": 0.49, "grad_norm": 0.4185592943048797, "learning_rate": 1.0896012020843892e-05, "loss": 0.2823, "step": 10608 }, { "epoch": 0.49, "grad_norm": 0.7135207282144652, "learning_rate": 1.0894530076528404e-05, "loss": 0.4283, "step": 10609 }, { "epoch": 0.49, "grad_norm": 0.3628564157355788, "learning_rate": 1.0893048112408882e-05, "loss": 0.2889, "step": 10610 }, { "epoch": 0.49, "grad_norm": 0.331634254493673, "learning_rate": 1.0891566128518133e-05, "loss": 0.1571, "step": 10611 }, { "epoch": 0.49, "grad_norm": 0.3060658979703778, "learning_rate": 1.0890084124888971e-05, "loss": 0.2638, "step": 10612 }, { "epoch": 0.49, "grad_norm": 0.395272086306695, "learning_rate": 1.0888602101554202e-05, "loss": 0.2862, "step": 10613 }, { "epoch": 0.49, "grad_norm": 0.9437320732050818, "learning_rate": 1.088712005854664e-05, "loss": 0.3437, "step": 10614 }, { "epoch": 0.49, "grad_norm": 0.8433383927800244, "learning_rate": 1.0885637995899099e-05, "loss": 0.4365, "step": 10615 }, { "epoch": 0.49, "grad_norm": 0.39863194323674406, "learning_rate": 1.0884155913644382e-05, "loss": 0.3032, "step": 10616 }, { "epoch": 0.49, "grad_norm": 0.2690947126743883, "learning_rate": 1.0882673811815306e-05, "loss": 0.2192, "step": 10617 }, { "epoch": 0.49, "grad_norm": 0.35836881143509813, "learning_rate": 1.0881191690444684e-05, "loss": 0.2256, "step": 10618 }, { "epoch": 0.49, "grad_norm": 0.4346163685143305, "learning_rate": 1.0879709549565323e-05, "loss": 0.2979, "step": 10619 }, { "epoch": 0.49, "grad_norm": 0.6881563396993722, "learning_rate": 1.0878227389210046e-05, "loss": 0.3289, "step": 10620 }, { "epoch": 0.49, "grad_norm": 0.8161821407890882, "learning_rate": 1.087674520941166e-05, "loss": 0.2802, "step": 10621 }, { "epoch": 0.49, "grad_norm": 0.4021588127220264, "learning_rate": 1.0875263010202977e-05, "loss": 0.3001, "step": 10622 }, { "epoch": 0.49, "grad_norm": 0.4706051959295663, "learning_rate": 1.0873780791616816e-05, "loss": 0.338, "step": 10623 }, { "epoch": 0.49, "grad_norm": 0.32423140455329363, "learning_rate": 1.0872298553685988e-05, "loss": 0.2271, "step": 10624 }, { "epoch": 0.49, "grad_norm": 0.42833020936734045, "learning_rate": 1.0870816296443317e-05, "loss": 0.2751, "step": 10625 }, { "epoch": 0.49, "grad_norm": 0.8629160951960159, "learning_rate": 1.0869334019921608e-05, "loss": 0.3963, "step": 10626 }, { "epoch": 0.49, "grad_norm": 0.4310006714810583, "learning_rate": 1.0867851724153683e-05, "loss": 0.237, "step": 10627 }, { "epoch": 0.49, "grad_norm": 0.3093428330672653, "learning_rate": 1.0866369409172357e-05, "loss": 0.2499, "step": 10628 }, { "epoch": 0.49, "grad_norm": 1.4060090308091724, "learning_rate": 1.0864887075010447e-05, "loss": 0.905, "step": 10629 }, { "epoch": 0.49, "grad_norm": 0.3563798835613191, "learning_rate": 1.086340472170077e-05, "loss": 0.1859, "step": 10630 }, { "epoch": 0.49, "grad_norm": 0.34326311197639064, "learning_rate": 1.0861922349276147e-05, "loss": 0.2675, "step": 10631 }, { "epoch": 0.49, "grad_norm": 0.4150440829291692, "learning_rate": 1.0860439957769392e-05, "loss": 0.3054, "step": 10632 }, { "epoch": 0.49, "grad_norm": 0.9535068235207886, "learning_rate": 1.0858957547213326e-05, "loss": 0.5629, "step": 10633 }, { "epoch": 0.49, "grad_norm": 0.35170511408267097, "learning_rate": 1.0857475117640766e-05, "loss": 0.2005, "step": 10634 }, { "epoch": 0.49, "grad_norm": 1.2420079671525877, "learning_rate": 1.0855992669084536e-05, "loss": 0.7487, "step": 10635 }, { "epoch": 0.49, "grad_norm": 0.3090264168405756, "learning_rate": 1.0854510201577451e-05, "loss": 0.2635, "step": 10636 }, { "epoch": 0.49, "grad_norm": 0.3800803067633803, "learning_rate": 1.0853027715152336e-05, "loss": 0.2008, "step": 10637 }, { "epoch": 0.49, "grad_norm": 0.46630868993904695, "learning_rate": 1.0851545209842009e-05, "loss": 0.2984, "step": 10638 }, { "epoch": 0.49, "grad_norm": 0.398957963454377, "learning_rate": 1.0850062685679292e-05, "loss": 0.3159, "step": 10639 }, { "epoch": 0.49, "grad_norm": 0.3430221090492787, "learning_rate": 1.0848580142697006e-05, "loss": 0.1869, "step": 10640 }, { "epoch": 0.49, "grad_norm": 1.09828167344895, "learning_rate": 1.0847097580927974e-05, "loss": 0.6335, "step": 10641 }, { "epoch": 0.49, "grad_norm": 0.6372480909358598, "learning_rate": 1.0845615000405018e-05, "loss": 0.4535, "step": 10642 }, { "epoch": 0.49, "grad_norm": 0.32720255286140965, "learning_rate": 1.0844132401160958e-05, "loss": 0.2762, "step": 10643 }, { "epoch": 0.49, "grad_norm": 0.2757530633703458, "learning_rate": 1.0842649783228624e-05, "loss": 0.1752, "step": 10644 }, { "epoch": 0.49, "grad_norm": 1.5266151392590883, "learning_rate": 1.0841167146640834e-05, "loss": 0.6542, "step": 10645 }, { "epoch": 0.49, "grad_norm": 0.392955445253096, "learning_rate": 1.0839684491430415e-05, "loss": 0.2982, "step": 10646 }, { "epoch": 0.49, "grad_norm": 0.7533056761169569, "learning_rate": 1.0838201817630189e-05, "loss": 0.356, "step": 10647 }, { "epoch": 0.49, "grad_norm": 0.3749672107369942, "learning_rate": 1.0836719125272986e-05, "loss": 0.3253, "step": 10648 }, { "epoch": 0.49, "grad_norm": 0.39622495268889213, "learning_rate": 1.0835236414391622e-05, "loss": 0.256, "step": 10649 }, { "epoch": 0.49, "grad_norm": 0.19048305983119787, "learning_rate": 1.0833753685018935e-05, "loss": 0.0725, "step": 10650 }, { "epoch": 0.49, "grad_norm": 0.38142334606071937, "learning_rate": 1.0832270937187745e-05, "loss": 0.3342, "step": 10651 }, { "epoch": 0.49, "grad_norm": 0.38650519873211114, "learning_rate": 1.0830788170930876e-05, "loss": 0.2578, "step": 10652 }, { "epoch": 0.49, "grad_norm": 0.5293537690764069, "learning_rate": 1.0829305386281158e-05, "loss": 0.296, "step": 10653 }, { "epoch": 0.49, "grad_norm": 0.8112702564889863, "learning_rate": 1.0827822583271418e-05, "loss": 0.4097, "step": 10654 }, { "epoch": 0.49, "grad_norm": 0.44259759590065606, "learning_rate": 1.0826339761934483e-05, "loss": 0.2916, "step": 10655 }, { "epoch": 0.49, "grad_norm": 0.28259821921716455, "learning_rate": 1.0824856922303183e-05, "loss": 0.2435, "step": 10656 }, { "epoch": 0.49, "grad_norm": 0.3155867563215487, "learning_rate": 1.0823374064410348e-05, "loss": 0.1561, "step": 10657 }, { "epoch": 0.49, "grad_norm": 0.42516065761004934, "learning_rate": 1.0821891188288803e-05, "loss": 0.3111, "step": 10658 }, { "epoch": 0.49, "grad_norm": 0.6337045826593662, "learning_rate": 1.082040829397138e-05, "loss": 0.397, "step": 10659 }, { "epoch": 0.49, "grad_norm": 0.42032059547767175, "learning_rate": 1.0818925381490904e-05, "loss": 0.2626, "step": 10660 }, { "epoch": 0.49, "grad_norm": 0.40809108791157067, "learning_rate": 1.081744245088021e-05, "loss": 0.247, "step": 10661 }, { "epoch": 0.49, "grad_norm": 0.2482786630310706, "learning_rate": 1.0815959502172133e-05, "loss": 0.1908, "step": 10662 }, { "epoch": 0.49, "grad_norm": 0.4565851764908628, "learning_rate": 1.0814476535399496e-05, "loss": 0.2775, "step": 10663 }, { "epoch": 0.49, "grad_norm": 0.2860235106911526, "learning_rate": 1.0812993550595131e-05, "loss": 0.2477, "step": 10664 }, { "epoch": 0.49, "grad_norm": 0.7484367457429841, "learning_rate": 1.0811510547791878e-05, "loss": 0.4959, "step": 10665 }, { "epoch": 0.49, "grad_norm": 0.8290799323261989, "learning_rate": 1.081002752702256e-05, "loss": 0.3549, "step": 10666 }, { "epoch": 0.49, "grad_norm": 0.378170469503295, "learning_rate": 1.0808544488320014e-05, "loss": 0.2898, "step": 10667 }, { "epoch": 0.49, "grad_norm": 0.41989594270813685, "learning_rate": 1.0807061431717072e-05, "loss": 0.3192, "step": 10668 }, { "epoch": 0.49, "grad_norm": 0.29147581458370114, "learning_rate": 1.080557835724657e-05, "loss": 0.2136, "step": 10669 }, { "epoch": 0.49, "grad_norm": 0.3513360284895416, "learning_rate": 1.0804095264941338e-05, "loss": 0.2122, "step": 10670 }, { "epoch": 0.49, "grad_norm": 0.47939196941232093, "learning_rate": 1.0802612154834211e-05, "loss": 0.386, "step": 10671 }, { "epoch": 0.49, "grad_norm": 0.41302445464470483, "learning_rate": 1.0801129026958025e-05, "loss": 0.3437, "step": 10672 }, { "epoch": 0.49, "grad_norm": 0.40562839485155766, "learning_rate": 1.0799645881345612e-05, "loss": 0.1666, "step": 10673 }, { "epoch": 0.49, "grad_norm": 0.3118309109725975, "learning_rate": 1.0798162718029816e-05, "loss": 0.2538, "step": 10674 }, { "epoch": 0.49, "grad_norm": 0.29334225964249144, "learning_rate": 1.0796679537043461e-05, "loss": 0.261, "step": 10675 }, { "epoch": 0.49, "grad_norm": 0.3526373695136828, "learning_rate": 1.0795196338419392e-05, "loss": 0.2103, "step": 10676 }, { "epoch": 0.49, "grad_norm": 0.5149197368860986, "learning_rate": 1.0793713122190439e-05, "loss": 0.354, "step": 10677 }, { "epoch": 0.49, "grad_norm": 1.2464857242689833, "learning_rate": 1.0792229888389447e-05, "loss": 0.6495, "step": 10678 }, { "epoch": 0.49, "grad_norm": 0.3035593289342247, "learning_rate": 1.0790746637049247e-05, "loss": 0.2107, "step": 10679 }, { "epoch": 0.49, "grad_norm": 0.28983493967421964, "learning_rate": 1.0789263368202678e-05, "loss": 0.2325, "step": 10680 }, { "epoch": 0.49, "grad_norm": 0.4467403772660091, "learning_rate": 1.0787780081882579e-05, "loss": 0.2994, "step": 10681 }, { "epoch": 0.49, "grad_norm": 0.5410587027553821, "learning_rate": 1.0786296778121787e-05, "loss": 0.3318, "step": 10682 }, { "epoch": 0.49, "grad_norm": 0.3982441070615042, "learning_rate": 1.0784813456953143e-05, "loss": 0.2392, "step": 10683 }, { "epoch": 0.49, "grad_norm": 0.38012384535553245, "learning_rate": 1.0783330118409488e-05, "loss": 0.3186, "step": 10684 }, { "epoch": 0.49, "grad_norm": 0.4227901344992665, "learning_rate": 1.0781846762523658e-05, "loss": 0.2921, "step": 10685 }, { "epoch": 0.49, "grad_norm": 0.522593100103755, "learning_rate": 1.0780363389328494e-05, "loss": 0.2567, "step": 10686 }, { "epoch": 0.49, "grad_norm": 0.2779076100227694, "learning_rate": 1.0778879998856836e-05, "loss": 0.2309, "step": 10687 }, { "epoch": 0.49, "grad_norm": 0.44870291121511924, "learning_rate": 1.0777396591141524e-05, "loss": 0.3046, "step": 10688 }, { "epoch": 0.49, "grad_norm": 0.6995097276285175, "learning_rate": 1.0775913166215403e-05, "loss": 0.2816, "step": 10689 }, { "epoch": 0.49, "grad_norm": 1.0627708581212783, "learning_rate": 1.077442972411131e-05, "loss": 0.6678, "step": 10690 }, { "epoch": 0.49, "grad_norm": 0.4472772152760946, "learning_rate": 1.0772946264862092e-05, "loss": 0.3219, "step": 10691 }, { "epoch": 0.49, "grad_norm": 0.3130977368733739, "learning_rate": 1.0771462788500588e-05, "loss": 0.2171, "step": 10692 }, { "epoch": 0.49, "grad_norm": 0.4558020702968821, "learning_rate": 1.0769979295059642e-05, "loss": 0.3173, "step": 10693 }, { "epoch": 0.49, "grad_norm": 0.35555044999252905, "learning_rate": 1.0768495784572092e-05, "loss": 0.2088, "step": 10694 }, { "epoch": 0.49, "grad_norm": 0.34514699827686085, "learning_rate": 1.0767012257070793e-05, "loss": 0.2802, "step": 10695 }, { "epoch": 0.49, "grad_norm": 0.47043578081251564, "learning_rate": 1.0765528712588575e-05, "loss": 0.2901, "step": 10696 }, { "epoch": 0.49, "grad_norm": 0.5529885611359966, "learning_rate": 1.0764045151158293e-05, "loss": 0.3779, "step": 10697 }, { "epoch": 0.49, "grad_norm": 0.409959459238842, "learning_rate": 1.0762561572812789e-05, "loss": 0.323, "step": 10698 }, { "epoch": 0.49, "grad_norm": 0.3247614878719977, "learning_rate": 1.0761077977584905e-05, "loss": 0.2693, "step": 10699 }, { "epoch": 0.49, "grad_norm": 0.3302206070332327, "learning_rate": 1.0759594365507491e-05, "loss": 0.2424, "step": 10700 }, { "epoch": 0.49, "grad_norm": 0.4050594115569477, "learning_rate": 1.0758110736613385e-05, "loss": 0.321, "step": 10701 }, { "epoch": 0.49, "grad_norm": 0.3178704082220715, "learning_rate": 1.0756627090935441e-05, "loss": 0.1467, "step": 10702 }, { "epoch": 0.49, "grad_norm": 0.33370061945729507, "learning_rate": 1.07551434285065e-05, "loss": 0.2925, "step": 10703 }, { "epoch": 0.49, "grad_norm": 0.39123781820110953, "learning_rate": 1.0753659749359416e-05, "loss": 0.2855, "step": 10704 }, { "epoch": 0.49, "grad_norm": 0.5971650476848853, "learning_rate": 1.0752176053527025e-05, "loss": 0.3404, "step": 10705 }, { "epoch": 0.49, "grad_norm": 0.40610297438547477, "learning_rate": 1.0750692341042187e-05, "loss": 0.2835, "step": 10706 }, { "epoch": 0.49, "grad_norm": 0.4171271164122111, "learning_rate": 1.0749208611937739e-05, "loss": 0.3402, "step": 10707 }, { "epoch": 0.49, "grad_norm": 0.2526297188239567, "learning_rate": 1.0747724866246539e-05, "loss": 0.2199, "step": 10708 }, { "epoch": 0.49, "grad_norm": 0.5764065034152747, "learning_rate": 1.0746241104001429e-05, "loss": 0.2342, "step": 10709 }, { "epoch": 0.49, "grad_norm": 0.41101913639268867, "learning_rate": 1.074475732523526e-05, "loss": 0.2788, "step": 10710 }, { "epoch": 0.49, "grad_norm": 0.3631026062093693, "learning_rate": 1.074327352998088e-05, "loss": 0.34, "step": 10711 }, { "epoch": 0.49, "grad_norm": 0.47791827249084246, "learning_rate": 1.0741789718271143e-05, "loss": 0.1421, "step": 10712 }, { "epoch": 0.49, "grad_norm": 0.38180976741596595, "learning_rate": 1.0740305890138896e-05, "loss": 0.3219, "step": 10713 }, { "epoch": 0.49, "grad_norm": 0.3132540773234309, "learning_rate": 1.073882204561699e-05, "loss": 0.1504, "step": 10714 }, { "epoch": 0.49, "grad_norm": 0.31882027602109486, "learning_rate": 1.0737338184738277e-05, "loss": 0.2391, "step": 10715 }, { "epoch": 0.49, "grad_norm": 0.3401453663074988, "learning_rate": 1.0735854307535607e-05, "loss": 0.2905, "step": 10716 }, { "epoch": 0.49, "grad_norm": 1.0921216325888152, "learning_rate": 1.073437041404183e-05, "loss": 0.5041, "step": 10717 }, { "epoch": 0.49, "grad_norm": 0.45499948096842907, "learning_rate": 1.0732886504289802e-05, "loss": 0.214, "step": 10718 }, { "epoch": 0.49, "grad_norm": 0.3454641512324536, "learning_rate": 1.073140257831237e-05, "loss": 0.2725, "step": 10719 }, { "epoch": 0.49, "grad_norm": 0.3631696473573739, "learning_rate": 1.0729918636142392e-05, "loss": 0.2737, "step": 10720 }, { "epoch": 0.49, "grad_norm": 0.4237599613033983, "learning_rate": 1.0728434677812722e-05, "loss": 0.2661, "step": 10721 }, { "epoch": 0.49, "grad_norm": 0.3376005097313622, "learning_rate": 1.0726950703356204e-05, "loss": 0.2203, "step": 10722 }, { "epoch": 0.49, "grad_norm": 0.38073067840459335, "learning_rate": 1.0725466712805704e-05, "loss": 0.3284, "step": 10723 }, { "epoch": 0.49, "grad_norm": 0.7746230814307562, "learning_rate": 1.0723982706194065e-05, "loss": 0.4933, "step": 10724 }, { "epoch": 0.49, "grad_norm": 0.3373284436144719, "learning_rate": 1.072249868355415e-05, "loss": 0.2174, "step": 10725 }, { "epoch": 0.49, "grad_norm": 0.29619639994340635, "learning_rate": 1.072101464491881e-05, "loss": 0.2367, "step": 10726 }, { "epoch": 0.49, "grad_norm": 0.37497113708768215, "learning_rate": 1.0719530590320902e-05, "loss": 0.3368, "step": 10727 }, { "epoch": 0.49, "grad_norm": 0.3441584402297922, "learning_rate": 1.0718046519793276e-05, "loss": 0.2483, "step": 10728 }, { "epoch": 0.49, "grad_norm": 0.9748468241471883, "learning_rate": 1.0716562433368796e-05, "loss": 0.5552, "step": 10729 }, { "epoch": 0.49, "grad_norm": 1.19400143789318, "learning_rate": 1.0715078331080314e-05, "loss": 0.7843, "step": 10730 }, { "epoch": 0.49, "grad_norm": 0.28104220881453945, "learning_rate": 1.0713594212960684e-05, "loss": 0.2247, "step": 10731 }, { "epoch": 0.49, "grad_norm": 0.782764002798639, "learning_rate": 1.0712110079042768e-05, "loss": 0.4602, "step": 10732 }, { "epoch": 0.49, "grad_norm": 0.4063728334239336, "learning_rate": 1.0710625929359422e-05, "loss": 0.3066, "step": 10733 }, { "epoch": 0.49, "grad_norm": 0.2842750633759195, "learning_rate": 1.0709141763943502e-05, "loss": 0.2306, "step": 10734 }, { "epoch": 0.49, "grad_norm": 0.35823541579209517, "learning_rate": 1.0707657582827867e-05, "loss": 0.2647, "step": 10735 }, { "epoch": 0.49, "grad_norm": 1.1948685031222255, "learning_rate": 1.0706173386045373e-05, "loss": 0.6795, "step": 10736 }, { "epoch": 0.49, "grad_norm": 0.37082182938493163, "learning_rate": 1.0704689173628882e-05, "loss": 0.2641, "step": 10737 }, { "epoch": 0.49, "grad_norm": 0.7566463268668385, "learning_rate": 1.0703204945611254e-05, "loss": 0.3018, "step": 10738 }, { "epoch": 0.49, "grad_norm": 0.32559458942662833, "learning_rate": 1.0701720702025344e-05, "loss": 0.3034, "step": 10739 }, { "epoch": 0.49, "grad_norm": 0.4250176516484628, "learning_rate": 1.0700236442904017e-05, "loss": 0.3004, "step": 10740 }, { "epoch": 0.49, "grad_norm": 0.2636505953531897, "learning_rate": 1.0698752168280126e-05, "loss": 0.1302, "step": 10741 }, { "epoch": 0.49, "grad_norm": 0.45078208495904315, "learning_rate": 1.0697267878186538e-05, "loss": 0.3505, "step": 10742 }, { "epoch": 0.49, "grad_norm": 0.3552488512346412, "learning_rate": 1.069578357265611e-05, "loss": 0.2663, "step": 10743 }, { "epoch": 0.49, "grad_norm": 0.8433590416512513, "learning_rate": 1.0694299251721708e-05, "loss": 0.3122, "step": 10744 }, { "epoch": 0.49, "grad_norm": 1.0117519060213622, "learning_rate": 1.0692814915416186e-05, "loss": 0.5295, "step": 10745 }, { "epoch": 0.49, "grad_norm": 0.2639455185458164, "learning_rate": 1.0691330563772408e-05, "loss": 0.2007, "step": 10746 }, { "epoch": 0.49, "grad_norm": 0.2903387475603825, "learning_rate": 1.0689846196823241e-05, "loss": 0.2478, "step": 10747 }, { "epoch": 0.49, "grad_norm": 1.1399263651724851, "learning_rate": 1.0688361814601542e-05, "loss": 0.4923, "step": 10748 }, { "epoch": 0.49, "grad_norm": 0.3278533066596291, "learning_rate": 1.0686877417140175e-05, "loss": 0.2559, "step": 10749 }, { "epoch": 0.49, "grad_norm": 1.102989476769933, "learning_rate": 1.0685393004472009e-05, "loss": 0.4062, "step": 10750 }, { "epoch": 0.49, "grad_norm": 0.34859304766190213, "learning_rate": 1.06839085766299e-05, "loss": 0.2456, "step": 10751 }, { "epoch": 0.49, "grad_norm": 0.3494895642775547, "learning_rate": 1.0682424133646712e-05, "loss": 0.2561, "step": 10752 }, { "epoch": 0.49, "grad_norm": 0.3253290798617155, "learning_rate": 1.0680939675555313e-05, "loss": 0.1885, "step": 10753 }, { "epoch": 0.49, "grad_norm": 0.38840125192380504, "learning_rate": 1.0679455202388565e-05, "loss": 0.2654, "step": 10754 }, { "epoch": 0.49, "grad_norm": 0.3343844024741166, "learning_rate": 1.0677970714179332e-05, "loss": 0.2459, "step": 10755 }, { "epoch": 0.49, "grad_norm": 0.8075885400467577, "learning_rate": 1.0676486210960486e-05, "loss": 0.4087, "step": 10756 }, { "epoch": 0.49, "grad_norm": 0.44633978789498896, "learning_rate": 1.0675001692764886e-05, "loss": 0.2099, "step": 10757 }, { "epoch": 0.49, "grad_norm": 0.4194208391871709, "learning_rate": 1.0673517159625395e-05, "loss": 0.2483, "step": 10758 }, { "epoch": 0.49, "grad_norm": 0.3027930547010427, "learning_rate": 1.0672032611574887e-05, "loss": 0.2464, "step": 10759 }, { "epoch": 0.49, "grad_norm": 0.9573748988839156, "learning_rate": 1.0670548048646224e-05, "loss": 0.5016, "step": 10760 }, { "epoch": 0.49, "grad_norm": 0.3495914415566046, "learning_rate": 1.0669063470872271e-05, "loss": 0.1965, "step": 10761 }, { "epoch": 0.49, "grad_norm": 0.4180381958315438, "learning_rate": 1.06675788782859e-05, "loss": 0.3614, "step": 10762 }, { "epoch": 0.49, "grad_norm": 0.5375715089934817, "learning_rate": 1.0666094270919978e-05, "loss": 0.3693, "step": 10763 }, { "epoch": 0.49, "grad_norm": 0.32621506596607897, "learning_rate": 1.0664609648807369e-05, "loss": 0.207, "step": 10764 }, { "epoch": 0.49, "grad_norm": 0.2927650635349133, "learning_rate": 1.0663125011980942e-05, "loss": 0.2038, "step": 10765 }, { "epoch": 0.49, "grad_norm": 0.4948211262709542, "learning_rate": 1.0661640360473566e-05, "loss": 0.3648, "step": 10766 }, { "epoch": 0.49, "grad_norm": 0.34376765299597767, "learning_rate": 1.0660155694318108e-05, "loss": 0.2354, "step": 10767 }, { "epoch": 0.49, "grad_norm": 0.9546727424251255, "learning_rate": 1.0658671013547446e-05, "loss": 0.4507, "step": 10768 }, { "epoch": 0.49, "grad_norm": 1.3046482047476735, "learning_rate": 1.0657186318194437e-05, "loss": 0.653, "step": 10769 }, { "epoch": 0.49, "grad_norm": 0.3446701950919525, "learning_rate": 1.0655701608291959e-05, "loss": 0.2679, "step": 10770 }, { "epoch": 0.49, "grad_norm": 0.3231523452460592, "learning_rate": 1.0654216883872876e-05, "loss": 0.2017, "step": 10771 }, { "epoch": 0.49, "grad_norm": 0.44000324995062373, "learning_rate": 1.0652732144970066e-05, "loss": 0.3288, "step": 10772 }, { "epoch": 0.49, "grad_norm": 0.34299125657546825, "learning_rate": 1.0651247391616395e-05, "loss": 0.2731, "step": 10773 }, { "epoch": 0.49, "grad_norm": 0.9851606272736743, "learning_rate": 1.0649762623844733e-05, "loss": 0.3819, "step": 10774 }, { "epoch": 0.49, "grad_norm": 0.37632614646509693, "learning_rate": 1.0648277841687956e-05, "loss": 0.2959, "step": 10775 }, { "epoch": 0.5, "grad_norm": 0.6681409765548855, "learning_rate": 1.0646793045178925e-05, "loss": 0.4481, "step": 10776 }, { "epoch": 0.5, "grad_norm": 0.21520073592461034, "learning_rate": 1.0645308234350525e-05, "loss": 0.1538, "step": 10777 }, { "epoch": 0.5, "grad_norm": 0.4099511083646181, "learning_rate": 1.064382340923562e-05, "loss": 0.3353, "step": 10778 }, { "epoch": 0.5, "grad_norm": 0.6083651559097929, "learning_rate": 1.0642338569867086e-05, "loss": 0.3554, "step": 10779 }, { "epoch": 0.5, "grad_norm": 0.34487059716258284, "learning_rate": 1.0640853716277797e-05, "loss": 0.2628, "step": 10780 }, { "epoch": 0.5, "grad_norm": 1.0124394049726113, "learning_rate": 1.0639368848500624e-05, "loss": 0.5962, "step": 10781 }, { "epoch": 0.5, "grad_norm": 0.4781928756058893, "learning_rate": 1.0637883966568438e-05, "loss": 0.325, "step": 10782 }, { "epoch": 0.5, "grad_norm": 0.26572435862100374, "learning_rate": 1.0636399070514118e-05, "loss": 0.2297, "step": 10783 }, { "epoch": 0.5, "grad_norm": 0.548778630875012, "learning_rate": 1.0634914160370536e-05, "loss": 0.328, "step": 10784 }, { "epoch": 0.5, "grad_norm": 0.41978860626118325, "learning_rate": 1.0633429236170565e-05, "loss": 0.3166, "step": 10785 }, { "epoch": 0.5, "grad_norm": 0.2954584757614555, "learning_rate": 1.0631944297947083e-05, "loss": 0.2495, "step": 10786 }, { "epoch": 0.5, "grad_norm": 0.4203474053476572, "learning_rate": 1.0630459345732964e-05, "loss": 0.2513, "step": 10787 }, { "epoch": 0.5, "grad_norm": 0.4038093560777572, "learning_rate": 1.062897437956108e-05, "loss": 0.2849, "step": 10788 }, { "epoch": 0.5, "grad_norm": 0.32968406490076513, "learning_rate": 1.062748939946431e-05, "loss": 0.2053, "step": 10789 }, { "epoch": 0.5, "grad_norm": 0.3226771139419802, "learning_rate": 1.0626004405475531e-05, "loss": 0.2925, "step": 10790 }, { "epoch": 0.5, "grad_norm": 0.3508119249826307, "learning_rate": 1.062451939762762e-05, "loss": 0.2604, "step": 10791 }, { "epoch": 0.5, "grad_norm": 1.2693555560306609, "learning_rate": 1.0623034375953447e-05, "loss": 0.7882, "step": 10792 }, { "epoch": 0.5, "grad_norm": 0.32363961872713526, "learning_rate": 1.0621549340485895e-05, "loss": 0.1308, "step": 10793 }, { "epoch": 0.5, "grad_norm": 0.43152299050792026, "learning_rate": 1.0620064291257839e-05, "loss": 0.2763, "step": 10794 }, { "epoch": 0.5, "grad_norm": 0.357500513025324, "learning_rate": 1.0618579228302157e-05, "loss": 0.3092, "step": 10795 }, { "epoch": 0.5, "grad_norm": 0.6891916062377235, "learning_rate": 1.0617094151651728e-05, "loss": 0.4516, "step": 10796 }, { "epoch": 0.5, "grad_norm": 0.3243649533562527, "learning_rate": 1.0615609061339431e-05, "loss": 0.158, "step": 10797 }, { "epoch": 0.5, "grad_norm": 0.27491934792676875, "learning_rate": 1.0614123957398142e-05, "loss": 0.242, "step": 10798 }, { "epoch": 0.5, "grad_norm": 0.34439776213334206, "learning_rate": 1.0612638839860736e-05, "loss": 0.2707, "step": 10799 }, { "epoch": 0.5, "grad_norm": 0.4960974529821682, "learning_rate": 1.06111537087601e-05, "loss": 0.1878, "step": 10800 }, { "epoch": 0.5, "grad_norm": 0.46768195754492564, "learning_rate": 1.060966856412911e-05, "loss": 0.3298, "step": 10801 }, { "epoch": 0.5, "grad_norm": 0.5408954127233903, "learning_rate": 1.0608183406000645e-05, "loss": 0.377, "step": 10802 }, { "epoch": 0.5, "grad_norm": 0.32753278935486535, "learning_rate": 1.0606698234407587e-05, "loss": 0.23, "step": 10803 }, { "epoch": 0.5, "grad_norm": 0.34149329887450186, "learning_rate": 1.0605213049382814e-05, "loss": 0.2719, "step": 10804 }, { "epoch": 0.5, "grad_norm": 0.4439961312816496, "learning_rate": 1.060372785095921e-05, "loss": 0.236, "step": 10805 }, { "epoch": 0.5, "grad_norm": 0.25953949040876356, "learning_rate": 1.0602242639169649e-05, "loss": 0.2235, "step": 10806 }, { "epoch": 0.5, "grad_norm": 0.5259770183749032, "learning_rate": 1.0600757414047019e-05, "loss": 0.3604, "step": 10807 }, { "epoch": 0.5, "grad_norm": 0.6469408790526499, "learning_rate": 1.0599272175624193e-05, "loss": 0.4549, "step": 10808 }, { "epoch": 0.5, "grad_norm": 0.6578517121909913, "learning_rate": 1.0597786923934066e-05, "loss": 0.3936, "step": 10809 }, { "epoch": 0.5, "grad_norm": 0.3253176222137389, "learning_rate": 1.0596301659009512e-05, "loss": 0.2317, "step": 10810 }, { "epoch": 0.5, "grad_norm": 0.24421515247892855, "learning_rate": 1.059481638088341e-05, "loss": 0.2018, "step": 10811 }, { "epoch": 0.5, "grad_norm": 0.8783169941153536, "learning_rate": 1.0593331089588648e-05, "loss": 0.3311, "step": 10812 }, { "epoch": 0.5, "grad_norm": 0.3863254161674765, "learning_rate": 1.0591845785158108e-05, "loss": 0.2653, "step": 10813 }, { "epoch": 0.5, "grad_norm": 0.3553612754972421, "learning_rate": 1.0590360467624673e-05, "loss": 0.3151, "step": 10814 }, { "epoch": 0.5, "grad_norm": 0.8799962388047933, "learning_rate": 1.0588875137021228e-05, "loss": 0.5138, "step": 10815 }, { "epoch": 0.5, "grad_norm": 0.3688801725877234, "learning_rate": 1.0587389793380652e-05, "loss": 0.2301, "step": 10816 }, { "epoch": 0.5, "grad_norm": 0.4527972515046298, "learning_rate": 1.0585904436735835e-05, "loss": 0.2149, "step": 10817 }, { "epoch": 0.5, "grad_norm": 0.35133140799945833, "learning_rate": 1.0584419067119655e-05, "loss": 0.3043, "step": 10818 }, { "epoch": 0.5, "grad_norm": 0.3286325043499027, "learning_rate": 1.0582933684565003e-05, "loss": 0.2188, "step": 10819 }, { "epoch": 0.5, "grad_norm": 1.4208958306656765, "learning_rate": 1.0581448289104759e-05, "loss": 0.7471, "step": 10820 }, { "epoch": 0.5, "grad_norm": 0.7718131004498932, "learning_rate": 1.0579962880771813e-05, "loss": 0.5496, "step": 10821 }, { "epoch": 0.5, "grad_norm": 0.2834451110466474, "learning_rate": 1.0578477459599046e-05, "loss": 0.2791, "step": 10822 }, { "epoch": 0.5, "grad_norm": 0.27917093969019297, "learning_rate": 1.0576992025619344e-05, "loss": 0.1396, "step": 10823 }, { "epoch": 0.5, "grad_norm": 0.500750297579144, "learning_rate": 1.0575506578865598e-05, "loss": 0.3335, "step": 10824 }, { "epoch": 0.5, "grad_norm": 0.40930629160329773, "learning_rate": 1.0574021119370688e-05, "loss": 0.3341, "step": 10825 }, { "epoch": 0.5, "grad_norm": 0.39987935088565585, "learning_rate": 1.0572535647167505e-05, "loss": 0.2633, "step": 10826 }, { "epoch": 0.5, "grad_norm": 0.6692530253469197, "learning_rate": 1.0571050162288935e-05, "loss": 0.3999, "step": 10827 }, { "epoch": 0.5, "grad_norm": 0.4147580587617464, "learning_rate": 1.0569564664767867e-05, "loss": 0.3112, "step": 10828 }, { "epoch": 0.5, "grad_norm": 0.5812865950283781, "learning_rate": 1.0568079154637181e-05, "loss": 0.3418, "step": 10829 }, { "epoch": 0.5, "grad_norm": 0.28204608831286854, "learning_rate": 1.0566593631929773e-05, "loss": 0.2554, "step": 10830 }, { "epoch": 0.5, "grad_norm": 0.29964942665373784, "learning_rate": 1.0565108096678526e-05, "loss": 0.2211, "step": 10831 }, { "epoch": 0.5, "grad_norm": 0.6948239314783347, "learning_rate": 1.0563622548916332e-05, "loss": 0.4141, "step": 10832 }, { "epoch": 0.5, "grad_norm": 0.9506403556827606, "learning_rate": 1.0562136988676079e-05, "loss": 0.5632, "step": 10833 }, { "epoch": 0.5, "grad_norm": 0.291525342840716, "learning_rate": 1.0560651415990655e-05, "loss": 0.2592, "step": 10834 }, { "epoch": 0.5, "grad_norm": 0.5936558736230145, "learning_rate": 1.055916583089295e-05, "loss": 0.4145, "step": 10835 }, { "epoch": 0.5, "grad_norm": 0.24436399187483046, "learning_rate": 1.0557680233415849e-05, "loss": 0.0717, "step": 10836 }, { "epoch": 0.5, "grad_norm": 0.29807326992599154, "learning_rate": 1.0556194623592247e-05, "loss": 0.2318, "step": 10837 }, { "epoch": 0.5, "grad_norm": 0.39456333565601415, "learning_rate": 1.0554709001455032e-05, "loss": 0.3132, "step": 10838 }, { "epoch": 0.5, "grad_norm": 0.485877781353574, "learning_rate": 1.0553223367037095e-05, "loss": 0.3125, "step": 10839 }, { "epoch": 0.5, "grad_norm": 0.34917632521900976, "learning_rate": 1.0551737720371322e-05, "loss": 0.2736, "step": 10840 }, { "epoch": 0.5, "grad_norm": 0.7141503684796044, "learning_rate": 1.0550252061490614e-05, "loss": 0.4242, "step": 10841 }, { "epoch": 0.5, "grad_norm": 0.22358704826378117, "learning_rate": 1.054876639042785e-05, "loss": 0.1824, "step": 10842 }, { "epoch": 0.5, "grad_norm": 0.3978079102492898, "learning_rate": 1.054728070721593e-05, "loss": 0.2626, "step": 10843 }, { "epoch": 0.5, "grad_norm": 0.3848898049144023, "learning_rate": 1.0545795011887743e-05, "loss": 0.2506, "step": 10844 }, { "epoch": 0.5, "grad_norm": 0.37338207222799513, "learning_rate": 1.0544309304476179e-05, "loss": 0.2435, "step": 10845 }, { "epoch": 0.5, "grad_norm": 0.38498981449243597, "learning_rate": 1.054282358501413e-05, "loss": 0.2844, "step": 10846 }, { "epoch": 0.5, "grad_norm": 0.9745387287820225, "learning_rate": 1.0541337853534492e-05, "loss": 0.4159, "step": 10847 }, { "epoch": 0.5, "grad_norm": 0.4631785386828675, "learning_rate": 1.0539852110070155e-05, "loss": 0.2186, "step": 10848 }, { "epoch": 0.5, "grad_norm": 0.25409007134739114, "learning_rate": 1.0538366354654016e-05, "loss": 0.1699, "step": 10849 }, { "epoch": 0.5, "grad_norm": 0.4141517312730471, "learning_rate": 1.053688058731896e-05, "loss": 0.3243, "step": 10850 }, { "epoch": 0.5, "grad_norm": 1.0049346176090797, "learning_rate": 1.053539480809789e-05, "loss": 0.5016, "step": 10851 }, { "epoch": 0.5, "grad_norm": 0.411183784603024, "learning_rate": 1.053390901702369e-05, "loss": 0.2373, "step": 10852 }, { "epoch": 0.5, "grad_norm": 0.47400615948304, "learning_rate": 1.0532423214129262e-05, "loss": 0.3505, "step": 10853 }, { "epoch": 0.5, "grad_norm": 0.5713871604370441, "learning_rate": 1.0530937399447496e-05, "loss": 0.3382, "step": 10854 }, { "epoch": 0.5, "grad_norm": 0.19563225960294914, "learning_rate": 1.0529451573011286e-05, "loss": 0.1409, "step": 10855 }, { "epoch": 0.5, "grad_norm": 1.3547123747177319, "learning_rate": 1.0527965734853536e-05, "loss": 0.7743, "step": 10856 }, { "epoch": 0.5, "grad_norm": 0.6556285973460387, "learning_rate": 1.0526479885007126e-05, "loss": 0.365, "step": 10857 }, { "epoch": 0.5, "grad_norm": 0.3428099069581227, "learning_rate": 1.0524994023504961e-05, "loss": 0.2383, "step": 10858 }, { "epoch": 0.5, "grad_norm": 0.9898684310762809, "learning_rate": 1.0523508150379933e-05, "loss": 0.5155, "step": 10859 }, { "epoch": 0.5, "grad_norm": 0.435803461121866, "learning_rate": 1.052202226566494e-05, "loss": 0.2688, "step": 10860 }, { "epoch": 0.5, "grad_norm": 0.3429406156614093, "learning_rate": 1.052053636939288e-05, "loss": 0.2706, "step": 10861 }, { "epoch": 0.5, "grad_norm": 0.30070908553798237, "learning_rate": 1.0519050461596643e-05, "loss": 0.2092, "step": 10862 }, { "epoch": 0.5, "grad_norm": 0.8618793313346347, "learning_rate": 1.0517564542309128e-05, "loss": 0.4173, "step": 10863 }, { "epoch": 0.5, "grad_norm": 0.4556668778896537, "learning_rate": 1.0516078611563237e-05, "loss": 0.2696, "step": 10864 }, { "epoch": 0.5, "grad_norm": 0.4067618522811854, "learning_rate": 1.0514592669391862e-05, "loss": 0.271, "step": 10865 }, { "epoch": 0.5, "grad_norm": 0.5633859474238586, "learning_rate": 1.0513106715827897e-05, "loss": 0.3502, "step": 10866 }, { "epoch": 0.5, "grad_norm": 0.38844360873415845, "learning_rate": 1.0511620750904248e-05, "loss": 0.3071, "step": 10867 }, { "epoch": 0.5, "grad_norm": 0.45420182734489783, "learning_rate": 1.0510134774653808e-05, "loss": 0.2233, "step": 10868 }, { "epoch": 0.5, "grad_norm": 0.4444677254443105, "learning_rate": 1.0508648787109477e-05, "loss": 0.3274, "step": 10869 }, { "epoch": 0.5, "grad_norm": 0.31284839931053543, "learning_rate": 1.0507162788304148e-05, "loss": 0.2734, "step": 10870 }, { "epoch": 0.5, "grad_norm": 0.335468458938466, "learning_rate": 1.0505676778270727e-05, "loss": 0.1723, "step": 10871 }, { "epoch": 0.5, "grad_norm": 1.247284135234285, "learning_rate": 1.0504190757042108e-05, "loss": 0.5805, "step": 10872 }, { "epoch": 0.5, "grad_norm": 0.2811821387029117, "learning_rate": 1.0502704724651192e-05, "loss": 0.2165, "step": 10873 }, { "epoch": 0.5, "grad_norm": 0.40171205166525964, "learning_rate": 1.0501218681130878e-05, "loss": 0.3287, "step": 10874 }, { "epoch": 0.5, "grad_norm": 0.762981366002367, "learning_rate": 1.0499732626514067e-05, "loss": 0.3396, "step": 10875 }, { "epoch": 0.5, "grad_norm": 0.3836054798623004, "learning_rate": 1.0498246560833653e-05, "loss": 0.2501, "step": 10876 }, { "epoch": 0.5, "grad_norm": 0.4490986178773323, "learning_rate": 1.0496760484122546e-05, "loss": 0.2566, "step": 10877 }, { "epoch": 0.5, "grad_norm": 0.39095447011221346, "learning_rate": 1.0495274396413635e-05, "loss": 0.2534, "step": 10878 }, { "epoch": 0.5, "grad_norm": 0.35520958885797627, "learning_rate": 1.0493788297739831e-05, "loss": 0.2592, "step": 10879 }, { "epoch": 0.5, "grad_norm": 0.8742815399813149, "learning_rate": 1.0492302188134032e-05, "loss": 0.4647, "step": 10880 }, { "epoch": 0.5, "grad_norm": 0.34256995935076606, "learning_rate": 1.049081606762913e-05, "loss": 0.2764, "step": 10881 }, { "epoch": 0.5, "grad_norm": 0.43773143195426606, "learning_rate": 1.0489329936258037e-05, "loss": 0.2789, "step": 10882 }, { "epoch": 0.5, "grad_norm": 0.27470968302460486, "learning_rate": 1.048784379405365e-05, "loss": 0.2099, "step": 10883 }, { "epoch": 0.5, "grad_norm": 0.97604513807605, "learning_rate": 1.0486357641048872e-05, "loss": 0.2962, "step": 10884 }, { "epoch": 0.5, "grad_norm": 0.45804569514200544, "learning_rate": 1.0484871477276605e-05, "loss": 0.2632, "step": 10885 }, { "epoch": 0.5, "grad_norm": 0.3311143080285236, "learning_rate": 1.0483385302769751e-05, "loss": 0.289, "step": 10886 }, { "epoch": 0.5, "grad_norm": 0.8801719538491538, "learning_rate": 1.048189911756121e-05, "loss": 0.5088, "step": 10887 }, { "epoch": 0.5, "grad_norm": 0.35028560257865166, "learning_rate": 1.0480412921683889e-05, "loss": 0.2225, "step": 10888 }, { "epoch": 0.5, "grad_norm": 0.24964343135958217, "learning_rate": 1.0478926715170687e-05, "loss": 0.2261, "step": 10889 }, { "epoch": 0.5, "grad_norm": 1.5628801636401644, "learning_rate": 1.0477440498054512e-05, "loss": 0.5982, "step": 10890 }, { "epoch": 0.5, "grad_norm": 0.3710797622479978, "learning_rate": 1.0475954270368265e-05, "loss": 0.208, "step": 10891 }, { "epoch": 0.5, "grad_norm": 0.8138146731173193, "learning_rate": 1.0474468032144846e-05, "loss": 0.385, "step": 10892 }, { "epoch": 0.5, "grad_norm": 0.421100019065941, "learning_rate": 1.0472981783417162e-05, "loss": 0.303, "step": 10893 }, { "epoch": 0.5, "grad_norm": 0.331172523192777, "learning_rate": 1.0471495524218121e-05, "loss": 0.2163, "step": 10894 }, { "epoch": 0.5, "grad_norm": 0.317756227905293, "learning_rate": 1.0470009254580622e-05, "loss": 0.1833, "step": 10895 }, { "epoch": 0.5, "grad_norm": 0.8821148693157057, "learning_rate": 1.0468522974537567e-05, "loss": 0.4935, "step": 10896 }, { "epoch": 0.5, "grad_norm": 0.3673361027049786, "learning_rate": 1.0467036684121869e-05, "loss": 0.2989, "step": 10897 }, { "epoch": 0.5, "grad_norm": 0.3589798578521269, "learning_rate": 1.046555038336643e-05, "loss": 0.2821, "step": 10898 }, { "epoch": 0.5, "grad_norm": 0.9321374682184632, "learning_rate": 1.0464064072304152e-05, "loss": 0.6071, "step": 10899 }, { "epoch": 0.5, "grad_norm": 0.6341214168393893, "learning_rate": 1.0462577750967941e-05, "loss": 0.3616, "step": 10900 }, { "epoch": 0.5, "grad_norm": 0.27638776912020274, "learning_rate": 1.0461091419390707e-05, "loss": 0.2102, "step": 10901 }, { "epoch": 0.5, "grad_norm": 0.4439847761153678, "learning_rate": 1.0459605077605353e-05, "loss": 0.249, "step": 10902 }, { "epoch": 0.5, "grad_norm": 0.5958962099716135, "learning_rate": 1.0458118725644788e-05, "loss": 0.3686, "step": 10903 }, { "epoch": 0.5, "grad_norm": 0.4035251922515375, "learning_rate": 1.0456632363541913e-05, "loss": 0.2748, "step": 10904 }, { "epoch": 0.5, "grad_norm": 0.4168429011256215, "learning_rate": 1.0455145991329639e-05, "loss": 0.3264, "step": 10905 }, { "epoch": 0.5, "grad_norm": 0.4291690764468666, "learning_rate": 1.0453659609040868e-05, "loss": 0.2955, "step": 10906 }, { "epoch": 0.5, "grad_norm": 0.2757052344906834, "learning_rate": 1.0452173216708515e-05, "loss": 0.1621, "step": 10907 }, { "epoch": 0.5, "grad_norm": 0.7198774767685849, "learning_rate": 1.045068681436548e-05, "loss": 0.4927, "step": 10908 }, { "epoch": 0.5, "grad_norm": 0.3096798546801564, "learning_rate": 1.0449200402044674e-05, "loss": 0.2604, "step": 10909 }, { "epoch": 0.5, "grad_norm": 0.3801091554125602, "learning_rate": 1.0447713979779006e-05, "loss": 0.308, "step": 10910 }, { "epoch": 0.5, "grad_norm": 0.9222941504763218, "learning_rate": 1.0446227547601378e-05, "loss": 0.4719, "step": 10911 }, { "epoch": 0.5, "grad_norm": 0.41731611639871197, "learning_rate": 1.0444741105544705e-05, "loss": 0.2869, "step": 10912 }, { "epoch": 0.5, "grad_norm": 0.4075029196754148, "learning_rate": 1.044325465364189e-05, "loss": 0.3071, "step": 10913 }, { "epoch": 0.5, "grad_norm": 0.3446128564891179, "learning_rate": 1.0441768191925848e-05, "loss": 0.2116, "step": 10914 }, { "epoch": 0.5, "grad_norm": 0.43155647726063834, "learning_rate": 1.044028172042948e-05, "loss": 0.2894, "step": 10915 }, { "epoch": 0.5, "grad_norm": 0.5842143192713741, "learning_rate": 1.0438795239185704e-05, "loss": 0.3502, "step": 10916 }, { "epoch": 0.5, "grad_norm": 0.39970246716336244, "learning_rate": 1.0437308748227419e-05, "loss": 0.2882, "step": 10917 }, { "epoch": 0.5, "grad_norm": 0.6601530977308432, "learning_rate": 1.0435822247587544e-05, "loss": 0.3817, "step": 10918 }, { "epoch": 0.5, "grad_norm": 0.39650530181159, "learning_rate": 1.043433573729898e-05, "loss": 0.3369, "step": 10919 }, { "epoch": 0.5, "grad_norm": 0.37490496063762124, "learning_rate": 1.0432849217394645e-05, "loss": 0.2613, "step": 10920 }, { "epoch": 0.5, "grad_norm": 0.4074541069519762, "learning_rate": 1.0431362687907445e-05, "loss": 0.3016, "step": 10921 }, { "epoch": 0.5, "grad_norm": 0.3254109694058706, "learning_rate": 1.042987614887029e-05, "loss": 0.2461, "step": 10922 }, { "epoch": 0.5, "grad_norm": 1.3503541464467523, "learning_rate": 1.0428389600316092e-05, "loss": 0.7921, "step": 10923 }, { "epoch": 0.5, "grad_norm": 0.30620043898111016, "learning_rate": 1.042690304227776e-05, "loss": 0.1173, "step": 10924 }, { "epoch": 0.5, "grad_norm": 0.33389068950972156, "learning_rate": 1.0425416474788209e-05, "loss": 0.2795, "step": 10925 }, { "epoch": 0.5, "grad_norm": 0.6268606255790703, "learning_rate": 1.0423929897880343e-05, "loss": 0.3996, "step": 10926 }, { "epoch": 0.5, "grad_norm": 0.23315679535593764, "learning_rate": 1.0422443311587083e-05, "loss": 0.1469, "step": 10927 }, { "epoch": 0.5, "grad_norm": 0.533691663205979, "learning_rate": 1.0420956715941331e-05, "loss": 0.3127, "step": 10928 }, { "epoch": 0.5, "grad_norm": 0.34280900553976923, "learning_rate": 1.0419470110976004e-05, "loss": 0.3081, "step": 10929 }, { "epoch": 0.5, "grad_norm": 0.35131805238793096, "learning_rate": 1.0417983496724013e-05, "loss": 0.2102, "step": 10930 }, { "epoch": 0.5, "grad_norm": 0.5464281988934574, "learning_rate": 1.0416496873218271e-05, "loss": 0.339, "step": 10931 }, { "epoch": 0.5, "grad_norm": 0.7383772090603596, "learning_rate": 1.041501024049169e-05, "loss": 0.3921, "step": 10932 }, { "epoch": 0.5, "grad_norm": 0.226172039066383, "learning_rate": 1.0413523598577182e-05, "loss": 0.1876, "step": 10933 }, { "epoch": 0.5, "grad_norm": 0.41271641921042634, "learning_rate": 1.0412036947507658e-05, "loss": 0.2332, "step": 10934 }, { "epoch": 0.5, "grad_norm": 1.0090279042895118, "learning_rate": 1.0410550287316035e-05, "loss": 0.6684, "step": 10935 }, { "epoch": 0.5, "grad_norm": 1.1705944740833938, "learning_rate": 1.0409063618035222e-05, "loss": 0.456, "step": 10936 }, { "epoch": 0.5, "grad_norm": 0.2923442724355534, "learning_rate": 1.040757693969814e-05, "loss": 0.2351, "step": 10937 }, { "epoch": 0.5, "grad_norm": 0.6308654023504193, "learning_rate": 1.0406090252337693e-05, "loss": 0.4298, "step": 10938 }, { "epoch": 0.5, "grad_norm": 0.2833128195985193, "learning_rate": 1.04046035559868e-05, "loss": 0.1634, "step": 10939 }, { "epoch": 0.5, "grad_norm": 0.3617117484426621, "learning_rate": 1.0403116850678376e-05, "loss": 0.2313, "step": 10940 }, { "epoch": 0.5, "grad_norm": 0.4068810820362841, "learning_rate": 1.0401630136445332e-05, "loss": 0.3683, "step": 10941 }, { "epoch": 0.5, "grad_norm": 0.7046554896419595, "learning_rate": 1.0400143413320582e-05, "loss": 0.407, "step": 10942 }, { "epoch": 0.5, "grad_norm": 0.4122696700413633, "learning_rate": 1.0398656681337042e-05, "loss": 0.2582, "step": 10943 }, { "epoch": 0.5, "grad_norm": 1.6393517902238877, "learning_rate": 1.0397169940527632e-05, "loss": 0.783, "step": 10944 }, { "epoch": 0.5, "grad_norm": 0.27306102576747404, "learning_rate": 1.0395683190925261e-05, "loss": 0.2369, "step": 10945 }, { "epoch": 0.5, "grad_norm": 0.23986544086345535, "learning_rate": 1.0394196432562843e-05, "loss": 0.1718, "step": 10946 }, { "epoch": 0.5, "grad_norm": 1.1443658046257628, "learning_rate": 1.0392709665473298e-05, "loss": 0.7575, "step": 10947 }, { "epoch": 0.5, "grad_norm": 0.5790385132234466, "learning_rate": 1.0391222889689538e-05, "loss": 0.3589, "step": 10948 }, { "epoch": 0.5, "grad_norm": 0.3397915777222104, "learning_rate": 1.038973610524448e-05, "loss": 0.2851, "step": 10949 }, { "epoch": 0.5, "grad_norm": 0.4731018438805068, "learning_rate": 1.0388249312171046e-05, "loss": 0.3052, "step": 10950 }, { "epoch": 0.5, "grad_norm": 0.39169263977964364, "learning_rate": 1.0386762510502139e-05, "loss": 0.2089, "step": 10951 }, { "epoch": 0.5, "grad_norm": 0.3149583358027602, "learning_rate": 1.0385275700270688e-05, "loss": 0.2441, "step": 10952 }, { "epoch": 0.5, "grad_norm": 0.37132215698082294, "learning_rate": 1.03837888815096e-05, "loss": 0.286, "step": 10953 }, { "epoch": 0.5, "grad_norm": 0.7762712916439668, "learning_rate": 1.0382302054251799e-05, "loss": 0.4246, "step": 10954 }, { "epoch": 0.5, "grad_norm": 0.3318439167508191, "learning_rate": 1.0380815218530197e-05, "loss": 0.2849, "step": 10955 }, { "epoch": 0.5, "grad_norm": 0.3425402393153413, "learning_rate": 1.0379328374377715e-05, "loss": 0.2391, "step": 10956 }, { "epoch": 0.5, "grad_norm": 0.470377333207402, "learning_rate": 1.0377841521827268e-05, "loss": 0.2374, "step": 10957 }, { "epoch": 0.5, "grad_norm": 0.266832987483294, "learning_rate": 1.0376354660911772e-05, "loss": 0.2101, "step": 10958 }, { "epoch": 0.5, "grad_norm": 0.6719407026251564, "learning_rate": 1.037486779166415e-05, "loss": 0.3738, "step": 10959 }, { "epoch": 0.5, "grad_norm": 0.38071576971336674, "learning_rate": 1.0373380914117313e-05, "loss": 0.3329, "step": 10960 }, { "epoch": 0.5, "grad_norm": 0.346423742973191, "learning_rate": 1.0371894028304184e-05, "loss": 0.2904, "step": 10961 }, { "epoch": 0.5, "grad_norm": 1.3876393021986768, "learning_rate": 1.037040713425768e-05, "loss": 0.7139, "step": 10962 }, { "epoch": 0.5, "grad_norm": 0.30609658202738504, "learning_rate": 1.036892023201072e-05, "loss": 0.1699, "step": 10963 }, { "epoch": 0.5, "grad_norm": 0.38894806507442203, "learning_rate": 1.0367433321596216e-05, "loss": 0.329, "step": 10964 }, { "epoch": 0.5, "grad_norm": 0.41442709369564207, "learning_rate": 1.0365946403047098e-05, "loss": 0.3269, "step": 10965 }, { "epoch": 0.5, "grad_norm": 0.7681640707075716, "learning_rate": 1.0364459476396276e-05, "loss": 0.3248, "step": 10966 }, { "epoch": 0.5, "grad_norm": 0.40340739975557066, "learning_rate": 1.0362972541676678e-05, "loss": 0.2685, "step": 10967 }, { "epoch": 0.5, "grad_norm": 0.35310313404924815, "learning_rate": 1.0361485598921213e-05, "loss": 0.2189, "step": 10968 }, { "epoch": 0.5, "grad_norm": 0.3517614313657591, "learning_rate": 1.0359998648162805e-05, "loss": 0.2094, "step": 10969 }, { "epoch": 0.5, "grad_norm": 0.3343792623918144, "learning_rate": 1.0358511689434376e-05, "loss": 0.2467, "step": 10970 }, { "epoch": 0.5, "grad_norm": 0.8353299690725504, "learning_rate": 1.0357024722768843e-05, "loss": 0.4269, "step": 10971 }, { "epoch": 0.5, "grad_norm": 0.38283065164636604, "learning_rate": 1.0355537748199128e-05, "loss": 0.2629, "step": 10972 }, { "epoch": 0.5, "grad_norm": 0.34676541908543557, "learning_rate": 1.0354050765758148e-05, "loss": 0.2648, "step": 10973 }, { "epoch": 0.5, "grad_norm": 0.46444387032238416, "learning_rate": 1.0352563775478828e-05, "loss": 0.2721, "step": 10974 }, { "epoch": 0.5, "grad_norm": 1.5275244150572005, "learning_rate": 1.0351076777394082e-05, "loss": 0.6063, "step": 10975 }, { "epoch": 0.5, "grad_norm": 0.32751616291954194, "learning_rate": 1.0349589771536836e-05, "loss": 0.2195, "step": 10976 }, { "epoch": 0.5, "grad_norm": 0.395455680560916, "learning_rate": 1.0348102757940008e-05, "loss": 0.3455, "step": 10977 }, { "epoch": 0.5, "grad_norm": 0.8568088916795507, "learning_rate": 1.0346615736636522e-05, "loss": 0.5895, "step": 10978 }, { "epoch": 0.5, "grad_norm": 0.2251148149751252, "learning_rate": 1.03451287076593e-05, "loss": 0.1477, "step": 10979 }, { "epoch": 0.5, "grad_norm": 1.3948912100838498, "learning_rate": 1.0343641671041259e-05, "loss": 0.6137, "step": 10980 }, { "epoch": 0.5, "grad_norm": 0.39548698238259355, "learning_rate": 1.0342154626815321e-05, "loss": 0.3108, "step": 10981 }, { "epoch": 0.5, "grad_norm": 0.30743040559451984, "learning_rate": 1.0340667575014412e-05, "loss": 0.2305, "step": 10982 }, { "epoch": 0.5, "grad_norm": 0.7381738071248749, "learning_rate": 1.0339180515671447e-05, "loss": 0.4809, "step": 10983 }, { "epoch": 0.5, "grad_norm": 0.4072206828153819, "learning_rate": 1.0337693448819357e-05, "loss": 0.3617, "step": 10984 }, { "epoch": 0.5, "grad_norm": 0.33722157496957406, "learning_rate": 1.0336206374491058e-05, "loss": 0.1908, "step": 10985 }, { "epoch": 0.5, "grad_norm": 0.32698856960693284, "learning_rate": 1.0334719292719475e-05, "loss": 0.2035, "step": 10986 }, { "epoch": 0.5, "grad_norm": 0.6667163496608453, "learning_rate": 1.0333232203537528e-05, "loss": 0.4291, "step": 10987 }, { "epoch": 0.5, "grad_norm": 0.439332253060987, "learning_rate": 1.033174510697814e-05, "loss": 0.2794, "step": 10988 }, { "epoch": 0.5, "grad_norm": 0.36037835896217024, "learning_rate": 1.0330258003074238e-05, "loss": 0.2735, "step": 10989 }, { "epoch": 0.5, "grad_norm": 1.0991663982993012, "learning_rate": 1.0328770891858739e-05, "loss": 0.6658, "step": 10990 }, { "epoch": 0.5, "grad_norm": 0.3641591490136669, "learning_rate": 1.0327283773364571e-05, "loss": 0.2472, "step": 10991 }, { "epoch": 0.5, "grad_norm": 0.23437937545218776, "learning_rate": 1.0325796647624655e-05, "loss": 0.1696, "step": 10992 }, { "epoch": 0.51, "grad_norm": 1.0369622737034812, "learning_rate": 1.0324309514671918e-05, "loss": 0.4836, "step": 10993 }, { "epoch": 0.51, "grad_norm": 0.3763981052147931, "learning_rate": 1.0322822374539276e-05, "loss": 0.2859, "step": 10994 }, { "epoch": 0.51, "grad_norm": 0.6915443796131604, "learning_rate": 1.0321335227259661e-05, "loss": 0.3736, "step": 10995 }, { "epoch": 0.51, "grad_norm": 0.46033592393109823, "learning_rate": 1.0319848072865993e-05, "loss": 0.3273, "step": 10996 }, { "epoch": 0.51, "grad_norm": 0.33996324283685775, "learning_rate": 1.0318360911391198e-05, "loss": 0.2416, "step": 10997 }, { "epoch": 0.51, "grad_norm": 0.21218199318159608, "learning_rate": 1.0316873742868199e-05, "loss": 0.0906, "step": 10998 }, { "epoch": 0.51, "grad_norm": 0.6553244582405582, "learning_rate": 1.0315386567329921e-05, "loss": 0.4053, "step": 10999 }, { "epoch": 0.51, "grad_norm": 0.29094045317347333, "learning_rate": 1.0313899384809286e-05, "loss": 0.2502, "step": 11000 }, { "epoch": 0.51, "grad_norm": 0.5164140744715733, "learning_rate": 1.0312412195339222e-05, "loss": 0.3466, "step": 11001 }, { "epoch": 0.51, "grad_norm": 0.8544883586570818, "learning_rate": 1.0310924998952655e-05, "loss": 0.4663, "step": 11002 }, { "epoch": 0.51, "grad_norm": 0.44703310529145185, "learning_rate": 1.030943779568251e-05, "loss": 0.3094, "step": 11003 }, { "epoch": 0.51, "grad_norm": 0.2588407630012365, "learning_rate": 1.0307950585561705e-05, "loss": 0.2127, "step": 11004 }, { "epoch": 0.51, "grad_norm": 0.4794553487699966, "learning_rate": 1.0306463368623174e-05, "loss": 0.313, "step": 11005 }, { "epoch": 0.51, "grad_norm": 0.4575444882011544, "learning_rate": 1.0304976144899839e-05, "loss": 0.2408, "step": 11006 }, { "epoch": 0.51, "grad_norm": 0.5148704658342735, "learning_rate": 1.0303488914424624e-05, "loss": 0.3921, "step": 11007 }, { "epoch": 0.51, "grad_norm": 0.3635600628818487, "learning_rate": 1.030200167723046e-05, "loss": 0.2548, "step": 11008 }, { "epoch": 0.51, "grad_norm": 0.39388683648969564, "learning_rate": 1.0300514433350268e-05, "loss": 0.2533, "step": 11009 }, { "epoch": 0.51, "grad_norm": 0.2681368570225848, "learning_rate": 1.0299027182816979e-05, "loss": 0.201, "step": 11010 }, { "epoch": 0.51, "grad_norm": 0.6084317703029285, "learning_rate": 1.0297539925663511e-05, "loss": 0.4504, "step": 11011 }, { "epoch": 0.51, "grad_norm": 0.2900179962026342, "learning_rate": 1.0296052661922799e-05, "loss": 0.2168, "step": 11012 }, { "epoch": 0.51, "grad_norm": 0.7121251470426043, "learning_rate": 1.0294565391627766e-05, "loss": 0.4035, "step": 11013 }, { "epoch": 0.51, "grad_norm": 1.3765180809227844, "learning_rate": 1.0293078114811341e-05, "loss": 0.7935, "step": 11014 }, { "epoch": 0.51, "grad_norm": 0.3028412415396035, "learning_rate": 1.0291590831506448e-05, "loss": 0.1932, "step": 11015 }, { "epoch": 0.51, "grad_norm": 0.505283728868044, "learning_rate": 1.0290103541746015e-05, "loss": 0.3679, "step": 11016 }, { "epoch": 0.51, "grad_norm": 0.38772457846229813, "learning_rate": 1.028861624556297e-05, "loss": 0.2904, "step": 11017 }, { "epoch": 0.51, "grad_norm": 0.24977925955522015, "learning_rate": 1.0287128942990237e-05, "loss": 0.1733, "step": 11018 }, { "epoch": 0.51, "grad_norm": 1.1362177076012334, "learning_rate": 1.0285641634060745e-05, "loss": 0.6657, "step": 11019 }, { "epoch": 0.51, "grad_norm": 0.39449784649860237, "learning_rate": 1.0284154318807426e-05, "loss": 0.3218, "step": 11020 }, { "epoch": 0.51, "grad_norm": 0.34483865933176594, "learning_rate": 1.0282666997263205e-05, "loss": 0.071, "step": 11021 }, { "epoch": 0.51, "grad_norm": 0.39267986100505314, "learning_rate": 1.0281179669461006e-05, "loss": 0.3304, "step": 11022 }, { "epoch": 0.51, "grad_norm": 0.28735006796092893, "learning_rate": 1.0279692335433762e-05, "loss": 0.2425, "step": 11023 }, { "epoch": 0.51, "grad_norm": 0.39880100407061914, "learning_rate": 1.0278204995214396e-05, "loss": 0.2707, "step": 11024 }, { "epoch": 0.51, "grad_norm": 0.3841482063529238, "learning_rate": 1.0276717648835843e-05, "loss": 0.253, "step": 11025 }, { "epoch": 0.51, "grad_norm": 1.1661155048610288, "learning_rate": 1.0275230296331027e-05, "loss": 0.6591, "step": 11026 }, { "epoch": 0.51, "grad_norm": 0.5803760305081045, "learning_rate": 1.0273742937732877e-05, "loss": 0.3469, "step": 11027 }, { "epoch": 0.51, "grad_norm": 0.29909570471365665, "learning_rate": 1.0272255573074323e-05, "loss": 0.255, "step": 11028 }, { "epoch": 0.51, "grad_norm": 0.8090921312617481, "learning_rate": 1.0270768202388293e-05, "loss": 0.481, "step": 11029 }, { "epoch": 0.51, "grad_norm": 0.23083177159951437, "learning_rate": 1.0269280825707714e-05, "loss": 0.1357, "step": 11030 }, { "epoch": 0.51, "grad_norm": 0.40498719838244374, "learning_rate": 1.0267793443065519e-05, "loss": 0.276, "step": 11031 }, { "epoch": 0.51, "grad_norm": 0.3999375361250701, "learning_rate": 1.0266306054494637e-05, "loss": 0.3144, "step": 11032 }, { "epoch": 0.51, "grad_norm": 0.4111834757576695, "learning_rate": 1.0264818660027993e-05, "loss": 0.29, "step": 11033 }, { "epoch": 0.51, "grad_norm": 0.4987383428746754, "learning_rate": 1.0263331259698521e-05, "loss": 0.2901, "step": 11034 }, { "epoch": 0.51, "grad_norm": 0.2792968209507697, "learning_rate": 1.0261843853539146e-05, "loss": 0.169, "step": 11035 }, { "epoch": 0.51, "grad_norm": 0.41905061574570557, "learning_rate": 1.0260356441582801e-05, "loss": 0.2442, "step": 11036 }, { "epoch": 0.51, "grad_norm": 0.5643798811830548, "learning_rate": 1.0258869023862417e-05, "loss": 0.3793, "step": 11037 }, { "epoch": 0.51, "grad_norm": 0.7166090505061682, "learning_rate": 1.025738160041092e-05, "loss": 0.3767, "step": 11038 }, { "epoch": 0.51, "grad_norm": 0.5682703194037347, "learning_rate": 1.0255894171261244e-05, "loss": 0.3431, "step": 11039 }, { "epoch": 0.51, "grad_norm": 0.29190235193587893, "learning_rate": 1.025440673644632e-05, "loss": 0.2632, "step": 11040 }, { "epoch": 0.51, "grad_norm": 0.36187728275762626, "learning_rate": 1.025291929599907e-05, "loss": 0.1938, "step": 11041 }, { "epoch": 0.51, "grad_norm": 0.39440781236173883, "learning_rate": 1.0251431849952436e-05, "loss": 0.2241, "step": 11042 }, { "epoch": 0.51, "grad_norm": 0.3762656525175716, "learning_rate": 1.024994439833934e-05, "loss": 0.2801, "step": 11043 }, { "epoch": 0.51, "grad_norm": 0.38880791323659164, "learning_rate": 1.0248456941192721e-05, "loss": 0.2843, "step": 11044 }, { "epoch": 0.51, "grad_norm": 0.569461206901041, "learning_rate": 1.02469694785455e-05, "loss": 0.3005, "step": 11045 }, { "epoch": 0.51, "grad_norm": 0.3820406078797207, "learning_rate": 1.0245482010430614e-05, "loss": 0.3164, "step": 11046 }, { "epoch": 0.51, "grad_norm": 0.9472376581983436, "learning_rate": 1.0243994536880992e-05, "loss": 0.2226, "step": 11047 }, { "epoch": 0.51, "grad_norm": 0.26675873590984084, "learning_rate": 1.0242507057929567e-05, "loss": 0.2244, "step": 11048 }, { "epoch": 0.51, "grad_norm": 0.4331316144123683, "learning_rate": 1.024101957360927e-05, "loss": 0.3305, "step": 11049 }, { "epoch": 0.51, "grad_norm": 0.9469171435862428, "learning_rate": 1.0239532083953032e-05, "loss": 0.4979, "step": 11050 }, { "epoch": 0.51, "grad_norm": 0.29055961217834547, "learning_rate": 1.0238044588993785e-05, "loss": 0.1972, "step": 11051 }, { "epoch": 0.51, "grad_norm": 0.3428076247520623, "learning_rate": 1.0236557088764455e-05, "loss": 0.2816, "step": 11052 }, { "epoch": 0.51, "grad_norm": 1.1127425737648, "learning_rate": 1.0235069583297985e-05, "loss": 0.6239, "step": 11053 }, { "epoch": 0.51, "grad_norm": 0.2687023460359806, "learning_rate": 1.0233582072627297e-05, "loss": 0.1561, "step": 11054 }, { "epoch": 0.51, "grad_norm": 0.607335502556524, "learning_rate": 1.023209455678533e-05, "loss": 0.3873, "step": 11055 }, { "epoch": 0.51, "grad_norm": 0.46210042673315166, "learning_rate": 1.0230607035805013e-05, "loss": 0.3126, "step": 11056 }, { "epoch": 0.51, "grad_norm": 0.45564722924557644, "learning_rate": 1.0229119509719278e-05, "loss": 0.1828, "step": 11057 }, { "epoch": 0.51, "grad_norm": 0.41967665158636935, "learning_rate": 1.0227631978561057e-05, "loss": 0.3054, "step": 11058 }, { "epoch": 0.51, "grad_norm": 0.5031241223818567, "learning_rate": 1.0226144442363286e-05, "loss": 0.37, "step": 11059 }, { "epoch": 0.51, "grad_norm": 0.3893517353524682, "learning_rate": 1.0224656901158891e-05, "loss": 0.2362, "step": 11060 }, { "epoch": 0.51, "grad_norm": 0.37879346296714017, "learning_rate": 1.0223169354980811e-05, "loss": 0.2803, "step": 11061 }, { "epoch": 0.51, "grad_norm": 0.5394255934981825, "learning_rate": 1.022168180386198e-05, "loss": 0.4137, "step": 11062 }, { "epoch": 0.51, "grad_norm": 0.2870044177986189, "learning_rate": 1.0220194247835323e-05, "loss": 0.1972, "step": 11063 }, { "epoch": 0.51, "grad_norm": 0.27755654435699006, "learning_rate": 1.0218706686933778e-05, "loss": 0.2135, "step": 11064 }, { "epoch": 0.51, "grad_norm": 1.2609809748743745, "learning_rate": 1.0217219121190275e-05, "loss": 0.5953, "step": 11065 }, { "epoch": 0.51, "grad_norm": 0.745515201422701, "learning_rate": 1.0215731550637755e-05, "loss": 0.4625, "step": 11066 }, { "epoch": 0.51, "grad_norm": 0.28242285496201786, "learning_rate": 1.0214243975309145e-05, "loss": 0.244, "step": 11067 }, { "epoch": 0.51, "grad_norm": 0.4765595891423058, "learning_rate": 1.0212756395237382e-05, "loss": 0.3436, "step": 11068 }, { "epoch": 0.51, "grad_norm": 0.2670918624973917, "learning_rate": 1.0211268810455392e-05, "loss": 0.1784, "step": 11069 }, { "epoch": 0.51, "grad_norm": 0.4106546836890868, "learning_rate": 1.0209781220996118e-05, "loss": 0.2257, "step": 11070 }, { "epoch": 0.51, "grad_norm": 0.4893760740504242, "learning_rate": 1.0208293626892489e-05, "loss": 0.3352, "step": 11071 }, { "epoch": 0.51, "grad_norm": 0.4558216031942054, "learning_rate": 1.020680602817744e-05, "loss": 0.3545, "step": 11072 }, { "epoch": 0.51, "grad_norm": 0.3314692422293178, "learning_rate": 1.0205318424883906e-05, "loss": 0.2297, "step": 11073 }, { "epoch": 0.51, "grad_norm": 0.5926536833698635, "learning_rate": 1.0203830817044819e-05, "loss": 0.457, "step": 11074 }, { "epoch": 0.51, "grad_norm": 0.27225545356059627, "learning_rate": 1.0202343204693113e-05, "loss": 0.2077, "step": 11075 }, { "epoch": 0.51, "grad_norm": 0.31931900465639884, "learning_rate": 1.0200855587861724e-05, "loss": 0.2819, "step": 11076 }, { "epoch": 0.51, "grad_norm": 0.31775347338410187, "learning_rate": 1.0199367966583586e-05, "loss": 0.1046, "step": 11077 }, { "epoch": 0.51, "grad_norm": 0.7086257884375718, "learning_rate": 1.0197880340891633e-05, "loss": 0.4505, "step": 11078 }, { "epoch": 0.51, "grad_norm": 0.3533880149938981, "learning_rate": 1.0196392710818802e-05, "loss": 0.2954, "step": 11079 }, { "epoch": 0.51, "grad_norm": 0.4068241679258334, "learning_rate": 1.0194905076398025e-05, "loss": 0.2945, "step": 11080 }, { "epoch": 0.51, "grad_norm": 0.4413309684829476, "learning_rate": 1.0193417437662238e-05, "loss": 0.2144, "step": 11081 }, { "epoch": 0.51, "grad_norm": 0.28626933198560534, "learning_rate": 1.0191929794644374e-05, "loss": 0.2145, "step": 11082 }, { "epoch": 0.51, "grad_norm": 0.47148906328932133, "learning_rate": 1.0190442147377368e-05, "loss": 0.2696, "step": 11083 }, { "epoch": 0.51, "grad_norm": 0.4398633553057758, "learning_rate": 1.0188954495894156e-05, "loss": 0.356, "step": 11084 }, { "epoch": 0.51, "grad_norm": 0.3605489273302093, "learning_rate": 1.018746684022768e-05, "loss": 0.269, "step": 11085 }, { "epoch": 0.51, "grad_norm": 2.0889720699607484, "learning_rate": 1.0185979180410862e-05, "loss": 0.5646, "step": 11086 }, { "epoch": 0.51, "grad_norm": 0.3720447660940607, "learning_rate": 1.0184491516476646e-05, "loss": 0.2983, "step": 11087 }, { "epoch": 0.51, "grad_norm": 0.2969989919841698, "learning_rate": 1.0183003848457967e-05, "loss": 0.2161, "step": 11088 }, { "epoch": 0.51, "grad_norm": 0.47486043090329283, "learning_rate": 1.0181516176387758e-05, "loss": 0.2681, "step": 11089 }, { "epoch": 0.51, "grad_norm": 0.5689962329665976, "learning_rate": 1.0180028500298956e-05, "loss": 0.3177, "step": 11090 }, { "epoch": 0.51, "grad_norm": 0.3786057278409134, "learning_rate": 1.0178540820224499e-05, "loss": 0.2877, "step": 11091 }, { "epoch": 0.51, "grad_norm": 0.3728506807663648, "learning_rate": 1.0177053136197317e-05, "loss": 0.324, "step": 11092 }, { "epoch": 0.51, "grad_norm": 0.26833464584916766, "learning_rate": 1.0175565448250348e-05, "loss": 0.1377, "step": 11093 }, { "epoch": 0.51, "grad_norm": 0.3663347916132832, "learning_rate": 1.0174077756416531e-05, "loss": 0.2888, "step": 11094 }, { "epoch": 0.51, "grad_norm": 0.32273087605780454, "learning_rate": 1.01725900607288e-05, "loss": 0.2327, "step": 11095 }, { "epoch": 0.51, "grad_norm": 0.7003760566807062, "learning_rate": 1.0171102361220093e-05, "loss": 0.3337, "step": 11096 }, { "epoch": 0.51, "grad_norm": 0.34411071652747965, "learning_rate": 1.0169614657923347e-05, "loss": 0.2546, "step": 11097 }, { "epoch": 0.51, "grad_norm": 1.318305182181283, "learning_rate": 1.016812695087149e-05, "loss": 0.8254, "step": 11098 }, { "epoch": 0.51, "grad_norm": 0.40275534848577, "learning_rate": 1.0166639240097467e-05, "loss": 0.2597, "step": 11099 }, { "epoch": 0.51, "grad_norm": 0.26243218270302904, "learning_rate": 1.0165151525634212e-05, "loss": 0.204, "step": 11100 }, { "epoch": 0.51, "grad_norm": 0.46647970031651376, "learning_rate": 1.0163663807514658e-05, "loss": 0.2833, "step": 11101 }, { "epoch": 0.51, "grad_norm": 1.2420320391880806, "learning_rate": 1.016217608577175e-05, "loss": 0.4895, "step": 11102 }, { "epoch": 0.51, "grad_norm": 0.305149182704416, "learning_rate": 1.016068836043842e-05, "loss": 0.228, "step": 11103 }, { "epoch": 0.51, "grad_norm": 0.5353289828393174, "learning_rate": 1.01592006315476e-05, "loss": 0.4009, "step": 11104 }, { "epoch": 0.51, "grad_norm": 0.9313808310150433, "learning_rate": 1.0157712899132235e-05, "loss": 0.5912, "step": 11105 }, { "epoch": 0.51, "grad_norm": 0.2734611193126308, "learning_rate": 1.0156225163225258e-05, "loss": 0.1553, "step": 11106 }, { "epoch": 0.51, "grad_norm": 0.30535790167524024, "learning_rate": 1.0154737423859606e-05, "loss": 0.2667, "step": 11107 }, { "epoch": 0.51, "grad_norm": 0.835876974205203, "learning_rate": 1.0153249681068216e-05, "loss": 0.4347, "step": 11108 }, { "epoch": 0.51, "grad_norm": 0.3283353816686232, "learning_rate": 1.0151761934884028e-05, "loss": 0.1955, "step": 11109 }, { "epoch": 0.51, "grad_norm": 1.2255012875432325, "learning_rate": 1.0150274185339974e-05, "loss": 0.8265, "step": 11110 }, { "epoch": 0.51, "grad_norm": 0.37852485985237627, "learning_rate": 1.0148786432468995e-05, "loss": 0.3051, "step": 11111 }, { "epoch": 0.51, "grad_norm": 0.3533096269035579, "learning_rate": 1.0147298676304027e-05, "loss": 0.1886, "step": 11112 }, { "epoch": 0.51, "grad_norm": 0.9719589734282299, "learning_rate": 1.0145810916878011e-05, "loss": 0.4137, "step": 11113 }, { "epoch": 0.51, "grad_norm": 0.3110445773409582, "learning_rate": 1.0144323154223881e-05, "loss": 0.2175, "step": 11114 }, { "epoch": 0.51, "grad_norm": 0.3448933787285444, "learning_rate": 1.0142835388374577e-05, "loss": 0.2828, "step": 11115 }, { "epoch": 0.51, "grad_norm": 0.3931999193714425, "learning_rate": 1.0141347619363031e-05, "loss": 0.2775, "step": 11116 }, { "epoch": 0.51, "grad_norm": 1.5693789764760682, "learning_rate": 1.0139859847222188e-05, "loss": 0.821, "step": 11117 }, { "epoch": 0.51, "grad_norm": 0.46335901055483497, "learning_rate": 1.0138372071984981e-05, "loss": 0.2697, "step": 11118 }, { "epoch": 0.51, "grad_norm": 0.5613642350450226, "learning_rate": 1.013688429368435e-05, "loss": 0.2838, "step": 11119 }, { "epoch": 0.51, "grad_norm": 0.30490186720772, "learning_rate": 1.0135396512353235e-05, "loss": 0.202, "step": 11120 }, { "epoch": 0.51, "grad_norm": 0.337587792984933, "learning_rate": 1.013390872802457e-05, "loss": 0.265, "step": 11121 }, { "epoch": 0.51, "grad_norm": 0.887161139634556, "learning_rate": 1.0132420940731296e-05, "loss": 0.5464, "step": 11122 }, { "epoch": 0.51, "grad_norm": 0.37721152658395274, "learning_rate": 1.0130933150506345e-05, "loss": 0.3038, "step": 11123 }, { "epoch": 0.51, "grad_norm": 0.3546766963781115, "learning_rate": 1.0129445357382665e-05, "loss": 0.2647, "step": 11124 }, { "epoch": 0.51, "grad_norm": 0.6832454112668832, "learning_rate": 1.012795756139319e-05, "loss": 0.3235, "step": 11125 }, { "epoch": 0.51, "grad_norm": 0.27656822087422844, "learning_rate": 1.0126469762570856e-05, "loss": 0.209, "step": 11126 }, { "epoch": 0.51, "grad_norm": 0.43252051357957694, "learning_rate": 1.0124981960948603e-05, "loss": 0.2931, "step": 11127 }, { "epoch": 0.51, "grad_norm": 0.4455438810013382, "learning_rate": 1.0123494156559372e-05, "loss": 0.3339, "step": 11128 }, { "epoch": 0.51, "grad_norm": 0.8139513224632986, "learning_rate": 1.0122006349436097e-05, "loss": 0.3379, "step": 11129 }, { "epoch": 0.51, "grad_norm": 0.3782009746274829, "learning_rate": 1.012051853961172e-05, "loss": 0.2403, "step": 11130 }, { "epoch": 0.51, "grad_norm": 0.3451349101496271, "learning_rate": 1.0119030727119177e-05, "loss": 0.3159, "step": 11131 }, { "epoch": 0.51, "grad_norm": 0.16693877286038192, "learning_rate": 1.0117542911991414e-05, "loss": 0.0735, "step": 11132 }, { "epoch": 0.51, "grad_norm": 0.4084853527808433, "learning_rate": 1.0116055094261358e-05, "loss": 0.2923, "step": 11133 }, { "epoch": 0.51, "grad_norm": 0.5909122892454841, "learning_rate": 1.0114567273961957e-05, "loss": 0.3923, "step": 11134 }, { "epoch": 0.51, "grad_norm": 0.43637866951388304, "learning_rate": 1.0113079451126147e-05, "loss": 0.2731, "step": 11135 }, { "epoch": 0.51, "grad_norm": 0.45436883259395205, "learning_rate": 1.0111591625786866e-05, "loss": 0.3079, "step": 11136 }, { "epoch": 0.51, "grad_norm": 0.5848877178866841, "learning_rate": 1.0110103797977056e-05, "loss": 0.3894, "step": 11137 }, { "epoch": 0.51, "grad_norm": 0.28410091510233076, "learning_rate": 1.0108615967729651e-05, "loss": 0.1863, "step": 11138 }, { "epoch": 0.51, "grad_norm": 0.2774330184663932, "learning_rate": 1.0107128135077594e-05, "loss": 0.2257, "step": 11139 }, { "epoch": 0.51, "grad_norm": 1.297269609552035, "learning_rate": 1.0105640300053825e-05, "loss": 0.8584, "step": 11140 }, { "epoch": 0.51, "grad_norm": 0.6296851065967783, "learning_rate": 1.010415246269128e-05, "loss": 0.4334, "step": 11141 }, { "epoch": 0.51, "grad_norm": 0.38546578461733194, "learning_rate": 1.01026646230229e-05, "loss": 0.227, "step": 11142 }, { "epoch": 0.51, "grad_norm": 0.36798934964874497, "learning_rate": 1.0101176781081625e-05, "loss": 0.3093, "step": 11143 }, { "epoch": 0.51, "grad_norm": 0.44068530217699664, "learning_rate": 1.0099688936900393e-05, "loss": 0.2462, "step": 11144 }, { "epoch": 0.51, "grad_norm": 0.350311681114434, "learning_rate": 1.0098201090512145e-05, "loss": 0.1592, "step": 11145 }, { "epoch": 0.51, "grad_norm": 0.42932559713064206, "learning_rate": 1.0096713241949818e-05, "loss": 0.3418, "step": 11146 }, { "epoch": 0.51, "grad_norm": 0.35953925831838174, "learning_rate": 1.0095225391246353e-05, "loss": 0.3393, "step": 11147 }, { "epoch": 0.51, "grad_norm": 0.20626637457769326, "learning_rate": 1.009373753843469e-05, "loss": 0.0922, "step": 11148 }, { "epoch": 0.51, "grad_norm": 0.4588956955044267, "learning_rate": 1.0092249683547767e-05, "loss": 0.2919, "step": 11149 }, { "epoch": 0.51, "grad_norm": 0.34776968576124057, "learning_rate": 1.009076182661853e-05, "loss": 0.2481, "step": 11150 }, { "epoch": 0.51, "grad_norm": 0.4035561576114649, "learning_rate": 1.0089273967679908e-05, "loss": 0.2858, "step": 11151 }, { "epoch": 0.51, "grad_norm": 0.3527852687609695, "learning_rate": 1.0087786106764849e-05, "loss": 0.2759, "step": 11152 }, { "epoch": 0.51, "grad_norm": 0.7045963099686833, "learning_rate": 1.008629824390629e-05, "loss": 0.4592, "step": 11153 }, { "epoch": 0.51, "grad_norm": 0.4065355394552864, "learning_rate": 1.0084810379137171e-05, "loss": 0.213, "step": 11154 }, { "epoch": 0.51, "grad_norm": 0.3352115365960826, "learning_rate": 1.008332251249043e-05, "loss": 0.2497, "step": 11155 }, { "epoch": 0.51, "grad_norm": 1.0664716385509445, "learning_rate": 1.0081834643999013e-05, "loss": 0.554, "step": 11156 }, { "epoch": 0.51, "grad_norm": 0.5800152037704983, "learning_rate": 1.0080346773695852e-05, "loss": 0.3827, "step": 11157 }, { "epoch": 0.51, "grad_norm": 0.42900808729035916, "learning_rate": 1.0078858901613893e-05, "loss": 0.2614, "step": 11158 }, { "epoch": 0.51, "grad_norm": 0.37349791529218285, "learning_rate": 1.0077371027786072e-05, "loss": 0.3323, "step": 11159 }, { "epoch": 0.51, "grad_norm": 0.24955603822488587, "learning_rate": 1.0075883152245334e-05, "loss": 0.1691, "step": 11160 }, { "epoch": 0.51, "grad_norm": 0.4946665323779758, "learning_rate": 1.0074395275024613e-05, "loss": 0.2561, "step": 11161 }, { "epoch": 0.51, "grad_norm": 0.3476103384693661, "learning_rate": 1.0072907396156854e-05, "loss": 0.3094, "step": 11162 }, { "epoch": 0.51, "grad_norm": 0.8422198629867995, "learning_rate": 1.0071419515674997e-05, "loss": 0.4948, "step": 11163 }, { "epoch": 0.51, "grad_norm": 0.3729865894270189, "learning_rate": 1.0069931633611978e-05, "loss": 0.2891, "step": 11164 }, { "epoch": 0.51, "grad_norm": 0.2972319733650828, "learning_rate": 1.006844375000074e-05, "loss": 0.1808, "step": 11165 }, { "epoch": 0.51, "grad_norm": 0.5009550121719311, "learning_rate": 1.0066955864874223e-05, "loss": 0.322, "step": 11166 }, { "epoch": 0.51, "grad_norm": 0.3493637813342311, "learning_rate": 1.0065467978265371e-05, "loss": 0.2818, "step": 11167 }, { "epoch": 0.51, "grad_norm": 1.0220257264761619, "learning_rate": 1.0063980090207119e-05, "loss": 0.3339, "step": 11168 }, { "epoch": 0.51, "grad_norm": 0.5615986811009043, "learning_rate": 1.0062492200732413e-05, "loss": 0.3697, "step": 11169 }, { "epoch": 0.51, "grad_norm": 0.3455543214289564, "learning_rate": 1.0061004309874183e-05, "loss": 0.2905, "step": 11170 }, { "epoch": 0.51, "grad_norm": 0.3423145742822945, "learning_rate": 1.005951641766538e-05, "loss": 0.2574, "step": 11171 }, { "epoch": 0.51, "grad_norm": 0.2393818136195504, "learning_rate": 1.005802852413894e-05, "loss": 0.1509, "step": 11172 }, { "epoch": 0.51, "grad_norm": 0.4073349823978717, "learning_rate": 1.0056540629327804e-05, "loss": 0.2843, "step": 11173 }, { "epoch": 0.51, "grad_norm": 0.47295796194697265, "learning_rate": 1.0055052733264916e-05, "loss": 0.258, "step": 11174 }, { "epoch": 0.51, "grad_norm": 0.44165954125187035, "learning_rate": 1.0053564835983212e-05, "loss": 0.3051, "step": 11175 }, { "epoch": 0.51, "grad_norm": 0.43851128554636953, "learning_rate": 1.0052076937515633e-05, "loss": 0.3454, "step": 11176 }, { "epoch": 0.51, "grad_norm": 0.8199366589898711, "learning_rate": 1.0050589037895122e-05, "loss": 0.523, "step": 11177 }, { "epoch": 0.51, "grad_norm": 0.2733779867214866, "learning_rate": 1.0049101137154617e-05, "loss": 0.1903, "step": 11178 }, { "epoch": 0.51, "grad_norm": 0.32619786994552724, "learning_rate": 1.0047613235327063e-05, "loss": 0.2509, "step": 11179 }, { "epoch": 0.51, "grad_norm": 0.8350626947716103, "learning_rate": 1.0046125332445396e-05, "loss": 0.4659, "step": 11180 }, { "epoch": 0.51, "grad_norm": 0.6936321210038078, "learning_rate": 1.0044637428542559e-05, "loss": 0.2969, "step": 11181 }, { "epoch": 0.51, "grad_norm": 0.36990743731921427, "learning_rate": 1.0043149523651492e-05, "loss": 0.3158, "step": 11182 }, { "epoch": 0.51, "grad_norm": 0.4087959050685897, "learning_rate": 1.0041661617805134e-05, "loss": 0.3317, "step": 11183 }, { "epoch": 0.51, "grad_norm": 0.1922514543858804, "learning_rate": 1.0040173711036431e-05, "loss": 0.0833, "step": 11184 }, { "epoch": 0.51, "grad_norm": 0.3759150054440568, "learning_rate": 1.0038685803378321e-05, "loss": 0.2984, "step": 11185 }, { "epoch": 0.51, "grad_norm": 0.39279462500015266, "learning_rate": 1.0037197894863744e-05, "loss": 0.3286, "step": 11186 }, { "epoch": 0.51, "grad_norm": 0.7265277052619795, "learning_rate": 1.0035709985525639e-05, "loss": 0.2795, "step": 11187 }, { "epoch": 0.51, "grad_norm": 0.3871245017582081, "learning_rate": 1.0034222075396954e-05, "loss": 0.2785, "step": 11188 }, { "epoch": 0.51, "grad_norm": 1.3284169911656147, "learning_rate": 1.003273416451062e-05, "loss": 0.7689, "step": 11189 }, { "epoch": 0.51, "grad_norm": 0.30639494721357774, "learning_rate": 1.0031246252899585e-05, "loss": 0.2726, "step": 11190 }, { "epoch": 0.51, "grad_norm": 0.265589377946439, "learning_rate": 1.002975834059679e-05, "loss": 0.1709, "step": 11191 }, { "epoch": 0.51, "grad_norm": 0.7955172943110976, "learning_rate": 1.0028270427635175e-05, "loss": 0.4264, "step": 11192 }, { "epoch": 0.51, "grad_norm": 0.6993300931080364, "learning_rate": 1.0026782514047675e-05, "loss": 0.4032, "step": 11193 }, { "epoch": 0.51, "grad_norm": 0.3412634157856217, "learning_rate": 1.002529459986724e-05, "loss": 0.2158, "step": 11194 }, { "epoch": 0.51, "grad_norm": 0.3758819272844595, "learning_rate": 1.0023806685126803e-05, "loss": 0.3144, "step": 11195 }, { "epoch": 0.51, "grad_norm": 0.23917139646982116, "learning_rate": 1.0022318769859311e-05, "loss": 0.1312, "step": 11196 }, { "epoch": 0.51, "grad_norm": 0.33747696853205955, "learning_rate": 1.0020830854097708e-05, "loss": 0.1998, "step": 11197 }, { "epoch": 0.51, "grad_norm": 0.46826489130353466, "learning_rate": 1.0019342937874923e-05, "loss": 0.3178, "step": 11198 }, { "epoch": 0.51, "grad_norm": 0.9646338159483399, "learning_rate": 1.0017855021223908e-05, "loss": 0.4758, "step": 11199 }, { "epoch": 0.51, "grad_norm": 0.29606727463378635, "learning_rate": 1.0016367104177596e-05, "loss": 0.202, "step": 11200 }, { "epoch": 0.51, "grad_norm": 1.0422383555108723, "learning_rate": 1.0014879186768936e-05, "loss": 0.6075, "step": 11201 }, { "epoch": 0.51, "grad_norm": 0.36708381040153193, "learning_rate": 1.0013391269030863e-05, "loss": 0.3222, "step": 11202 }, { "epoch": 0.51, "grad_norm": 0.2870291472069312, "learning_rate": 1.0011903350996321e-05, "loss": 0.2151, "step": 11203 }, { "epoch": 0.51, "grad_norm": 0.5161731641855376, "learning_rate": 1.001041543269825e-05, "loss": 0.2559, "step": 11204 }, { "epoch": 0.51, "grad_norm": 1.387145976415137, "learning_rate": 1.0008927514169593e-05, "loss": 0.7648, "step": 11205 }, { "epoch": 0.51, "grad_norm": 0.2941844415284564, "learning_rate": 1.0007439595443284e-05, "loss": 0.2497, "step": 11206 }, { "epoch": 0.51, "grad_norm": 0.45092554426041004, "learning_rate": 1.0005951676552277e-05, "loss": 0.2796, "step": 11207 }, { "epoch": 0.51, "grad_norm": 0.7936677439200935, "learning_rate": 1.0004463757529501e-05, "loss": 0.4903, "step": 11208 }, { "epoch": 0.51, "grad_norm": 0.33338521114220404, "learning_rate": 1.0002975838407904e-05, "loss": 0.2656, "step": 11209 }, { "epoch": 0.51, "grad_norm": 0.3685438092494757, "learning_rate": 1.0001487919220422e-05, "loss": 0.2723, "step": 11210 }, { "epoch": 0.52, "grad_norm": 0.2971943943916271, "learning_rate": 1e-05, "loss": 0.1866, "step": 11211 }, { "epoch": 0.52, "grad_norm": 0.33473866336032776, "learning_rate": 9.998512080779581e-06, "loss": 0.2558, "step": 11212 }, { "epoch": 0.52, "grad_norm": 0.9026448678234319, "learning_rate": 9.9970241615921e-06, "loss": 0.3847, "step": 11213 }, { "epoch": 0.52, "grad_norm": 0.3277416205435476, "learning_rate": 9.9955362424705e-06, "loss": 0.2882, "step": 11214 }, { "epoch": 0.52, "grad_norm": 0.3722278351398582, "learning_rate": 9.994048323447728e-06, "loss": 0.2672, "step": 11215 }, { "epoch": 0.52, "grad_norm": 0.41749483195574033, "learning_rate": 9.992560404556717e-06, "loss": 0.2839, "step": 11216 }, { "epoch": 0.52, "grad_norm": 0.2935212728183298, "learning_rate": 9.991072485830412e-06, "loss": 0.1747, "step": 11217 }, { "epoch": 0.52, "grad_norm": 0.38045787413717364, "learning_rate": 9.989584567301751e-06, "loss": 0.2632, "step": 11218 }, { "epoch": 0.52, "grad_norm": 0.5516680955671937, "learning_rate": 9.98809664900368e-06, "loss": 0.4078, "step": 11219 }, { "epoch": 0.52, "grad_norm": 0.6095561966577813, "learning_rate": 9.986608730969139e-06, "loss": 0.3166, "step": 11220 }, { "epoch": 0.52, "grad_norm": 0.33893447454938064, "learning_rate": 9.98512081323107e-06, "loss": 0.2665, "step": 11221 }, { "epoch": 0.52, "grad_norm": 0.3786551367630742, "learning_rate": 9.983632895822405e-06, "loss": 0.3031, "step": 11222 }, { "epoch": 0.52, "grad_norm": 0.200723937055775, "learning_rate": 9.982144978776096e-06, "loss": 0.0994, "step": 11223 }, { "epoch": 0.52, "grad_norm": 0.34562802744888405, "learning_rate": 9.980657062125079e-06, "loss": 0.2652, "step": 11224 }, { "epoch": 0.52, "grad_norm": 1.2122371224355302, "learning_rate": 9.979169145902297e-06, "loss": 0.5971, "step": 11225 }, { "epoch": 0.52, "grad_norm": 0.4319519760350492, "learning_rate": 9.977681230140689e-06, "loss": 0.2942, "step": 11226 }, { "epoch": 0.52, "grad_norm": 0.39573129478995217, "learning_rate": 9.976193314873199e-06, "loss": 0.2792, "step": 11227 }, { "epoch": 0.52, "grad_norm": 1.1944256466158096, "learning_rate": 9.974705400132764e-06, "loss": 0.5797, "step": 11228 }, { "epoch": 0.52, "grad_norm": 0.32227825350044303, "learning_rate": 9.973217485952329e-06, "loss": 0.2195, "step": 11229 }, { "epoch": 0.52, "grad_norm": 0.28344778821538724, "learning_rate": 9.971729572364832e-06, "loss": 0.2113, "step": 11230 }, { "epoch": 0.52, "grad_norm": 0.5204925423101915, "learning_rate": 9.970241659403212e-06, "loss": 0.3675, "step": 11231 }, { "epoch": 0.52, "grad_norm": 0.699345778993736, "learning_rate": 9.968753747100417e-06, "loss": 0.3945, "step": 11232 }, { "epoch": 0.52, "grad_norm": 0.36399141131955065, "learning_rate": 9.967265835489384e-06, "loss": 0.1773, "step": 11233 }, { "epoch": 0.52, "grad_norm": 0.30951729750322865, "learning_rate": 9.965777924603053e-06, "loss": 0.2813, "step": 11234 }, { "epoch": 0.52, "grad_norm": 0.38650993302524245, "learning_rate": 9.964290014474361e-06, "loss": 0.2628, "step": 11235 }, { "epoch": 0.52, "grad_norm": 0.34970039826059546, "learning_rate": 9.96280210513626e-06, "loss": 0.1945, "step": 11236 }, { "epoch": 0.52, "grad_norm": 0.4841620370183531, "learning_rate": 9.961314196621682e-06, "loss": 0.3121, "step": 11237 }, { "epoch": 0.52, "grad_norm": 0.3833296125253442, "learning_rate": 9.95982628896357e-06, "loss": 0.3079, "step": 11238 }, { "epoch": 0.52, "grad_norm": 0.33390159611624315, "learning_rate": 9.958338382194866e-06, "loss": 0.1924, "step": 11239 }, { "epoch": 0.52, "grad_norm": 0.992739510221968, "learning_rate": 9.956850476348512e-06, "loss": 0.5727, "step": 11240 }, { "epoch": 0.52, "grad_norm": 1.3499892433771807, "learning_rate": 9.955362571457445e-06, "loss": 0.7246, "step": 11241 }, { "epoch": 0.52, "grad_norm": 0.3185155876068158, "learning_rate": 9.953874667554608e-06, "loss": 0.2615, "step": 11242 }, { "epoch": 0.52, "grad_norm": 0.35312634375467317, "learning_rate": 9.952386764672942e-06, "loss": 0.2053, "step": 11243 }, { "epoch": 0.52, "grad_norm": 0.44030432490179977, "learning_rate": 9.950898862845385e-06, "loss": 0.2938, "step": 11244 }, { "epoch": 0.52, "grad_norm": 0.34866247325376065, "learning_rate": 9.949410962104881e-06, "loss": 0.235, "step": 11245 }, { "epoch": 0.52, "grad_norm": 0.38645176329131437, "learning_rate": 9.94792306248437e-06, "loss": 0.258, "step": 11246 }, { "epoch": 0.52, "grad_norm": 0.8863989591481527, "learning_rate": 9.946435164016793e-06, "loss": 0.5814, "step": 11247 }, { "epoch": 0.52, "grad_norm": 0.4353429256861426, "learning_rate": 9.944947266735084e-06, "loss": 0.3107, "step": 11248 }, { "epoch": 0.52, "grad_norm": 0.7305859198866698, "learning_rate": 9.943459370672197e-06, "loss": 0.2872, "step": 11249 }, { "epoch": 0.52, "grad_norm": 0.24294457256971033, "learning_rate": 9.941971475861063e-06, "loss": 0.2204, "step": 11250 }, { "epoch": 0.52, "grad_norm": 0.5799440963268557, "learning_rate": 9.940483582334625e-06, "loss": 0.2872, "step": 11251 }, { "epoch": 0.52, "grad_norm": 0.4541260190286222, "learning_rate": 9.938995690125819e-06, "loss": 0.3084, "step": 11252 }, { "epoch": 0.52, "grad_norm": 0.34213894040798587, "learning_rate": 9.937507799267592e-06, "loss": 0.2791, "step": 11253 }, { "epoch": 0.52, "grad_norm": 0.456746841725573, "learning_rate": 9.936019909792882e-06, "loss": 0.3071, "step": 11254 }, { "epoch": 0.52, "grad_norm": 0.5328886240404139, "learning_rate": 9.934532021734632e-06, "loss": 0.3467, "step": 11255 }, { "epoch": 0.52, "grad_norm": 0.22139082571634994, "learning_rate": 9.933044135125777e-06, "loss": 0.1058, "step": 11256 }, { "epoch": 0.52, "grad_norm": 0.45115088557737576, "learning_rate": 9.931556249999262e-06, "loss": 0.325, "step": 11257 }, { "epoch": 0.52, "grad_norm": 0.3327462141462597, "learning_rate": 9.930068366388026e-06, "loss": 0.3022, "step": 11258 }, { "epoch": 0.52, "grad_norm": 0.8672935438844294, "learning_rate": 9.928580484325008e-06, "loss": 0.4771, "step": 11259 }, { "epoch": 0.52, "grad_norm": 0.38600209057425866, "learning_rate": 9.927092603843149e-06, "loss": 0.288, "step": 11260 }, { "epoch": 0.52, "grad_norm": 0.5213589895469523, "learning_rate": 9.925604724975389e-06, "loss": 0.3664, "step": 11261 }, { "epoch": 0.52, "grad_norm": 0.22231424609114156, "learning_rate": 9.92411684775467e-06, "loss": 0.1709, "step": 11262 }, { "epoch": 0.52, "grad_norm": 0.40907901381302847, "learning_rate": 9.92262897221393e-06, "loss": 0.2596, "step": 11263 }, { "epoch": 0.52, "grad_norm": 0.5730078095353418, "learning_rate": 9.921141098386112e-06, "loss": 0.3276, "step": 11264 }, { "epoch": 0.52, "grad_norm": 0.5177099329467808, "learning_rate": 9.919653226304148e-06, "loss": 0.3828, "step": 11265 }, { "epoch": 0.52, "grad_norm": 0.31081082913775293, "learning_rate": 9.918165356000989e-06, "loss": 0.2191, "step": 11266 }, { "epoch": 0.52, "grad_norm": 0.5301305128225989, "learning_rate": 9.916677487509572e-06, "loss": 0.3618, "step": 11267 }, { "epoch": 0.52, "grad_norm": 0.2783692418220253, "learning_rate": 9.915189620862834e-06, "loss": 0.1747, "step": 11268 }, { "epoch": 0.52, "grad_norm": 0.21618053580410504, "learning_rate": 9.91370175609371e-06, "loss": 0.0636, "step": 11269 }, { "epoch": 0.52, "grad_norm": 0.3232330796749964, "learning_rate": 9.912213893235152e-06, "loss": 0.2903, "step": 11270 }, { "epoch": 0.52, "grad_norm": 0.5895960344730679, "learning_rate": 9.910726032320093e-06, "loss": 0.4033, "step": 11271 }, { "epoch": 0.52, "grad_norm": 0.42060888505678057, "learning_rate": 9.909238173381475e-06, "loss": 0.2008, "step": 11272 }, { "epoch": 0.52, "grad_norm": 0.33757020396779014, "learning_rate": 9.907750316452234e-06, "loss": 0.3106, "step": 11273 }, { "epoch": 0.52, "grad_norm": 0.5283602641719688, "learning_rate": 9.906262461565312e-06, "loss": 0.3552, "step": 11274 }, { "epoch": 0.52, "grad_norm": 0.1942295307555217, "learning_rate": 9.904774608753649e-06, "loss": 0.12, "step": 11275 }, { "epoch": 0.52, "grad_norm": 0.8435723647042749, "learning_rate": 9.903286758050185e-06, "loss": 0.3501, "step": 11276 }, { "epoch": 0.52, "grad_norm": 0.5686861867986577, "learning_rate": 9.90179890948786e-06, "loss": 0.3161, "step": 11277 }, { "epoch": 0.52, "grad_norm": 0.43755240658705796, "learning_rate": 9.900311063099608e-06, "loss": 0.3186, "step": 11278 }, { "epoch": 0.52, "grad_norm": 0.3752232116993393, "learning_rate": 9.898823218918378e-06, "loss": 0.2695, "step": 11279 }, { "epoch": 0.52, "grad_norm": 0.3873780875450621, "learning_rate": 9.897335376977104e-06, "loss": 0.2608, "step": 11280 }, { "epoch": 0.52, "grad_norm": 0.29040409170693654, "learning_rate": 9.895847537308724e-06, "loss": 0.2196, "step": 11281 }, { "epoch": 0.52, "grad_norm": 0.31076278805811025, "learning_rate": 9.894359699946177e-06, "loss": 0.2396, "step": 11282 }, { "epoch": 0.52, "grad_norm": 0.741094386318254, "learning_rate": 9.892871864922407e-06, "loss": 0.4055, "step": 11283 }, { "epoch": 0.52, "grad_norm": 0.6175234579844633, "learning_rate": 9.891384032270352e-06, "loss": 0.4118, "step": 11284 }, { "epoch": 0.52, "grad_norm": 0.3890266334144168, "learning_rate": 9.889896202022949e-06, "loss": 0.2347, "step": 11285 }, { "epoch": 0.52, "grad_norm": 0.34556806342497937, "learning_rate": 9.888408374213134e-06, "loss": 0.3193, "step": 11286 }, { "epoch": 0.52, "grad_norm": 0.276107078393333, "learning_rate": 9.886920548873855e-06, "loss": 0.1546, "step": 11287 }, { "epoch": 0.52, "grad_norm": 0.37905498470849025, "learning_rate": 9.885432726038044e-06, "loss": 0.2516, "step": 11288 }, { "epoch": 0.52, "grad_norm": 0.4098254633697511, "learning_rate": 9.883944905738643e-06, "loss": 0.3253, "step": 11289 }, { "epoch": 0.52, "grad_norm": 0.7697523784411152, "learning_rate": 9.882457088008591e-06, "loss": 0.4142, "step": 11290 }, { "epoch": 0.52, "grad_norm": 0.4037712267513204, "learning_rate": 9.880969272880823e-06, "loss": 0.3068, "step": 11291 }, { "epoch": 0.52, "grad_norm": 0.9777081574517789, "learning_rate": 9.879481460388283e-06, "loss": 0.2415, "step": 11292 }, { "epoch": 0.52, "grad_norm": 0.2543500946524688, "learning_rate": 9.877993650563906e-06, "loss": 0.2068, "step": 11293 }, { "epoch": 0.52, "grad_norm": 0.3466093786374618, "learning_rate": 9.876505843440633e-06, "loss": 0.2906, "step": 11294 }, { "epoch": 0.52, "grad_norm": 0.7228353620429991, "learning_rate": 9.875018039051397e-06, "loss": 0.3118, "step": 11295 }, { "epoch": 0.52, "grad_norm": 0.6457441899501678, "learning_rate": 9.873530237429147e-06, "loss": 0.3875, "step": 11296 }, { "epoch": 0.52, "grad_norm": 0.3750135617048421, "learning_rate": 9.872042438606814e-06, "loss": 0.308, "step": 11297 }, { "epoch": 0.52, "grad_norm": 0.4834283979178994, "learning_rate": 9.87055464261734e-06, "loss": 0.2526, "step": 11298 }, { "epoch": 0.52, "grad_norm": 0.38295621715687606, "learning_rate": 9.869066849493653e-06, "loss": 0.2284, "step": 11299 }, { "epoch": 0.52, "grad_norm": 0.4213315086374267, "learning_rate": 9.867579059268707e-06, "loss": 0.316, "step": 11300 }, { "epoch": 0.52, "grad_norm": 0.30341307995900973, "learning_rate": 9.866091271975433e-06, "loss": 0.2413, "step": 11301 }, { "epoch": 0.52, "grad_norm": 0.52109829567585, "learning_rate": 9.864603487646768e-06, "loss": 0.3096, "step": 11302 }, { "epoch": 0.52, "grad_norm": 0.4270480410419949, "learning_rate": 9.863115706315652e-06, "loss": 0.3173, "step": 11303 }, { "epoch": 0.52, "grad_norm": 1.0263401120999824, "learning_rate": 9.86162792801502e-06, "loss": 0.6095, "step": 11304 }, { "epoch": 0.52, "grad_norm": 0.35577829920607223, "learning_rate": 9.860140152777815e-06, "loss": 0.2679, "step": 11305 }, { "epoch": 0.52, "grad_norm": 0.35757300119462043, "learning_rate": 9.858652380636972e-06, "loss": 0.2686, "step": 11306 }, { "epoch": 0.52, "grad_norm": 0.2927006155894064, "learning_rate": 9.857164611625428e-06, "loss": 0.2103, "step": 11307 }, { "epoch": 0.52, "grad_norm": 0.5198037093704636, "learning_rate": 9.85567684577612e-06, "loss": 0.2643, "step": 11308 }, { "epoch": 0.52, "grad_norm": 0.3113552847165077, "learning_rate": 9.85418908312199e-06, "loss": 0.2696, "step": 11309 }, { "epoch": 0.52, "grad_norm": 0.5339207126231291, "learning_rate": 9.852701323695974e-06, "loss": 0.3749, "step": 11310 }, { "epoch": 0.52, "grad_norm": 0.5587483919713606, "learning_rate": 9.851213567531008e-06, "loss": 0.2957, "step": 11311 }, { "epoch": 0.52, "grad_norm": 0.367251509035229, "learning_rate": 9.849725814660027e-06, "loss": 0.3273, "step": 11312 }, { "epoch": 0.52, "grad_norm": 0.28592611477158114, "learning_rate": 9.848238065115975e-06, "loss": 0.1959, "step": 11313 }, { "epoch": 0.52, "grad_norm": 0.3443597095437619, "learning_rate": 9.846750318931788e-06, "loss": 0.2423, "step": 11314 }, { "epoch": 0.52, "grad_norm": 0.3884438100932372, "learning_rate": 9.8452625761404e-06, "loss": 0.2972, "step": 11315 }, { "epoch": 0.52, "grad_norm": 0.8485851369986501, "learning_rate": 9.843774836774744e-06, "loss": 0.4583, "step": 11316 }, { "epoch": 0.52, "grad_norm": 0.3285333780022082, "learning_rate": 9.842287100867765e-06, "loss": 0.2978, "step": 11317 }, { "epoch": 0.52, "grad_norm": 0.34938287201288365, "learning_rate": 9.840799368452401e-06, "loss": 0.2293, "step": 11318 }, { "epoch": 0.52, "grad_norm": 0.30362335455866146, "learning_rate": 9.839311639561584e-06, "loss": 0.1962, "step": 11319 }, { "epoch": 0.52, "grad_norm": 0.8043207420081717, "learning_rate": 9.837823914228253e-06, "loss": 0.4987, "step": 11320 }, { "epoch": 0.52, "grad_norm": 0.28869154554550674, "learning_rate": 9.83633619248534e-06, "loss": 0.2234, "step": 11321 }, { "epoch": 0.52, "grad_norm": 0.5543187565832229, "learning_rate": 9.834848474365792e-06, "loss": 0.3635, "step": 11322 }, { "epoch": 0.52, "grad_norm": 0.702133414958866, "learning_rate": 9.833360759902536e-06, "loss": 0.4383, "step": 11323 }, { "epoch": 0.52, "grad_norm": 0.3067174406482953, "learning_rate": 9.831873049128513e-06, "loss": 0.1878, "step": 11324 }, { "epoch": 0.52, "grad_norm": 0.2298526231679964, "learning_rate": 9.830385342076659e-06, "loss": 0.2125, "step": 11325 }, { "epoch": 0.52, "grad_norm": 1.2998040450816835, "learning_rate": 9.828897638779909e-06, "loss": 0.763, "step": 11326 }, { "epoch": 0.52, "grad_norm": 0.33562898688016185, "learning_rate": 9.827409939271201e-06, "loss": 0.2123, "step": 11327 }, { "epoch": 0.52, "grad_norm": 0.9228658731387412, "learning_rate": 9.825922243583472e-06, "loss": 0.3987, "step": 11328 }, { "epoch": 0.52, "grad_norm": 0.3723737939589882, "learning_rate": 9.824434551749652e-06, "loss": 0.3106, "step": 11329 }, { "epoch": 0.52, "grad_norm": 0.33547978258230177, "learning_rate": 9.822946863802686e-06, "loss": 0.2535, "step": 11330 }, { "epoch": 0.52, "grad_norm": 0.29328485358672146, "learning_rate": 9.821459179775506e-06, "loss": 0.1058, "step": 11331 }, { "epoch": 0.52, "grad_norm": 0.5077012902668878, "learning_rate": 9.819971499701046e-06, "loss": 0.4041, "step": 11332 }, { "epoch": 0.52, "grad_norm": 0.2989586151903439, "learning_rate": 9.818483823612249e-06, "loss": 0.2661, "step": 11333 }, { "epoch": 0.52, "grad_norm": 0.7657812988543198, "learning_rate": 9.816996151542034e-06, "loss": 0.2283, "step": 11334 }, { "epoch": 0.52, "grad_norm": 0.6749205299943981, "learning_rate": 9.815508483523355e-06, "loss": 0.4603, "step": 11335 }, { "epoch": 0.52, "grad_norm": 0.44628229994162955, "learning_rate": 9.81402081958914e-06, "loss": 0.2986, "step": 11336 }, { "epoch": 0.52, "grad_norm": 0.29996002564445495, "learning_rate": 9.812533159772327e-06, "loss": 0.2469, "step": 11337 }, { "epoch": 0.52, "grad_norm": 0.7651913848861398, "learning_rate": 9.811045504105844e-06, "loss": 0.5218, "step": 11338 }, { "epoch": 0.52, "grad_norm": 0.4012912215015603, "learning_rate": 9.809557852622634e-06, "loss": 0.2843, "step": 11339 }, { "epoch": 0.52, "grad_norm": 0.6749920705288126, "learning_rate": 9.808070205355631e-06, "loss": 0.2226, "step": 11340 }, { "epoch": 0.52, "grad_norm": 0.37475171715598016, "learning_rate": 9.806582562337768e-06, "loss": 0.3168, "step": 11341 }, { "epoch": 0.52, "grad_norm": 0.36189697779983065, "learning_rate": 9.805094923601975e-06, "loss": 0.2834, "step": 11342 }, { "epoch": 0.52, "grad_norm": 0.8689978307195035, "learning_rate": 9.8036072891812e-06, "loss": 0.511, "step": 11343 }, { "epoch": 0.52, "grad_norm": 0.4106378196832395, "learning_rate": 9.802119659108369e-06, "loss": 0.2731, "step": 11344 }, { "epoch": 0.52, "grad_norm": 0.3179697524653702, "learning_rate": 9.800632033416417e-06, "loss": 0.2674, "step": 11345 }, { "epoch": 0.52, "grad_norm": 0.41790338075564004, "learning_rate": 9.799144412138276e-06, "loss": 0.3271, "step": 11346 }, { "epoch": 0.52, "grad_norm": 0.2652859563513943, "learning_rate": 9.797656795306887e-06, "loss": 0.103, "step": 11347 }, { "epoch": 0.52, "grad_norm": 0.3431985531842182, "learning_rate": 9.796169182955184e-06, "loss": 0.2484, "step": 11348 }, { "epoch": 0.52, "grad_norm": 0.38953964463762425, "learning_rate": 9.794681575116097e-06, "loss": 0.3164, "step": 11349 }, { "epoch": 0.52, "grad_norm": 0.7037691358095647, "learning_rate": 9.793193971822563e-06, "loss": 0.3649, "step": 11350 }, { "epoch": 0.52, "grad_norm": 0.323916426803517, "learning_rate": 9.791706373107513e-06, "loss": 0.2573, "step": 11351 }, { "epoch": 0.52, "grad_norm": 0.9684206391950299, "learning_rate": 9.790218779003883e-06, "loss": 0.564, "step": 11352 }, { "epoch": 0.52, "grad_norm": 0.22811918207022813, "learning_rate": 9.78873118954461e-06, "loss": 0.1758, "step": 11353 }, { "epoch": 0.52, "grad_norm": 0.4433453636392534, "learning_rate": 9.787243604762625e-06, "loss": 0.2696, "step": 11354 }, { "epoch": 0.52, "grad_norm": 0.5587809723680933, "learning_rate": 9.785756024690856e-06, "loss": 0.3483, "step": 11355 }, { "epoch": 0.52, "grad_norm": 0.4007653472814551, "learning_rate": 9.784268449362247e-06, "loss": 0.3269, "step": 11356 }, { "epoch": 0.52, "grad_norm": 0.38592473956239026, "learning_rate": 9.782780878809726e-06, "loss": 0.2137, "step": 11357 }, { "epoch": 0.52, "grad_norm": 0.5862428367105833, "learning_rate": 9.781293313066227e-06, "loss": 0.4022, "step": 11358 }, { "epoch": 0.52, "grad_norm": 0.2938954387758226, "learning_rate": 9.779805752164679e-06, "loss": 0.1842, "step": 11359 }, { "epoch": 0.52, "grad_norm": 0.3232736996143368, "learning_rate": 9.778318196138024e-06, "loss": 0.2014, "step": 11360 }, { "epoch": 0.52, "grad_norm": 0.3694542875218248, "learning_rate": 9.776830645019192e-06, "loss": 0.2971, "step": 11361 }, { "epoch": 0.52, "grad_norm": 0.7968486225014186, "learning_rate": 9.775343098841112e-06, "loss": 0.4654, "step": 11362 }, { "epoch": 0.52, "grad_norm": 0.3572834455559596, "learning_rate": 9.77385555763672e-06, "loss": 0.2328, "step": 11363 }, { "epoch": 0.52, "grad_norm": 0.477523655943538, "learning_rate": 9.772368021438943e-06, "loss": 0.3552, "step": 11364 }, { "epoch": 0.52, "grad_norm": 0.265109374309628, "learning_rate": 9.770880490280724e-06, "loss": 0.1987, "step": 11365 }, { "epoch": 0.52, "grad_norm": 0.35509421018407383, "learning_rate": 9.76939296419499e-06, "loss": 0.2093, "step": 11366 }, { "epoch": 0.52, "grad_norm": 1.0692402796967047, "learning_rate": 9.767905443214673e-06, "loss": 0.6192, "step": 11367 }, { "epoch": 0.52, "grad_norm": 0.47784561463110303, "learning_rate": 9.766417927372703e-06, "loss": 0.373, "step": 11368 }, { "epoch": 0.52, "grad_norm": 0.29653148175576055, "learning_rate": 9.764930416702018e-06, "loss": 0.2624, "step": 11369 }, { "epoch": 0.52, "grad_norm": 0.45777707551587066, "learning_rate": 9.763442911235546e-06, "loss": 0.2722, "step": 11370 }, { "epoch": 0.52, "grad_norm": 0.34652033969508744, "learning_rate": 9.761955411006221e-06, "loss": 0.2287, "step": 11371 }, { "epoch": 0.52, "grad_norm": 0.4339725801221266, "learning_rate": 9.760467916046971e-06, "loss": 0.2984, "step": 11372 }, { "epoch": 0.52, "grad_norm": 0.2964375909671723, "learning_rate": 9.758980426390732e-06, "loss": 0.2499, "step": 11373 }, { "epoch": 0.52, "grad_norm": 0.6822223269178299, "learning_rate": 9.757492942070436e-06, "loss": 0.4439, "step": 11374 }, { "epoch": 0.52, "grad_norm": 0.5350618239062891, "learning_rate": 9.756005463119011e-06, "loss": 0.3142, "step": 11375 }, { "epoch": 0.52, "grad_norm": 0.3241055541072353, "learning_rate": 9.754517989569386e-06, "loss": 0.2485, "step": 11376 }, { "epoch": 0.52, "grad_norm": 0.3218213588275265, "learning_rate": 9.753030521454502e-06, "loss": 0.2014, "step": 11377 }, { "epoch": 0.52, "grad_norm": 0.5953684261381536, "learning_rate": 9.751543058807282e-06, "loss": 0.2674, "step": 11378 }, { "epoch": 0.52, "grad_norm": 0.4028297162092129, "learning_rate": 9.750055601660662e-06, "loss": 0.299, "step": 11379 }, { "epoch": 0.52, "grad_norm": 0.5187622947193219, "learning_rate": 9.748568150047568e-06, "loss": 0.2899, "step": 11380 }, { "epoch": 0.52, "grad_norm": 0.3304507039502457, "learning_rate": 9.747080704000928e-06, "loss": 0.2843, "step": 11381 }, { "epoch": 0.52, "grad_norm": 0.4789894900450128, "learning_rate": 9.745593263553683e-06, "loss": 0.3613, "step": 11382 }, { "epoch": 0.52, "grad_norm": 0.20835030026720966, "learning_rate": 9.744105828738758e-06, "loss": 0.0714, "step": 11383 }, { "epoch": 0.52, "grad_norm": 0.34496789877472606, "learning_rate": 9.742618399589082e-06, "loss": 0.2837, "step": 11384 }, { "epoch": 0.52, "grad_norm": 0.3628646663858775, "learning_rate": 9.741130976137585e-06, "loss": 0.3122, "step": 11385 }, { "epoch": 0.52, "grad_norm": 0.6510925053002953, "learning_rate": 9.7396435584172e-06, "loss": 0.3648, "step": 11386 }, { "epoch": 0.52, "grad_norm": 0.4253298923912513, "learning_rate": 9.738156146460857e-06, "loss": 0.289, "step": 11387 }, { "epoch": 0.52, "grad_norm": 0.5823188576601506, "learning_rate": 9.736668740301485e-06, "loss": 0.3535, "step": 11388 }, { "epoch": 0.52, "grad_norm": 0.23488840267704067, "learning_rate": 9.735181339972007e-06, "loss": 0.1812, "step": 11389 }, { "epoch": 0.52, "grad_norm": 0.5207796888508791, "learning_rate": 9.733693945505366e-06, "loss": 0.3079, "step": 11390 }, { "epoch": 0.52, "grad_norm": 0.4079513190936559, "learning_rate": 9.732206556934484e-06, "loss": 0.336, "step": 11391 }, { "epoch": 0.52, "grad_norm": 0.39151629863631365, "learning_rate": 9.73071917429229e-06, "loss": 0.3152, "step": 11392 }, { "epoch": 0.52, "grad_norm": 0.34194062562220184, "learning_rate": 9.729231797611712e-06, "loss": 0.1516, "step": 11393 }, { "epoch": 0.52, "grad_norm": 0.44007652946439585, "learning_rate": 9.727744426925678e-06, "loss": 0.3056, "step": 11394 }, { "epoch": 0.52, "grad_norm": 0.5124702481539362, "learning_rate": 9.726257062267124e-06, "loss": 0.3255, "step": 11395 }, { "epoch": 0.52, "grad_norm": 0.3641520661523698, "learning_rate": 9.724769703668976e-06, "loss": 0.2195, "step": 11396 }, { "epoch": 0.52, "grad_norm": 0.32891859657381134, "learning_rate": 9.72328235116416e-06, "loss": 0.2864, "step": 11397 }, { "epoch": 0.52, "grad_norm": 0.475697650649854, "learning_rate": 9.721795004785604e-06, "loss": 0.2183, "step": 11398 }, { "epoch": 0.52, "grad_norm": 0.401004429903904, "learning_rate": 9.720307664566241e-06, "loss": 0.2291, "step": 11399 }, { "epoch": 0.52, "grad_norm": 0.28910835961316905, "learning_rate": 9.718820330538999e-06, "loss": 0.2737, "step": 11400 }, { "epoch": 0.52, "grad_norm": 1.0152568760270457, "learning_rate": 9.7173330027368e-06, "loss": 0.5041, "step": 11401 }, { "epoch": 0.52, "grad_norm": 0.4011210892819425, "learning_rate": 9.715845681192576e-06, "loss": 0.2455, "step": 11402 }, { "epoch": 0.52, "grad_norm": 0.5155930391421709, "learning_rate": 9.714358365939256e-06, "loss": 0.3598, "step": 11403 }, { "epoch": 0.52, "grad_norm": 0.2363770099116954, "learning_rate": 9.712871057009768e-06, "loss": 0.2071, "step": 11404 }, { "epoch": 0.52, "grad_norm": 0.6507549608403904, "learning_rate": 9.711383754437035e-06, "loss": 0.4115, "step": 11405 }, { "epoch": 0.52, "grad_norm": 0.3935240946495296, "learning_rate": 9.709896458253985e-06, "loss": 0.2266, "step": 11406 }, { "epoch": 0.52, "grad_norm": 0.565479502819885, "learning_rate": 9.708409168493554e-06, "loss": 0.3616, "step": 11407 }, { "epoch": 0.52, "grad_norm": 0.36281857444155796, "learning_rate": 9.706921885188662e-06, "loss": 0.2865, "step": 11408 }, { "epoch": 0.52, "grad_norm": 0.34970632129672796, "learning_rate": 9.705434608372238e-06, "loss": 0.2077, "step": 11409 }, { "epoch": 0.52, "grad_norm": 0.27419626997472696, "learning_rate": 9.703947338077206e-06, "loss": 0.2048, "step": 11410 }, { "epoch": 0.52, "grad_norm": 1.4491909940359593, "learning_rate": 9.702460074336489e-06, "loss": 0.7524, "step": 11411 }, { "epoch": 0.52, "grad_norm": 0.2504680531291981, "learning_rate": 9.700972817183026e-06, "loss": 0.2172, "step": 11412 }, { "epoch": 0.52, "grad_norm": 0.7419744938866444, "learning_rate": 9.699485566649734e-06, "loss": 0.4234, "step": 11413 }, { "epoch": 0.52, "grad_norm": 0.6374358339232042, "learning_rate": 9.697998322769544e-06, "loss": 0.4044, "step": 11414 }, { "epoch": 0.52, "grad_norm": 0.26159092038567966, "learning_rate": 9.696511085575377e-06, "loss": 0.164, "step": 11415 }, { "epoch": 0.52, "grad_norm": 0.4110981291628727, "learning_rate": 9.695023855100164e-06, "loss": 0.3239, "step": 11416 }, { "epoch": 0.52, "grad_norm": 0.3713800293175979, "learning_rate": 9.69353663137683e-06, "loss": 0.259, "step": 11417 }, { "epoch": 0.52, "grad_norm": 0.43293500310566646, "learning_rate": 9.692049414438298e-06, "loss": 0.2842, "step": 11418 }, { "epoch": 0.52, "grad_norm": 1.1789619839219372, "learning_rate": 9.690562204317496e-06, "loss": 0.3271, "step": 11419 }, { "epoch": 0.52, "grad_norm": 0.3415861038345586, "learning_rate": 9.689075001047348e-06, "loss": 0.2745, "step": 11420 }, { "epoch": 0.52, "grad_norm": 0.38176274871500615, "learning_rate": 9.68758780466078e-06, "loss": 0.2599, "step": 11421 }, { "epoch": 0.52, "grad_norm": 0.32576720649743524, "learning_rate": 9.686100615190718e-06, "loss": 0.1784, "step": 11422 }, { "epoch": 0.52, "grad_norm": 0.35462859933227153, "learning_rate": 9.684613432670085e-06, "loss": 0.2277, "step": 11423 }, { "epoch": 0.52, "grad_norm": 0.5310161959798916, "learning_rate": 9.683126257131801e-06, "loss": 0.2938, "step": 11424 }, { "epoch": 0.52, "grad_norm": 0.6213188308901539, "learning_rate": 9.681639088608803e-06, "loss": 0.2943, "step": 11425 }, { "epoch": 0.52, "grad_norm": 0.860346333678611, "learning_rate": 9.68015192713401e-06, "loss": 0.3872, "step": 11426 }, { "epoch": 0.52, "grad_norm": 0.4108038617526493, "learning_rate": 9.678664772740342e-06, "loss": 0.2645, "step": 11427 }, { "epoch": 0.52, "grad_norm": 0.37620974391353745, "learning_rate": 9.677177625460724e-06, "loss": 0.2642, "step": 11428 }, { "epoch": 0.53, "grad_norm": 0.2999353062838426, "learning_rate": 9.675690485328085e-06, "loss": 0.195, "step": 11429 }, { "epoch": 0.53, "grad_norm": 0.36140638604317643, "learning_rate": 9.674203352375346e-06, "loss": 0.2791, "step": 11430 }, { "epoch": 0.53, "grad_norm": 0.7273629207317491, "learning_rate": 9.672716226635432e-06, "loss": 0.4268, "step": 11431 }, { "epoch": 0.53, "grad_norm": 0.3728610525623341, "learning_rate": 9.671229108141263e-06, "loss": 0.2594, "step": 11432 }, { "epoch": 0.53, "grad_norm": 0.34425995831737904, "learning_rate": 9.669741996925765e-06, "loss": 0.2621, "step": 11433 }, { "epoch": 0.53, "grad_norm": 1.3685462751774609, "learning_rate": 9.668254893021862e-06, "loss": 0.7272, "step": 11434 }, { "epoch": 0.53, "grad_norm": 0.5532222278961284, "learning_rate": 9.666767796462477e-06, "loss": 0.2928, "step": 11435 }, { "epoch": 0.53, "grad_norm": 0.27917039264465465, "learning_rate": 9.665280707280528e-06, "loss": 0.2671, "step": 11436 }, { "epoch": 0.53, "grad_norm": 0.43656386636292366, "learning_rate": 9.663793625508945e-06, "loss": 0.293, "step": 11437 }, { "epoch": 0.53, "grad_norm": 0.28771681496248114, "learning_rate": 9.662306551180646e-06, "loss": 0.1219, "step": 11438 }, { "epoch": 0.53, "grad_norm": 0.3954165854733189, "learning_rate": 9.660819484328555e-06, "loss": 0.2817, "step": 11439 }, { "epoch": 0.53, "grad_norm": 0.4100873445124824, "learning_rate": 9.659332424985593e-06, "loss": 0.3259, "step": 11440 }, { "epoch": 0.53, "grad_norm": 0.6575522012656696, "learning_rate": 9.65784537318468e-06, "loss": 0.3048, "step": 11441 }, { "epoch": 0.53, "grad_norm": 0.408816462969731, "learning_rate": 9.656358328958743e-06, "loss": 0.3029, "step": 11442 }, { "epoch": 0.53, "grad_norm": 0.32895365550896283, "learning_rate": 9.654871292340703e-06, "loss": 0.2107, "step": 11443 }, { "epoch": 0.53, "grad_norm": 0.3625582672326299, "learning_rate": 9.65338426336348e-06, "loss": 0.3024, "step": 11444 }, { "epoch": 0.53, "grad_norm": 0.3515425836853656, "learning_rate": 9.651897242059992e-06, "loss": 0.2165, "step": 11445 }, { "epoch": 0.53, "grad_norm": 1.0243762632578093, "learning_rate": 9.650410228463165e-06, "loss": 0.6793, "step": 11446 }, { "epoch": 0.53, "grad_norm": 0.7515515438749892, "learning_rate": 9.648923222605921e-06, "loss": 0.3942, "step": 11447 }, { "epoch": 0.53, "grad_norm": 0.2511288921956044, "learning_rate": 9.647436224521179e-06, "loss": 0.2136, "step": 11448 }, { "epoch": 0.53, "grad_norm": 0.3010626523734168, "learning_rate": 9.645949234241855e-06, "loss": 0.1905, "step": 11449 }, { "epoch": 0.53, "grad_norm": 1.6032184236839973, "learning_rate": 9.644462251800876e-06, "loss": 0.792, "step": 11450 }, { "epoch": 0.53, "grad_norm": 0.322202416207073, "learning_rate": 9.64297527723116e-06, "loss": 0.2098, "step": 11451 }, { "epoch": 0.53, "grad_norm": 0.35350893925203253, "learning_rate": 9.641488310565628e-06, "loss": 0.3195, "step": 11452 }, { "epoch": 0.53, "grad_norm": 0.8993610221272335, "learning_rate": 9.640001351837198e-06, "loss": 0.434, "step": 11453 }, { "epoch": 0.53, "grad_norm": 0.32850023361112574, "learning_rate": 9.638514401078789e-06, "loss": 0.2184, "step": 11454 }, { "epoch": 0.53, "grad_norm": 0.3094838477869714, "learning_rate": 9.637027458323327e-06, "loss": 0.1956, "step": 11455 }, { "epoch": 0.53, "grad_norm": 0.3892056663489639, "learning_rate": 9.635540523603725e-06, "loss": 0.3149, "step": 11456 }, { "epoch": 0.53, "grad_norm": 0.40090164086707025, "learning_rate": 9.634053596952907e-06, "loss": 0.2714, "step": 11457 }, { "epoch": 0.53, "grad_norm": 0.837736371935118, "learning_rate": 9.632566678403784e-06, "loss": 0.3355, "step": 11458 }, { "epoch": 0.53, "grad_norm": 0.3958149049475104, "learning_rate": 9.631079767989284e-06, "loss": 0.3129, "step": 11459 }, { "epoch": 0.53, "grad_norm": 0.43668091784294355, "learning_rate": 9.629592865742323e-06, "loss": 0.2782, "step": 11460 }, { "epoch": 0.53, "grad_norm": 0.32951508316024725, "learning_rate": 9.62810597169582e-06, "loss": 0.1818, "step": 11461 }, { "epoch": 0.53, "grad_norm": 0.43646122909435, "learning_rate": 9.626619085882689e-06, "loss": 0.2697, "step": 11462 }, { "epoch": 0.53, "grad_norm": 0.46476465391500404, "learning_rate": 9.625132208335854e-06, "loss": 0.3046, "step": 11463 }, { "epoch": 0.53, "grad_norm": 0.3834897371664547, "learning_rate": 9.62364533908823e-06, "loss": 0.2607, "step": 11464 }, { "epoch": 0.53, "grad_norm": 0.8226910349365744, "learning_rate": 9.622158478172736e-06, "loss": 0.4101, "step": 11465 }, { "epoch": 0.53, "grad_norm": 0.38305506334375866, "learning_rate": 9.620671625622287e-06, "loss": 0.2823, "step": 11466 }, { "epoch": 0.53, "grad_norm": 0.376581375482681, "learning_rate": 9.619184781469804e-06, "loss": 0.2515, "step": 11467 }, { "epoch": 0.53, "grad_norm": 0.2899337141137829, "learning_rate": 9.617697945748204e-06, "loss": 0.1654, "step": 11468 }, { "epoch": 0.53, "grad_norm": 0.3702992939202412, "learning_rate": 9.616211118490404e-06, "loss": 0.2566, "step": 11469 }, { "epoch": 0.53, "grad_norm": 0.7611140009130796, "learning_rate": 9.614724299729319e-06, "loss": 0.3857, "step": 11470 }, { "epoch": 0.53, "grad_norm": 0.4185914253704048, "learning_rate": 9.613237489497861e-06, "loss": 0.2841, "step": 11471 }, { "epoch": 0.53, "grad_norm": 0.3556707860652959, "learning_rate": 9.611750687828958e-06, "loss": 0.2665, "step": 11472 }, { "epoch": 0.53, "grad_norm": 1.31587248664105, "learning_rate": 9.610263894755523e-06, "loss": 0.6628, "step": 11473 }, { "epoch": 0.53, "grad_norm": 0.24921336836317573, "learning_rate": 9.608777110310467e-06, "loss": 0.1312, "step": 11474 }, { "epoch": 0.53, "grad_norm": 0.4261668666381127, "learning_rate": 9.607290334526704e-06, "loss": 0.2881, "step": 11475 }, { "epoch": 0.53, "grad_norm": 0.33894494953454546, "learning_rate": 9.605803567437158e-06, "loss": 0.2807, "step": 11476 }, { "epoch": 0.53, "grad_norm": 0.6633081680127221, "learning_rate": 9.604316809074742e-06, "loss": 0.4015, "step": 11477 }, { "epoch": 0.53, "grad_norm": 0.5041318621952977, "learning_rate": 9.602830059472371e-06, "loss": 0.2413, "step": 11478 }, { "epoch": 0.53, "grad_norm": 0.5517624589329848, "learning_rate": 9.601343318662956e-06, "loss": 0.3919, "step": 11479 }, { "epoch": 0.53, "grad_norm": 0.23437734206630984, "learning_rate": 9.59985658667942e-06, "loss": 0.1855, "step": 11480 }, { "epoch": 0.53, "grad_norm": 0.4424236742549651, "learning_rate": 9.598369863554674e-06, "loss": 0.2494, "step": 11481 }, { "epoch": 0.53, "grad_norm": 0.4978427178608124, "learning_rate": 9.59688314932163e-06, "loss": 0.3735, "step": 11482 }, { "epoch": 0.53, "grad_norm": 0.40522052234231914, "learning_rate": 9.595396444013206e-06, "loss": 0.3219, "step": 11483 }, { "epoch": 0.53, "grad_norm": 0.32504473580746174, "learning_rate": 9.59390974766231e-06, "loss": 0.1787, "step": 11484 }, { "epoch": 0.53, "grad_norm": 0.5151314633756906, "learning_rate": 9.592423060301866e-06, "loss": 0.3633, "step": 11485 }, { "epoch": 0.53, "grad_norm": 0.28335899724641594, "learning_rate": 9.590936381964781e-06, "loss": 0.2114, "step": 11486 }, { "epoch": 0.53, "grad_norm": 0.3576648760206836, "learning_rate": 9.58944971268397e-06, "loss": 0.233, "step": 11487 }, { "epoch": 0.53, "grad_norm": 0.36092478501131753, "learning_rate": 9.587963052492344e-06, "loss": 0.3166, "step": 11488 }, { "epoch": 0.53, "grad_norm": 1.285784762132118, "learning_rate": 9.586476401422822e-06, "loss": 0.8697, "step": 11489 }, { "epoch": 0.53, "grad_norm": 0.3271433148304856, "learning_rate": 9.584989759508313e-06, "loss": 0.2034, "step": 11490 }, { "epoch": 0.53, "grad_norm": 1.165163853497228, "learning_rate": 9.583503126781734e-06, "loss": 0.572, "step": 11491 }, { "epoch": 0.53, "grad_norm": 0.3473864364153112, "learning_rate": 9.582016503275989e-06, "loss": 0.3194, "step": 11492 }, { "epoch": 0.53, "grad_norm": 0.35292453358758263, "learning_rate": 9.580529889023998e-06, "loss": 0.2684, "step": 11493 }, { "epoch": 0.53, "grad_norm": 0.22612845669227344, "learning_rate": 9.579043284058672e-06, "loss": 0.1339, "step": 11494 }, { "epoch": 0.53, "grad_norm": 0.35773299212606535, "learning_rate": 9.577556688412922e-06, "loss": 0.3267, "step": 11495 }, { "epoch": 0.53, "grad_norm": 0.9268158973363738, "learning_rate": 9.576070102119657e-06, "loss": 0.4907, "step": 11496 }, { "epoch": 0.53, "grad_norm": 0.3782578944241578, "learning_rate": 9.574583525211795e-06, "loss": 0.2062, "step": 11497 }, { "epoch": 0.53, "grad_norm": 0.676121135766827, "learning_rate": 9.573096957722243e-06, "loss": 0.3766, "step": 11498 }, { "epoch": 0.53, "grad_norm": 0.41218950110309965, "learning_rate": 9.571610399683911e-06, "loss": 0.271, "step": 11499 }, { "epoch": 0.53, "grad_norm": 0.22735672429179699, "learning_rate": 9.570123851129715e-06, "loss": 0.1822, "step": 11500 }, { "epoch": 0.53, "grad_norm": 1.3586771869746004, "learning_rate": 9.568637312092555e-06, "loss": 0.7776, "step": 11501 }, { "epoch": 0.53, "grad_norm": 0.590202482734045, "learning_rate": 9.567150782605358e-06, "loss": 0.3229, "step": 11502 }, { "epoch": 0.53, "grad_norm": 0.30884804634170515, "learning_rate": 9.565664262701023e-06, "loss": 0.2464, "step": 11503 }, { "epoch": 0.53, "grad_norm": 0.7531336961030599, "learning_rate": 9.564177752412463e-06, "loss": 0.3844, "step": 11504 }, { "epoch": 0.53, "grad_norm": 0.689126686314734, "learning_rate": 9.562691251772583e-06, "loss": 0.3107, "step": 11505 }, { "epoch": 0.53, "grad_norm": 0.24896524377844456, "learning_rate": 9.5612047608143e-06, "loss": 0.2016, "step": 11506 }, { "epoch": 0.53, "grad_norm": 0.35394589055156567, "learning_rate": 9.55971827957052e-06, "loss": 0.2626, "step": 11507 }, { "epoch": 0.53, "grad_norm": 0.3955376312495729, "learning_rate": 9.558231808074157e-06, "loss": 0.2767, "step": 11508 }, { "epoch": 0.53, "grad_norm": 0.5132952207694231, "learning_rate": 9.55674534635811e-06, "loss": 0.3359, "step": 11509 }, { "epoch": 0.53, "grad_norm": 0.9645366780122393, "learning_rate": 9.555258894455298e-06, "loss": 0.3421, "step": 11510 }, { "epoch": 0.53, "grad_norm": 0.31286417768425295, "learning_rate": 9.553772452398625e-06, "loss": 0.2619, "step": 11511 }, { "epoch": 0.53, "grad_norm": 0.5820607574168946, "learning_rate": 9.552286020221e-06, "loss": 0.351, "step": 11512 }, { "epoch": 0.53, "grad_norm": 0.21536951924965123, "learning_rate": 9.55079959795533e-06, "loss": 0.1352, "step": 11513 }, { "epoch": 0.53, "grad_norm": 0.8762832582216499, "learning_rate": 9.549313185634523e-06, "loss": 0.3782, "step": 11514 }, { "epoch": 0.53, "grad_norm": 0.31732354238067395, "learning_rate": 9.54782678329149e-06, "loss": 0.2645, "step": 11515 }, { "epoch": 0.53, "grad_norm": 0.5904115097706445, "learning_rate": 9.546340390959134e-06, "loss": 0.2894, "step": 11516 }, { "epoch": 0.53, "grad_norm": 0.8290513744586341, "learning_rate": 9.544854008670366e-06, "loss": 0.3992, "step": 11517 }, { "epoch": 0.53, "grad_norm": 0.30181430877360416, "learning_rate": 9.543367636458089e-06, "loss": 0.227, "step": 11518 }, { "epoch": 0.53, "grad_norm": 0.4059646242566253, "learning_rate": 9.541881274355214e-06, "loss": 0.3388, "step": 11519 }, { "epoch": 0.53, "grad_norm": 0.2561918372961943, "learning_rate": 9.54039492239465e-06, "loss": 0.1189, "step": 11520 }, { "epoch": 0.53, "grad_norm": 0.4021804568327228, "learning_rate": 9.538908580609296e-06, "loss": 0.3159, "step": 11521 }, { "epoch": 0.53, "grad_norm": 0.9089202504352452, "learning_rate": 9.537422249032059e-06, "loss": 0.475, "step": 11522 }, { "epoch": 0.53, "grad_norm": 0.2974682866304729, "learning_rate": 9.53593592769585e-06, "loss": 0.2212, "step": 11523 }, { "epoch": 0.53, "grad_norm": 0.42981547972156187, "learning_rate": 9.534449616633574e-06, "loss": 0.3246, "step": 11524 }, { "epoch": 0.53, "grad_norm": 1.283905459616831, "learning_rate": 9.532963315878133e-06, "loss": 0.7906, "step": 11525 }, { "epoch": 0.53, "grad_norm": 0.3347864899338461, "learning_rate": 9.531477025462433e-06, "loss": 0.1852, "step": 11526 }, { "epoch": 0.53, "grad_norm": 0.34079882891682317, "learning_rate": 9.529990745419381e-06, "loss": 0.2821, "step": 11527 }, { "epoch": 0.53, "grad_norm": 0.34799159191397055, "learning_rate": 9.528504475781882e-06, "loss": 0.2716, "step": 11528 }, { "epoch": 0.53, "grad_norm": 0.3997992685807397, "learning_rate": 9.52701821658284e-06, "loss": 0.1911, "step": 11529 }, { "epoch": 0.53, "grad_norm": 0.6219409621018265, "learning_rate": 9.525531967855159e-06, "loss": 0.3631, "step": 11530 }, { "epoch": 0.53, "grad_norm": 0.39933345292519146, "learning_rate": 9.524045729631738e-06, "loss": 0.3516, "step": 11531 }, { "epoch": 0.53, "grad_norm": 1.1166430840848653, "learning_rate": 9.52255950194549e-06, "loss": 0.3612, "step": 11532 }, { "epoch": 0.53, "grad_norm": 0.30316440093048463, "learning_rate": 9.521073284829315e-06, "loss": 0.2382, "step": 11533 }, { "epoch": 0.53, "grad_norm": 0.2512861874036437, "learning_rate": 9.519587078316115e-06, "loss": 0.1424, "step": 11534 }, { "epoch": 0.53, "grad_norm": 0.3804224115024147, "learning_rate": 9.51810088243879e-06, "loss": 0.308, "step": 11535 }, { "epoch": 0.53, "grad_norm": 0.34004487062489147, "learning_rate": 9.516614697230252e-06, "loss": 0.2052, "step": 11536 }, { "epoch": 0.53, "grad_norm": 0.9817272443613348, "learning_rate": 9.515128522723398e-06, "loss": 0.5686, "step": 11537 }, { "epoch": 0.53, "grad_norm": 0.4493710073024638, "learning_rate": 9.513642358951133e-06, "loss": 0.3506, "step": 11538 }, { "epoch": 0.53, "grad_norm": 0.2845727005865016, "learning_rate": 9.51215620594635e-06, "loss": 0.2114, "step": 11539 }, { "epoch": 0.53, "grad_norm": 0.27225981121147663, "learning_rate": 9.510670063741965e-06, "loss": 0.1699, "step": 11540 }, { "epoch": 0.53, "grad_norm": 0.65804836733012, "learning_rate": 9.509183932370872e-06, "loss": 0.4474, "step": 11541 }, { "epoch": 0.53, "grad_norm": 0.3542132514944083, "learning_rate": 9.507697811865975e-06, "loss": 0.2488, "step": 11542 }, { "epoch": 0.53, "grad_norm": 0.40180888189751734, "learning_rate": 9.506211702260172e-06, "loss": 0.3024, "step": 11543 }, { "epoch": 0.53, "grad_norm": 0.8475062103142702, "learning_rate": 9.504725603586365e-06, "loss": 0.3678, "step": 11544 }, { "epoch": 0.53, "grad_norm": 0.38515022982294955, "learning_rate": 9.503239515877457e-06, "loss": 0.2637, "step": 11545 }, { "epoch": 0.53, "grad_norm": 0.2119658776146596, "learning_rate": 9.501753439166348e-06, "loss": 0.0901, "step": 11546 }, { "epoch": 0.53, "grad_norm": 0.36360739451863777, "learning_rate": 9.500267373485938e-06, "loss": 0.2743, "step": 11547 }, { "epoch": 0.53, "grad_norm": 0.3693671248971325, "learning_rate": 9.498781318869123e-06, "loss": 0.298, "step": 11548 }, { "epoch": 0.53, "grad_norm": 0.8571189591953227, "learning_rate": 9.497295275348811e-06, "loss": 0.3751, "step": 11549 }, { "epoch": 0.53, "grad_norm": 0.45386829398684075, "learning_rate": 9.495809242957897e-06, "loss": 0.3567, "step": 11550 }, { "epoch": 0.53, "grad_norm": 0.31929915068615444, "learning_rate": 9.494323221729278e-06, "loss": 0.2912, "step": 11551 }, { "epoch": 0.53, "grad_norm": 0.21952264411421285, "learning_rate": 9.492837211695852e-06, "loss": 0.0995, "step": 11552 }, { "epoch": 0.53, "grad_norm": 1.2729390185636968, "learning_rate": 9.491351212890528e-06, "loss": 0.6984, "step": 11553 }, { "epoch": 0.53, "grad_norm": 0.3377661202367108, "learning_rate": 9.489865225346195e-06, "loss": 0.2711, "step": 11554 }, { "epoch": 0.53, "grad_norm": 0.4662325011503825, "learning_rate": 9.488379249095755e-06, "loss": 0.2979, "step": 11555 }, { "epoch": 0.53, "grad_norm": 0.8504686812067698, "learning_rate": 9.486893284172103e-06, "loss": 0.5068, "step": 11556 }, { "epoch": 0.53, "grad_norm": 0.35756610182445453, "learning_rate": 9.485407330608142e-06, "loss": 0.2743, "step": 11557 }, { "epoch": 0.53, "grad_norm": 0.3112988437634165, "learning_rate": 9.483921388436767e-06, "loss": 0.179, "step": 11558 }, { "epoch": 0.53, "grad_norm": 0.35275450669082886, "learning_rate": 9.482435457690873e-06, "loss": 0.2561, "step": 11559 }, { "epoch": 0.53, "grad_norm": 0.3348128247596534, "learning_rate": 9.480949538403362e-06, "loss": 0.2738, "step": 11560 }, { "epoch": 0.53, "grad_norm": 0.8063144239181748, "learning_rate": 9.479463630607124e-06, "loss": 0.4397, "step": 11561 }, { "epoch": 0.53, "grad_norm": 0.4395064645676462, "learning_rate": 9.477977734335061e-06, "loss": 0.2702, "step": 11562 }, { "epoch": 0.53, "grad_norm": 0.41951147537300915, "learning_rate": 9.47649184962007e-06, "loss": 0.3008, "step": 11563 }, { "epoch": 0.53, "grad_norm": 0.3483407211210602, "learning_rate": 9.475005976495044e-06, "loss": 0.269, "step": 11564 }, { "epoch": 0.53, "grad_norm": 0.32277374727350777, "learning_rate": 9.473520114992876e-06, "loss": 0.1725, "step": 11565 }, { "epoch": 0.53, "grad_norm": 0.4954189472973688, "learning_rate": 9.472034265146467e-06, "loss": 0.2993, "step": 11566 }, { "epoch": 0.53, "grad_norm": 0.340603564636974, "learning_rate": 9.470548426988716e-06, "loss": 0.3096, "step": 11567 }, { "epoch": 0.53, "grad_norm": 1.0560843765898078, "learning_rate": 9.469062600552509e-06, "loss": 0.4219, "step": 11568 }, { "epoch": 0.53, "grad_norm": 0.3683445869916497, "learning_rate": 9.46757678587074e-06, "loss": 0.2813, "step": 11569 }, { "epoch": 0.53, "grad_norm": 0.23680972300463468, "learning_rate": 9.466090982976311e-06, "loss": 0.186, "step": 11570 }, { "epoch": 0.53, "grad_norm": 0.4975384315526279, "learning_rate": 9.464605191902114e-06, "loss": 0.3663, "step": 11571 }, { "epoch": 0.53, "grad_norm": 0.35518988058665824, "learning_rate": 9.463119412681041e-06, "loss": 0.2283, "step": 11572 }, { "epoch": 0.53, "grad_norm": 0.7382612936696473, "learning_rate": 9.46163364534599e-06, "loss": 0.4927, "step": 11573 }, { "epoch": 0.53, "grad_norm": 0.526748975236103, "learning_rate": 9.460147889929845e-06, "loss": 0.3644, "step": 11574 }, { "epoch": 0.53, "grad_norm": 0.28599848534733724, "learning_rate": 9.45866214646551e-06, "loss": 0.2204, "step": 11575 }, { "epoch": 0.53, "grad_norm": 1.3775656086321737, "learning_rate": 9.457176414985872e-06, "loss": 0.7204, "step": 11576 }, { "epoch": 0.53, "grad_norm": 0.34335188546405027, "learning_rate": 9.455690695523826e-06, "loss": 0.2519, "step": 11577 }, { "epoch": 0.53, "grad_norm": 0.28402440275357393, "learning_rate": 9.45420498811226e-06, "loss": 0.2217, "step": 11578 }, { "epoch": 0.53, "grad_norm": 0.5638977114741563, "learning_rate": 9.452719292784074e-06, "loss": 0.3835, "step": 11579 }, { "epoch": 0.53, "grad_norm": 1.0865335337319042, "learning_rate": 9.451233609572153e-06, "loss": 0.6611, "step": 11580 }, { "epoch": 0.53, "grad_norm": 0.3090676807047347, "learning_rate": 9.449747938509392e-06, "loss": 0.1635, "step": 11581 }, { "epoch": 0.53, "grad_norm": 0.2941631381155313, "learning_rate": 9.448262279628678e-06, "loss": 0.2542, "step": 11582 }, { "epoch": 0.53, "grad_norm": 0.38016154823497406, "learning_rate": 9.446776632962909e-06, "loss": 0.2909, "step": 11583 }, { "epoch": 0.53, "grad_norm": 0.41781451795973, "learning_rate": 9.44529099854497e-06, "loss": 0.2856, "step": 11584 }, { "epoch": 0.53, "grad_norm": 0.45518012458921936, "learning_rate": 9.443805376407758e-06, "loss": 0.2907, "step": 11585 }, { "epoch": 0.53, "grad_norm": 0.4009928071656808, "learning_rate": 9.442319766584153e-06, "loss": 0.3456, "step": 11586 }, { "epoch": 0.53, "grad_norm": 0.47711975918400323, "learning_rate": 9.440834169107054e-06, "loss": 0.3089, "step": 11587 }, { "epoch": 0.53, "grad_norm": 0.5121742086899269, "learning_rate": 9.439348584009347e-06, "loss": 0.267, "step": 11588 }, { "epoch": 0.53, "grad_norm": 0.6531649277935633, "learning_rate": 9.437863011323923e-06, "loss": 0.3848, "step": 11589 }, { "epoch": 0.53, "grad_norm": 0.24067517557129425, "learning_rate": 9.43637745108367e-06, "loss": 0.1725, "step": 11590 }, { "epoch": 0.53, "grad_norm": 0.31685247756225854, "learning_rate": 9.434891903321475e-06, "loss": 0.2724, "step": 11591 }, { "epoch": 0.53, "grad_norm": 1.5858732160329463, "learning_rate": 9.43340636807023e-06, "loss": 0.6234, "step": 11592 }, { "epoch": 0.53, "grad_norm": 0.38199825809520005, "learning_rate": 9.431920845362822e-06, "loss": 0.2611, "step": 11593 }, { "epoch": 0.53, "grad_norm": 0.507419548204257, "learning_rate": 9.43043533523214e-06, "loss": 0.2867, "step": 11594 }, { "epoch": 0.53, "grad_norm": 0.4611194863326474, "learning_rate": 9.428949837711068e-06, "loss": 0.364, "step": 11595 }, { "epoch": 0.53, "grad_norm": 0.3771586328895342, "learning_rate": 9.427464352832498e-06, "loss": 0.2779, "step": 11596 }, { "epoch": 0.53, "grad_norm": 0.46763979781345666, "learning_rate": 9.425978880629315e-06, "loss": 0.2696, "step": 11597 }, { "epoch": 0.53, "grad_norm": 0.26537678001470155, "learning_rate": 9.424493421134407e-06, "loss": 0.2059, "step": 11598 }, { "epoch": 0.53, "grad_norm": 0.4549239375220082, "learning_rate": 9.423007974380656e-06, "loss": 0.269, "step": 11599 }, { "epoch": 0.53, "grad_norm": 0.5387294193592074, "learning_rate": 9.421522540400955e-06, "loss": 0.3382, "step": 11600 }, { "epoch": 0.53, "grad_norm": 0.4921534222947431, "learning_rate": 9.42003711922819e-06, "loss": 0.2348, "step": 11601 }, { "epoch": 0.53, "grad_norm": 0.4355996686401445, "learning_rate": 9.418551710895243e-06, "loss": 0.2964, "step": 11602 }, { "epoch": 0.53, "grad_norm": 0.353586162835902, "learning_rate": 9.417066315435002e-06, "loss": 0.3084, "step": 11603 }, { "epoch": 0.53, "grad_norm": 0.3245198321431988, "learning_rate": 9.415580932880347e-06, "loss": 0.1529, "step": 11604 }, { "epoch": 0.53, "grad_norm": 0.43866629070919116, "learning_rate": 9.414095563264169e-06, "loss": 0.2727, "step": 11605 }, { "epoch": 0.53, "grad_norm": 0.326232925175616, "learning_rate": 9.41261020661935e-06, "loss": 0.3093, "step": 11606 }, { "epoch": 0.53, "grad_norm": 0.8985547972415693, "learning_rate": 9.411124862978777e-06, "loss": 0.3161, "step": 11607 }, { "epoch": 0.53, "grad_norm": 0.384382698777126, "learning_rate": 9.409639532375327e-06, "loss": 0.2723, "step": 11608 }, { "epoch": 0.53, "grad_norm": 0.5687459015449872, "learning_rate": 9.408154214841894e-06, "loss": 0.4402, "step": 11609 }, { "epoch": 0.53, "grad_norm": 0.3075215507334488, "learning_rate": 9.406668910411356e-06, "loss": 0.2522, "step": 11610 }, { "epoch": 0.53, "grad_norm": 0.3330528900470736, "learning_rate": 9.405183619116594e-06, "loss": 0.1898, "step": 11611 }, { "epoch": 0.53, "grad_norm": 0.6521972590743307, "learning_rate": 9.40369834099049e-06, "loss": 0.3641, "step": 11612 }, { "epoch": 0.53, "grad_norm": 0.8606685124260647, "learning_rate": 9.402213076065937e-06, "loss": 0.4956, "step": 11613 }, { "epoch": 0.53, "grad_norm": 0.313928014452957, "learning_rate": 9.400727824375809e-06, "loss": 0.2166, "step": 11614 }, { "epoch": 0.53, "grad_norm": 0.5764658073112854, "learning_rate": 9.399242585952988e-06, "loss": 0.4243, "step": 11615 }, { "epoch": 0.53, "grad_norm": 0.3298526325049756, "learning_rate": 9.397757360830353e-06, "loss": 0.2105, "step": 11616 }, { "epoch": 0.53, "grad_norm": 0.5771189911617128, "learning_rate": 9.396272149040794e-06, "loss": 0.2352, "step": 11617 }, { "epoch": 0.53, "grad_norm": 0.49172257415506077, "learning_rate": 9.394786950617188e-06, "loss": 0.3418, "step": 11618 }, { "epoch": 0.53, "grad_norm": 0.5149435213148109, "learning_rate": 9.393301765592415e-06, "loss": 0.375, "step": 11619 }, { "epoch": 0.53, "grad_norm": 0.9021804416550401, "learning_rate": 9.391816593999357e-06, "loss": 0.4766, "step": 11620 }, { "epoch": 0.53, "grad_norm": 0.3268495065524992, "learning_rate": 9.39033143587089e-06, "loss": 0.2375, "step": 11621 }, { "epoch": 0.53, "grad_norm": 0.27504092355111404, "learning_rate": 9.388846291239902e-06, "loss": 0.2466, "step": 11622 }, { "epoch": 0.53, "grad_norm": 0.5137307119130171, "learning_rate": 9.387361160139267e-06, "loss": 0.284, "step": 11623 }, { "epoch": 0.53, "grad_norm": 0.31304615858142565, "learning_rate": 9.385876042601865e-06, "loss": 0.2163, "step": 11624 }, { "epoch": 0.53, "grad_norm": 1.484826023617579, "learning_rate": 9.384390938660572e-06, "loss": 0.794, "step": 11625 }, { "epoch": 0.53, "grad_norm": 0.34399974112785403, "learning_rate": 9.382905848348274e-06, "loss": 0.295, "step": 11626 }, { "epoch": 0.53, "grad_norm": 0.4001701551471713, "learning_rate": 9.381420771697845e-06, "loss": 0.2766, "step": 11627 }, { "epoch": 0.53, "grad_norm": 0.6618474873869882, "learning_rate": 9.379935708742164e-06, "loss": 0.4498, "step": 11628 }, { "epoch": 0.53, "grad_norm": 0.504418556713321, "learning_rate": 9.378450659514107e-06, "loss": 0.3443, "step": 11629 }, { "epoch": 0.53, "grad_norm": 0.25900473644456884, "learning_rate": 9.376965624046555e-06, "loss": 0.2027, "step": 11630 }, { "epoch": 0.53, "grad_norm": 0.3309850488210643, "learning_rate": 9.375480602372384e-06, "loss": 0.2393, "step": 11631 }, { "epoch": 0.53, "grad_norm": 0.6905169250464249, "learning_rate": 9.373995594524474e-06, "loss": 0.387, "step": 11632 }, { "epoch": 0.53, "grad_norm": 0.4302111475287135, "learning_rate": 9.372510600535693e-06, "loss": 0.2958, "step": 11633 }, { "epoch": 0.53, "grad_norm": 0.4074766723107201, "learning_rate": 9.371025620438922e-06, "loss": 0.2863, "step": 11634 }, { "epoch": 0.53, "grad_norm": 0.5041505438944777, "learning_rate": 9.369540654267039e-06, "loss": 0.2909, "step": 11635 }, { "epoch": 0.53, "grad_norm": 0.3913208284044223, "learning_rate": 9.368055702052919e-06, "loss": 0.2658, "step": 11636 }, { "epoch": 0.53, "grad_norm": 0.31003779035931117, "learning_rate": 9.366570763829439e-06, "loss": 0.1776, "step": 11637 }, { "epoch": 0.53, "grad_norm": 0.44953459287440634, "learning_rate": 9.365085839629466e-06, "loss": 0.3031, "step": 11638 }, { "epoch": 0.53, "grad_norm": 0.3896207379468654, "learning_rate": 9.363600929485885e-06, "loss": 0.2965, "step": 11639 }, { "epoch": 0.53, "grad_norm": 0.6087830701546962, "learning_rate": 9.362116033431566e-06, "loss": 0.3416, "step": 11640 }, { "epoch": 0.53, "grad_norm": 0.8222679475513474, "learning_rate": 9.360631151499382e-06, "loss": 0.4121, "step": 11641 }, { "epoch": 0.53, "grad_norm": 0.3004690274617724, "learning_rate": 9.359146283722206e-06, "loss": 0.2761, "step": 11642 }, { "epoch": 0.53, "grad_norm": 0.22471035452567548, "learning_rate": 9.357661430132916e-06, "loss": 0.1054, "step": 11643 }, { "epoch": 0.53, "grad_norm": 0.6200494504459275, "learning_rate": 9.356176590764382e-06, "loss": 0.4299, "step": 11644 }, { "epoch": 0.53, "grad_norm": 0.40263729573447943, "learning_rate": 9.35469176564948e-06, "loss": 0.3069, "step": 11645 }, { "epoch": 0.54, "grad_norm": 0.35644280595544175, "learning_rate": 9.353206954821075e-06, "loss": 0.3157, "step": 11646 }, { "epoch": 0.54, "grad_norm": 0.4422490387852672, "learning_rate": 9.35172215831205e-06, "loss": 0.2071, "step": 11647 }, { "epoch": 0.54, "grad_norm": 0.29127712832389857, "learning_rate": 9.350237376155269e-06, "loss": 0.2306, "step": 11648 }, { "epoch": 0.54, "grad_norm": 0.423903823747645, "learning_rate": 9.348752608383608e-06, "loss": 0.2455, "step": 11649 }, { "epoch": 0.54, "grad_norm": 0.3107040305171492, "learning_rate": 9.347267855029939e-06, "loss": 0.2396, "step": 11650 }, { "epoch": 0.54, "grad_norm": 0.4258717083161907, "learning_rate": 9.345783116127122e-06, "loss": 0.311, "step": 11651 }, { "epoch": 0.54, "grad_norm": 0.572067195715333, "learning_rate": 9.344298391708043e-06, "loss": 0.408, "step": 11652 }, { "epoch": 0.54, "grad_norm": 0.37066031201809835, "learning_rate": 9.342813681805564e-06, "loss": 0.2209, "step": 11653 }, { "epoch": 0.54, "grad_norm": 0.3168676582140073, "learning_rate": 9.341328986452558e-06, "loss": 0.264, "step": 11654 }, { "epoch": 0.54, "grad_norm": 0.29010760126124835, "learning_rate": 9.33984430568189e-06, "loss": 0.1817, "step": 11655 }, { "epoch": 0.54, "grad_norm": 0.5473738858833251, "learning_rate": 9.338359639526436e-06, "loss": 0.2607, "step": 11656 }, { "epoch": 0.54, "grad_norm": 0.3652733708898023, "learning_rate": 9.336874988019063e-06, "loss": 0.3039, "step": 11657 }, { "epoch": 0.54, "grad_norm": 0.38603016526081996, "learning_rate": 9.335390351192636e-06, "loss": 0.3444, "step": 11658 }, { "epoch": 0.54, "grad_norm": 1.7112014898914996, "learning_rate": 9.333905729080024e-06, "loss": 0.8273, "step": 11659 }, { "epoch": 0.54, "grad_norm": 0.3518696755351413, "learning_rate": 9.332421121714101e-06, "loss": 0.2064, "step": 11660 }, { "epoch": 0.54, "grad_norm": 0.28890595250732565, "learning_rate": 9.33093652912773e-06, "loss": 0.1632, "step": 11661 }, { "epoch": 0.54, "grad_norm": 0.41623436207766035, "learning_rate": 9.329451951353781e-06, "loss": 0.3274, "step": 11662 }, { "epoch": 0.54, "grad_norm": 0.39106761355405417, "learning_rate": 9.32796738842512e-06, "loss": 0.2206, "step": 11663 }, { "epoch": 0.54, "grad_norm": 1.363003718700666, "learning_rate": 9.326482840374606e-06, "loss": 0.87, "step": 11664 }, { "epoch": 0.54, "grad_norm": 0.43384043733285793, "learning_rate": 9.324998307235117e-06, "loss": 0.3083, "step": 11665 }, { "epoch": 0.54, "grad_norm": 0.31649052573784386, "learning_rate": 9.323513789039517e-06, "loss": 0.2354, "step": 11666 }, { "epoch": 0.54, "grad_norm": 0.3389514946724946, "learning_rate": 9.322029285820669e-06, "loss": 0.1909, "step": 11667 }, { "epoch": 0.54, "grad_norm": 0.5623254675463492, "learning_rate": 9.320544797611436e-06, "loss": 0.3962, "step": 11668 }, { "epoch": 0.54, "grad_norm": 0.41028675953833244, "learning_rate": 9.31906032444469e-06, "loss": 0.2331, "step": 11669 }, { "epoch": 0.54, "grad_norm": 0.41513545072717595, "learning_rate": 9.317575866353293e-06, "loss": 0.3514, "step": 11670 }, { "epoch": 0.54, "grad_norm": 0.9287869047694566, "learning_rate": 9.316091423370105e-06, "loss": 0.4506, "step": 11671 }, { "epoch": 0.54, "grad_norm": 0.3885442245023446, "learning_rate": 9.314606995527994e-06, "loss": 0.2682, "step": 11672 }, { "epoch": 0.54, "grad_norm": 0.2802318521153969, "learning_rate": 9.313122582859826e-06, "loss": 0.1939, "step": 11673 }, { "epoch": 0.54, "grad_norm": 0.47667283084442197, "learning_rate": 9.311638185398461e-06, "loss": 0.3297, "step": 11674 }, { "epoch": 0.54, "grad_norm": 0.35765135438946277, "learning_rate": 9.310153803176765e-06, "loss": 0.2575, "step": 11675 }, { "epoch": 0.54, "grad_norm": 1.0472080587585906, "learning_rate": 9.308669436227592e-06, "loss": 0.4618, "step": 11676 }, { "epoch": 0.54, "grad_norm": 0.33633738585763256, "learning_rate": 9.307185084583816e-06, "loss": 0.288, "step": 11677 }, { "epoch": 0.54, "grad_norm": 0.336502292606778, "learning_rate": 9.305700748278296e-06, "loss": 0.2521, "step": 11678 }, { "epoch": 0.54, "grad_norm": 0.4048952944452784, "learning_rate": 9.304216427343894e-06, "loss": 0.1611, "step": 11679 }, { "epoch": 0.54, "grad_norm": 0.44317456840682923, "learning_rate": 9.302732121813467e-06, "loss": 0.2925, "step": 11680 }, { "epoch": 0.54, "grad_norm": 0.3629013014908533, "learning_rate": 9.301247831719876e-06, "loss": 0.2761, "step": 11681 }, { "epoch": 0.54, "grad_norm": 0.3878242196618716, "learning_rate": 9.299763557095986e-06, "loss": 0.2757, "step": 11682 }, { "epoch": 0.54, "grad_norm": 1.065162077148229, "learning_rate": 9.298279297974659e-06, "loss": 0.5394, "step": 11683 }, { "epoch": 0.54, "grad_norm": 0.3264921792546553, "learning_rate": 9.29679505438875e-06, "loss": 0.2382, "step": 11684 }, { "epoch": 0.54, "grad_norm": 0.8193978850515296, "learning_rate": 9.29531082637112e-06, "loss": 0.4485, "step": 11685 }, { "epoch": 0.54, "grad_norm": 0.30081617178664427, "learning_rate": 9.293826613954629e-06, "loss": 0.2231, "step": 11686 }, { "epoch": 0.54, "grad_norm": 0.396169861059128, "learning_rate": 9.292342417172138e-06, "loss": 0.2979, "step": 11687 }, { "epoch": 0.54, "grad_norm": 0.5032826500275135, "learning_rate": 9.290858236056503e-06, "loss": 0.3435, "step": 11688 }, { "epoch": 0.54, "grad_norm": 0.31658410608483184, "learning_rate": 9.289374070640581e-06, "loss": 0.2508, "step": 11689 }, { "epoch": 0.54, "grad_norm": 0.38163147474502807, "learning_rate": 9.287889920957236e-06, "loss": 0.2762, "step": 11690 }, { "epoch": 0.54, "grad_norm": 0.7824481203912594, "learning_rate": 9.28640578703932e-06, "loss": 0.4238, "step": 11691 }, { "epoch": 0.54, "grad_norm": 0.39616453155576553, "learning_rate": 9.284921668919692e-06, "loss": 0.1707, "step": 11692 }, { "epoch": 0.54, "grad_norm": 0.3454923519403278, "learning_rate": 9.283437566631209e-06, "loss": 0.2626, "step": 11693 }, { "epoch": 0.54, "grad_norm": 0.36159524598091314, "learning_rate": 9.281953480206725e-06, "loss": 0.3256, "step": 11694 }, { "epoch": 0.54, "grad_norm": 0.3388010346526541, "learning_rate": 9.280469409679102e-06, "loss": 0.1039, "step": 11695 }, { "epoch": 0.54, "grad_norm": 0.32889107040152904, "learning_rate": 9.278985355081193e-06, "loss": 0.2684, "step": 11696 }, { "epoch": 0.54, "grad_norm": 0.5037149651234222, "learning_rate": 9.277501316445854e-06, "loss": 0.3698, "step": 11697 }, { "epoch": 0.54, "grad_norm": 0.5077007471135834, "learning_rate": 9.276017293805936e-06, "loss": 0.3993, "step": 11698 }, { "epoch": 0.54, "grad_norm": 0.2620827818208293, "learning_rate": 9.2745332871943e-06, "loss": 0.1654, "step": 11699 }, { "epoch": 0.54, "grad_norm": 1.3831590743269435, "learning_rate": 9.273049296643798e-06, "loss": 0.8403, "step": 11700 }, { "epoch": 0.54, "grad_norm": 0.28698881801479265, "learning_rate": 9.271565322187283e-06, "loss": 0.2469, "step": 11701 }, { "epoch": 0.54, "grad_norm": 0.3290629204536742, "learning_rate": 9.27008136385761e-06, "loss": 0.2126, "step": 11702 }, { "epoch": 0.54, "grad_norm": 0.6519353019369623, "learning_rate": 9.268597421687631e-06, "loss": 0.4266, "step": 11703 }, { "epoch": 0.54, "grad_norm": 1.2492380899637494, "learning_rate": 9.267113495710203e-06, "loss": 0.6571, "step": 11704 }, { "epoch": 0.54, "grad_norm": 0.30799574502210647, "learning_rate": 9.265629585958173e-06, "loss": 0.201, "step": 11705 }, { "epoch": 0.54, "grad_norm": 0.37735684909753636, "learning_rate": 9.264145692464394e-06, "loss": 0.315, "step": 11706 }, { "epoch": 0.54, "grad_norm": 0.35323059992538514, "learning_rate": 9.262661815261726e-06, "loss": 0.2292, "step": 11707 }, { "epoch": 0.54, "grad_norm": 0.30951337940811935, "learning_rate": 9.261177954383014e-06, "loss": 0.1934, "step": 11708 }, { "epoch": 0.54, "grad_norm": 0.39216085184940486, "learning_rate": 9.259694109861107e-06, "loss": 0.3177, "step": 11709 }, { "epoch": 0.54, "grad_norm": 1.3660324486377504, "learning_rate": 9.258210281728862e-06, "loss": 0.5812, "step": 11710 }, { "epoch": 0.54, "grad_norm": 0.44098786757010494, "learning_rate": 9.256726470019121e-06, "loss": 0.2762, "step": 11711 }, { "epoch": 0.54, "grad_norm": 0.22001286754437122, "learning_rate": 9.255242674764742e-06, "loss": 0.1498, "step": 11712 }, { "epoch": 0.54, "grad_norm": 0.37089302180149, "learning_rate": 9.253758895998575e-06, "loss": 0.3131, "step": 11713 }, { "epoch": 0.54, "grad_norm": 0.4385983391000386, "learning_rate": 9.252275133753466e-06, "loss": 0.2794, "step": 11714 }, { "epoch": 0.54, "grad_norm": 0.6514131600036096, "learning_rate": 9.250791388062263e-06, "loss": 0.2822, "step": 11715 }, { "epoch": 0.54, "grad_norm": 1.0679832644691192, "learning_rate": 9.249307658957817e-06, "loss": 0.6274, "step": 11716 }, { "epoch": 0.54, "grad_norm": 0.28284915146840306, "learning_rate": 9.247823946472978e-06, "loss": 0.2374, "step": 11717 }, { "epoch": 0.54, "grad_norm": 0.47598752314670184, "learning_rate": 9.24634025064059e-06, "loss": 0.2769, "step": 11718 }, { "epoch": 0.54, "grad_norm": 0.5375053465251965, "learning_rate": 9.244856571493502e-06, "loss": 0.2979, "step": 11719 }, { "epoch": 0.54, "grad_norm": 0.3669957796236485, "learning_rate": 9.243372909064564e-06, "loss": 0.2588, "step": 11720 }, { "epoch": 0.54, "grad_norm": 0.28852017958288007, "learning_rate": 9.241889263386618e-06, "loss": 0.1947, "step": 11721 }, { "epoch": 0.54, "grad_norm": 1.0661933004402595, "learning_rate": 9.240405634492515e-06, "loss": 0.6298, "step": 11722 }, { "epoch": 0.54, "grad_norm": 0.6273475437122229, "learning_rate": 9.238922022415095e-06, "loss": 0.3381, "step": 11723 }, { "epoch": 0.54, "grad_norm": 0.40273542611871793, "learning_rate": 9.23743842718721e-06, "loss": 0.344, "step": 11724 }, { "epoch": 0.54, "grad_norm": 0.38936299256955526, "learning_rate": 9.235954848841708e-06, "loss": 0.287, "step": 11725 }, { "epoch": 0.54, "grad_norm": 0.6402911988383123, "learning_rate": 9.234471287411427e-06, "loss": 0.3524, "step": 11726 }, { "epoch": 0.54, "grad_norm": 0.25480388390569253, "learning_rate": 9.232987742929214e-06, "loss": 0.1834, "step": 11727 }, { "epoch": 0.54, "grad_norm": 1.0472047430212839, "learning_rate": 9.231504215427906e-06, "loss": 0.3271, "step": 11728 }, { "epoch": 0.54, "grad_norm": 0.3670156199905095, "learning_rate": 9.230020704940361e-06, "loss": 0.266, "step": 11729 }, { "epoch": 0.54, "grad_norm": 0.38175122821227114, "learning_rate": 9.228537211499415e-06, "loss": 0.3192, "step": 11730 }, { "epoch": 0.54, "grad_norm": 0.665170701756217, "learning_rate": 9.227053735137911e-06, "loss": 0.3251, "step": 11731 }, { "epoch": 0.54, "grad_norm": 0.2952840968826415, "learning_rate": 9.225570275888692e-06, "loss": 0.211, "step": 11732 }, { "epoch": 0.54, "grad_norm": 0.30784671402876257, "learning_rate": 9.2240868337846e-06, "loss": 0.2654, "step": 11733 }, { "epoch": 0.54, "grad_norm": 1.5845608661243697, "learning_rate": 9.222603408858479e-06, "loss": 0.6848, "step": 11734 }, { "epoch": 0.54, "grad_norm": 0.44170935422139157, "learning_rate": 9.22112000114317e-06, "loss": 0.2225, "step": 11735 }, { "epoch": 0.54, "grad_norm": 0.5822223480235589, "learning_rate": 9.21963661067151e-06, "loss": 0.3921, "step": 11736 }, { "epoch": 0.54, "grad_norm": 0.38097048647408255, "learning_rate": 9.218153237476347e-06, "loss": 0.3226, "step": 11737 }, { "epoch": 0.54, "grad_norm": 0.35596313530092244, "learning_rate": 9.216669881590515e-06, "loss": 0.2068, "step": 11738 }, { "epoch": 0.54, "grad_norm": 0.27248729064045973, "learning_rate": 9.215186543046859e-06, "loss": 0.1677, "step": 11739 }, { "epoch": 0.54, "grad_norm": 1.1048807784267523, "learning_rate": 9.213703221878217e-06, "loss": 0.5724, "step": 11740 }, { "epoch": 0.54, "grad_norm": 0.3201440479629725, "learning_rate": 9.212219918117423e-06, "loss": 0.2218, "step": 11741 }, { "epoch": 0.54, "grad_norm": 0.3877990882605535, "learning_rate": 9.210736631797323e-06, "loss": 0.3227, "step": 11742 }, { "epoch": 0.54, "grad_norm": 0.8665899517857275, "learning_rate": 9.209253362950756e-06, "loss": 0.4682, "step": 11743 }, { "epoch": 0.54, "grad_norm": 0.20263733383781116, "learning_rate": 9.207770111610558e-06, "loss": 0.0699, "step": 11744 }, { "epoch": 0.54, "grad_norm": 0.27644202449833455, "learning_rate": 9.206286877809561e-06, "loss": 0.2435, "step": 11745 }, { "epoch": 0.54, "grad_norm": 1.2389493161484852, "learning_rate": 9.20480366158061e-06, "loss": 0.5767, "step": 11746 }, { "epoch": 0.54, "grad_norm": 0.7697942041233611, "learning_rate": 9.203320462956542e-06, "loss": 0.3713, "step": 11747 }, { "epoch": 0.54, "grad_norm": 0.40568154309710064, "learning_rate": 9.201837281970189e-06, "loss": 0.2619, "step": 11748 }, { "epoch": 0.54, "grad_norm": 0.4170892453425178, "learning_rate": 9.200354118654388e-06, "loss": 0.3199, "step": 11749 }, { "epoch": 0.54, "grad_norm": 0.5786696817018457, "learning_rate": 9.198870973041977e-06, "loss": 0.3088, "step": 11750 }, { "epoch": 0.54, "grad_norm": 0.24595719680342173, "learning_rate": 9.197387845165792e-06, "loss": 0.1654, "step": 11751 }, { "epoch": 0.54, "grad_norm": 1.0613192706043042, "learning_rate": 9.195904735058667e-06, "loss": 0.564, "step": 11752 }, { "epoch": 0.54, "grad_norm": 0.3275506925455265, "learning_rate": 9.19442164275343e-06, "loss": 0.2624, "step": 11753 }, { "epoch": 0.54, "grad_norm": 0.41732927715402635, "learning_rate": 9.19293856828293e-06, "loss": 0.2603, "step": 11754 }, { "epoch": 0.54, "grad_norm": 0.853511541896152, "learning_rate": 9.191455511679988e-06, "loss": 0.5283, "step": 11755 }, { "epoch": 0.54, "grad_norm": 0.5844273255196977, "learning_rate": 9.189972472977445e-06, "loss": 0.2569, "step": 11756 }, { "epoch": 0.54, "grad_norm": 0.28738762893968806, "learning_rate": 9.188489452208127e-06, "loss": 0.2362, "step": 11757 }, { "epoch": 0.54, "grad_norm": 0.3241662313616925, "learning_rate": 9.187006449404867e-06, "loss": 0.2219, "step": 11758 }, { "epoch": 0.54, "grad_norm": 0.580633524159367, "learning_rate": 9.185523464600506e-06, "loss": 0.3758, "step": 11759 }, { "epoch": 0.54, "grad_norm": 0.3879732705561156, "learning_rate": 9.18404049782787e-06, "loss": 0.2849, "step": 11760 }, { "epoch": 0.54, "grad_norm": 0.3760149772286431, "learning_rate": 9.18255754911979e-06, "loss": 0.2715, "step": 11761 }, { "epoch": 0.54, "grad_norm": 0.8634905954045263, "learning_rate": 9.181074618509097e-06, "loss": 0.4201, "step": 11762 }, { "epoch": 0.54, "grad_norm": 0.2857079586336548, "learning_rate": 9.179591706028626e-06, "loss": 0.226, "step": 11763 }, { "epoch": 0.54, "grad_norm": 0.3111934446404846, "learning_rate": 9.178108811711202e-06, "loss": 0.2144, "step": 11764 }, { "epoch": 0.54, "grad_norm": 0.39948621928221306, "learning_rate": 9.176625935589657e-06, "loss": 0.3213, "step": 11765 }, { "epoch": 0.54, "grad_norm": 0.3966612751084353, "learning_rate": 9.175143077696818e-06, "loss": 0.2983, "step": 11766 }, { "epoch": 0.54, "grad_norm": 0.8917503790897656, "learning_rate": 9.173660238065519e-06, "loss": 0.3729, "step": 11767 }, { "epoch": 0.54, "grad_norm": 0.448262125383949, "learning_rate": 9.172177416728584e-06, "loss": 0.2974, "step": 11768 }, { "epoch": 0.54, "grad_norm": 0.34330871468696605, "learning_rate": 9.170694613718845e-06, "loss": 0.2699, "step": 11769 }, { "epoch": 0.54, "grad_norm": 0.2577080291206743, "learning_rate": 9.169211829069129e-06, "loss": 0.1494, "step": 11770 }, { "epoch": 0.54, "grad_norm": 0.5871604841866879, "learning_rate": 9.167729062812256e-06, "loss": 0.3608, "step": 11771 }, { "epoch": 0.54, "grad_norm": 0.4158101012583032, "learning_rate": 9.166246314981066e-06, "loss": 0.3119, "step": 11772 }, { "epoch": 0.54, "grad_norm": 0.38083852072478935, "learning_rate": 9.164763585608379e-06, "loss": 0.3059, "step": 11773 }, { "epoch": 0.54, "grad_norm": 0.35566389357598654, "learning_rate": 9.16328087472702e-06, "loss": 0.0687, "step": 11774 }, { "epoch": 0.54, "grad_norm": 0.4229299559826986, "learning_rate": 9.161798182369809e-06, "loss": 0.3093, "step": 11775 }, { "epoch": 0.54, "grad_norm": 0.3763830717552729, "learning_rate": 9.160315508569587e-06, "loss": 0.2907, "step": 11776 }, { "epoch": 0.54, "grad_norm": 0.2850816016617573, "learning_rate": 9.158832853359167e-06, "loss": 0.1948, "step": 11777 }, { "epoch": 0.54, "grad_norm": 0.40507525725455573, "learning_rate": 9.157350216771379e-06, "loss": 0.263, "step": 11778 }, { "epoch": 0.54, "grad_norm": 1.2206266241756674, "learning_rate": 9.155867598839042e-06, "loss": 0.5766, "step": 11779 }, { "epoch": 0.54, "grad_norm": 0.3806652774536837, "learning_rate": 9.154384999594985e-06, "loss": 0.2507, "step": 11780 }, { "epoch": 0.54, "grad_norm": 0.3524388530149945, "learning_rate": 9.15290241907203e-06, "loss": 0.2451, "step": 11781 }, { "epoch": 0.54, "grad_norm": 0.7180814612700875, "learning_rate": 9.151419857302997e-06, "loss": 0.385, "step": 11782 }, { "epoch": 0.54, "grad_norm": 0.26947866846550655, "learning_rate": 9.14993731432071e-06, "loss": 0.1221, "step": 11783 }, { "epoch": 0.54, "grad_norm": 0.35551347087258284, "learning_rate": 9.148454790157993e-06, "loss": 0.2778, "step": 11784 }, { "epoch": 0.54, "grad_norm": 0.30523006271363934, "learning_rate": 9.146972284847665e-06, "loss": 0.2543, "step": 11785 }, { "epoch": 0.54, "grad_norm": 0.9596008431398434, "learning_rate": 9.14548979842255e-06, "loss": 0.5304, "step": 11786 }, { "epoch": 0.54, "grad_norm": 0.45840696915878626, "learning_rate": 9.144007330915469e-06, "loss": 0.2286, "step": 11787 }, { "epoch": 0.54, "grad_norm": 0.606249565030041, "learning_rate": 9.142524882359234e-06, "loss": 0.2928, "step": 11788 }, { "epoch": 0.54, "grad_norm": 0.5354836941467207, "learning_rate": 9.141042452786677e-06, "loss": 0.3854, "step": 11789 }, { "epoch": 0.54, "grad_norm": 0.32076393707763484, "learning_rate": 9.13956004223061e-06, "loss": 0.1904, "step": 11790 }, { "epoch": 0.54, "grad_norm": 0.2771889320698763, "learning_rate": 9.13807765072386e-06, "loss": 0.1656, "step": 11791 }, { "epoch": 0.54, "grad_norm": 0.374554057238784, "learning_rate": 9.136595278299232e-06, "loss": 0.313, "step": 11792 }, { "epoch": 0.54, "grad_norm": 0.36102194161686546, "learning_rate": 9.135112924989555e-06, "loss": 0.2315, "step": 11793 }, { "epoch": 0.54, "grad_norm": 0.7740935792251299, "learning_rate": 9.133630590827646e-06, "loss": 0.4283, "step": 11794 }, { "epoch": 0.54, "grad_norm": 0.8061229661665564, "learning_rate": 9.132148275846322e-06, "loss": 0.5376, "step": 11795 }, { "epoch": 0.54, "grad_norm": 0.3329793498380328, "learning_rate": 9.130665980078394e-06, "loss": 0.2167, "step": 11796 }, { "epoch": 0.54, "grad_norm": 0.23055121123074543, "learning_rate": 9.129183703556687e-06, "loss": 0.2039, "step": 11797 }, { "epoch": 0.54, "grad_norm": 0.9337471676199127, "learning_rate": 9.127701446314013e-06, "loss": 0.4027, "step": 11798 }, { "epoch": 0.54, "grad_norm": 0.3465239999456, "learning_rate": 9.126219208383188e-06, "loss": 0.2668, "step": 11799 }, { "epoch": 0.54, "grad_norm": 0.35354192922023686, "learning_rate": 9.124736989797028e-06, "loss": 0.2543, "step": 11800 }, { "epoch": 0.54, "grad_norm": 1.0951520511256436, "learning_rate": 9.123254790588346e-06, "loss": 0.665, "step": 11801 }, { "epoch": 0.54, "grad_norm": 0.3530882370436018, "learning_rate": 9.121772610789959e-06, "loss": 0.2531, "step": 11802 }, { "epoch": 0.54, "grad_norm": 0.2335556021716112, "learning_rate": 9.120290450434678e-06, "loss": 0.1077, "step": 11803 }, { "epoch": 0.54, "grad_norm": 0.49430505471762104, "learning_rate": 9.118808309555323e-06, "loss": 0.3072, "step": 11804 }, { "epoch": 0.54, "grad_norm": 0.3925051521775017, "learning_rate": 9.117326188184696e-06, "loss": 0.2909, "step": 11805 }, { "epoch": 0.54, "grad_norm": 0.9568778097199951, "learning_rate": 9.11584408635562e-06, "loss": 0.327, "step": 11806 }, { "epoch": 0.54, "grad_norm": 1.153122845252285, "learning_rate": 9.114362004100905e-06, "loss": 0.7138, "step": 11807 }, { "epoch": 0.54, "grad_norm": 0.3591160629562497, "learning_rate": 9.112879941453361e-06, "loss": 0.2587, "step": 11808 }, { "epoch": 0.54, "grad_norm": 0.24837658650669117, "learning_rate": 9.111397898445798e-06, "loss": 0.1786, "step": 11809 }, { "epoch": 0.54, "grad_norm": 1.0046858414902224, "learning_rate": 9.109915875111032e-06, "loss": 0.4097, "step": 11810 }, { "epoch": 0.54, "grad_norm": 0.440712195311647, "learning_rate": 9.10843387148187e-06, "loss": 0.29, "step": 11811 }, { "epoch": 0.54, "grad_norm": 0.38741481103051595, "learning_rate": 9.106951887591123e-06, "loss": 0.3299, "step": 11812 }, { "epoch": 0.54, "grad_norm": 0.47143590848608313, "learning_rate": 9.105469923471599e-06, "loss": 0.2797, "step": 11813 }, { "epoch": 0.54, "grad_norm": 0.3396606957200545, "learning_rate": 9.10398797915611e-06, "loss": 0.2624, "step": 11814 }, { "epoch": 0.54, "grad_norm": 0.7526467285562537, "learning_rate": 9.102506054677462e-06, "loss": 0.4395, "step": 11815 }, { "epoch": 0.54, "grad_norm": 0.3541252807405396, "learning_rate": 9.101024150068467e-06, "loss": 0.2693, "step": 11816 }, { "epoch": 0.54, "grad_norm": 0.2787188827962047, "learning_rate": 9.09954226536193e-06, "loss": 0.2285, "step": 11817 }, { "epoch": 0.54, "grad_norm": 0.39751221059998965, "learning_rate": 9.098060400590657e-06, "loss": 0.2689, "step": 11818 }, { "epoch": 0.54, "grad_norm": 0.8949340531125743, "learning_rate": 9.096578555787462e-06, "loss": 0.5405, "step": 11819 }, { "epoch": 0.54, "grad_norm": 0.32291750460531427, "learning_rate": 9.095096730985145e-06, "loss": 0.237, "step": 11820 }, { "epoch": 0.54, "grad_norm": 0.5698087836804717, "learning_rate": 9.093614926216515e-06, "loss": 0.3993, "step": 11821 }, { "epoch": 0.54, "grad_norm": 0.5012497983219936, "learning_rate": 9.092133141514371e-06, "loss": 0.2339, "step": 11822 }, { "epoch": 0.54, "grad_norm": 0.2792818336966272, "learning_rate": 9.090651376911532e-06, "loss": 0.2037, "step": 11823 }, { "epoch": 0.54, "grad_norm": 0.5410362717887204, "learning_rate": 9.089169632440792e-06, "loss": 0.3914, "step": 11824 }, { "epoch": 0.54, "grad_norm": 0.5527244329819719, "learning_rate": 9.087687908134959e-06, "loss": 0.3923, "step": 11825 }, { "epoch": 0.54, "grad_norm": 0.3577616434257504, "learning_rate": 9.086206204026834e-06, "loss": 0.1946, "step": 11826 }, { "epoch": 0.54, "grad_norm": 0.6785985284342616, "learning_rate": 9.084724520149226e-06, "loss": 0.4166, "step": 11827 }, { "epoch": 0.54, "grad_norm": 0.3597716910895084, "learning_rate": 9.083242856534935e-06, "loss": 0.3222, "step": 11828 }, { "epoch": 0.54, "grad_norm": 0.29897549704083254, "learning_rate": 9.081761213216763e-06, "loss": 0.125, "step": 11829 }, { "epoch": 0.54, "grad_norm": 0.3563106573655419, "learning_rate": 9.080279590227514e-06, "loss": 0.2363, "step": 11830 }, { "epoch": 0.54, "grad_norm": 0.5196085394329052, "learning_rate": 9.078797987599983e-06, "loss": 0.4066, "step": 11831 }, { "epoch": 0.54, "grad_norm": 0.3182692762594009, "learning_rate": 9.07731640536698e-06, "loss": 0.1989, "step": 11832 }, { "epoch": 0.54, "grad_norm": 0.6100782896133956, "learning_rate": 9.075834843561305e-06, "loss": 0.3486, "step": 11833 }, { "epoch": 0.54, "grad_norm": 1.3852521995572378, "learning_rate": 9.074353302215755e-06, "loss": 0.6042, "step": 11834 }, { "epoch": 0.54, "grad_norm": 0.2790419103783699, "learning_rate": 9.072871781363125e-06, "loss": 0.1645, "step": 11835 }, { "epoch": 0.54, "grad_norm": 0.30682098628181514, "learning_rate": 9.071390281036225e-06, "loss": 0.2557, "step": 11836 }, { "epoch": 0.54, "grad_norm": 1.5603228155869955, "learning_rate": 9.069908801267853e-06, "loss": 0.8318, "step": 11837 }, { "epoch": 0.54, "grad_norm": 0.4194742067223031, "learning_rate": 9.0684273420908e-06, "loss": 0.3008, "step": 11838 }, { "epoch": 0.54, "grad_norm": 0.5475818016228682, "learning_rate": 9.066945903537862e-06, "loss": 0.2909, "step": 11839 }, { "epoch": 0.54, "grad_norm": 0.37007775103491114, "learning_rate": 9.065464485641849e-06, "loss": 0.3128, "step": 11840 }, { "epoch": 0.54, "grad_norm": 0.2918843392682149, "learning_rate": 9.06398308843555e-06, "loss": 0.227, "step": 11841 }, { "epoch": 0.54, "grad_norm": 0.300116940729314, "learning_rate": 9.062501711951766e-06, "loss": 0.1426, "step": 11842 }, { "epoch": 0.54, "grad_norm": 0.4535662949441457, "learning_rate": 9.061020356223285e-06, "loss": 0.3749, "step": 11843 }, { "epoch": 0.54, "grad_norm": 0.36144442444833325, "learning_rate": 9.059539021282913e-06, "loss": 0.2937, "step": 11844 }, { "epoch": 0.54, "grad_norm": 0.43987568205387334, "learning_rate": 9.05805770716344e-06, "loss": 0.2907, "step": 11845 }, { "epoch": 0.54, "grad_norm": 1.2693595244816196, "learning_rate": 9.05657641389766e-06, "loss": 0.5362, "step": 11846 }, { "epoch": 0.54, "grad_norm": 0.33835908791600655, "learning_rate": 9.05509514151837e-06, "loss": 0.194, "step": 11847 }, { "epoch": 0.54, "grad_norm": 0.21939623798901306, "learning_rate": 9.053613890058362e-06, "loss": 0.2079, "step": 11848 }, { "epoch": 0.54, "grad_norm": 0.6067823500047931, "learning_rate": 9.052132659550431e-06, "loss": 0.4281, "step": 11849 }, { "epoch": 0.54, "grad_norm": 0.598036160362092, "learning_rate": 9.05065145002737e-06, "loss": 0.337, "step": 11850 }, { "epoch": 0.54, "grad_norm": 0.4178477164980959, "learning_rate": 9.04917026152197e-06, "loss": 0.3061, "step": 11851 }, { "epoch": 0.54, "grad_norm": 0.3538169125567631, "learning_rate": 9.04768909406702e-06, "loss": 0.2505, "step": 11852 }, { "epoch": 0.54, "grad_norm": 0.5843021237082427, "learning_rate": 9.046207947695321e-06, "loss": 0.2633, "step": 11853 }, { "epoch": 0.54, "grad_norm": 0.24799342040782396, "learning_rate": 9.044726822439658e-06, "loss": 0.2028, "step": 11854 }, { "epoch": 0.54, "grad_norm": 0.5474643161308999, "learning_rate": 9.043245718332821e-06, "loss": 0.2846, "step": 11855 }, { "epoch": 0.54, "grad_norm": 0.3481956129383209, "learning_rate": 9.041764635407602e-06, "loss": 0.2926, "step": 11856 }, { "epoch": 0.54, "grad_norm": 0.4957189851958987, "learning_rate": 9.040283573696791e-06, "loss": 0.3414, "step": 11857 }, { "epoch": 0.54, "grad_norm": 0.9560524433233588, "learning_rate": 9.038802533233178e-06, "loss": 0.3027, "step": 11858 }, { "epoch": 0.54, "grad_norm": 0.31861216303255263, "learning_rate": 9.037321514049549e-06, "loss": 0.2266, "step": 11859 }, { "epoch": 0.54, "grad_norm": 0.32814070314485694, "learning_rate": 9.035840516178695e-06, "loss": 0.2838, "step": 11860 }, { "epoch": 0.54, "grad_norm": 0.50559648032967, "learning_rate": 9.034359539653399e-06, "loss": 0.3059, "step": 11861 }, { "epoch": 0.54, "grad_norm": 0.40011584883822304, "learning_rate": 9.032878584506455e-06, "loss": 0.2205, "step": 11862 }, { "epoch": 0.54, "grad_norm": 0.5550201101669583, "learning_rate": 9.031397650770648e-06, "loss": 0.3756, "step": 11863 }, { "epoch": 0.55, "grad_norm": 0.38205764702113265, "learning_rate": 9.029916738478762e-06, "loss": 0.3064, "step": 11864 }, { "epoch": 0.55, "grad_norm": 0.6132049096025071, "learning_rate": 9.02843584766358e-06, "loss": 0.1673, "step": 11865 }, { "epoch": 0.55, "grad_norm": 0.29098658353015133, "learning_rate": 9.0269549783579e-06, "loss": 0.2281, "step": 11866 }, { "epoch": 0.55, "grad_norm": 0.4622545108670501, "learning_rate": 9.025474130594495e-06, "loss": 0.3512, "step": 11867 }, { "epoch": 0.55, "grad_norm": 0.35739991410872585, "learning_rate": 9.023993304406156e-06, "loss": 0.2129, "step": 11868 }, { "epoch": 0.55, "grad_norm": 0.3094439430048366, "learning_rate": 9.022512499825658e-06, "loss": 0.2452, "step": 11869 }, { "epoch": 0.55, "grad_norm": 1.1004926512280166, "learning_rate": 9.021031716885797e-06, "loss": 0.5324, "step": 11870 }, { "epoch": 0.55, "grad_norm": 0.3323659043326884, "learning_rate": 9.01955095561935e-06, "loss": 0.2108, "step": 11871 }, { "epoch": 0.55, "grad_norm": 0.2651446781288627, "learning_rate": 9.018070216059098e-06, "loss": 0.256, "step": 11872 }, { "epoch": 0.55, "grad_norm": 0.7384624686624097, "learning_rate": 9.016589498237825e-06, "loss": 0.4374, "step": 11873 }, { "epoch": 0.55, "grad_norm": 0.8289305177604985, "learning_rate": 9.015108802188314e-06, "loss": 0.5241, "step": 11874 }, { "epoch": 0.55, "grad_norm": 0.2843442724573891, "learning_rate": 9.013628127943345e-06, "loss": 0.1742, "step": 11875 }, { "epoch": 0.55, "grad_norm": 0.39186448912650274, "learning_rate": 9.012147475535698e-06, "loss": 0.2955, "step": 11876 }, { "epoch": 0.55, "grad_norm": 0.6189472330096473, "learning_rate": 9.010666844998154e-06, "loss": 0.3162, "step": 11877 }, { "epoch": 0.55, "grad_norm": 0.44327473998813544, "learning_rate": 9.00918623636349e-06, "loss": 0.2531, "step": 11878 }, { "epoch": 0.55, "grad_norm": 0.5206410581033497, "learning_rate": 9.007705649664491e-06, "loss": 0.3643, "step": 11879 }, { "epoch": 0.55, "grad_norm": 0.4267329918750843, "learning_rate": 9.006225084933932e-06, "loss": 0.312, "step": 11880 }, { "epoch": 0.55, "grad_norm": 0.2917122436907841, "learning_rate": 9.004744542204593e-06, "loss": 0.1882, "step": 11881 }, { "epoch": 0.55, "grad_norm": 0.5611276748425978, "learning_rate": 9.003264021509243e-06, "loss": 0.2842, "step": 11882 }, { "epoch": 0.55, "grad_norm": 0.3987042029575161, "learning_rate": 9.001783522880675e-06, "loss": 0.3232, "step": 11883 }, { "epoch": 0.55, "grad_norm": 0.33095713034068386, "learning_rate": 9.000303046351658e-06, "loss": 0.2285, "step": 11884 }, { "epoch": 0.55, "grad_norm": 0.8358069353286274, "learning_rate": 8.998822591954967e-06, "loss": 0.4945, "step": 11885 }, { "epoch": 0.55, "grad_norm": 1.0681155620055311, "learning_rate": 8.997342159723372e-06, "loss": 0.6454, "step": 11886 }, { "epoch": 0.55, "grad_norm": 0.2500990833033169, "learning_rate": 8.995861749689662e-06, "loss": 0.1979, "step": 11887 }, { "epoch": 0.55, "grad_norm": 0.25660745447452915, "learning_rate": 8.994381361886603e-06, "loss": 0.1879, "step": 11888 }, { "epoch": 0.55, "grad_norm": 0.7077093096196782, "learning_rate": 8.992900996346973e-06, "loss": 0.3951, "step": 11889 }, { "epoch": 0.55, "grad_norm": 0.3565319964044789, "learning_rate": 8.991420653103546e-06, "loss": 0.2909, "step": 11890 }, { "epoch": 0.55, "grad_norm": 0.44688066210211536, "learning_rate": 8.98994033218909e-06, "loss": 0.2719, "step": 11891 }, { "epoch": 0.55, "grad_norm": 0.44271720189703945, "learning_rate": 8.98846003363638e-06, "loss": 0.3167, "step": 11892 }, { "epoch": 0.55, "grad_norm": 0.30369461949143023, "learning_rate": 8.986979757478195e-06, "loss": 0.2227, "step": 11893 }, { "epoch": 0.55, "grad_norm": 0.42038434180472234, "learning_rate": 8.985499503747298e-06, "loss": 0.1844, "step": 11894 }, { "epoch": 0.55, "grad_norm": 0.3500522380221386, "learning_rate": 8.984019272476465e-06, "loss": 0.3086, "step": 11895 }, { "epoch": 0.55, "grad_norm": 0.39151272476517607, "learning_rate": 8.982539063698468e-06, "loss": 0.2803, "step": 11896 }, { "epoch": 0.55, "grad_norm": 0.6653153442715919, "learning_rate": 8.981058877446073e-06, "loss": 0.3167, "step": 11897 }, { "epoch": 0.55, "grad_norm": 0.6027038682025772, "learning_rate": 8.979578713752055e-06, "loss": 0.4087, "step": 11898 }, { "epoch": 0.55, "grad_norm": 0.40504333571494705, "learning_rate": 8.978098572649176e-06, "loss": 0.3019, "step": 11899 }, { "epoch": 0.55, "grad_norm": 0.2592786579362508, "learning_rate": 8.976618454170212e-06, "loss": 0.2296, "step": 11900 }, { "epoch": 0.55, "grad_norm": 0.6916368474250763, "learning_rate": 8.975138358347931e-06, "loss": 0.3107, "step": 11901 }, { "epoch": 0.55, "grad_norm": 0.362077249581779, "learning_rate": 8.973658285215101e-06, "loss": 0.2634, "step": 11902 }, { "epoch": 0.55, "grad_norm": 0.3821333947443045, "learning_rate": 8.97217823480448e-06, "loss": 0.3107, "step": 11903 }, { "epoch": 0.55, "grad_norm": 0.9189977796612226, "learning_rate": 8.970698207148848e-06, "loss": 0.5339, "step": 11904 }, { "epoch": 0.55, "grad_norm": 0.3655146397281494, "learning_rate": 8.969218202280964e-06, "loss": 0.2548, "step": 11905 }, { "epoch": 0.55, "grad_norm": 0.3911935267797988, "learning_rate": 8.967738220233597e-06, "loss": 0.2042, "step": 11906 }, { "epoch": 0.55, "grad_norm": 0.37758109943956747, "learning_rate": 8.96625826103951e-06, "loss": 0.2734, "step": 11907 }, { "epoch": 0.55, "grad_norm": 0.35640658064075487, "learning_rate": 8.964778324731467e-06, "loss": 0.2438, "step": 11908 }, { "epoch": 0.55, "grad_norm": 1.6368110205812663, "learning_rate": 8.963298411342236e-06, "loss": 0.7069, "step": 11909 }, { "epoch": 0.55, "grad_norm": 0.6983923990929997, "learning_rate": 8.96181852090458e-06, "loss": 0.2931, "step": 11910 }, { "epoch": 0.55, "grad_norm": 0.2793484679495848, "learning_rate": 8.96033865345126e-06, "loss": 0.2654, "step": 11911 }, { "epoch": 0.55, "grad_norm": 0.6899307236122483, "learning_rate": 8.958858809015036e-06, "loss": 0.3966, "step": 11912 }, { "epoch": 0.55, "grad_norm": 0.34828482576864805, "learning_rate": 8.957378987628682e-06, "loss": 0.2154, "step": 11913 }, { "epoch": 0.55, "grad_norm": 0.32251749757909354, "learning_rate": 8.95589918932495e-06, "loss": 0.1842, "step": 11914 }, { "epoch": 0.55, "grad_norm": 0.39425689691051585, "learning_rate": 8.954419414136602e-06, "loss": 0.3082, "step": 11915 }, { "epoch": 0.55, "grad_norm": 1.197984969771984, "learning_rate": 8.952939662096397e-06, "loss": 0.7476, "step": 11916 }, { "epoch": 0.55, "grad_norm": 0.3085798553802743, "learning_rate": 8.951459933237103e-06, "loss": 0.1882, "step": 11917 }, { "epoch": 0.55, "grad_norm": 0.7431781619259294, "learning_rate": 8.949980227591476e-06, "loss": 0.4028, "step": 11918 }, { "epoch": 0.55, "grad_norm": 0.2949509626744957, "learning_rate": 8.948500545192274e-06, "loss": 0.2528, "step": 11919 }, { "epoch": 0.55, "grad_norm": 0.24006285337238, "learning_rate": 8.947020886072258e-06, "loss": 0.1675, "step": 11920 }, { "epoch": 0.55, "grad_norm": 1.2160646409147715, "learning_rate": 8.945541250264182e-06, "loss": 0.6558, "step": 11921 }, { "epoch": 0.55, "grad_norm": 1.1450981483073202, "learning_rate": 8.944061637800808e-06, "loss": 0.6818, "step": 11922 }, { "epoch": 0.55, "grad_norm": 0.30884588531532053, "learning_rate": 8.942582048714891e-06, "loss": 0.2287, "step": 11923 }, { "epoch": 0.55, "grad_norm": 0.6349107866218481, "learning_rate": 8.941102483039188e-06, "loss": 0.3514, "step": 11924 }, { "epoch": 0.55, "grad_norm": 0.27052262468975724, "learning_rate": 8.939622940806456e-06, "loss": 0.1456, "step": 11925 }, { "epoch": 0.55, "grad_norm": 0.32268023987257255, "learning_rate": 8.93814342204945e-06, "loss": 0.2583, "step": 11926 }, { "epoch": 0.55, "grad_norm": 0.37183543159288124, "learning_rate": 8.936663926800926e-06, "loss": 0.2427, "step": 11927 }, { "epoch": 0.55, "grad_norm": 0.8572066118160747, "learning_rate": 8.935184455093637e-06, "loss": 0.5737, "step": 11928 }, { "epoch": 0.55, "grad_norm": 0.3620050591650081, "learning_rate": 8.933705006960333e-06, "loss": 0.2892, "step": 11929 }, { "epoch": 0.55, "grad_norm": 0.7461454013971124, "learning_rate": 8.932225582433779e-06, "loss": 0.3354, "step": 11930 }, { "epoch": 0.55, "grad_norm": 0.26971288129210313, "learning_rate": 8.930746181546723e-06, "loss": 0.238, "step": 11931 }, { "epoch": 0.55, "grad_norm": 0.41491734911341543, "learning_rate": 8.929266804331912e-06, "loss": 0.2765, "step": 11932 }, { "epoch": 0.55, "grad_norm": 0.28769518004868905, "learning_rate": 8.927787450822099e-06, "loss": 0.1758, "step": 11933 }, { "epoch": 0.55, "grad_norm": 0.5474531416647711, "learning_rate": 8.926308121050042e-06, "loss": 0.3458, "step": 11934 }, { "epoch": 0.55, "grad_norm": 0.46994000017203785, "learning_rate": 8.92482881504849e-06, "loss": 0.304, "step": 11935 }, { "epoch": 0.55, "grad_norm": 0.40049241422106846, "learning_rate": 8.923349532850191e-06, "loss": 0.2795, "step": 11936 }, { "epoch": 0.55, "grad_norm": 0.30971522364837467, "learning_rate": 8.921870274487896e-06, "loss": 0.1903, "step": 11937 }, { "epoch": 0.55, "grad_norm": 0.45800706148017134, "learning_rate": 8.92039103999435e-06, "loss": 0.314, "step": 11938 }, { "epoch": 0.55, "grad_norm": 0.3191680637813942, "learning_rate": 8.918911829402311e-06, "loss": 0.2918, "step": 11939 }, { "epoch": 0.55, "grad_norm": 0.8715304630563817, "learning_rate": 8.917432642744519e-06, "loss": 0.3607, "step": 11940 }, { "epoch": 0.55, "grad_norm": 0.3297736574001783, "learning_rate": 8.915953480053726e-06, "loss": 0.2782, "step": 11941 }, { "epoch": 0.55, "grad_norm": 0.7901504814030177, "learning_rate": 8.914474341362677e-06, "loss": 0.4654, "step": 11942 }, { "epoch": 0.55, "grad_norm": 0.3385953701893658, "learning_rate": 8.91299522670412e-06, "loss": 0.2559, "step": 11943 }, { "epoch": 0.55, "grad_norm": 0.26240276168600873, "learning_rate": 8.911516136110803e-06, "loss": 0.2007, "step": 11944 }, { "epoch": 0.55, "grad_norm": 0.42579703963147303, "learning_rate": 8.910037069615468e-06, "loss": 0.275, "step": 11945 }, { "epoch": 0.55, "grad_norm": 0.534312683379889, "learning_rate": 8.908558027250859e-06, "loss": 0.3053, "step": 11946 }, { "epoch": 0.55, "grad_norm": 0.29425277504442326, "learning_rate": 8.907079009049728e-06, "loss": 0.2577, "step": 11947 }, { "epoch": 0.55, "grad_norm": 1.2675648379049664, "learning_rate": 8.905600015044813e-06, "loss": 0.7705, "step": 11948 }, { "epoch": 0.55, "grad_norm": 0.3402363256162234, "learning_rate": 8.904121045268862e-06, "loss": 0.1329, "step": 11949 }, { "epoch": 0.55, "grad_norm": 0.3306486469876794, "learning_rate": 8.902642099754611e-06, "loss": 0.186, "step": 11950 }, { "epoch": 0.55, "grad_norm": 0.27497502828706905, "learning_rate": 8.901163178534804e-06, "loss": 0.2787, "step": 11951 }, { "epoch": 0.55, "grad_norm": 0.7767299915185887, "learning_rate": 8.899684281642189e-06, "loss": 0.4064, "step": 11952 }, { "epoch": 0.55, "grad_norm": 0.3731491520905725, "learning_rate": 8.898205409109503e-06, "loss": 0.1906, "step": 11953 }, { "epoch": 0.55, "grad_norm": 0.5400836506518383, "learning_rate": 8.896726560969486e-06, "loss": 0.3601, "step": 11954 }, { "epoch": 0.55, "grad_norm": 0.35889372931241625, "learning_rate": 8.89524773725488e-06, "loss": 0.3097, "step": 11955 }, { "epoch": 0.55, "grad_norm": 0.21806108303222296, "learning_rate": 8.893768937998425e-06, "loss": 0.1236, "step": 11956 }, { "epoch": 0.55, "grad_norm": 0.45110614749666156, "learning_rate": 8.89229016323286e-06, "loss": 0.2655, "step": 11957 }, { "epoch": 0.55, "grad_norm": 0.9502639970997538, "learning_rate": 8.890811412990923e-06, "loss": 0.4785, "step": 11958 }, { "epoch": 0.55, "grad_norm": 0.28074274645875097, "learning_rate": 8.88933268730535e-06, "loss": 0.2193, "step": 11959 }, { "epoch": 0.55, "grad_norm": 0.5586783289622752, "learning_rate": 8.887853986208883e-06, "loss": 0.3813, "step": 11960 }, { "epoch": 0.55, "grad_norm": 1.1120165776800148, "learning_rate": 8.886375309734257e-06, "loss": 0.6174, "step": 11961 }, { "epoch": 0.55, "grad_norm": 0.2392298532593129, "learning_rate": 8.884896657914208e-06, "loss": 0.1611, "step": 11962 }, { "epoch": 0.55, "grad_norm": 0.37356346499112647, "learning_rate": 8.883418030781468e-06, "loss": 0.2975, "step": 11963 }, { "epoch": 0.55, "grad_norm": 1.0204850220444457, "learning_rate": 8.88193942836878e-06, "loss": 0.4824, "step": 11964 }, { "epoch": 0.55, "grad_norm": 0.4394118431045616, "learning_rate": 8.880460850708877e-06, "loss": 0.2822, "step": 11965 }, { "epoch": 0.55, "grad_norm": 0.2964547840079321, "learning_rate": 8.878982297834492e-06, "loss": 0.1477, "step": 11966 }, { "epoch": 0.55, "grad_norm": 0.38670111706394944, "learning_rate": 8.877503769778358e-06, "loss": 0.3148, "step": 11967 }, { "epoch": 0.55, "grad_norm": 0.5321987397161865, "learning_rate": 8.876025266573206e-06, "loss": 0.3413, "step": 11968 }, { "epoch": 0.55, "grad_norm": 0.5038676985928764, "learning_rate": 8.874546788251773e-06, "loss": 0.281, "step": 11969 }, { "epoch": 0.55, "grad_norm": 0.30080831038884753, "learning_rate": 8.87306833484679e-06, "loss": 0.2583, "step": 11970 }, { "epoch": 0.55, "grad_norm": 0.9018782641473774, "learning_rate": 8.871589906390988e-06, "loss": 0.5578, "step": 11971 }, { "epoch": 0.55, "grad_norm": 0.2725778831370997, "learning_rate": 8.870111502917097e-06, "loss": 0.1848, "step": 11972 }, { "epoch": 0.55, "grad_norm": 1.3155925963290773, "learning_rate": 8.868633124457848e-06, "loss": 0.7015, "step": 11973 }, { "epoch": 0.55, "grad_norm": 0.5318628317206986, "learning_rate": 8.867154771045973e-06, "loss": 0.2976, "step": 11974 }, { "epoch": 0.55, "grad_norm": 0.3067103475537007, "learning_rate": 8.865676442714199e-06, "loss": 0.2714, "step": 11975 }, { "epoch": 0.55, "grad_norm": 0.6867981071016549, "learning_rate": 8.86419813949525e-06, "loss": 0.3627, "step": 11976 }, { "epoch": 0.55, "grad_norm": 0.34009171363857127, "learning_rate": 8.862719861421866e-06, "loss": 0.1915, "step": 11977 }, { "epoch": 0.55, "grad_norm": 0.3396337476704829, "learning_rate": 8.861241608526768e-06, "loss": 0.2512, "step": 11978 }, { "epoch": 0.55, "grad_norm": 0.3508046945428364, "learning_rate": 8.859763380842683e-06, "loss": 0.2419, "step": 11979 }, { "epoch": 0.55, "grad_norm": 0.6718522790576175, "learning_rate": 8.858285178402335e-06, "loss": 0.3306, "step": 11980 }, { "epoch": 0.55, "grad_norm": 0.4577767900038282, "learning_rate": 8.856807001238452e-06, "loss": 0.3342, "step": 11981 }, { "epoch": 0.55, "grad_norm": 0.3753635095980729, "learning_rate": 8.855328849383761e-06, "loss": 0.2585, "step": 11982 }, { "epoch": 0.55, "grad_norm": 0.4331005721413234, "learning_rate": 8.853850722870989e-06, "loss": 0.3018, "step": 11983 }, { "epoch": 0.55, "grad_norm": 0.3970536680822463, "learning_rate": 8.852372621732856e-06, "loss": 0.2683, "step": 11984 }, { "epoch": 0.55, "grad_norm": 0.4432730571369166, "learning_rate": 8.850894546002085e-06, "loss": 0.2231, "step": 11985 }, { "epoch": 0.55, "grad_norm": 0.4473410966693785, "learning_rate": 8.849416495711403e-06, "loss": 0.3042, "step": 11986 }, { "epoch": 0.55, "grad_norm": 0.3608431350744341, "learning_rate": 8.847938470893533e-06, "loss": 0.2791, "step": 11987 }, { "epoch": 0.55, "grad_norm": 1.3340588083468055, "learning_rate": 8.846460471581192e-06, "loss": 0.8145, "step": 11988 }, { "epoch": 0.55, "grad_norm": 0.23646920895177737, "learning_rate": 8.844982497807104e-06, "loss": 0.1126, "step": 11989 }, { "epoch": 0.55, "grad_norm": 0.2975044295581151, "learning_rate": 8.843504549603993e-06, "loss": 0.2323, "step": 11990 }, { "epoch": 0.55, "grad_norm": 0.35933584846402805, "learning_rate": 8.842026627004574e-06, "loss": 0.3341, "step": 11991 }, { "epoch": 0.55, "grad_norm": 0.42488731823978176, "learning_rate": 8.84054873004157e-06, "loss": 0.2275, "step": 11992 }, { "epoch": 0.55, "grad_norm": 0.423454274171741, "learning_rate": 8.839070858747697e-06, "loss": 0.3318, "step": 11993 }, { "epoch": 0.55, "grad_norm": 0.4838204249276851, "learning_rate": 8.83759301315568e-06, "loss": 0.3821, "step": 11994 }, { "epoch": 0.55, "grad_norm": 0.319655871467485, "learning_rate": 8.836115193298232e-06, "loss": 0.2025, "step": 11995 }, { "epoch": 0.55, "grad_norm": 0.23942928131271157, "learning_rate": 8.834637399208076e-06, "loss": 0.1924, "step": 11996 }, { "epoch": 0.55, "grad_norm": 0.7282807717283084, "learning_rate": 8.833159630917922e-06, "loss": 0.4181, "step": 11997 }, { "epoch": 0.55, "grad_norm": 0.29349185538462813, "learning_rate": 8.831681888460484e-06, "loss": 0.2456, "step": 11998 }, { "epoch": 0.55, "grad_norm": 0.4030794767902304, "learning_rate": 8.830204171868487e-06, "loss": 0.281, "step": 11999 }, { "epoch": 0.55, "grad_norm": 0.9516729543701261, "learning_rate": 8.828726481174643e-06, "loss": 0.5827, "step": 12000 }, { "epoch": 0.55, "grad_norm": 0.42236630063156705, "learning_rate": 8.827248816411666e-06, "loss": 0.2993, "step": 12001 }, { "epoch": 0.55, "grad_norm": 0.34156172398438306, "learning_rate": 8.825771177612269e-06, "loss": 0.2339, "step": 12002 }, { "epoch": 0.55, "grad_norm": 0.38401867620663854, "learning_rate": 8.824293564809166e-06, "loss": 0.3158, "step": 12003 }, { "epoch": 0.55, "grad_norm": 0.3107882646696273, "learning_rate": 8.822815978035072e-06, "loss": 0.1715, "step": 12004 }, { "epoch": 0.55, "grad_norm": 0.41587757530185343, "learning_rate": 8.821338417322696e-06, "loss": 0.2415, "step": 12005 }, { "epoch": 0.55, "grad_norm": 0.3781782068438, "learning_rate": 8.819860882704751e-06, "loss": 0.313, "step": 12006 }, { "epoch": 0.55, "grad_norm": 1.1858673132868942, "learning_rate": 8.81838337421395e-06, "loss": 0.5966, "step": 12007 }, { "epoch": 0.55, "grad_norm": 0.3031844145490355, "learning_rate": 8.816905891883002e-06, "loss": 0.2046, "step": 12008 }, { "epoch": 0.55, "grad_norm": 0.3215837287830184, "learning_rate": 8.81542843574462e-06, "loss": 0.2309, "step": 12009 }, { "epoch": 0.55, "grad_norm": 0.3687449564220821, "learning_rate": 8.813951005831507e-06, "loss": 0.3064, "step": 12010 }, { "epoch": 0.55, "grad_norm": 0.3311277846298865, "learning_rate": 8.812473602176373e-06, "loss": 0.1936, "step": 12011 }, { "epoch": 0.55, "grad_norm": 1.250981447290825, "learning_rate": 8.810996224811933e-06, "loss": 0.7244, "step": 12012 }, { "epoch": 0.55, "grad_norm": 0.7042402386612823, "learning_rate": 8.80951887377089e-06, "loss": 0.3762, "step": 12013 }, { "epoch": 0.55, "grad_norm": 0.3297862835025889, "learning_rate": 8.808041549085954e-06, "loss": 0.2994, "step": 12014 }, { "epoch": 0.55, "grad_norm": 0.41110558822388815, "learning_rate": 8.806564250789823e-06, "loss": 0.2982, "step": 12015 }, { "epoch": 0.55, "grad_norm": 0.23555947633486063, "learning_rate": 8.805086978915215e-06, "loss": 0.1446, "step": 12016 }, { "epoch": 0.55, "grad_norm": 0.3759370181846773, "learning_rate": 8.803609733494826e-06, "loss": 0.2434, "step": 12017 }, { "epoch": 0.55, "grad_norm": 0.3257131124348913, "learning_rate": 8.802132514561368e-06, "loss": 0.2491, "step": 12018 }, { "epoch": 0.55, "grad_norm": 0.7656360339514674, "learning_rate": 8.800655322147539e-06, "loss": 0.4078, "step": 12019 }, { "epoch": 0.55, "grad_norm": 0.3959445253797471, "learning_rate": 8.799178156286046e-06, "loss": 0.2784, "step": 12020 }, { "epoch": 0.55, "grad_norm": 0.38909344306332827, "learning_rate": 8.79770101700959e-06, "loss": 0.227, "step": 12021 }, { "epoch": 0.55, "grad_norm": 0.2874065431082309, "learning_rate": 8.796223904350878e-06, "loss": 0.2674, "step": 12022 }, { "epoch": 0.55, "grad_norm": 0.4122689258544816, "learning_rate": 8.794746818342603e-06, "loss": 0.3078, "step": 12023 }, { "epoch": 0.55, "grad_norm": 1.097776493706162, "learning_rate": 8.793269759017475e-06, "loss": 0.4353, "step": 12024 }, { "epoch": 0.55, "grad_norm": 0.8309541728271063, "learning_rate": 8.791792726408194e-06, "loss": 0.4321, "step": 12025 }, { "epoch": 0.55, "grad_norm": 0.29053784392307386, "learning_rate": 8.790315720547456e-06, "loss": 0.2366, "step": 12026 }, { "epoch": 0.55, "grad_norm": 0.4265672607253519, "learning_rate": 8.788838741467961e-06, "loss": 0.3421, "step": 12027 }, { "epoch": 0.55, "grad_norm": 0.21670371639272284, "learning_rate": 8.787361789202406e-06, "loss": 0.0906, "step": 12028 }, { "epoch": 0.55, "grad_norm": 0.3404766667164344, "learning_rate": 8.785884863783498e-06, "loss": 0.2564, "step": 12029 }, { "epoch": 0.55, "grad_norm": 0.3912450325470597, "learning_rate": 8.784407965243926e-06, "loss": 0.3004, "step": 12030 }, { "epoch": 0.55, "grad_norm": 0.9787767926254624, "learning_rate": 8.782931093616392e-06, "loss": 0.3409, "step": 12031 }, { "epoch": 0.55, "grad_norm": 0.3387538536114813, "learning_rate": 8.781454248933586e-06, "loss": 0.2634, "step": 12032 }, { "epoch": 0.55, "grad_norm": 1.2035584318068064, "learning_rate": 8.779977431228213e-06, "loss": 0.5478, "step": 12033 }, { "epoch": 0.55, "grad_norm": 0.20558018365004155, "learning_rate": 8.778500640532963e-06, "loss": 0.1592, "step": 12034 }, { "epoch": 0.55, "grad_norm": 0.3897127770619501, "learning_rate": 8.777023876880532e-06, "loss": 0.2835, "step": 12035 }, { "epoch": 0.55, "grad_norm": 1.0989633803004761, "learning_rate": 8.77554714030361e-06, "loss": 0.4923, "step": 12036 }, { "epoch": 0.55, "grad_norm": 0.6097157262689636, "learning_rate": 8.774070430834898e-06, "loss": 0.2856, "step": 12037 }, { "epoch": 0.55, "grad_norm": 0.4183619078638152, "learning_rate": 8.772593748507083e-06, "loss": 0.2759, "step": 12038 }, { "epoch": 0.55, "grad_norm": 0.49880110316623244, "learning_rate": 8.771117093352861e-06, "loss": 0.3491, "step": 12039 }, { "epoch": 0.55, "grad_norm": 0.31057170995754196, "learning_rate": 8.769640465404922e-06, "loss": 0.2039, "step": 12040 }, { "epoch": 0.55, "grad_norm": 0.2974341327603078, "learning_rate": 8.768163864695953e-06, "loss": 0.1922, "step": 12041 }, { "epoch": 0.55, "grad_norm": 0.44147151235301285, "learning_rate": 8.766687291258653e-06, "loss": 0.3282, "step": 12042 }, { "epoch": 0.55, "grad_norm": 0.9843206258956417, "learning_rate": 8.76521074512571e-06, "loss": 0.4223, "step": 12043 }, { "epoch": 0.55, "grad_norm": 0.32996100101104375, "learning_rate": 8.763734226329809e-06, "loss": 0.2028, "step": 12044 }, { "epoch": 0.55, "grad_norm": 1.0957435518742933, "learning_rate": 8.762257734903635e-06, "loss": 0.5338, "step": 12045 }, { "epoch": 0.55, "grad_norm": 0.396917324501631, "learning_rate": 8.760781270879889e-06, "loss": 0.336, "step": 12046 }, { "epoch": 0.55, "grad_norm": 0.2529929989488865, "learning_rate": 8.759304834291248e-06, "loss": 0.1708, "step": 12047 }, { "epoch": 0.55, "grad_norm": 0.8797059442867133, "learning_rate": 8.757828425170403e-06, "loss": 0.4366, "step": 12048 }, { "epoch": 0.55, "grad_norm": 0.5154756744209222, "learning_rate": 8.756352043550038e-06, "loss": 0.3382, "step": 12049 }, { "epoch": 0.55, "grad_norm": 0.22362575038940657, "learning_rate": 8.754875689462843e-06, "loss": 0.1816, "step": 12050 }, { "epoch": 0.55, "grad_norm": 1.29676257439492, "learning_rate": 8.753399362941499e-06, "loss": 0.608, "step": 12051 }, { "epoch": 0.55, "grad_norm": 0.3990289284007032, "learning_rate": 8.751923064018692e-06, "loss": 0.2287, "step": 12052 }, { "epoch": 0.55, "grad_norm": 0.3403326258277737, "learning_rate": 8.750446792727103e-06, "loss": 0.2698, "step": 12053 }, { "epoch": 0.55, "grad_norm": 0.41565576498811224, "learning_rate": 8.74897054909942e-06, "loss": 0.2664, "step": 12054 }, { "epoch": 0.55, "grad_norm": 0.4351278614087777, "learning_rate": 8.747494333168323e-06, "loss": 0.2791, "step": 12055 }, { "epoch": 0.55, "grad_norm": 0.40722765813421624, "learning_rate": 8.746018144966495e-06, "loss": 0.256, "step": 12056 }, { "epoch": 0.55, "grad_norm": 0.5239466587438371, "learning_rate": 8.744541984526616e-06, "loss": 0.2625, "step": 12057 }, { "epoch": 0.55, "grad_norm": 0.3885071837563332, "learning_rate": 8.743065851881364e-06, "loss": 0.3376, "step": 12058 }, { "epoch": 0.55, "grad_norm": 0.4065261607424598, "learning_rate": 8.741589747063426e-06, "loss": 0.2239, "step": 12059 }, { "epoch": 0.55, "grad_norm": 0.5399567678158372, "learning_rate": 8.74011367010548e-06, "loss": 0.3077, "step": 12060 }, { "epoch": 0.55, "grad_norm": 0.45652742757003495, "learning_rate": 8.738637621040202e-06, "loss": 0.2503, "step": 12061 }, { "epoch": 0.55, "grad_norm": 0.2873170587304599, "learning_rate": 8.737161599900267e-06, "loss": 0.2447, "step": 12062 }, { "epoch": 0.55, "grad_norm": 0.30762283018979064, "learning_rate": 8.73568560671836e-06, "loss": 0.1721, "step": 12063 }, { "epoch": 0.55, "grad_norm": 0.957083522560443, "learning_rate": 8.734209641527157e-06, "loss": 0.4992, "step": 12064 }, { "epoch": 0.55, "grad_norm": 0.4275929876464102, "learning_rate": 8.732733704359331e-06, "loss": 0.305, "step": 12065 }, { "epoch": 0.55, "grad_norm": 0.33508925606584844, "learning_rate": 8.731257795247558e-06, "loss": 0.3024, "step": 12066 }, { "epoch": 0.55, "grad_norm": 0.3189441061505665, "learning_rate": 8.729781914224517e-06, "loss": 0.1839, "step": 12067 }, { "epoch": 0.55, "grad_norm": 0.2936189463971261, "learning_rate": 8.728306061322879e-06, "loss": 0.2079, "step": 12068 }, { "epoch": 0.55, "grad_norm": 1.0867408537636722, "learning_rate": 8.72683023657532e-06, "loss": 0.5639, "step": 12069 }, { "epoch": 0.55, "grad_norm": 0.40596548790664233, "learning_rate": 8.725354440014512e-06, "loss": 0.2702, "step": 12070 }, { "epoch": 0.55, "grad_norm": 0.3498077115694771, "learning_rate": 8.723878671673127e-06, "loss": 0.2695, "step": 12071 }, { "epoch": 0.55, "grad_norm": 1.060320666679604, "learning_rate": 8.722402931583839e-06, "loss": 0.4815, "step": 12072 }, { "epoch": 0.55, "grad_norm": 0.3512309137902775, "learning_rate": 8.720927219779319e-06, "loss": 0.2621, "step": 12073 }, { "epoch": 0.55, "grad_norm": 0.37278184971887085, "learning_rate": 8.719451536292239e-06, "loss": 0.2241, "step": 12074 }, { "epoch": 0.55, "grad_norm": 0.3092564428550018, "learning_rate": 8.717975881155261e-06, "loss": 0.225, "step": 12075 }, { "epoch": 0.55, "grad_norm": 0.910333943371931, "learning_rate": 8.716500254401067e-06, "loss": 0.3269, "step": 12076 }, { "epoch": 0.55, "grad_norm": 0.4146545995514526, "learning_rate": 8.715024656062321e-06, "loss": 0.2581, "step": 12077 }, { "epoch": 0.55, "grad_norm": 0.33083560233192044, "learning_rate": 8.71354908617169e-06, "loss": 0.293, "step": 12078 }, { "epoch": 0.55, "grad_norm": 1.0467048271811226, "learning_rate": 8.71207354476184e-06, "loss": 0.6423, "step": 12079 }, { "epoch": 0.55, "grad_norm": 0.25244907094249064, "learning_rate": 8.710598031865444e-06, "loss": 0.1468, "step": 12080 }, { "epoch": 0.55, "grad_norm": 0.29069464484614976, "learning_rate": 8.709122547515163e-06, "loss": 0.2473, "step": 12081 }, { "epoch": 0.56, "grad_norm": 0.5493560144031064, "learning_rate": 8.707647091743665e-06, "loss": 0.3434, "step": 12082 }, { "epoch": 0.56, "grad_norm": 0.45346816912496, "learning_rate": 8.706171664583613e-06, "loss": 0.2426, "step": 12083 }, { "epoch": 0.56, "grad_norm": 0.554773173869556, "learning_rate": 8.704696266067676e-06, "loss": 0.3824, "step": 12084 }, { "epoch": 0.56, "grad_norm": 0.5269683132127034, "learning_rate": 8.703220896228515e-06, "loss": 0.4071, "step": 12085 }, { "epoch": 0.56, "grad_norm": 0.26536870224085796, "learning_rate": 8.701745555098793e-06, "loss": 0.1839, "step": 12086 }, { "epoch": 0.56, "grad_norm": 0.34480091374865607, "learning_rate": 8.700270242711174e-06, "loss": 0.2088, "step": 12087 }, { "epoch": 0.56, "grad_norm": 0.6692146415829521, "learning_rate": 8.698794959098314e-06, "loss": 0.3888, "step": 12088 }, { "epoch": 0.56, "grad_norm": 0.29535850900869326, "learning_rate": 8.697319704292884e-06, "loss": 0.2043, "step": 12089 }, { "epoch": 0.56, "grad_norm": 0.395418791236929, "learning_rate": 8.695844478327541e-06, "loss": 0.3112, "step": 12090 }, { "epoch": 0.56, "grad_norm": 1.020479196761933, "learning_rate": 8.694369281234945e-06, "loss": 0.6468, "step": 12091 }, { "epoch": 0.56, "grad_norm": 0.3690321253269678, "learning_rate": 8.692894113047748e-06, "loss": 0.2251, "step": 12092 }, { "epoch": 0.56, "grad_norm": 0.26451933624404217, "learning_rate": 8.691418973798621e-06, "loss": 0.213, "step": 12093 }, { "epoch": 0.56, "grad_norm": 0.4349829082853501, "learning_rate": 8.689943863520215e-06, "loss": 0.3357, "step": 12094 }, { "epoch": 0.56, "grad_norm": 0.8771696615448499, "learning_rate": 8.688468782245192e-06, "loss": 0.412, "step": 12095 }, { "epoch": 0.56, "grad_norm": 0.3995132364516439, "learning_rate": 8.686993730006202e-06, "loss": 0.2328, "step": 12096 }, { "epoch": 0.56, "grad_norm": 0.38632855670177463, "learning_rate": 8.685518706835909e-06, "loss": 0.3075, "step": 12097 }, { "epoch": 0.56, "grad_norm": 0.5879456966206789, "learning_rate": 8.684043712766962e-06, "loss": 0.3132, "step": 12098 }, { "epoch": 0.56, "grad_norm": 0.36582937446088176, "learning_rate": 8.682568747832023e-06, "loss": 0.2348, "step": 12099 }, { "epoch": 0.56, "grad_norm": 0.4896582608066107, "learning_rate": 8.681093812063739e-06, "loss": 0.3081, "step": 12100 }, { "epoch": 0.56, "grad_norm": 0.3836812537596712, "learning_rate": 8.679618905494765e-06, "loss": 0.2769, "step": 12101 }, { "epoch": 0.56, "grad_norm": 0.3484784604433825, "learning_rate": 8.678144028157758e-06, "loss": 0.2905, "step": 12102 }, { "epoch": 0.56, "grad_norm": 1.0479378538221573, "learning_rate": 8.676669180085369e-06, "loss": 0.4337, "step": 12103 }, { "epoch": 0.56, "grad_norm": 0.5421737039367766, "learning_rate": 8.67519436131025e-06, "loss": 0.3703, "step": 12104 }, { "epoch": 0.56, "grad_norm": 0.43947241166762246, "learning_rate": 8.673719571865045e-06, "loss": 0.2947, "step": 12105 }, { "epoch": 0.56, "grad_norm": 0.24921961520559055, "learning_rate": 8.672244811782416e-06, "loss": 0.1885, "step": 12106 }, { "epoch": 0.56, "grad_norm": 0.6412450819669681, "learning_rate": 8.670770081095005e-06, "loss": 0.3019, "step": 12107 }, { "epoch": 0.56, "grad_norm": 0.46005836278543133, "learning_rate": 8.669295379835467e-06, "loss": 0.3225, "step": 12108 }, { "epoch": 0.56, "grad_norm": 0.3334958102490018, "learning_rate": 8.667820708036441e-06, "loss": 0.27, "step": 12109 }, { "epoch": 0.56, "grad_norm": 0.6426044984608472, "learning_rate": 8.666346065730583e-06, "loss": 0.3594, "step": 12110 }, { "epoch": 0.56, "grad_norm": 0.3424909965785827, "learning_rate": 8.664871452950538e-06, "loss": 0.3004, "step": 12111 }, { "epoch": 0.56, "grad_norm": 0.26765923835118355, "learning_rate": 8.663396869728953e-06, "loss": 0.1383, "step": 12112 }, { "epoch": 0.56, "grad_norm": 0.35210022771984856, "learning_rate": 8.661922316098472e-06, "loss": 0.294, "step": 12113 }, { "epoch": 0.56, "grad_norm": 0.39717433736412966, "learning_rate": 8.66044779209174e-06, "loss": 0.3081, "step": 12114 }, { "epoch": 0.56, "grad_norm": 0.8170779609219889, "learning_rate": 8.658973297741406e-06, "loss": 0.6013, "step": 12115 }, { "epoch": 0.56, "grad_norm": 0.6329941894039852, "learning_rate": 8.65749883308011e-06, "loss": 0.2735, "step": 12116 }, { "epoch": 0.56, "grad_norm": 0.3042072479130231, "learning_rate": 8.656024398140495e-06, "loss": 0.285, "step": 12117 }, { "epoch": 0.56, "grad_norm": 0.5189996414630461, "learning_rate": 8.654549992955203e-06, "loss": 0.3282, "step": 12118 }, { "epoch": 0.56, "grad_norm": 0.24187174057280092, "learning_rate": 8.653075617556879e-06, "loss": 0.1076, "step": 12119 }, { "epoch": 0.56, "grad_norm": 0.430266890248924, "learning_rate": 8.651601271978162e-06, "loss": 0.3348, "step": 12120 }, { "epoch": 0.56, "grad_norm": 0.36795004582379204, "learning_rate": 8.650126956251696e-06, "loss": 0.2939, "step": 12121 }, { "epoch": 0.56, "grad_norm": 0.719364124669118, "learning_rate": 8.64865267041011e-06, "loss": 0.2767, "step": 12122 }, { "epoch": 0.56, "grad_norm": 0.4043313951179866, "learning_rate": 8.64717841448606e-06, "loss": 0.282, "step": 12123 }, { "epoch": 0.56, "grad_norm": 0.48788202139674147, "learning_rate": 8.645704188512173e-06, "loss": 0.2994, "step": 12124 }, { "epoch": 0.56, "grad_norm": 0.2405309844582279, "learning_rate": 8.644229992521092e-06, "loss": 0.1882, "step": 12125 }, { "epoch": 0.56, "grad_norm": 0.41080006042089867, "learning_rate": 8.642755826545448e-06, "loss": 0.3032, "step": 12126 }, { "epoch": 0.56, "grad_norm": 0.697861652310811, "learning_rate": 8.641281690617886e-06, "loss": 0.4605, "step": 12127 }, { "epoch": 0.56, "grad_norm": 0.708843040572815, "learning_rate": 8.639807584771036e-06, "loss": 0.4533, "step": 12128 }, { "epoch": 0.56, "grad_norm": 0.28422475962820065, "learning_rate": 8.638333509037537e-06, "loss": 0.2154, "step": 12129 }, { "epoch": 0.56, "grad_norm": 0.3553995867958154, "learning_rate": 8.63685946345002e-06, "loss": 0.2497, "step": 12130 }, { "epoch": 0.56, "grad_norm": 0.4664045815236763, "learning_rate": 8.63538544804112e-06, "loss": 0.2637, "step": 12131 }, { "epoch": 0.56, "grad_norm": 0.3106996953047385, "learning_rate": 8.633911462843472e-06, "loss": 0.2174, "step": 12132 }, { "epoch": 0.56, "grad_norm": 0.4078470250622167, "learning_rate": 8.632437507889707e-06, "loss": 0.3118, "step": 12133 }, { "epoch": 0.56, "grad_norm": 0.8623188118934189, "learning_rate": 8.630963583212458e-06, "loss": 0.4157, "step": 12134 }, { "epoch": 0.56, "grad_norm": 0.3272544556945812, "learning_rate": 8.629489688844353e-06, "loss": 0.1998, "step": 12135 }, { "epoch": 0.56, "grad_norm": 1.0315416012625427, "learning_rate": 8.628015824818028e-06, "loss": 0.5152, "step": 12136 }, { "epoch": 0.56, "grad_norm": 0.25469559508549583, "learning_rate": 8.626541991166113e-06, "loss": 0.2285, "step": 12137 }, { "epoch": 0.56, "grad_norm": 0.3063539257844163, "learning_rate": 8.625068187921231e-06, "loss": 0.222, "step": 12138 }, { "epoch": 0.56, "grad_norm": 0.8578195238735736, "learning_rate": 8.623594415116013e-06, "loss": 0.4166, "step": 12139 }, { "epoch": 0.56, "grad_norm": 0.8017656309839689, "learning_rate": 8.62212067278309e-06, "loss": 0.4905, "step": 12140 }, { "epoch": 0.56, "grad_norm": 0.32775088971283983, "learning_rate": 8.620646960955087e-06, "loss": 0.2757, "step": 12141 }, { "epoch": 0.56, "grad_norm": 0.4597058637600257, "learning_rate": 8.619173279664633e-06, "loss": 0.243, "step": 12142 }, { "epoch": 0.56, "grad_norm": 0.3677673562886798, "learning_rate": 8.617699628944346e-06, "loss": 0.2409, "step": 12143 }, { "epoch": 0.56, "grad_norm": 0.4127577571625338, "learning_rate": 8.616226008826863e-06, "loss": 0.2503, "step": 12144 }, { "epoch": 0.56, "grad_norm": 0.418445022040065, "learning_rate": 8.6147524193448e-06, "loss": 0.271, "step": 12145 }, { "epoch": 0.56, "grad_norm": 0.47577158919892376, "learning_rate": 8.613278860530784e-06, "loss": 0.2723, "step": 12146 }, { "epoch": 0.56, "grad_norm": 0.3882829402976484, "learning_rate": 8.611805332417438e-06, "loss": 0.299, "step": 12147 }, { "epoch": 0.56, "grad_norm": 0.4869964076547249, "learning_rate": 8.610331835037383e-06, "loss": 0.2675, "step": 12148 }, { "epoch": 0.56, "grad_norm": 0.35313175997503493, "learning_rate": 8.60885836842324e-06, "loss": 0.2813, "step": 12149 }, { "epoch": 0.56, "grad_norm": 0.3545971297571187, "learning_rate": 8.607384932607635e-06, "loss": 0.2677, "step": 12150 }, { "epoch": 0.56, "grad_norm": 0.6443502224815294, "learning_rate": 8.605911527623186e-06, "loss": 0.3515, "step": 12151 }, { "epoch": 0.56, "grad_norm": 0.3432484243188843, "learning_rate": 8.604438153502506e-06, "loss": 0.2396, "step": 12152 }, { "epoch": 0.56, "grad_norm": 0.277963297287344, "learning_rate": 8.602964810278225e-06, "loss": 0.2345, "step": 12153 }, { "epoch": 0.56, "grad_norm": 1.641393479569556, "learning_rate": 8.601491497982956e-06, "loss": 0.6431, "step": 12154 }, { "epoch": 0.56, "grad_norm": 0.7810315904394448, "learning_rate": 8.60001821664932e-06, "loss": 0.3187, "step": 12155 }, { "epoch": 0.56, "grad_norm": 0.3474634253626934, "learning_rate": 8.598544966309926e-06, "loss": 0.2619, "step": 12156 }, { "epoch": 0.56, "grad_norm": 0.40978884890491546, "learning_rate": 8.597071746997399e-06, "loss": 0.3234, "step": 12157 }, { "epoch": 0.56, "grad_norm": 0.2598257580304664, "learning_rate": 8.595598558744351e-06, "loss": 0.1357, "step": 12158 }, { "epoch": 0.56, "grad_norm": 0.4061192614377386, "learning_rate": 8.594125401583397e-06, "loss": 0.289, "step": 12159 }, { "epoch": 0.56, "grad_norm": 0.5356915346565578, "learning_rate": 8.592652275547153e-06, "loss": 0.3201, "step": 12160 }, { "epoch": 0.56, "grad_norm": 0.3848580712658988, "learning_rate": 8.591179180668227e-06, "loss": 0.2396, "step": 12161 }, { "epoch": 0.56, "grad_norm": 0.383663763353386, "learning_rate": 8.589706116979241e-06, "loss": 0.2821, "step": 12162 }, { "epoch": 0.56, "grad_norm": 0.7289583063960312, "learning_rate": 8.588233084512801e-06, "loss": 0.4797, "step": 12163 }, { "epoch": 0.56, "grad_norm": 0.2265295807231736, "learning_rate": 8.586760083301519e-06, "loss": 0.1705, "step": 12164 }, { "epoch": 0.56, "grad_norm": 0.3650074408151643, "learning_rate": 8.585287113378006e-06, "loss": 0.2866, "step": 12165 }, { "epoch": 0.56, "grad_norm": 0.8848424419267559, "learning_rate": 8.583814174774873e-06, "loss": 0.4913, "step": 12166 }, { "epoch": 0.56, "grad_norm": 0.661133540222615, "learning_rate": 8.582341267524733e-06, "loss": 0.4102, "step": 12167 }, { "epoch": 0.56, "grad_norm": 0.3357431072546675, "learning_rate": 8.580868391660186e-06, "loss": 0.2173, "step": 12168 }, { "epoch": 0.56, "grad_norm": 0.37970655856010216, "learning_rate": 8.579395547213844e-06, "loss": 0.3189, "step": 12169 }, { "epoch": 0.56, "grad_norm": 0.5213368559657799, "learning_rate": 8.57792273421832e-06, "loss": 0.3244, "step": 12170 }, { "epoch": 0.56, "grad_norm": 0.2805746365219365, "learning_rate": 8.576449952706213e-06, "loss": 0.1719, "step": 12171 }, { "epoch": 0.56, "grad_norm": 0.8445387806104611, "learning_rate": 8.574977202710135e-06, "loss": 0.4685, "step": 12172 }, { "epoch": 0.56, "grad_norm": 0.3621125659722737, "learning_rate": 8.573504484262684e-06, "loss": 0.3156, "step": 12173 }, { "epoch": 0.56, "grad_norm": 0.30865931289735926, "learning_rate": 8.572031797396473e-06, "loss": 0.1993, "step": 12174 }, { "epoch": 0.56, "grad_norm": 1.2974983223794978, "learning_rate": 8.570559142144102e-06, "loss": 0.7434, "step": 12175 }, { "epoch": 0.56, "grad_norm": 0.38661142469154014, "learning_rate": 8.569086518538172e-06, "loss": 0.3354, "step": 12176 }, { "epoch": 0.56, "grad_norm": 0.23021367270157592, "learning_rate": 8.567613926611287e-06, "loss": 0.1309, "step": 12177 }, { "epoch": 0.56, "grad_norm": 0.3587175980648574, "learning_rate": 8.566141366396048e-06, "loss": 0.278, "step": 12178 }, { "epoch": 0.56, "grad_norm": 0.5600552244288344, "learning_rate": 8.56466883792506e-06, "loss": 0.3731, "step": 12179 }, { "epoch": 0.56, "grad_norm": 0.6031314381984104, "learning_rate": 8.56319634123092e-06, "loss": 0.3132, "step": 12180 }, { "epoch": 0.56, "grad_norm": 0.2748011088240663, "learning_rate": 8.561723876346228e-06, "loss": 0.2461, "step": 12181 }, { "epoch": 0.56, "grad_norm": 0.3077726725664092, "learning_rate": 8.56025144330358e-06, "loss": 0.2059, "step": 12182 }, { "epoch": 0.56, "grad_norm": 0.4000626028051199, "learning_rate": 8.55877904213558e-06, "loss": 0.2386, "step": 12183 }, { "epoch": 0.56, "grad_norm": 0.39626546830682496, "learning_rate": 8.557306672874825e-06, "loss": 0.2747, "step": 12184 }, { "epoch": 0.56, "grad_norm": 0.4664917790823666, "learning_rate": 8.555834335553908e-06, "loss": 0.3257, "step": 12185 }, { "epoch": 0.56, "grad_norm": 0.41328949852093566, "learning_rate": 8.554362030205424e-06, "loss": 0.2759, "step": 12186 }, { "epoch": 0.56, "grad_norm": 0.49546115808013835, "learning_rate": 8.552889756861972e-06, "loss": 0.3035, "step": 12187 }, { "epoch": 0.56, "grad_norm": 0.35446449916160533, "learning_rate": 8.55141751555615e-06, "loss": 0.304, "step": 12188 }, { "epoch": 0.56, "grad_norm": 0.3126638866833582, "learning_rate": 8.549945306320547e-06, "loss": 0.2175, "step": 12189 }, { "epoch": 0.56, "grad_norm": 0.4697372500024799, "learning_rate": 8.548473129187757e-06, "loss": 0.2854, "step": 12190 }, { "epoch": 0.56, "grad_norm": 0.4581908978174879, "learning_rate": 8.54700098419037e-06, "loss": 0.2778, "step": 12191 }, { "epoch": 0.56, "grad_norm": 0.3588548872870379, "learning_rate": 8.545528871360983e-06, "loss": 0.2666, "step": 12192 }, { "epoch": 0.56, "grad_norm": 0.3872369348798836, "learning_rate": 8.544056790732187e-06, "loss": 0.3067, "step": 12193 }, { "epoch": 0.56, "grad_norm": 0.27344526734229596, "learning_rate": 8.542584742336568e-06, "loss": 0.1215, "step": 12194 }, { "epoch": 0.56, "grad_norm": 0.41658011002886075, "learning_rate": 8.541112726206718e-06, "loss": 0.2675, "step": 12195 }, { "epoch": 0.56, "grad_norm": 0.45158672756614165, "learning_rate": 8.539640742375226e-06, "loss": 0.3518, "step": 12196 }, { "epoch": 0.56, "grad_norm": 0.2914082902460317, "learning_rate": 8.538168790874683e-06, "loss": 0.2112, "step": 12197 }, { "epoch": 0.56, "grad_norm": 0.6086763730866136, "learning_rate": 8.536696871737673e-06, "loss": 0.3309, "step": 12198 }, { "epoch": 0.56, "grad_norm": 0.4115296773770633, "learning_rate": 8.535224984996779e-06, "loss": 0.3525, "step": 12199 }, { "epoch": 0.56, "grad_norm": 0.35375840691280214, "learning_rate": 8.533753130684596e-06, "loss": 0.2546, "step": 12200 }, { "epoch": 0.56, "grad_norm": 0.5581319351832283, "learning_rate": 8.532281308833706e-06, "loss": 0.3358, "step": 12201 }, { "epoch": 0.56, "grad_norm": 0.4799169639064649, "learning_rate": 8.530809519476697e-06, "loss": 0.3291, "step": 12202 }, { "epoch": 0.56, "grad_norm": 0.26887190617570306, "learning_rate": 8.52933776264614e-06, "loss": 0.0956, "step": 12203 }, { "epoch": 0.56, "grad_norm": 0.34603870095644224, "learning_rate": 8.527866038374633e-06, "loss": 0.2952, "step": 12204 }, { "epoch": 0.56, "grad_norm": 0.3847119778336983, "learning_rate": 8.526394346694755e-06, "loss": 0.3127, "step": 12205 }, { "epoch": 0.56, "grad_norm": 0.9762341618015461, "learning_rate": 8.524922687639084e-06, "loss": 0.4625, "step": 12206 }, { "epoch": 0.56, "grad_norm": 0.38205248827501354, "learning_rate": 8.523451061240202e-06, "loss": 0.2528, "step": 12207 }, { "epoch": 0.56, "grad_norm": 0.38754565611288994, "learning_rate": 8.52197946753069e-06, "loss": 0.3014, "step": 12208 }, { "epoch": 0.56, "grad_norm": 0.26346209831925566, "learning_rate": 8.520507906543129e-06, "loss": 0.2067, "step": 12209 }, { "epoch": 0.56, "grad_norm": 0.3361025858136923, "learning_rate": 8.519036378310098e-06, "loss": 0.1954, "step": 12210 }, { "epoch": 0.56, "grad_norm": 0.6097712512375393, "learning_rate": 8.517564882864173e-06, "loss": 0.3873, "step": 12211 }, { "epoch": 0.56, "grad_norm": 0.33294253806331114, "learning_rate": 8.516093420237931e-06, "loss": 0.2873, "step": 12212 }, { "epoch": 0.56, "grad_norm": 0.7119791421549855, "learning_rate": 8.514621990463954e-06, "loss": 0.2668, "step": 12213 }, { "epoch": 0.56, "grad_norm": 0.42278946882098206, "learning_rate": 8.513150593574813e-06, "loss": 0.3234, "step": 12214 }, { "epoch": 0.56, "grad_norm": 0.3128778316950503, "learning_rate": 8.511679229603084e-06, "loss": 0.1857, "step": 12215 }, { "epoch": 0.56, "grad_norm": 0.34927475244001355, "learning_rate": 8.51020789858134e-06, "loss": 0.277, "step": 12216 }, { "epoch": 0.56, "grad_norm": 0.33454256558069395, "learning_rate": 8.50873660054216e-06, "loss": 0.2403, "step": 12217 }, { "epoch": 0.56, "grad_norm": 0.9702380619660157, "learning_rate": 8.507265335518117e-06, "loss": 0.4392, "step": 12218 }, { "epoch": 0.56, "grad_norm": 0.7632386694883394, "learning_rate": 8.50579410354178e-06, "loss": 0.408, "step": 12219 }, { "epoch": 0.56, "grad_norm": 0.27695884814597393, "learning_rate": 8.504322904645717e-06, "loss": 0.237, "step": 12220 }, { "epoch": 0.56, "grad_norm": 0.46338973248245197, "learning_rate": 8.502851738862507e-06, "loss": 0.2492, "step": 12221 }, { "epoch": 0.56, "grad_norm": 0.3843856866205924, "learning_rate": 8.501380606224716e-06, "loss": 0.2465, "step": 12222 }, { "epoch": 0.56, "grad_norm": 0.48915247081570984, "learning_rate": 8.499909506764914e-06, "loss": 0.2569, "step": 12223 }, { "epoch": 0.56, "grad_norm": 0.39755627024894946, "learning_rate": 8.49843844051567e-06, "loss": 0.3135, "step": 12224 }, { "epoch": 0.56, "grad_norm": 0.5427120294148579, "learning_rate": 8.49696740750955e-06, "loss": 0.2877, "step": 12225 }, { "epoch": 0.56, "grad_norm": 0.4146484601502723, "learning_rate": 8.495496407779124e-06, "loss": 0.2223, "step": 12226 }, { "epoch": 0.56, "grad_norm": 0.3092562101851163, "learning_rate": 8.49402544135696e-06, "loss": 0.1906, "step": 12227 }, { "epoch": 0.56, "grad_norm": 0.3138838781379907, "learning_rate": 8.492554508275618e-06, "loss": 0.2593, "step": 12228 }, { "epoch": 0.56, "grad_norm": 0.41752587300089833, "learning_rate": 8.491083608567663e-06, "loss": 0.2852, "step": 12229 }, { "epoch": 0.56, "grad_norm": 0.8513710820205609, "learning_rate": 8.48961274226567e-06, "loss": 0.348, "step": 12230 }, { "epoch": 0.56, "grad_norm": 0.6285612552495742, "learning_rate": 8.488141909402192e-06, "loss": 0.3291, "step": 12231 }, { "epoch": 0.56, "grad_norm": 0.32532520349079674, "learning_rate": 8.486671110009797e-06, "loss": 0.2761, "step": 12232 }, { "epoch": 0.56, "grad_norm": 0.3014111629217826, "learning_rate": 8.485200344121038e-06, "loss": 0.1957, "step": 12233 }, { "epoch": 0.56, "grad_norm": 0.9152635228464191, "learning_rate": 8.483729611768488e-06, "loss": 0.5334, "step": 12234 }, { "epoch": 0.56, "grad_norm": 0.3522411408534553, "learning_rate": 8.482258912984705e-06, "loss": 0.2544, "step": 12235 }, { "epoch": 0.56, "grad_norm": 0.2879316845982022, "learning_rate": 8.480788247802246e-06, "loss": 0.2376, "step": 12236 }, { "epoch": 0.56, "grad_norm": 0.7531629689728865, "learning_rate": 8.479317616253671e-06, "loss": 0.4686, "step": 12237 }, { "epoch": 0.56, "grad_norm": 0.38888973406978433, "learning_rate": 8.477847018371534e-06, "loss": 0.2731, "step": 12238 }, { "epoch": 0.56, "grad_norm": 1.082679360651809, "learning_rate": 8.476376454188402e-06, "loss": 0.2473, "step": 12239 }, { "epoch": 0.56, "grad_norm": 0.33926809878027236, "learning_rate": 8.474905923736825e-06, "loss": 0.2951, "step": 12240 }, { "epoch": 0.56, "grad_norm": 0.391925832358534, "learning_rate": 8.473435427049362e-06, "loss": 0.2772, "step": 12241 }, { "epoch": 0.56, "grad_norm": 0.9215804538714935, "learning_rate": 8.471964964158565e-06, "loss": 0.5506, "step": 12242 }, { "epoch": 0.56, "grad_norm": 0.24938738179554498, "learning_rate": 8.470494535096994e-06, "loss": 0.1554, "step": 12243 }, { "epoch": 0.56, "grad_norm": 0.30343710640133315, "learning_rate": 8.469024139897197e-06, "loss": 0.2716, "step": 12244 }, { "epoch": 0.56, "grad_norm": 0.9621075217184689, "learning_rate": 8.467553778591733e-06, "loss": 0.5603, "step": 12245 }, { "epoch": 0.56, "grad_norm": 0.5267145703134086, "learning_rate": 8.466083451213145e-06, "loss": 0.2554, "step": 12246 }, { "epoch": 0.56, "grad_norm": 0.39749061629440513, "learning_rate": 8.464613157793996e-06, "loss": 0.3041, "step": 12247 }, { "epoch": 0.56, "grad_norm": 0.36752547917807277, "learning_rate": 8.463142898366834e-06, "loss": 0.3156, "step": 12248 }, { "epoch": 0.56, "grad_norm": 0.19354851847672563, "learning_rate": 8.461672672964204e-06, "loss": 0.0739, "step": 12249 }, { "epoch": 0.56, "grad_norm": 0.4511532821598407, "learning_rate": 8.460202481618658e-06, "loss": 0.3217, "step": 12250 }, { "epoch": 0.56, "grad_norm": 0.828478575156366, "learning_rate": 8.458732324362744e-06, "loss": 0.5049, "step": 12251 }, { "epoch": 0.56, "grad_norm": 0.33677243313494404, "learning_rate": 8.457262201229015e-06, "loss": 0.2576, "step": 12252 }, { "epoch": 0.56, "grad_norm": 0.3607525356344563, "learning_rate": 8.45579211225001e-06, "loss": 0.2745, "step": 12253 }, { "epoch": 0.56, "grad_norm": 0.39625136918530396, "learning_rate": 8.454322057458282e-06, "loss": 0.2592, "step": 12254 }, { "epoch": 0.56, "grad_norm": 0.39880183924414137, "learning_rate": 8.452852036886373e-06, "loss": 0.2331, "step": 12255 }, { "epoch": 0.56, "grad_norm": 0.23728147736271077, "learning_rate": 8.451382050566828e-06, "loss": 0.2168, "step": 12256 }, { "epoch": 0.56, "grad_norm": 0.8820760947840431, "learning_rate": 8.449912098532193e-06, "loss": 0.4586, "step": 12257 }, { "epoch": 0.56, "grad_norm": 0.710098155367854, "learning_rate": 8.44844218081501e-06, "loss": 0.4384, "step": 12258 }, { "epoch": 0.56, "grad_norm": 0.31337951869611874, "learning_rate": 8.446972297447819e-06, "loss": 0.2152, "step": 12259 }, { "epoch": 0.56, "grad_norm": 0.3766201087040202, "learning_rate": 8.445502448463167e-06, "loss": 0.3308, "step": 12260 }, { "epoch": 0.56, "grad_norm": 0.28772847815407715, "learning_rate": 8.444032633893593e-06, "loss": 0.1575, "step": 12261 }, { "epoch": 0.56, "grad_norm": 0.3243789689030855, "learning_rate": 8.442562853771637e-06, "loss": 0.1933, "step": 12262 }, { "epoch": 0.56, "grad_norm": 0.7609796041097836, "learning_rate": 8.441093108129833e-06, "loss": 0.4319, "step": 12263 }, { "epoch": 0.56, "grad_norm": 0.3668232256734244, "learning_rate": 8.43962339700073e-06, "loss": 0.3153, "step": 12264 }, { "epoch": 0.56, "grad_norm": 0.32084289603252325, "learning_rate": 8.438153720416861e-06, "loss": 0.187, "step": 12265 }, { "epoch": 0.56, "grad_norm": 0.9539874704255746, "learning_rate": 8.436684078410764e-06, "loss": 0.6051, "step": 12266 }, { "epoch": 0.56, "grad_norm": 0.2541484428081228, "learning_rate": 8.435214471014976e-06, "loss": 0.1911, "step": 12267 }, { "epoch": 0.56, "grad_norm": 0.4139397817949519, "learning_rate": 8.433744898262026e-06, "loss": 0.2709, "step": 12268 }, { "epoch": 0.56, "grad_norm": 0.5154275074548234, "learning_rate": 8.432275360184458e-06, "loss": 0.3021, "step": 12269 }, { "epoch": 0.56, "grad_norm": 1.0430868653628655, "learning_rate": 8.430805856814802e-06, "loss": 0.5234, "step": 12270 }, { "epoch": 0.56, "grad_norm": 0.36072743708953225, "learning_rate": 8.429336388185594e-06, "loss": 0.2578, "step": 12271 }, { "epoch": 0.56, "grad_norm": 0.38156912630191214, "learning_rate": 8.42786695432936e-06, "loss": 0.2855, "step": 12272 }, { "epoch": 0.56, "grad_norm": 0.3148772494330761, "learning_rate": 8.42639755527864e-06, "loss": 0.1895, "step": 12273 }, { "epoch": 0.56, "grad_norm": 0.3315200950484738, "learning_rate": 8.42492819106596e-06, "loss": 0.2662, "step": 12274 }, { "epoch": 0.56, "grad_norm": 0.5078759545969014, "learning_rate": 8.423458861723853e-06, "loss": 0.2963, "step": 12275 }, { "epoch": 0.56, "grad_norm": 0.5067876304115886, "learning_rate": 8.421989567284841e-06, "loss": 0.3864, "step": 12276 }, { "epoch": 0.56, "grad_norm": 0.3094634640457943, "learning_rate": 8.420520307781468e-06, "loss": 0.2635, "step": 12277 }, { "epoch": 0.56, "grad_norm": 1.145670721769822, "learning_rate": 8.41905108324625e-06, "loss": 0.3763, "step": 12278 }, { "epoch": 0.56, "grad_norm": 0.23026724450965205, "learning_rate": 8.417581893711717e-06, "loss": 0.2001, "step": 12279 }, { "epoch": 0.56, "grad_norm": 0.3225302041955291, "learning_rate": 8.416112739210393e-06, "loss": 0.2305, "step": 12280 }, { "epoch": 0.56, "grad_norm": 0.6723651076262414, "learning_rate": 8.414643619774809e-06, "loss": 0.3846, "step": 12281 }, { "epoch": 0.56, "grad_norm": 0.8609229764986652, "learning_rate": 8.413174535437486e-06, "loss": 0.3982, "step": 12282 }, { "epoch": 0.56, "grad_norm": 0.4244626369481598, "learning_rate": 8.411705486230952e-06, "loss": 0.2768, "step": 12283 }, { "epoch": 0.56, "grad_norm": 0.31803690351561287, "learning_rate": 8.410236472187727e-06, "loss": 0.2884, "step": 12284 }, { "epoch": 0.56, "grad_norm": 0.4684941091565667, "learning_rate": 8.408767493340333e-06, "loss": 0.2216, "step": 12285 }, { "epoch": 0.56, "grad_norm": 0.4400555014856853, "learning_rate": 8.407298549721294e-06, "loss": 0.2604, "step": 12286 }, { "epoch": 0.56, "grad_norm": 0.3181900605068484, "learning_rate": 8.40582964136313e-06, "loss": 0.267, "step": 12287 }, { "epoch": 0.56, "grad_norm": 0.455355698615622, "learning_rate": 8.404360768298361e-06, "loss": 0.2362, "step": 12288 }, { "epoch": 0.56, "grad_norm": 0.3541057167771725, "learning_rate": 8.402891930559504e-06, "loss": 0.267, "step": 12289 }, { "epoch": 0.56, "grad_norm": 0.9169651701659455, "learning_rate": 8.401423128179085e-06, "loss": 0.5216, "step": 12290 }, { "epoch": 0.56, "grad_norm": 0.40015061744475117, "learning_rate": 8.399954361189614e-06, "loss": 0.2484, "step": 12291 }, { "epoch": 0.56, "grad_norm": 0.3822281441654599, "learning_rate": 8.398485629623613e-06, "loss": 0.266, "step": 12292 }, { "epoch": 0.56, "grad_norm": 0.3786164173749657, "learning_rate": 8.397016933513593e-06, "loss": 0.2389, "step": 12293 }, { "epoch": 0.56, "grad_norm": 1.4443129571587467, "learning_rate": 8.395548272892078e-06, "loss": 0.7676, "step": 12294 }, { "epoch": 0.56, "grad_norm": 0.3126518650778811, "learning_rate": 8.394079647791578e-06, "loss": 0.2093, "step": 12295 }, { "epoch": 0.56, "grad_norm": 0.3866842476804744, "learning_rate": 8.392611058244606e-06, "loss": 0.3096, "step": 12296 }, { "epoch": 0.56, "grad_norm": 0.707850410247406, "learning_rate": 8.391142504283674e-06, "loss": 0.4348, "step": 12297 }, { "epoch": 0.56, "grad_norm": 0.3310082850903642, "learning_rate": 8.389673985941295e-06, "loss": 0.2255, "step": 12298 }, { "epoch": 0.57, "grad_norm": 0.35445335164359526, "learning_rate": 8.388205503249985e-06, "loss": 0.2472, "step": 12299 }, { "epoch": 0.57, "grad_norm": 0.34069700015315174, "learning_rate": 8.38673705624225e-06, "loss": 0.26, "step": 12300 }, { "epoch": 0.57, "grad_norm": 0.3627539339991464, "learning_rate": 8.385268644950603e-06, "loss": 0.1894, "step": 12301 }, { "epoch": 0.57, "grad_norm": 0.5728298758163443, "learning_rate": 8.38380026940755e-06, "loss": 0.399, "step": 12302 }, { "epoch": 0.57, "grad_norm": 0.43630309822364827, "learning_rate": 8.382331929645603e-06, "loss": 0.3012, "step": 12303 }, { "epoch": 0.57, "grad_norm": 0.40155062616462034, "learning_rate": 8.380863625697267e-06, "loss": 0.2132, "step": 12304 }, { "epoch": 0.57, "grad_norm": 0.3753800892917308, "learning_rate": 8.379395357595051e-06, "loss": 0.2518, "step": 12305 }, { "epoch": 0.57, "grad_norm": 0.46736156988416844, "learning_rate": 8.37792712537146e-06, "loss": 0.2982, "step": 12306 }, { "epoch": 0.57, "grad_norm": 0.4145352274247496, "learning_rate": 8.376458929058996e-06, "loss": 0.2966, "step": 12307 }, { "epoch": 0.57, "grad_norm": 0.33414861418633585, "learning_rate": 8.37499076869017e-06, "loss": 0.2594, "step": 12308 }, { "epoch": 0.57, "grad_norm": 0.7731945129074849, "learning_rate": 8.373522644297482e-06, "loss": 0.4021, "step": 12309 }, { "epoch": 0.57, "grad_norm": 0.42692925623859, "learning_rate": 8.37205455591343e-06, "loss": 0.2893, "step": 12310 }, { "epoch": 0.57, "grad_norm": 0.29640537218097945, "learning_rate": 8.370586503570526e-06, "loss": 0.2078, "step": 12311 }, { "epoch": 0.57, "grad_norm": 0.31602330787739547, "learning_rate": 8.369118487301265e-06, "loss": 0.2341, "step": 12312 }, { "epoch": 0.57, "grad_norm": 0.4435462887731274, "learning_rate": 8.367650507138149e-06, "loss": 0.301, "step": 12313 }, { "epoch": 0.57, "grad_norm": 0.5634089595076581, "learning_rate": 8.366182563113682e-06, "loss": 0.2748, "step": 12314 }, { "epoch": 0.57, "grad_norm": 0.4104412633006264, "learning_rate": 8.36471465526035e-06, "loss": 0.3178, "step": 12315 }, { "epoch": 0.57, "grad_norm": 0.43374480809673693, "learning_rate": 8.363246783610663e-06, "loss": 0.2798, "step": 12316 }, { "epoch": 0.57, "grad_norm": 0.49163498912273956, "learning_rate": 8.361778948197116e-06, "loss": 0.2443, "step": 12317 }, { "epoch": 0.57, "grad_norm": 0.3245370180339963, "learning_rate": 8.360311149052205e-06, "loss": 0.2061, "step": 12318 }, { "epoch": 0.57, "grad_norm": 0.3357430319742506, "learning_rate": 8.358843386208421e-06, "loss": 0.2801, "step": 12319 }, { "epoch": 0.57, "grad_norm": 0.4172117202657959, "learning_rate": 8.357375659698266e-06, "loss": 0.3011, "step": 12320 }, { "epoch": 0.57, "grad_norm": 0.7636735713563897, "learning_rate": 8.35590796955423e-06, "loss": 0.3229, "step": 12321 }, { "epoch": 0.57, "grad_norm": 0.5829176456898921, "learning_rate": 8.354440315808808e-06, "loss": 0.3054, "step": 12322 }, { "epoch": 0.57, "grad_norm": 0.35329106033621605, "learning_rate": 8.352972698494484e-06, "loss": 0.2806, "step": 12323 }, { "epoch": 0.57, "grad_norm": 0.2385204701281666, "learning_rate": 8.351505117643767e-06, "loss": 0.1502, "step": 12324 }, { "epoch": 0.57, "grad_norm": 0.566322246283762, "learning_rate": 8.350037573289133e-06, "loss": 0.3042, "step": 12325 }, { "epoch": 0.57, "grad_norm": 0.4141413599260994, "learning_rate": 8.34857006546308e-06, "loss": 0.3187, "step": 12326 }, { "epoch": 0.57, "grad_norm": 0.3297787432007817, "learning_rate": 8.34710259419809e-06, "loss": 0.2385, "step": 12327 }, { "epoch": 0.57, "grad_norm": 0.5739844718882392, "learning_rate": 8.345635159526654e-06, "loss": 0.2817, "step": 12328 }, { "epoch": 0.57, "grad_norm": 0.4187237643756576, "learning_rate": 8.344167761481266e-06, "loss": 0.3128, "step": 12329 }, { "epoch": 0.57, "grad_norm": 0.24042062409301138, "learning_rate": 8.342700400094407e-06, "loss": 0.1375, "step": 12330 }, { "epoch": 0.57, "grad_norm": 0.2791592107264087, "learning_rate": 8.341233075398563e-06, "loss": 0.2448, "step": 12331 }, { "epoch": 0.57, "grad_norm": 0.5411815087418221, "learning_rate": 8.339765787426218e-06, "loss": 0.3431, "step": 12332 }, { "epoch": 0.57, "grad_norm": 0.5520264174346495, "learning_rate": 8.338298536209861e-06, "loss": 0.31, "step": 12333 }, { "epoch": 0.57, "grad_norm": 0.3476942740972606, "learning_rate": 8.336831321781973e-06, "loss": 0.1875, "step": 12334 }, { "epoch": 0.57, "grad_norm": 0.38915487620435996, "learning_rate": 8.335364144175036e-06, "loss": 0.2818, "step": 12335 }, { "epoch": 0.57, "grad_norm": 0.5367657339956364, "learning_rate": 8.33389700342153e-06, "loss": 0.3837, "step": 12336 }, { "epoch": 0.57, "grad_norm": 0.5648056153426586, "learning_rate": 8.332429899553941e-06, "loss": 0.281, "step": 12337 }, { "epoch": 0.57, "grad_norm": 0.35499618059166976, "learning_rate": 8.330962832604747e-06, "loss": 0.3156, "step": 12338 }, { "epoch": 0.57, "grad_norm": 0.24185408363033395, "learning_rate": 8.329495802606428e-06, "loss": 0.212, "step": 12339 }, { "epoch": 0.57, "grad_norm": 0.423425225371304, "learning_rate": 8.328028809591456e-06, "loss": 0.1555, "step": 12340 }, { "epoch": 0.57, "grad_norm": 0.41636324662122903, "learning_rate": 8.32656185359232e-06, "loss": 0.3148, "step": 12341 }, { "epoch": 0.57, "grad_norm": 0.41438110745259765, "learning_rate": 8.325094934641493e-06, "loss": 0.345, "step": 12342 }, { "epoch": 0.57, "grad_norm": 0.46363697417222777, "learning_rate": 8.323628052771448e-06, "loss": 0.3108, "step": 12343 }, { "epoch": 0.57, "grad_norm": 0.31613961305280897, "learning_rate": 8.322161208014663e-06, "loss": 0.2495, "step": 12344 }, { "epoch": 0.57, "grad_norm": 0.26446155877630106, "learning_rate": 8.320694400403608e-06, "loss": 0.1691, "step": 12345 }, { "epoch": 0.57, "grad_norm": 0.7584094529468643, "learning_rate": 8.319227629970763e-06, "loss": 0.3558, "step": 12346 }, { "epoch": 0.57, "grad_norm": 0.2467619352272183, "learning_rate": 8.317760896748598e-06, "loss": 0.2238, "step": 12347 }, { "epoch": 0.57, "grad_norm": 0.786507250461491, "learning_rate": 8.316294200769587e-06, "loss": 0.4751, "step": 12348 }, { "epoch": 0.57, "grad_norm": 0.5635684491854968, "learning_rate": 8.314827542066198e-06, "loss": 0.3764, "step": 12349 }, { "epoch": 0.57, "grad_norm": 0.3394966276185355, "learning_rate": 8.313360920670903e-06, "loss": 0.2272, "step": 12350 }, { "epoch": 0.57, "grad_norm": 0.29952073495419357, "learning_rate": 8.311894336616173e-06, "loss": 0.2579, "step": 12351 }, { "epoch": 0.57, "grad_norm": 0.3351909188305221, "learning_rate": 8.310427789934475e-06, "loss": 0.1936, "step": 12352 }, { "epoch": 0.57, "grad_norm": 0.40280897316861747, "learning_rate": 8.308961280658275e-06, "loss": 0.2342, "step": 12353 }, { "epoch": 0.57, "grad_norm": 0.5517372185741795, "learning_rate": 8.307494808820045e-06, "loss": 0.3587, "step": 12354 }, { "epoch": 0.57, "grad_norm": 0.42896098357748164, "learning_rate": 8.306028374452249e-06, "loss": 0.3308, "step": 12355 }, { "epoch": 0.57, "grad_norm": 0.34143708362417374, "learning_rate": 8.30456197758735e-06, "loss": 0.2636, "step": 12356 }, { "epoch": 0.57, "grad_norm": 0.2138957221817499, "learning_rate": 8.303095618257817e-06, "loss": 0.1137, "step": 12357 }, { "epoch": 0.57, "grad_norm": 0.48671452439179286, "learning_rate": 8.301629296496107e-06, "loss": 0.3235, "step": 12358 }, { "epoch": 0.57, "grad_norm": 0.29215022679804736, "learning_rate": 8.30016301233469e-06, "loss": 0.264, "step": 12359 }, { "epoch": 0.57, "grad_norm": 0.6852034843517372, "learning_rate": 8.29869676580603e-06, "loss": 0.3167, "step": 12360 }, { "epoch": 0.57, "grad_norm": 0.8100227846775483, "learning_rate": 8.29723055694258e-06, "loss": 0.4972, "step": 12361 }, { "epoch": 0.57, "grad_norm": 0.36596297005250733, "learning_rate": 8.295764385776801e-06, "loss": 0.2798, "step": 12362 }, { "epoch": 0.57, "grad_norm": 0.3444347362357057, "learning_rate": 8.29429825234116e-06, "loss": 0.2532, "step": 12363 }, { "epoch": 0.57, "grad_norm": 0.286030448178334, "learning_rate": 8.29283215666811e-06, "loss": 0.1828, "step": 12364 }, { "epoch": 0.57, "grad_norm": 0.37295889348453226, "learning_rate": 8.291366098790114e-06, "loss": 0.2933, "step": 12365 }, { "epoch": 0.57, "grad_norm": 0.7567154038039623, "learning_rate": 8.28990007873962e-06, "loss": 0.3287, "step": 12366 }, { "epoch": 0.57, "grad_norm": 0.35881415309839687, "learning_rate": 8.288434096549096e-06, "loss": 0.3132, "step": 12367 }, { "epoch": 0.57, "grad_norm": 0.37109836359347176, "learning_rate": 8.286968152250989e-06, "loss": 0.2674, "step": 12368 }, { "epoch": 0.57, "grad_norm": 0.9766100134664008, "learning_rate": 8.285502245877757e-06, "loss": 0.5622, "step": 12369 }, { "epoch": 0.57, "grad_norm": 0.23036099474315122, "learning_rate": 8.284036377461848e-06, "loss": 0.1636, "step": 12370 }, { "epoch": 0.57, "grad_norm": 0.3915760855123803, "learning_rate": 8.282570547035726e-06, "loss": 0.2974, "step": 12371 }, { "epoch": 0.57, "grad_norm": 0.9085164552755735, "learning_rate": 8.281104754631836e-06, "loss": 0.4167, "step": 12372 }, { "epoch": 0.57, "grad_norm": 0.5499504465950326, "learning_rate": 8.279639000282629e-06, "loss": 0.2759, "step": 12373 }, { "epoch": 0.57, "grad_norm": 0.36719988891307936, "learning_rate": 8.278173284020557e-06, "loss": 0.2782, "step": 12374 }, { "epoch": 0.57, "grad_norm": 0.3496606429658165, "learning_rate": 8.276707605878063e-06, "loss": 0.2932, "step": 12375 }, { "epoch": 0.57, "grad_norm": 0.3213788260554583, "learning_rate": 8.275241965887606e-06, "loss": 0.1556, "step": 12376 }, { "epoch": 0.57, "grad_norm": 0.33004758968524384, "learning_rate": 8.273776364081632e-06, "loss": 0.2833, "step": 12377 }, { "epoch": 0.57, "grad_norm": 0.4652463008922134, "learning_rate": 8.272310800492584e-06, "loss": 0.3157, "step": 12378 }, { "epoch": 0.57, "grad_norm": 0.3963285494475821, "learning_rate": 8.270845275152909e-06, "loss": 0.261, "step": 12379 }, { "epoch": 0.57, "grad_norm": 0.36151111087124244, "learning_rate": 8.269379788095053e-06, "loss": 0.2883, "step": 12380 }, { "epoch": 0.57, "grad_norm": 1.2563761730621403, "learning_rate": 8.26791433935146e-06, "loss": 0.6024, "step": 12381 }, { "epoch": 0.57, "grad_norm": 0.2757890557973591, "learning_rate": 8.266448928954576e-06, "loss": 0.2593, "step": 12382 }, { "epoch": 0.57, "grad_norm": 0.35760234358959275, "learning_rate": 8.264983556936841e-06, "loss": 0.2328, "step": 12383 }, { "epoch": 0.57, "grad_norm": 0.42104959897857963, "learning_rate": 8.263518223330698e-06, "loss": 0.2612, "step": 12384 }, { "epoch": 0.57, "grad_norm": 0.9887674191536168, "learning_rate": 8.262052928168589e-06, "loss": 0.6629, "step": 12385 }, { "epoch": 0.57, "grad_norm": 0.3231988184591064, "learning_rate": 8.260587671482951e-06, "loss": 0.1939, "step": 12386 }, { "epoch": 0.57, "grad_norm": 0.3716017017037349, "learning_rate": 8.259122453306228e-06, "loss": 0.301, "step": 12387 }, { "epoch": 0.57, "grad_norm": 0.6026389594277747, "learning_rate": 8.25765727367085e-06, "loss": 0.3981, "step": 12388 }, { "epoch": 0.57, "grad_norm": 0.2959017554143114, "learning_rate": 8.256192132609266e-06, "loss": 0.191, "step": 12389 }, { "epoch": 0.57, "grad_norm": 0.2483599588069115, "learning_rate": 8.254727030153908e-06, "loss": 0.1881, "step": 12390 }, { "epoch": 0.57, "grad_norm": 0.5218630992226893, "learning_rate": 8.25326196633721e-06, "loss": 0.4024, "step": 12391 }, { "epoch": 0.57, "grad_norm": 0.3233240626980372, "learning_rate": 8.251796941191605e-06, "loss": 0.1899, "step": 12392 }, { "epoch": 0.57, "grad_norm": 0.9057577591052176, "learning_rate": 8.250331954749534e-06, "loss": 0.414, "step": 12393 }, { "epoch": 0.57, "grad_norm": 0.4689033724729866, "learning_rate": 8.24886700704343e-06, "loss": 0.3672, "step": 12394 }, { "epoch": 0.57, "grad_norm": 0.29289236885933145, "learning_rate": 8.24740209810572e-06, "loss": 0.2603, "step": 12395 }, { "epoch": 0.57, "grad_norm": 0.27872756981612334, "learning_rate": 8.245937227968836e-06, "loss": 0.1319, "step": 12396 }, { "epoch": 0.57, "grad_norm": 1.4317620298695066, "learning_rate": 8.244472396665215e-06, "loss": 0.858, "step": 12397 }, { "epoch": 0.57, "grad_norm": 0.3389507964243794, "learning_rate": 8.243007604227282e-06, "loss": 0.2417, "step": 12398 }, { "epoch": 0.57, "grad_norm": 0.3600797151606209, "learning_rate": 8.241542850687466e-06, "loss": 0.2283, "step": 12399 }, { "epoch": 0.57, "grad_norm": 1.166964003307873, "learning_rate": 8.240078136078195e-06, "loss": 0.3802, "step": 12400 }, { "epoch": 0.57, "grad_norm": 0.3383773601837302, "learning_rate": 8.238613460431902e-06, "loss": 0.2477, "step": 12401 }, { "epoch": 0.57, "grad_norm": 0.22146615153481458, "learning_rate": 8.237148823781008e-06, "loss": 0.1392, "step": 12402 }, { "epoch": 0.57, "grad_norm": 0.484298914712896, "learning_rate": 8.23568422615794e-06, "loss": 0.3555, "step": 12403 }, { "epoch": 0.57, "grad_norm": 0.34085736574902065, "learning_rate": 8.234219667595123e-06, "loss": 0.2505, "step": 12404 }, { "epoch": 0.57, "grad_norm": 1.0624227973674418, "learning_rate": 8.232755148124977e-06, "loss": 0.3223, "step": 12405 }, { "epoch": 0.57, "grad_norm": 0.37652724744219923, "learning_rate": 8.231290667779931e-06, "loss": 0.327, "step": 12406 }, { "epoch": 0.57, "grad_norm": 0.3922742983999391, "learning_rate": 8.22982622659241e-06, "loss": 0.2295, "step": 12407 }, { "epoch": 0.57, "grad_norm": 0.2791941550733342, "learning_rate": 8.228361824594827e-06, "loss": 0.2, "step": 12408 }, { "epoch": 0.57, "grad_norm": 0.4762164290786062, "learning_rate": 8.2268974618196e-06, "loss": 0.3031, "step": 12409 }, { "epoch": 0.57, "grad_norm": 0.46365458648694297, "learning_rate": 8.22543313829916e-06, "loss": 0.3029, "step": 12410 }, { "epoch": 0.57, "grad_norm": 0.5724800502322915, "learning_rate": 8.223968854065918e-06, "loss": 0.3312, "step": 12411 }, { "epoch": 0.57, "grad_norm": 1.2673897735447883, "learning_rate": 8.222504609152295e-06, "loss": 0.3363, "step": 12412 }, { "epoch": 0.57, "grad_norm": 0.40223367770599583, "learning_rate": 8.221040403590704e-06, "loss": 0.2689, "step": 12413 }, { "epoch": 0.57, "grad_norm": 0.2787998568034056, "learning_rate": 8.219576237413568e-06, "loss": 0.2475, "step": 12414 }, { "epoch": 0.57, "grad_norm": 1.3542287516963254, "learning_rate": 8.218112110653297e-06, "loss": 0.5502, "step": 12415 }, { "epoch": 0.57, "grad_norm": 0.2955149527241956, "learning_rate": 8.216648023342307e-06, "loss": 0.2131, "step": 12416 }, { "epoch": 0.57, "grad_norm": 1.016787540048818, "learning_rate": 8.21518397551301e-06, "loss": 0.3979, "step": 12417 }, { "epoch": 0.57, "grad_norm": 0.37578326516777144, "learning_rate": 8.213719967197818e-06, "loss": 0.266, "step": 12418 }, { "epoch": 0.57, "grad_norm": 0.35435315444939663, "learning_rate": 8.212255998429146e-06, "loss": 0.2826, "step": 12419 }, { "epoch": 0.57, "grad_norm": 0.43313529004640994, "learning_rate": 8.210792069239401e-06, "loss": 0.2281, "step": 12420 }, { "epoch": 0.57, "grad_norm": 1.0984868694252856, "learning_rate": 8.209328179660998e-06, "loss": 0.5229, "step": 12421 }, { "epoch": 0.57, "grad_norm": 0.3477079155075459, "learning_rate": 8.207864329726338e-06, "loss": 0.2229, "step": 12422 }, { "epoch": 0.57, "grad_norm": 0.42983207572983373, "learning_rate": 8.206400519467839e-06, "loss": 0.3093, "step": 12423 }, { "epoch": 0.57, "grad_norm": 0.4803253007753997, "learning_rate": 8.204936748917904e-06, "loss": 0.2709, "step": 12424 }, { "epoch": 0.57, "grad_norm": 0.3434384332900092, "learning_rate": 8.20347301810894e-06, "loss": 0.1641, "step": 12425 }, { "epoch": 0.57, "grad_norm": 0.3093570204364109, "learning_rate": 8.20200932707335e-06, "loss": 0.287, "step": 12426 }, { "epoch": 0.57, "grad_norm": 0.8032423300693617, "learning_rate": 8.20054567584354e-06, "loss": 0.4817, "step": 12427 }, { "epoch": 0.57, "grad_norm": 0.34757534088613523, "learning_rate": 8.199082064451916e-06, "loss": 0.2145, "step": 12428 }, { "epoch": 0.57, "grad_norm": 0.2822963453604566, "learning_rate": 8.19761849293088e-06, "loss": 0.2185, "step": 12429 }, { "epoch": 0.57, "grad_norm": 0.3702796510005095, "learning_rate": 8.19615496131283e-06, "loss": 0.299, "step": 12430 }, { "epoch": 0.57, "grad_norm": 0.4137639722220958, "learning_rate": 8.194691469630174e-06, "loss": 0.1824, "step": 12431 }, { "epoch": 0.57, "grad_norm": 0.5323005131427144, "learning_rate": 8.193228017915309e-06, "loss": 0.4215, "step": 12432 }, { "epoch": 0.57, "grad_norm": 0.9311564601728092, "learning_rate": 8.191764606200633e-06, "loss": 0.4534, "step": 12433 }, { "epoch": 0.57, "grad_norm": 0.3149471324540098, "learning_rate": 8.190301234518547e-06, "loss": 0.2756, "step": 12434 }, { "epoch": 0.57, "grad_norm": 0.5249088376428949, "learning_rate": 8.188837902901441e-06, "loss": 0.2558, "step": 12435 }, { "epoch": 0.57, "grad_norm": 0.25703269409942686, "learning_rate": 8.187374611381726e-06, "loss": 0.1457, "step": 12436 }, { "epoch": 0.57, "grad_norm": 0.4132464407406081, "learning_rate": 8.18591135999179e-06, "loss": 0.2578, "step": 12437 }, { "epoch": 0.57, "grad_norm": 0.3041335441454722, "learning_rate": 8.184448148764024e-06, "loss": 0.2481, "step": 12438 }, { "epoch": 0.57, "grad_norm": 0.8117770327175959, "learning_rate": 8.182984977730826e-06, "loss": 0.482, "step": 12439 }, { "epoch": 0.57, "grad_norm": 0.4122792101600311, "learning_rate": 8.18152184692459e-06, "loss": 0.3015, "step": 12440 }, { "epoch": 0.57, "grad_norm": 0.313747636165733, "learning_rate": 8.18005875637771e-06, "loss": 0.1781, "step": 12441 }, { "epoch": 0.57, "grad_norm": 0.3062370537936647, "learning_rate": 8.178595706122574e-06, "loss": 0.2756, "step": 12442 }, { "epoch": 0.57, "grad_norm": 1.0103859000339337, "learning_rate": 8.177132696191573e-06, "loss": 0.3966, "step": 12443 }, { "epoch": 0.57, "grad_norm": 0.40139422837911387, "learning_rate": 8.175669726617097e-06, "loss": 0.2424, "step": 12444 }, { "epoch": 0.57, "grad_norm": 0.5407441836549812, "learning_rate": 8.174206797431537e-06, "loss": 0.3529, "step": 12445 }, { "epoch": 0.57, "grad_norm": 0.40317056526756295, "learning_rate": 8.172743908667277e-06, "loss": 0.3095, "step": 12446 }, { "epoch": 0.57, "grad_norm": 0.42489331987648654, "learning_rate": 8.171281060356705e-06, "loss": 0.339, "step": 12447 }, { "epoch": 0.57, "grad_norm": 0.17159769055253393, "learning_rate": 8.16981825253221e-06, "loss": 0.0726, "step": 12448 }, { "epoch": 0.57, "grad_norm": 0.6731544408584548, "learning_rate": 8.168355485226173e-06, "loss": 0.39, "step": 12449 }, { "epoch": 0.57, "grad_norm": 0.2874904813776581, "learning_rate": 8.166892758470982e-06, "loss": 0.2817, "step": 12450 }, { "epoch": 0.57, "grad_norm": 0.6633613225421824, "learning_rate": 8.165430072299017e-06, "loss": 0.3199, "step": 12451 }, { "epoch": 0.57, "grad_norm": 0.5769082173635337, "learning_rate": 8.16396742674266e-06, "loss": 0.3491, "step": 12452 }, { "epoch": 0.57, "grad_norm": 0.3251954097440529, "learning_rate": 8.162504821834296e-06, "loss": 0.2609, "step": 12453 }, { "epoch": 0.57, "grad_norm": 0.26880435688326815, "learning_rate": 8.16104225760631e-06, "loss": 0.1841, "step": 12454 }, { "epoch": 0.57, "grad_norm": 0.6593908460085663, "learning_rate": 8.15957973409107e-06, "loss": 0.3456, "step": 12455 }, { "epoch": 0.57, "grad_norm": 0.4019859074904042, "learning_rate": 8.158117251320958e-06, "loss": 0.3045, "step": 12456 }, { "epoch": 0.57, "grad_norm": 0.6605321846349567, "learning_rate": 8.15665480932836e-06, "loss": 0.4457, "step": 12457 }, { "epoch": 0.57, "grad_norm": 0.2708970998161733, "learning_rate": 8.155192408145647e-06, "loss": 0.2264, "step": 12458 }, { "epoch": 0.57, "grad_norm": 0.5402518195015316, "learning_rate": 8.153730047805198e-06, "loss": 0.3464, "step": 12459 }, { "epoch": 0.57, "grad_norm": 0.3463248154201205, "learning_rate": 8.152267728339382e-06, "loss": 0.2422, "step": 12460 }, { "epoch": 0.57, "grad_norm": 0.5064206343411938, "learning_rate": 8.15080544978058e-06, "loss": 0.2461, "step": 12461 }, { "epoch": 0.57, "grad_norm": 0.2840657758305156, "learning_rate": 8.149343212161163e-06, "loss": 0.2515, "step": 12462 }, { "epoch": 0.57, "grad_norm": 0.6627502765015306, "learning_rate": 8.147881015513505e-06, "loss": 0.4602, "step": 12463 }, { "epoch": 0.57, "grad_norm": 0.5664572059038284, "learning_rate": 8.146418859869975e-06, "loss": 0.2433, "step": 12464 }, { "epoch": 0.57, "grad_norm": 0.3765739237751285, "learning_rate": 8.144956745262944e-06, "loss": 0.277, "step": 12465 }, { "epoch": 0.57, "grad_norm": 0.29502516818982955, "learning_rate": 8.143494671724784e-06, "loss": 0.2515, "step": 12466 }, { "epoch": 0.57, "grad_norm": 0.2949079316320679, "learning_rate": 8.142032639287861e-06, "loss": 0.1248, "step": 12467 }, { "epoch": 0.57, "grad_norm": 0.3822384776154844, "learning_rate": 8.140570647984547e-06, "loss": 0.3162, "step": 12468 }, { "epoch": 0.57, "grad_norm": 1.4129630009859606, "learning_rate": 8.139108697847201e-06, "loss": 0.79, "step": 12469 }, { "epoch": 0.57, "grad_norm": 0.35011490265178713, "learning_rate": 8.1376467889082e-06, "loss": 0.2756, "step": 12470 }, { "epoch": 0.57, "grad_norm": 0.3820766663129361, "learning_rate": 8.136184921199904e-06, "loss": 0.2188, "step": 12471 }, { "epoch": 0.57, "grad_norm": 0.27827514541470605, "learning_rate": 8.134723094754679e-06, "loss": 0.1636, "step": 12472 }, { "epoch": 0.57, "grad_norm": 0.42491064725440575, "learning_rate": 8.133261309604881e-06, "loss": 0.319, "step": 12473 }, { "epoch": 0.57, "grad_norm": 0.2958681542869471, "learning_rate": 8.131799565782884e-06, "loss": 0.2171, "step": 12474 }, { "epoch": 0.57, "grad_norm": 0.720910375386371, "learning_rate": 8.130337863321042e-06, "loss": 0.4501, "step": 12475 }, { "epoch": 0.57, "grad_norm": 0.7362239965509798, "learning_rate": 8.128876202251719e-06, "loss": 0.4947, "step": 12476 }, { "epoch": 0.57, "grad_norm": 0.37897847573113214, "learning_rate": 8.127414582607272e-06, "loss": 0.2142, "step": 12477 }, { "epoch": 0.57, "grad_norm": 0.3739905570542845, "learning_rate": 8.125953004420061e-06, "loss": 0.2968, "step": 12478 }, { "epoch": 0.57, "grad_norm": 0.7084940774992083, "learning_rate": 8.124491467722446e-06, "loss": 0.4007, "step": 12479 }, { "epoch": 0.57, "grad_norm": 0.2065382822005008, "learning_rate": 8.123029972546782e-06, "loss": 0.143, "step": 12480 }, { "epoch": 0.57, "grad_norm": 0.3876459832523706, "learning_rate": 8.121568518925424e-06, "loss": 0.3077, "step": 12481 }, { "epoch": 0.57, "grad_norm": 1.0955839718431823, "learning_rate": 8.120107106890726e-06, "loss": 0.6572, "step": 12482 }, { "epoch": 0.57, "grad_norm": 0.3569165523728648, "learning_rate": 8.118645736475051e-06, "loss": 0.2578, "step": 12483 }, { "epoch": 0.57, "grad_norm": 0.6492416582049085, "learning_rate": 8.117184407710743e-06, "loss": 0.2934, "step": 12484 }, { "epoch": 0.57, "grad_norm": 0.40233132736261773, "learning_rate": 8.115723120630159e-06, "loss": 0.3011, "step": 12485 }, { "epoch": 0.57, "grad_norm": 0.2624374978066074, "learning_rate": 8.114261875265643e-06, "loss": 0.1997, "step": 12486 }, { "epoch": 0.57, "grad_norm": 0.29734490956247445, "learning_rate": 8.112800671649557e-06, "loss": 0.1312, "step": 12487 }, { "epoch": 0.57, "grad_norm": 0.7807367233554806, "learning_rate": 8.111339509814245e-06, "loss": 0.4675, "step": 12488 }, { "epoch": 0.57, "grad_norm": 0.37387641436265884, "learning_rate": 8.109878389792055e-06, "loss": 0.2793, "step": 12489 }, { "epoch": 0.57, "grad_norm": 0.3845747260479208, "learning_rate": 8.108417311615336e-06, "loss": 0.2558, "step": 12490 }, { "epoch": 0.57, "grad_norm": 0.6206724529713612, "learning_rate": 8.106956275316433e-06, "loss": 0.4, "step": 12491 }, { "epoch": 0.57, "grad_norm": 0.35668872045109684, "learning_rate": 8.105495280927696e-06, "loss": 0.2601, "step": 12492 }, { "epoch": 0.57, "grad_norm": 0.2372328514099953, "learning_rate": 8.104034328481468e-06, "loss": 0.1781, "step": 12493 }, { "epoch": 0.57, "grad_norm": 0.8630802371627697, "learning_rate": 8.10257341801009e-06, "loss": 0.5461, "step": 12494 }, { "epoch": 0.57, "grad_norm": 0.366086123693029, "learning_rate": 8.101112549545908e-06, "loss": 0.2511, "step": 12495 }, { "epoch": 0.57, "grad_norm": 0.8657018978144175, "learning_rate": 8.099651723121267e-06, "loss": 0.3853, "step": 12496 }, { "epoch": 0.57, "grad_norm": 0.3906777218534867, "learning_rate": 8.098190938768503e-06, "loss": 0.2514, "step": 12497 }, { "epoch": 0.57, "grad_norm": 0.33402678133117836, "learning_rate": 8.09673019651996e-06, "loss": 0.2298, "step": 12498 }, { "epoch": 0.57, "grad_norm": 0.34129716769269874, "learning_rate": 8.095269496407972e-06, "loss": 0.222, "step": 12499 }, { "epoch": 0.57, "grad_norm": 0.9013801734073409, "learning_rate": 8.093808838464884e-06, "loss": 0.3809, "step": 12500 }, { "epoch": 0.57, "grad_norm": 0.39387452502888753, "learning_rate": 8.092348222723034e-06, "loss": 0.2537, "step": 12501 }, { "epoch": 0.57, "grad_norm": 0.4908113845834032, "learning_rate": 8.090887649214755e-06, "loss": 0.3218, "step": 12502 }, { "epoch": 0.57, "grad_norm": 1.3932149890811034, "learning_rate": 8.089427117972379e-06, "loss": 0.2244, "step": 12503 }, { "epoch": 0.57, "grad_norm": 0.27325876785340175, "learning_rate": 8.087966629028247e-06, "loss": 0.1955, "step": 12504 }, { "epoch": 0.57, "grad_norm": 0.6780950161412388, "learning_rate": 8.086506182414692e-06, "loss": 0.3881, "step": 12505 }, { "epoch": 0.57, "grad_norm": 0.3649718590466074, "learning_rate": 8.085045778164049e-06, "loss": 0.2652, "step": 12506 }, { "epoch": 0.57, "grad_norm": 0.3704535583965833, "learning_rate": 8.083585416308642e-06, "loss": 0.2704, "step": 12507 }, { "epoch": 0.57, "grad_norm": 0.7668013343495043, "learning_rate": 8.082125096880808e-06, "loss": 0.4164, "step": 12508 }, { "epoch": 0.57, "grad_norm": 0.40252819197572576, "learning_rate": 8.080664819912877e-06, "loss": 0.3291, "step": 12509 }, { "epoch": 0.57, "grad_norm": 0.3372206058233182, "learning_rate": 8.079204585437177e-06, "loss": 0.1967, "step": 12510 }, { "epoch": 0.57, "grad_norm": 0.5575128779912731, "learning_rate": 8.077744393486036e-06, "loss": 0.3846, "step": 12511 }, { "epoch": 0.57, "grad_norm": 0.636988304950756, "learning_rate": 8.076284244091779e-06, "loss": 0.4156, "step": 12512 }, { "epoch": 0.57, "grad_norm": 0.3011571854953526, "learning_rate": 8.074824137286738e-06, "loss": 0.1913, "step": 12513 }, { "epoch": 0.57, "grad_norm": 0.3005690470286853, "learning_rate": 8.073364073103234e-06, "loss": 0.2532, "step": 12514 }, { "epoch": 0.57, "grad_norm": 1.0750069108132279, "learning_rate": 8.071904051573592e-06, "loss": 0.5573, "step": 12515 }, { "epoch": 0.57, "grad_norm": 0.32269095283556476, "learning_rate": 8.070444072730132e-06, "loss": 0.1944, "step": 12516 }, { "epoch": 0.58, "grad_norm": 0.30407980089671194, "learning_rate": 8.068984136605187e-06, "loss": 0.2547, "step": 12517 }, { "epoch": 0.58, "grad_norm": 0.8934025157688004, "learning_rate": 8.06752424323107e-06, "loss": 0.568, "step": 12518 }, { "epoch": 0.58, "grad_norm": 0.35157622106586794, "learning_rate": 8.066064392640106e-06, "loss": 0.2316, "step": 12519 }, { "epoch": 0.58, "grad_norm": 0.3886878663802641, "learning_rate": 8.064604584864607e-06, "loss": 0.2693, "step": 12520 }, { "epoch": 0.58, "grad_norm": 0.3614275275720635, "learning_rate": 8.0631448199369e-06, "loss": 0.2895, "step": 12521 }, { "epoch": 0.58, "grad_norm": 0.3603961092805273, "learning_rate": 8.0616850978893e-06, "loss": 0.2837, "step": 12522 }, { "epoch": 0.58, "grad_norm": 0.9109855957212859, "learning_rate": 8.060225418754125e-06, "loss": 0.3651, "step": 12523 }, { "epoch": 0.58, "grad_norm": 0.7645880523581638, "learning_rate": 8.058765782563688e-06, "loss": 0.3986, "step": 12524 }, { "epoch": 0.58, "grad_norm": 0.2810392851338441, "learning_rate": 8.057306189350305e-06, "loss": 0.25, "step": 12525 }, { "epoch": 0.58, "grad_norm": 0.2310976151121476, "learning_rate": 8.055846639146292e-06, "loss": 0.139, "step": 12526 }, { "epoch": 0.58, "grad_norm": 0.9821769487500752, "learning_rate": 8.054387131983962e-06, "loss": 0.5024, "step": 12527 }, { "epoch": 0.58, "grad_norm": 0.42843692044450804, "learning_rate": 8.052927667895624e-06, "loss": 0.2828, "step": 12528 }, { "epoch": 0.58, "grad_norm": 0.3391632759506232, "learning_rate": 8.05146824691359e-06, "loss": 0.2559, "step": 12529 }, { "epoch": 0.58, "grad_norm": 1.1488297881680636, "learning_rate": 8.050008869070172e-06, "loss": 0.4057, "step": 12530 }, { "epoch": 0.58, "grad_norm": 0.3698351660343765, "learning_rate": 8.048549534397679e-06, "loss": 0.2269, "step": 12531 }, { "epoch": 0.58, "grad_norm": 0.22585768041778964, "learning_rate": 8.047090242928416e-06, "loss": 0.1394, "step": 12532 }, { "epoch": 0.58, "grad_norm": 0.3938117946272398, "learning_rate": 8.045630994694692e-06, "loss": 0.2955, "step": 12533 }, { "epoch": 0.58, "grad_norm": 0.4226909304057653, "learning_rate": 8.044171789728816e-06, "loss": 0.2942, "step": 12534 }, { "epoch": 0.58, "grad_norm": 0.5634467242269736, "learning_rate": 8.042712628063094e-06, "loss": 0.3734, "step": 12535 }, { "epoch": 0.58, "grad_norm": 1.3491688688008365, "learning_rate": 8.041253509729825e-06, "loss": 0.336, "step": 12536 }, { "epoch": 0.58, "grad_norm": 0.2954357993854914, "learning_rate": 8.03979443476132e-06, "loss": 0.258, "step": 12537 }, { "epoch": 0.58, "grad_norm": 0.2706554027162987, "learning_rate": 8.038335403189868e-06, "loss": 0.2044, "step": 12538 }, { "epoch": 0.58, "grad_norm": 0.9811512680948671, "learning_rate": 8.036876415047786e-06, "loss": 0.2276, "step": 12539 }, { "epoch": 0.58, "grad_norm": 0.4686787196730729, "learning_rate": 8.035417470367368e-06, "loss": 0.3077, "step": 12540 }, { "epoch": 0.58, "grad_norm": 0.46839624518546885, "learning_rate": 8.033958569180912e-06, "loss": 0.3248, "step": 12541 }, { "epoch": 0.58, "grad_norm": 0.8658598301058729, "learning_rate": 8.032499711520717e-06, "loss": 0.3044, "step": 12542 }, { "epoch": 0.58, "grad_norm": 0.34766056563632164, "learning_rate": 8.031040897419084e-06, "loss": 0.2779, "step": 12543 }, { "epoch": 0.58, "grad_norm": 0.4023602701488176, "learning_rate": 8.02958212690831e-06, "loss": 0.2396, "step": 12544 }, { "epoch": 0.58, "grad_norm": 0.35876524670454457, "learning_rate": 8.028123400020686e-06, "loss": 0.2035, "step": 12545 }, { "epoch": 0.58, "grad_norm": 0.4494014621544313, "learning_rate": 8.026664716788506e-06, "loss": 0.2683, "step": 12546 }, { "epoch": 0.58, "grad_norm": 0.48402231522786154, "learning_rate": 8.025206077244072e-06, "loss": 0.3759, "step": 12547 }, { "epoch": 0.58, "grad_norm": 0.45194413819769047, "learning_rate": 8.023747481419674e-06, "loss": 0.3741, "step": 12548 }, { "epoch": 0.58, "grad_norm": 0.3680181159338744, "learning_rate": 8.022288929347599e-06, "loss": 0.2091, "step": 12549 }, { "epoch": 0.58, "grad_norm": 0.32448754821138226, "learning_rate": 8.020830421060137e-06, "loss": 0.2475, "step": 12550 }, { "epoch": 0.58, "grad_norm": 0.5119490706206589, "learning_rate": 8.019371956589588e-06, "loss": 0.2276, "step": 12551 }, { "epoch": 0.58, "grad_norm": 0.34538058686291784, "learning_rate": 8.017913535968234e-06, "loss": 0.2048, "step": 12552 }, { "epoch": 0.58, "grad_norm": 0.33584169019823745, "learning_rate": 8.016455159228363e-06, "loss": 0.2912, "step": 12553 }, { "epoch": 0.58, "grad_norm": 0.7784194308493582, "learning_rate": 8.014996826402263e-06, "loss": 0.528, "step": 12554 }, { "epoch": 0.58, "grad_norm": 0.32368324024993933, "learning_rate": 8.013538537522219e-06, "loss": 0.1974, "step": 12555 }, { "epoch": 0.58, "grad_norm": 0.30192809124081077, "learning_rate": 8.01208029262052e-06, "loss": 0.1932, "step": 12556 }, { "epoch": 0.58, "grad_norm": 0.41836740439964365, "learning_rate": 8.010622091729444e-06, "loss": 0.2946, "step": 12557 }, { "epoch": 0.58, "grad_norm": 0.3853546307528533, "learning_rate": 8.00916393488128e-06, "loss": 0.232, "step": 12558 }, { "epoch": 0.58, "grad_norm": 0.5015878029593724, "learning_rate": 8.007705822108305e-06, "loss": 0.3632, "step": 12559 }, { "epoch": 0.58, "grad_norm": 1.0909867155640225, "learning_rate": 8.006247753442805e-06, "loss": 0.7113, "step": 12560 }, { "epoch": 0.58, "grad_norm": 0.33987331106490626, "learning_rate": 8.004789728917059e-06, "loss": 0.2905, "step": 12561 }, { "epoch": 0.58, "grad_norm": 0.3679740839261454, "learning_rate": 8.003331748563344e-06, "loss": 0.2307, "step": 12562 }, { "epoch": 0.58, "grad_norm": 0.3239617159083856, "learning_rate": 8.001873812413936e-06, "loss": 0.2314, "step": 12563 }, { "epoch": 0.58, "grad_norm": 0.4588379688612307, "learning_rate": 8.00041592050112e-06, "loss": 0.3148, "step": 12564 }, { "epoch": 0.58, "grad_norm": 0.2666343635475842, "learning_rate": 7.99895807285717e-06, "loss": 0.2365, "step": 12565 }, { "epoch": 0.58, "grad_norm": 1.2850071211710368, "learning_rate": 7.99750026951436e-06, "loss": 0.7672, "step": 12566 }, { "epoch": 0.58, "grad_norm": 0.7461355930196935, "learning_rate": 7.996042510504963e-06, "loss": 0.3431, "step": 12567 }, { "epoch": 0.58, "grad_norm": 0.2805438155926942, "learning_rate": 7.994584795861248e-06, "loss": 0.1719, "step": 12568 }, { "epoch": 0.58, "grad_norm": 0.2713886302334592, "learning_rate": 7.993127125615498e-06, "loss": 0.2428, "step": 12569 }, { "epoch": 0.58, "grad_norm": 0.6307198542543352, "learning_rate": 7.991669499799978e-06, "loss": 0.3941, "step": 12570 }, { "epoch": 0.58, "grad_norm": 0.3468610344511736, "learning_rate": 7.990211918446961e-06, "loss": 0.222, "step": 12571 }, { "epoch": 0.58, "grad_norm": 1.3039111211953105, "learning_rate": 7.988754381588712e-06, "loss": 0.7556, "step": 12572 }, { "epoch": 0.58, "grad_norm": 0.32071360620227096, "learning_rate": 7.987296889257505e-06, "loss": 0.2648, "step": 12573 }, { "epoch": 0.58, "grad_norm": 0.43378719543085736, "learning_rate": 7.985839441485604e-06, "loss": 0.287, "step": 12574 }, { "epoch": 0.58, "grad_norm": 0.5992632740044342, "learning_rate": 7.984382038305278e-06, "loss": 0.3066, "step": 12575 }, { "epoch": 0.58, "grad_norm": 0.2788417247655016, "learning_rate": 7.982924679748789e-06, "loss": 0.2237, "step": 12576 }, { "epoch": 0.58, "grad_norm": 0.2703859297933438, "learning_rate": 7.981467365848405e-06, "loss": 0.2307, "step": 12577 }, { "epoch": 0.58, "grad_norm": 0.9936824965760298, "learning_rate": 7.980010096636387e-06, "loss": 0.5441, "step": 12578 }, { "epoch": 0.58, "grad_norm": 0.6704327879376025, "learning_rate": 7.978552872145e-06, "loss": 0.3623, "step": 12579 }, { "epoch": 0.58, "grad_norm": 0.41489905911827907, "learning_rate": 7.9770956924065e-06, "loss": 0.3125, "step": 12580 }, { "epoch": 0.58, "grad_norm": 0.39821486208140056, "learning_rate": 7.975638557453155e-06, "loss": 0.2826, "step": 12581 }, { "epoch": 0.58, "grad_norm": 0.28891383774287344, "learning_rate": 7.974181467317222e-06, "loss": 0.1633, "step": 12582 }, { "epoch": 0.58, "grad_norm": 0.37276327839125256, "learning_rate": 7.972724422030957e-06, "loss": 0.2613, "step": 12583 }, { "epoch": 0.58, "grad_norm": 0.5296359728156753, "learning_rate": 7.971267421626624e-06, "loss": 0.4101, "step": 12584 }, { "epoch": 0.58, "grad_norm": 0.4454463832865182, "learning_rate": 7.969810466136466e-06, "loss": 0.2414, "step": 12585 }, { "epoch": 0.58, "grad_norm": 0.37110394714560857, "learning_rate": 7.968353555592754e-06, "loss": 0.2564, "step": 12586 }, { "epoch": 0.58, "grad_norm": 0.6892518185472958, "learning_rate": 7.966896690027734e-06, "loss": 0.3907, "step": 12587 }, { "epoch": 0.58, "grad_norm": 0.29306298979400774, "learning_rate": 7.965439869473664e-06, "loss": 0.1567, "step": 12588 }, { "epoch": 0.58, "grad_norm": 0.24921091001228685, "learning_rate": 7.963983093962792e-06, "loss": 0.2224, "step": 12589 }, { "epoch": 0.58, "grad_norm": 1.3396481506724143, "learning_rate": 7.962526363527372e-06, "loss": 0.7508, "step": 12590 }, { "epoch": 0.58, "grad_norm": 0.5210697713419054, "learning_rate": 7.961069678199658e-06, "loss": 0.2443, "step": 12591 }, { "epoch": 0.58, "grad_norm": 0.3197146093217933, "learning_rate": 7.959613038011892e-06, "loss": 0.2786, "step": 12592 }, { "epoch": 0.58, "grad_norm": 0.4774819893334619, "learning_rate": 7.958156442996325e-06, "loss": 0.3719, "step": 12593 }, { "epoch": 0.58, "grad_norm": 0.5340840542738716, "learning_rate": 7.956699893185213e-06, "loss": 0.1479, "step": 12594 }, { "epoch": 0.58, "grad_norm": 0.3524340723754244, "learning_rate": 7.955243388610794e-06, "loss": 0.2429, "step": 12595 }, { "epoch": 0.58, "grad_norm": 0.49929082941615566, "learning_rate": 7.953786929305315e-06, "loss": 0.386, "step": 12596 }, { "epoch": 0.58, "grad_norm": 0.30312209599273787, "learning_rate": 7.952330515301022e-06, "loss": 0.2419, "step": 12597 }, { "epoch": 0.58, "grad_norm": 0.3517978594723385, "learning_rate": 7.950874146630152e-06, "loss": 0.2213, "step": 12598 }, { "epoch": 0.58, "grad_norm": 0.6740411118302502, "learning_rate": 7.949417823324958e-06, "loss": 0.4333, "step": 12599 }, { "epoch": 0.58, "grad_norm": 0.3405168737369513, "learning_rate": 7.947961545417677e-06, "loss": 0.2957, "step": 12600 }, { "epoch": 0.58, "grad_norm": 0.27730768999340255, "learning_rate": 7.94650531294055e-06, "loss": 0.1676, "step": 12601 }, { "epoch": 0.58, "grad_norm": 0.40300658984119136, "learning_rate": 7.945049125925815e-06, "loss": 0.2716, "step": 12602 }, { "epoch": 0.58, "grad_norm": 0.6671260172681813, "learning_rate": 7.943592984405714e-06, "loss": 0.3891, "step": 12603 }, { "epoch": 0.58, "grad_norm": 0.28061672707988833, "learning_rate": 7.94213688841248e-06, "loss": 0.2275, "step": 12604 }, { "epoch": 0.58, "grad_norm": 0.49158423093495524, "learning_rate": 7.940680837978353e-06, "loss": 0.3354, "step": 12605 }, { "epoch": 0.58, "grad_norm": 1.1317518225799188, "learning_rate": 7.939224833135567e-06, "loss": 0.5889, "step": 12606 }, { "epoch": 0.58, "grad_norm": 0.2902423319471402, "learning_rate": 7.937768873916358e-06, "loss": 0.2053, "step": 12607 }, { "epoch": 0.58, "grad_norm": 0.5203033092001017, "learning_rate": 7.936312960352957e-06, "loss": 0.4001, "step": 12608 }, { "epoch": 0.58, "grad_norm": 0.32220372637452793, "learning_rate": 7.934857092477599e-06, "loss": 0.2722, "step": 12609 }, { "epoch": 0.58, "grad_norm": 0.3475609924164586, "learning_rate": 7.933401270322512e-06, "loss": 0.2823, "step": 12610 }, { "epoch": 0.58, "grad_norm": 0.37695932096699447, "learning_rate": 7.931945493919932e-06, "loss": 0.1051, "step": 12611 }, { "epoch": 0.58, "grad_norm": 0.39458721459345736, "learning_rate": 7.930489763302085e-06, "loss": 0.2924, "step": 12612 }, { "epoch": 0.58, "grad_norm": 0.36523231724918903, "learning_rate": 7.929034078501202e-06, "loss": 0.2929, "step": 12613 }, { "epoch": 0.58, "grad_norm": 0.9362589753683518, "learning_rate": 7.927578439549506e-06, "loss": 0.3702, "step": 12614 }, { "epoch": 0.58, "grad_norm": 0.3575964060566967, "learning_rate": 7.926122846479224e-06, "loss": 0.2638, "step": 12615 }, { "epoch": 0.58, "grad_norm": 0.38522296828818736, "learning_rate": 7.924667299322585e-06, "loss": 0.274, "step": 12616 }, { "epoch": 0.58, "grad_norm": 0.30684398423890547, "learning_rate": 7.923211798111815e-06, "loss": 0.2289, "step": 12617 }, { "epoch": 0.58, "grad_norm": 0.8098901239714268, "learning_rate": 7.92175634287913e-06, "loss": 0.4745, "step": 12618 }, { "epoch": 0.58, "grad_norm": 0.36607871518936536, "learning_rate": 7.920300933656758e-06, "loss": 0.2922, "step": 12619 }, { "epoch": 0.58, "grad_norm": 0.3236687201078858, "learning_rate": 7.91884557047692e-06, "loss": 0.2664, "step": 12620 }, { "epoch": 0.58, "grad_norm": 0.7948109816623913, "learning_rate": 7.917390253371835e-06, "loss": 0.4017, "step": 12621 }, { "epoch": 0.58, "grad_norm": 0.3347989140896034, "learning_rate": 7.915934982373723e-06, "loss": 0.2506, "step": 12622 }, { "epoch": 0.58, "grad_norm": 0.31445494204293517, "learning_rate": 7.914479757514798e-06, "loss": 0.1825, "step": 12623 }, { "epoch": 0.58, "grad_norm": 0.34446354462157114, "learning_rate": 7.913024578827284e-06, "loss": 0.238, "step": 12624 }, { "epoch": 0.58, "grad_norm": 0.3802373636255726, "learning_rate": 7.911569446343394e-06, "loss": 0.2823, "step": 12625 }, { "epoch": 0.58, "grad_norm": 0.8926159130597905, "learning_rate": 7.910114360095345e-06, "loss": 0.4582, "step": 12626 }, { "epoch": 0.58, "grad_norm": 0.8681639670852823, "learning_rate": 7.908659320115349e-06, "loss": 0.334, "step": 12627 }, { "epoch": 0.58, "grad_norm": 0.29752523392779107, "learning_rate": 7.907204326435616e-06, "loss": 0.2555, "step": 12628 }, { "epoch": 0.58, "grad_norm": 0.2815656824090016, "learning_rate": 7.905749379088366e-06, "loss": 0.1923, "step": 12629 }, { "epoch": 0.58, "grad_norm": 0.978202880847337, "learning_rate": 7.904294478105806e-06, "loss": 0.3791, "step": 12630 }, { "epoch": 0.58, "grad_norm": 0.3341207522871565, "learning_rate": 7.90283962352015e-06, "loss": 0.247, "step": 12631 }, { "epoch": 0.58, "grad_norm": 0.5185402863867649, "learning_rate": 7.901384815363595e-06, "loss": 0.3246, "step": 12632 }, { "epoch": 0.58, "grad_norm": 1.3103867725155687, "learning_rate": 7.899930053668362e-06, "loss": 0.3698, "step": 12633 }, { "epoch": 0.58, "grad_norm": 0.6587853889627598, "learning_rate": 7.898475338466655e-06, "loss": 0.2963, "step": 12634 }, { "epoch": 0.58, "grad_norm": 0.2601060435970856, "learning_rate": 7.897020669790678e-06, "loss": 0.1745, "step": 12635 }, { "epoch": 0.58, "grad_norm": 0.3850765855647958, "learning_rate": 7.895566047672635e-06, "loss": 0.3138, "step": 12636 }, { "epoch": 0.58, "grad_norm": 0.33389922883343787, "learning_rate": 7.894111472144733e-06, "loss": 0.1913, "step": 12637 }, { "epoch": 0.58, "grad_norm": 1.161787970066977, "learning_rate": 7.892656943239172e-06, "loss": 0.4289, "step": 12638 }, { "epoch": 0.58, "grad_norm": 1.1998857445695863, "learning_rate": 7.891202460988158e-06, "loss": 0.6669, "step": 12639 }, { "epoch": 0.58, "grad_norm": 0.26249887024212365, "learning_rate": 7.889748025423882e-06, "loss": 0.2055, "step": 12640 }, { "epoch": 0.58, "grad_norm": 0.276589762392002, "learning_rate": 7.88829363657856e-06, "loss": 0.2091, "step": 12641 }, { "epoch": 0.58, "grad_norm": 1.5612438129931288, "learning_rate": 7.886839294484378e-06, "loss": 0.8209, "step": 12642 }, { "epoch": 0.58, "grad_norm": 0.3407378826065463, "learning_rate": 7.885384999173536e-06, "loss": 0.2279, "step": 12643 }, { "epoch": 0.58, "grad_norm": 0.3801962232979422, "learning_rate": 7.883930750678234e-06, "loss": 0.3204, "step": 12644 }, { "epoch": 0.58, "grad_norm": 1.2450762092730006, "learning_rate": 7.88247654903066e-06, "loss": 0.7545, "step": 12645 }, { "epoch": 0.58, "grad_norm": 0.34530719906204305, "learning_rate": 7.88102239426302e-06, "loss": 0.1896, "step": 12646 }, { "epoch": 0.58, "grad_norm": 0.3443003768136979, "learning_rate": 7.8795682864075e-06, "loss": 0.2042, "step": 12647 }, { "epoch": 0.58, "grad_norm": 0.3722375167670876, "learning_rate": 7.878114225496296e-06, "loss": 0.3331, "step": 12648 }, { "epoch": 0.58, "grad_norm": 0.3494477264956144, "learning_rate": 7.876660211561596e-06, "loss": 0.2944, "step": 12649 }, { "epoch": 0.58, "grad_norm": 0.8727556846387707, "learning_rate": 7.875206244635594e-06, "loss": 0.3558, "step": 12650 }, { "epoch": 0.58, "grad_norm": 0.5443354494664977, "learning_rate": 7.873752324750476e-06, "loss": 0.4219, "step": 12651 }, { "epoch": 0.58, "grad_norm": 0.5220989721043423, "learning_rate": 7.872298451938434e-06, "loss": 0.2709, "step": 12652 }, { "epoch": 0.58, "grad_norm": 0.2557293559042787, "learning_rate": 7.870844626231652e-06, "loss": 0.1589, "step": 12653 }, { "epoch": 0.58, "grad_norm": 0.6468448425487455, "learning_rate": 7.869390847662319e-06, "loss": 0.4049, "step": 12654 }, { "epoch": 0.58, "grad_norm": 0.5540984556558914, "learning_rate": 7.86793711626262e-06, "loss": 0.3679, "step": 12655 }, { "epoch": 0.58, "grad_norm": 0.28212708911982204, "learning_rate": 7.866483432064737e-06, "loss": 0.236, "step": 12656 }, { "epoch": 0.58, "grad_norm": 1.250844795160495, "learning_rate": 7.865029795100857e-06, "loss": 0.679, "step": 12657 }, { "epoch": 0.58, "grad_norm": 0.4539509024420482, "learning_rate": 7.863576205403153e-06, "loss": 0.2975, "step": 12658 }, { "epoch": 0.58, "grad_norm": 0.2533246325859676, "learning_rate": 7.862122663003819e-06, "loss": 0.1633, "step": 12659 }, { "epoch": 0.58, "grad_norm": 0.5112533225780336, "learning_rate": 7.860669167935028e-06, "loss": 0.3594, "step": 12660 }, { "epoch": 0.58, "grad_norm": 0.457637034124689, "learning_rate": 7.85921572022896e-06, "loss": 0.2962, "step": 12661 }, { "epoch": 0.58, "grad_norm": 0.5726592026191093, "learning_rate": 7.857762319917787e-06, "loss": 0.3787, "step": 12662 }, { "epoch": 0.58, "grad_norm": 0.44461902655318186, "learning_rate": 7.856308967033697e-06, "loss": 0.2465, "step": 12663 }, { "epoch": 0.58, "grad_norm": 0.3370231121170109, "learning_rate": 7.854855661608858e-06, "loss": 0.2521, "step": 12664 }, { "epoch": 0.58, "grad_norm": 0.6361696779083015, "learning_rate": 7.853402403675449e-06, "loss": 0.3577, "step": 12665 }, { "epoch": 0.58, "grad_norm": 0.39936835506328316, "learning_rate": 7.85194919326564e-06, "loss": 0.2058, "step": 12666 }, { "epoch": 0.58, "grad_norm": 0.42491252578065297, "learning_rate": 7.850496030411608e-06, "loss": 0.2971, "step": 12667 }, { "epoch": 0.58, "grad_norm": 0.3483353506919157, "learning_rate": 7.84904291514552e-06, "loss": 0.2917, "step": 12668 }, { "epoch": 0.58, "grad_norm": 0.8263498947622043, "learning_rate": 7.84758984749955e-06, "loss": 0.4065, "step": 12669 }, { "epoch": 0.58, "grad_norm": 0.6099462694633945, "learning_rate": 7.846136827505866e-06, "loss": 0.3497, "step": 12670 }, { "epoch": 0.58, "grad_norm": 0.39457532516270394, "learning_rate": 7.844683855196637e-06, "loss": 0.3152, "step": 12671 }, { "epoch": 0.58, "grad_norm": 0.2796453804615942, "learning_rate": 7.843230930604028e-06, "loss": 0.2327, "step": 12672 }, { "epoch": 0.58, "grad_norm": 0.3201605047618988, "learning_rate": 7.841778053760212e-06, "loss": 0.1372, "step": 12673 }, { "epoch": 0.58, "grad_norm": 0.4163178647357595, "learning_rate": 7.840325224697348e-06, "loss": 0.3281, "step": 12674 }, { "epoch": 0.58, "grad_norm": 0.522060078342285, "learning_rate": 7.838872443447596e-06, "loss": 0.3445, "step": 12675 }, { "epoch": 0.58, "grad_norm": 0.3910218572249062, "learning_rate": 7.837419710043131e-06, "loss": 0.1934, "step": 12676 }, { "epoch": 0.58, "grad_norm": 0.45377159324605604, "learning_rate": 7.835967024516107e-06, "loss": 0.3159, "step": 12677 }, { "epoch": 0.58, "grad_norm": 0.7614369084645728, "learning_rate": 7.834514386898693e-06, "loss": 0.4169, "step": 12678 }, { "epoch": 0.58, "grad_norm": 0.1921810064205715, "learning_rate": 7.833061797223035e-06, "loss": 0.1252, "step": 12679 }, { "epoch": 0.58, "grad_norm": 0.33386065259883574, "learning_rate": 7.831609255521305e-06, "loss": 0.3009, "step": 12680 }, { "epoch": 0.58, "grad_norm": 1.2262781943184184, "learning_rate": 7.830156761825656e-06, "loss": 0.6402, "step": 12681 }, { "epoch": 0.58, "grad_norm": 0.38619140964693904, "learning_rate": 7.828704316168245e-06, "loss": 0.2211, "step": 12682 }, { "epoch": 0.58, "grad_norm": 0.5245862977006677, "learning_rate": 7.827251918581225e-06, "loss": 0.3574, "step": 12683 }, { "epoch": 0.58, "grad_norm": 0.3830797185728154, "learning_rate": 7.825799569096758e-06, "loss": 0.3257, "step": 12684 }, { "epoch": 0.58, "grad_norm": 0.28003312299068583, "learning_rate": 7.82434726774699e-06, "loss": 0.1564, "step": 12685 }, { "epoch": 0.58, "grad_norm": 0.5898295070891924, "learning_rate": 7.822895014564078e-06, "loss": 0.4206, "step": 12686 }, { "epoch": 0.58, "grad_norm": 0.2960106671095541, "learning_rate": 7.821442809580172e-06, "loss": 0.2642, "step": 12687 }, { "epoch": 0.58, "grad_norm": 0.8707744762969479, "learning_rate": 7.81999065282742e-06, "loss": 0.3994, "step": 12688 }, { "epoch": 0.58, "grad_norm": 0.34795852024580404, "learning_rate": 7.818538544337975e-06, "loss": 0.2397, "step": 12689 }, { "epoch": 0.58, "grad_norm": 0.8652227310579472, "learning_rate": 7.817086484143987e-06, "loss": 0.4585, "step": 12690 }, { "epoch": 0.58, "grad_norm": 0.4319232767910534, "learning_rate": 7.815634472277597e-06, "loss": 0.3185, "step": 12691 }, { "epoch": 0.58, "grad_norm": 0.2768057891485402, "learning_rate": 7.814182508770953e-06, "loss": 0.2307, "step": 12692 }, { "epoch": 0.58, "grad_norm": 0.28279437703032895, "learning_rate": 7.812730593656203e-06, "loss": 0.1893, "step": 12693 }, { "epoch": 0.58, "grad_norm": 0.7098006772698936, "learning_rate": 7.811278726965492e-06, "loss": 0.3637, "step": 12694 }, { "epoch": 0.58, "grad_norm": 0.3179244428979831, "learning_rate": 7.80982690873096e-06, "loss": 0.2473, "step": 12695 }, { "epoch": 0.58, "grad_norm": 0.5316893464638233, "learning_rate": 7.808375138984747e-06, "loss": 0.3898, "step": 12696 }, { "epoch": 0.58, "grad_norm": 0.8630942169624378, "learning_rate": 7.806923417758999e-06, "loss": 0.569, "step": 12697 }, { "epoch": 0.58, "grad_norm": 0.26791479259116613, "learning_rate": 7.805471745085851e-06, "loss": 0.1892, "step": 12698 }, { "epoch": 0.58, "grad_norm": 0.34395399660954734, "learning_rate": 7.804020120997443e-06, "loss": 0.2629, "step": 12699 }, { "epoch": 0.58, "grad_norm": 0.38971885476184925, "learning_rate": 7.802568545525913e-06, "loss": 0.2776, "step": 12700 }, { "epoch": 0.58, "grad_norm": 0.4322676376663041, "learning_rate": 7.801117018703398e-06, "loss": 0.3044, "step": 12701 }, { "epoch": 0.58, "grad_norm": 0.8293628503423306, "learning_rate": 7.799665540562034e-06, "loss": 0.3316, "step": 12702 }, { "epoch": 0.58, "grad_norm": 0.33691500869657975, "learning_rate": 7.798214111133954e-06, "loss": 0.2716, "step": 12703 }, { "epoch": 0.58, "grad_norm": 0.41667108627821636, "learning_rate": 7.796762730451292e-06, "loss": 0.3238, "step": 12704 }, { "epoch": 0.58, "grad_norm": 0.47536467680254574, "learning_rate": 7.795311398546174e-06, "loss": 0.2352, "step": 12705 }, { "epoch": 0.58, "grad_norm": 0.4091655862535293, "learning_rate": 7.793860115450744e-06, "loss": 0.221, "step": 12706 }, { "epoch": 0.58, "grad_norm": 0.3843159760743542, "learning_rate": 7.792408881197122e-06, "loss": 0.2866, "step": 12707 }, { "epoch": 0.58, "grad_norm": 0.33659983901033974, "learning_rate": 7.79095769581744e-06, "loss": 0.2308, "step": 12708 }, { "epoch": 0.58, "grad_norm": 0.9919849593589294, "learning_rate": 7.789506559343821e-06, "loss": 0.5709, "step": 12709 }, { "epoch": 0.58, "grad_norm": 0.38730906487439765, "learning_rate": 7.788055471808401e-06, "loss": 0.2979, "step": 12710 }, { "epoch": 0.58, "grad_norm": 0.34654807857656456, "learning_rate": 7.7866044332433e-06, "loss": 0.2979, "step": 12711 }, { "epoch": 0.58, "grad_norm": 0.3990331716414633, "learning_rate": 7.785153443680646e-06, "loss": 0.172, "step": 12712 }, { "epoch": 0.58, "grad_norm": 0.3129950457641804, "learning_rate": 7.783702503152557e-06, "loss": 0.251, "step": 12713 }, { "epoch": 0.58, "grad_norm": 1.739392246224489, "learning_rate": 7.78225161169116e-06, "loss": 0.7767, "step": 12714 }, { "epoch": 0.58, "grad_norm": 0.2967349941365548, "learning_rate": 7.780800769328574e-06, "loss": 0.2143, "step": 12715 }, { "epoch": 0.58, "grad_norm": 0.3733989155471841, "learning_rate": 7.77934997609692e-06, "loss": 0.3081, "step": 12716 }, { "epoch": 0.58, "grad_norm": 0.7559852686228888, "learning_rate": 7.777899232028319e-06, "loss": 0.4853, "step": 12717 }, { "epoch": 0.58, "grad_norm": 0.2637166826170659, "learning_rate": 7.776448537154883e-06, "loss": 0.1651, "step": 12718 }, { "epoch": 0.58, "grad_norm": 0.3934348397547351, "learning_rate": 7.774997891508737e-06, "loss": 0.2546, "step": 12719 }, { "epoch": 0.58, "grad_norm": 0.37041073652678824, "learning_rate": 7.773547295121994e-06, "loss": 0.2788, "step": 12720 }, { "epoch": 0.58, "grad_norm": 0.44447437946952, "learning_rate": 7.772096748026768e-06, "loss": 0.1916, "step": 12721 }, { "epoch": 0.58, "grad_norm": 0.46225300955705867, "learning_rate": 7.770646250255167e-06, "loss": 0.3167, "step": 12722 }, { "epoch": 0.58, "grad_norm": 0.3786172634701655, "learning_rate": 7.769195801839313e-06, "loss": 0.3065, "step": 12723 }, { "epoch": 0.58, "grad_norm": 0.806230981507373, "learning_rate": 7.767745402811316e-06, "loss": 0.4504, "step": 12724 }, { "epoch": 0.58, "grad_norm": 0.240571146952024, "learning_rate": 7.766295053203285e-06, "loss": 0.1593, "step": 12725 }, { "epoch": 0.58, "grad_norm": 0.49002974751190337, "learning_rate": 7.764844753047321e-06, "loss": 0.2816, "step": 12726 }, { "epoch": 0.58, "grad_norm": 0.3750389632967279, "learning_rate": 7.763394502375547e-06, "loss": 0.3069, "step": 12727 }, { "epoch": 0.58, "grad_norm": 0.3287739378802034, "learning_rate": 7.76194430122006e-06, "loss": 0.2244, "step": 12728 }, { "epoch": 0.58, "grad_norm": 0.6701579747563696, "learning_rate": 7.760494149612971e-06, "loss": 0.4415, "step": 12729 }, { "epoch": 0.58, "grad_norm": 0.44360422701781166, "learning_rate": 7.759044047586382e-06, "loss": 0.243, "step": 12730 }, { "epoch": 0.58, "grad_norm": 0.2824708068320173, "learning_rate": 7.757593995172399e-06, "loss": 0.2213, "step": 12731 }, { "epoch": 0.58, "grad_norm": 0.4048384631868413, "learning_rate": 7.756143992403123e-06, "loss": 0.1737, "step": 12732 }, { "epoch": 0.58, "grad_norm": 0.840620303570009, "learning_rate": 7.754694039310658e-06, "loss": 0.4689, "step": 12733 }, { "epoch": 0.58, "grad_norm": 0.3689585276893205, "learning_rate": 7.7532441359271e-06, "loss": 0.2329, "step": 12734 }, { "epoch": 0.59, "grad_norm": 0.4099765586685885, "learning_rate": 7.75179428228455e-06, "loss": 0.342, "step": 12735 }, { "epoch": 0.59, "grad_norm": 1.02779099374064, "learning_rate": 7.750344478415113e-06, "loss": 0.5829, "step": 12736 }, { "epoch": 0.59, "grad_norm": 0.35310801221176674, "learning_rate": 7.748894724350879e-06, "loss": 0.2561, "step": 12737 }, { "epoch": 0.59, "grad_norm": 0.2230389078281909, "learning_rate": 7.747445020123945e-06, "loss": 0.0902, "step": 12738 }, { "epoch": 0.59, "grad_norm": 0.3783188802839845, "learning_rate": 7.745995365766404e-06, "loss": 0.3079, "step": 12739 }, { "epoch": 0.59, "grad_norm": 0.444669157569832, "learning_rate": 7.744545761310358e-06, "loss": 0.2916, "step": 12740 }, { "epoch": 0.59, "grad_norm": 0.5806950974618498, "learning_rate": 7.743096206787894e-06, "loss": 0.313, "step": 12741 }, { "epoch": 0.59, "grad_norm": 0.5175243670311491, "learning_rate": 7.741646702231106e-06, "loss": 0.3229, "step": 12742 }, { "epoch": 0.59, "grad_norm": 0.35181954183854286, "learning_rate": 7.74019724767208e-06, "loss": 0.2699, "step": 12743 }, { "epoch": 0.59, "grad_norm": 0.2701476654552088, "learning_rate": 7.738747843142912e-06, "loss": 0.1537, "step": 12744 }, { "epoch": 0.59, "grad_norm": 0.6194240777205317, "learning_rate": 7.737298488675687e-06, "loss": 0.3751, "step": 12745 }, { "epoch": 0.59, "grad_norm": 0.3610492826118389, "learning_rate": 7.735849184302493e-06, "loss": 0.2717, "step": 12746 }, { "epoch": 0.59, "grad_norm": 0.34197919673688043, "learning_rate": 7.734399930055412e-06, "loss": 0.2726, "step": 12747 }, { "epoch": 0.59, "grad_norm": 1.5824121641839315, "learning_rate": 7.732950725966537e-06, "loss": 0.6288, "step": 12748 }, { "epoch": 0.59, "grad_norm": 0.37736818535626143, "learning_rate": 7.731501572067946e-06, "loss": 0.2863, "step": 12749 }, { "epoch": 0.59, "grad_norm": 0.3283829294932836, "learning_rate": 7.730052468391726e-06, "loss": 0.2144, "step": 12750 }, { "epoch": 0.59, "grad_norm": 0.3596897938440602, "learning_rate": 7.728603414969956e-06, "loss": 0.28, "step": 12751 }, { "epoch": 0.59, "grad_norm": 0.32879325812867227, "learning_rate": 7.727154411834712e-06, "loss": 0.2668, "step": 12752 }, { "epoch": 0.59, "grad_norm": 1.3049994606863364, "learning_rate": 7.725705459018084e-06, "loss": 0.7871, "step": 12753 }, { "epoch": 0.59, "grad_norm": 0.3290970797401489, "learning_rate": 7.724256556552145e-06, "loss": 0.2436, "step": 12754 }, { "epoch": 0.59, "grad_norm": 0.3563676335902697, "learning_rate": 7.722807704468973e-06, "loss": 0.2476, "step": 12755 }, { "epoch": 0.59, "grad_norm": 0.32055420690503067, "learning_rate": 7.721358902800638e-06, "loss": 0.2064, "step": 12756 }, { "epoch": 0.59, "grad_norm": 0.5539872210323883, "learning_rate": 7.719910151579225e-06, "loss": 0.3239, "step": 12757 }, { "epoch": 0.59, "grad_norm": 0.40156659972048003, "learning_rate": 7.718461450836805e-06, "loss": 0.248, "step": 12758 }, { "epoch": 0.59, "grad_norm": 0.3251181043971125, "learning_rate": 7.717012800605447e-06, "loss": 0.2906, "step": 12759 }, { "epoch": 0.59, "grad_norm": 1.0709108370939942, "learning_rate": 7.715564200917226e-06, "loss": 0.318, "step": 12760 }, { "epoch": 0.59, "grad_norm": 0.4021975417533534, "learning_rate": 7.714115651804213e-06, "loss": 0.2663, "step": 12761 }, { "epoch": 0.59, "grad_norm": 0.3901642471537993, "learning_rate": 7.712667153298474e-06, "loss": 0.2715, "step": 12762 }, { "epoch": 0.59, "grad_norm": 0.37933918555287227, "learning_rate": 7.711218705432082e-06, "loss": 0.2902, "step": 12763 }, { "epoch": 0.59, "grad_norm": 0.2368246483065897, "learning_rate": 7.709770308237102e-06, "loss": 0.1601, "step": 12764 }, { "epoch": 0.59, "grad_norm": 1.215552913484588, "learning_rate": 7.708321961745597e-06, "loss": 0.7275, "step": 12765 }, { "epoch": 0.59, "grad_norm": 1.1616422435047316, "learning_rate": 7.706873665989638e-06, "loss": 0.5561, "step": 12766 }, { "epoch": 0.59, "grad_norm": 0.24761467343326626, "learning_rate": 7.705425421001285e-06, "loss": 0.2217, "step": 12767 }, { "epoch": 0.59, "grad_norm": 0.8164870011449455, "learning_rate": 7.703977226812602e-06, "loss": 0.5097, "step": 12768 }, { "epoch": 0.59, "grad_norm": 0.33185071084642837, "learning_rate": 7.702529083455646e-06, "loss": 0.236, "step": 12769 }, { "epoch": 0.59, "grad_norm": 0.35506545002646184, "learning_rate": 7.701080990962487e-06, "loss": 0.2058, "step": 12770 }, { "epoch": 0.59, "grad_norm": 0.4094554319150414, "learning_rate": 7.699632949365177e-06, "loss": 0.316, "step": 12771 }, { "epoch": 0.59, "grad_norm": 0.9157542452112523, "learning_rate": 7.698184958695781e-06, "loss": 0.5007, "step": 12772 }, { "epoch": 0.59, "grad_norm": 0.3401130446672728, "learning_rate": 7.696737018986342e-06, "loss": 0.1985, "step": 12773 }, { "epoch": 0.59, "grad_norm": 0.756741130196454, "learning_rate": 7.695289130268933e-06, "loss": 0.3878, "step": 12774 }, { "epoch": 0.59, "grad_norm": 0.24944867575725807, "learning_rate": 7.6938412925756e-06, "loss": 0.2195, "step": 12775 }, { "epoch": 0.59, "grad_norm": 0.6578990555319902, "learning_rate": 7.692393505938397e-06, "loss": 0.3611, "step": 12776 }, { "epoch": 0.59, "grad_norm": 0.39317868149535645, "learning_rate": 7.690945770389377e-06, "loss": 0.2456, "step": 12777 }, { "epoch": 0.59, "grad_norm": 0.3644619708417844, "learning_rate": 7.689498085960594e-06, "loss": 0.2963, "step": 12778 }, { "epoch": 0.59, "grad_norm": 0.657989171976488, "learning_rate": 7.688050452684096e-06, "loss": 0.3327, "step": 12779 }, { "epoch": 0.59, "grad_norm": 0.45478683551937554, "learning_rate": 7.686602870591933e-06, "loss": 0.266, "step": 12780 }, { "epoch": 0.59, "grad_norm": 0.5398651142568992, "learning_rate": 7.685155339716152e-06, "loss": 0.2379, "step": 12781 }, { "epoch": 0.59, "grad_norm": 0.4093107308539042, "learning_rate": 7.683707860088801e-06, "loss": 0.3016, "step": 12782 }, { "epoch": 0.59, "grad_norm": 0.22838055257729695, "learning_rate": 7.682260431741924e-06, "loss": 0.201, "step": 12783 }, { "epoch": 0.59, "grad_norm": 1.279961711598058, "learning_rate": 7.68081305470757e-06, "loss": 0.6938, "step": 12784 }, { "epoch": 0.59, "grad_norm": 0.4013193650335751, "learning_rate": 7.679365729017779e-06, "loss": 0.2952, "step": 12785 }, { "epoch": 0.59, "grad_norm": 0.46222146840440503, "learning_rate": 7.67791845470459e-06, "loss": 0.2852, "step": 12786 }, { "epoch": 0.59, "grad_norm": 0.3732974169639896, "learning_rate": 7.676471231800052e-06, "loss": 0.3603, "step": 12787 }, { "epoch": 0.59, "grad_norm": 0.4112471419339967, "learning_rate": 7.6750240603362e-06, "loss": 0.3156, "step": 12788 }, { "epoch": 0.59, "grad_norm": 0.531298729203499, "learning_rate": 7.673576940345078e-06, "loss": 0.3305, "step": 12789 }, { "epoch": 0.59, "grad_norm": 0.20477291820338664, "learning_rate": 7.672129871858715e-06, "loss": 0.1768, "step": 12790 }, { "epoch": 0.59, "grad_norm": 0.5384016917926084, "learning_rate": 7.670682854909158e-06, "loss": 0.3411, "step": 12791 }, { "epoch": 0.59, "grad_norm": 0.4069783142140041, "learning_rate": 7.669235889528436e-06, "loss": 0.3094, "step": 12792 }, { "epoch": 0.59, "grad_norm": 0.9646550019903518, "learning_rate": 7.667788975748584e-06, "loss": 0.3837, "step": 12793 }, { "epoch": 0.59, "grad_norm": 0.6975876498445063, "learning_rate": 7.666342113601638e-06, "loss": 0.3814, "step": 12794 }, { "epoch": 0.59, "grad_norm": 0.31653024690046894, "learning_rate": 7.664895303119625e-06, "loss": 0.289, "step": 12795 }, { "epoch": 0.59, "grad_norm": 0.3495541646132096, "learning_rate": 7.663448544334583e-06, "loss": 0.1983, "step": 12796 }, { "epoch": 0.59, "grad_norm": 0.6760886582871586, "learning_rate": 7.662001837278538e-06, "loss": 0.3736, "step": 12797 }, { "epoch": 0.59, "grad_norm": 0.3287275872272099, "learning_rate": 7.660555181983517e-06, "loss": 0.2721, "step": 12798 }, { "epoch": 0.59, "grad_norm": 0.49386304434525125, "learning_rate": 7.659108578481547e-06, "loss": 0.2748, "step": 12799 }, { "epoch": 0.59, "grad_norm": 0.6350300918549228, "learning_rate": 7.657662026804663e-06, "loss": 0.4218, "step": 12800 }, { "epoch": 0.59, "grad_norm": 0.29050876339873366, "learning_rate": 7.656215526984881e-06, "loss": 0.234, "step": 12801 }, { "epoch": 0.59, "grad_norm": 0.585248268041225, "learning_rate": 7.654769079054229e-06, "loss": 0.3403, "step": 12802 }, { "epoch": 0.59, "grad_norm": 0.2916575748229832, "learning_rate": 7.653322683044726e-06, "loss": 0.1869, "step": 12803 }, { "epoch": 0.59, "grad_norm": 0.4056545917373161, "learning_rate": 7.6518763389884e-06, "loss": 0.3014, "step": 12804 }, { "epoch": 0.59, "grad_norm": 1.014743588396409, "learning_rate": 7.65043004691727e-06, "loss": 0.534, "step": 12805 }, { "epoch": 0.59, "grad_norm": 0.2913004727458303, "learning_rate": 7.648983806863353e-06, "loss": 0.2218, "step": 12806 }, { "epoch": 0.59, "grad_norm": 0.40914832217632163, "learning_rate": 7.647537618858667e-06, "loss": 0.3413, "step": 12807 }, { "epoch": 0.59, "grad_norm": 0.5957950155155142, "learning_rate": 7.646091482935232e-06, "loss": 0.3739, "step": 12808 }, { "epoch": 0.59, "grad_norm": 0.16086236302141288, "learning_rate": 7.644645399125063e-06, "loss": 0.0718, "step": 12809 }, { "epoch": 0.59, "grad_norm": 0.3365292818234121, "learning_rate": 7.643199367460176e-06, "loss": 0.2683, "step": 12810 }, { "epoch": 0.59, "grad_norm": 0.5027099060763884, "learning_rate": 7.641753387972583e-06, "loss": 0.3656, "step": 12811 }, { "epoch": 0.59, "grad_norm": 0.5960161477661496, "learning_rate": 7.640307460694294e-06, "loss": 0.2564, "step": 12812 }, { "epoch": 0.59, "grad_norm": 0.36484368577096477, "learning_rate": 7.638861585657327e-06, "loss": 0.2779, "step": 12813 }, { "epoch": 0.59, "grad_norm": 0.36625999974295237, "learning_rate": 7.637415762893687e-06, "loss": 0.2886, "step": 12814 }, { "epoch": 0.59, "grad_norm": 0.39660244433373076, "learning_rate": 7.635969992435387e-06, "loss": 0.2307, "step": 12815 }, { "epoch": 0.59, "grad_norm": 0.29514312472976933, "learning_rate": 7.634524274314427e-06, "loss": 0.1662, "step": 12816 }, { "epoch": 0.59, "grad_norm": 1.4393208659335313, "learning_rate": 7.633078608562825e-06, "loss": 0.8331, "step": 12817 }, { "epoch": 0.59, "grad_norm": 0.34499480269669247, "learning_rate": 7.631632995212584e-06, "loss": 0.307, "step": 12818 }, { "epoch": 0.59, "grad_norm": 0.3711622614853309, "learning_rate": 7.630187434295701e-06, "loss": 0.255, "step": 12819 }, { "epoch": 0.59, "grad_norm": 0.7951413385460965, "learning_rate": 7.628741925844183e-06, "loss": 0.4492, "step": 12820 }, { "epoch": 0.59, "grad_norm": 0.28342384725328973, "learning_rate": 7.6272964698900356e-06, "loss": 0.1825, "step": 12821 }, { "epoch": 0.59, "grad_norm": 0.31005228266203044, "learning_rate": 7.6258510664652585e-06, "loss": 0.2298, "step": 12822 }, { "epoch": 0.59, "grad_norm": 0.5221430731418525, "learning_rate": 7.624405715601851e-06, "loss": 0.3881, "step": 12823 }, { "epoch": 0.59, "grad_norm": 0.6664092076245254, "learning_rate": 7.6229604173318095e-06, "loss": 0.3967, "step": 12824 }, { "epoch": 0.59, "grad_norm": 0.3746288616707745, "learning_rate": 7.6215151716871325e-06, "loss": 0.2918, "step": 12825 }, { "epoch": 0.59, "grad_norm": 0.365670511328936, "learning_rate": 7.620069978699819e-06, "loss": 0.2783, "step": 12826 }, { "epoch": 0.59, "grad_norm": 0.36337934937584165, "learning_rate": 7.618624838401863e-06, "loss": 0.2329, "step": 12827 }, { "epoch": 0.59, "grad_norm": 0.2973558764509843, "learning_rate": 7.617179750825257e-06, "loss": 0.2247, "step": 12828 }, { "epoch": 0.59, "grad_norm": 0.773967598551542, "learning_rate": 7.615734716001992e-06, "loss": 0.3495, "step": 12829 }, { "epoch": 0.59, "grad_norm": 0.3691076236509977, "learning_rate": 7.614289733964067e-06, "loss": 0.2959, "step": 12830 }, { "epoch": 0.59, "grad_norm": 0.3332786627056567, "learning_rate": 7.612844804743466e-06, "loss": 0.2581, "step": 12831 }, { "epoch": 0.59, "grad_norm": 0.8161709175043685, "learning_rate": 7.61139992837218e-06, "loss": 0.3472, "step": 12832 }, { "epoch": 0.59, "grad_norm": 0.2651327540304119, "learning_rate": 7.609955104882194e-06, "loss": 0.1268, "step": 12833 }, { "epoch": 0.59, "grad_norm": 0.29201991084979567, "learning_rate": 7.6085103343055024e-06, "loss": 0.2458, "step": 12834 }, { "epoch": 0.59, "grad_norm": 0.4137579831347058, "learning_rate": 7.607065616674088e-06, "loss": 0.2418, "step": 12835 }, { "epoch": 0.59, "grad_norm": 0.5811358087880002, "learning_rate": 7.605620952019932e-06, "loss": 0.3819, "step": 12836 }, { "epoch": 0.59, "grad_norm": 0.3648243408675913, "learning_rate": 7.6041763403750206e-06, "loss": 0.3217, "step": 12837 }, { "epoch": 0.59, "grad_norm": 0.38939675541487206, "learning_rate": 7.602731781771338e-06, "loss": 0.3153, "step": 12838 }, { "epoch": 0.59, "grad_norm": 0.35250109303940275, "learning_rate": 7.601287276240862e-06, "loss": 0.1459, "step": 12839 }, { "epoch": 0.59, "grad_norm": 0.2841498491678493, "learning_rate": 7.599842823815574e-06, "loss": 0.2142, "step": 12840 }, { "epoch": 0.59, "grad_norm": 1.0463220421002493, "learning_rate": 7.5983984245274535e-06, "loss": 0.5708, "step": 12841 }, { "epoch": 0.59, "grad_norm": 0.31571986377054756, "learning_rate": 7.596954078408474e-06, "loss": 0.2545, "step": 12842 }, { "epoch": 0.59, "grad_norm": 0.3822621139113642, "learning_rate": 7.595509785490618e-06, "loss": 0.261, "step": 12843 }, { "epoch": 0.59, "grad_norm": 1.1027278261238769, "learning_rate": 7.5940655458058575e-06, "loss": 0.6561, "step": 12844 }, { "epoch": 0.59, "grad_norm": 0.2703387372667672, "learning_rate": 7.592621359386167e-06, "loss": 0.1657, "step": 12845 }, { "epoch": 0.59, "grad_norm": 0.41805180238997125, "learning_rate": 7.591177226263515e-06, "loss": 0.2625, "step": 12846 }, { "epoch": 0.59, "grad_norm": 0.36889009839411935, "learning_rate": 7.589733146469884e-06, "loss": 0.2835, "step": 12847 }, { "epoch": 0.59, "grad_norm": 0.7746047983416644, "learning_rate": 7.588289120037236e-06, "loss": 0.3399, "step": 12848 }, { "epoch": 0.59, "grad_norm": 0.3607688395276889, "learning_rate": 7.586845146997542e-06, "loss": 0.2534, "step": 12849 }, { "epoch": 0.59, "grad_norm": 0.36356604323136266, "learning_rate": 7.585401227382767e-06, "loss": 0.304, "step": 12850 }, { "epoch": 0.59, "grad_norm": 1.300763438943114, "learning_rate": 7.583957361224886e-06, "loss": 0.649, "step": 12851 }, { "epoch": 0.59, "grad_norm": 0.25596065809993546, "learning_rate": 7.58251354855586e-06, "loss": 0.1581, "step": 12852 }, { "epoch": 0.59, "grad_norm": 0.517787628309515, "learning_rate": 7.581069789407654e-06, "loss": 0.288, "step": 12853 }, { "epoch": 0.59, "grad_norm": 0.40401200213242383, "learning_rate": 7.579626083812232e-06, "loss": 0.3168, "step": 12854 }, { "epoch": 0.59, "grad_norm": 0.30688570534735093, "learning_rate": 7.578182431801553e-06, "loss": 0.2074, "step": 12855 }, { "epoch": 0.59, "grad_norm": 1.0377389161180632, "learning_rate": 7.576738833407583e-06, "loss": 0.5494, "step": 12856 }, { "epoch": 0.59, "grad_norm": 0.4733115634627761, "learning_rate": 7.57529528866228e-06, "loss": 0.3815, "step": 12857 }, { "epoch": 0.59, "grad_norm": 0.2350094857062686, "learning_rate": 7.573851797597602e-06, "loss": 0.173, "step": 12858 }, { "epoch": 0.59, "grad_norm": 0.6189103680759699, "learning_rate": 7.572408360245504e-06, "loss": 0.4336, "step": 12859 }, { "epoch": 0.59, "grad_norm": 0.44556698438964754, "learning_rate": 7.570964976637949e-06, "loss": 0.2708, "step": 12860 }, { "epoch": 0.59, "grad_norm": 0.3209194729865041, "learning_rate": 7.569521646806888e-06, "loss": 0.1842, "step": 12861 }, { "epoch": 0.59, "grad_norm": 0.39003086522875924, "learning_rate": 7.568078370784274e-06, "loss": 0.3208, "step": 12862 }, { "epoch": 0.59, "grad_norm": 1.0004981500905583, "learning_rate": 7.566635148602057e-06, "loss": 0.4338, "step": 12863 }, { "epoch": 0.59, "grad_norm": 0.44192144372532094, "learning_rate": 7.565191980292197e-06, "loss": 0.2814, "step": 12864 }, { "epoch": 0.59, "grad_norm": 0.47067132542285656, "learning_rate": 7.563748865886642e-06, "loss": 0.2741, "step": 12865 }, { "epoch": 0.59, "grad_norm": 0.32129290949204087, "learning_rate": 7.562305805417337e-06, "loss": 0.2439, "step": 12866 }, { "epoch": 0.59, "grad_norm": 0.4718526296014951, "learning_rate": 7.560862798916229e-06, "loss": 0.3357, "step": 12867 }, { "epoch": 0.59, "grad_norm": 0.3636897969039732, "learning_rate": 7.55941984641527e-06, "loss": 0.1895, "step": 12868 }, { "epoch": 0.59, "grad_norm": 0.5305479271488595, "learning_rate": 7.557976947946404e-06, "loss": 0.381, "step": 12869 }, { "epoch": 0.59, "grad_norm": 0.3030235842661243, "learning_rate": 7.556534103541575e-06, "loss": 0.2769, "step": 12870 }, { "epoch": 0.59, "grad_norm": 0.7859037631330339, "learning_rate": 7.555091313232725e-06, "loss": 0.3216, "step": 12871 }, { "epoch": 0.59, "grad_norm": 0.41619394394085024, "learning_rate": 7.5536485770517955e-06, "loss": 0.2639, "step": 12872 }, { "epoch": 0.59, "grad_norm": 0.35468994877570575, "learning_rate": 7.5522058950307305e-06, "loss": 0.2647, "step": 12873 }, { "epoch": 0.59, "grad_norm": 0.29911493401788203, "learning_rate": 7.550763267201469e-06, "loss": 0.2041, "step": 12874 }, { "epoch": 0.59, "grad_norm": 0.8765168263795583, "learning_rate": 7.549320693595946e-06, "loss": 0.425, "step": 12875 }, { "epoch": 0.59, "grad_norm": 0.39418334619394546, "learning_rate": 7.5478781742461e-06, "loss": 0.2806, "step": 12876 }, { "epoch": 0.59, "grad_norm": 0.7639826852802636, "learning_rate": 7.546435709183871e-06, "loss": 0.4758, "step": 12877 }, { "epoch": 0.59, "grad_norm": 0.32194552157801365, "learning_rate": 7.544993298441189e-06, "loss": 0.2472, "step": 12878 }, { "epoch": 0.59, "grad_norm": 0.45605385201732895, "learning_rate": 7.5435509420499896e-06, "loss": 0.3076, "step": 12879 }, { "epoch": 0.59, "grad_norm": 0.2826810367449615, "learning_rate": 7.5421086400422e-06, "loss": 0.2107, "step": 12880 }, { "epoch": 0.59, "grad_norm": 0.47706502200032874, "learning_rate": 7.5406663924497615e-06, "loss": 0.2693, "step": 12881 }, { "epoch": 0.59, "grad_norm": 0.4114497182822956, "learning_rate": 7.539224199304598e-06, "loss": 0.2667, "step": 12882 }, { "epoch": 0.59, "grad_norm": 0.5675418669488211, "learning_rate": 7.537782060638641e-06, "loss": 0.3727, "step": 12883 }, { "epoch": 0.59, "grad_norm": 1.0185805933549796, "learning_rate": 7.536339976483815e-06, "loss": 0.482, "step": 12884 }, { "epoch": 0.59, "grad_norm": 0.4570731343768416, "learning_rate": 7.534897946872042e-06, "loss": 0.2786, "step": 12885 }, { "epoch": 0.59, "grad_norm": 0.2397819651941745, "learning_rate": 7.533455971835257e-06, "loss": 0.2128, "step": 12886 }, { "epoch": 0.59, "grad_norm": 0.7727184499499008, "learning_rate": 7.532014051405381e-06, "loss": 0.3074, "step": 12887 }, { "epoch": 0.59, "grad_norm": 0.41957746360385845, "learning_rate": 7.530572185614333e-06, "loss": 0.2807, "step": 12888 }, { "epoch": 0.59, "grad_norm": 0.41918086966068613, "learning_rate": 7.529130374494036e-06, "loss": 0.3197, "step": 12889 }, { "epoch": 0.59, "grad_norm": 0.5367698944695509, "learning_rate": 7.527688618076413e-06, "loss": 0.3761, "step": 12890 }, { "epoch": 0.59, "grad_norm": 0.32439921311449227, "learning_rate": 7.52624691639338e-06, "loss": 0.2158, "step": 12891 }, { "epoch": 0.59, "grad_norm": 0.2833917110667752, "learning_rate": 7.524805269476858e-06, "loss": 0.1687, "step": 12892 }, { "epoch": 0.59, "grad_norm": 0.46308838947875286, "learning_rate": 7.523363677358757e-06, "loss": 0.3309, "step": 12893 }, { "epoch": 0.59, "grad_norm": 0.32273526539448355, "learning_rate": 7.521922140071003e-06, "loss": 0.2257, "step": 12894 }, { "epoch": 0.59, "grad_norm": 1.4159908074856369, "learning_rate": 7.520480657645502e-06, "loss": 0.6065, "step": 12895 }, { "epoch": 0.59, "grad_norm": 1.1676292066866414, "learning_rate": 7.519039230114169e-06, "loss": 0.7432, "step": 12896 }, { "epoch": 0.59, "grad_norm": 0.3871499384259446, "learning_rate": 7.5175978575089135e-06, "loss": 0.2035, "step": 12897 }, { "epoch": 0.59, "grad_norm": 0.3523947189248126, "learning_rate": 7.516156539861652e-06, "loss": 0.2911, "step": 12898 }, { "epoch": 0.59, "grad_norm": 0.38413073574532336, "learning_rate": 7.514715277204292e-06, "loss": 0.2387, "step": 12899 }, { "epoch": 0.59, "grad_norm": 0.3841868225721464, "learning_rate": 7.513274069568739e-06, "loss": 0.1866, "step": 12900 }, { "epoch": 0.59, "grad_norm": 0.41499574535414846, "learning_rate": 7.511832916986902e-06, "loss": 0.2892, "step": 12901 }, { "epoch": 0.59, "grad_norm": 0.5461189823082333, "learning_rate": 7.510391819490683e-06, "loss": 0.4186, "step": 12902 }, { "epoch": 0.59, "grad_norm": 0.6138453773191452, "learning_rate": 7.508950777111993e-06, "loss": 0.3808, "step": 12903 }, { "epoch": 0.59, "grad_norm": 0.4387332854452208, "learning_rate": 7.507509789882732e-06, "loss": 0.2667, "step": 12904 }, { "epoch": 0.59, "grad_norm": 0.3950588510717907, "learning_rate": 7.506068857834801e-06, "loss": 0.2761, "step": 12905 }, { "epoch": 0.59, "grad_norm": 0.2847227124557217, "learning_rate": 7.504627981000101e-06, "loss": 0.2151, "step": 12906 }, { "epoch": 0.59, "grad_norm": 0.4038562941356066, "learning_rate": 7.503187159410533e-06, "loss": 0.2479, "step": 12907 }, { "epoch": 0.59, "grad_norm": 1.354104507667754, "learning_rate": 7.501746393097995e-06, "loss": 0.8238, "step": 12908 }, { "epoch": 0.59, "grad_norm": 0.34265097540483663, "learning_rate": 7.500305682094385e-06, "loss": 0.2646, "step": 12909 }, { "epoch": 0.59, "grad_norm": 0.4049564492703838, "learning_rate": 7.498865026431593e-06, "loss": 0.2515, "step": 12910 }, { "epoch": 0.59, "grad_norm": 0.5021444033513522, "learning_rate": 7.497424426141524e-06, "loss": 0.312, "step": 12911 }, { "epoch": 0.59, "grad_norm": 0.2936307844217129, "learning_rate": 7.495983881256067e-06, "loss": 0.2014, "step": 12912 }, { "epoch": 0.59, "grad_norm": 0.3606542715199571, "learning_rate": 7.494543391807112e-06, "loss": 0.2401, "step": 12913 }, { "epoch": 0.59, "grad_norm": 0.4870336436072836, "learning_rate": 7.493102957826552e-06, "loss": 0.3833, "step": 12914 }, { "epoch": 0.59, "grad_norm": 0.5262801541303066, "learning_rate": 7.49166257934627e-06, "loss": 0.2958, "step": 12915 }, { "epoch": 0.59, "grad_norm": 0.4231854016782443, "learning_rate": 7.4902222563981675e-06, "loss": 0.3421, "step": 12916 }, { "epoch": 0.59, "grad_norm": 0.36273439945042396, "learning_rate": 7.488781989014124e-06, "loss": 0.2514, "step": 12917 }, { "epoch": 0.59, "grad_norm": 0.2746929236774449, "learning_rate": 7.487341777226027e-06, "loss": 0.1506, "step": 12918 }, { "epoch": 0.59, "grad_norm": 0.3936245599877834, "learning_rate": 7.48590162106576e-06, "loss": 0.2899, "step": 12919 }, { "epoch": 0.59, "grad_norm": 0.7090525804302139, "learning_rate": 7.484461520565209e-06, "loss": 0.3783, "step": 12920 }, { "epoch": 0.59, "grad_norm": 0.32721952551972744, "learning_rate": 7.483021475756257e-06, "loss": 0.2609, "step": 12921 }, { "epoch": 0.59, "grad_norm": 0.3550835500263203, "learning_rate": 7.481581486670783e-06, "loss": 0.3103, "step": 12922 }, { "epoch": 0.59, "grad_norm": 1.0249920983102796, "learning_rate": 7.480141553340665e-06, "loss": 0.3376, "step": 12923 }, { "epoch": 0.59, "grad_norm": 0.24647472121299804, "learning_rate": 7.478701675797786e-06, "loss": 0.1414, "step": 12924 }, { "epoch": 0.59, "grad_norm": 0.28260057752083656, "learning_rate": 7.4772618540740225e-06, "loss": 0.2898, "step": 12925 }, { "epoch": 0.59, "grad_norm": 0.9032353049464689, "learning_rate": 7.47582208820125e-06, "loss": 0.3498, "step": 12926 }, { "epoch": 0.59, "grad_norm": 0.511033236305105, "learning_rate": 7.47438237821134e-06, "loss": 0.3477, "step": 12927 }, { "epoch": 0.59, "grad_norm": 0.39565591681249984, "learning_rate": 7.472942724136174e-06, "loss": 0.2844, "step": 12928 }, { "epoch": 0.59, "grad_norm": 0.4067107530259097, "learning_rate": 7.47150312600762e-06, "loss": 0.3023, "step": 12929 }, { "epoch": 0.59, "grad_norm": 0.1721850524228638, "learning_rate": 7.470063583857552e-06, "loss": 0.0729, "step": 12930 }, { "epoch": 0.59, "grad_norm": 0.43829835967392805, "learning_rate": 7.468624097717836e-06, "loss": 0.3041, "step": 12931 }, { "epoch": 0.59, "grad_norm": 0.6352649910966798, "learning_rate": 7.467184667620337e-06, "loss": 0.4056, "step": 12932 }, { "epoch": 0.59, "grad_norm": 0.330768838277074, "learning_rate": 7.465745293596934e-06, "loss": 0.2376, "step": 12933 }, { "epoch": 0.59, "grad_norm": 0.4090030683472436, "learning_rate": 7.464305975679488e-06, "loss": 0.3319, "step": 12934 }, { "epoch": 0.59, "grad_norm": 1.4835769819423026, "learning_rate": 7.462866713899863e-06, "loss": 0.7763, "step": 12935 }, { "epoch": 0.59, "grad_norm": 0.17897073925182497, "learning_rate": 7.461427508289922e-06, "loss": 0.0858, "step": 12936 }, { "epoch": 0.59, "grad_norm": 0.2675707804343146, "learning_rate": 7.459988358881532e-06, "loss": 0.2664, "step": 12937 }, { "epoch": 0.59, "grad_norm": 0.6790215164307541, "learning_rate": 7.458549265706549e-06, "loss": 0.4128, "step": 12938 }, { "epoch": 0.59, "grad_norm": 0.6852820439773292, "learning_rate": 7.457110228796838e-06, "loss": 0.271, "step": 12939 }, { "epoch": 0.59, "grad_norm": 0.3705358249848323, "learning_rate": 7.455671248184253e-06, "loss": 0.2981, "step": 12940 }, { "epoch": 0.59, "grad_norm": 0.4157289229678683, "learning_rate": 7.454232323900656e-06, "loss": 0.3336, "step": 12941 }, { "epoch": 0.59, "grad_norm": 0.45687373389646935, "learning_rate": 7.452793455977903e-06, "loss": 0.2965, "step": 12942 }, { "epoch": 0.59, "grad_norm": 0.2827466822253932, "learning_rate": 7.451354644447847e-06, "loss": 0.1825, "step": 12943 }, { "epoch": 0.59, "grad_norm": 1.0328241254967943, "learning_rate": 7.449915889342343e-06, "loss": 0.3834, "step": 12944 }, { "epoch": 0.59, "grad_norm": 0.3528911549065219, "learning_rate": 7.448477190693238e-06, "loss": 0.3115, "step": 12945 }, { "epoch": 0.59, "grad_norm": 0.3518699507900261, "learning_rate": 7.447038548532395e-06, "loss": 0.2176, "step": 12946 }, { "epoch": 0.59, "grad_norm": 1.018753186019547, "learning_rate": 7.445599962891656e-06, "loss": 0.5263, "step": 12947 }, { "epoch": 0.59, "grad_norm": 0.32103850373717274, "learning_rate": 7.444161433802874e-06, "loss": 0.2499, "step": 12948 }, { "epoch": 0.59, "grad_norm": 0.31167196264725294, "learning_rate": 7.44272296129789e-06, "loss": 0.2251, "step": 12949 }, { "epoch": 0.59, "grad_norm": 0.6712217770428013, "learning_rate": 7.441284545408558e-06, "loss": 0.2855, "step": 12950 }, { "epoch": 0.59, "grad_norm": 0.7841371738004029, "learning_rate": 7.4398461861667214e-06, "loss": 0.509, "step": 12951 }, { "epoch": 0.6, "grad_norm": 0.4020468075489816, "learning_rate": 7.438407883604221e-06, "loss": 0.2711, "step": 12952 }, { "epoch": 0.6, "grad_norm": 0.36936884797303865, "learning_rate": 7.4369696377529e-06, "loss": 0.2532, "step": 12953 }, { "epoch": 0.6, "grad_norm": 0.45911041645853273, "learning_rate": 7.435531448644603e-06, "loss": 0.2501, "step": 12954 }, { "epoch": 0.6, "grad_norm": 0.36566309824678284, "learning_rate": 7.434093316311167e-06, "loss": 0.2602, "step": 12955 }, { "epoch": 0.6, "grad_norm": 0.4160880116754914, "learning_rate": 7.432655240784433e-06, "loss": 0.238, "step": 12956 }, { "epoch": 0.6, "grad_norm": 0.5553620355590713, "learning_rate": 7.431217222096233e-06, "loss": 0.3861, "step": 12957 }, { "epoch": 0.6, "grad_norm": 0.3145543464002043, "learning_rate": 7.429779260278411e-06, "loss": 0.2552, "step": 12958 }, { "epoch": 0.6, "grad_norm": 0.9477306390869384, "learning_rate": 7.428341355362803e-06, "loss": 0.2483, "step": 12959 }, { "epoch": 0.6, "grad_norm": 0.40991439409074804, "learning_rate": 7.426903507381235e-06, "loss": 0.3306, "step": 12960 }, { "epoch": 0.6, "grad_norm": 0.2794768696400758, "learning_rate": 7.4254657163655456e-06, "loss": 0.238, "step": 12961 }, { "epoch": 0.6, "grad_norm": 0.3976899003163253, "learning_rate": 7.4240279823475584e-06, "loss": 0.191, "step": 12962 }, { "epoch": 0.6, "grad_norm": 0.7362834362347954, "learning_rate": 7.422590305359112e-06, "loss": 0.4724, "step": 12963 }, { "epoch": 0.6, "grad_norm": 0.31000032295001145, "learning_rate": 7.421152685432034e-06, "loss": 0.2344, "step": 12964 }, { "epoch": 0.6, "grad_norm": 0.37723975421260075, "learning_rate": 7.419715122598149e-06, "loss": 0.3395, "step": 12965 }, { "epoch": 0.6, "grad_norm": 0.5409301455347854, "learning_rate": 7.418277616889282e-06, "loss": 0.2828, "step": 12966 }, { "epoch": 0.6, "grad_norm": 0.3649752644331592, "learning_rate": 7.416840168337263e-06, "loss": 0.2516, "step": 12967 }, { "epoch": 0.6, "grad_norm": 0.573393814439926, "learning_rate": 7.415402776973913e-06, "loss": 0.289, "step": 12968 }, { "epoch": 0.6, "grad_norm": 0.3522208471237922, "learning_rate": 7.413965442831054e-06, "loss": 0.265, "step": 12969 }, { "epoch": 0.6, "grad_norm": 0.33196103352826073, "learning_rate": 7.412528165940505e-06, "loss": 0.2303, "step": 12970 }, { "epoch": 0.6, "grad_norm": 0.4561045034561405, "learning_rate": 7.411090946334092e-06, "loss": 0.2507, "step": 12971 }, { "epoch": 0.6, "grad_norm": 0.4875120685821009, "learning_rate": 7.409653784043629e-06, "loss": 0.2934, "step": 12972 }, { "epoch": 0.6, "grad_norm": 0.29673461547138197, "learning_rate": 7.408216679100935e-06, "loss": 0.2512, "step": 12973 }, { "epoch": 0.6, "grad_norm": 0.8601398064876824, "learning_rate": 7.4067796315378256e-06, "loss": 0.5544, "step": 12974 }, { "epoch": 0.6, "grad_norm": 0.2612260558981168, "learning_rate": 7.405342641386113e-06, "loss": 0.1607, "step": 12975 }, { "epoch": 0.6, "grad_norm": 0.26771645064023897, "learning_rate": 7.4039057086776165e-06, "loss": 0.212, "step": 12976 }, { "epoch": 0.6, "grad_norm": 0.3650315700412918, "learning_rate": 7.402468833444147e-06, "loss": 0.2876, "step": 12977 }, { "epoch": 0.6, "grad_norm": 0.8963960601936947, "learning_rate": 7.401032015717513e-06, "loss": 0.3917, "step": 12978 }, { "epoch": 0.6, "grad_norm": 0.34486202619409023, "learning_rate": 7.3995952555295215e-06, "loss": 0.2134, "step": 12979 }, { "epoch": 0.6, "grad_norm": 1.0658749133171492, "learning_rate": 7.398158552911987e-06, "loss": 0.5699, "step": 12980 }, { "epoch": 0.6, "grad_norm": 0.36930349359416115, "learning_rate": 7.3967219078967155e-06, "loss": 0.3199, "step": 12981 }, { "epoch": 0.6, "grad_norm": 0.292476351533126, "learning_rate": 7.395285320515513e-06, "loss": 0.1822, "step": 12982 }, { "epoch": 0.6, "grad_norm": 0.3303378213665017, "learning_rate": 7.39384879080018e-06, "loss": 0.2117, "step": 12983 }, { "epoch": 0.6, "grad_norm": 0.40988130099756026, "learning_rate": 7.392412318782524e-06, "loss": 0.3246, "step": 12984 }, { "epoch": 0.6, "grad_norm": 0.3255998657446926, "learning_rate": 7.390975904494346e-06, "loss": 0.1881, "step": 12985 }, { "epoch": 0.6, "grad_norm": 1.216094188090242, "learning_rate": 7.389539547967448e-06, "loss": 0.658, "step": 12986 }, { "epoch": 0.6, "grad_norm": 0.43338775316463984, "learning_rate": 7.388103249233627e-06, "loss": 0.2883, "step": 12987 }, { "epoch": 0.6, "grad_norm": 0.3177226572434137, "learning_rate": 7.3866670083246835e-06, "loss": 0.1984, "step": 12988 }, { "epoch": 0.6, "grad_norm": 0.3132683181742891, "learning_rate": 7.385230825272414e-06, "loss": 0.2614, "step": 12989 }, { "epoch": 0.6, "grad_norm": 0.6705016622637922, "learning_rate": 7.383794700108614e-06, "loss": 0.463, "step": 12990 }, { "epoch": 0.6, "grad_norm": 0.45946291080906965, "learning_rate": 7.382358632865079e-06, "loss": 0.2974, "step": 12991 }, { "epoch": 0.6, "grad_norm": 0.31939324130768193, "learning_rate": 7.380922623573594e-06, "loss": 0.2458, "step": 12992 }, { "epoch": 0.6, "grad_norm": 1.185453823755025, "learning_rate": 7.379486672265964e-06, "loss": 0.7071, "step": 12993 }, { "epoch": 0.6, "grad_norm": 0.41128090508604503, "learning_rate": 7.378050778973973e-06, "loss": 0.254, "step": 12994 }, { "epoch": 0.6, "grad_norm": 0.6022905690643766, "learning_rate": 7.376614943729412e-06, "loss": 0.2889, "step": 12995 }, { "epoch": 0.6, "grad_norm": 0.23883759389694836, "learning_rate": 7.375179166564062e-06, "loss": 0.2227, "step": 12996 }, { "epoch": 0.6, "grad_norm": 0.36485720940927485, "learning_rate": 7.373743447509721e-06, "loss": 0.253, "step": 12997 }, { "epoch": 0.6, "grad_norm": 0.93658813794875, "learning_rate": 7.372307786598168e-06, "loss": 0.3241, "step": 12998 }, { "epoch": 0.6, "grad_norm": 0.9687499360416071, "learning_rate": 7.3708721838611865e-06, "loss": 0.5506, "step": 12999 }, { "epoch": 0.6, "grad_norm": 0.36090762209432337, "learning_rate": 7.36943663933056e-06, "loss": 0.2649, "step": 13000 }, { "epoch": 0.6, "grad_norm": 0.4165782447203431, "learning_rate": 7.368001153038073e-06, "loss": 0.27, "step": 13001 }, { "epoch": 0.6, "grad_norm": 0.2846790148806605, "learning_rate": 7.366565725015504e-06, "loss": 0.1851, "step": 13002 }, { "epoch": 0.6, "grad_norm": 0.7004557034314004, "learning_rate": 7.36513035529463e-06, "loss": 0.3156, "step": 13003 }, { "epoch": 0.6, "grad_norm": 0.3730580383521489, "learning_rate": 7.363695043907233e-06, "loss": 0.2893, "step": 13004 }, { "epoch": 0.6, "grad_norm": 0.4915608245532158, "learning_rate": 7.36225979088508e-06, "loss": 0.2915, "step": 13005 }, { "epoch": 0.6, "grad_norm": 0.4084196751072741, "learning_rate": 7.360824596259961e-06, "loss": 0.2409, "step": 13006 }, { "epoch": 0.6, "grad_norm": 0.5228129468718864, "learning_rate": 7.35938946006364e-06, "loss": 0.3332, "step": 13007 }, { "epoch": 0.6, "grad_norm": 0.2788677511320499, "learning_rate": 7.3579543823278894e-06, "loss": 0.2006, "step": 13008 }, { "epoch": 0.6, "grad_norm": 0.3024163517332204, "learning_rate": 7.35651936308448e-06, "loss": 0.2165, "step": 13009 }, { "epoch": 0.6, "grad_norm": 0.5248515133621819, "learning_rate": 7.355084402365188e-06, "loss": 0.3477, "step": 13010 }, { "epoch": 0.6, "grad_norm": 0.8199514678979798, "learning_rate": 7.353649500201778e-06, "loss": 0.3291, "step": 13011 }, { "epoch": 0.6, "grad_norm": 0.34492604200175475, "learning_rate": 7.352214656626017e-06, "loss": 0.2775, "step": 13012 }, { "epoch": 0.6, "grad_norm": 0.36172735503415093, "learning_rate": 7.350779871669669e-06, "loss": 0.3159, "step": 13013 }, { "epoch": 0.6, "grad_norm": 0.19480498299918567, "learning_rate": 7.3493451453645035e-06, "loss": 0.0898, "step": 13014 }, { "epoch": 0.6, "grad_norm": 0.4153662119312081, "learning_rate": 7.347910477742284e-06, "loss": 0.2333, "step": 13015 }, { "epoch": 0.6, "grad_norm": 0.5679187037766412, "learning_rate": 7.346475868834768e-06, "loss": 0.3993, "step": 13016 }, { "epoch": 0.6, "grad_norm": 0.36144328483528043, "learning_rate": 7.345041318673717e-06, "loss": 0.3082, "step": 13017 }, { "epoch": 0.6, "grad_norm": 0.3649302728864425, "learning_rate": 7.343606827290895e-06, "loss": 0.2297, "step": 13018 }, { "epoch": 0.6, "grad_norm": 1.1931353273253702, "learning_rate": 7.342172394718057e-06, "loss": 0.621, "step": 13019 }, { "epoch": 0.6, "grad_norm": 0.30483494760970925, "learning_rate": 7.340738020986961e-06, "loss": 0.2679, "step": 13020 }, { "epoch": 0.6, "grad_norm": 0.27629496027533135, "learning_rate": 7.339303706129361e-06, "loss": 0.0929, "step": 13021 }, { "epoch": 0.6, "grad_norm": 0.4027911985525034, "learning_rate": 7.337869450177011e-06, "loss": 0.3012, "step": 13022 }, { "epoch": 0.6, "grad_norm": 0.6064316470883844, "learning_rate": 7.336435253161667e-06, "loss": 0.4015, "step": 13023 }, { "epoch": 0.6, "grad_norm": 0.32408597405033895, "learning_rate": 7.335001115115084e-06, "loss": 0.2241, "step": 13024 }, { "epoch": 0.6, "grad_norm": 0.43004749980329615, "learning_rate": 7.333567036069003e-06, "loss": 0.3052, "step": 13025 }, { "epoch": 0.6, "grad_norm": 0.45510377943488306, "learning_rate": 7.332133016055175e-06, "loss": 0.2364, "step": 13026 }, { "epoch": 0.6, "grad_norm": 0.2675529744256249, "learning_rate": 7.330699055105354e-06, "loss": 0.1364, "step": 13027 }, { "epoch": 0.6, "grad_norm": 0.32229859800169836, "learning_rate": 7.329265153251285e-06, "loss": 0.2885, "step": 13028 }, { "epoch": 0.6, "grad_norm": 0.7200967573502258, "learning_rate": 7.327831310524711e-06, "loss": 0.3695, "step": 13029 }, { "epoch": 0.6, "grad_norm": 0.5214081462208413, "learning_rate": 7.326397526957374e-06, "loss": 0.4025, "step": 13030 }, { "epoch": 0.6, "grad_norm": 0.4098261913459206, "learning_rate": 7.32496380258102e-06, "loss": 0.2278, "step": 13031 }, { "epoch": 0.6, "grad_norm": 0.385961412976362, "learning_rate": 7.323530137427391e-06, "loss": 0.3014, "step": 13032 }, { "epoch": 0.6, "grad_norm": 0.27119181186433017, "learning_rate": 7.322096531528222e-06, "loss": 0.155, "step": 13033 }, { "epoch": 0.6, "grad_norm": 0.397251200496358, "learning_rate": 7.320662984915258e-06, "loss": 0.2392, "step": 13034 }, { "epoch": 0.6, "grad_norm": 0.9293902533522632, "learning_rate": 7.31922949762023e-06, "loss": 0.4463, "step": 13035 }, { "epoch": 0.6, "grad_norm": 0.33847977650002437, "learning_rate": 7.317796069674878e-06, "loss": 0.2888, "step": 13036 }, { "epoch": 0.6, "grad_norm": 0.509630429493513, "learning_rate": 7.316362701110938e-06, "loss": 0.2358, "step": 13037 }, { "epoch": 0.6, "grad_norm": 0.5152972863342689, "learning_rate": 7.314929391960139e-06, "loss": 0.2653, "step": 13038 }, { "epoch": 0.6, "grad_norm": 0.3814692966666743, "learning_rate": 7.3134961422542125e-06, "loss": 0.2221, "step": 13039 }, { "epoch": 0.6, "grad_norm": 0.3128663462681856, "learning_rate": 7.312062952024896e-06, "loss": 0.2326, "step": 13040 }, { "epoch": 0.6, "grad_norm": 1.2265954788348128, "learning_rate": 7.310629821303916e-06, "loss": 0.4221, "step": 13041 }, { "epoch": 0.6, "grad_norm": 0.8704467366316939, "learning_rate": 7.309196750123001e-06, "loss": 0.4628, "step": 13042 }, { "epoch": 0.6, "grad_norm": 0.38942320723238094, "learning_rate": 7.30776373851387e-06, "loss": 0.2862, "step": 13043 }, { "epoch": 0.6, "grad_norm": 0.38730732047861177, "learning_rate": 7.30633078650826e-06, "loss": 0.2501, "step": 13044 }, { "epoch": 0.6, "grad_norm": 0.44193429372339893, "learning_rate": 7.30489789413789e-06, "loss": 0.2099, "step": 13045 }, { "epoch": 0.6, "grad_norm": 0.8911154465731558, "learning_rate": 7.303465061434483e-06, "loss": 0.2892, "step": 13046 }, { "epoch": 0.6, "grad_norm": 1.2495053771731763, "learning_rate": 7.3020322884297565e-06, "loss": 0.4139, "step": 13047 }, { "epoch": 0.6, "grad_norm": 0.3103308970579325, "learning_rate": 7.300599575155441e-06, "loss": 0.2681, "step": 13048 }, { "epoch": 0.6, "grad_norm": 0.2995619785578511, "learning_rate": 7.299166921643246e-06, "loss": 0.2209, "step": 13049 }, { "epoch": 0.6, "grad_norm": 1.2103027052171522, "learning_rate": 7.297734327924892e-06, "loss": 0.2877, "step": 13050 }, { "epoch": 0.6, "grad_norm": 0.6478452626427619, "learning_rate": 7.296301794032097e-06, "loss": 0.3283, "step": 13051 }, { "epoch": 0.6, "grad_norm": 0.36476800654657787, "learning_rate": 7.294869319996571e-06, "loss": 0.2999, "step": 13052 }, { "epoch": 0.6, "grad_norm": 0.39234819969857054, "learning_rate": 7.2934369058500355e-06, "loss": 0.2139, "step": 13053 }, { "epoch": 0.6, "grad_norm": 0.3845981612873682, "learning_rate": 7.292004551624196e-06, "loss": 0.2726, "step": 13054 }, { "epoch": 0.6, "grad_norm": 0.4641540653785157, "learning_rate": 7.290572257350768e-06, "loss": 0.3076, "step": 13055 }, { "epoch": 0.6, "grad_norm": 0.3755342998765627, "learning_rate": 7.289140023061452e-06, "loss": 0.3029, "step": 13056 }, { "epoch": 0.6, "grad_norm": 0.6510659785724403, "learning_rate": 7.287707848787968e-06, "loss": 0.3148, "step": 13057 }, { "epoch": 0.6, "grad_norm": 0.31994317698089164, "learning_rate": 7.286275734562019e-06, "loss": 0.2631, "step": 13058 }, { "epoch": 0.6, "grad_norm": 0.5030936424675906, "learning_rate": 7.28484368041531e-06, "loss": 0.3969, "step": 13059 }, { "epoch": 0.6, "grad_norm": 0.27252219634268565, "learning_rate": 7.283411686379543e-06, "loss": 0.1692, "step": 13060 }, { "epoch": 0.6, "grad_norm": 0.28007333258290534, "learning_rate": 7.281979752486423e-06, "loss": 0.2081, "step": 13061 }, { "epoch": 0.6, "grad_norm": 0.8754471020163299, "learning_rate": 7.280547878767654e-06, "loss": 0.5395, "step": 13062 }, { "epoch": 0.6, "grad_norm": 0.4185104039530996, "learning_rate": 7.279116065254932e-06, "loss": 0.2721, "step": 13063 }, { "epoch": 0.6, "grad_norm": 0.31987776487947406, "learning_rate": 7.277684311979959e-06, "loss": 0.2725, "step": 13064 }, { "epoch": 0.6, "grad_norm": 1.1787505201373472, "learning_rate": 7.276252618974428e-06, "loss": 0.6367, "step": 13065 }, { "epoch": 0.6, "grad_norm": 0.30342178765555655, "learning_rate": 7.274820986270043e-06, "loss": 0.2223, "step": 13066 }, { "epoch": 0.6, "grad_norm": 0.3576018603811819, "learning_rate": 7.273389413898495e-06, "loss": 0.2066, "step": 13067 }, { "epoch": 0.6, "grad_norm": 0.4193576528825786, "learning_rate": 7.2719579018914756e-06, "loss": 0.3127, "step": 13068 }, { "epoch": 0.6, "grad_norm": 0.6913150708142002, "learning_rate": 7.270526450280675e-06, "loss": 0.3989, "step": 13069 }, { "epoch": 0.6, "grad_norm": 0.3915456066879617, "learning_rate": 7.269095059097793e-06, "loss": 0.2175, "step": 13070 }, { "epoch": 0.6, "grad_norm": 1.3355515963637203, "learning_rate": 7.267663728374517e-06, "loss": 0.7556, "step": 13071 }, { "epoch": 0.6, "grad_norm": 0.2810694827883178, "learning_rate": 7.266232458142529e-06, "loss": 0.2236, "step": 13072 }, { "epoch": 0.6, "grad_norm": 0.2636793126359312, "learning_rate": 7.264801248433516e-06, "loss": 0.1707, "step": 13073 }, { "epoch": 0.6, "grad_norm": 0.7198684750370714, "learning_rate": 7.263370099279173e-06, "loss": 0.4095, "step": 13074 }, { "epoch": 0.6, "grad_norm": 0.3841803241461575, "learning_rate": 7.261939010711175e-06, "loss": 0.3199, "step": 13075 }, { "epoch": 0.6, "grad_norm": 0.3353259106184257, "learning_rate": 7.260507982761211e-06, "loss": 0.2013, "step": 13076 }, { "epoch": 0.6, "grad_norm": 1.3137467478344542, "learning_rate": 7.259077015460956e-06, "loss": 0.876, "step": 13077 }, { "epoch": 0.6, "grad_norm": 0.4609138630836044, "learning_rate": 7.257646108842098e-06, "loss": 0.2413, "step": 13078 }, { "epoch": 0.6, "grad_norm": 0.2668068985743164, "learning_rate": 7.25621526293631e-06, "loss": 0.2116, "step": 13079 }, { "epoch": 0.6, "grad_norm": 0.3546271827372109, "learning_rate": 7.254784477775274e-06, "loss": 0.2472, "step": 13080 }, { "epoch": 0.6, "grad_norm": 0.8105485037615277, "learning_rate": 7.253353753390662e-06, "loss": 0.4661, "step": 13081 }, { "epoch": 0.6, "grad_norm": 0.3470179315261528, "learning_rate": 7.251923089814149e-06, "loss": 0.2305, "step": 13082 }, { "epoch": 0.6, "grad_norm": 0.8192897524069326, "learning_rate": 7.250492487077412e-06, "loss": 0.5378, "step": 13083 }, { "epoch": 0.6, "grad_norm": 0.30696030657633144, "learning_rate": 7.2490619452121226e-06, "loss": 0.2671, "step": 13084 }, { "epoch": 0.6, "grad_norm": 0.34659660102493167, "learning_rate": 7.247631464249949e-06, "loss": 0.2684, "step": 13085 }, { "epoch": 0.6, "grad_norm": 0.5281285543453738, "learning_rate": 7.246201044222558e-06, "loss": 0.2282, "step": 13086 }, { "epoch": 0.6, "grad_norm": 0.3364294968566659, "learning_rate": 7.2447706851616265e-06, "loss": 0.2867, "step": 13087 }, { "epoch": 0.6, "grad_norm": 0.39739548134382874, "learning_rate": 7.243340387098816e-06, "loss": 0.2646, "step": 13088 }, { "epoch": 0.6, "grad_norm": 0.4898473015620399, "learning_rate": 7.241910150065795e-06, "loss": 0.2973, "step": 13089 }, { "epoch": 0.6, "grad_norm": 0.6350048478381867, "learning_rate": 7.240479974094219e-06, "loss": 0.335, "step": 13090 }, { "epoch": 0.6, "grad_norm": 0.3911313223915429, "learning_rate": 7.23904985921576e-06, "loss": 0.3012, "step": 13091 }, { "epoch": 0.6, "grad_norm": 0.3293102712141958, "learning_rate": 7.2376198054620765e-06, "loss": 0.3134, "step": 13092 }, { "epoch": 0.6, "grad_norm": 0.2168469566508136, "learning_rate": 7.236189812864828e-06, "loss": 0.0713, "step": 13093 }, { "epoch": 0.6, "grad_norm": 0.37346744029676804, "learning_rate": 7.234759881455673e-06, "loss": 0.2949, "step": 13094 }, { "epoch": 0.6, "grad_norm": 0.5244964040940353, "learning_rate": 7.233330011266266e-06, "loss": 0.4032, "step": 13095 }, { "epoch": 0.6, "grad_norm": 0.5124745625575606, "learning_rate": 7.23190020232827e-06, "loss": 0.2596, "step": 13096 }, { "epoch": 0.6, "grad_norm": 0.3357911546907022, "learning_rate": 7.230470454673335e-06, "loss": 0.2764, "step": 13097 }, { "epoch": 0.6, "grad_norm": 0.3595309202513587, "learning_rate": 7.2290407683331154e-06, "loss": 0.2458, "step": 13098 }, { "epoch": 0.6, "grad_norm": 0.36232954794909916, "learning_rate": 7.227611143339259e-06, "loss": 0.2552, "step": 13099 }, { "epoch": 0.6, "grad_norm": 0.34473565927659955, "learning_rate": 7.2261815797234235e-06, "loss": 0.2813, "step": 13100 }, { "epoch": 0.6, "grad_norm": 1.388602177037523, "learning_rate": 7.224752077517253e-06, "loss": 0.9302, "step": 13101 }, { "epoch": 0.6, "grad_norm": 0.6646202327224993, "learning_rate": 7.223322636752397e-06, "loss": 0.327, "step": 13102 }, { "epoch": 0.6, "grad_norm": 0.2888319502877227, "learning_rate": 7.221893257460497e-06, "loss": 0.2509, "step": 13103 }, { "epoch": 0.6, "grad_norm": 0.4848289437571862, "learning_rate": 7.220463939673208e-06, "loss": 0.3521, "step": 13104 }, { "epoch": 0.6, "grad_norm": 0.3001986487997207, "learning_rate": 7.219034683422168e-06, "loss": 0.208, "step": 13105 }, { "epoch": 0.6, "grad_norm": 0.3983782047441542, "learning_rate": 7.21760548873902e-06, "loss": 0.1903, "step": 13106 }, { "epoch": 0.6, "grad_norm": 0.3597121848378269, "learning_rate": 7.216176355655402e-06, "loss": 0.3058, "step": 13107 }, { "epoch": 0.6, "grad_norm": 0.5297023645799521, "learning_rate": 7.214747284202959e-06, "loss": 0.3301, "step": 13108 }, { "epoch": 0.6, "grad_norm": 0.33270782231456103, "learning_rate": 7.213318274413327e-06, "loss": 0.1957, "step": 13109 }, { "epoch": 0.6, "grad_norm": 0.6024887159370106, "learning_rate": 7.211889326318142e-06, "loss": 0.374, "step": 13110 }, { "epoch": 0.6, "grad_norm": 0.24926937904326515, "learning_rate": 7.210460439949041e-06, "loss": 0.2284, "step": 13111 }, { "epoch": 0.6, "grad_norm": 0.33146743523244254, "learning_rate": 7.2090316153376535e-06, "loss": 0.1954, "step": 13112 }, { "epoch": 0.6, "grad_norm": 0.8843966720103299, "learning_rate": 7.2076028525156195e-06, "loss": 0.5543, "step": 13113 }, { "epoch": 0.6, "grad_norm": 0.6574004998628649, "learning_rate": 7.206174151514567e-06, "loss": 0.4081, "step": 13114 }, { "epoch": 0.6, "grad_norm": 0.2850333959323658, "learning_rate": 7.204745512366125e-06, "loss": 0.2159, "step": 13115 }, { "epoch": 0.6, "grad_norm": 0.49514731601911993, "learning_rate": 7.20331693510192e-06, "loss": 0.3335, "step": 13116 }, { "epoch": 0.6, "grad_norm": 0.3326449206352974, "learning_rate": 7.201888419753587e-06, "loss": 0.173, "step": 13117 }, { "epoch": 0.6, "grad_norm": 0.4281277722623995, "learning_rate": 7.200459966352748e-06, "loss": 0.2776, "step": 13118 }, { "epoch": 0.6, "grad_norm": 0.4053609795866975, "learning_rate": 7.199031574931027e-06, "loss": 0.2634, "step": 13119 }, { "epoch": 0.6, "grad_norm": 0.5841591522089046, "learning_rate": 7.197603245520042e-06, "loss": 0.3691, "step": 13120 }, { "epoch": 0.6, "grad_norm": 0.34590770275186555, "learning_rate": 7.196174978151424e-06, "loss": 0.2778, "step": 13121 }, { "epoch": 0.6, "grad_norm": 0.982147838879129, "learning_rate": 7.194746772856791e-06, "loss": 0.3198, "step": 13122 }, { "epoch": 0.6, "grad_norm": 0.27399008437980904, "learning_rate": 7.19331862966776e-06, "loss": 0.2459, "step": 13123 }, { "epoch": 0.6, "grad_norm": 0.44101589753002157, "learning_rate": 7.191890548615949e-06, "loss": 0.2392, "step": 13124 }, { "epoch": 0.6, "grad_norm": 0.5261414721553771, "learning_rate": 7.190462529732973e-06, "loss": 0.2864, "step": 13125 }, { "epoch": 0.6, "grad_norm": 0.8098704283372421, "learning_rate": 7.189034573050451e-06, "loss": 0.4568, "step": 13126 }, { "epoch": 0.6, "grad_norm": 0.3799932434086642, "learning_rate": 7.187606678599994e-06, "loss": 0.2568, "step": 13127 }, { "epoch": 0.6, "grad_norm": 0.35886065222063157, "learning_rate": 7.1861788464132145e-06, "loss": 0.2558, "step": 13128 }, { "epoch": 0.6, "grad_norm": 0.3113100544688846, "learning_rate": 7.184751076521721e-06, "loss": 0.1669, "step": 13129 }, { "epoch": 0.6, "grad_norm": 0.577164657310814, "learning_rate": 7.183323368957129e-06, "loss": 0.304, "step": 13130 }, { "epoch": 0.6, "grad_norm": 0.2732634728482863, "learning_rate": 7.181895723751041e-06, "loss": 0.29, "step": 13131 }, { "epoch": 0.6, "grad_norm": 0.9889330476369504, "learning_rate": 7.180468140935066e-06, "loss": 0.3566, "step": 13132 }, { "epoch": 0.6, "grad_norm": 0.4300639901054414, "learning_rate": 7.179040620540805e-06, "loss": 0.2936, "step": 13133 }, { "epoch": 0.6, "grad_norm": 0.5620263635845721, "learning_rate": 7.17761316259987e-06, "loss": 0.4056, "step": 13134 }, { "epoch": 0.6, "grad_norm": 0.2699209338443238, "learning_rate": 7.17618576714386e-06, "loss": 0.1945, "step": 13135 }, { "epoch": 0.6, "grad_norm": 0.3214509676689571, "learning_rate": 7.1747584342043764e-06, "loss": 0.2184, "step": 13136 }, { "epoch": 0.6, "grad_norm": 0.542614847663522, "learning_rate": 7.173331163813012e-06, "loss": 0.3215, "step": 13137 }, { "epoch": 0.6, "grad_norm": 0.9677512418286224, "learning_rate": 7.171903956001376e-06, "loss": 0.5347, "step": 13138 }, { "epoch": 0.6, "grad_norm": 0.26087101500569765, "learning_rate": 7.170476810801059e-06, "loss": 0.2518, "step": 13139 }, { "epoch": 0.6, "grad_norm": 0.5661829148563724, "learning_rate": 7.16904972824366e-06, "loss": 0.3726, "step": 13140 }, { "epoch": 0.6, "grad_norm": 0.4083842363928816, "learning_rate": 7.16762270836077e-06, "loss": 0.169, "step": 13141 }, { "epoch": 0.6, "grad_norm": 0.43453981416469795, "learning_rate": 7.1661957511839845e-06, "loss": 0.2907, "step": 13142 }, { "epoch": 0.6, "grad_norm": 0.3351949044624958, "learning_rate": 7.164768856744893e-06, "loss": 0.3025, "step": 13143 }, { "epoch": 0.6, "grad_norm": 0.8491619468982453, "learning_rate": 7.163342025075088e-06, "loss": 0.5269, "step": 13144 }, { "epoch": 0.6, "grad_norm": 0.2561555983965833, "learning_rate": 7.161915256206155e-06, "loss": 0.0949, "step": 13145 }, { "epoch": 0.6, "grad_norm": 0.42102602782236065, "learning_rate": 7.1604885501696815e-06, "loss": 0.3285, "step": 13146 }, { "epoch": 0.6, "grad_norm": 0.3904455758843244, "learning_rate": 7.159061906997257e-06, "loss": 0.2974, "step": 13147 }, { "epoch": 0.6, "grad_norm": 0.4742208812482457, "learning_rate": 7.157635326720462e-06, "loss": 0.2292, "step": 13148 }, { "epoch": 0.6, "grad_norm": 0.32667372418057317, "learning_rate": 7.156208809370884e-06, "loss": 0.2579, "step": 13149 }, { "epoch": 0.6, "grad_norm": 1.2769451002380916, "learning_rate": 7.1547823549800966e-06, "loss": 0.7609, "step": 13150 }, { "epoch": 0.6, "grad_norm": 0.2663221220738006, "learning_rate": 7.15335596357969e-06, "loss": 0.1776, "step": 13151 }, { "epoch": 0.6, "grad_norm": 0.4315092470684004, "learning_rate": 7.151929635201238e-06, "loss": 0.301, "step": 13152 }, { "epoch": 0.6, "grad_norm": 0.746033657499269, "learning_rate": 7.15050336987632e-06, "loss": 0.4079, "step": 13153 }, { "epoch": 0.6, "grad_norm": 0.49221890733294776, "learning_rate": 7.149077167636514e-06, "loss": 0.2274, "step": 13154 }, { "epoch": 0.6, "grad_norm": 0.4089863733488511, "learning_rate": 7.1476510285133824e-06, "loss": 0.3227, "step": 13155 }, { "epoch": 0.6, "grad_norm": 0.5196987957961049, "learning_rate": 7.146224952538514e-06, "loss": 0.4168, "step": 13156 }, { "epoch": 0.6, "grad_norm": 0.25876996963615884, "learning_rate": 7.144798939743475e-06, "loss": 0.1627, "step": 13157 }, { "epoch": 0.6, "grad_norm": 0.42257976480295784, "learning_rate": 7.143372990159835e-06, "loss": 0.2161, "step": 13158 }, { "epoch": 0.6, "grad_norm": 0.4163779030551335, "learning_rate": 7.141947103819163e-06, "loss": 0.3254, "step": 13159 }, { "epoch": 0.6, "grad_norm": 0.5505659038364246, "learning_rate": 7.140521280753028e-06, "loss": 0.3328, "step": 13160 }, { "epoch": 0.6, "grad_norm": 0.38824565133695726, "learning_rate": 7.139095520992996e-06, "loss": 0.2399, "step": 13161 }, { "epoch": 0.6, "grad_norm": 0.4053966803143022, "learning_rate": 7.137669824570631e-06, "loss": 0.267, "step": 13162 }, { "epoch": 0.6, "grad_norm": 0.329445484208078, "learning_rate": 7.136244191517494e-06, "loss": 0.2281, "step": 13163 }, { "epoch": 0.6, "grad_norm": 0.33579671661220045, "learning_rate": 7.134818621865157e-06, "loss": 0.2038, "step": 13164 }, { "epoch": 0.6, "grad_norm": 0.6889386419283249, "learning_rate": 7.133393115645172e-06, "loss": 0.3853, "step": 13165 }, { "epoch": 0.6, "grad_norm": 0.7665483240974319, "learning_rate": 7.131967672889101e-06, "loss": 0.3561, "step": 13166 }, { "epoch": 0.6, "grad_norm": 0.26731556018905, "learning_rate": 7.1305422936284965e-06, "loss": 0.2294, "step": 13167 }, { "epoch": 0.6, "grad_norm": 1.397854622643993, "learning_rate": 7.129116977894924e-06, "loss": 0.7462, "step": 13168 }, { "epoch": 0.6, "grad_norm": 0.25075032625970506, "learning_rate": 7.1276917257199356e-06, "loss": 0.1753, "step": 13169 }, { "epoch": 0.61, "grad_norm": 0.35370386768326073, "learning_rate": 7.126266537135082e-06, "loss": 0.2772, "step": 13170 }, { "epoch": 0.61, "grad_norm": 0.3674322599884452, "learning_rate": 7.124841412171921e-06, "loss": 0.2805, "step": 13171 }, { "epoch": 0.61, "grad_norm": 0.8088994855314864, "learning_rate": 7.1234163508619954e-06, "loss": 0.3802, "step": 13172 }, { "epoch": 0.61, "grad_norm": 0.44167816405217686, "learning_rate": 7.121991353236861e-06, "loss": 0.2998, "step": 13173 }, { "epoch": 0.61, "grad_norm": 0.4712836300934926, "learning_rate": 7.1205664193280655e-06, "loss": 0.3011, "step": 13174 }, { "epoch": 0.61, "grad_norm": 0.2827029762469823, "learning_rate": 7.119141549167154e-06, "loss": 0.212, "step": 13175 }, { "epoch": 0.61, "grad_norm": 0.2877340600060954, "learning_rate": 7.11771674278567e-06, "loss": 0.2162, "step": 13176 }, { "epoch": 0.61, "grad_norm": 0.6321776589843386, "learning_rate": 7.116292000215161e-06, "loss": 0.3229, "step": 13177 }, { "epoch": 0.61, "grad_norm": 0.3999298250012933, "learning_rate": 7.114867321487169e-06, "loss": 0.2838, "step": 13178 }, { "epoch": 0.61, "grad_norm": 0.3425629391831343, "learning_rate": 7.113442706633233e-06, "loss": 0.2786, "step": 13179 }, { "epoch": 0.61, "grad_norm": 0.9869457622589638, "learning_rate": 7.112018155684888e-06, "loss": 0.5356, "step": 13180 }, { "epoch": 0.61, "grad_norm": 0.4832428014949986, "learning_rate": 7.110593668673682e-06, "loss": 0.281, "step": 13181 }, { "epoch": 0.61, "grad_norm": 0.35335443390835214, "learning_rate": 7.109169245631149e-06, "loss": 0.3076, "step": 13182 }, { "epoch": 0.61, "grad_norm": 0.2950247985858858, "learning_rate": 7.1077448865888236e-06, "loss": 0.2472, "step": 13183 }, { "epoch": 0.61, "grad_norm": 0.745021081118966, "learning_rate": 7.106320591578237e-06, "loss": 0.1374, "step": 13184 }, { "epoch": 0.61, "grad_norm": 0.35025670181340446, "learning_rate": 7.10489636063092e-06, "loss": 0.2634, "step": 13185 }, { "epoch": 0.61, "grad_norm": 0.5484912969264011, "learning_rate": 7.10347219377841e-06, "loss": 0.4036, "step": 13186 }, { "epoch": 0.61, "grad_norm": 0.37595529345835094, "learning_rate": 7.102048091052235e-06, "loss": 0.2506, "step": 13187 }, { "epoch": 0.61, "grad_norm": 0.38514353561772385, "learning_rate": 7.1006240524839225e-06, "loss": 0.3255, "step": 13188 }, { "epoch": 0.61, "grad_norm": 0.2714241392126355, "learning_rate": 7.099200078104995e-06, "loss": 0.1407, "step": 13189 }, { "epoch": 0.61, "grad_norm": 0.3403392197765243, "learning_rate": 7.097776167946986e-06, "loss": 0.2412, "step": 13190 }, { "epoch": 0.61, "grad_norm": 0.37813807431389335, "learning_rate": 7.096352322041414e-06, "loss": 0.2753, "step": 13191 }, { "epoch": 0.61, "grad_norm": 1.4587232457790513, "learning_rate": 7.094928540419804e-06, "loss": 0.7653, "step": 13192 }, { "epoch": 0.61, "grad_norm": 0.689564675335786, "learning_rate": 7.093504823113674e-06, "loss": 0.4063, "step": 13193 }, { "epoch": 0.61, "grad_norm": 0.34403370475593453, "learning_rate": 7.0920811701545474e-06, "loss": 0.1856, "step": 13194 }, { "epoch": 0.61, "grad_norm": 0.2742343126233367, "learning_rate": 7.090657581573941e-06, "loss": 0.2229, "step": 13195 }, { "epoch": 0.61, "grad_norm": 0.9116441205076875, "learning_rate": 7.089234057403373e-06, "loss": 0.4537, "step": 13196 }, { "epoch": 0.61, "grad_norm": 0.3561583406500787, "learning_rate": 7.087810597674351e-06, "loss": 0.2109, "step": 13197 }, { "epoch": 0.61, "grad_norm": 0.3944290660533092, "learning_rate": 7.0863872024184025e-06, "loss": 0.3168, "step": 13198 }, { "epoch": 0.61, "grad_norm": 0.966833204298043, "learning_rate": 7.084963871667032e-06, "loss": 0.375, "step": 13199 }, { "epoch": 0.61, "grad_norm": 0.3449343416853031, "learning_rate": 7.0835406054517505e-06, "loss": 0.2194, "step": 13200 }, { "epoch": 0.61, "grad_norm": 0.3581267708625302, "learning_rate": 7.082117403804074e-06, "loss": 0.1945, "step": 13201 }, { "epoch": 0.61, "grad_norm": 0.38050943772535994, "learning_rate": 7.080694266755497e-06, "loss": 0.3193, "step": 13202 }, { "epoch": 0.61, "grad_norm": 0.2937967540633318, "learning_rate": 7.0792711943375406e-06, "loss": 0.2005, "step": 13203 }, { "epoch": 0.61, "grad_norm": 1.218071774332793, "learning_rate": 7.077848186581705e-06, "loss": 0.4616, "step": 13204 }, { "epoch": 0.61, "grad_norm": 0.8635243969689705, "learning_rate": 7.076425243519494e-06, "loss": 0.4277, "step": 13205 }, { "epoch": 0.61, "grad_norm": 0.36676066954631537, "learning_rate": 7.0750023651824086e-06, "loss": 0.2501, "step": 13206 }, { "epoch": 0.61, "grad_norm": 0.34594653848202417, "learning_rate": 7.073579551601952e-06, "loss": 0.2301, "step": 13207 }, { "epoch": 0.61, "grad_norm": 0.3322400983187017, "learning_rate": 7.072156802809626e-06, "loss": 0.2164, "step": 13208 }, { "epoch": 0.61, "grad_norm": 0.3444796556916447, "learning_rate": 7.070734118836925e-06, "loss": 0.2752, "step": 13209 }, { "epoch": 0.61, "grad_norm": 0.596934595652926, "learning_rate": 7.069311499715344e-06, "loss": 0.2818, "step": 13210 }, { "epoch": 0.61, "grad_norm": 0.5772617227785102, "learning_rate": 7.067888945476386e-06, "loss": 0.3564, "step": 13211 }, { "epoch": 0.61, "grad_norm": 0.40755943259517474, "learning_rate": 7.066466456151541e-06, "loss": 0.2915, "step": 13212 }, { "epoch": 0.61, "grad_norm": 0.23153860773606208, "learning_rate": 7.065044031772301e-06, "loss": 0.1289, "step": 13213 }, { "epoch": 0.61, "grad_norm": 0.38833186655934354, "learning_rate": 7.063621672370157e-06, "loss": 0.2997, "step": 13214 }, { "epoch": 0.61, "grad_norm": 0.3918095409614361, "learning_rate": 7.062199377976595e-06, "loss": 0.2507, "step": 13215 }, { "epoch": 0.61, "grad_norm": 0.46972123010949823, "learning_rate": 7.06077714862311e-06, "loss": 0.2875, "step": 13216 }, { "epoch": 0.61, "grad_norm": 1.1703365226642282, "learning_rate": 7.059354984341189e-06, "loss": 0.5967, "step": 13217 }, { "epoch": 0.61, "grad_norm": 0.36156675160805446, "learning_rate": 7.057932885162312e-06, "loss": 0.2521, "step": 13218 }, { "epoch": 0.61, "grad_norm": 0.4192115205045195, "learning_rate": 7.0565108511179635e-06, "loss": 0.326, "step": 13219 }, { "epoch": 0.61, "grad_norm": 0.23077055175410224, "learning_rate": 7.055088882239631e-06, "loss": 0.098, "step": 13220 }, { "epoch": 0.61, "grad_norm": 0.34818336750425904, "learning_rate": 7.053666978558791e-06, "loss": 0.2569, "step": 13221 }, { "epoch": 0.61, "grad_norm": 0.47756221390437303, "learning_rate": 7.052245140106926e-06, "loss": 0.3416, "step": 13222 }, { "epoch": 0.61, "grad_norm": 0.48399079563732117, "learning_rate": 7.050823366915509e-06, "loss": 0.2714, "step": 13223 }, { "epoch": 0.61, "grad_norm": 0.3856934684886553, "learning_rate": 7.049401659016023e-06, "loss": 0.2611, "step": 13224 }, { "epoch": 0.61, "grad_norm": 1.2912415538386326, "learning_rate": 7.04798001643994e-06, "loss": 0.561, "step": 13225 }, { "epoch": 0.61, "grad_norm": 0.21113464758715922, "learning_rate": 7.0465584392187345e-06, "loss": 0.1796, "step": 13226 }, { "epoch": 0.61, "grad_norm": 0.3489388705698056, "learning_rate": 7.045136927383874e-06, "loss": 0.2675, "step": 13227 }, { "epoch": 0.61, "grad_norm": 0.7320256661961151, "learning_rate": 7.043715480966839e-06, "loss": 0.4193, "step": 13228 }, { "epoch": 0.61, "grad_norm": 0.44714865521354447, "learning_rate": 7.042294099999096e-06, "loss": 0.2883, "step": 13229 }, { "epoch": 0.61, "grad_norm": 0.41776620968393785, "learning_rate": 7.040872784512107e-06, "loss": 0.2908, "step": 13230 }, { "epoch": 0.61, "grad_norm": 0.3838355888205946, "learning_rate": 7.039451534537345e-06, "loss": 0.3194, "step": 13231 }, { "epoch": 0.61, "grad_norm": 0.4201385036254127, "learning_rate": 7.0380303501062675e-06, "loss": 0.3288, "step": 13232 }, { "epoch": 0.61, "grad_norm": 0.3902013936431746, "learning_rate": 7.036609231250346e-06, "loss": 0.1943, "step": 13233 }, { "epoch": 0.61, "grad_norm": 0.31405309228437783, "learning_rate": 7.035188178001042e-06, "loss": 0.2969, "step": 13234 }, { "epoch": 0.61, "grad_norm": 0.4443853799132858, "learning_rate": 7.033767190389814e-06, "loss": 0.2871, "step": 13235 }, { "epoch": 0.61, "grad_norm": 0.3375871779312744, "learning_rate": 7.032346268448118e-06, "loss": 0.1564, "step": 13236 }, { "epoch": 0.61, "grad_norm": 0.5793804394085306, "learning_rate": 7.030925412207419e-06, "loss": 0.3442, "step": 13237 }, { "epoch": 0.61, "grad_norm": 0.350426211631335, "learning_rate": 7.029504621699169e-06, "loss": 0.3084, "step": 13238 }, { "epoch": 0.61, "grad_norm": 0.3327481888262889, "learning_rate": 7.028083896954825e-06, "loss": 0.2254, "step": 13239 }, { "epoch": 0.61, "grad_norm": 0.8765198477215321, "learning_rate": 7.026663238005835e-06, "loss": 0.5121, "step": 13240 }, { "epoch": 0.61, "grad_norm": 0.2864856508660541, "learning_rate": 7.025242644883659e-06, "loss": 0.1913, "step": 13241 }, { "epoch": 0.61, "grad_norm": 0.26518996452147225, "learning_rate": 7.023822117619742e-06, "loss": 0.2307, "step": 13242 }, { "epoch": 0.61, "grad_norm": 1.1541231292746266, "learning_rate": 7.022401656245535e-06, "loss": 0.5283, "step": 13243 }, { "epoch": 0.61, "grad_norm": 0.7745811158052468, "learning_rate": 7.020981260792484e-06, "loss": 0.395, "step": 13244 }, { "epoch": 0.61, "grad_norm": 0.4020693223690801, "learning_rate": 7.019560931292038e-06, "loss": 0.2943, "step": 13245 }, { "epoch": 0.61, "grad_norm": 0.3073561538290007, "learning_rate": 7.018140667775642e-06, "loss": 0.2411, "step": 13246 }, { "epoch": 0.61, "grad_norm": 0.2996032766957375, "learning_rate": 7.016720470274736e-06, "loss": 0.1915, "step": 13247 }, { "epoch": 0.61, "grad_norm": 0.616236488826098, "learning_rate": 7.015300338820766e-06, "loss": 0.2799, "step": 13248 }, { "epoch": 0.61, "grad_norm": 0.4419147198751879, "learning_rate": 7.013880273445164e-06, "loss": 0.2458, "step": 13249 }, { "epoch": 0.61, "grad_norm": 0.4794183373129288, "learning_rate": 7.0124602741793794e-06, "loss": 0.3022, "step": 13250 }, { "epoch": 0.61, "grad_norm": 0.6108087650643558, "learning_rate": 7.0110403410548445e-06, "loss": 0.2847, "step": 13251 }, { "epoch": 0.61, "grad_norm": 0.4075570218469928, "learning_rate": 7.009620474102995e-06, "loss": 0.2697, "step": 13252 }, { "epoch": 0.61, "grad_norm": 0.27818188541525096, "learning_rate": 7.0082006733552646e-06, "loss": 0.213, "step": 13253 }, { "epoch": 0.61, "grad_norm": 0.3947332515512756, "learning_rate": 7.00678093884309e-06, "loss": 0.2698, "step": 13254 }, { "epoch": 0.61, "grad_norm": 0.3848307493098855, "learning_rate": 7.005361270597899e-06, "loss": 0.2506, "step": 13255 }, { "epoch": 0.61, "grad_norm": 0.9683634804008813, "learning_rate": 7.003941668651125e-06, "loss": 0.4545, "step": 13256 }, { "epoch": 0.61, "grad_norm": 0.6032631413756341, "learning_rate": 7.0025221330341905e-06, "loss": 0.314, "step": 13257 }, { "epoch": 0.61, "grad_norm": 0.2902885745894445, "learning_rate": 7.001102663778533e-06, "loss": 0.2605, "step": 13258 }, { "epoch": 0.61, "grad_norm": 0.23296764618120724, "learning_rate": 6.99968326091557e-06, "loss": 0.0996, "step": 13259 }, { "epoch": 0.61, "grad_norm": 0.4564579128740561, "learning_rate": 6.998263924476727e-06, "loss": 0.2967, "step": 13260 }, { "epoch": 0.61, "grad_norm": 0.4949144013324813, "learning_rate": 6.996844654493429e-06, "loss": 0.3361, "step": 13261 }, { "epoch": 0.61, "grad_norm": 0.33530624190760694, "learning_rate": 6.9954254509970905e-06, "loss": 0.2731, "step": 13262 }, { "epoch": 0.61, "grad_norm": 0.4162845160538152, "learning_rate": 6.994006314019141e-06, "loss": 0.294, "step": 13263 }, { "epoch": 0.61, "grad_norm": 0.5807107999290808, "learning_rate": 6.992587243590996e-06, "loss": 0.399, "step": 13264 }, { "epoch": 0.61, "grad_norm": 0.2301457138065459, "learning_rate": 6.991168239744068e-06, "loss": 0.1513, "step": 13265 }, { "epoch": 0.61, "grad_norm": 0.4996920700842822, "learning_rate": 6.989749302509776e-06, "loss": 0.3578, "step": 13266 }, { "epoch": 0.61, "grad_norm": 0.39319750080528976, "learning_rate": 6.988330431919534e-06, "loss": 0.2866, "step": 13267 }, { "epoch": 0.61, "grad_norm": 0.8177512581954891, "learning_rate": 6.986911628004753e-06, "loss": 0.3096, "step": 13268 }, { "epoch": 0.61, "grad_norm": 0.6587424620188117, "learning_rate": 6.985492890796846e-06, "loss": 0.3595, "step": 13269 }, { "epoch": 0.61, "grad_norm": 0.29956451407525114, "learning_rate": 6.984074220327217e-06, "loss": 0.271, "step": 13270 }, { "epoch": 0.61, "grad_norm": 0.45141752724755735, "learning_rate": 6.982655616627282e-06, "loss": 0.2302, "step": 13271 }, { "epoch": 0.61, "grad_norm": 0.5980308970073374, "learning_rate": 6.981237079728442e-06, "loss": 0.2204, "step": 13272 }, { "epoch": 0.61, "grad_norm": 0.3110007732456863, "learning_rate": 6.979818609662104e-06, "loss": 0.2402, "step": 13273 }, { "epoch": 0.61, "grad_norm": 0.36906311448099194, "learning_rate": 6.978400206459668e-06, "loss": 0.2866, "step": 13274 }, { "epoch": 0.61, "grad_norm": 0.4611047151940982, "learning_rate": 6.976981870152541e-06, "loss": 0.1835, "step": 13275 }, { "epoch": 0.61, "grad_norm": 0.4321954179240608, "learning_rate": 6.975563600772126e-06, "loss": 0.3195, "step": 13276 }, { "epoch": 0.61, "grad_norm": 0.47446984703925477, "learning_rate": 6.974145398349814e-06, "loss": 0.2624, "step": 13277 }, { "epoch": 0.61, "grad_norm": 0.31696536885933657, "learning_rate": 6.972727262917008e-06, "loss": 0.2256, "step": 13278 }, { "epoch": 0.61, "grad_norm": 0.4382728838794006, "learning_rate": 6.971309194505098e-06, "loss": 0.2948, "step": 13279 }, { "epoch": 0.61, "grad_norm": 0.5031982280657289, "learning_rate": 6.969891193145489e-06, "loss": 0.3118, "step": 13280 }, { "epoch": 0.61, "grad_norm": 0.2992853722339447, "learning_rate": 6.968473258869566e-06, "loss": 0.2177, "step": 13281 }, { "epoch": 0.61, "grad_norm": 0.37623675287915465, "learning_rate": 6.9670553917087246e-06, "loss": 0.2867, "step": 13282 }, { "epoch": 0.61, "grad_norm": 0.7987918096646779, "learning_rate": 6.9656375916943505e-06, "loss": 0.4614, "step": 13283 }, { "epoch": 0.61, "grad_norm": 0.6711136549712603, "learning_rate": 6.964219858857839e-06, "loss": 0.343, "step": 13284 }, { "epoch": 0.61, "grad_norm": 0.27241354568695225, "learning_rate": 6.962802193230574e-06, "loss": 0.1578, "step": 13285 }, { "epoch": 0.61, "grad_norm": 0.29399386377274095, "learning_rate": 6.961384594843939e-06, "loss": 0.2704, "step": 13286 }, { "epoch": 0.61, "grad_norm": 1.3735825512963098, "learning_rate": 6.959967063729321e-06, "loss": 0.8432, "step": 13287 }, { "epoch": 0.61, "grad_norm": 0.319565969108888, "learning_rate": 6.958549599918103e-06, "loss": 0.2058, "step": 13288 }, { "epoch": 0.61, "grad_norm": 0.4863582146485311, "learning_rate": 6.957132203441666e-06, "loss": 0.3224, "step": 13289 }, { "epoch": 0.61, "grad_norm": 0.4234258689341677, "learning_rate": 6.955714874331388e-06, "loss": 0.3172, "step": 13290 }, { "epoch": 0.61, "grad_norm": 0.32153606237302085, "learning_rate": 6.95429761261865e-06, "loss": 0.1663, "step": 13291 }, { "epoch": 0.61, "grad_norm": 0.49008023733572054, "learning_rate": 6.952880418334822e-06, "loss": 0.2735, "step": 13292 }, { "epoch": 0.61, "grad_norm": 0.4141911271186079, "learning_rate": 6.951463291511289e-06, "loss": 0.3184, "step": 13293 }, { "epoch": 0.61, "grad_norm": 0.2905234190982208, "learning_rate": 6.9500462321794214e-06, "loss": 0.2211, "step": 13294 }, { "epoch": 0.61, "grad_norm": 0.7947607701200965, "learning_rate": 6.948629240370592e-06, "loss": 0.394, "step": 13295 }, { "epoch": 0.61, "grad_norm": 0.5644798627247649, "learning_rate": 6.947212316116164e-06, "loss": 0.3577, "step": 13296 }, { "epoch": 0.61, "grad_norm": 0.37898938366177054, "learning_rate": 6.945795459447517e-06, "loss": 0.2992, "step": 13297 }, { "epoch": 0.61, "grad_norm": 0.22212031680559113, "learning_rate": 6.944378670396016e-06, "loss": 0.1703, "step": 13298 }, { "epoch": 0.61, "grad_norm": 0.7539079032897804, "learning_rate": 6.942961948993026e-06, "loss": 0.4961, "step": 13299 }, { "epoch": 0.61, "grad_norm": 0.37266457123857255, "learning_rate": 6.941545295269909e-06, "loss": 0.27, "step": 13300 }, { "epoch": 0.61, "grad_norm": 0.38438273520229865, "learning_rate": 6.940128709258034e-06, "loss": 0.2841, "step": 13301 }, { "epoch": 0.61, "grad_norm": 0.7446501517362355, "learning_rate": 6.93871219098876e-06, "loss": 0.4113, "step": 13302 }, { "epoch": 0.61, "grad_norm": 0.3879636463843152, "learning_rate": 6.937295740493448e-06, "loss": 0.2651, "step": 13303 }, { "epoch": 0.61, "grad_norm": 0.2521511669756142, "learning_rate": 6.935879357803453e-06, "loss": 0.124, "step": 13304 }, { "epoch": 0.61, "grad_norm": 0.378772770535479, "learning_rate": 6.9344630429501395e-06, "loss": 0.3198, "step": 13305 }, { "epoch": 0.61, "grad_norm": 0.374211412125898, "learning_rate": 6.9330467959648594e-06, "loss": 0.2653, "step": 13306 }, { "epoch": 0.61, "grad_norm": 0.6539568689989769, "learning_rate": 6.931630616878967e-06, "loss": 0.3937, "step": 13307 }, { "epoch": 0.61, "grad_norm": 1.1114677756185225, "learning_rate": 6.930214505723816e-06, "loss": 0.2607, "step": 13308 }, { "epoch": 0.61, "grad_norm": 0.2747438841453921, "learning_rate": 6.928798462530751e-06, "loss": 0.2372, "step": 13309 }, { "epoch": 0.61, "grad_norm": 0.36853898722400935, "learning_rate": 6.927382487331134e-06, "loss": 0.282, "step": 13310 }, { "epoch": 0.61, "grad_norm": 0.2567367038577743, "learning_rate": 6.925966580156307e-06, "loss": 0.1401, "step": 13311 }, { "epoch": 0.61, "grad_norm": 0.3988918828921113, "learning_rate": 6.924550741037616e-06, "loss": 0.2814, "step": 13312 }, { "epoch": 0.61, "grad_norm": 0.48703845415777197, "learning_rate": 6.923134970006406e-06, "loss": 0.3532, "step": 13313 }, { "epoch": 0.61, "grad_norm": 0.4037358482857964, "learning_rate": 6.921719267094025e-06, "loss": 0.2336, "step": 13314 }, { "epoch": 0.61, "grad_norm": 0.3785049680740953, "learning_rate": 6.9203036323318125e-06, "loss": 0.2726, "step": 13315 }, { "epoch": 0.61, "grad_norm": 0.26611801316774814, "learning_rate": 6.918888065751109e-06, "loss": 0.1463, "step": 13316 }, { "epoch": 0.61, "grad_norm": 0.3106606135247731, "learning_rate": 6.917472567383252e-06, "loss": 0.2535, "step": 13317 }, { "epoch": 0.61, "grad_norm": 0.3930960677460471, "learning_rate": 6.916057137259584e-06, "loss": 0.2738, "step": 13318 }, { "epoch": 0.61, "grad_norm": 0.6919710512102732, "learning_rate": 6.914641775411437e-06, "loss": 0.423, "step": 13319 }, { "epoch": 0.61, "grad_norm": 1.1562742740568586, "learning_rate": 6.913226481870149e-06, "loss": 0.5072, "step": 13320 }, { "epoch": 0.61, "grad_norm": 0.312359362151665, "learning_rate": 6.911811256667052e-06, "loss": 0.1829, "step": 13321 }, { "epoch": 0.61, "grad_norm": 0.25539123242755546, "learning_rate": 6.910396099833471e-06, "loss": 0.2171, "step": 13322 }, { "epoch": 0.61, "grad_norm": 0.754773230804693, "learning_rate": 6.908981011400751e-06, "loss": 0.4655, "step": 13323 }, { "epoch": 0.61, "grad_norm": 0.31727478570742296, "learning_rate": 6.907565991400209e-06, "loss": 0.2291, "step": 13324 }, { "epoch": 0.61, "grad_norm": 0.4241917678490096, "learning_rate": 6.906151039863176e-06, "loss": 0.3272, "step": 13325 }, { "epoch": 0.61, "grad_norm": 1.4163860398847878, "learning_rate": 6.904736156820973e-06, "loss": 0.6315, "step": 13326 }, { "epoch": 0.61, "grad_norm": 0.3015662923899679, "learning_rate": 6.90332134230493e-06, "loss": 0.2068, "step": 13327 }, { "epoch": 0.61, "grad_norm": 0.5550024492076097, "learning_rate": 6.90190659634637e-06, "loss": 0.3304, "step": 13328 }, { "epoch": 0.61, "grad_norm": 0.35426135694143507, "learning_rate": 6.900491918976613e-06, "loss": 0.3051, "step": 13329 }, { "epoch": 0.61, "grad_norm": 0.35336340830976487, "learning_rate": 6.899077310226973e-06, "loss": 0.2399, "step": 13330 }, { "epoch": 0.61, "grad_norm": 1.1594833771305124, "learning_rate": 6.897662770128776e-06, "loss": 0.6752, "step": 13331 }, { "epoch": 0.61, "grad_norm": 0.5402441592250512, "learning_rate": 6.896248298713336e-06, "loss": 0.2621, "step": 13332 }, { "epoch": 0.61, "grad_norm": 0.356529365027446, "learning_rate": 6.894833896011967e-06, "loss": 0.2491, "step": 13333 }, { "epoch": 0.61, "grad_norm": 0.3579151918867826, "learning_rate": 6.89341956205598e-06, "loss": 0.2446, "step": 13334 }, { "epoch": 0.61, "grad_norm": 0.6122528508336325, "learning_rate": 6.892005296876692e-06, "loss": 0.4095, "step": 13335 }, { "epoch": 0.61, "grad_norm": 0.4369749360113549, "learning_rate": 6.89059110050541e-06, "loss": 0.2708, "step": 13336 }, { "epoch": 0.61, "grad_norm": 0.3203649719722516, "learning_rate": 6.889176972973446e-06, "loss": 0.2483, "step": 13337 }, { "epoch": 0.61, "grad_norm": 0.3055920849384419, "learning_rate": 6.887762914312104e-06, "loss": 0.1876, "step": 13338 }, { "epoch": 0.61, "grad_norm": 0.4274910206833901, "learning_rate": 6.886348924552689e-06, "loss": 0.2454, "step": 13339 }, { "epoch": 0.61, "grad_norm": 0.5573080972518692, "learning_rate": 6.884935003726512e-06, "loss": 0.2827, "step": 13340 }, { "epoch": 0.61, "grad_norm": 0.3833045861421186, "learning_rate": 6.883521151864872e-06, "loss": 0.309, "step": 13341 }, { "epoch": 0.61, "grad_norm": 0.4328125702819307, "learning_rate": 6.882107368999069e-06, "loss": 0.2749, "step": 13342 }, { "epoch": 0.61, "grad_norm": 0.2032869647555686, "learning_rate": 6.8806936551604e-06, "loss": 0.1465, "step": 13343 }, { "epoch": 0.61, "grad_norm": 1.215086187797997, "learning_rate": 6.879280010380169e-06, "loss": 0.5807, "step": 13344 }, { "epoch": 0.61, "grad_norm": 0.31897184246136756, "learning_rate": 6.877866434689673e-06, "loss": 0.2533, "step": 13345 }, { "epoch": 0.61, "grad_norm": 0.4718437950136446, "learning_rate": 6.8764529281202055e-06, "loss": 0.3094, "step": 13346 }, { "epoch": 0.61, "grad_norm": 0.5962997410687163, "learning_rate": 6.875039490703055e-06, "loss": 0.3362, "step": 13347 }, { "epoch": 0.61, "grad_norm": 0.36627603212143084, "learning_rate": 6.8736261224695225e-06, "loss": 0.2738, "step": 13348 }, { "epoch": 0.61, "grad_norm": 0.4965533502840909, "learning_rate": 6.872212823450895e-06, "loss": 0.3562, "step": 13349 }, { "epoch": 0.61, "grad_norm": 0.2788625095921907, "learning_rate": 6.870799593678459e-06, "loss": 0.1623, "step": 13350 }, { "epoch": 0.61, "grad_norm": 0.4249080093679798, "learning_rate": 6.869386433183505e-06, "loss": 0.2815, "step": 13351 }, { "epoch": 0.61, "grad_norm": 0.6252054392448368, "learning_rate": 6.867973341997315e-06, "loss": 0.3506, "step": 13352 }, { "epoch": 0.61, "grad_norm": 0.3937552232681003, "learning_rate": 6.866560320151179e-06, "loss": 0.2669, "step": 13353 }, { "epoch": 0.61, "grad_norm": 0.44337814059992464, "learning_rate": 6.865147367676378e-06, "loss": 0.2488, "step": 13354 }, { "epoch": 0.61, "grad_norm": 0.5904760318124197, "learning_rate": 6.863734484604193e-06, "loss": 0.3884, "step": 13355 }, { "epoch": 0.61, "grad_norm": 0.21147098596756062, "learning_rate": 6.862321670965899e-06, "loss": 0.1616, "step": 13356 }, { "epoch": 0.61, "grad_norm": 0.40525540888794503, "learning_rate": 6.860908926792784e-06, "loss": 0.2586, "step": 13357 }, { "epoch": 0.61, "grad_norm": 0.5671453986436225, "learning_rate": 6.859496252116119e-06, "loss": 0.3153, "step": 13358 }, { "epoch": 0.61, "grad_norm": 1.147317961326644, "learning_rate": 6.858083646967181e-06, "loss": 0.5571, "step": 13359 }, { "epoch": 0.61, "grad_norm": 0.3654008986061631, "learning_rate": 6.85667111137724e-06, "loss": 0.2108, "step": 13360 }, { "epoch": 0.61, "grad_norm": 0.31961388280847464, "learning_rate": 6.8552586453775735e-06, "loss": 0.2938, "step": 13361 }, { "epoch": 0.61, "grad_norm": 0.26214442367828417, "learning_rate": 6.853846248999449e-06, "loss": 0.1676, "step": 13362 }, { "epoch": 0.61, "grad_norm": 0.38559620428908054, "learning_rate": 6.852433922274138e-06, "loss": 0.2223, "step": 13363 }, { "epoch": 0.61, "grad_norm": 0.5227940340239714, "learning_rate": 6.851021665232902e-06, "loss": 0.3492, "step": 13364 }, { "epoch": 0.61, "grad_norm": 0.36161136759086693, "learning_rate": 6.849609477907015e-06, "loss": 0.3078, "step": 13365 }, { "epoch": 0.61, "grad_norm": 0.3098877668923679, "learning_rate": 6.848197360327736e-06, "loss": 0.2054, "step": 13366 }, { "epoch": 0.61, "grad_norm": 1.4746593345176537, "learning_rate": 6.846785312526331e-06, "loss": 0.8591, "step": 13367 }, { "epoch": 0.61, "grad_norm": 0.3372660004060218, "learning_rate": 6.84537333453406e-06, "loss": 0.2465, "step": 13368 }, { "epoch": 0.61, "grad_norm": 0.3141500257553667, "learning_rate": 6.843961426382179e-06, "loss": 0.2105, "step": 13369 }, { "epoch": 0.61, "grad_norm": 0.3419022566749706, "learning_rate": 6.8425495881019565e-06, "loss": 0.2642, "step": 13370 }, { "epoch": 0.61, "grad_norm": 1.12285067283773, "learning_rate": 6.841137819724639e-06, "loss": 0.5649, "step": 13371 }, { "epoch": 0.61, "grad_norm": 0.8687496145373245, "learning_rate": 6.839726121281488e-06, "loss": 0.5314, "step": 13372 }, { "epoch": 0.61, "grad_norm": 0.28070199760988074, "learning_rate": 6.83831449280375e-06, "loss": 0.2421, "step": 13373 }, { "epoch": 0.61, "grad_norm": 0.6408630261668267, "learning_rate": 6.836902934322684e-06, "loss": 0.3844, "step": 13374 }, { "epoch": 0.61, "grad_norm": 0.5637484308529603, "learning_rate": 6.835491445869542e-06, "loss": 0.3104, "step": 13375 }, { "epoch": 0.61, "grad_norm": 0.23709119577451992, "learning_rate": 6.834080027475567e-06, "loss": 0.1602, "step": 13376 }, { "epoch": 0.61, "grad_norm": 0.38115026952114867, "learning_rate": 6.832668679172007e-06, "loss": 0.2938, "step": 13377 }, { "epoch": 0.61, "grad_norm": 0.7513890058953042, "learning_rate": 6.831257400990113e-06, "loss": 0.3919, "step": 13378 }, { "epoch": 0.61, "grad_norm": 0.3914294822413991, "learning_rate": 6.829846192961125e-06, "loss": 0.2391, "step": 13379 }, { "epoch": 0.61, "grad_norm": 0.6106318983631007, "learning_rate": 6.828435055116286e-06, "loss": 0.3414, "step": 13380 }, { "epoch": 0.61, "grad_norm": 0.3975554521889576, "learning_rate": 6.82702398748684e-06, "loss": 0.2929, "step": 13381 }, { "epoch": 0.61, "grad_norm": 0.2959016361647113, "learning_rate": 6.82561299010402e-06, "loss": 0.1854, "step": 13382 }, { "epoch": 0.61, "grad_norm": 0.4308284805834394, "learning_rate": 6.8242020629990736e-06, "loss": 0.2808, "step": 13383 }, { "epoch": 0.61, "grad_norm": 0.37166919695496264, "learning_rate": 6.822791206203232e-06, "loss": 0.288, "step": 13384 }, { "epoch": 0.61, "grad_norm": 0.3383684141607946, "learning_rate": 6.821380419747729e-06, "loss": 0.2784, "step": 13385 }, { "epoch": 0.61, "grad_norm": 0.734082300088229, "learning_rate": 6.819969703663798e-06, "loss": 0.3092, "step": 13386 }, { "epoch": 0.61, "grad_norm": 0.4281720335127633, "learning_rate": 6.818559057982676e-06, "loss": 0.277, "step": 13387 }, { "epoch": 0.62, "grad_norm": 0.26244132801338854, "learning_rate": 6.817148482735594e-06, "loss": 0.1863, "step": 13388 }, { "epoch": 0.62, "grad_norm": 0.3365421577728329, "learning_rate": 6.815737977953773e-06, "loss": 0.2703, "step": 13389 }, { "epoch": 0.62, "grad_norm": 0.8714532501283128, "learning_rate": 6.81432754366844e-06, "loss": 0.5102, "step": 13390 }, { "epoch": 0.62, "grad_norm": 0.45886033829610806, "learning_rate": 6.81291717991083e-06, "loss": 0.2945, "step": 13391 }, { "epoch": 0.62, "grad_norm": 0.4145486435410062, "learning_rate": 6.8115068867121604e-06, "loss": 0.276, "step": 13392 }, { "epoch": 0.62, "grad_norm": 0.8673398144930178, "learning_rate": 6.810096664103656e-06, "loss": 0.4092, "step": 13393 }, { "epoch": 0.62, "grad_norm": 0.2740705382515217, "learning_rate": 6.8086865121165356e-06, "loss": 0.2175, "step": 13394 }, { "epoch": 0.62, "grad_norm": 0.26165990222225316, "learning_rate": 6.807276430782022e-06, "loss": 0.132, "step": 13395 }, { "epoch": 0.62, "grad_norm": 0.41137605991850584, "learning_rate": 6.80586642013133e-06, "loss": 0.3042, "step": 13396 }, { "epoch": 0.62, "grad_norm": 0.3359857439529303, "learning_rate": 6.804456480195677e-06, "loss": 0.3094, "step": 13397 }, { "epoch": 0.62, "grad_norm": 0.6389024554221187, "learning_rate": 6.803046611006278e-06, "loss": 0.3817, "step": 13398 }, { "epoch": 0.62, "grad_norm": 0.47320648134687343, "learning_rate": 6.801636812594343e-06, "loss": 0.1514, "step": 13399 }, { "epoch": 0.62, "grad_norm": 0.2966466033327947, "learning_rate": 6.800227084991089e-06, "loss": 0.229, "step": 13400 }, { "epoch": 0.62, "grad_norm": 0.2850719524246659, "learning_rate": 6.798817428227724e-06, "loss": 0.2413, "step": 13401 }, { "epoch": 0.62, "grad_norm": 0.47959802059900786, "learning_rate": 6.797407842335455e-06, "loss": 0.2269, "step": 13402 }, { "epoch": 0.62, "grad_norm": 0.39117437332585386, "learning_rate": 6.7959983273454855e-06, "loss": 0.3037, "step": 13403 }, { "epoch": 0.62, "grad_norm": 0.47785960180717135, "learning_rate": 6.79458888328903e-06, "loss": 0.3332, "step": 13404 }, { "epoch": 0.62, "grad_norm": 0.35853028910776097, "learning_rate": 6.793179510197287e-06, "loss": 0.2115, "step": 13405 }, { "epoch": 0.62, "grad_norm": 0.42455106243715884, "learning_rate": 6.791770208101458e-06, "loss": 0.3109, "step": 13406 }, { "epoch": 0.62, "grad_norm": 0.3180175148375077, "learning_rate": 6.7903609770327425e-06, "loss": 0.2184, "step": 13407 }, { "epoch": 0.62, "grad_norm": 0.3288491965942752, "learning_rate": 6.7889518170223445e-06, "loss": 0.2468, "step": 13408 }, { "epoch": 0.62, "grad_norm": 0.40399461277998727, "learning_rate": 6.787542728101457e-06, "loss": 0.2926, "step": 13409 }, { "epoch": 0.62, "grad_norm": 0.8302834851534248, "learning_rate": 6.786133710301279e-06, "loss": 0.5066, "step": 13410 }, { "epoch": 0.62, "grad_norm": 1.7014620763672794, "learning_rate": 6.784724763653002e-06, "loss": 0.7792, "step": 13411 }, { "epoch": 0.62, "grad_norm": 0.2771498120281875, "learning_rate": 6.783315888187818e-06, "loss": 0.2093, "step": 13412 }, { "epoch": 0.62, "grad_norm": 0.3006268015841587, "learning_rate": 6.781907083936922e-06, "loss": 0.2457, "step": 13413 }, { "epoch": 0.62, "grad_norm": 0.6443147491811781, "learning_rate": 6.7804983509315015e-06, "loss": 0.4055, "step": 13414 }, { "epoch": 0.62, "grad_norm": 0.3327888673285415, "learning_rate": 6.779089689202744e-06, "loss": 0.2226, "step": 13415 }, { "epoch": 0.62, "grad_norm": 0.3425138994864374, "learning_rate": 6.777681098781833e-06, "loss": 0.2399, "step": 13416 }, { "epoch": 0.62, "grad_norm": 0.46525286247544295, "learning_rate": 6.776272579699963e-06, "loss": 0.3238, "step": 13417 }, { "epoch": 0.62, "grad_norm": 0.37205906208094514, "learning_rate": 6.774864131988309e-06, "loss": 0.2238, "step": 13418 }, { "epoch": 0.62, "grad_norm": 0.43678725838365934, "learning_rate": 6.773455755678054e-06, "loss": 0.3014, "step": 13419 }, { "epoch": 0.62, "grad_norm": 0.369635075772253, "learning_rate": 6.7720474508003755e-06, "loss": 0.3283, "step": 13420 }, { "epoch": 0.62, "grad_norm": 0.3662025150537096, "learning_rate": 6.770639217386459e-06, "loss": 0.2277, "step": 13421 }, { "epoch": 0.62, "grad_norm": 0.5127706818605707, "learning_rate": 6.7692310554674775e-06, "loss": 0.3421, "step": 13422 }, { "epoch": 0.62, "grad_norm": 0.7866663167104675, "learning_rate": 6.767822965074607e-06, "loss": 0.4178, "step": 13423 }, { "epoch": 0.62, "grad_norm": 0.3824868749707829, "learning_rate": 6.766414946239018e-06, "loss": 0.2878, "step": 13424 }, { "epoch": 0.62, "grad_norm": 0.31514165238858877, "learning_rate": 6.765006998991889e-06, "loss": 0.271, "step": 13425 }, { "epoch": 0.62, "grad_norm": 0.8583009413402867, "learning_rate": 6.763599123364386e-06, "loss": 0.4209, "step": 13426 }, { "epoch": 0.62, "grad_norm": 0.33961009311629115, "learning_rate": 6.762191319387678e-06, "loss": 0.2637, "step": 13427 }, { "epoch": 0.62, "grad_norm": 0.22781086565203268, "learning_rate": 6.760783587092936e-06, "loss": 0.1724, "step": 13428 }, { "epoch": 0.62, "grad_norm": 1.0185572312100901, "learning_rate": 6.75937592651132e-06, "loss": 0.5024, "step": 13429 }, { "epoch": 0.62, "grad_norm": 0.36219398999337843, "learning_rate": 6.757968337674001e-06, "loss": 0.2596, "step": 13430 }, { "epoch": 0.62, "grad_norm": 0.7307791786558051, "learning_rate": 6.756560820612137e-06, "loss": 0.3242, "step": 13431 }, { "epoch": 0.62, "grad_norm": 0.3674203393977306, "learning_rate": 6.755153375356891e-06, "loss": 0.3202, "step": 13432 }, { "epoch": 0.62, "grad_norm": 0.3357170514431487, "learning_rate": 6.753746001939416e-06, "loss": 0.2773, "step": 13433 }, { "epoch": 0.62, "grad_norm": 0.49724624865556893, "learning_rate": 6.752338700390881e-06, "loss": 0.2422, "step": 13434 }, { "epoch": 0.62, "grad_norm": 0.35246492626908804, "learning_rate": 6.75093147074244e-06, "loss": 0.1768, "step": 13435 }, { "epoch": 0.62, "grad_norm": 0.28356269635467823, "learning_rate": 6.7495243130252415e-06, "loss": 0.2313, "step": 13436 }, { "epoch": 0.62, "grad_norm": 0.8458566086516052, "learning_rate": 6.748117227270439e-06, "loss": 0.3898, "step": 13437 }, { "epoch": 0.62, "grad_norm": 0.5621487706187629, "learning_rate": 6.74671021350919e-06, "loss": 0.3047, "step": 13438 }, { "epoch": 0.62, "grad_norm": 0.38914373729341334, "learning_rate": 6.7453032717726416e-06, "loss": 0.2771, "step": 13439 }, { "epoch": 0.62, "grad_norm": 0.2884000429154594, "learning_rate": 6.743896402091941e-06, "loss": 0.2421, "step": 13440 }, { "epoch": 0.62, "grad_norm": 0.2960152004770081, "learning_rate": 6.742489604498237e-06, "loss": 0.0927, "step": 13441 }, { "epoch": 0.62, "grad_norm": 0.4255825737909948, "learning_rate": 6.741082879022671e-06, "loss": 0.2808, "step": 13442 }, { "epoch": 0.62, "grad_norm": 1.0015148640382894, "learning_rate": 6.739676225696392e-06, "loss": 0.409, "step": 13443 }, { "epoch": 0.62, "grad_norm": 0.36968843538834517, "learning_rate": 6.738269644550538e-06, "loss": 0.2715, "step": 13444 }, { "epoch": 0.62, "grad_norm": 0.35961199497053314, "learning_rate": 6.736863135616251e-06, "loss": 0.2532, "step": 13445 }, { "epoch": 0.62, "grad_norm": 0.31734163711113594, "learning_rate": 6.7354566989246665e-06, "loss": 0.1709, "step": 13446 }, { "epoch": 0.62, "grad_norm": 1.1538689303295533, "learning_rate": 6.7340503345069264e-06, "loss": 0.4795, "step": 13447 }, { "epoch": 0.62, "grad_norm": 0.2527398877963983, "learning_rate": 6.732644042394164e-06, "loss": 0.2161, "step": 13448 }, { "epoch": 0.62, "grad_norm": 1.0743780660800497, "learning_rate": 6.7312378226175135e-06, "loss": 0.3704, "step": 13449 }, { "epoch": 0.62, "grad_norm": 1.3649647864154875, "learning_rate": 6.729831675208103e-06, "loss": 0.7868, "step": 13450 }, { "epoch": 0.62, "grad_norm": 0.32281818024577347, "learning_rate": 6.728425600197072e-06, "loss": 0.2078, "step": 13451 }, { "epoch": 0.62, "grad_norm": 0.3993358600248972, "learning_rate": 6.727019597615545e-06, "loss": 0.2834, "step": 13452 }, { "epoch": 0.62, "grad_norm": 0.34466100941584016, "learning_rate": 6.7256136674946505e-06, "loss": 0.2088, "step": 13453 }, { "epoch": 0.62, "grad_norm": 0.4061849971330678, "learning_rate": 6.724207809865508e-06, "loss": 0.2272, "step": 13454 }, { "epoch": 0.62, "grad_norm": 1.1239183653135179, "learning_rate": 6.7228020247592515e-06, "loss": 0.5097, "step": 13455 }, { "epoch": 0.62, "grad_norm": 0.4229416473725185, "learning_rate": 6.721396312207e-06, "loss": 0.3597, "step": 13456 }, { "epoch": 0.62, "grad_norm": 0.36592049760286427, "learning_rate": 6.719990672239873e-06, "loss": 0.1855, "step": 13457 }, { "epoch": 0.62, "grad_norm": 0.3878084654086433, "learning_rate": 6.718585104888993e-06, "loss": 0.267, "step": 13458 }, { "epoch": 0.62, "grad_norm": 0.3783029285320806, "learning_rate": 6.717179610185473e-06, "loss": 0.272, "step": 13459 }, { "epoch": 0.62, "grad_norm": 0.41421337091575466, "learning_rate": 6.715774188160434e-06, "loss": 0.2792, "step": 13460 }, { "epoch": 0.62, "grad_norm": 0.38201081103613405, "learning_rate": 6.71436883884499e-06, "loss": 0.2611, "step": 13461 }, { "epoch": 0.62, "grad_norm": 1.3217153086747997, "learning_rate": 6.712963562270252e-06, "loss": 0.8192, "step": 13462 }, { "epoch": 0.62, "grad_norm": 0.4258920785975871, "learning_rate": 6.7115583584673294e-06, "loss": 0.2502, "step": 13463 }, { "epoch": 0.62, "grad_norm": 0.3106395061290788, "learning_rate": 6.710153227467342e-06, "loss": 0.2289, "step": 13464 }, { "epoch": 0.62, "grad_norm": 0.5877860328354798, "learning_rate": 6.708748169301389e-06, "loss": 0.2921, "step": 13465 }, { "epoch": 0.62, "grad_norm": 0.4510729527949029, "learning_rate": 6.707343184000579e-06, "loss": 0.3165, "step": 13466 }, { "epoch": 0.62, "grad_norm": 0.3079313779018472, "learning_rate": 6.705938271596013e-06, "loss": 0.1639, "step": 13467 }, { "epoch": 0.62, "grad_norm": 0.3793296968298386, "learning_rate": 6.704533432118804e-06, "loss": 0.2901, "step": 13468 }, { "epoch": 0.62, "grad_norm": 0.3941715291265191, "learning_rate": 6.703128665600047e-06, "loss": 0.2511, "step": 13469 }, { "epoch": 0.62, "grad_norm": 1.2781675938984973, "learning_rate": 6.701723972070845e-06, "loss": 0.307, "step": 13470 }, { "epoch": 0.62, "grad_norm": 0.5007784284424243, "learning_rate": 6.700319351562295e-06, "loss": 0.3301, "step": 13471 }, { "epoch": 0.62, "grad_norm": 0.2962199301239696, "learning_rate": 6.698914804105492e-06, "loss": 0.2725, "step": 13472 }, { "epoch": 0.62, "grad_norm": 0.2782068150804055, "learning_rate": 6.697510329731536e-06, "loss": 0.157, "step": 13473 }, { "epoch": 0.62, "grad_norm": 0.7440124466178986, "learning_rate": 6.696105928471519e-06, "loss": 0.5213, "step": 13474 }, { "epoch": 0.62, "grad_norm": 0.7127907512326165, "learning_rate": 6.694701600356531e-06, "loss": 0.393, "step": 13475 }, { "epoch": 0.62, "grad_norm": 0.3677233555560248, "learning_rate": 6.693297345417662e-06, "loss": 0.2758, "step": 13476 }, { "epoch": 0.62, "grad_norm": 0.7985519142259734, "learning_rate": 6.691893163686005e-06, "loss": 0.3188, "step": 13477 }, { "epoch": 0.62, "grad_norm": 0.3907876334295463, "learning_rate": 6.690489055192646e-06, "loss": 0.1396, "step": 13478 }, { "epoch": 0.62, "grad_norm": 0.345863167914341, "learning_rate": 6.689085019968669e-06, "loss": 0.2438, "step": 13479 }, { "epoch": 0.62, "grad_norm": 0.3783344118322687, "learning_rate": 6.6876810580451545e-06, "loss": 0.2657, "step": 13480 }, { "epoch": 0.62, "grad_norm": 0.4343064505170676, "learning_rate": 6.686277169453193e-06, "loss": 0.2777, "step": 13481 }, { "epoch": 0.62, "grad_norm": 0.666444687901114, "learning_rate": 6.684873354223862e-06, "loss": 0.3352, "step": 13482 }, { "epoch": 0.62, "grad_norm": 0.4838406563508897, "learning_rate": 6.683469612388239e-06, "loss": 0.2534, "step": 13483 }, { "epoch": 0.62, "grad_norm": 0.3194564123018437, "learning_rate": 6.682065943977399e-06, "loss": 0.2639, "step": 13484 }, { "epoch": 0.62, "grad_norm": 0.2676109107466966, "learning_rate": 6.6806623490224234e-06, "loss": 0.19, "step": 13485 }, { "epoch": 0.62, "grad_norm": 1.2457416357936884, "learning_rate": 6.679258827554384e-06, "loss": 0.7245, "step": 13486 }, { "epoch": 0.62, "grad_norm": 0.36899346696751806, "learning_rate": 6.677855379604355e-06, "loss": 0.2063, "step": 13487 }, { "epoch": 0.62, "grad_norm": 0.343492131064617, "learning_rate": 6.6764520052034054e-06, "loss": 0.2834, "step": 13488 }, { "epoch": 0.62, "grad_norm": 0.8327053881189924, "learning_rate": 6.675048704382603e-06, "loss": 0.3967, "step": 13489 }, { "epoch": 0.62, "grad_norm": 0.32990200827519667, "learning_rate": 6.67364547717302e-06, "loss": 0.2066, "step": 13490 }, { "epoch": 0.62, "grad_norm": 0.28119829940692465, "learning_rate": 6.67224232360572e-06, "loss": 0.1727, "step": 13491 }, { "epoch": 0.62, "grad_norm": 0.366735138486504, "learning_rate": 6.670839243711768e-06, "loss": 0.3136, "step": 13492 }, { "epoch": 0.62, "grad_norm": 0.44540652266741043, "learning_rate": 6.669436237522223e-06, "loss": 0.1861, "step": 13493 }, { "epoch": 0.62, "grad_norm": 0.45641235260931867, "learning_rate": 6.6680333050681535e-06, "loss": 0.3255, "step": 13494 }, { "epoch": 0.62, "grad_norm": 0.36160167808414756, "learning_rate": 6.666630446380614e-06, "loss": 0.2789, "step": 13495 }, { "epoch": 0.62, "grad_norm": 0.42847561708183446, "learning_rate": 6.665227661490664e-06, "loss": 0.1141, "step": 13496 }, { "epoch": 0.62, "grad_norm": 0.34679169113342967, "learning_rate": 6.6638249504293565e-06, "loss": 0.2504, "step": 13497 }, { "epoch": 0.62, "grad_norm": 0.41407911669513575, "learning_rate": 6.662422313227751e-06, "loss": 0.2759, "step": 13498 }, { "epoch": 0.62, "grad_norm": 0.4150506483783012, "learning_rate": 6.661019749916899e-06, "loss": 0.2941, "step": 13499 }, { "epoch": 0.62, "grad_norm": 0.2773672562255504, "learning_rate": 6.659617260527855e-06, "loss": 0.2424, "step": 13500 }, { "epoch": 0.62, "grad_norm": 1.2838819425827355, "learning_rate": 6.658214845091664e-06, "loss": 0.6325, "step": 13501 }, { "epoch": 0.62, "grad_norm": 0.5909897065863796, "learning_rate": 6.65681250363937e-06, "loss": 0.3172, "step": 13502 }, { "epoch": 0.62, "grad_norm": 0.2537293165029303, "learning_rate": 6.655410236202029e-06, "loss": 0.2182, "step": 13503 }, { "epoch": 0.62, "grad_norm": 1.2197102302697673, "learning_rate": 6.654008042810682e-06, "loss": 0.802, "step": 13504 }, { "epoch": 0.62, "grad_norm": 0.5073364028729574, "learning_rate": 6.652605923496372e-06, "loss": 0.326, "step": 13505 }, { "epoch": 0.62, "grad_norm": 0.28322402275180153, "learning_rate": 6.651203878290139e-06, "loss": 0.1886, "step": 13506 }, { "epoch": 0.62, "grad_norm": 0.39249863478245, "learning_rate": 6.649801907223026e-06, "loss": 0.3182, "step": 13507 }, { "epoch": 0.62, "grad_norm": 0.6417425941291718, "learning_rate": 6.648400010326071e-06, "loss": 0.3536, "step": 13508 }, { "epoch": 0.62, "grad_norm": 0.2937066946165478, "learning_rate": 6.646998187630308e-06, "loss": 0.1727, "step": 13509 }, { "epoch": 0.62, "grad_norm": 0.7515154896469476, "learning_rate": 6.645596439166771e-06, "loss": 0.4306, "step": 13510 }, { "epoch": 0.62, "grad_norm": 0.3881477218260446, "learning_rate": 6.644194764966499e-06, "loss": 0.2987, "step": 13511 }, { "epoch": 0.62, "grad_norm": 0.34565792456014566, "learning_rate": 6.642793165060521e-06, "loss": 0.2906, "step": 13512 }, { "epoch": 0.62, "grad_norm": 0.39395375236212493, "learning_rate": 6.6413916394798665e-06, "loss": 0.1328, "step": 13513 }, { "epoch": 0.62, "grad_norm": 0.8183444754834605, "learning_rate": 6.639990188255559e-06, "loss": 0.3757, "step": 13514 }, { "epoch": 0.62, "grad_norm": 0.3037058313122601, "learning_rate": 6.638588811418635e-06, "loss": 0.2288, "step": 13515 }, { "epoch": 0.62, "grad_norm": 0.3585138180864113, "learning_rate": 6.6371875090001136e-06, "loss": 0.2639, "step": 13516 }, { "epoch": 0.62, "grad_norm": 0.6925361484520756, "learning_rate": 6.6357862810310215e-06, "loss": 0.342, "step": 13517 }, { "epoch": 0.62, "grad_norm": 0.2779713093908614, "learning_rate": 6.63438512754238e-06, "loss": 0.2057, "step": 13518 }, { "epoch": 0.62, "grad_norm": 0.3914515420958655, "learning_rate": 6.632984048565202e-06, "loss": 0.2587, "step": 13519 }, { "epoch": 0.62, "grad_norm": 0.8447351147748743, "learning_rate": 6.631583044130516e-06, "loss": 0.3886, "step": 13520 }, { "epoch": 0.62, "grad_norm": 0.40104828827966543, "learning_rate": 6.630182114269334e-06, "loss": 0.27, "step": 13521 }, { "epoch": 0.62, "grad_norm": 0.7451099333818421, "learning_rate": 6.628781259012673e-06, "loss": 0.3169, "step": 13522 }, { "epoch": 0.62, "grad_norm": 0.343501669260469, "learning_rate": 6.627380478391543e-06, "loss": 0.2704, "step": 13523 }, { "epoch": 0.62, "grad_norm": 0.38434460145562876, "learning_rate": 6.625979772436961e-06, "loss": 0.3096, "step": 13524 }, { "epoch": 0.62, "grad_norm": 0.33125826989359125, "learning_rate": 6.624579141179937e-06, "loss": 0.2018, "step": 13525 }, { "epoch": 0.62, "grad_norm": 0.5176780281028744, "learning_rate": 6.623178584651475e-06, "loss": 0.1098, "step": 13526 }, { "epoch": 0.62, "grad_norm": 0.4105483439709261, "learning_rate": 6.6217781028825815e-06, "loss": 0.3069, "step": 13527 }, { "epoch": 0.62, "grad_norm": 0.4091475743802213, "learning_rate": 6.620377695904267e-06, "loss": 0.3014, "step": 13528 }, { "epoch": 0.62, "grad_norm": 0.5346846208175418, "learning_rate": 6.618977363747538e-06, "loss": 0.313, "step": 13529 }, { "epoch": 0.62, "grad_norm": 0.39864704552911057, "learning_rate": 6.617577106443387e-06, "loss": 0.2877, "step": 13530 }, { "epoch": 0.62, "grad_norm": 0.24378561639885932, "learning_rate": 6.61617692402282e-06, "loss": 0.2284, "step": 13531 }, { "epoch": 0.62, "grad_norm": 0.6434848560694352, "learning_rate": 6.61477681651683e-06, "loss": 0.1386, "step": 13532 }, { "epoch": 0.62, "grad_norm": 0.3822749425931364, "learning_rate": 6.613376783956423e-06, "loss": 0.292, "step": 13533 }, { "epoch": 0.62, "grad_norm": 0.6954658844577782, "learning_rate": 6.61197682637259e-06, "loss": 0.4242, "step": 13534 }, { "epoch": 0.62, "grad_norm": 0.3236155639502606, "learning_rate": 6.610576943796325e-06, "loss": 0.2222, "step": 13535 }, { "epoch": 0.62, "grad_norm": 0.3741457286097807, "learning_rate": 6.609177136258618e-06, "loss": 0.2939, "step": 13536 }, { "epoch": 0.62, "grad_norm": 0.2875022439771004, "learning_rate": 6.60777740379046e-06, "loss": 0.1681, "step": 13537 }, { "epoch": 0.62, "grad_norm": 0.7205991992527593, "learning_rate": 6.606377746422845e-06, "loss": 0.4434, "step": 13538 }, { "epoch": 0.62, "grad_norm": 0.27628887701497773, "learning_rate": 6.604978164186752e-06, "loss": 0.2185, "step": 13539 }, { "epoch": 0.62, "grad_norm": 0.6922191734043246, "learning_rate": 6.603578657113172e-06, "loss": 0.4262, "step": 13540 }, { "epoch": 0.62, "grad_norm": 1.2247751713094377, "learning_rate": 6.602179225233088e-06, "loss": 0.768, "step": 13541 }, { "epoch": 0.62, "grad_norm": 0.3359134785710888, "learning_rate": 6.600779868577479e-06, "loss": 0.182, "step": 13542 }, { "epoch": 0.62, "grad_norm": 0.2462573463784811, "learning_rate": 6.599380587177329e-06, "loss": 0.2157, "step": 13543 }, { "epoch": 0.62, "grad_norm": 0.6638396313500866, "learning_rate": 6.597981381063612e-06, "loss": 0.297, "step": 13544 }, { "epoch": 0.62, "grad_norm": 0.37277797544143787, "learning_rate": 6.59658225026731e-06, "loss": 0.2258, "step": 13545 }, { "epoch": 0.62, "grad_norm": 0.7964061584648231, "learning_rate": 6.595183194819399e-06, "loss": 0.4656, "step": 13546 }, { "epoch": 0.62, "grad_norm": 0.32931497769222684, "learning_rate": 6.5937842147508515e-06, "loss": 0.2863, "step": 13547 }, { "epoch": 0.62, "grad_norm": 0.3948796101686391, "learning_rate": 6.5923853100926375e-06, "loss": 0.2909, "step": 13548 }, { "epoch": 0.62, "grad_norm": 0.2518172731649503, "learning_rate": 6.590986480875723e-06, "loss": 0.1136, "step": 13549 }, { "epoch": 0.62, "grad_norm": 0.804080672849914, "learning_rate": 6.589587727131086e-06, "loss": 0.4448, "step": 13550 }, { "epoch": 0.62, "grad_norm": 0.28545152414040403, "learning_rate": 6.58818904888969e-06, "loss": 0.2574, "step": 13551 }, { "epoch": 0.62, "grad_norm": 0.4943696904867765, "learning_rate": 6.586790446182501e-06, "loss": 0.2902, "step": 13552 }, { "epoch": 0.62, "grad_norm": 1.3112347802196245, "learning_rate": 6.58539191904048e-06, "loss": 0.7529, "step": 13553 }, { "epoch": 0.62, "grad_norm": 0.33234850837298635, "learning_rate": 6.583993467494592e-06, "loss": 0.2774, "step": 13554 }, { "epoch": 0.62, "grad_norm": 0.261054235047595, "learning_rate": 6.5825950915757964e-06, "loss": 0.201, "step": 13555 }, { "epoch": 0.62, "grad_norm": 0.5538101293883746, "learning_rate": 6.581196791315052e-06, "loss": 0.3016, "step": 13556 }, { "epoch": 0.62, "grad_norm": 0.3450811139550611, "learning_rate": 6.579798566743314e-06, "loss": 0.2522, "step": 13557 }, { "epoch": 0.62, "grad_norm": 1.0680655851367749, "learning_rate": 6.578400417891539e-06, "loss": 0.3974, "step": 13558 }, { "epoch": 0.62, "grad_norm": 0.38586406433553, "learning_rate": 6.577002344790684e-06, "loss": 0.3192, "step": 13559 }, { "epoch": 0.62, "grad_norm": 0.3513168492728097, "learning_rate": 6.575604347471696e-06, "loss": 0.2524, "step": 13560 }, { "epoch": 0.62, "grad_norm": 1.1187370232006462, "learning_rate": 6.574206425965528e-06, "loss": 0.4263, "step": 13561 }, { "epoch": 0.62, "grad_norm": 0.32255015788801844, "learning_rate": 6.572808580303124e-06, "loss": 0.2365, "step": 13562 }, { "epoch": 0.62, "grad_norm": 0.3814831841104404, "learning_rate": 6.571410810515439e-06, "loss": 0.229, "step": 13563 }, { "epoch": 0.62, "grad_norm": 0.3654973383307629, "learning_rate": 6.570013116633413e-06, "loss": 0.2554, "step": 13564 }, { "epoch": 0.62, "grad_norm": 0.5656393675167831, "learning_rate": 6.568615498687993e-06, "loss": 0.2579, "step": 13565 }, { "epoch": 0.62, "grad_norm": 0.40017997220653473, "learning_rate": 6.567217956710115e-06, "loss": 0.2584, "step": 13566 }, { "epoch": 0.62, "grad_norm": 0.3513324958219387, "learning_rate": 6.565820490730725e-06, "loss": 0.2933, "step": 13567 }, { "epoch": 0.62, "grad_norm": 0.4540643968902497, "learning_rate": 6.564423100780759e-06, "loss": 0.2126, "step": 13568 }, { "epoch": 0.62, "grad_norm": 0.2592219254927489, "learning_rate": 6.563025786891155e-06, "loss": 0.2073, "step": 13569 }, { "epoch": 0.62, "grad_norm": 1.2120595394033797, "learning_rate": 6.561628549092844e-06, "loss": 0.7161, "step": 13570 }, { "epoch": 0.62, "grad_norm": 0.3368745478916684, "learning_rate": 6.560231387416766e-06, "loss": 0.2568, "step": 13571 }, { "epoch": 0.62, "grad_norm": 0.38017450052947815, "learning_rate": 6.558834301893851e-06, "loss": 0.291, "step": 13572 }, { "epoch": 0.62, "grad_norm": 0.7528033112160798, "learning_rate": 6.557437292555027e-06, "loss": 0.3998, "step": 13573 }, { "epoch": 0.62, "grad_norm": 0.4660636674683085, "learning_rate": 6.556040359431219e-06, "loss": 0.3625, "step": 13574 }, { "epoch": 0.62, "grad_norm": 0.22445241674572333, "learning_rate": 6.554643502553365e-06, "loss": 0.1799, "step": 13575 }, { "epoch": 0.62, "grad_norm": 0.44690457393576005, "learning_rate": 6.55324672195238e-06, "loss": 0.2624, "step": 13576 }, { "epoch": 0.62, "grad_norm": 1.189648060758862, "learning_rate": 6.551850017659192e-06, "loss": 0.6777, "step": 13577 }, { "epoch": 0.62, "grad_norm": 0.41413053172059355, "learning_rate": 6.550453389704721e-06, "loss": 0.2021, "step": 13578 }, { "epoch": 0.62, "grad_norm": 0.4623761277410905, "learning_rate": 6.5490568381198815e-06, "loss": 0.3202, "step": 13579 }, { "epoch": 0.62, "grad_norm": 0.49373473530849976, "learning_rate": 6.547660362935603e-06, "loss": 0.3176, "step": 13580 }, { "epoch": 0.62, "grad_norm": 0.25492930925364227, "learning_rate": 6.546263964182796e-06, "loss": 0.1377, "step": 13581 }, { "epoch": 0.62, "grad_norm": 0.5759292252415109, "learning_rate": 6.544867641892376e-06, "loss": 0.443, "step": 13582 }, { "epoch": 0.62, "grad_norm": 0.37642791123404024, "learning_rate": 6.543471396095256e-06, "loss": 0.2965, "step": 13583 }, { "epoch": 0.62, "grad_norm": 0.3966915695437229, "learning_rate": 6.542075226822349e-06, "loss": 0.1939, "step": 13584 }, { "epoch": 0.62, "grad_norm": 0.47137929420304037, "learning_rate": 6.540679134104562e-06, "loss": 0.3519, "step": 13585 }, { "epoch": 0.62, "grad_norm": 0.515896333962505, "learning_rate": 6.539283117972805e-06, "loss": 0.3161, "step": 13586 }, { "epoch": 0.62, "grad_norm": 0.32782025648902746, "learning_rate": 6.537887178457984e-06, "loss": 0.2537, "step": 13587 }, { "epoch": 0.62, "grad_norm": 0.21341923500148452, "learning_rate": 6.536491315591006e-06, "loss": 0.1504, "step": 13588 }, { "epoch": 0.62, "grad_norm": 0.8379827003431878, "learning_rate": 6.5350955294027715e-06, "loss": 0.4556, "step": 13589 }, { "epoch": 0.62, "grad_norm": 0.4327063589434647, "learning_rate": 6.533699819924182e-06, "loss": 0.3112, "step": 13590 }, { "epoch": 0.62, "grad_norm": 0.3309764422731282, "learning_rate": 6.532304187186138e-06, "loss": 0.2564, "step": 13591 }, { "epoch": 0.62, "grad_norm": 1.1003021709932568, "learning_rate": 6.530908631219533e-06, "loss": 0.5603, "step": 13592 }, { "epoch": 0.62, "grad_norm": 0.3220238624452519, "learning_rate": 6.5295131520552725e-06, "loss": 0.2231, "step": 13593 }, { "epoch": 0.62, "grad_norm": 0.5519985755696906, "learning_rate": 6.528117749724248e-06, "loss": 0.294, "step": 13594 }, { "epoch": 0.62, "grad_norm": 0.380345351740475, "learning_rate": 6.526722424257346e-06, "loss": 0.3062, "step": 13595 }, { "epoch": 0.62, "grad_norm": 0.3367410491272575, "learning_rate": 6.525327175685459e-06, "loss": 0.2723, "step": 13596 }, { "epoch": 0.62, "grad_norm": 0.38463427642600034, "learning_rate": 6.5239320040394836e-06, "loss": 0.1588, "step": 13597 }, { "epoch": 0.62, "grad_norm": 0.3956691344379368, "learning_rate": 6.522536909350303e-06, "loss": 0.2897, "step": 13598 }, { "epoch": 0.62, "grad_norm": 0.6990117335351764, "learning_rate": 6.521141891648804e-06, "loss": 0.3605, "step": 13599 }, { "epoch": 0.62, "grad_norm": 0.29000361808172953, "learning_rate": 6.519746950965868e-06, "loss": 0.2359, "step": 13600 }, { "epoch": 0.62, "grad_norm": 0.5730095770691935, "learning_rate": 6.518352087332384e-06, "loss": 0.323, "step": 13601 }, { "epoch": 0.62, "grad_norm": 0.41486784338328975, "learning_rate": 6.516957300779227e-06, "loss": 0.3106, "step": 13602 }, { "epoch": 0.62, "grad_norm": 0.2895026703735662, "learning_rate": 6.515562591337279e-06, "loss": 0.2512, "step": 13603 }, { "epoch": 0.62, "grad_norm": 1.1280972075387121, "learning_rate": 6.514167959037415e-06, "loss": 0.3003, "step": 13604 }, { "epoch": 0.62, "grad_norm": 0.6038107088624585, "learning_rate": 6.5127734039105154e-06, "loss": 0.3317, "step": 13605 }, { "epoch": 0.63, "grad_norm": 0.3556698684340373, "learning_rate": 6.51137892598745e-06, "loss": 0.2905, "step": 13606 }, { "epoch": 0.63, "grad_norm": 0.5091940594138323, "learning_rate": 6.509984525299094e-06, "loss": 0.2775, "step": 13607 }, { "epoch": 0.63, "grad_norm": 0.44259149965369876, "learning_rate": 6.508590201876317e-06, "loss": 0.3178, "step": 13608 }, { "epoch": 0.63, "grad_norm": 0.2651758019393575, "learning_rate": 6.507195955749983e-06, "loss": 0.1761, "step": 13609 }, { "epoch": 0.63, "grad_norm": 0.3843186926675106, "learning_rate": 6.505801786950969e-06, "loss": 0.2547, "step": 13610 }, { "epoch": 0.63, "grad_norm": 0.4165351540492922, "learning_rate": 6.504407695510135e-06, "loss": 0.2513, "step": 13611 }, { "epoch": 0.63, "grad_norm": 0.5717073071296611, "learning_rate": 6.5030136814583475e-06, "loss": 0.3512, "step": 13612 }, { "epoch": 0.63, "grad_norm": 0.9462675654888323, "learning_rate": 6.501619744826462e-06, "loss": 0.3942, "step": 13613 }, { "epoch": 0.63, "grad_norm": 0.2596385621002387, "learning_rate": 6.500225885645346e-06, "loss": 0.2158, "step": 13614 }, { "epoch": 0.63, "grad_norm": 0.2650466362583132, "learning_rate": 6.498832103945857e-06, "loss": 0.1827, "step": 13615 }, { "epoch": 0.63, "grad_norm": 1.2108509039715882, "learning_rate": 6.49743839975885e-06, "loss": 0.4793, "step": 13616 }, { "epoch": 0.63, "grad_norm": 0.2804825833058116, "learning_rate": 6.4960447731151785e-06, "loss": 0.1193, "step": 13617 }, { "epoch": 0.63, "grad_norm": 0.38142806029102955, "learning_rate": 6.494651224045702e-06, "loss": 0.2884, "step": 13618 }, { "epoch": 0.63, "grad_norm": 0.6542733355473932, "learning_rate": 6.493257752581268e-06, "loss": 0.3421, "step": 13619 }, { "epoch": 0.63, "grad_norm": 0.30062518581652486, "learning_rate": 6.491864358752728e-06, "loss": 0.1144, "step": 13620 }, { "epoch": 0.63, "grad_norm": 0.2915309033095364, "learning_rate": 6.490471042590929e-06, "loss": 0.2113, "step": 13621 }, { "epoch": 0.63, "grad_norm": 0.3781819429036915, "learning_rate": 6.489077804126717e-06, "loss": 0.3169, "step": 13622 }, { "epoch": 0.63, "grad_norm": 0.24554877796376093, "learning_rate": 6.487684643390941e-06, "loss": 0.1041, "step": 13623 }, { "epoch": 0.63, "grad_norm": 0.3710530595122849, "learning_rate": 6.486291560414441e-06, "loss": 0.3026, "step": 13624 }, { "epoch": 0.63, "grad_norm": 0.7890588175173323, "learning_rate": 6.48489855522806e-06, "loss": 0.4993, "step": 13625 }, { "epoch": 0.63, "grad_norm": 0.3263755436709602, "learning_rate": 6.483505627862632e-06, "loss": 0.2909, "step": 13626 }, { "epoch": 0.63, "grad_norm": 0.26265339357003836, "learning_rate": 6.482112778349005e-06, "loss": 0.1732, "step": 13627 }, { "epoch": 0.63, "grad_norm": 0.4774459288676462, "learning_rate": 6.480720006718011e-06, "loss": 0.2661, "step": 13628 }, { "epoch": 0.63, "grad_norm": 0.5910513082801484, "learning_rate": 6.479327313000483e-06, "loss": 0.3357, "step": 13629 }, { "epoch": 0.63, "grad_norm": 0.32338598774722543, "learning_rate": 6.477934697227254e-06, "loss": 0.2462, "step": 13630 }, { "epoch": 0.63, "grad_norm": 0.4450418972773433, "learning_rate": 6.476542159429158e-06, "loss": 0.3512, "step": 13631 }, { "epoch": 0.63, "grad_norm": 0.5839848323571413, "learning_rate": 6.475149699637022e-06, "loss": 0.325, "step": 13632 }, { "epoch": 0.63, "grad_norm": 0.2150093790563464, "learning_rate": 6.473757317881675e-06, "loss": 0.147, "step": 13633 }, { "epoch": 0.63, "grad_norm": 0.37276436347433134, "learning_rate": 6.47236501419394e-06, "loss": 0.2913, "step": 13634 }, { "epoch": 0.63, "grad_norm": 0.7088512573983962, "learning_rate": 6.4709727886046455e-06, "loss": 0.4263, "step": 13635 }, { "epoch": 0.63, "grad_norm": 0.3217880742746967, "learning_rate": 6.4695806411446125e-06, "loss": 0.2284, "step": 13636 }, { "epoch": 0.63, "grad_norm": 1.0747593688227615, "learning_rate": 6.4681885718446624e-06, "loss": 0.6763, "step": 13637 }, { "epoch": 0.63, "grad_norm": 0.37255231144988693, "learning_rate": 6.466796580735611e-06, "loss": 0.2954, "step": 13638 }, { "epoch": 0.63, "grad_norm": 0.3922717309648967, "learning_rate": 6.4654046678482765e-06, "loss": 0.2734, "step": 13639 }, { "epoch": 0.63, "grad_norm": 0.2500497906705041, "learning_rate": 6.4640128332134774e-06, "loss": 0.1114, "step": 13640 }, { "epoch": 0.63, "grad_norm": 0.6240687126576941, "learning_rate": 6.46262107686203e-06, "loss": 0.3444, "step": 13641 }, { "epoch": 0.63, "grad_norm": 0.2746684977192115, "learning_rate": 6.46122939882474e-06, "loss": 0.2448, "step": 13642 }, { "epoch": 0.63, "grad_norm": 1.2084918136604883, "learning_rate": 6.459837799132416e-06, "loss": 0.4871, "step": 13643 }, { "epoch": 0.63, "grad_norm": 0.7224313470811684, "learning_rate": 6.458446277815876e-06, "loss": 0.4363, "step": 13644 }, { "epoch": 0.63, "grad_norm": 0.2301581105292881, "learning_rate": 6.45705483490592e-06, "loss": 0.1827, "step": 13645 }, { "epoch": 0.63, "grad_norm": 0.3500829726178428, "learning_rate": 6.455663470433358e-06, "loss": 0.2475, "step": 13646 }, { "epoch": 0.63, "grad_norm": 0.5678009054775089, "learning_rate": 6.454272184428987e-06, "loss": 0.3846, "step": 13647 }, { "epoch": 0.63, "grad_norm": 0.35479739755105305, "learning_rate": 6.452880976923614e-06, "loss": 0.2848, "step": 13648 }, { "epoch": 0.63, "grad_norm": 0.9924227603975603, "learning_rate": 6.451489847948039e-06, "loss": 0.4607, "step": 13649 }, { "epoch": 0.63, "grad_norm": 0.32170812810026933, "learning_rate": 6.450098797533057e-06, "loss": 0.2713, "step": 13650 }, { "epoch": 0.63, "grad_norm": 0.3945097247760993, "learning_rate": 6.4487078257094685e-06, "loss": 0.2879, "step": 13651 }, { "epoch": 0.63, "grad_norm": 0.33951515707295765, "learning_rate": 6.447316932508063e-06, "loss": 0.2364, "step": 13652 }, { "epoch": 0.63, "grad_norm": 0.7187630399193446, "learning_rate": 6.445926117959638e-06, "loss": 0.2936, "step": 13653 }, { "epoch": 0.63, "grad_norm": 0.28793544651631053, "learning_rate": 6.4445353820949826e-06, "loss": 0.2512, "step": 13654 }, { "epoch": 0.63, "grad_norm": 0.46735270042750343, "learning_rate": 6.4431447249448875e-06, "loss": 0.3717, "step": 13655 }, { "epoch": 0.63, "grad_norm": 1.319575368106341, "learning_rate": 6.441754146540137e-06, "loss": 0.2372, "step": 13656 }, { "epoch": 0.63, "grad_norm": 0.3786108169757764, "learning_rate": 6.4403636469115225e-06, "loss": 0.2575, "step": 13657 }, { "epoch": 0.63, "grad_norm": 0.41239527876538173, "learning_rate": 6.438973226089828e-06, "loss": 0.3107, "step": 13658 }, { "epoch": 0.63, "grad_norm": 0.3007128347499269, "learning_rate": 6.437582884105835e-06, "loss": 0.1245, "step": 13659 }, { "epoch": 0.63, "grad_norm": 0.39411157741329433, "learning_rate": 6.436192620990318e-06, "loss": 0.282, "step": 13660 }, { "epoch": 0.63, "grad_norm": 1.2785307167030902, "learning_rate": 6.434802436774065e-06, "loss": 0.712, "step": 13661 }, { "epoch": 0.63, "grad_norm": 0.33815883677112824, "learning_rate": 6.4334123314878495e-06, "loss": 0.2395, "step": 13662 }, { "epoch": 0.63, "grad_norm": 0.35841567956965986, "learning_rate": 6.4320223051624485e-06, "loss": 0.2777, "step": 13663 }, { "epoch": 0.63, "grad_norm": 0.7329798118941226, "learning_rate": 6.430632357828632e-06, "loss": 0.418, "step": 13664 }, { "epoch": 0.63, "grad_norm": 0.2909518970827277, "learning_rate": 6.429242489517178e-06, "loss": 0.2164, "step": 13665 }, { "epoch": 0.63, "grad_norm": 0.3413314932462295, "learning_rate": 6.427852700258852e-06, "loss": 0.1978, "step": 13666 }, { "epoch": 0.63, "grad_norm": 0.4607850684279235, "learning_rate": 6.4264629900844255e-06, "loss": 0.3709, "step": 13667 }, { "epoch": 0.63, "grad_norm": 0.787700481262018, "learning_rate": 6.425073359024664e-06, "loss": 0.4396, "step": 13668 }, { "epoch": 0.63, "grad_norm": 0.315676722552752, "learning_rate": 6.4236838071103305e-06, "loss": 0.1971, "step": 13669 }, { "epoch": 0.63, "grad_norm": 0.3801232797865906, "learning_rate": 6.4222943343721925e-06, "loss": 0.3284, "step": 13670 }, { "epoch": 0.63, "grad_norm": 0.28457958440644565, "learning_rate": 6.420904940841011e-06, "loss": 0.1725, "step": 13671 }, { "epoch": 0.63, "grad_norm": 0.30231106681913206, "learning_rate": 6.419515626547543e-06, "loss": 0.1964, "step": 13672 }, { "epoch": 0.63, "grad_norm": 0.5394933021987132, "learning_rate": 6.418126391522544e-06, "loss": 0.4365, "step": 13673 }, { "epoch": 0.63, "grad_norm": 0.5144866507395176, "learning_rate": 6.41673723579678e-06, "loss": 0.3224, "step": 13674 }, { "epoch": 0.63, "grad_norm": 0.35353036505905033, "learning_rate": 6.415348159400998e-06, "loss": 0.2745, "step": 13675 }, { "epoch": 0.63, "grad_norm": 0.765089869569537, "learning_rate": 6.4139591623659545e-06, "loss": 0.3172, "step": 13676 }, { "epoch": 0.63, "grad_norm": 0.25564180106080453, "learning_rate": 6.412570244722396e-06, "loss": 0.1862, "step": 13677 }, { "epoch": 0.63, "grad_norm": 0.3249209962482771, "learning_rate": 6.411181406501077e-06, "loss": 0.2713, "step": 13678 }, { "epoch": 0.63, "grad_norm": 0.9521741275109614, "learning_rate": 6.409792647732743e-06, "loss": 0.5482, "step": 13679 }, { "epoch": 0.63, "grad_norm": 0.9310621420367279, "learning_rate": 6.408403968448139e-06, "loss": 0.4427, "step": 13680 }, { "epoch": 0.63, "grad_norm": 0.3400771452829275, "learning_rate": 6.40701536867801e-06, "loss": 0.2632, "step": 13681 }, { "epoch": 0.63, "grad_norm": 0.3742603939816222, "learning_rate": 6.405626848453095e-06, "loss": 0.24, "step": 13682 }, { "epoch": 0.63, "grad_norm": 0.43053178239634077, "learning_rate": 6.40423840780414e-06, "loss": 0.2576, "step": 13683 }, { "epoch": 0.63, "grad_norm": 0.35900300601984575, "learning_rate": 6.402850046761881e-06, "loss": 0.2584, "step": 13684 }, { "epoch": 0.63, "grad_norm": 0.3087640385141549, "learning_rate": 6.401461765357055e-06, "loss": 0.1897, "step": 13685 }, { "epoch": 0.63, "grad_norm": 0.6398156660662767, "learning_rate": 6.400073563620392e-06, "loss": 0.346, "step": 13686 }, { "epoch": 0.63, "grad_norm": 0.40800513509260283, "learning_rate": 6.398685441582639e-06, "loss": 0.2627, "step": 13687 }, { "epoch": 0.63, "grad_norm": 0.5119749047887007, "learning_rate": 6.397297399274516e-06, "loss": 0.3618, "step": 13688 }, { "epoch": 0.63, "grad_norm": 0.3693317921303293, "learning_rate": 6.395909436726755e-06, "loss": 0.2554, "step": 13689 }, { "epoch": 0.63, "grad_norm": 0.4522658965947409, "learning_rate": 6.394521553970083e-06, "loss": 0.2938, "step": 13690 }, { "epoch": 0.63, "grad_norm": 0.3613441109908906, "learning_rate": 6.393133751035232e-06, "loss": 0.2717, "step": 13691 }, { "epoch": 0.63, "grad_norm": 0.7450006655157405, "learning_rate": 6.3917460279529234e-06, "loss": 0.3142, "step": 13692 }, { "epoch": 0.63, "grad_norm": 0.28875317335404266, "learning_rate": 6.390358384753881e-06, "loss": 0.2294, "step": 13693 }, { "epoch": 0.63, "grad_norm": 0.736803354950209, "learning_rate": 6.388970821468823e-06, "loss": 0.2977, "step": 13694 }, { "epoch": 0.63, "grad_norm": 0.8732637761398607, "learning_rate": 6.387583338128471e-06, "loss": 0.2358, "step": 13695 }, { "epoch": 0.63, "grad_norm": 0.36785106754924785, "learning_rate": 6.386195934763544e-06, "loss": 0.2587, "step": 13696 }, { "epoch": 0.63, "grad_norm": 0.9234216359502381, "learning_rate": 6.384808611404755e-06, "loss": 0.4895, "step": 13697 }, { "epoch": 0.63, "grad_norm": 0.34322946877814786, "learning_rate": 6.383421368082818e-06, "loss": 0.2483, "step": 13698 }, { "epoch": 0.63, "grad_norm": 0.3053665024614012, "learning_rate": 6.3820342048284465e-06, "loss": 0.2229, "step": 13699 }, { "epoch": 0.63, "grad_norm": 1.3147600100620773, "learning_rate": 6.380647121672352e-06, "loss": 0.6294, "step": 13700 }, { "epoch": 0.63, "grad_norm": 0.38417229925348206, "learning_rate": 6.3792601186452405e-06, "loss": 0.2841, "step": 13701 }, { "epoch": 0.63, "grad_norm": 0.313920513820115, "learning_rate": 6.377873195777822e-06, "loss": 0.1972, "step": 13702 }, { "epoch": 0.63, "grad_norm": 0.8918396797533942, "learning_rate": 6.376486353100795e-06, "loss": 0.5183, "step": 13703 }, { "epoch": 0.63, "grad_norm": 0.45705671226387096, "learning_rate": 6.375099590644871e-06, "loss": 0.2761, "step": 13704 }, { "epoch": 0.63, "grad_norm": 0.2564019470009641, "learning_rate": 6.373712908440749e-06, "loss": 0.1278, "step": 13705 }, { "epoch": 0.63, "grad_norm": 0.33196542045305477, "learning_rate": 6.37232630651913e-06, "loss": 0.2766, "step": 13706 }, { "epoch": 0.63, "grad_norm": 1.1164767911733577, "learning_rate": 6.370939784910706e-06, "loss": 0.6048, "step": 13707 }, { "epoch": 0.63, "grad_norm": 0.35332186116560255, "learning_rate": 6.369553343646178e-06, "loss": 0.1713, "step": 13708 }, { "epoch": 0.63, "grad_norm": 0.3947149481265831, "learning_rate": 6.368166982756243e-06, "loss": 0.3179, "step": 13709 }, { "epoch": 0.63, "grad_norm": 0.5182953904646964, "learning_rate": 6.366780702271589e-06, "loss": 0.355, "step": 13710 }, { "epoch": 0.63, "grad_norm": 0.20305948052983017, "learning_rate": 6.365394502222909e-06, "loss": 0.1249, "step": 13711 }, { "epoch": 0.63, "grad_norm": 0.5406257565557732, "learning_rate": 6.364008382640889e-06, "loss": 0.3672, "step": 13712 }, { "epoch": 0.63, "grad_norm": 0.3770059422882819, "learning_rate": 6.362622343556222e-06, "loss": 0.2953, "step": 13713 }, { "epoch": 0.63, "grad_norm": 0.4272615762919037, "learning_rate": 6.3612363849995895e-06, "loss": 0.276, "step": 13714 }, { "epoch": 0.63, "grad_norm": 0.4510293751241945, "learning_rate": 6.359850507001677e-06, "loss": 0.2646, "step": 13715 }, { "epoch": 0.63, "grad_norm": 0.8202970601738923, "learning_rate": 6.358464709593164e-06, "loss": 0.5591, "step": 13716 }, { "epoch": 0.63, "grad_norm": 0.2665364364650663, "learning_rate": 6.357078992804735e-06, "loss": 0.2033, "step": 13717 }, { "epoch": 0.63, "grad_norm": 0.2690265011818245, "learning_rate": 6.3556933566670656e-06, "loss": 0.1883, "step": 13718 }, { "epoch": 0.63, "grad_norm": 1.0311816113904653, "learning_rate": 6.3543078012108325e-06, "loss": 0.5251, "step": 13719 }, { "epoch": 0.63, "grad_norm": 0.5303796504390793, "learning_rate": 6.352922326466706e-06, "loss": 0.2914, "step": 13720 }, { "epoch": 0.63, "grad_norm": 0.33378868077554313, "learning_rate": 6.35153693246537e-06, "loss": 0.261, "step": 13721 }, { "epoch": 0.63, "grad_norm": 0.49427344656861777, "learning_rate": 6.350151619237489e-06, "loss": 0.4257, "step": 13722 }, { "epoch": 0.63, "grad_norm": 0.6792563770797668, "learning_rate": 6.348766386813734e-06, "loss": 0.3144, "step": 13723 }, { "epoch": 0.63, "grad_norm": 0.2346819912657847, "learning_rate": 6.347381235224769e-06, "loss": 0.1538, "step": 13724 }, { "epoch": 0.63, "grad_norm": 0.40565107886905327, "learning_rate": 6.345996164501265e-06, "loss": 0.3201, "step": 13725 }, { "epoch": 0.63, "grad_norm": 0.5445717105696417, "learning_rate": 6.3446111746738845e-06, "loss": 0.2953, "step": 13726 }, { "epoch": 0.63, "grad_norm": 0.3654269738542627, "learning_rate": 6.34322626577329e-06, "loss": 0.2966, "step": 13727 }, { "epoch": 0.63, "grad_norm": 1.1138406636687501, "learning_rate": 6.341841437830141e-06, "loss": 0.4359, "step": 13728 }, { "epoch": 0.63, "grad_norm": 0.3242864616456861, "learning_rate": 6.340456690875095e-06, "loss": 0.2549, "step": 13729 }, { "epoch": 0.63, "grad_norm": 0.26193009627805075, "learning_rate": 6.3390720249388125e-06, "loss": 0.2096, "step": 13730 }, { "epoch": 0.63, "grad_norm": 0.9040563962700809, "learning_rate": 6.337687440051947e-06, "loss": 0.3076, "step": 13731 }, { "epoch": 0.63, "grad_norm": 0.5394438846851447, "learning_rate": 6.336302936245154e-06, "loss": 0.364, "step": 13732 }, { "epoch": 0.63, "grad_norm": 0.3342277310675126, "learning_rate": 6.334918513549075e-06, "loss": 0.276, "step": 13733 }, { "epoch": 0.63, "grad_norm": 0.45667063085563714, "learning_rate": 6.333534171994375e-06, "loss": 0.2553, "step": 13734 }, { "epoch": 0.63, "grad_norm": 0.41788456258604867, "learning_rate": 6.3321499116116935e-06, "loss": 0.2517, "step": 13735 }, { "epoch": 0.63, "grad_norm": 0.26932353338718423, "learning_rate": 6.330765732431679e-06, "loss": 0.1857, "step": 13736 }, { "epoch": 0.63, "grad_norm": 0.45766510743207534, "learning_rate": 6.329381634484968e-06, "loss": 0.2537, "step": 13737 }, { "epoch": 0.63, "grad_norm": 0.7010241701619224, "learning_rate": 6.327997617802215e-06, "loss": 0.3513, "step": 13738 }, { "epoch": 0.63, "grad_norm": 0.41947279795343034, "learning_rate": 6.326613682414056e-06, "loss": 0.3073, "step": 13739 }, { "epoch": 0.63, "grad_norm": 1.1480612882879153, "learning_rate": 6.325229828351129e-06, "loss": 0.679, "step": 13740 }, { "epoch": 0.63, "grad_norm": 0.32480684195167947, "learning_rate": 6.32384605564407e-06, "loss": 0.2077, "step": 13741 }, { "epoch": 0.63, "grad_norm": 0.24007822422054026, "learning_rate": 6.322462364323519e-06, "loss": 0.1948, "step": 13742 }, { "epoch": 0.63, "grad_norm": 0.694605311345928, "learning_rate": 6.321078754420105e-06, "loss": 0.4073, "step": 13743 }, { "epoch": 0.63, "grad_norm": 0.5355223471537685, "learning_rate": 6.319695225964463e-06, "loss": 0.1449, "step": 13744 }, { "epoch": 0.63, "grad_norm": 0.33674014777401, "learning_rate": 6.318311778987221e-06, "loss": 0.272, "step": 13745 }, { "epoch": 0.63, "grad_norm": 1.295079136532708, "learning_rate": 6.316928413519006e-06, "loss": 0.7799, "step": 13746 }, { "epoch": 0.63, "grad_norm": 0.5217432078256229, "learning_rate": 6.315545129590448e-06, "loss": 0.1773, "step": 13747 }, { "epoch": 0.63, "grad_norm": 0.3231974442890065, "learning_rate": 6.314161927232169e-06, "loss": 0.2664, "step": 13748 }, { "epoch": 0.63, "grad_norm": 0.38828426320675563, "learning_rate": 6.312778806474795e-06, "loss": 0.3129, "step": 13749 }, { "epoch": 0.63, "grad_norm": 0.3571371151129754, "learning_rate": 6.311395767348938e-06, "loss": 0.1247, "step": 13750 }, { "epoch": 0.63, "grad_norm": 0.42979275131340433, "learning_rate": 6.310012809885229e-06, "loss": 0.3204, "step": 13751 }, { "epoch": 0.63, "grad_norm": 1.4035874272265616, "learning_rate": 6.308629934114279e-06, "loss": 0.7434, "step": 13752 }, { "epoch": 0.63, "grad_norm": 0.34702872092395576, "learning_rate": 6.307247140066705e-06, "loss": 0.279, "step": 13753 }, { "epoch": 0.63, "grad_norm": 0.38003206886919433, "learning_rate": 6.305864427773117e-06, "loss": 0.2108, "step": 13754 }, { "epoch": 0.63, "grad_norm": 0.4165638893636752, "learning_rate": 6.304481797264132e-06, "loss": 0.2653, "step": 13755 }, { "epoch": 0.63, "grad_norm": 0.36245159531049326, "learning_rate": 6.303099248570361e-06, "loss": 0.2321, "step": 13756 }, { "epoch": 0.63, "grad_norm": 0.2774919465268556, "learning_rate": 6.301716781722406e-06, "loss": 0.2381, "step": 13757 }, { "epoch": 0.63, "grad_norm": 1.2276952956324017, "learning_rate": 6.30033439675088e-06, "loss": 0.7805, "step": 13758 }, { "epoch": 0.63, "grad_norm": 0.8940534217727629, "learning_rate": 6.298952093686381e-06, "loss": 0.4074, "step": 13759 }, { "epoch": 0.63, "grad_norm": 0.3456620895163242, "learning_rate": 6.297569872559519e-06, "loss": 0.2209, "step": 13760 }, { "epoch": 0.63, "grad_norm": 0.32388983657721465, "learning_rate": 6.29618773340089e-06, "loss": 0.257, "step": 13761 }, { "epoch": 0.63, "grad_norm": 0.43187005562652625, "learning_rate": 6.294805676241096e-06, "loss": 0.2125, "step": 13762 }, { "epoch": 0.63, "grad_norm": 0.327565988280973, "learning_rate": 6.29342370111073e-06, "loss": 0.2117, "step": 13763 }, { "epoch": 0.63, "grad_norm": 1.3101844022331988, "learning_rate": 6.292041808040393e-06, "loss": 0.5824, "step": 13764 }, { "epoch": 0.63, "grad_norm": 0.3630391763503653, "learning_rate": 6.2906599970606774e-06, "loss": 0.2898, "step": 13765 }, { "epoch": 0.63, "grad_norm": 0.4119547461254126, "learning_rate": 6.2892782682021745e-06, "loss": 0.2975, "step": 13766 }, { "epoch": 0.63, "grad_norm": 1.0186738941013866, "learning_rate": 6.2878966214954684e-06, "loss": 0.2993, "step": 13767 }, { "epoch": 0.63, "grad_norm": 0.25555927257468164, "learning_rate": 6.286515056971158e-06, "loss": 0.2118, "step": 13768 }, { "epoch": 0.63, "grad_norm": 0.3033190996462018, "learning_rate": 6.285133574659827e-06, "loss": 0.2639, "step": 13769 }, { "epoch": 0.63, "grad_norm": 1.2839030349428027, "learning_rate": 6.283752174592057e-06, "loss": 0.3702, "step": 13770 }, { "epoch": 0.63, "grad_norm": 0.581101276380584, "learning_rate": 6.28237085679843e-06, "loss": 0.3424, "step": 13771 }, { "epoch": 0.63, "grad_norm": 0.397314944956453, "learning_rate": 6.280989621309531e-06, "loss": 0.2929, "step": 13772 }, { "epoch": 0.63, "grad_norm": 0.3448589704186857, "learning_rate": 6.279608468155938e-06, "loss": 0.2678, "step": 13773 }, { "epoch": 0.63, "grad_norm": 0.2404728873630171, "learning_rate": 6.278227397368227e-06, "loss": 0.1757, "step": 13774 }, { "epoch": 0.63, "grad_norm": 0.38915532673048614, "learning_rate": 6.276846408976975e-06, "loss": 0.2954, "step": 13775 }, { "epoch": 0.63, "grad_norm": 0.45847900170359007, "learning_rate": 6.275465503012752e-06, "loss": 0.2712, "step": 13776 }, { "epoch": 0.63, "grad_norm": 0.47792238292786554, "learning_rate": 6.274084679506136e-06, "loss": 0.3043, "step": 13777 }, { "epoch": 0.63, "grad_norm": 0.464636138439792, "learning_rate": 6.272703938487694e-06, "loss": 0.2577, "step": 13778 }, { "epoch": 0.63, "grad_norm": 1.1589077901843, "learning_rate": 6.271323279987995e-06, "loss": 0.4516, "step": 13779 }, { "epoch": 0.63, "grad_norm": 0.2787768114311294, "learning_rate": 6.2699427040376e-06, "loss": 0.2013, "step": 13780 }, { "epoch": 0.63, "grad_norm": 0.349771309466261, "learning_rate": 6.268562210667084e-06, "loss": 0.276, "step": 13781 }, { "epoch": 0.63, "grad_norm": 0.5176736161197101, "learning_rate": 6.2671817999070025e-06, "loss": 0.2696, "step": 13782 }, { "epoch": 0.63, "grad_norm": 0.8468243367785466, "learning_rate": 6.265801471787919e-06, "loss": 0.3251, "step": 13783 }, { "epoch": 0.63, "grad_norm": 0.35129874440924846, "learning_rate": 6.264421226340387e-06, "loss": 0.2546, "step": 13784 }, { "epoch": 0.63, "grad_norm": 0.3759174751782755, "learning_rate": 6.263041063594973e-06, "loss": 0.2953, "step": 13785 }, { "epoch": 0.63, "grad_norm": 0.7570133406425583, "learning_rate": 6.261660983582229e-06, "loss": 0.265, "step": 13786 }, { "epoch": 0.63, "grad_norm": 0.4235969342607545, "learning_rate": 6.260280986332707e-06, "loss": 0.2966, "step": 13787 }, { "epoch": 0.63, "grad_norm": 0.5807382242288651, "learning_rate": 6.258901071876959e-06, "loss": 0.2806, "step": 13788 }, { "epoch": 0.63, "grad_norm": 0.3389385748791269, "learning_rate": 6.257521240245534e-06, "loss": 0.2628, "step": 13789 }, { "epoch": 0.63, "grad_norm": 0.3252777992767435, "learning_rate": 6.2561414914689835e-06, "loss": 0.1831, "step": 13790 }, { "epoch": 0.63, "grad_norm": 1.0098525961729672, "learning_rate": 6.254761825577853e-06, "loss": 0.5458, "step": 13791 }, { "epoch": 0.63, "grad_norm": 0.3968447798630403, "learning_rate": 6.253382242602685e-06, "loss": 0.3164, "step": 13792 }, { "epoch": 0.63, "grad_norm": 0.29033535383010767, "learning_rate": 6.252002742574021e-06, "loss": 0.1846, "step": 13793 }, { "epoch": 0.63, "grad_norm": 0.7837029670902719, "learning_rate": 6.250623325522407e-06, "loss": 0.3983, "step": 13794 }, { "epoch": 0.63, "grad_norm": 0.3142161876321316, "learning_rate": 6.249243991478377e-06, "loss": 0.1931, "step": 13795 }, { "epoch": 0.63, "grad_norm": 0.3109255037552431, "learning_rate": 6.247864740472471e-06, "loss": 0.1968, "step": 13796 }, { "epoch": 0.63, "grad_norm": 0.3805655914974935, "learning_rate": 6.246485572535219e-06, "loss": 0.3177, "step": 13797 }, { "epoch": 0.63, "grad_norm": 1.1887776219483017, "learning_rate": 6.245106487697163e-06, "loss": 0.6946, "step": 13798 }, { "epoch": 0.63, "grad_norm": 0.32318422530199165, "learning_rate": 6.243727485988833e-06, "loss": 0.2237, "step": 13799 }, { "epoch": 0.63, "grad_norm": 0.6341461435601496, "learning_rate": 6.2423485674407545e-06, "loss": 0.3293, "step": 13800 }, { "epoch": 0.63, "grad_norm": 0.3745215622135042, "learning_rate": 6.240969732083451e-06, "loss": 0.2887, "step": 13801 }, { "epoch": 0.63, "grad_norm": 0.25342319944131736, "learning_rate": 6.2395909799474605e-06, "loss": 0.1946, "step": 13802 }, { "epoch": 0.63, "grad_norm": 1.1935357493032865, "learning_rate": 6.238212311063301e-06, "loss": 0.2767, "step": 13803 }, { "epoch": 0.63, "grad_norm": 0.3708317267370427, "learning_rate": 6.236833725461495e-06, "loss": 0.3173, "step": 13804 }, { "epoch": 0.63, "grad_norm": 0.33766389898660537, "learning_rate": 6.235455223172563e-06, "loss": 0.2804, "step": 13805 }, { "epoch": 0.63, "grad_norm": 0.8228778388406415, "learning_rate": 6.234076804227023e-06, "loss": 0.3091, "step": 13806 }, { "epoch": 0.63, "grad_norm": 0.2565533211124841, "learning_rate": 6.232698468655394e-06, "loss": 0.171, "step": 13807 }, { "epoch": 0.63, "grad_norm": 0.44081102251320176, "learning_rate": 6.23132021648819e-06, "loss": 0.2918, "step": 13808 }, { "epoch": 0.63, "grad_norm": 0.3301721845631173, "learning_rate": 6.2299420477559236e-06, "loss": 0.2423, "step": 13809 }, { "epoch": 0.63, "grad_norm": 0.7226649009552698, "learning_rate": 6.228563962489106e-06, "loss": 0.4122, "step": 13810 }, { "epoch": 0.63, "grad_norm": 0.4437286595349055, "learning_rate": 6.2271859607182485e-06, "loss": 0.2558, "step": 13811 }, { "epoch": 0.63, "grad_norm": 0.32175072605886906, "learning_rate": 6.225808042473857e-06, "loss": 0.2602, "step": 13812 }, { "epoch": 0.63, "grad_norm": 1.2620952097698657, "learning_rate": 6.224430207786438e-06, "loss": 0.5556, "step": 13813 }, { "epoch": 0.63, "grad_norm": 0.22852143156537716, "learning_rate": 6.223052456686492e-06, "loss": 0.147, "step": 13814 }, { "epoch": 0.63, "grad_norm": 0.6108052527491664, "learning_rate": 6.221674789204528e-06, "loss": 0.315, "step": 13815 }, { "epoch": 0.63, "grad_norm": 0.387141918175387, "learning_rate": 6.220297205371044e-06, "loss": 0.2604, "step": 13816 }, { "epoch": 0.63, "grad_norm": 0.39394833829626136, "learning_rate": 6.218919705216535e-06, "loss": 0.2788, "step": 13817 }, { "epoch": 0.63, "grad_norm": 0.9707860431142171, "learning_rate": 6.217542288771502e-06, "loss": 0.509, "step": 13818 }, { "epoch": 0.63, "grad_norm": 0.4125274018699957, "learning_rate": 6.2161649560664305e-06, "loss": 0.1309, "step": 13819 }, { "epoch": 0.63, "grad_norm": 0.2975174398203126, "learning_rate": 6.214787707131825e-06, "loss": 0.2077, "step": 13820 }, { "epoch": 0.63, "grad_norm": 0.3783390283103983, "learning_rate": 6.213410541998171e-06, "loss": 0.2867, "step": 13821 }, { "epoch": 0.63, "grad_norm": 0.7034140413601049, "learning_rate": 6.2120334606959585e-06, "loss": 0.3001, "step": 13822 }, { "epoch": 0.64, "grad_norm": 0.3496260938226288, "learning_rate": 6.2106564632556725e-06, "loss": 0.2778, "step": 13823 }, { "epoch": 0.64, "grad_norm": 0.4982366281086838, "learning_rate": 6.2092795497078005e-06, "loss": 0.4129, "step": 13824 }, { "epoch": 0.64, "grad_norm": 0.38148030233120556, "learning_rate": 6.207902720082828e-06, "loss": 0.2003, "step": 13825 }, { "epoch": 0.64, "grad_norm": 0.3150141927675991, "learning_rate": 6.206525974411233e-06, "loss": 0.1689, "step": 13826 }, { "epoch": 0.64, "grad_norm": 0.4217405810631256, "learning_rate": 6.205149312723493e-06, "loss": 0.2696, "step": 13827 }, { "epoch": 0.64, "grad_norm": 0.4006029610244969, "learning_rate": 6.203772735050096e-06, "loss": 0.3145, "step": 13828 }, { "epoch": 0.64, "grad_norm": 0.33783835907181453, "learning_rate": 6.2023962414215085e-06, "loss": 0.1938, "step": 13829 }, { "epoch": 0.64, "grad_norm": 0.5434171226608157, "learning_rate": 6.201019831868209e-06, "loss": 0.4013, "step": 13830 }, { "epoch": 0.64, "grad_norm": 1.0090888598456478, "learning_rate": 6.199643506420665e-06, "loss": 0.5232, "step": 13831 }, { "epoch": 0.64, "grad_norm": 0.24112058864962477, "learning_rate": 6.198267265109356e-06, "loss": 0.1655, "step": 13832 }, { "epoch": 0.64, "grad_norm": 0.3089787222497205, "learning_rate": 6.196891107964744e-06, "loss": 0.2483, "step": 13833 }, { "epoch": 0.64, "grad_norm": 0.6607546406989968, "learning_rate": 6.195515035017298e-06, "loss": 0.4199, "step": 13834 }, { "epoch": 0.64, "grad_norm": 0.36292554460857707, "learning_rate": 6.194139046297482e-06, "loss": 0.2037, "step": 13835 }, { "epoch": 0.64, "grad_norm": 0.33342794001173687, "learning_rate": 6.192763141835758e-06, "loss": 0.2984, "step": 13836 }, { "epoch": 0.64, "grad_norm": 1.3753346489803266, "learning_rate": 6.19138732166259e-06, "loss": 0.5578, "step": 13837 }, { "epoch": 0.64, "grad_norm": 0.23794930341799886, "learning_rate": 6.190011585808435e-06, "loss": 0.1577, "step": 13838 }, { "epoch": 0.64, "grad_norm": 0.4425281161262238, "learning_rate": 6.188635934303752e-06, "loss": 0.2631, "step": 13839 }, { "epoch": 0.64, "grad_norm": 0.36526958270148396, "learning_rate": 6.187260367178996e-06, "loss": 0.2975, "step": 13840 }, { "epoch": 0.64, "grad_norm": 0.44411900500269885, "learning_rate": 6.185884884464621e-06, "loss": 0.2981, "step": 13841 }, { "epoch": 0.64, "grad_norm": 0.5117546562318249, "learning_rate": 6.1845094861910785e-06, "loss": 0.2673, "step": 13842 }, { "epoch": 0.64, "grad_norm": 1.6357588189948409, "learning_rate": 6.183134172388819e-06, "loss": 0.671, "step": 13843 }, { "epoch": 0.64, "grad_norm": 0.3219142247783382, "learning_rate": 6.181758943088285e-06, "loss": 0.2308, "step": 13844 }, { "epoch": 0.64, "grad_norm": 0.3248482963926374, "learning_rate": 6.180383798319934e-06, "loss": 0.2063, "step": 13845 }, { "epoch": 0.64, "grad_norm": 0.47513926969734743, "learning_rate": 6.1790087381142035e-06, "loss": 0.2781, "step": 13846 }, { "epoch": 0.64, "grad_norm": 0.8153096064493197, "learning_rate": 6.177633762501537e-06, "loss": 0.4888, "step": 13847 }, { "epoch": 0.64, "grad_norm": 0.2788604779574639, "learning_rate": 6.176258871512375e-06, "loss": 0.2307, "step": 13848 }, { "epoch": 0.64, "grad_norm": 1.2597578340547748, "learning_rate": 6.174884065177151e-06, "loss": 0.5809, "step": 13849 }, { "epoch": 0.64, "grad_norm": 0.5159120857930166, "learning_rate": 6.1735093435263115e-06, "loss": 0.2892, "step": 13850 }, { "epoch": 0.64, "grad_norm": 0.4033173649713765, "learning_rate": 6.172134706590287e-06, "loss": 0.2612, "step": 13851 }, { "epoch": 0.64, "grad_norm": 0.27440239769189817, "learning_rate": 6.170760154399511e-06, "loss": 0.2372, "step": 13852 }, { "epoch": 0.64, "grad_norm": 0.30397031180476486, "learning_rate": 6.169385686984413e-06, "loss": 0.1856, "step": 13853 }, { "epoch": 0.64, "grad_norm": 0.445288737964441, "learning_rate": 6.168011304375425e-06, "loss": 0.3277, "step": 13854 }, { "epoch": 0.64, "grad_norm": 1.030096009150229, "learning_rate": 6.166637006602975e-06, "loss": 0.2895, "step": 13855 }, { "epoch": 0.64, "grad_norm": 0.3240408300281562, "learning_rate": 6.165262793697486e-06, "loss": 0.2805, "step": 13856 }, { "epoch": 0.64, "grad_norm": 0.44105739849606385, "learning_rate": 6.16388866568938e-06, "loss": 0.3376, "step": 13857 }, { "epoch": 0.64, "grad_norm": 0.33080055670480035, "learning_rate": 6.162514622609085e-06, "loss": 0.1723, "step": 13858 }, { "epoch": 0.64, "grad_norm": 0.42811216257442963, "learning_rate": 6.161140664487017e-06, "loss": 0.2963, "step": 13859 }, { "epoch": 0.64, "grad_norm": 0.42182911761851494, "learning_rate": 6.159766791353594e-06, "loss": 0.3297, "step": 13860 }, { "epoch": 0.64, "grad_norm": 0.4246334565540588, "learning_rate": 6.15839300323923e-06, "loss": 0.2433, "step": 13861 }, { "epoch": 0.64, "grad_norm": 0.546302453946891, "learning_rate": 6.157019300174346e-06, "loss": 0.3158, "step": 13862 }, { "epoch": 0.64, "grad_norm": 0.36793126644265056, "learning_rate": 6.15564568218935e-06, "loss": 0.2937, "step": 13863 }, { "epoch": 0.64, "grad_norm": 0.2586025359807072, "learning_rate": 6.154272149314658e-06, "loss": 0.1977, "step": 13864 }, { "epoch": 0.64, "grad_norm": 0.8491509831041123, "learning_rate": 6.152898701580669e-06, "loss": 0.4995, "step": 13865 }, { "epoch": 0.64, "grad_norm": 0.2797668430226198, "learning_rate": 6.151525339017792e-06, "loss": 0.2568, "step": 13866 }, { "epoch": 0.64, "grad_norm": 0.5233379929794071, "learning_rate": 6.150152061656439e-06, "loss": 0.3949, "step": 13867 }, { "epoch": 0.64, "grad_norm": 0.42831173166798225, "learning_rate": 6.148778869527009e-06, "loss": 0.2522, "step": 13868 }, { "epoch": 0.64, "grad_norm": 0.3650486511638581, "learning_rate": 6.147405762659902e-06, "loss": 0.2894, "step": 13869 }, { "epoch": 0.64, "grad_norm": 0.45030704401442284, "learning_rate": 6.146032741085517e-06, "loss": 0.2596, "step": 13870 }, { "epoch": 0.64, "grad_norm": 0.40058833805660227, "learning_rate": 6.1446598048342556e-06, "loss": 0.2283, "step": 13871 }, { "epoch": 0.64, "grad_norm": 0.23021803412515876, "learning_rate": 6.143286953936509e-06, "loss": 0.2357, "step": 13872 }, { "epoch": 0.64, "grad_norm": 0.8726318140317489, "learning_rate": 6.141914188422673e-06, "loss": 0.4504, "step": 13873 }, { "epoch": 0.64, "grad_norm": 0.5490746117191487, "learning_rate": 6.140541508323132e-06, "loss": 0.2492, "step": 13874 }, { "epoch": 0.64, "grad_norm": 0.4298402951210946, "learning_rate": 6.139168913668291e-06, "loss": 0.2919, "step": 13875 }, { "epoch": 0.64, "grad_norm": 0.35829775744823256, "learning_rate": 6.137796404488525e-06, "loss": 0.2846, "step": 13876 }, { "epoch": 0.64, "grad_norm": 0.36655387780840537, "learning_rate": 6.136423980814225e-06, "loss": 0.2259, "step": 13877 }, { "epoch": 0.64, "grad_norm": 0.3227119233334314, "learning_rate": 6.135051642675775e-06, "loss": 0.2316, "step": 13878 }, { "epoch": 0.64, "grad_norm": 0.4054198865398271, "learning_rate": 6.1336793901035526e-06, "loss": 0.2775, "step": 13879 }, { "epoch": 0.64, "grad_norm": 0.4528290235364106, "learning_rate": 6.132307223127945e-06, "loss": 0.3279, "step": 13880 }, { "epoch": 0.64, "grad_norm": 0.38655686394596395, "learning_rate": 6.130935141779328e-06, "loss": 0.2297, "step": 13881 }, { "epoch": 0.64, "grad_norm": 1.4234348894720836, "learning_rate": 6.12956314608808e-06, "loss": 0.5589, "step": 13882 }, { "epoch": 0.64, "grad_norm": 0.556487246186019, "learning_rate": 6.128191236084569e-06, "loss": 0.3656, "step": 13883 }, { "epoch": 0.64, "grad_norm": 0.2992721562379536, "learning_rate": 6.126819411799175e-06, "loss": 0.2106, "step": 13884 }, { "epoch": 0.64, "grad_norm": 0.4949216110377767, "learning_rate": 6.125447673262266e-06, "loss": 0.2879, "step": 13885 }, { "epoch": 0.64, "grad_norm": 0.46199684277353253, "learning_rate": 6.124076020504213e-06, "loss": 0.2475, "step": 13886 }, { "epoch": 0.64, "grad_norm": 0.32834354245058006, "learning_rate": 6.122704453555377e-06, "loss": 0.2013, "step": 13887 }, { "epoch": 0.64, "grad_norm": 0.37220094871738957, "learning_rate": 6.1213329724461305e-06, "loss": 0.3009, "step": 13888 }, { "epoch": 0.64, "grad_norm": 1.2475134386857376, "learning_rate": 6.1199615772068324e-06, "loss": 0.8039, "step": 13889 }, { "epoch": 0.64, "grad_norm": 0.3503292505677632, "learning_rate": 6.118590267867847e-06, "loss": 0.2185, "step": 13890 }, { "epoch": 0.64, "grad_norm": 0.524903265245972, "learning_rate": 6.117219044459527e-06, "loss": 0.3496, "step": 13891 }, { "epoch": 0.64, "grad_norm": 0.24995885801324022, "learning_rate": 6.11584790701224e-06, "loss": 0.1936, "step": 13892 }, { "epoch": 0.64, "grad_norm": 0.36988544562524966, "learning_rate": 6.114476855556337e-06, "loss": 0.2789, "step": 13893 }, { "epoch": 0.64, "grad_norm": 1.0742786117667407, "learning_rate": 6.113105890122172e-06, "loss": 0.3657, "step": 13894 }, { "epoch": 0.64, "grad_norm": 0.32995531069287776, "learning_rate": 6.111735010740094e-06, "loss": 0.2895, "step": 13895 }, { "epoch": 0.64, "grad_norm": 0.3677734611032791, "learning_rate": 6.110364217440453e-06, "loss": 0.2752, "step": 13896 }, { "epoch": 0.64, "grad_norm": 0.7325666516433144, "learning_rate": 6.108993510253602e-06, "loss": 0.2996, "step": 13897 }, { "epoch": 0.64, "grad_norm": 0.3334666735054576, "learning_rate": 6.1076228892098856e-06, "loss": 0.1932, "step": 13898 }, { "epoch": 0.64, "grad_norm": 0.3271418452175672, "learning_rate": 6.106252354339647e-06, "loss": 0.2548, "step": 13899 }, { "epoch": 0.64, "grad_norm": 0.3636125574228562, "learning_rate": 6.104881905673226e-06, "loss": 0.2606, "step": 13900 }, { "epoch": 0.64, "grad_norm": 0.779745658858173, "learning_rate": 6.1035115432409675e-06, "loss": 0.4159, "step": 13901 }, { "epoch": 0.64, "grad_norm": 0.36380940366596587, "learning_rate": 6.102141267073207e-06, "loss": 0.2518, "step": 13902 }, { "epoch": 0.64, "grad_norm": 0.3859740720756227, "learning_rate": 6.100771077200284e-06, "loss": 0.287, "step": 13903 }, { "epoch": 0.64, "grad_norm": 0.3567722990613763, "learning_rate": 6.0994009736525275e-06, "loss": 0.2055, "step": 13904 }, { "epoch": 0.64, "grad_norm": 0.3350838698558275, "learning_rate": 6.098030956460277e-06, "loss": 0.2453, "step": 13905 }, { "epoch": 0.64, "grad_norm": 1.1713263439209174, "learning_rate": 6.09666102565386e-06, "loss": 0.687, "step": 13906 }, { "epoch": 0.64, "grad_norm": 0.31835001909591154, "learning_rate": 6.095291181263605e-06, "loss": 0.2613, "step": 13907 }, { "epoch": 0.64, "grad_norm": 0.33645966347971534, "learning_rate": 6.093921423319842e-06, "loss": 0.2752, "step": 13908 }, { "epoch": 0.64, "grad_norm": 1.1395949319757779, "learning_rate": 6.0925517518528875e-06, "loss": 0.5575, "step": 13909 }, { "epoch": 0.64, "grad_norm": 0.21868314525692864, "learning_rate": 6.091182166893077e-06, "loss": 0.0851, "step": 13910 }, { "epoch": 0.64, "grad_norm": 0.35297265048325643, "learning_rate": 6.089812668470727e-06, "loss": 0.2729, "step": 13911 }, { "epoch": 0.64, "grad_norm": 0.3697626885697134, "learning_rate": 6.088443256616154e-06, "loss": 0.2976, "step": 13912 }, { "epoch": 0.64, "grad_norm": 0.6527010514620597, "learning_rate": 6.087073931359672e-06, "loss": 0.2948, "step": 13913 }, { "epoch": 0.64, "grad_norm": 0.38464961289826516, "learning_rate": 6.085704692731609e-06, "loss": 0.2826, "step": 13914 }, { "epoch": 0.64, "grad_norm": 0.5107882224340381, "learning_rate": 6.08433554076227e-06, "loss": 0.3637, "step": 13915 }, { "epoch": 0.64, "grad_norm": 0.2585879510200286, "learning_rate": 6.0829664754819665e-06, "loss": 0.1975, "step": 13916 }, { "epoch": 0.64, "grad_norm": 0.2918190694749011, "learning_rate": 6.08159749692101e-06, "loss": 0.1788, "step": 13917 }, { "epoch": 0.64, "grad_norm": 0.7832952021845467, "learning_rate": 6.0802286051097095e-06, "loss": 0.3904, "step": 13918 }, { "epoch": 0.64, "grad_norm": 0.3975248059853997, "learning_rate": 6.078859800078369e-06, "loss": 0.3066, "step": 13919 }, { "epoch": 0.64, "grad_norm": 0.3111389377780675, "learning_rate": 6.077491081857294e-06, "loss": 0.2158, "step": 13920 }, { "epoch": 0.64, "grad_norm": 1.2448699420253515, "learning_rate": 6.076122450476785e-06, "loss": 0.5928, "step": 13921 }, { "epoch": 0.64, "grad_norm": 0.28182896674334956, "learning_rate": 6.074753905967144e-06, "loss": 0.171, "step": 13922 }, { "epoch": 0.64, "grad_norm": 0.2744877444389626, "learning_rate": 6.073385448358668e-06, "loss": 0.2008, "step": 13923 }, { "epoch": 0.64, "grad_norm": 0.5417619103404065, "learning_rate": 6.072017077681654e-06, "loss": 0.3616, "step": 13924 }, { "epoch": 0.64, "grad_norm": 0.7672529626488681, "learning_rate": 6.070648793966396e-06, "loss": 0.3799, "step": 13925 }, { "epoch": 0.64, "grad_norm": 0.32359873885082413, "learning_rate": 6.0692805972431835e-06, "loss": 0.2014, "step": 13926 }, { "epoch": 0.64, "grad_norm": 0.5243716294771018, "learning_rate": 6.067912487542312e-06, "loss": 0.3559, "step": 13927 }, { "epoch": 0.64, "grad_norm": 0.3636343897453809, "learning_rate": 6.066544464894069e-06, "loss": 0.2521, "step": 13928 }, { "epoch": 0.64, "grad_norm": 0.29228912996241135, "learning_rate": 6.065176529328743e-06, "loss": 0.2133, "step": 13929 }, { "epoch": 0.64, "grad_norm": 0.78084620471736, "learning_rate": 6.063808680876611e-06, "loss": 0.2924, "step": 13930 }, { "epoch": 0.64, "grad_norm": 0.36541293649495693, "learning_rate": 6.062440919567965e-06, "loss": 0.3049, "step": 13931 }, { "epoch": 0.64, "grad_norm": 0.6332530676964685, "learning_rate": 6.06107324543308e-06, "loss": 0.33, "step": 13932 }, { "epoch": 0.64, "grad_norm": 0.400946193123766, "learning_rate": 6.059705658502239e-06, "loss": 0.2524, "step": 13933 }, { "epoch": 0.64, "grad_norm": 0.27081255944141536, "learning_rate": 6.058338158805714e-06, "loss": 0.2061, "step": 13934 }, { "epoch": 0.64, "grad_norm": 0.42267808350852526, "learning_rate": 6.056970746373785e-06, "loss": 0.2491, "step": 13935 }, { "epoch": 0.64, "grad_norm": 0.553848875594435, "learning_rate": 6.055603421236723e-06, "loss": 0.2793, "step": 13936 }, { "epoch": 0.64, "grad_norm": 1.4924285532992687, "learning_rate": 6.0542361834248e-06, "loss": 0.7511, "step": 13937 }, { "epoch": 0.64, "grad_norm": 0.4664070939459533, "learning_rate": 6.052869032968285e-06, "loss": 0.2863, "step": 13938 }, { "epoch": 0.64, "grad_norm": 0.3343228077422523, "learning_rate": 6.051501969897442e-06, "loss": 0.2613, "step": 13939 }, { "epoch": 0.64, "grad_norm": 1.2548638085173232, "learning_rate": 6.050134994242545e-06, "loss": 0.6256, "step": 13940 }, { "epoch": 0.64, "grad_norm": 0.3018821161366112, "learning_rate": 6.048768106033851e-06, "loss": 0.2233, "step": 13941 }, { "epoch": 0.64, "grad_norm": 0.5044790701983395, "learning_rate": 6.0474013053016215e-06, "loss": 0.3903, "step": 13942 }, { "epoch": 0.64, "grad_norm": 0.2605724013141853, "learning_rate": 6.0460345920761156e-06, "loss": 0.2053, "step": 13943 }, { "epoch": 0.64, "grad_norm": 0.39157420952536914, "learning_rate": 6.0446679663875955e-06, "loss": 0.2716, "step": 13944 }, { "epoch": 0.64, "grad_norm": 1.1322705480340391, "learning_rate": 6.043301428266314e-06, "loss": 0.6276, "step": 13945 }, { "epoch": 0.64, "grad_norm": 0.4697571477942363, "learning_rate": 6.041934977742526e-06, "loss": 0.2576, "step": 13946 }, { "epoch": 0.64, "grad_norm": 0.32730788884032985, "learning_rate": 6.040568614846481e-06, "loss": 0.254, "step": 13947 }, { "epoch": 0.64, "grad_norm": 0.4768511755553711, "learning_rate": 6.039202339608432e-06, "loss": 0.3406, "step": 13948 }, { "epoch": 0.64, "grad_norm": 0.18741304137980996, "learning_rate": 6.037836152058627e-06, "loss": 0.1196, "step": 13949 }, { "epoch": 0.64, "grad_norm": 0.6236998409670669, "learning_rate": 6.03647005222731e-06, "loss": 0.3323, "step": 13950 }, { "epoch": 0.64, "grad_norm": 0.29247676644612164, "learning_rate": 6.0351040401447235e-06, "loss": 0.276, "step": 13951 }, { "epoch": 0.64, "grad_norm": 0.5904979871966565, "learning_rate": 6.0337381158411145e-06, "loss": 0.3046, "step": 13952 }, { "epoch": 0.64, "grad_norm": 0.5426700279207406, "learning_rate": 6.032372279346721e-06, "loss": 0.2971, "step": 13953 }, { "epoch": 0.64, "grad_norm": 0.33207282727189236, "learning_rate": 6.031006530691781e-06, "loss": 0.2501, "step": 13954 }, { "epoch": 0.64, "grad_norm": 0.25432608529295825, "learning_rate": 6.0296408699065325e-06, "loss": 0.2256, "step": 13955 }, { "epoch": 0.64, "grad_norm": 0.3761620633003791, "learning_rate": 6.028275297021202e-06, "loss": 0.2097, "step": 13956 }, { "epoch": 0.64, "grad_norm": 0.5474882994913526, "learning_rate": 6.026909812066034e-06, "loss": 0.3329, "step": 13957 }, { "epoch": 0.64, "grad_norm": 0.909234634916144, "learning_rate": 6.025544415071256e-06, "loss": 0.4046, "step": 13958 }, { "epoch": 0.64, "grad_norm": 0.28396838035334615, "learning_rate": 6.024179106067091e-06, "loss": 0.2153, "step": 13959 }, { "epoch": 0.64, "grad_norm": 0.5486000001338303, "learning_rate": 6.022813885083764e-06, "loss": 0.3759, "step": 13960 }, { "epoch": 0.64, "grad_norm": 0.5346700890347228, "learning_rate": 6.021448752151508e-06, "loss": 0.313, "step": 13961 }, { "epoch": 0.64, "grad_norm": 0.27240022956792126, "learning_rate": 6.020083707300545e-06, "loss": 0.1554, "step": 13962 }, { "epoch": 0.64, "grad_norm": 0.3437014226746102, "learning_rate": 6.01871875056109e-06, "loss": 0.2829, "step": 13963 }, { "epoch": 0.64, "grad_norm": 0.8756539480670127, "learning_rate": 6.017353881963364e-06, "loss": 0.3714, "step": 13964 }, { "epoch": 0.64, "grad_norm": 0.39445150001458174, "learning_rate": 6.015989101537586e-06, "loss": 0.1974, "step": 13965 }, { "epoch": 0.64, "grad_norm": 0.531613488517085, "learning_rate": 6.01462440931397e-06, "loss": 0.3365, "step": 13966 }, { "epoch": 0.64, "grad_norm": 0.27926532394902526, "learning_rate": 6.0132598053227275e-06, "loss": 0.2386, "step": 13967 }, { "epoch": 0.64, "grad_norm": 0.8889284503431224, "learning_rate": 6.011895289594072e-06, "loss": 0.4573, "step": 13968 }, { "epoch": 0.64, "grad_norm": 0.3081466617744832, "learning_rate": 6.010530862158207e-06, "loss": 0.1961, "step": 13969 }, { "epoch": 0.64, "grad_norm": 0.5674442571956718, "learning_rate": 6.0091665230453465e-06, "loss": 0.3636, "step": 13970 }, { "epoch": 0.64, "grad_norm": 0.45967609372796786, "learning_rate": 6.007802272285693e-06, "loss": 0.2932, "step": 13971 }, { "epoch": 0.64, "grad_norm": 0.34513440300235276, "learning_rate": 6.006438109909449e-06, "loss": 0.2095, "step": 13972 }, { "epoch": 0.64, "grad_norm": 1.244036587386846, "learning_rate": 6.005074035946813e-06, "loss": 0.6816, "step": 13973 }, { "epoch": 0.64, "grad_norm": 0.3129660059868295, "learning_rate": 6.003710050427991e-06, "loss": 0.2303, "step": 13974 }, { "epoch": 0.64, "grad_norm": 0.23754626454349723, "learning_rate": 6.002346153383176e-06, "loss": 0.2045, "step": 13975 }, { "epoch": 0.64, "grad_norm": 0.7097344391241067, "learning_rate": 6.0009823448425675e-06, "loss": 0.4256, "step": 13976 }, { "epoch": 0.64, "grad_norm": 0.6539634740846677, "learning_rate": 5.99961862483635e-06, "loss": 0.3563, "step": 13977 }, { "epoch": 0.64, "grad_norm": 0.39489466383579075, "learning_rate": 5.998254993394723e-06, "loss": 0.2283, "step": 13978 }, { "epoch": 0.64, "grad_norm": 0.39325741087960653, "learning_rate": 5.996891450547874e-06, "loss": 0.3226, "step": 13979 }, { "epoch": 0.64, "grad_norm": 0.6192811217527082, "learning_rate": 5.995527996325989e-06, "loss": 0.3368, "step": 13980 }, { "epoch": 0.64, "grad_norm": 0.4128657996975594, "learning_rate": 5.994164630759255e-06, "loss": 0.3086, "step": 13981 }, { "epoch": 0.64, "grad_norm": 0.33984521067875934, "learning_rate": 5.992801353877855e-06, "loss": 0.1981, "step": 13982 }, { "epoch": 0.64, "grad_norm": 0.41028605702400667, "learning_rate": 5.991438165711972e-06, "loss": 0.2792, "step": 13983 }, { "epoch": 0.64, "grad_norm": 0.39863345365823605, "learning_rate": 5.990075066291785e-06, "loss": 0.3193, "step": 13984 }, { "epoch": 0.64, "grad_norm": 1.1181275958111858, "learning_rate": 5.98871205564747e-06, "loss": 0.4314, "step": 13985 }, { "epoch": 0.64, "grad_norm": 0.710955903206158, "learning_rate": 5.987349133809201e-06, "loss": 0.3395, "step": 13986 }, { "epoch": 0.64, "grad_norm": 0.28092670876968545, "learning_rate": 5.98598630080716e-06, "loss": 0.2555, "step": 13987 }, { "epoch": 0.64, "grad_norm": 0.297032847695535, "learning_rate": 5.984623556671511e-06, "loss": 0.1534, "step": 13988 }, { "epoch": 0.64, "grad_norm": 0.792822963610027, "learning_rate": 5.9832609014324284e-06, "loss": 0.3679, "step": 13989 }, { "epoch": 0.64, "grad_norm": 0.3954742641776457, "learning_rate": 5.981898335120072e-06, "loss": 0.2726, "step": 13990 }, { "epoch": 0.64, "grad_norm": 0.34688056729845823, "learning_rate": 5.980535857764619e-06, "loss": 0.2569, "step": 13991 }, { "epoch": 0.64, "grad_norm": 0.7045285732138477, "learning_rate": 5.979173469396226e-06, "loss": 0.392, "step": 13992 }, { "epoch": 0.64, "grad_norm": 0.3613347498255459, "learning_rate": 5.97781117004506e-06, "loss": 0.2853, "step": 13993 }, { "epoch": 0.64, "grad_norm": 0.45592041115748355, "learning_rate": 5.9764489597412744e-06, "loss": 0.2312, "step": 13994 }, { "epoch": 0.64, "grad_norm": 0.2567543445725462, "learning_rate": 5.975086838515034e-06, "loss": 0.1833, "step": 13995 }, { "epoch": 0.64, "grad_norm": 0.39636881108234673, "learning_rate": 5.973724806396491e-06, "loss": 0.2762, "step": 13996 }, { "epoch": 0.64, "grad_norm": 1.230784619844159, "learning_rate": 5.972362863415799e-06, "loss": 0.7232, "step": 13997 }, { "epoch": 0.64, "grad_norm": 0.4410252887551768, "learning_rate": 5.9710010096031135e-06, "loss": 0.2438, "step": 13998 }, { "epoch": 0.64, "grad_norm": 0.3164557210632191, "learning_rate": 5.969639244988579e-06, "loss": 0.2731, "step": 13999 }, { "epoch": 0.64, "grad_norm": 0.4763283789656363, "learning_rate": 5.96827756960235e-06, "loss": 0.2578, "step": 14000 }, { "epoch": 0.64, "grad_norm": 0.2873672902424474, "learning_rate": 5.966915983474569e-06, "loss": 0.0913, "step": 14001 }, { "epoch": 0.64, "grad_norm": 0.43585165425091266, "learning_rate": 5.965554486635381e-06, "loss": 0.3022, "step": 14002 }, { "epoch": 0.64, "grad_norm": 0.37288706557421614, "learning_rate": 5.964193079114925e-06, "loss": 0.3229, "step": 14003 }, { "epoch": 0.64, "grad_norm": 0.6191245196238206, "learning_rate": 5.962831760943348e-06, "loss": 0.3169, "step": 14004 }, { "epoch": 0.64, "grad_norm": 0.349550917043772, "learning_rate": 5.961470532150787e-06, "loss": 0.2685, "step": 14005 }, { "epoch": 0.64, "grad_norm": 0.40657768501801067, "learning_rate": 5.960109392767374e-06, "loss": 0.3197, "step": 14006 }, { "epoch": 0.64, "grad_norm": 0.2940599433282073, "learning_rate": 5.95874834282324e-06, "loss": 0.1689, "step": 14007 }, { "epoch": 0.64, "grad_norm": 0.3627495523996298, "learning_rate": 5.95738738234853e-06, "loss": 0.2055, "step": 14008 }, { "epoch": 0.64, "grad_norm": 0.8455424806672331, "learning_rate": 5.956026511373363e-06, "loss": 0.4845, "step": 14009 }, { "epoch": 0.64, "grad_norm": 0.3906060847573226, "learning_rate": 5.954665729927873e-06, "loss": 0.3124, "step": 14010 }, { "epoch": 0.64, "grad_norm": 0.3125920250908797, "learning_rate": 5.953305038042183e-06, "loss": 0.2133, "step": 14011 }, { "epoch": 0.64, "grad_norm": 1.3490627749112505, "learning_rate": 5.951944435746422e-06, "loss": 0.6816, "step": 14012 }, { "epoch": 0.64, "grad_norm": 0.3762824213821155, "learning_rate": 5.95058392307071e-06, "loss": 0.2292, "step": 14013 }, { "epoch": 0.64, "grad_norm": 0.27702605903239613, "learning_rate": 5.9492235000451645e-06, "loss": 0.2228, "step": 14014 }, { "epoch": 0.64, "grad_norm": 0.5396572245345912, "learning_rate": 5.947863166699909e-06, "loss": 0.3306, "step": 14015 }, { "epoch": 0.64, "grad_norm": 0.8835921497815773, "learning_rate": 5.946502923065054e-06, "loss": 0.4459, "step": 14016 }, { "epoch": 0.64, "grad_norm": 0.30870793467092883, "learning_rate": 5.94514276917072e-06, "loss": 0.1922, "step": 14017 }, { "epoch": 0.64, "grad_norm": 0.40783406689933543, "learning_rate": 5.943782705047016e-06, "loss": 0.2937, "step": 14018 }, { "epoch": 0.64, "grad_norm": 0.2774443558589636, "learning_rate": 5.942422730724056e-06, "loss": 0.1735, "step": 14019 }, { "epoch": 0.64, "grad_norm": 0.40546606837626253, "learning_rate": 5.9410628462319395e-06, "loss": 0.3179, "step": 14020 }, { "epoch": 0.64, "grad_norm": 1.0319937000151396, "learning_rate": 5.939703051600785e-06, "loss": 0.3211, "step": 14021 }, { "epoch": 0.64, "grad_norm": 0.35594690678240204, "learning_rate": 5.938343346860691e-06, "loss": 0.3125, "step": 14022 }, { "epoch": 0.64, "grad_norm": 0.36679396768821, "learning_rate": 5.936983732041762e-06, "loss": 0.282, "step": 14023 }, { "epoch": 0.64, "grad_norm": 1.0619124674219391, "learning_rate": 5.935624207174091e-06, "loss": 0.2938, "step": 14024 }, { "epoch": 0.64, "grad_norm": 0.3139314170072544, "learning_rate": 5.934264772287788e-06, "loss": 0.1597, "step": 14025 }, { "epoch": 0.64, "grad_norm": 0.4132522604381668, "learning_rate": 5.932905427412945e-06, "loss": 0.2819, "step": 14026 }, { "epoch": 0.64, "grad_norm": 0.5058327049772476, "learning_rate": 5.931546172579654e-06, "loss": 0.2721, "step": 14027 }, { "epoch": 0.64, "grad_norm": 1.2493164779550003, "learning_rate": 5.9301870078180115e-06, "loss": 0.7818, "step": 14028 }, { "epoch": 0.64, "grad_norm": 0.36539958269327333, "learning_rate": 5.928827933158101e-06, "loss": 0.2529, "step": 14029 }, { "epoch": 0.64, "grad_norm": 0.48970531808253404, "learning_rate": 5.927468948630022e-06, "loss": 0.3006, "step": 14030 }, { "epoch": 0.64, "grad_norm": 0.26278519974333453, "learning_rate": 5.926110054263853e-06, "loss": 0.1608, "step": 14031 }, { "epoch": 0.64, "grad_norm": 0.3522290736438575, "learning_rate": 5.924751250089681e-06, "loss": 0.2469, "step": 14032 }, { "epoch": 0.64, "grad_norm": 0.7797938445854389, "learning_rate": 5.923392536137587e-06, "loss": 0.3883, "step": 14033 }, { "epoch": 0.64, "grad_norm": 0.37627157742501927, "learning_rate": 5.922033912437655e-06, "loss": 0.2814, "step": 14034 }, { "epoch": 0.64, "grad_norm": 0.40872402295254384, "learning_rate": 5.9206753790199625e-06, "loss": 0.2525, "step": 14035 }, { "epoch": 0.64, "grad_norm": 0.5696894184459426, "learning_rate": 5.919316935914584e-06, "loss": 0.3534, "step": 14036 }, { "epoch": 0.64, "grad_norm": 0.4359473846904344, "learning_rate": 5.917958583151593e-06, "loss": 0.272, "step": 14037 }, { "epoch": 0.64, "grad_norm": 0.41465614678037366, "learning_rate": 5.916600320761068e-06, "loss": 0.2689, "step": 14038 }, { "epoch": 0.64, "grad_norm": 0.2700984727000162, "learning_rate": 5.915242148773075e-06, "loss": 0.2176, "step": 14039 }, { "epoch": 0.64, "grad_norm": 1.344405344456894, "learning_rate": 5.913884067217686e-06, "loss": 0.5553, "step": 14040 }, { "epoch": 0.65, "grad_norm": 0.3704167630867199, "learning_rate": 5.912526076124963e-06, "loss": 0.2544, "step": 14041 }, { "epoch": 0.65, "grad_norm": 0.41014772977446634, "learning_rate": 5.911168175524975e-06, "loss": 0.3125, "step": 14042 }, { "epoch": 0.65, "grad_norm": 0.9713927018185115, "learning_rate": 5.909810365447781e-06, "loss": 0.4341, "step": 14043 }, { "epoch": 0.65, "grad_norm": 0.3617616147385406, "learning_rate": 5.908452645923446e-06, "loss": 0.2336, "step": 14044 }, { "epoch": 0.65, "grad_norm": 0.428745502892723, "learning_rate": 5.907095016982024e-06, "loss": 0.2728, "step": 14045 }, { "epoch": 0.65, "grad_norm": 0.398109947912981, "learning_rate": 5.905737478653573e-06, "loss": 0.2958, "step": 14046 }, { "epoch": 0.65, "grad_norm": 0.2326567238674615, "learning_rate": 5.904380030968149e-06, "loss": 0.1661, "step": 14047 }, { "epoch": 0.65, "grad_norm": 1.2375449626057426, "learning_rate": 5.9030226739558035e-06, "loss": 0.5481, "step": 14048 }, { "epoch": 0.65, "grad_norm": 0.8078774281881038, "learning_rate": 5.901665407646589e-06, "loss": 0.4016, "step": 14049 }, { "epoch": 0.65, "grad_norm": 0.2846723660380272, "learning_rate": 5.900308232070546e-06, "loss": 0.2191, "step": 14050 }, { "epoch": 0.65, "grad_norm": 0.5024436298825485, "learning_rate": 5.898951147257733e-06, "loss": 0.2511, "step": 14051 }, { "epoch": 0.65, "grad_norm": 0.437778933677139, "learning_rate": 5.897594153238191e-06, "loss": 0.2854, "step": 14052 }, { "epoch": 0.65, "grad_norm": 0.34278946780478636, "learning_rate": 5.896237250041958e-06, "loss": 0.1695, "step": 14053 }, { "epoch": 0.65, "grad_norm": 0.3620538072606337, "learning_rate": 5.894880437699073e-06, "loss": 0.2964, "step": 14054 }, { "epoch": 0.65, "grad_norm": 0.7879645578272876, "learning_rate": 5.893523716239582e-06, "loss": 0.3843, "step": 14055 }, { "epoch": 0.65, "grad_norm": 0.6183889401103069, "learning_rate": 5.892167085693518e-06, "loss": 0.3074, "step": 14056 }, { "epoch": 0.65, "grad_norm": 0.4354465362710058, "learning_rate": 5.8908105460909175e-06, "loss": 0.2495, "step": 14057 }, { "epoch": 0.65, "grad_norm": 0.3571420235220315, "learning_rate": 5.88945409746181e-06, "loss": 0.3204, "step": 14058 }, { "epoch": 0.65, "grad_norm": 0.21444502755462191, "learning_rate": 5.888097739836225e-06, "loss": 0.1131, "step": 14059 }, { "epoch": 0.65, "grad_norm": 0.42014320869758087, "learning_rate": 5.886741473244194e-06, "loss": 0.2521, "step": 14060 }, { "epoch": 0.65, "grad_norm": 0.8335583293861194, "learning_rate": 5.885385297715744e-06, "loss": 0.388, "step": 14061 }, { "epoch": 0.65, "grad_norm": 0.29658374384207536, "learning_rate": 5.884029213280896e-06, "loss": 0.2804, "step": 14062 }, { "epoch": 0.65, "grad_norm": 0.5078248457646383, "learning_rate": 5.882673219969673e-06, "loss": 0.268, "step": 14063 }, { "epoch": 0.65, "grad_norm": 0.4935444353691137, "learning_rate": 5.881317317812099e-06, "loss": 0.2933, "step": 14064 }, { "epoch": 0.65, "grad_norm": 0.31485789609760956, "learning_rate": 5.87996150683819e-06, "loss": 0.2039, "step": 14065 }, { "epoch": 0.65, "grad_norm": 0.31179708209101076, "learning_rate": 5.8786057870779625e-06, "loss": 0.2434, "step": 14066 }, { "epoch": 0.65, "grad_norm": 0.7656375068291004, "learning_rate": 5.877250158561425e-06, "loss": 0.4008, "step": 14067 }, { "epoch": 0.65, "grad_norm": 0.4399311701859046, "learning_rate": 5.875894621318601e-06, "loss": 0.2915, "step": 14068 }, { "epoch": 0.65, "grad_norm": 0.5576276289419394, "learning_rate": 5.874539175379494e-06, "loss": 0.3929, "step": 14069 }, { "epoch": 0.65, "grad_norm": 0.36723683065939416, "learning_rate": 5.873183820774115e-06, "loss": 0.273, "step": 14070 }, { "epoch": 0.65, "grad_norm": 0.3406893676365391, "learning_rate": 5.871828557532465e-06, "loss": 0.2242, "step": 14071 }, { "epoch": 0.65, "grad_norm": 0.4644958537405138, "learning_rate": 5.8704733856845545e-06, "loss": 0.3321, "step": 14072 }, { "epoch": 0.65, "grad_norm": 0.28841420157094566, "learning_rate": 5.869118305260384e-06, "loss": 0.2105, "step": 14073 }, { "epoch": 0.65, "grad_norm": 0.611158836154535, "learning_rate": 5.8677633162899515e-06, "loss": 0.2855, "step": 14074 }, { "epoch": 0.65, "grad_norm": 0.42617592692666373, "learning_rate": 5.8664084188032575e-06, "loss": 0.3183, "step": 14075 }, { "epoch": 0.65, "grad_norm": 1.0436855478240445, "learning_rate": 5.865053612830296e-06, "loss": 0.3864, "step": 14076 }, { "epoch": 0.65, "grad_norm": 0.526657586806867, "learning_rate": 5.863698898401062e-06, "loss": 0.3342, "step": 14077 }, { "epoch": 0.65, "grad_norm": 0.3017474326605516, "learning_rate": 5.862344275545548e-06, "loss": 0.2773, "step": 14078 }, { "epoch": 0.65, "grad_norm": 0.3417635397111297, "learning_rate": 5.8609897442937455e-06, "loss": 0.1226, "step": 14079 }, { "epoch": 0.65, "grad_norm": 0.7097763065674744, "learning_rate": 5.859635304675638e-06, "loss": 0.3446, "step": 14080 }, { "epoch": 0.65, "grad_norm": 0.4278757754877832, "learning_rate": 5.858280956721217e-06, "loss": 0.331, "step": 14081 }, { "epoch": 0.65, "grad_norm": 0.3825985560055229, "learning_rate": 5.856926700460464e-06, "loss": 0.3075, "step": 14082 }, { "epoch": 0.65, "grad_norm": 0.4511504948413761, "learning_rate": 5.8555725359233586e-06, "loss": 0.2254, "step": 14083 }, { "epoch": 0.65, "grad_norm": 0.5110215449893202, "learning_rate": 5.85421846313988e-06, "loss": 0.385, "step": 14084 }, { "epoch": 0.65, "grad_norm": 0.30071075920063023, "learning_rate": 5.852864482140013e-06, "loss": 0.2123, "step": 14085 }, { "epoch": 0.65, "grad_norm": 0.3069837889145256, "learning_rate": 5.851510592953729e-06, "loss": 0.206, "step": 14086 }, { "epoch": 0.65, "grad_norm": 0.5843109546453796, "learning_rate": 5.850156795611002e-06, "loss": 0.3289, "step": 14087 }, { "epoch": 0.65, "grad_norm": 0.7342322343790678, "learning_rate": 5.848803090141806e-06, "loss": 0.4923, "step": 14088 }, { "epoch": 0.65, "grad_norm": 0.38461868813903305, "learning_rate": 5.847449476576104e-06, "loss": 0.2303, "step": 14089 }, { "epoch": 0.65, "grad_norm": 0.3299582547819613, "learning_rate": 5.84609595494387e-06, "loss": 0.2902, "step": 14090 }, { "epoch": 0.65, "grad_norm": 0.26534864811694686, "learning_rate": 5.844742525275069e-06, "loss": 0.17, "step": 14091 }, { "epoch": 0.65, "grad_norm": 0.44361048012692367, "learning_rate": 5.843389187599664e-06, "loss": 0.113, "step": 14092 }, { "epoch": 0.65, "grad_norm": 0.42645125874693257, "learning_rate": 5.842035941947614e-06, "loss": 0.3284, "step": 14093 }, { "epoch": 0.65, "grad_norm": 0.3838857290026101, "learning_rate": 5.840682788348882e-06, "loss": 0.2973, "step": 14094 }, { "epoch": 0.65, "grad_norm": 0.690880998125243, "learning_rate": 5.839329726833425e-06, "loss": 0.3819, "step": 14095 }, { "epoch": 0.65, "grad_norm": 0.3968293947113594, "learning_rate": 5.837976757431198e-06, "loss": 0.2293, "step": 14096 }, { "epoch": 0.65, "grad_norm": 0.30187270205852207, "learning_rate": 5.836623880172152e-06, "loss": 0.1612, "step": 14097 }, { "epoch": 0.65, "grad_norm": 0.3934615622872843, "learning_rate": 5.83527109508624e-06, "loss": 0.304, "step": 14098 }, { "epoch": 0.65, "grad_norm": 0.35200652692966583, "learning_rate": 5.833918402203416e-06, "loss": 0.2391, "step": 14099 }, { "epoch": 0.65, "grad_norm": 0.6820557851729071, "learning_rate": 5.8325658015536205e-06, "loss": 0.3974, "step": 14100 }, { "epoch": 0.65, "grad_norm": 0.34812750767947015, "learning_rate": 5.8312132931667994e-06, "loss": 0.295, "step": 14101 }, { "epoch": 0.65, "grad_norm": 0.32502191251457896, "learning_rate": 5.829860877072903e-06, "loss": 0.206, "step": 14102 }, { "epoch": 0.65, "grad_norm": 0.29381155656663266, "learning_rate": 5.828508553301864e-06, "loss": 0.1668, "step": 14103 }, { "epoch": 0.65, "grad_norm": 0.6288475125107023, "learning_rate": 5.827156321883629e-06, "loss": 0.331, "step": 14104 }, { "epoch": 0.65, "grad_norm": 0.37856086051881566, "learning_rate": 5.825804182848127e-06, "loss": 0.2459, "step": 14105 }, { "epoch": 0.65, "grad_norm": 0.35770407623246614, "learning_rate": 5.824452136225298e-06, "loss": 0.2969, "step": 14106 }, { "epoch": 0.65, "grad_norm": 0.6400617567491976, "learning_rate": 5.823100182045074e-06, "loss": 0.3684, "step": 14107 }, { "epoch": 0.65, "grad_norm": 0.4054847395861062, "learning_rate": 5.821748320337389e-06, "loss": 0.3155, "step": 14108 }, { "epoch": 0.65, "grad_norm": 0.2751408132879385, "learning_rate": 5.82039655113217e-06, "loss": 0.2118, "step": 14109 }, { "epoch": 0.65, "grad_norm": 0.4736254919197536, "learning_rate": 5.819044874459335e-06, "loss": 0.2494, "step": 14110 }, { "epoch": 0.65, "grad_norm": 0.33354660241817974, "learning_rate": 5.8176932903488245e-06, "loss": 0.2667, "step": 14111 }, { "epoch": 0.65, "grad_norm": 0.7121068681611223, "learning_rate": 5.81634179883055e-06, "loss": 0.3407, "step": 14112 }, { "epoch": 0.65, "grad_norm": 0.3555368937200797, "learning_rate": 5.814990399934439e-06, "loss": 0.2924, "step": 14113 }, { "epoch": 0.65, "grad_norm": 0.396501409591606, "learning_rate": 5.813639093690404e-06, "loss": 0.3013, "step": 14114 }, { "epoch": 0.65, "grad_norm": 0.2868657038564414, "learning_rate": 5.812287880128365e-06, "loss": 0.0894, "step": 14115 }, { "epoch": 0.65, "grad_norm": 0.39630154708749077, "learning_rate": 5.810936759278238e-06, "loss": 0.25, "step": 14116 }, { "epoch": 0.65, "grad_norm": 0.32714653874073996, "learning_rate": 5.809585731169932e-06, "loss": 0.2666, "step": 14117 }, { "epoch": 0.65, "grad_norm": 0.5399109562101687, "learning_rate": 5.8082347958333625e-06, "loss": 0.2699, "step": 14118 }, { "epoch": 0.65, "grad_norm": 0.8554431642166302, "learning_rate": 5.80688395329843e-06, "loss": 0.5314, "step": 14119 }, { "epoch": 0.65, "grad_norm": 0.3849877892034726, "learning_rate": 5.8055332035950466e-06, "loss": 0.2853, "step": 14120 }, { "epoch": 0.65, "grad_norm": 0.5338949265551288, "learning_rate": 5.804182546753118e-06, "loss": 0.3385, "step": 14121 }, { "epoch": 0.65, "grad_norm": 0.22869768903670576, "learning_rate": 5.802831982802539e-06, "loss": 0.1729, "step": 14122 }, { "epoch": 0.65, "grad_norm": 0.38241936189181336, "learning_rate": 5.801481511773217e-06, "loss": 0.2434, "step": 14123 }, { "epoch": 0.65, "grad_norm": 0.7834390698914407, "learning_rate": 5.800131133695046e-06, "loss": 0.4461, "step": 14124 }, { "epoch": 0.65, "grad_norm": 0.3282578729058527, "learning_rate": 5.798780848597929e-06, "loss": 0.2427, "step": 14125 }, { "epoch": 0.65, "grad_norm": 0.39201846305756777, "learning_rate": 5.7974306565117544e-06, "loss": 0.2929, "step": 14126 }, { "epoch": 0.65, "grad_norm": 0.47533860282002594, "learning_rate": 5.796080557466406e-06, "loss": 0.2677, "step": 14127 }, { "epoch": 0.65, "grad_norm": 0.48870676441467303, "learning_rate": 5.794730551491792e-06, "loss": 0.2066, "step": 14128 }, { "epoch": 0.65, "grad_norm": 0.27686320923913305, "learning_rate": 5.793380638617785e-06, "loss": 0.2549, "step": 14129 }, { "epoch": 0.65, "grad_norm": 0.4491758521915681, "learning_rate": 5.792030818874279e-06, "loss": 0.3713, "step": 14130 }, { "epoch": 0.65, "grad_norm": 1.2268351057548426, "learning_rate": 5.790681092291153e-06, "loss": 0.3928, "step": 14131 }, { "epoch": 0.65, "grad_norm": 0.33786954246303263, "learning_rate": 5.7893314588982905e-06, "loss": 0.2693, "step": 14132 }, { "epoch": 0.65, "grad_norm": 0.5407620300106977, "learning_rate": 5.7879819187255745e-06, "loss": 0.3268, "step": 14133 }, { "epoch": 0.65, "grad_norm": 0.49344767327277367, "learning_rate": 5.786632471802876e-06, "loss": 0.3629, "step": 14134 }, { "epoch": 0.65, "grad_norm": 0.24936355129212784, "learning_rate": 5.785283118160077e-06, "loss": 0.1778, "step": 14135 }, { "epoch": 0.65, "grad_norm": 0.43376290978721116, "learning_rate": 5.783933857827044e-06, "loss": 0.2646, "step": 14136 }, { "epoch": 0.65, "grad_norm": 0.4063260275117608, "learning_rate": 5.782584690833651e-06, "loss": 0.334, "step": 14137 }, { "epoch": 0.65, "grad_norm": 0.3059287155354174, "learning_rate": 5.7812356172097725e-06, "loss": 0.1861, "step": 14138 }, { "epoch": 0.65, "grad_norm": 0.9392788631669434, "learning_rate": 5.779886636985268e-06, "loss": 0.4264, "step": 14139 }, { "epoch": 0.65, "grad_norm": 0.43336313756211825, "learning_rate": 5.778537750190005e-06, "loss": 0.3307, "step": 14140 }, { "epoch": 0.65, "grad_norm": 0.31510948105453057, "learning_rate": 5.7771889568538495e-06, "loss": 0.1886, "step": 14141 }, { "epoch": 0.65, "grad_norm": 0.3403563397976049, "learning_rate": 5.775840257006663e-06, "loss": 0.2883, "step": 14142 }, { "epoch": 0.65, "grad_norm": 0.42484941290585215, "learning_rate": 5.7744916506782976e-06, "loss": 0.2322, "step": 14143 }, { "epoch": 0.65, "grad_norm": 0.3451680086973624, "learning_rate": 5.7731431378986155e-06, "loss": 0.1851, "step": 14144 }, { "epoch": 0.65, "grad_norm": 0.37366765411841035, "learning_rate": 5.771794718697474e-06, "loss": 0.3001, "step": 14145 }, { "epoch": 0.65, "grad_norm": 0.7995982250513114, "learning_rate": 5.7704463931047186e-06, "loss": 0.4337, "step": 14146 }, { "epoch": 0.65, "grad_norm": 0.33748847832534323, "learning_rate": 5.769098161150206e-06, "loss": 0.262, "step": 14147 }, { "epoch": 0.65, "grad_norm": 0.2901482807547783, "learning_rate": 5.76775002286378e-06, "loss": 0.1651, "step": 14148 }, { "epoch": 0.65, "grad_norm": 0.29429213222781453, "learning_rate": 5.766401978275288e-06, "loss": 0.2411, "step": 14149 }, { "epoch": 0.65, "grad_norm": 0.34971280364525265, "learning_rate": 5.7650540274145806e-06, "loss": 0.2426, "step": 14150 }, { "epoch": 0.65, "grad_norm": 0.9465308797483732, "learning_rate": 5.763706170311492e-06, "loss": 0.2854, "step": 14151 }, { "epoch": 0.65, "grad_norm": 0.9676438196133452, "learning_rate": 5.762358406995868e-06, "loss": 0.5064, "step": 14152 }, { "epoch": 0.65, "grad_norm": 0.2987169408719296, "learning_rate": 5.761010737497538e-06, "loss": 0.2686, "step": 14153 }, { "epoch": 0.65, "grad_norm": 0.4712628696276102, "learning_rate": 5.759663161846352e-06, "loss": 0.2588, "step": 14154 }, { "epoch": 0.65, "grad_norm": 0.24464770839307945, "learning_rate": 5.758315680072137e-06, "loss": 0.1562, "step": 14155 }, { "epoch": 0.65, "grad_norm": 0.407422811341761, "learning_rate": 5.756968292204721e-06, "loss": 0.2751, "step": 14156 }, { "epoch": 0.65, "grad_norm": 0.451236921817357, "learning_rate": 5.755620998273938e-06, "loss": 0.2947, "step": 14157 }, { "epoch": 0.65, "grad_norm": 1.2393826991287544, "learning_rate": 5.754273798309613e-06, "loss": 0.3248, "step": 14158 }, { "epoch": 0.65, "grad_norm": 0.4035827372022771, "learning_rate": 5.752926692341581e-06, "loss": 0.2444, "step": 14159 }, { "epoch": 0.65, "grad_norm": 0.5421600021492852, "learning_rate": 5.751579680399652e-06, "loss": 0.3986, "step": 14160 }, { "epoch": 0.65, "grad_norm": 0.2206263915601203, "learning_rate": 5.7502327625136565e-06, "loss": 0.1588, "step": 14161 }, { "epoch": 0.65, "grad_norm": 0.4731552190960419, "learning_rate": 5.748885938713413e-06, "loss": 0.2715, "step": 14162 }, { "epoch": 0.65, "grad_norm": 0.6159099671232361, "learning_rate": 5.747539209028736e-06, "loss": 0.3448, "step": 14163 }, { "epoch": 0.65, "grad_norm": 1.1071598758855485, "learning_rate": 5.746192573489446e-06, "loss": 0.295, "step": 14164 }, { "epoch": 0.65, "grad_norm": 0.3149059586994086, "learning_rate": 5.744846032125347e-06, "loss": 0.2753, "step": 14165 }, { "epoch": 0.65, "grad_norm": 0.5033871897175037, "learning_rate": 5.7434995849662566e-06, "loss": 0.3761, "step": 14166 }, { "epoch": 0.65, "grad_norm": 0.3391029684368323, "learning_rate": 5.742153232041987e-06, "loss": 0.168, "step": 14167 }, { "epoch": 0.65, "grad_norm": 0.375337184900047, "learning_rate": 5.740806973382338e-06, "loss": 0.2639, "step": 14168 }, { "epoch": 0.65, "grad_norm": 0.3568123759604016, "learning_rate": 5.73946080901712e-06, "loss": 0.3067, "step": 14169 }, { "epoch": 0.65, "grad_norm": 0.4613382979896434, "learning_rate": 5.738114738976126e-06, "loss": 0.2418, "step": 14170 }, { "epoch": 0.65, "grad_norm": 0.33662864431754785, "learning_rate": 5.736768763289172e-06, "loss": 0.1917, "step": 14171 }, { "epoch": 0.65, "grad_norm": 1.359983792665407, "learning_rate": 5.735422881986045e-06, "loss": 0.7775, "step": 14172 }, { "epoch": 0.65, "grad_norm": 0.3852553956596151, "learning_rate": 5.7340770950965485e-06, "loss": 0.313, "step": 14173 }, { "epoch": 0.65, "grad_norm": 0.3262234500471043, "learning_rate": 5.73273140265047e-06, "loss": 0.2284, "step": 14174 }, { "epoch": 0.65, "grad_norm": 0.49914491088904966, "learning_rate": 5.731385804677605e-06, "loss": 0.3025, "step": 14175 }, { "epoch": 0.65, "grad_norm": 0.38152383616333674, "learning_rate": 5.730040301207749e-06, "loss": 0.2585, "step": 14176 }, { "epoch": 0.65, "grad_norm": 0.32416110086969513, "learning_rate": 5.72869489227068e-06, "loss": 0.205, "step": 14177 }, { "epoch": 0.65, "grad_norm": 0.5679739192206334, "learning_rate": 5.727349577896194e-06, "loss": 0.3729, "step": 14178 }, { "epoch": 0.65, "grad_norm": 0.6570330115989632, "learning_rate": 5.7260043581140655e-06, "loss": 0.4017, "step": 14179 }, { "epoch": 0.65, "grad_norm": 0.3610192647193398, "learning_rate": 5.724659232954082e-06, "loss": 0.2094, "step": 14180 }, { "epoch": 0.65, "grad_norm": 0.2856354020456345, "learning_rate": 5.723314202446027e-06, "loss": 0.2482, "step": 14181 }, { "epoch": 0.65, "grad_norm": 0.4684549178345702, "learning_rate": 5.7219692666196695e-06, "loss": 0.2509, "step": 14182 }, { "epoch": 0.65, "grad_norm": 0.6149544046599806, "learning_rate": 5.720624425504788e-06, "loss": 0.3054, "step": 14183 }, { "epoch": 0.65, "grad_norm": 0.3406045174449596, "learning_rate": 5.719279679131162e-06, "loss": 0.2507, "step": 14184 }, { "epoch": 0.65, "grad_norm": 0.4759607349202062, "learning_rate": 5.717935027528554e-06, "loss": 0.3284, "step": 14185 }, { "epoch": 0.65, "grad_norm": 0.4105633865655265, "learning_rate": 5.7165904707267415e-06, "loss": 0.2929, "step": 14186 }, { "epoch": 0.65, "grad_norm": 0.35537583070972373, "learning_rate": 5.71524600875548e-06, "loss": 0.1659, "step": 14187 }, { "epoch": 0.65, "grad_norm": 0.38481586145885494, "learning_rate": 5.713901641644549e-06, "loss": 0.244, "step": 14188 }, { "epoch": 0.65, "grad_norm": 0.34512305554861017, "learning_rate": 5.712557369423701e-06, "loss": 0.2749, "step": 14189 }, { "epoch": 0.65, "grad_norm": 0.49328624160947737, "learning_rate": 5.7112131921227055e-06, "loss": 0.2585, "step": 14190 }, { "epoch": 0.65, "grad_norm": 0.6098671563062075, "learning_rate": 5.709869109771312e-06, "loss": 0.3676, "step": 14191 }, { "epoch": 0.65, "grad_norm": 0.44890639166524327, "learning_rate": 5.708525122399281e-06, "loss": 0.2799, "step": 14192 }, { "epoch": 0.65, "grad_norm": 0.2915815409652929, "learning_rate": 5.707181230036372e-06, "loss": 0.2349, "step": 14193 }, { "epoch": 0.65, "grad_norm": 0.35963937531729373, "learning_rate": 5.705837432712329e-06, "loss": 0.1951, "step": 14194 }, { "epoch": 0.65, "grad_norm": 0.8418610004963427, "learning_rate": 5.70449373045691e-06, "loss": 0.4448, "step": 14195 }, { "epoch": 0.65, "grad_norm": 0.39447917298210217, "learning_rate": 5.703150123299857e-06, "loss": 0.2683, "step": 14196 }, { "epoch": 0.65, "grad_norm": 0.40753951251270465, "learning_rate": 5.701806611270917e-06, "loss": 0.2557, "step": 14197 }, { "epoch": 0.65, "grad_norm": 0.6751337569632193, "learning_rate": 5.700463194399841e-06, "loss": 0.3357, "step": 14198 }, { "epoch": 0.65, "grad_norm": 0.45264573070798325, "learning_rate": 5.699119872716363e-06, "loss": 0.3251, "step": 14199 }, { "epoch": 0.65, "grad_norm": 0.21703008589817113, "learning_rate": 5.697776646250225e-06, "loss": 0.1646, "step": 14200 }, { "epoch": 0.65, "grad_norm": 0.6595318002984385, "learning_rate": 5.696433515031169e-06, "loss": 0.3672, "step": 14201 }, { "epoch": 0.65, "grad_norm": 0.41054545419020705, "learning_rate": 5.695090479088923e-06, "loss": 0.3133, "step": 14202 }, { "epoch": 0.65, "grad_norm": 0.8165759290881441, "learning_rate": 5.693747538453229e-06, "loss": 0.3391, "step": 14203 }, { "epoch": 0.65, "grad_norm": 0.40005630914203494, "learning_rate": 5.692404693153807e-06, "loss": 0.2632, "step": 14204 }, { "epoch": 0.65, "grad_norm": 0.33477709388680915, "learning_rate": 5.6910619432204e-06, "loss": 0.2703, "step": 14205 }, { "epoch": 0.65, "grad_norm": 0.24210380441457324, "learning_rate": 5.689719288682724e-06, "loss": 0.0949, "step": 14206 }, { "epoch": 0.65, "grad_norm": 0.4369507200398018, "learning_rate": 5.688376729570515e-06, "loss": 0.2833, "step": 14207 }, { "epoch": 0.65, "grad_norm": 0.5222180691816736, "learning_rate": 5.687034265913484e-06, "loss": 0.3264, "step": 14208 }, { "epoch": 0.65, "grad_norm": 0.3885298135418425, "learning_rate": 5.685691897741359e-06, "loss": 0.3289, "step": 14209 }, { "epoch": 0.65, "grad_norm": 0.36614740960178666, "learning_rate": 5.6843496250838595e-06, "loss": 0.1941, "step": 14210 }, { "epoch": 0.65, "grad_norm": 0.592447933918337, "learning_rate": 5.6830074479706964e-06, "loss": 0.3501, "step": 14211 }, { "epoch": 0.65, "grad_norm": 0.26950665642334565, "learning_rate": 5.681665366431591e-06, "loss": 0.2287, "step": 14212 }, { "epoch": 0.65, "grad_norm": 0.5695476275524742, "learning_rate": 5.680323380496249e-06, "loss": 0.239, "step": 14213 }, { "epoch": 0.65, "grad_norm": 0.38139558410336155, "learning_rate": 5.678981490194384e-06, "loss": 0.304, "step": 14214 }, { "epoch": 0.65, "grad_norm": 0.7483139913001906, "learning_rate": 5.677639695555708e-06, "loss": 0.5238, "step": 14215 }, { "epoch": 0.65, "grad_norm": 0.3537701716276475, "learning_rate": 5.67629799660992e-06, "loss": 0.2305, "step": 14216 }, { "epoch": 0.65, "grad_norm": 0.3765521953950769, "learning_rate": 5.674956393386726e-06, "loss": 0.2677, "step": 14217 }, { "epoch": 0.65, "grad_norm": 0.2897579969665024, "learning_rate": 5.6736148859158305e-06, "loss": 0.1758, "step": 14218 }, { "epoch": 0.65, "grad_norm": 0.548099549788341, "learning_rate": 5.672273474226934e-06, "loss": 0.2411, "step": 14219 }, { "epoch": 0.65, "grad_norm": 0.34541878267423803, "learning_rate": 5.670932158349732e-06, "loss": 0.2932, "step": 14220 }, { "epoch": 0.65, "grad_norm": 0.4821728946039155, "learning_rate": 5.669590938313911e-06, "loss": 0.3186, "step": 14221 }, { "epoch": 0.65, "grad_norm": 0.7963979162775936, "learning_rate": 5.668249814149182e-06, "loss": 0.3891, "step": 14222 }, { "epoch": 0.65, "grad_norm": 0.35921902691172936, "learning_rate": 5.666908785885222e-06, "loss": 0.2077, "step": 14223 }, { "epoch": 0.65, "grad_norm": 0.293057228740933, "learning_rate": 5.66556785355173e-06, "loss": 0.2511, "step": 14224 }, { "epoch": 0.65, "grad_norm": 0.6387911332969055, "learning_rate": 5.664227017178385e-06, "loss": 0.3409, "step": 14225 }, { "epoch": 0.65, "grad_norm": 0.27522830199844417, "learning_rate": 5.662886276794874e-06, "loss": 0.1684, "step": 14226 }, { "epoch": 0.65, "grad_norm": 1.1993223672192868, "learning_rate": 5.661545632430885e-06, "loss": 0.7549, "step": 14227 }, { "epoch": 0.65, "grad_norm": 0.3225440958746963, "learning_rate": 5.660205084116089e-06, "loss": 0.2644, "step": 14228 }, { "epoch": 0.65, "grad_norm": 0.42340677287258677, "learning_rate": 5.658864631880174e-06, "loss": 0.2332, "step": 14229 }, { "epoch": 0.65, "grad_norm": 0.7360893733765348, "learning_rate": 5.6575242757528095e-06, "loss": 0.3875, "step": 14230 }, { "epoch": 0.65, "grad_norm": 0.4019577050511025, "learning_rate": 5.656184015763671e-06, "loss": 0.2605, "step": 14231 }, { "epoch": 0.65, "grad_norm": 0.2740373031618846, "learning_rate": 5.654843851942436e-06, "loss": 0.2272, "step": 14232 }, { "epoch": 0.65, "grad_norm": 0.3004929777231637, "learning_rate": 5.653503784318767e-06, "loss": 0.2317, "step": 14233 }, { "epoch": 0.65, "grad_norm": 1.5335587741320547, "learning_rate": 5.652163812922334e-06, "loss": 0.6379, "step": 14234 }, { "epoch": 0.65, "grad_norm": 0.34352361961153377, "learning_rate": 5.650823937782803e-06, "loss": 0.2458, "step": 14235 }, { "epoch": 0.65, "grad_norm": 0.42610722290167297, "learning_rate": 5.649484158929844e-06, "loss": 0.2585, "step": 14236 }, { "epoch": 0.65, "grad_norm": 0.9032717675331637, "learning_rate": 5.648144476393108e-06, "loss": 0.4389, "step": 14237 }, { "epoch": 0.65, "grad_norm": 0.3372163749507671, "learning_rate": 5.646804890202258e-06, "loss": 0.2522, "step": 14238 }, { "epoch": 0.65, "grad_norm": 0.32014044621985355, "learning_rate": 5.645465400386958e-06, "loss": 0.1601, "step": 14239 }, { "epoch": 0.65, "grad_norm": 0.28536590953643326, "learning_rate": 5.644126006976851e-06, "loss": 0.238, "step": 14240 }, { "epoch": 0.65, "grad_norm": 0.42883083663738253, "learning_rate": 5.6427867100016024e-06, "loss": 0.2901, "step": 14241 }, { "epoch": 0.65, "grad_norm": 0.9003142029145237, "learning_rate": 5.641447509490851e-06, "loss": 0.3061, "step": 14242 }, { "epoch": 0.65, "grad_norm": 0.8907281877991363, "learning_rate": 5.64010840547425e-06, "loss": 0.4442, "step": 14243 }, { "epoch": 0.65, "grad_norm": 0.38835959640316525, "learning_rate": 5.638769397981452e-06, "loss": 0.2795, "step": 14244 }, { "epoch": 0.65, "grad_norm": 0.2764713961205734, "learning_rate": 5.637430487042091e-06, "loss": 0.2083, "step": 14245 }, { "epoch": 0.65, "grad_norm": 0.47751155804188555, "learning_rate": 5.636091672685819e-06, "loss": 0.2188, "step": 14246 }, { "epoch": 0.65, "grad_norm": 0.3680340927726867, "learning_rate": 5.634752954942264e-06, "loss": 0.2606, "step": 14247 }, { "epoch": 0.65, "grad_norm": 0.469372681713889, "learning_rate": 5.633414333841079e-06, "loss": 0.315, "step": 14248 }, { "epoch": 0.65, "grad_norm": 0.8238786934035612, "learning_rate": 5.632075809411892e-06, "loss": 0.2484, "step": 14249 }, { "epoch": 0.65, "grad_norm": 0.36641447556072176, "learning_rate": 5.6307373816843324e-06, "loss": 0.2776, "step": 14250 }, { "epoch": 0.65, "grad_norm": 0.5097054076531459, "learning_rate": 5.629399050688036e-06, "loss": 0.3645, "step": 14251 }, { "epoch": 0.65, "grad_norm": 0.2965976728084407, "learning_rate": 5.628060816452633e-06, "loss": 0.203, "step": 14252 }, { "epoch": 0.65, "grad_norm": 0.34936276223295415, "learning_rate": 5.626722679007753e-06, "loss": 0.2725, "step": 14253 }, { "epoch": 0.65, "grad_norm": 0.7164809327899495, "learning_rate": 5.625384638383014e-06, "loss": 0.373, "step": 14254 }, { "epoch": 0.65, "grad_norm": 0.5524030552283329, "learning_rate": 5.624046694608048e-06, "loss": 0.2586, "step": 14255 }, { "epoch": 0.65, "grad_norm": 0.31736641554879524, "learning_rate": 5.622708847712465e-06, "loss": 0.266, "step": 14256 }, { "epoch": 0.65, "grad_norm": 1.2824608981603112, "learning_rate": 5.621371097725889e-06, "loss": 0.741, "step": 14257 }, { "epoch": 0.65, "grad_norm": 0.3233526483378466, "learning_rate": 5.620033444677942e-06, "loss": 0.174, "step": 14258 }, { "epoch": 0.66, "grad_norm": 0.3281317592160886, "learning_rate": 5.618695888598228e-06, "loss": 0.2737, "step": 14259 }, { "epoch": 0.66, "grad_norm": 0.37129675136988083, "learning_rate": 5.6173584295163645e-06, "loss": 0.3006, "step": 14260 }, { "epoch": 0.66, "grad_norm": 1.1675457003963716, "learning_rate": 5.616021067461965e-06, "loss": 0.5681, "step": 14261 }, { "epoch": 0.66, "grad_norm": 0.3188456827370682, "learning_rate": 5.614683802464631e-06, "loss": 0.1863, "step": 14262 }, { "epoch": 0.66, "grad_norm": 1.22554060627318, "learning_rate": 5.6133466345539745e-06, "loss": 0.7175, "step": 14263 }, { "epoch": 0.66, "grad_norm": 0.31785025218900276, "learning_rate": 5.612009563759588e-06, "loss": 0.2579, "step": 14264 }, { "epoch": 0.66, "grad_norm": 0.3471616472853809, "learning_rate": 5.610672590111087e-06, "loss": 0.1933, "step": 14265 }, { "epoch": 0.66, "grad_norm": 0.5013416931190756, "learning_rate": 5.609335713638066e-06, "loss": 0.2643, "step": 14266 }, { "epoch": 0.66, "grad_norm": 0.3902198195921654, "learning_rate": 5.607998934370115e-06, "loss": 0.3079, "step": 14267 }, { "epoch": 0.66, "grad_norm": 0.3170246782980747, "learning_rate": 5.606662252336836e-06, "loss": 0.1883, "step": 14268 }, { "epoch": 0.66, "grad_norm": 1.1078586181018406, "learning_rate": 5.60532566756782e-06, "loss": 0.5957, "step": 14269 }, { "epoch": 0.66, "grad_norm": 0.654345179331826, "learning_rate": 5.603989180092661e-06, "loss": 0.3383, "step": 14270 }, { "epoch": 0.66, "grad_norm": 0.3604227978209283, "learning_rate": 5.602652789940941e-06, "loss": 0.2582, "step": 14271 }, { "epoch": 0.66, "grad_norm": 0.243514146603745, "learning_rate": 5.601316497142255e-06, "loss": 0.1579, "step": 14272 }, { "epoch": 0.66, "grad_norm": 1.3728625137128292, "learning_rate": 5.599980301726178e-06, "loss": 0.8046, "step": 14273 }, { "epoch": 0.66, "grad_norm": 0.34247309405555254, "learning_rate": 5.598644203722297e-06, "loss": 0.2689, "step": 14274 }, { "epoch": 0.66, "grad_norm": 0.7610271050849037, "learning_rate": 5.597308203160193e-06, "loss": 0.3187, "step": 14275 }, { "epoch": 0.66, "grad_norm": 0.35727192818962616, "learning_rate": 5.595972300069439e-06, "loss": 0.3158, "step": 14276 }, { "epoch": 0.66, "grad_norm": 0.37046281064243497, "learning_rate": 5.594636494479615e-06, "loss": 0.2803, "step": 14277 }, { "epoch": 0.66, "grad_norm": 0.196416306954674, "learning_rate": 5.593300786420295e-06, "loss": 0.0691, "step": 14278 }, { "epoch": 0.66, "grad_norm": 0.3566435339245806, "learning_rate": 5.591965175921046e-06, "loss": 0.2995, "step": 14279 }, { "epoch": 0.66, "grad_norm": 0.41377142937939826, "learning_rate": 5.590629663011442e-06, "loss": 0.2578, "step": 14280 }, { "epoch": 0.66, "grad_norm": 0.6461180749131437, "learning_rate": 5.589294247721041e-06, "loss": 0.2715, "step": 14281 }, { "epoch": 0.66, "grad_norm": 0.7606455976898617, "learning_rate": 5.587958930079422e-06, "loss": 0.4087, "step": 14282 }, { "epoch": 0.66, "grad_norm": 0.42554643240121875, "learning_rate": 5.586623710116135e-06, "loss": 0.2667, "step": 14283 }, { "epoch": 0.66, "grad_norm": 0.28220121077519666, "learning_rate": 5.58528858786075e-06, "loss": 0.2611, "step": 14284 }, { "epoch": 0.66, "grad_norm": 0.31114296575308625, "learning_rate": 5.583953563342821e-06, "loss": 0.1332, "step": 14285 }, { "epoch": 0.66, "grad_norm": 0.41882636979098803, "learning_rate": 5.582618636591895e-06, "loss": 0.282, "step": 14286 }, { "epoch": 0.66, "grad_norm": 0.6236188079933123, "learning_rate": 5.581283807637543e-06, "loss": 0.3289, "step": 14287 }, { "epoch": 0.66, "grad_norm": 0.36050526327785865, "learning_rate": 5.579949076509306e-06, "loss": 0.2411, "step": 14288 }, { "epoch": 0.66, "grad_norm": 0.43022839088968157, "learning_rate": 5.578614443236738e-06, "loss": 0.2722, "step": 14289 }, { "epoch": 0.66, "grad_norm": 0.26105073750649477, "learning_rate": 5.577279907849383e-06, "loss": 0.1784, "step": 14290 }, { "epoch": 0.66, "grad_norm": 0.476573794962954, "learning_rate": 5.575945470376787e-06, "loss": 0.2514, "step": 14291 }, { "epoch": 0.66, "grad_norm": 0.2921973754557666, "learning_rate": 5.574611130848499e-06, "loss": 0.256, "step": 14292 }, { "epoch": 0.66, "grad_norm": 0.6795554400154031, "learning_rate": 5.57327688929405e-06, "loss": 0.4419, "step": 14293 }, { "epoch": 0.66, "grad_norm": 0.7248740068038048, "learning_rate": 5.5719427457429854e-06, "loss": 0.3172, "step": 14294 }, { "epoch": 0.66, "grad_norm": 0.3891463940957887, "learning_rate": 5.570608700224844e-06, "loss": 0.2706, "step": 14295 }, { "epoch": 0.66, "grad_norm": 0.3946280731500319, "learning_rate": 5.5692747527691534e-06, "loss": 0.3376, "step": 14296 }, { "epoch": 0.66, "grad_norm": 0.3169734056521815, "learning_rate": 5.567940903405453e-06, "loss": 0.2096, "step": 14297 }, { "epoch": 0.66, "grad_norm": 0.4451406243635951, "learning_rate": 5.566607152163261e-06, "loss": 0.2025, "step": 14298 }, { "epoch": 0.66, "grad_norm": 0.5521747921624851, "learning_rate": 5.565273499072124e-06, "loss": 0.3353, "step": 14299 }, { "epoch": 0.66, "grad_norm": 0.3717540889867584, "learning_rate": 5.563939944161551e-06, "loss": 0.284, "step": 14300 }, { "epoch": 0.66, "grad_norm": 0.5885112681336554, "learning_rate": 5.562606487461077e-06, "loss": 0.1342, "step": 14301 }, { "epoch": 0.66, "grad_norm": 0.3231017626637755, "learning_rate": 5.561273129000213e-06, "loss": 0.2444, "step": 14302 }, { "epoch": 0.66, "grad_norm": 0.3116586453642888, "learning_rate": 5.559939868808486e-06, "loss": 0.2636, "step": 14303 }, { "epoch": 0.66, "grad_norm": 0.3808587296916276, "learning_rate": 5.558606706915414e-06, "loss": 0.1936, "step": 14304 }, { "epoch": 0.66, "grad_norm": 0.495299200503403, "learning_rate": 5.557273643350505e-06, "loss": 0.3337, "step": 14305 }, { "epoch": 0.66, "grad_norm": 1.261816240602834, "learning_rate": 5.555940678143279e-06, "loss": 0.6306, "step": 14306 }, { "epoch": 0.66, "grad_norm": 0.3210262523557024, "learning_rate": 5.55460781132324e-06, "loss": 0.2122, "step": 14307 }, { "epoch": 0.66, "grad_norm": 0.3026122764285616, "learning_rate": 5.553275042919899e-06, "loss": 0.2489, "step": 14308 }, { "epoch": 0.66, "grad_norm": 0.5133763491120305, "learning_rate": 5.5519423729627666e-06, "loss": 0.2807, "step": 14309 }, { "epoch": 0.66, "grad_norm": 0.5352216354302248, "learning_rate": 5.550609801481339e-06, "loss": 0.3159, "step": 14310 }, { "epoch": 0.66, "grad_norm": 0.4105191469311303, "learning_rate": 5.5492773285051225e-06, "loss": 0.2444, "step": 14311 }, { "epoch": 0.66, "grad_norm": 0.3834621336207729, "learning_rate": 5.547944954063616e-06, "loss": 0.3261, "step": 14312 }, { "epoch": 0.66, "grad_norm": 0.43230706275357317, "learning_rate": 5.546612678186322e-06, "loss": 0.2683, "step": 14313 }, { "epoch": 0.66, "grad_norm": 0.5520800308974478, "learning_rate": 5.54528050090273e-06, "loss": 0.237, "step": 14314 }, { "epoch": 0.66, "grad_norm": 0.22738306296687652, "learning_rate": 5.543948422242331e-06, "loss": 0.233, "step": 14315 }, { "epoch": 0.66, "grad_norm": 0.41821931607414636, "learning_rate": 5.542616442234618e-06, "loss": 0.2761, "step": 14316 }, { "epoch": 0.66, "grad_norm": 0.46820250400131486, "learning_rate": 5.541284560909081e-06, "loss": 0.2728, "step": 14317 }, { "epoch": 0.66, "grad_norm": 1.1472145734195835, "learning_rate": 5.539952778295212e-06, "loss": 0.6372, "step": 14318 }, { "epoch": 0.66, "grad_norm": 0.4472733730394902, "learning_rate": 5.538621094422485e-06, "loss": 0.3172, "step": 14319 }, { "epoch": 0.66, "grad_norm": 0.2741275945962341, "learning_rate": 5.537289509320387e-06, "loss": 0.2116, "step": 14320 }, { "epoch": 0.66, "grad_norm": 0.441946978809499, "learning_rate": 5.535958023018402e-06, "loss": 0.2605, "step": 14321 }, { "epoch": 0.66, "grad_norm": 0.3422502400983826, "learning_rate": 5.534626635546e-06, "loss": 0.1989, "step": 14322 }, { "epoch": 0.66, "grad_norm": 0.3008213191002857, "learning_rate": 5.533295346932664e-06, "loss": 0.2738, "step": 14323 }, { "epoch": 0.66, "grad_norm": 0.47999917826137445, "learning_rate": 5.531964157207861e-06, "loss": 0.2768, "step": 14324 }, { "epoch": 0.66, "grad_norm": 0.6523211684917575, "learning_rate": 5.530633066401063e-06, "loss": 0.3602, "step": 14325 }, { "epoch": 0.66, "grad_norm": 0.3680564045403798, "learning_rate": 5.529302074541748e-06, "loss": 0.299, "step": 14326 }, { "epoch": 0.66, "grad_norm": 0.3410408388934398, "learning_rate": 5.527971181659371e-06, "loss": 0.2598, "step": 14327 }, { "epoch": 0.66, "grad_norm": 0.3743246906818137, "learning_rate": 5.526640387783402e-06, "loss": 0.2245, "step": 14328 }, { "epoch": 0.66, "grad_norm": 0.4236182190119762, "learning_rate": 5.525309692943303e-06, "loss": 0.3064, "step": 14329 }, { "epoch": 0.66, "grad_norm": 0.36609971659370943, "learning_rate": 5.523979097168539e-06, "loss": 0.1605, "step": 14330 }, { "epoch": 0.66, "grad_norm": 0.3140950104501291, "learning_rate": 5.52264860048856e-06, "loss": 0.2662, "step": 14331 }, { "epoch": 0.66, "grad_norm": 0.4136570380239444, "learning_rate": 5.521318202932829e-06, "loss": 0.2771, "step": 14332 }, { "epoch": 0.66, "grad_norm": 0.53866608449905, "learning_rate": 5.519987904530792e-06, "loss": 0.3006, "step": 14333 }, { "epoch": 0.66, "grad_norm": 0.4065093063295819, "learning_rate": 5.518657705311905e-06, "loss": 0.2942, "step": 14334 }, { "epoch": 0.66, "grad_norm": 0.3888552951178163, "learning_rate": 5.517327605305623e-06, "loss": 0.3046, "step": 14335 }, { "epoch": 0.66, "grad_norm": 0.2501474266456698, "learning_rate": 5.515997604541381e-06, "loss": 0.2011, "step": 14336 }, { "epoch": 0.66, "grad_norm": 0.7760044815122245, "learning_rate": 5.514667703048632e-06, "loss": 0.2028, "step": 14337 }, { "epoch": 0.66, "grad_norm": 0.36496197443764716, "learning_rate": 5.51333790085682e-06, "loss": 0.2816, "step": 14338 }, { "epoch": 0.66, "grad_norm": 0.4066806178599458, "learning_rate": 5.512008197995379e-06, "loss": 0.3044, "step": 14339 }, { "epoch": 0.66, "grad_norm": 0.48849882851465426, "learning_rate": 5.510678594493755e-06, "loss": 0.139, "step": 14340 }, { "epoch": 0.66, "grad_norm": 0.37560524894971725, "learning_rate": 5.509349090381371e-06, "loss": 0.2794, "step": 14341 }, { "epoch": 0.66, "grad_norm": 0.3325329110254885, "learning_rate": 5.5080196856876796e-06, "loss": 0.22, "step": 14342 }, { "epoch": 0.66, "grad_norm": 0.31908110212494073, "learning_rate": 5.5066903804421025e-06, "loss": 0.2354, "step": 14343 }, { "epoch": 0.66, "grad_norm": 0.36365956549982315, "learning_rate": 5.505361174674065e-06, "loss": 0.2826, "step": 14344 }, { "epoch": 0.66, "grad_norm": 0.9942855327110379, "learning_rate": 5.504032068413003e-06, "loss": 0.4414, "step": 14345 }, { "epoch": 0.66, "grad_norm": 0.5608271277029652, "learning_rate": 5.50270306168833e-06, "loss": 0.1874, "step": 14346 }, { "epoch": 0.66, "grad_norm": 0.33407072038868196, "learning_rate": 5.501374154529487e-06, "loss": 0.2796, "step": 14347 }, { "epoch": 0.66, "grad_norm": 0.3640256695011617, "learning_rate": 5.5000453469658766e-06, "loss": 0.2657, "step": 14348 }, { "epoch": 0.66, "grad_norm": 0.5190160215030387, "learning_rate": 5.498716639026931e-06, "loss": 0.2438, "step": 14349 }, { "epoch": 0.66, "grad_norm": 0.386609989311832, "learning_rate": 5.497388030742057e-06, "loss": 0.2247, "step": 14350 }, { "epoch": 0.66, "grad_norm": 0.37192685195966674, "learning_rate": 5.496059522140671e-06, "loss": 0.292, "step": 14351 }, { "epoch": 0.66, "grad_norm": 0.7898882143323293, "learning_rate": 5.494731113252192e-06, "loss": 0.4606, "step": 14352 }, { "epoch": 0.66, "grad_norm": 0.3135239208686496, "learning_rate": 5.493402804106018e-06, "loss": 0.2068, "step": 14353 }, { "epoch": 0.66, "grad_norm": 0.32408218046667736, "learning_rate": 5.492074594731565e-06, "loss": 0.1991, "step": 14354 }, { "epoch": 0.66, "grad_norm": 0.3803391427905632, "learning_rate": 5.490746485158237e-06, "loss": 0.3036, "step": 14355 }, { "epoch": 0.66, "grad_norm": 0.3528469731216006, "learning_rate": 5.489418475415434e-06, "loss": 0.2276, "step": 14356 }, { "epoch": 0.66, "grad_norm": 1.2589976042055355, "learning_rate": 5.48809056553256e-06, "loss": 0.4672, "step": 14357 }, { "epoch": 0.66, "grad_norm": 1.4107037412336747, "learning_rate": 5.486762755539005e-06, "loss": 0.8533, "step": 14358 }, { "epoch": 0.66, "grad_norm": 0.2640953104311103, "learning_rate": 5.4854350454641825e-06, "loss": 0.2157, "step": 14359 }, { "epoch": 0.66, "grad_norm": 0.8085484146032684, "learning_rate": 5.484107435337475e-06, "loss": 0.483, "step": 14360 }, { "epoch": 0.66, "grad_norm": 0.4416780953741353, "learning_rate": 5.482779925188273e-06, "loss": 0.2674, "step": 14361 }, { "epoch": 0.66, "grad_norm": 0.2650017487873503, "learning_rate": 5.481452515045974e-06, "loss": 0.2202, "step": 14362 }, { "epoch": 0.66, "grad_norm": 0.34481439078217285, "learning_rate": 5.480125204939952e-06, "loss": 0.2314, "step": 14363 }, { "epoch": 0.66, "grad_norm": 1.2231596893490546, "learning_rate": 5.478797994899612e-06, "loss": 0.7127, "step": 14364 }, { "epoch": 0.66, "grad_norm": 0.3719375988781338, "learning_rate": 5.477470884954321e-06, "loss": 0.2886, "step": 14365 }, { "epoch": 0.66, "grad_norm": 1.0233224010643873, "learning_rate": 5.47614387513347e-06, "loss": 0.2958, "step": 14366 }, { "epoch": 0.66, "grad_norm": 0.3757674457037407, "learning_rate": 5.47481696546643e-06, "loss": 0.3035, "step": 14367 }, { "epoch": 0.66, "grad_norm": 0.3762208577694757, "learning_rate": 5.473490155982581e-06, "loss": 0.2925, "step": 14368 }, { "epoch": 0.66, "grad_norm": 0.33938808007686827, "learning_rate": 5.472163446711301e-06, "loss": 0.1233, "step": 14369 }, { "epoch": 0.66, "grad_norm": 0.46518643860864695, "learning_rate": 5.470836837681955e-06, "loss": 0.3492, "step": 14370 }, { "epoch": 0.66, "grad_norm": 0.37648370199421266, "learning_rate": 5.469510328923915e-06, "loss": 0.2799, "step": 14371 }, { "epoch": 0.66, "grad_norm": 0.5581998802685205, "learning_rate": 5.468183920466554e-06, "loss": 0.2899, "step": 14372 }, { "epoch": 0.66, "grad_norm": 0.8899053321042577, "learning_rate": 5.466857612339229e-06, "loss": 0.4631, "step": 14373 }, { "epoch": 0.66, "grad_norm": 0.27133659544050065, "learning_rate": 5.4655314045713115e-06, "loss": 0.2017, "step": 14374 }, { "epoch": 0.66, "grad_norm": 0.331607300263977, "learning_rate": 5.464205297192155e-06, "loss": 0.2512, "step": 14375 }, { "epoch": 0.66, "grad_norm": 1.0751161400848894, "learning_rate": 5.4628792902311204e-06, "loss": 0.4687, "step": 14376 }, { "epoch": 0.66, "grad_norm": 0.37034504710116734, "learning_rate": 5.461553383717566e-06, "loss": 0.2841, "step": 14377 }, { "epoch": 0.66, "grad_norm": 0.884971947518883, "learning_rate": 5.46022757768085e-06, "loss": 0.3778, "step": 14378 }, { "epoch": 0.66, "grad_norm": 0.3632622280557171, "learning_rate": 5.45890187215032e-06, "loss": 0.2552, "step": 14379 }, { "epoch": 0.66, "grad_norm": 0.3535550387344249, "learning_rate": 5.457576267155317e-06, "loss": 0.2652, "step": 14380 }, { "epoch": 0.66, "grad_norm": 0.3348489224699886, "learning_rate": 5.4562507627252055e-06, "loss": 0.1684, "step": 14381 }, { "epoch": 0.66, "grad_norm": 0.36496879557908773, "learning_rate": 5.4549253588893185e-06, "loss": 0.2667, "step": 14382 }, { "epoch": 0.66, "grad_norm": 0.3387178351859606, "learning_rate": 5.4536000556770085e-06, "loss": 0.2746, "step": 14383 }, { "epoch": 0.66, "grad_norm": 0.9065033237617657, "learning_rate": 5.452274853117606e-06, "loss": 0.3664, "step": 14384 }, { "epoch": 0.66, "grad_norm": 0.44045236396572185, "learning_rate": 5.450949751240456e-06, "loss": 0.193, "step": 14385 }, { "epoch": 0.66, "grad_norm": 0.39150730856634075, "learning_rate": 5.449624750074898e-06, "loss": 0.2301, "step": 14386 }, { "epoch": 0.66, "grad_norm": 0.2653637501881538, "learning_rate": 5.4482998496502585e-06, "loss": 0.2546, "step": 14387 }, { "epoch": 0.66, "grad_norm": 0.8552791134012457, "learning_rate": 5.446975049995873e-06, "loss": 0.5341, "step": 14388 }, { "epoch": 0.66, "grad_norm": 0.312245914211541, "learning_rate": 5.445650351141076e-06, "loss": 0.1868, "step": 14389 }, { "epoch": 0.66, "grad_norm": 0.5971458131692577, "learning_rate": 5.444325753115186e-06, "loss": 0.3577, "step": 14390 }, { "epoch": 0.66, "grad_norm": 0.6550121440756231, "learning_rate": 5.443001255947538e-06, "loss": 0.3292, "step": 14391 }, { "epoch": 0.66, "grad_norm": 0.3114984959470451, "learning_rate": 5.441676859667445e-06, "loss": 0.201, "step": 14392 }, { "epoch": 0.66, "grad_norm": 0.2694872293758464, "learning_rate": 5.440352564304235e-06, "loss": 0.1715, "step": 14393 }, { "epoch": 0.66, "grad_norm": 0.5136769244123666, "learning_rate": 5.439028369887223e-06, "loss": 0.3585, "step": 14394 }, { "epoch": 0.66, "grad_norm": 0.30259757898238504, "learning_rate": 5.43770427644573e-06, "loss": 0.2241, "step": 14395 }, { "epoch": 0.66, "grad_norm": 0.8619869832343193, "learning_rate": 5.436380284009064e-06, "loss": 0.4856, "step": 14396 }, { "epoch": 0.66, "grad_norm": 1.3114937677966916, "learning_rate": 5.43505639260654e-06, "loss": 0.5828, "step": 14397 }, { "epoch": 0.66, "grad_norm": 0.35811882016411706, "learning_rate": 5.433732602267472e-06, "loss": 0.2577, "step": 14398 }, { "epoch": 0.66, "grad_norm": 0.2939232054874963, "learning_rate": 5.432408913021159e-06, "loss": 0.2068, "step": 14399 }, { "epoch": 0.66, "grad_norm": 0.4261501978524303, "learning_rate": 5.431085324896914e-06, "loss": 0.3086, "step": 14400 }, { "epoch": 0.66, "grad_norm": 0.32856517158157045, "learning_rate": 5.429761837924034e-06, "loss": 0.2822, "step": 14401 }, { "epoch": 0.66, "grad_norm": 0.9080750859580784, "learning_rate": 5.428438452131821e-06, "loss": 0.3451, "step": 14402 }, { "epoch": 0.66, "grad_norm": 0.3668578061367511, "learning_rate": 5.427115167549577e-06, "loss": 0.3184, "step": 14403 }, { "epoch": 0.66, "grad_norm": 0.6535115190380515, "learning_rate": 5.425791984206594e-06, "loss": 0.3355, "step": 14404 }, { "epoch": 0.66, "grad_norm": 0.2352386557275581, "learning_rate": 5.424468902132171e-06, "loss": 0.1518, "step": 14405 }, { "epoch": 0.66, "grad_norm": 0.41175504827423726, "learning_rate": 5.4231459213555885e-06, "loss": 0.2909, "step": 14406 }, { "epoch": 0.66, "grad_norm": 0.5536814664288235, "learning_rate": 5.421823041906151e-06, "loss": 0.3068, "step": 14407 }, { "epoch": 0.66, "grad_norm": 0.404480077497838, "learning_rate": 5.420500263813141e-06, "loss": 0.2555, "step": 14408 }, { "epoch": 0.66, "grad_norm": 1.2748727300898761, "learning_rate": 5.419177587105836e-06, "loss": 0.514, "step": 14409 }, { "epoch": 0.66, "grad_norm": 0.43927943465204355, "learning_rate": 5.417855011813524e-06, "loss": 0.2956, "step": 14410 }, { "epoch": 0.66, "grad_norm": 0.26651081775619356, "learning_rate": 5.416532537965487e-06, "loss": 0.224, "step": 14411 }, { "epoch": 0.66, "grad_norm": 0.627345548578068, "learning_rate": 5.415210165591005e-06, "loss": 0.305, "step": 14412 }, { "epoch": 0.66, "grad_norm": 0.45388924677966047, "learning_rate": 5.413887894719347e-06, "loss": 0.3107, "step": 14413 }, { "epoch": 0.66, "grad_norm": 0.29437828934787064, "learning_rate": 5.412565725379792e-06, "loss": 0.24, "step": 14414 }, { "epoch": 0.66, "grad_norm": 0.5397002300790241, "learning_rate": 5.411243657601612e-06, "loss": 0.2433, "step": 14415 }, { "epoch": 0.66, "grad_norm": 0.465348874140996, "learning_rate": 5.4099216914140726e-06, "loss": 0.2553, "step": 14416 }, { "epoch": 0.66, "grad_norm": 0.3285273820616766, "learning_rate": 5.408599826846448e-06, "loss": 0.1946, "step": 14417 }, { "epoch": 0.66, "grad_norm": 0.39442925990231276, "learning_rate": 5.407278063927992e-06, "loss": 0.2642, "step": 14418 }, { "epoch": 0.66, "grad_norm": 0.33035773943610747, "learning_rate": 5.405956402687974e-06, "loss": 0.2458, "step": 14419 }, { "epoch": 0.66, "grad_norm": 1.4305254747842355, "learning_rate": 5.404634843155657e-06, "loss": 0.7844, "step": 14420 }, { "epoch": 0.66, "grad_norm": 0.4699510254831701, "learning_rate": 5.4033133853602916e-06, "loss": 0.1365, "step": 14421 }, { "epoch": 0.66, "grad_norm": 0.38911122580607826, "learning_rate": 5.401992029331142e-06, "loss": 0.2823, "step": 14422 }, { "epoch": 0.66, "grad_norm": 0.3505614432978457, "learning_rate": 5.400670775097449e-06, "loss": 0.2846, "step": 14423 }, { "epoch": 0.66, "grad_norm": 0.6976714553114252, "learning_rate": 5.399349622688479e-06, "loss": 0.3654, "step": 14424 }, { "epoch": 0.66, "grad_norm": 0.3441830327473105, "learning_rate": 5.398028572133476e-06, "loss": 0.1555, "step": 14425 }, { "epoch": 0.66, "grad_norm": 0.2687335254148413, "learning_rate": 5.39670762346168e-06, "loss": 0.2417, "step": 14426 }, { "epoch": 0.66, "grad_norm": 0.3664692348103668, "learning_rate": 5.395386776702341e-06, "loss": 0.2197, "step": 14427 }, { "epoch": 0.66, "grad_norm": 0.4769917503211647, "learning_rate": 5.3940660318847e-06, "loss": 0.1694, "step": 14428 }, { "epoch": 0.66, "grad_norm": 0.47721874999718245, "learning_rate": 5.392745389038003e-06, "loss": 0.3108, "step": 14429 }, { "epoch": 0.66, "grad_norm": 0.527427882970769, "learning_rate": 5.391424848191478e-06, "loss": 0.3072, "step": 14430 }, { "epoch": 0.66, "grad_norm": 0.31976568891665114, "learning_rate": 5.390104409374364e-06, "loss": 0.2269, "step": 14431 }, { "epoch": 0.66, "grad_norm": 0.368047782843205, "learning_rate": 5.3887840726159e-06, "loss": 0.2687, "step": 14432 }, { "epoch": 0.66, "grad_norm": 0.5424350217804997, "learning_rate": 5.387463837945308e-06, "loss": 0.2768, "step": 14433 }, { "epoch": 0.66, "grad_norm": 0.2690367076208699, "learning_rate": 5.386143705391826e-06, "loss": 0.2132, "step": 14434 }, { "epoch": 0.66, "grad_norm": 0.5433824635285751, "learning_rate": 5.384823674984671e-06, "loss": 0.3446, "step": 14435 }, { "epoch": 0.66, "grad_norm": 0.7454809287827713, "learning_rate": 5.383503746753072e-06, "loss": 0.416, "step": 14436 }, { "epoch": 0.66, "grad_norm": 0.5757490948557092, "learning_rate": 5.382183920726254e-06, "loss": 0.3496, "step": 14437 }, { "epoch": 0.66, "grad_norm": 0.3128011414640952, "learning_rate": 5.38086419693343e-06, "loss": 0.2422, "step": 14438 }, { "epoch": 0.66, "grad_norm": 0.2836686810207828, "learning_rate": 5.379544575403823e-06, "loss": 0.1912, "step": 14439 }, { "epoch": 0.66, "grad_norm": 0.5558319282259456, "learning_rate": 5.378225056166639e-06, "loss": 0.3545, "step": 14440 }, { "epoch": 0.66, "grad_norm": 0.40245630060166104, "learning_rate": 5.376905639251106e-06, "loss": 0.2542, "step": 14441 }, { "epoch": 0.66, "grad_norm": 0.3877637447004567, "learning_rate": 5.375586324686423e-06, "loss": 0.3077, "step": 14442 }, { "epoch": 0.66, "grad_norm": 0.8551002646958938, "learning_rate": 5.374267112501806e-06, "loss": 0.4739, "step": 14443 }, { "epoch": 0.66, "grad_norm": 0.32714889434704203, "learning_rate": 5.37294800272645e-06, "loss": 0.2214, "step": 14444 }, { "epoch": 0.66, "grad_norm": 0.3305863395603335, "learning_rate": 5.371628995389568e-06, "loss": 0.1931, "step": 14445 }, { "epoch": 0.66, "grad_norm": 0.3380465840473011, "learning_rate": 5.370310090520362e-06, "loss": 0.2837, "step": 14446 }, { "epoch": 0.66, "grad_norm": 0.3701302079731544, "learning_rate": 5.3689912881480244e-06, "loss": 0.2188, "step": 14447 }, { "epoch": 0.66, "grad_norm": 1.4614701293298256, "learning_rate": 5.3676725883017576e-06, "loss": 0.7856, "step": 14448 }, { "epoch": 0.66, "grad_norm": 0.7725963236210721, "learning_rate": 5.366353991010758e-06, "loss": 0.4335, "step": 14449 }, { "epoch": 0.66, "grad_norm": 0.28972231802789833, "learning_rate": 5.365035496304211e-06, "loss": 0.2729, "step": 14450 }, { "epoch": 0.66, "grad_norm": 0.28137335268548186, "learning_rate": 5.363717104211315e-06, "loss": 0.121, "step": 14451 }, { "epoch": 0.66, "grad_norm": 0.5169297125413294, "learning_rate": 5.3623988147612495e-06, "loss": 0.3273, "step": 14452 }, { "epoch": 0.66, "grad_norm": 0.4311696837405616, "learning_rate": 5.361080627983205e-06, "loss": 0.3157, "step": 14453 }, { "epoch": 0.66, "grad_norm": 0.3319774606502584, "learning_rate": 5.3597625439063685e-06, "loss": 0.2323, "step": 14454 }, { "epoch": 0.66, "grad_norm": 0.5716281914623899, "learning_rate": 5.358444562559912e-06, "loss": 0.3522, "step": 14455 }, { "epoch": 0.66, "grad_norm": 0.42296957884115344, "learning_rate": 5.357126683973024e-06, "loss": 0.3232, "step": 14456 }, { "epoch": 0.66, "grad_norm": 0.6845174987717982, "learning_rate": 5.355808908174868e-06, "loss": 0.3203, "step": 14457 }, { "epoch": 0.66, "grad_norm": 0.305091352006226, "learning_rate": 5.354491235194635e-06, "loss": 0.2421, "step": 14458 }, { "epoch": 0.66, "grad_norm": 0.28851302489540775, "learning_rate": 5.353173665061485e-06, "loss": 0.2381, "step": 14459 }, { "epoch": 0.66, "grad_norm": 1.113074031237448, "learning_rate": 5.351856197804595e-06, "loss": 0.385, "step": 14460 }, { "epoch": 0.66, "grad_norm": 0.9076056142393685, "learning_rate": 5.350538833453125e-06, "loss": 0.476, "step": 14461 }, { "epoch": 0.66, "grad_norm": 0.2719306845380426, "learning_rate": 5.349221572036244e-06, "loss": 0.2645, "step": 14462 }, { "epoch": 0.66, "grad_norm": 0.6808419677675623, "learning_rate": 5.3479044135831185e-06, "loss": 0.3856, "step": 14463 }, { "epoch": 0.66, "grad_norm": 0.2405823195571114, "learning_rate": 5.346587358122901e-06, "loss": 0.0721, "step": 14464 }, { "epoch": 0.66, "grad_norm": 0.2775846117014136, "learning_rate": 5.34527040568476e-06, "loss": 0.2347, "step": 14465 }, { "epoch": 0.66, "grad_norm": 0.38389553846865637, "learning_rate": 5.343953556297841e-06, "loss": 0.3036, "step": 14466 }, { "epoch": 0.66, "grad_norm": 0.4198451374664271, "learning_rate": 5.3426368099913025e-06, "loss": 0.2907, "step": 14467 }, { "epoch": 0.66, "grad_norm": 0.34108744701743593, "learning_rate": 5.3413201667943014e-06, "loss": 0.2631, "step": 14468 }, { "epoch": 0.66, "grad_norm": 0.7997617181162744, "learning_rate": 5.340003626735977e-06, "loss": 0.3685, "step": 14469 }, { "epoch": 0.66, "grad_norm": 0.26168178214980903, "learning_rate": 5.33868718984548e-06, "loss": 0.1844, "step": 14470 }, { "epoch": 0.66, "grad_norm": 0.3791899429974558, "learning_rate": 5.337370856151958e-06, "loss": 0.2855, "step": 14471 }, { "epoch": 0.66, "grad_norm": 0.4708361080623944, "learning_rate": 5.336054625684552e-06, "loss": 0.2876, "step": 14472 }, { "epoch": 0.66, "grad_norm": 0.3888497267244696, "learning_rate": 5.334738498472405e-06, "loss": 0.2202, "step": 14473 }, { "epoch": 0.66, "grad_norm": 0.31617708457312554, "learning_rate": 5.333422474544641e-06, "loss": 0.2766, "step": 14474 }, { "epoch": 0.66, "grad_norm": 0.9056347354067686, "learning_rate": 5.332106553930414e-06, "loss": 0.387, "step": 14475 }, { "epoch": 0.67, "grad_norm": 0.41903550969979786, "learning_rate": 5.330790736658846e-06, "loss": 0.2419, "step": 14476 }, { "epoch": 0.67, "grad_norm": 0.25613455556744275, "learning_rate": 5.329475022759074e-06, "loss": 0.1646, "step": 14477 }, { "epoch": 0.67, "grad_norm": 0.39574753593715833, "learning_rate": 5.32815941226022e-06, "loss": 0.3361, "step": 14478 }, { "epoch": 0.67, "grad_norm": 0.9777109415088288, "learning_rate": 5.326843905191413e-06, "loss": 0.5254, "step": 14479 }, { "epoch": 0.67, "grad_norm": 0.4025870360630933, "learning_rate": 5.325528501581783e-06, "loss": 0.2175, "step": 14480 }, { "epoch": 0.67, "grad_norm": 0.5087201400407269, "learning_rate": 5.324213201460442e-06, "loss": 0.3129, "step": 14481 }, { "epoch": 0.67, "grad_norm": 0.4982503374275669, "learning_rate": 5.322898004856518e-06, "loss": 0.3086, "step": 14482 }, { "epoch": 0.67, "grad_norm": 0.21573001542193646, "learning_rate": 5.32158291179912e-06, "loss": 0.1336, "step": 14483 }, { "epoch": 0.67, "grad_norm": 1.4875484047752525, "learning_rate": 5.320267922317368e-06, "loss": 0.8108, "step": 14484 }, { "epoch": 0.67, "grad_norm": 0.5569117814769349, "learning_rate": 5.318953036440377e-06, "loss": 0.368, "step": 14485 }, { "epoch": 0.67, "grad_norm": 0.2804371677130325, "learning_rate": 5.317638254197252e-06, "loss": 0.2199, "step": 14486 }, { "epoch": 0.67, "grad_norm": 1.0161243814057206, "learning_rate": 5.3163235756171015e-06, "loss": 0.4476, "step": 14487 }, { "epoch": 0.67, "grad_norm": 0.5548137285529009, "learning_rate": 5.315009000729032e-06, "loss": 0.2636, "step": 14488 }, { "epoch": 0.67, "grad_norm": 0.35132341809486545, "learning_rate": 5.313694529562154e-06, "loss": 0.2707, "step": 14489 }, { "epoch": 0.67, "grad_norm": 0.27502581392636205, "learning_rate": 5.312380162145561e-06, "loss": 0.2055, "step": 14490 }, { "epoch": 0.67, "grad_norm": 0.7327273411749387, "learning_rate": 5.311065898508346e-06, "loss": 0.3972, "step": 14491 }, { "epoch": 0.67, "grad_norm": 0.42404795267894624, "learning_rate": 5.309751738679621e-06, "loss": 0.2675, "step": 14492 }, { "epoch": 0.67, "grad_norm": 0.37715546698948915, "learning_rate": 5.308437682688467e-06, "loss": 0.2543, "step": 14493 }, { "epoch": 0.67, "grad_norm": 0.4790058220320238, "learning_rate": 5.307123730563984e-06, "loss": 0.3344, "step": 14494 }, { "epoch": 0.67, "grad_norm": 0.3941499575352521, "learning_rate": 5.305809882335256e-06, "loss": 0.2606, "step": 14495 }, { "epoch": 0.67, "grad_norm": 0.3878626159935822, "learning_rate": 5.304496138031373e-06, "loss": 0.2077, "step": 14496 }, { "epoch": 0.67, "grad_norm": 0.6185891648275387, "learning_rate": 5.303182497681423e-06, "loss": 0.3228, "step": 14497 }, { "epoch": 0.67, "grad_norm": 0.30658686780698813, "learning_rate": 5.3018689613144825e-06, "loss": 0.2467, "step": 14498 }, { "epoch": 0.67, "grad_norm": 0.39518966408365946, "learning_rate": 5.3005555289596385e-06, "loss": 0.157, "step": 14499 }, { "epoch": 0.67, "grad_norm": 1.3428400026954694, "learning_rate": 5.299242200645959e-06, "loss": 0.5602, "step": 14500 }, { "epoch": 0.67, "grad_norm": 0.2521340364324956, "learning_rate": 5.2979289764025336e-06, "loss": 0.1875, "step": 14501 }, { "epoch": 0.67, "grad_norm": 0.34141494819348706, "learning_rate": 5.296615856258428e-06, "loss": 0.2925, "step": 14502 }, { "epoch": 0.67, "grad_norm": 0.9647587631860546, "learning_rate": 5.295302840242711e-06, "loss": 0.3035, "step": 14503 }, { "epoch": 0.67, "grad_norm": 0.34425263885839713, "learning_rate": 5.293989928384454e-06, "loss": 0.2665, "step": 14504 }, { "epoch": 0.67, "grad_norm": 0.5078619915173804, "learning_rate": 5.292677120712726e-06, "loss": 0.2804, "step": 14505 }, { "epoch": 0.67, "grad_norm": 0.37020006467979166, "learning_rate": 5.2913644172565915e-06, "loss": 0.233, "step": 14506 }, { "epoch": 0.67, "grad_norm": 0.3684262616450512, "learning_rate": 5.290051818045108e-06, "loss": 0.2699, "step": 14507 }, { "epoch": 0.67, "grad_norm": 0.82036337110452, "learning_rate": 5.288739323107337e-06, "loss": 0.4121, "step": 14508 }, { "epoch": 0.67, "grad_norm": 0.37561320921871166, "learning_rate": 5.2874269324723406e-06, "loss": 0.2739, "step": 14509 }, { "epoch": 0.67, "grad_norm": 0.40943853202100655, "learning_rate": 5.286114646169166e-06, "loss": 0.2491, "step": 14510 }, { "epoch": 0.67, "grad_norm": 0.2966049707262867, "learning_rate": 5.284802464226874e-06, "loss": 0.1922, "step": 14511 }, { "epoch": 0.67, "grad_norm": 1.274682575611519, "learning_rate": 5.283490386674507e-06, "loss": 0.2488, "step": 14512 }, { "epoch": 0.67, "grad_norm": 0.4109310191767567, "learning_rate": 5.282178413541117e-06, "loss": 0.2427, "step": 14513 }, { "epoch": 0.67, "grad_norm": 0.3613707744784703, "learning_rate": 5.280866544855753e-06, "loss": 0.3063, "step": 14514 }, { "epoch": 0.67, "grad_norm": 0.9062320464961248, "learning_rate": 5.279554780647451e-06, "loss": 0.4564, "step": 14515 }, { "epoch": 0.67, "grad_norm": 0.31368508601698064, "learning_rate": 5.278243120945262e-06, "loss": 0.214, "step": 14516 }, { "epoch": 0.67, "grad_norm": 0.2505901747215737, "learning_rate": 5.276931565778212e-06, "loss": 0.2027, "step": 14517 }, { "epoch": 0.67, "grad_norm": 1.5737998800004696, "learning_rate": 5.27562011517535e-06, "loss": 0.709, "step": 14518 }, { "epoch": 0.67, "grad_norm": 0.3555593864132874, "learning_rate": 5.274308769165708e-06, "loss": 0.2039, "step": 14519 }, { "epoch": 0.67, "grad_norm": 0.8154670788423888, "learning_rate": 5.272997527778311e-06, "loss": 0.398, "step": 14520 }, { "epoch": 0.67, "grad_norm": 0.39026118425267137, "learning_rate": 5.2716863910421926e-06, "loss": 0.2943, "step": 14521 }, { "epoch": 0.67, "grad_norm": 0.32156329941376555, "learning_rate": 5.270375358986379e-06, "loss": 0.2022, "step": 14522 }, { "epoch": 0.67, "grad_norm": 0.3210097030999681, "learning_rate": 5.269064431639901e-06, "loss": 0.1968, "step": 14523 }, { "epoch": 0.67, "grad_norm": 0.9274995784342149, "learning_rate": 5.2677536090317726e-06, "loss": 0.4433, "step": 14524 }, { "epoch": 0.67, "grad_norm": 0.3288621853541652, "learning_rate": 5.266442891191024e-06, "loss": 0.2715, "step": 14525 }, { "epoch": 0.67, "grad_norm": 0.3662531061308901, "learning_rate": 5.2651322781466606e-06, "loss": 0.2563, "step": 14526 }, { "epoch": 0.67, "grad_norm": 1.0358854833762872, "learning_rate": 5.263821769927707e-06, "loss": 0.6452, "step": 14527 }, { "epoch": 0.67, "grad_norm": 0.6295447392154762, "learning_rate": 5.262511366563179e-06, "loss": 0.3605, "step": 14528 }, { "epoch": 0.67, "grad_norm": 0.2417549454054387, "learning_rate": 5.261201068082078e-06, "loss": 0.2088, "step": 14529 }, { "epoch": 0.67, "grad_norm": 0.46958482547682684, "learning_rate": 5.259890874513418e-06, "loss": 0.2323, "step": 14530 }, { "epoch": 0.67, "grad_norm": 0.6570349794787905, "learning_rate": 5.258580785886212e-06, "loss": 0.3484, "step": 14531 }, { "epoch": 0.67, "grad_norm": 0.39188773039830543, "learning_rate": 5.2572708022294504e-06, "loss": 0.2534, "step": 14532 }, { "epoch": 0.67, "grad_norm": 0.3870212383853106, "learning_rate": 5.255960923572148e-06, "loss": 0.3395, "step": 14533 }, { "epoch": 0.67, "grad_norm": 0.4008293161846491, "learning_rate": 5.2546511499432885e-06, "loss": 0.2869, "step": 14534 }, { "epoch": 0.67, "grad_norm": 0.24846248800658954, "learning_rate": 5.253341481371888e-06, "loss": 0.1418, "step": 14535 }, { "epoch": 0.67, "grad_norm": 0.8344250754286472, "learning_rate": 5.252031917886929e-06, "loss": 0.4467, "step": 14536 }, { "epoch": 0.67, "grad_norm": 0.3129821689831803, "learning_rate": 5.25072245951741e-06, "loss": 0.2674, "step": 14537 }, { "epoch": 0.67, "grad_norm": 0.3787801011056638, "learning_rate": 5.249413106292316e-06, "loss": 0.3206, "step": 14538 }, { "epoch": 0.67, "grad_norm": 1.1539678575075514, "learning_rate": 5.248103858240636e-06, "loss": 0.4763, "step": 14539 }, { "epoch": 0.67, "grad_norm": 0.39929259462619465, "learning_rate": 5.246794715391361e-06, "loss": 0.2769, "step": 14540 }, { "epoch": 0.67, "grad_norm": 0.4039984769911849, "learning_rate": 5.245485677773465e-06, "loss": 0.2942, "step": 14541 }, { "epoch": 0.67, "grad_norm": 0.316524545467977, "learning_rate": 5.2441767454159384e-06, "loss": 0.1883, "step": 14542 }, { "epoch": 0.67, "grad_norm": 0.4009557959596617, "learning_rate": 5.24286791834775e-06, "loss": 0.3091, "step": 14543 }, { "epoch": 0.67, "grad_norm": 0.6263656068633076, "learning_rate": 5.241559196597882e-06, "loss": 0.3374, "step": 14544 }, { "epoch": 0.67, "grad_norm": 0.32813763935887025, "learning_rate": 5.240250580195311e-06, "loss": 0.2579, "step": 14545 }, { "epoch": 0.67, "grad_norm": 0.7308668140176084, "learning_rate": 5.238942069169e-06, "loss": 0.3127, "step": 14546 }, { "epoch": 0.67, "grad_norm": 0.4260901109746353, "learning_rate": 5.237633663547923e-06, "loss": 0.311, "step": 14547 }, { "epoch": 0.67, "grad_norm": 0.3713509311804359, "learning_rate": 5.236325363361051e-06, "loss": 0.237, "step": 14548 }, { "epoch": 0.67, "grad_norm": 0.4083938079639334, "learning_rate": 5.23501716863734e-06, "loss": 0.2869, "step": 14549 }, { "epoch": 0.67, "grad_norm": 0.29802925692894433, "learning_rate": 5.23370907940576e-06, "loss": 0.2354, "step": 14550 }, { "epoch": 0.67, "grad_norm": 1.4196772923541505, "learning_rate": 5.232401095695259e-06, "loss": 0.8012, "step": 14551 }, { "epoch": 0.67, "grad_norm": 0.3332856859102056, "learning_rate": 5.231093217534812e-06, "loss": 0.1123, "step": 14552 }, { "epoch": 0.67, "grad_norm": 0.28690745049150934, "learning_rate": 5.229785444953361e-06, "loss": 0.2608, "step": 14553 }, { "epoch": 0.67, "grad_norm": 0.62958333930175, "learning_rate": 5.228477777979865e-06, "loss": 0.3793, "step": 14554 }, { "epoch": 0.67, "grad_norm": 0.23310095082284424, "learning_rate": 5.2271702166432725e-06, "loss": 0.1429, "step": 14555 }, { "epoch": 0.67, "grad_norm": 0.6735298537510488, "learning_rate": 5.225862760972524e-06, "loss": 0.3429, "step": 14556 }, { "epoch": 0.67, "grad_norm": 0.38710319732601317, "learning_rate": 5.22455541099658e-06, "loss": 0.2976, "step": 14557 }, { "epoch": 0.67, "grad_norm": 0.3838107680826825, "learning_rate": 5.223248166744372e-06, "loss": 0.2079, "step": 14558 }, { "epoch": 0.67, "grad_norm": 0.5357523091925057, "learning_rate": 5.221941028244851e-06, "loss": 0.3661, "step": 14559 }, { "epoch": 0.67, "grad_norm": 0.6791392293898774, "learning_rate": 5.220633995526946e-06, "loss": 0.368, "step": 14560 }, { "epoch": 0.67, "grad_norm": 0.22039308946115338, "learning_rate": 5.2193270686195975e-06, "loss": 0.1771, "step": 14561 }, { "epoch": 0.67, "grad_norm": 0.39682532633604495, "learning_rate": 5.218020247551745e-06, "loss": 0.2375, "step": 14562 }, { "epoch": 0.67, "grad_norm": 1.1537155973096151, "learning_rate": 5.216713532352311e-06, "loss": 0.7072, "step": 14563 }, { "epoch": 0.67, "grad_norm": 0.9202677061614553, "learning_rate": 5.215406923050228e-06, "loss": 0.4504, "step": 14564 }, { "epoch": 0.67, "grad_norm": 0.24941903722594946, "learning_rate": 5.214100419674426e-06, "loss": 0.2285, "step": 14565 }, { "epoch": 0.67, "grad_norm": 0.6655004754039319, "learning_rate": 5.212794022253831e-06, "loss": 0.4032, "step": 14566 }, { "epoch": 0.67, "grad_norm": 0.2873851835931111, "learning_rate": 5.2114877308173615e-06, "loss": 0.1751, "step": 14567 }, { "epoch": 0.67, "grad_norm": 0.3850851030816268, "learning_rate": 5.210181545393933e-06, "loss": 0.2249, "step": 14568 }, { "epoch": 0.67, "grad_norm": 0.3933198791638301, "learning_rate": 5.208875466012475e-06, "loss": 0.3405, "step": 14569 }, { "epoch": 0.67, "grad_norm": 0.7356801470515539, "learning_rate": 5.207569492701892e-06, "loss": 0.3468, "step": 14570 }, { "epoch": 0.67, "grad_norm": 0.42252905135937513, "learning_rate": 5.2062636254911056e-06, "loss": 0.2409, "step": 14571 }, { "epoch": 0.67, "grad_norm": 1.5601929131669112, "learning_rate": 5.204957864409019e-06, "loss": 0.8174, "step": 14572 }, { "epoch": 0.67, "grad_norm": 0.2595881678043744, "learning_rate": 5.203652209484543e-06, "loss": 0.2321, "step": 14573 }, { "epoch": 0.67, "grad_norm": 0.2572383173526846, "learning_rate": 5.202346660746589e-06, "loss": 0.161, "step": 14574 }, { "epoch": 0.67, "grad_norm": 1.257235823245716, "learning_rate": 5.201041218224052e-06, "loss": 0.76, "step": 14575 }, { "epoch": 0.67, "grad_norm": 0.5398340397464351, "learning_rate": 5.19973588194584e-06, "loss": 0.3059, "step": 14576 }, { "epoch": 0.67, "grad_norm": 0.3585655570912759, "learning_rate": 5.198430651940846e-06, "loss": 0.2745, "step": 14577 }, { "epoch": 0.67, "grad_norm": 0.4670169959098867, "learning_rate": 5.19712552823797e-06, "loss": 0.2706, "step": 14578 }, { "epoch": 0.67, "grad_norm": 0.38374589266051484, "learning_rate": 5.195820510866108e-06, "loss": 0.2145, "step": 14579 }, { "epoch": 0.67, "grad_norm": 0.3349044566274623, "learning_rate": 5.194515599854147e-06, "loss": 0.2496, "step": 14580 }, { "epoch": 0.67, "grad_norm": 0.3256332822872563, "learning_rate": 5.193210795230978e-06, "loss": 0.2689, "step": 14581 }, { "epoch": 0.67, "grad_norm": 0.7923327226771925, "learning_rate": 5.1919060970254895e-06, "loss": 0.4431, "step": 14582 }, { "epoch": 0.67, "grad_norm": 0.3665672349678015, "learning_rate": 5.19060150526657e-06, "loss": 0.2533, "step": 14583 }, { "epoch": 0.67, "grad_norm": 0.3854791441830913, "learning_rate": 5.1892970199830985e-06, "loss": 0.2453, "step": 14584 }, { "epoch": 0.67, "grad_norm": 0.44179042092228865, "learning_rate": 5.1879926412039495e-06, "loss": 0.2361, "step": 14585 }, { "epoch": 0.67, "grad_norm": 0.24957402625683964, "learning_rate": 5.186688368958006e-06, "loss": 0.1954, "step": 14586 }, { "epoch": 0.67, "grad_norm": 0.9466836846191297, "learning_rate": 5.185384203274143e-06, "loss": 0.3254, "step": 14587 }, { "epoch": 0.67, "grad_norm": 0.4665826774041471, "learning_rate": 5.184080144181237e-06, "loss": 0.3146, "step": 14588 }, { "epoch": 0.67, "grad_norm": 0.3141740239043101, "learning_rate": 5.182776191708151e-06, "loss": 0.2672, "step": 14589 }, { "epoch": 0.67, "grad_norm": 1.429500847373923, "learning_rate": 5.181472345883758e-06, "loss": 0.5527, "step": 14590 }, { "epoch": 0.67, "grad_norm": 0.41164636390803944, "learning_rate": 5.180168606736926e-06, "loss": 0.1536, "step": 14591 }, { "epoch": 0.67, "grad_norm": 0.3978812866067028, "learning_rate": 5.178864974296511e-06, "loss": 0.299, "step": 14592 }, { "epoch": 0.67, "grad_norm": 0.4245381122455259, "learning_rate": 5.177561448591384e-06, "loss": 0.312, "step": 14593 }, { "epoch": 0.67, "grad_norm": 1.0487498819096945, "learning_rate": 5.176258029650395e-06, "loss": 0.2983, "step": 14594 }, { "epoch": 0.67, "grad_norm": 0.37103484986954144, "learning_rate": 5.174954717502403e-06, "loss": 0.2416, "step": 14595 }, { "epoch": 0.67, "grad_norm": 0.3458927841638489, "learning_rate": 5.173651512176268e-06, "loss": 0.2627, "step": 14596 }, { "epoch": 0.67, "grad_norm": 0.34819688292822737, "learning_rate": 5.1723484137008314e-06, "loss": 0.196, "step": 14597 }, { "epoch": 0.67, "grad_norm": 0.3187011606459076, "learning_rate": 5.17104542210495e-06, "loss": 0.215, "step": 14598 }, { "epoch": 0.67, "grad_norm": 0.930273354091307, "learning_rate": 5.169742537417468e-06, "loss": 0.4121, "step": 14599 }, { "epoch": 0.67, "grad_norm": 0.36489398256743455, "learning_rate": 5.168439759667234e-06, "loss": 0.2511, "step": 14600 }, { "epoch": 0.67, "grad_norm": 0.3486897623720474, "learning_rate": 5.167137088883084e-06, "loss": 0.2664, "step": 14601 }, { "epoch": 0.67, "grad_norm": 0.5560003608338897, "learning_rate": 5.165834525093864e-06, "loss": 0.25, "step": 14602 }, { "epoch": 0.67, "grad_norm": 1.6744128575833823, "learning_rate": 5.164532068328405e-06, "loss": 0.694, "step": 14603 }, { "epoch": 0.67, "grad_norm": 0.31236617474366957, "learning_rate": 5.163229718615545e-06, "loss": 0.2184, "step": 14604 }, { "epoch": 0.67, "grad_norm": 0.40630878965376616, "learning_rate": 5.161927475984122e-06, "loss": 0.3099, "step": 14605 }, { "epoch": 0.67, "grad_norm": 0.8632822595452267, "learning_rate": 5.160625340462957e-06, "loss": 0.5503, "step": 14606 }, { "epoch": 0.67, "grad_norm": 0.20241025011720643, "learning_rate": 5.159323312080883e-06, "loss": 0.1404, "step": 14607 }, { "epoch": 0.67, "grad_norm": 1.6662521418027079, "learning_rate": 5.15802139086673e-06, "loss": 0.691, "step": 14608 }, { "epoch": 0.67, "grad_norm": 0.39478032245865113, "learning_rate": 5.1567195768493114e-06, "loss": 0.3136, "step": 14609 }, { "epoch": 0.67, "grad_norm": 0.3485609115888265, "learning_rate": 5.155417870057457e-06, "loss": 0.2195, "step": 14610 }, { "epoch": 0.67, "grad_norm": 0.8256574255610679, "learning_rate": 5.154116270519975e-06, "loss": 0.4641, "step": 14611 }, { "epoch": 0.67, "grad_norm": 0.4109823929433938, "learning_rate": 5.152814778265696e-06, "loss": 0.3374, "step": 14612 }, { "epoch": 0.67, "grad_norm": 0.34838861195995197, "learning_rate": 5.151513393323426e-06, "loss": 0.1916, "step": 14613 }, { "epoch": 0.67, "grad_norm": 0.3139484689553587, "learning_rate": 5.150212115721973e-06, "loss": 0.1806, "step": 14614 }, { "epoch": 0.67, "grad_norm": 0.866068318251318, "learning_rate": 5.148910945490152e-06, "loss": 0.426, "step": 14615 }, { "epoch": 0.67, "grad_norm": 0.4830096330459205, "learning_rate": 5.147609882656761e-06, "loss": 0.3176, "step": 14616 }, { "epoch": 0.67, "grad_norm": 0.30245185015805653, "learning_rate": 5.146308927250616e-06, "loss": 0.2526, "step": 14617 }, { "epoch": 0.67, "grad_norm": 1.077738213564531, "learning_rate": 5.14500807930051e-06, "loss": 0.702, "step": 14618 }, { "epoch": 0.67, "grad_norm": 0.3730299969154624, "learning_rate": 5.143707338835251e-06, "loss": 0.2475, "step": 14619 }, { "epoch": 0.67, "grad_norm": 0.23886192130143175, "learning_rate": 5.142406705883627e-06, "loss": 0.1668, "step": 14620 }, { "epoch": 0.67, "grad_norm": 1.0830211879219933, "learning_rate": 5.1411061804744365e-06, "loss": 0.5165, "step": 14621 }, { "epoch": 0.67, "grad_norm": 0.34098399581558103, "learning_rate": 5.1398057626364765e-06, "loss": 0.2609, "step": 14622 }, { "epoch": 0.67, "grad_norm": 0.6778993252520432, "learning_rate": 5.138505452398526e-06, "loss": 0.3105, "step": 14623 }, { "epoch": 0.67, "grad_norm": 0.38095606707446816, "learning_rate": 5.137205249789382e-06, "loss": 0.3027, "step": 14624 }, { "epoch": 0.67, "grad_norm": 0.3746784625345085, "learning_rate": 5.13590515483783e-06, "loss": 0.2592, "step": 14625 }, { "epoch": 0.67, "grad_norm": 0.2599544173557721, "learning_rate": 5.134605167572646e-06, "loss": 0.0827, "step": 14626 }, { "epoch": 0.67, "grad_norm": 0.8198778182332518, "learning_rate": 5.1333052880226185e-06, "loss": 0.3891, "step": 14627 }, { "epoch": 0.67, "grad_norm": 0.29980037886734695, "learning_rate": 5.132005516216512e-06, "loss": 0.2769, "step": 14628 }, { "epoch": 0.67, "grad_norm": 0.48613521157158995, "learning_rate": 5.130705852183121e-06, "loss": 0.3265, "step": 14629 }, { "epoch": 0.67, "grad_norm": 1.0616128613456728, "learning_rate": 5.1294062959512045e-06, "loss": 0.4118, "step": 14630 }, { "epoch": 0.67, "grad_norm": 0.42418816795479464, "learning_rate": 5.128106847549543e-06, "loss": 0.2622, "step": 14631 }, { "epoch": 0.67, "grad_norm": 0.2590092227785796, "learning_rate": 5.126807507006899e-06, "loss": 0.2069, "step": 14632 }, { "epoch": 0.67, "grad_norm": 0.6856431076489268, "learning_rate": 5.125508274352033e-06, "loss": 0.2843, "step": 14633 }, { "epoch": 0.67, "grad_norm": 0.39001776860541837, "learning_rate": 5.124209149613724e-06, "loss": 0.2473, "step": 14634 }, { "epoch": 0.67, "grad_norm": 0.6189590924160445, "learning_rate": 5.1229101328207195e-06, "loss": 0.3797, "step": 14635 }, { "epoch": 0.67, "grad_norm": 0.33288899893470797, "learning_rate": 5.12161122400179e-06, "loss": 0.2662, "step": 14636 }, { "epoch": 0.67, "grad_norm": 0.45222479837408003, "learning_rate": 5.120312423185681e-06, "loss": 0.2838, "step": 14637 }, { "epoch": 0.67, "grad_norm": 0.2766801941072608, "learning_rate": 5.119013730401152e-06, "loss": 0.2141, "step": 14638 }, { "epoch": 0.67, "grad_norm": 0.7278280117379301, "learning_rate": 5.11771514567696e-06, "loss": 0.3998, "step": 14639 }, { "epoch": 0.67, "grad_norm": 0.2717933211794752, "learning_rate": 5.1164166690418435e-06, "loss": 0.196, "step": 14640 }, { "epoch": 0.67, "grad_norm": 0.5348678365580526, "learning_rate": 5.115118300524555e-06, "loss": 0.327, "step": 14641 }, { "epoch": 0.67, "grad_norm": 1.401259855324682, "learning_rate": 5.113820040153844e-06, "loss": 0.7867, "step": 14642 }, { "epoch": 0.67, "grad_norm": 0.32312003204923545, "learning_rate": 5.112521887958444e-06, "loss": 0.1893, "step": 14643 }, { "epoch": 0.67, "grad_norm": 0.47114487353831286, "learning_rate": 5.111223843967101e-06, "loss": 0.3017, "step": 14644 }, { "epoch": 0.67, "grad_norm": 0.40523760330781566, "learning_rate": 5.109925908208548e-06, "loss": 0.2701, "step": 14645 }, { "epoch": 0.67, "grad_norm": 0.24304619362661206, "learning_rate": 5.108628080711523e-06, "loss": 0.1633, "step": 14646 }, { "epoch": 0.67, "grad_norm": 1.302963865550671, "learning_rate": 5.107330361504757e-06, "loss": 0.6481, "step": 14647 }, { "epoch": 0.67, "grad_norm": 0.4091695010710248, "learning_rate": 5.106032750616985e-06, "loss": 0.3255, "step": 14648 }, { "epoch": 0.67, "grad_norm": 0.3679171288484748, "learning_rate": 5.1047352480769305e-06, "loss": 0.0713, "step": 14649 }, { "epoch": 0.67, "grad_norm": 0.3942742904196032, "learning_rate": 5.1034378539133125e-06, "loss": 0.285, "step": 14650 }, { "epoch": 0.67, "grad_norm": 0.2802975110859199, "learning_rate": 5.1021405681548686e-06, "loss": 0.2355, "step": 14651 }, { "epoch": 0.67, "grad_norm": 0.4282801336462379, "learning_rate": 5.100843390830308e-06, "loss": 0.2752, "step": 14652 }, { "epoch": 0.67, "grad_norm": 0.39055988099363154, "learning_rate": 5.099546321968356e-06, "loss": 0.2455, "step": 14653 }, { "epoch": 0.67, "grad_norm": 1.2195564825679044, "learning_rate": 5.098249361597723e-06, "loss": 0.5319, "step": 14654 }, { "epoch": 0.67, "grad_norm": 0.6186439024732834, "learning_rate": 5.096952509747124e-06, "loss": 0.2866, "step": 14655 }, { "epoch": 0.67, "grad_norm": 0.304111159659403, "learning_rate": 5.095655766445274e-06, "loss": 0.245, "step": 14656 }, { "epoch": 0.67, "grad_norm": 0.9439484645263461, "learning_rate": 5.094359131720875e-06, "loss": 0.4374, "step": 14657 }, { "epoch": 0.67, "grad_norm": 0.24898142816710866, "learning_rate": 5.093062605602637e-06, "loss": 0.1515, "step": 14658 }, { "epoch": 0.67, "grad_norm": 0.38688251322200956, "learning_rate": 5.091766188119267e-06, "loss": 0.2715, "step": 14659 }, { "epoch": 0.67, "grad_norm": 0.37973917368503773, "learning_rate": 5.09046987929946e-06, "loss": 0.3032, "step": 14660 }, { "epoch": 0.67, "grad_norm": 0.4183488399717887, "learning_rate": 5.089173679171922e-06, "loss": 0.2885, "step": 14661 }, { "epoch": 0.67, "grad_norm": 0.5369962486947505, "learning_rate": 5.087877587765339e-06, "loss": 0.2704, "step": 14662 }, { "epoch": 0.67, "grad_norm": 0.34705495866355146, "learning_rate": 5.086581605108416e-06, "loss": 0.1776, "step": 14663 }, { "epoch": 0.67, "grad_norm": 0.28243389079598086, "learning_rate": 5.0852857312298376e-06, "loss": 0.2804, "step": 14664 }, { "epoch": 0.67, "grad_norm": 0.6183600069117005, "learning_rate": 5.083989966158301e-06, "loss": 0.4008, "step": 14665 }, { "epoch": 0.67, "grad_norm": 0.7773913401679422, "learning_rate": 5.082694309922484e-06, "loss": 0.3369, "step": 14666 }, { "epoch": 0.67, "grad_norm": 0.5271037887908495, "learning_rate": 5.081398762551078e-06, "loss": 0.3064, "step": 14667 }, { "epoch": 0.67, "grad_norm": 0.3040545850868143, "learning_rate": 5.080103324072764e-06, "loss": 0.2783, "step": 14668 }, { "epoch": 0.67, "grad_norm": 0.4630782412471425, "learning_rate": 5.078807994516217e-06, "loss": 0.1771, "step": 14669 }, { "epoch": 0.67, "grad_norm": 0.3901563969419384, "learning_rate": 5.077512773910122e-06, "loss": 0.2209, "step": 14670 }, { "epoch": 0.67, "grad_norm": 0.3875837574079221, "learning_rate": 5.0762176622831476e-06, "loss": 0.2877, "step": 14671 }, { "epoch": 0.67, "grad_norm": 0.40046376048459525, "learning_rate": 5.074922659663967e-06, "loss": 0.2602, "step": 14672 }, { "epoch": 0.67, "grad_norm": 0.7766745504382916, "learning_rate": 5.073627766081256e-06, "loss": 0.3233, "step": 14673 }, { "epoch": 0.67, "grad_norm": 0.39006154608673754, "learning_rate": 5.072332981563675e-06, "loss": 0.2962, "step": 14674 }, { "epoch": 0.67, "grad_norm": 1.725373116463207, "learning_rate": 5.071038306139895e-06, "loss": 0.1903, "step": 14675 }, { "epoch": 0.67, "grad_norm": 0.2581697565680685, "learning_rate": 5.069743739838569e-06, "loss": 0.2182, "step": 14676 }, { "epoch": 0.67, "grad_norm": 0.3841024665862822, "learning_rate": 5.068449282688372e-06, "loss": 0.3137, "step": 14677 }, { "epoch": 0.67, "grad_norm": 0.8409663232284716, "learning_rate": 5.067154934717956e-06, "loss": 0.4137, "step": 14678 }, { "epoch": 0.67, "grad_norm": 0.277062667314916, "learning_rate": 5.065860695955971e-06, "loss": 0.1855, "step": 14679 }, { "epoch": 0.67, "grad_norm": 0.36350036481256914, "learning_rate": 5.064566566431075e-06, "loss": 0.2941, "step": 14680 }, { "epoch": 0.67, "grad_norm": 1.3013843140238681, "learning_rate": 5.063272546171918e-06, "loss": 0.572, "step": 14681 }, { "epoch": 0.67, "grad_norm": 0.24432780189443856, "learning_rate": 5.061978635207152e-06, "loss": 0.142, "step": 14682 }, { "epoch": 0.67, "grad_norm": 0.5461639762016587, "learning_rate": 5.0606848335654165e-06, "loss": 0.3646, "step": 14683 }, { "epoch": 0.67, "grad_norm": 0.40207427996034717, "learning_rate": 5.059391141275358e-06, "loss": 0.3058, "step": 14684 }, { "epoch": 0.67, "grad_norm": 0.39844805712055875, "learning_rate": 5.058097558365622e-06, "loss": 0.1692, "step": 14685 }, { "epoch": 0.67, "grad_norm": 0.43533318072982563, "learning_rate": 5.056804084864839e-06, "loss": 0.3156, "step": 14686 }, { "epoch": 0.67, "grad_norm": 0.5459186086601856, "learning_rate": 5.055510720801653e-06, "loss": 0.3456, "step": 14687 }, { "epoch": 0.67, "grad_norm": 0.40441756516333693, "learning_rate": 5.054217466204691e-06, "loss": 0.2189, "step": 14688 }, { "epoch": 0.67, "grad_norm": 0.38662816666683575, "learning_rate": 5.052924321102586e-06, "loss": 0.2863, "step": 14689 }, { "epoch": 0.67, "grad_norm": 0.742275049007806, "learning_rate": 5.051631285523973e-06, "loss": 0.3842, "step": 14690 }, { "epoch": 0.67, "grad_norm": 0.24431473115590516, "learning_rate": 5.050338359497471e-06, "loss": 0.1733, "step": 14691 }, { "epoch": 0.67, "grad_norm": 0.28741182616618494, "learning_rate": 5.04904554305171e-06, "loss": 0.2029, "step": 14692 }, { "epoch": 0.67, "grad_norm": 1.2864517357838037, "learning_rate": 5.0477528362153e-06, "loss": 0.563, "step": 14693 }, { "epoch": 0.68, "grad_norm": 0.8088013083660608, "learning_rate": 5.046460239016879e-06, "loss": 0.4117, "step": 14694 }, { "epoch": 0.68, "grad_norm": 0.3540882142771861, "learning_rate": 5.045167751485049e-06, "loss": 0.2369, "step": 14695 }, { "epoch": 0.68, "grad_norm": 0.5992837007908217, "learning_rate": 5.043875373648435e-06, "loss": 0.3432, "step": 14696 }, { "epoch": 0.68, "grad_norm": 0.29045165097222986, "learning_rate": 5.042583105535639e-06, "loss": 0.195, "step": 14697 }, { "epoch": 0.68, "grad_norm": 0.39772476847889165, "learning_rate": 5.041290947175274e-06, "loss": 0.2002, "step": 14698 }, { "epoch": 0.68, "grad_norm": 0.547982177662353, "learning_rate": 5.039998898595952e-06, "loss": 0.331, "step": 14699 }, { "epoch": 0.68, "grad_norm": 0.45942186184485073, "learning_rate": 5.0387069598262706e-06, "loss": 0.3462, "step": 14700 }, { "epoch": 0.68, "grad_norm": 0.36921597787587057, "learning_rate": 5.037415130894836e-06, "loss": 0.2192, "step": 14701 }, { "epoch": 0.68, "grad_norm": 0.6765889567274216, "learning_rate": 5.036123411830249e-06, "loss": 0.4376, "step": 14702 }, { "epoch": 0.68, "grad_norm": 0.30447706646246053, "learning_rate": 5.0348318026611046e-06, "loss": 0.2159, "step": 14703 }, { "epoch": 0.68, "grad_norm": 0.3203348270138396, "learning_rate": 5.033540303416e-06, "loss": 0.2499, "step": 14704 }, { "epoch": 0.68, "grad_norm": 0.42669964659331694, "learning_rate": 5.032248914123523e-06, "loss": 0.1027, "step": 14705 }, { "epoch": 0.68, "grad_norm": 0.7949108252448919, "learning_rate": 5.030957634812268e-06, "loss": 0.4177, "step": 14706 }, { "epoch": 0.68, "grad_norm": 0.3434610896200157, "learning_rate": 5.029666465510825e-06, "loss": 0.2595, "step": 14707 }, { "epoch": 0.68, "grad_norm": 0.34626921900480173, "learning_rate": 5.0283754062477715e-06, "loss": 0.2686, "step": 14708 }, { "epoch": 0.68, "grad_norm": 0.4816789328547285, "learning_rate": 5.027084457051701e-06, "loss": 0.2449, "step": 14709 }, { "epoch": 0.68, "grad_norm": 0.25761168164653114, "learning_rate": 5.025793617951178e-06, "loss": 0.1993, "step": 14710 }, { "epoch": 0.68, "grad_norm": 0.5173736508885713, "learning_rate": 5.024502888974798e-06, "loss": 0.2442, "step": 14711 }, { "epoch": 0.68, "grad_norm": 0.4774113481356877, "learning_rate": 5.0232122701511245e-06, "loss": 0.3378, "step": 14712 }, { "epoch": 0.68, "grad_norm": 0.3349170542974688, "learning_rate": 5.021921761508739e-06, "loss": 0.284, "step": 14713 }, { "epoch": 0.68, "grad_norm": 0.9139303029247234, "learning_rate": 5.0206313630762035e-06, "loss": 0.5311, "step": 14714 }, { "epoch": 0.68, "grad_norm": 0.34295066019060805, "learning_rate": 5.019341074882092e-06, "loss": 0.2827, "step": 14715 }, { "epoch": 0.68, "grad_norm": 0.28534796916316485, "learning_rate": 5.01805089695497e-06, "loss": 0.2189, "step": 14716 }, { "epoch": 0.68, "grad_norm": 0.478428991876187, "learning_rate": 5.016760829323397e-06, "loss": 0.2269, "step": 14717 }, { "epoch": 0.68, "grad_norm": 0.617738520452347, "learning_rate": 5.015470872015936e-06, "loss": 0.2744, "step": 14718 }, { "epoch": 0.68, "grad_norm": 0.39701208465343224, "learning_rate": 5.01418102506115e-06, "loss": 0.245, "step": 14719 }, { "epoch": 0.68, "grad_norm": 0.3862217174800485, "learning_rate": 5.0128912884875865e-06, "loss": 0.3023, "step": 14720 }, { "epoch": 0.68, "grad_norm": 0.3231426794662638, "learning_rate": 5.011601662323807e-06, "loss": 0.1351, "step": 14721 }, { "epoch": 0.68, "grad_norm": 0.36477671575866233, "learning_rate": 5.010312146598355e-06, "loss": 0.2664, "step": 14722 }, { "epoch": 0.68, "grad_norm": 0.30314352322622823, "learning_rate": 5.009022741339784e-06, "loss": 0.253, "step": 14723 }, { "epoch": 0.68, "grad_norm": 0.8064012702092093, "learning_rate": 5.007733446576641e-06, "loss": 0.3016, "step": 14724 }, { "epoch": 0.68, "grad_norm": 0.3492462189847072, "learning_rate": 5.006444262337466e-06, "loss": 0.2465, "step": 14725 }, { "epoch": 0.68, "grad_norm": 1.2021228083277933, "learning_rate": 5.0051551886508055e-06, "loss": 0.7554, "step": 14726 }, { "epoch": 0.68, "grad_norm": 0.3729402660221099, "learning_rate": 5.003866225545186e-06, "loss": 0.2354, "step": 14727 }, { "epoch": 0.68, "grad_norm": 0.26144051968486576, "learning_rate": 5.002577373049162e-06, "loss": 0.1898, "step": 14728 }, { "epoch": 0.68, "grad_norm": 0.6084253675181527, "learning_rate": 5.001288631191255e-06, "loss": 0.2796, "step": 14729 }, { "epoch": 0.68, "grad_norm": 0.8747689929201318, "learning_rate": 5.000000000000003e-06, "loss": 0.4638, "step": 14730 }, { "epoch": 0.68, "grad_norm": 0.29757202042047215, "learning_rate": 4.998711479503927e-06, "loss": 0.2163, "step": 14731 }, { "epoch": 0.68, "grad_norm": 0.5158097020511514, "learning_rate": 4.99742306973156e-06, "loss": 0.3865, "step": 14732 }, { "epoch": 0.68, "grad_norm": 0.9905145544434747, "learning_rate": 4.996134770711428e-06, "loss": 0.4783, "step": 14733 }, { "epoch": 0.68, "grad_norm": 0.2479293816000717, "learning_rate": 4.994846582472046e-06, "loss": 0.1564, "step": 14734 }, { "epoch": 0.68, "grad_norm": 0.29246346909524173, "learning_rate": 4.993558505041935e-06, "loss": 0.2555, "step": 14735 }, { "epoch": 0.68, "grad_norm": 0.8763628083509255, "learning_rate": 4.99227053844962e-06, "loss": 0.4179, "step": 14736 }, { "epoch": 0.68, "grad_norm": 0.324604122021244, "learning_rate": 4.990982682723603e-06, "loss": 0.1932, "step": 14737 }, { "epoch": 0.68, "grad_norm": 1.1132564856622544, "learning_rate": 4.989694937892406e-06, "loss": 0.6439, "step": 14738 }, { "epoch": 0.68, "grad_norm": 0.34537545838440503, "learning_rate": 4.98840730398453e-06, "loss": 0.2725, "step": 14739 }, { "epoch": 0.68, "grad_norm": 0.3246256539644725, "learning_rate": 4.987119781028486e-06, "loss": 0.1796, "step": 14740 }, { "epoch": 0.68, "grad_norm": 0.7880081552717293, "learning_rate": 4.985832369052778e-06, "loss": 0.3835, "step": 14741 }, { "epoch": 0.68, "grad_norm": 0.3231283272602558, "learning_rate": 4.9845450680859144e-06, "loss": 0.1962, "step": 14742 }, { "epoch": 0.68, "grad_norm": 0.3648686017270063, "learning_rate": 4.983257878156388e-06, "loss": 0.2632, "step": 14743 }, { "epoch": 0.68, "grad_norm": 0.35358932851013675, "learning_rate": 4.981970799292689e-06, "loss": 0.2534, "step": 14744 }, { "epoch": 0.68, "grad_norm": 1.609169148585498, "learning_rate": 4.980683831523328e-06, "loss": 0.8187, "step": 14745 }, { "epoch": 0.68, "grad_norm": 0.3600257578682066, "learning_rate": 4.979396974876785e-06, "loss": 0.2539, "step": 14746 }, { "epoch": 0.68, "grad_norm": 0.3667970522553824, "learning_rate": 4.978110229381558e-06, "loss": 0.2607, "step": 14747 }, { "epoch": 0.68, "grad_norm": 0.3007026059768069, "learning_rate": 4.976823595066128e-06, "loss": 0.16, "step": 14748 }, { "epoch": 0.68, "grad_norm": 0.371996171774975, "learning_rate": 4.9755370719589814e-06, "loss": 0.2752, "step": 14749 }, { "epoch": 0.68, "grad_norm": 0.9378208831618536, "learning_rate": 4.974250660088604e-06, "loss": 0.5397, "step": 14750 }, { "epoch": 0.68, "grad_norm": 0.3913092527622445, "learning_rate": 4.972964359483471e-06, "loss": 0.3095, "step": 14751 }, { "epoch": 0.68, "grad_norm": 0.33213532740982116, "learning_rate": 4.971678170172064e-06, "loss": 0.2577, "step": 14752 }, { "epoch": 0.68, "grad_norm": 0.7461866376866146, "learning_rate": 4.970392092182853e-06, "loss": 0.2965, "step": 14753 }, { "epoch": 0.68, "grad_norm": 0.2782441612667592, "learning_rate": 4.969106125544314e-06, "loss": 0.1891, "step": 14754 }, { "epoch": 0.68, "grad_norm": 0.3940320020759056, "learning_rate": 4.96782027028492e-06, "loss": 0.2659, "step": 14755 }, { "epoch": 0.68, "grad_norm": 0.4319469675563594, "learning_rate": 4.966534526433131e-06, "loss": 0.327, "step": 14756 }, { "epoch": 0.68, "grad_norm": 0.8228022061655021, "learning_rate": 4.965248894017417e-06, "loss": 0.3224, "step": 14757 }, { "epoch": 0.68, "grad_norm": 0.42108401369986137, "learning_rate": 4.963963373066238e-06, "loss": 0.2634, "step": 14758 }, { "epoch": 0.68, "grad_norm": 0.31354728513524527, "learning_rate": 4.962677963608063e-06, "loss": 0.2975, "step": 14759 }, { "epoch": 0.68, "grad_norm": 0.19205525749604488, "learning_rate": 4.961392665671336e-06, "loss": 0.0718, "step": 14760 }, { "epoch": 0.68, "grad_norm": 0.42309891347674833, "learning_rate": 4.960107479284522e-06, "loss": 0.2625, "step": 14761 }, { "epoch": 0.68, "grad_norm": 0.5705137145477874, "learning_rate": 4.9588224044760726e-06, "loss": 0.3966, "step": 14762 }, { "epoch": 0.68, "grad_norm": 0.3767112869049141, "learning_rate": 4.957537441274433e-06, "loss": 0.2495, "step": 14763 }, { "epoch": 0.68, "grad_norm": 0.4245357138414307, "learning_rate": 4.956252589708058e-06, "loss": 0.2831, "step": 14764 }, { "epoch": 0.68, "grad_norm": 0.5629506674671926, "learning_rate": 4.954967849805387e-06, "loss": 0.3344, "step": 14765 }, { "epoch": 0.68, "grad_norm": 0.3297783692087681, "learning_rate": 4.953683221594864e-06, "loss": 0.1929, "step": 14766 }, { "epoch": 0.68, "grad_norm": 0.27406628703147673, "learning_rate": 4.952398705104935e-06, "loss": 0.2161, "step": 14767 }, { "epoch": 0.68, "grad_norm": 1.3134434944668747, "learning_rate": 4.951114300364031e-06, "loss": 0.7407, "step": 14768 }, { "epoch": 0.68, "grad_norm": 0.7235504784309669, "learning_rate": 4.949830007400592e-06, "loss": 0.3866, "step": 14769 }, { "epoch": 0.68, "grad_norm": 0.3545217274076139, "learning_rate": 4.948545826243043e-06, "loss": 0.2093, "step": 14770 }, { "epoch": 0.68, "grad_norm": 0.4051280774489883, "learning_rate": 4.947261756919828e-06, "loss": 0.3157, "step": 14771 }, { "epoch": 0.68, "grad_norm": 0.486078796046991, "learning_rate": 4.945977799459367e-06, "loss": 0.295, "step": 14772 }, { "epoch": 0.68, "grad_norm": 0.36007567221146375, "learning_rate": 4.944693953890084e-06, "loss": 0.1487, "step": 14773 }, { "epoch": 0.68, "grad_norm": 0.4264932158539147, "learning_rate": 4.943410220240403e-06, "loss": 0.3078, "step": 14774 }, { "epoch": 0.68, "grad_norm": 0.37988605208599335, "learning_rate": 4.9421265985387475e-06, "loss": 0.3093, "step": 14775 }, { "epoch": 0.68, "grad_norm": 0.282216130598595, "learning_rate": 4.940843088813537e-06, "loss": 0.0892, "step": 14776 }, { "epoch": 0.68, "grad_norm": 0.44070308990398893, "learning_rate": 4.939559691093182e-06, "loss": 0.3004, "step": 14777 }, { "epoch": 0.68, "grad_norm": 0.3346604513323777, "learning_rate": 4.938276405406097e-06, "loss": 0.2377, "step": 14778 }, { "epoch": 0.68, "grad_norm": 0.38687967863886735, "learning_rate": 4.936993231780698e-06, "loss": 0.2854, "step": 14779 }, { "epoch": 0.68, "grad_norm": 0.44802560733967606, "learning_rate": 4.935710170245385e-06, "loss": 0.2614, "step": 14780 }, { "epoch": 0.68, "grad_norm": 0.811714574731708, "learning_rate": 4.934427220828572e-06, "loss": 0.4254, "step": 14781 }, { "epoch": 0.68, "grad_norm": 0.3057507535382125, "learning_rate": 4.933144383558654e-06, "loss": 0.1704, "step": 14782 }, { "epoch": 0.68, "grad_norm": 0.2972984397850378, "learning_rate": 4.931861658464036e-06, "loss": 0.2425, "step": 14783 }, { "epoch": 0.68, "grad_norm": 1.2925551863916782, "learning_rate": 4.930579045573119e-06, "loss": 0.4941, "step": 14784 }, { "epoch": 0.68, "grad_norm": 0.6386053945303372, "learning_rate": 4.929296544914294e-06, "loss": 0.4159, "step": 14785 }, { "epoch": 0.68, "grad_norm": 0.3877866745891255, "learning_rate": 4.928014156515959e-06, "loss": 0.2496, "step": 14786 }, { "epoch": 0.68, "grad_norm": 0.3791658564329558, "learning_rate": 4.926731880406495e-06, "loss": 0.3245, "step": 14787 }, { "epoch": 0.68, "grad_norm": 0.25743754271604913, "learning_rate": 4.9254497166143045e-06, "loss": 0.1917, "step": 14788 }, { "epoch": 0.68, "grad_norm": 0.5827414843842658, "learning_rate": 4.924167665167763e-06, "loss": 0.2414, "step": 14789 }, { "epoch": 0.68, "grad_norm": 0.3752255777364377, "learning_rate": 4.92288572609526e-06, "loss": 0.2849, "step": 14790 }, { "epoch": 0.68, "grad_norm": 0.9150434762498326, "learning_rate": 4.921603899425171e-06, "loss": 0.5469, "step": 14791 }, { "epoch": 0.68, "grad_norm": 0.35054748539220487, "learning_rate": 4.920322185185876e-06, "loss": 0.2864, "step": 14792 }, { "epoch": 0.68, "grad_norm": 0.27560621302988253, "learning_rate": 4.919040583405758e-06, "loss": 0.1714, "step": 14793 }, { "epoch": 0.68, "grad_norm": 0.4915111225551854, "learning_rate": 4.917759094113178e-06, "loss": 0.3247, "step": 14794 }, { "epoch": 0.68, "grad_norm": 0.313851803134407, "learning_rate": 4.916477717336514e-06, "loss": 0.2716, "step": 14795 }, { "epoch": 0.68, "grad_norm": 1.3000684555391218, "learning_rate": 4.915196453104138e-06, "loss": 0.2903, "step": 14796 }, { "epoch": 0.68, "grad_norm": 0.5951372160835796, "learning_rate": 4.9139153014444085e-06, "loss": 0.3209, "step": 14797 }, { "epoch": 0.68, "grad_norm": 0.38133120865768527, "learning_rate": 4.912634262385695e-06, "loss": 0.3019, "step": 14798 }, { "epoch": 0.68, "grad_norm": 0.3366729958144585, "learning_rate": 4.911353335956353e-06, "loss": 0.2512, "step": 14799 }, { "epoch": 0.68, "grad_norm": 0.2388175246421776, "learning_rate": 4.910072522184742e-06, "loss": 0.1333, "step": 14800 }, { "epoch": 0.68, "grad_norm": 0.4395797975860606, "learning_rate": 4.908791821099225e-06, "loss": 0.2837, "step": 14801 }, { "epoch": 0.68, "grad_norm": 0.484312930410866, "learning_rate": 4.907511232728145e-06, "loss": 0.2435, "step": 14802 }, { "epoch": 0.68, "grad_norm": 0.4307057355292276, "learning_rate": 4.906230757099862e-06, "loss": 0.2746, "step": 14803 }, { "epoch": 0.68, "grad_norm": 0.44561506344904483, "learning_rate": 4.904950394242715e-06, "loss": 0.3237, "step": 14804 }, { "epoch": 0.68, "grad_norm": 0.9181918157771111, "learning_rate": 4.903670144185061e-06, "loss": 0.5401, "step": 14805 }, { "epoch": 0.68, "grad_norm": 0.2504408705271983, "learning_rate": 4.902390006955236e-06, "loss": 0.1883, "step": 14806 }, { "epoch": 0.68, "grad_norm": 0.3065093310082717, "learning_rate": 4.901109982581586e-06, "loss": 0.2268, "step": 14807 }, { "epoch": 0.68, "grad_norm": 0.952536937288202, "learning_rate": 4.899830071092442e-06, "loss": 0.4012, "step": 14808 }, { "epoch": 0.68, "grad_norm": 0.6006567098501562, "learning_rate": 4.898550272516145e-06, "loss": 0.2689, "step": 14809 }, { "epoch": 0.68, "grad_norm": 0.3619993931342857, "learning_rate": 4.897270586881032e-06, "loss": 0.2794, "step": 14810 }, { "epoch": 0.68, "grad_norm": 0.37986003463477924, "learning_rate": 4.895991014215427e-06, "loss": 0.314, "step": 14811 }, { "epoch": 0.68, "grad_norm": 0.20093415540134557, "learning_rate": 4.894711554547665e-06, "loss": 0.0832, "step": 14812 }, { "epoch": 0.68, "grad_norm": 0.38006758433398263, "learning_rate": 4.8934322079060644e-06, "loss": 0.2585, "step": 14813 }, { "epoch": 0.68, "grad_norm": 0.37298740089267546, "learning_rate": 4.892152974318955e-06, "loss": 0.3039, "step": 14814 }, { "epoch": 0.68, "grad_norm": 0.6765187389293709, "learning_rate": 4.890873853814657e-06, "loss": 0.2586, "step": 14815 }, { "epoch": 0.68, "grad_norm": 0.3861097933049821, "learning_rate": 4.889594846421485e-06, "loss": 0.2847, "step": 14816 }, { "epoch": 0.68, "grad_norm": 1.3699898547801885, "learning_rate": 4.888315952167757e-06, "loss": 0.682, "step": 14817 }, { "epoch": 0.68, "grad_norm": 0.30197485055558476, "learning_rate": 4.887037171081792e-06, "loss": 0.2478, "step": 14818 }, { "epoch": 0.68, "grad_norm": 0.2999222988890184, "learning_rate": 4.885758503191892e-06, "loss": 0.1713, "step": 14819 }, { "epoch": 0.68, "grad_norm": 0.8210052347031515, "learning_rate": 4.884479948526373e-06, "loss": 0.4205, "step": 14820 }, { "epoch": 0.68, "grad_norm": 0.6380770395725553, "learning_rate": 4.88320150711353e-06, "loss": 0.3764, "step": 14821 }, { "epoch": 0.68, "grad_norm": 0.3453280824752142, "learning_rate": 4.881923178981681e-06, "loss": 0.2129, "step": 14822 }, { "epoch": 0.68, "grad_norm": 0.36569430452340534, "learning_rate": 4.880644964159117e-06, "loss": 0.2874, "step": 14823 }, { "epoch": 0.68, "grad_norm": 0.2949922072245876, "learning_rate": 4.879366862674143e-06, "loss": 0.1635, "step": 14824 }, { "epoch": 0.68, "grad_norm": 0.35838146793826736, "learning_rate": 4.878088874555047e-06, "loss": 0.1919, "step": 14825 }, { "epoch": 0.68, "grad_norm": 0.37686059616951706, "learning_rate": 4.876810999830127e-06, "loss": 0.3216, "step": 14826 }, { "epoch": 0.68, "grad_norm": 0.8733749706309435, "learning_rate": 4.875533238527678e-06, "loss": 0.4339, "step": 14827 }, { "epoch": 0.68, "grad_norm": 0.3195899134755221, "learning_rate": 4.874255590675981e-06, "loss": 0.194, "step": 14828 }, { "epoch": 0.68, "grad_norm": 1.2292364445862323, "learning_rate": 4.8729780563033265e-06, "loss": 0.6327, "step": 14829 }, { "epoch": 0.68, "grad_norm": 0.3806679666893542, "learning_rate": 4.871700635437993e-06, "loss": 0.3219, "step": 14830 }, { "epoch": 0.68, "grad_norm": 0.24660927941562005, "learning_rate": 4.870423328108266e-06, "loss": 0.1969, "step": 14831 }, { "epoch": 0.68, "grad_norm": 0.6236478965550891, "learning_rate": 4.869146134342426e-06, "loss": 0.2195, "step": 14832 }, { "epoch": 0.68, "grad_norm": 1.4368335514725643, "learning_rate": 4.867869054168741e-06, "loss": 0.7737, "step": 14833 }, { "epoch": 0.68, "grad_norm": 0.2739820229668408, "learning_rate": 4.86659208761549e-06, "loss": 0.2423, "step": 14834 }, { "epoch": 0.68, "grad_norm": 0.47790310974207123, "learning_rate": 4.865315234710941e-06, "loss": 0.252, "step": 14835 }, { "epoch": 0.68, "grad_norm": 0.8719823736096346, "learning_rate": 4.864038495483369e-06, "loss": 0.447, "step": 14836 }, { "epoch": 0.68, "grad_norm": 0.3640277083624418, "learning_rate": 4.862761869961033e-06, "loss": 0.2899, "step": 14837 }, { "epoch": 0.68, "grad_norm": 0.34044573855380095, "learning_rate": 4.861485358172192e-06, "loss": 0.2535, "step": 14838 }, { "epoch": 0.68, "grad_norm": 0.3195814331668908, "learning_rate": 4.8602089601451196e-06, "loss": 0.1837, "step": 14839 }, { "epoch": 0.68, "grad_norm": 0.38469528548558063, "learning_rate": 4.858932675908063e-06, "loss": 0.2612, "step": 14840 }, { "epoch": 0.68, "grad_norm": 1.2369231663264248, "learning_rate": 4.857656505489285e-06, "loss": 0.3772, "step": 14841 }, { "epoch": 0.68, "grad_norm": 0.34146249157645714, "learning_rate": 4.856380448917033e-06, "loss": 0.2846, "step": 14842 }, { "epoch": 0.68, "grad_norm": 0.38473913295869905, "learning_rate": 4.85510450621956e-06, "loss": 0.2749, "step": 14843 }, { "epoch": 0.68, "grad_norm": 0.44097985476407164, "learning_rate": 4.853828677425119e-06, "loss": 0.2754, "step": 14844 }, { "epoch": 0.68, "grad_norm": 0.2980316586063471, "learning_rate": 4.852552962561946e-06, "loss": 0.1933, "step": 14845 }, { "epoch": 0.68, "grad_norm": 0.360281600356552, "learning_rate": 4.8512773616582945e-06, "loss": 0.2931, "step": 14846 }, { "epoch": 0.68, "grad_norm": 0.5288943911274756, "learning_rate": 4.850001874742395e-06, "loss": 0.3938, "step": 14847 }, { "epoch": 0.68, "grad_norm": 0.583180873984833, "learning_rate": 4.8487265018424905e-06, "loss": 0.2966, "step": 14848 }, { "epoch": 0.68, "grad_norm": 0.33249312490259125, "learning_rate": 4.84745124298682e-06, "loss": 0.2731, "step": 14849 }, { "epoch": 0.68, "grad_norm": 0.3573421545794827, "learning_rate": 4.8461760982036096e-06, "loss": 0.2999, "step": 14850 }, { "epoch": 0.68, "grad_norm": 0.17505808618995522, "learning_rate": 4.844901067521093e-06, "loss": 0.0952, "step": 14851 }, { "epoch": 0.68, "grad_norm": 0.3503518280295589, "learning_rate": 4.843626150967498e-06, "loss": 0.2542, "step": 14852 }, { "epoch": 0.68, "grad_norm": 1.2647861585366738, "learning_rate": 4.842351348571054e-06, "loss": 0.6127, "step": 14853 }, { "epoch": 0.68, "grad_norm": 0.38223365733108594, "learning_rate": 4.841076660359977e-06, "loss": 0.2544, "step": 14854 }, { "epoch": 0.68, "grad_norm": 0.3600420098474363, "learning_rate": 4.839802086362489e-06, "loss": 0.2505, "step": 14855 }, { "epoch": 0.68, "grad_norm": 1.0851056698835129, "learning_rate": 4.838527626606815e-06, "loss": 0.5067, "step": 14856 }, { "epoch": 0.68, "grad_norm": 0.2866995802392194, "learning_rate": 4.837253281121159e-06, "loss": 0.2056, "step": 14857 }, { "epoch": 0.68, "grad_norm": 0.26806860331099647, "learning_rate": 4.835979049933744e-06, "loss": 0.202, "step": 14858 }, { "epoch": 0.68, "grad_norm": 0.478370830738979, "learning_rate": 4.8347049330727725e-06, "loss": 0.3033, "step": 14859 }, { "epoch": 0.68, "grad_norm": 0.6616205420281361, "learning_rate": 4.833430930566455e-06, "loss": 0.3799, "step": 14860 }, { "epoch": 0.68, "grad_norm": 0.442247347880616, "learning_rate": 4.832157042443001e-06, "loss": 0.178, "step": 14861 }, { "epoch": 0.68, "grad_norm": 0.3421735448917148, "learning_rate": 4.830883268730605e-06, "loss": 0.2952, "step": 14862 }, { "epoch": 0.68, "grad_norm": 0.4242861954956544, "learning_rate": 4.8296096094574765e-06, "loss": 0.2635, "step": 14863 }, { "epoch": 0.68, "grad_norm": 0.3167999253470079, "learning_rate": 4.828336064651798e-06, "loss": 0.1956, "step": 14864 }, { "epoch": 0.68, "grad_norm": 0.5494529342169818, "learning_rate": 4.827062634341785e-06, "loss": 0.2814, "step": 14865 }, { "epoch": 0.68, "grad_norm": 0.41032306548640013, "learning_rate": 4.825789318555617e-06, "loss": 0.3286, "step": 14866 }, { "epoch": 0.68, "grad_norm": 0.3130695907495629, "learning_rate": 4.824516117321484e-06, "loss": 0.1845, "step": 14867 }, { "epoch": 0.68, "grad_norm": 1.2228732425021007, "learning_rate": 4.823243030667576e-06, "loss": 0.5381, "step": 14868 }, { "epoch": 0.68, "grad_norm": 1.249935960744163, "learning_rate": 4.821970058622077e-06, "loss": 0.7687, "step": 14869 }, { "epoch": 0.68, "grad_norm": 0.28943789304284595, "learning_rate": 4.820697201213175e-06, "loss": 0.2461, "step": 14870 }, { "epoch": 0.68, "grad_norm": 0.36903575578846937, "learning_rate": 4.819424458469041e-06, "loss": 0.1866, "step": 14871 }, { "epoch": 0.68, "grad_norm": 0.48552047370513435, "learning_rate": 4.81815183041786e-06, "loss": 0.2829, "step": 14872 }, { "epoch": 0.68, "grad_norm": 0.36356304839850956, "learning_rate": 4.816879317087799e-06, "loss": 0.2505, "step": 14873 }, { "epoch": 0.68, "grad_norm": 0.36315183619714647, "learning_rate": 4.815606918507036e-06, "loss": 0.2321, "step": 14874 }, { "epoch": 0.68, "grad_norm": 1.1036323548085407, "learning_rate": 4.814334634703741e-06, "loss": 0.6113, "step": 14875 }, { "epoch": 0.68, "grad_norm": 0.42716503914857173, "learning_rate": 4.813062465706077e-06, "loss": 0.2941, "step": 14876 }, { "epoch": 0.68, "grad_norm": 0.7012278421890177, "learning_rate": 4.811790411542209e-06, "loss": 0.2652, "step": 14877 }, { "epoch": 0.68, "grad_norm": 0.27671139947561396, "learning_rate": 4.810518472240305e-06, "loss": 0.213, "step": 14878 }, { "epoch": 0.68, "grad_norm": 0.5913561470655007, "learning_rate": 4.809246647828517e-06, "loss": 0.2587, "step": 14879 }, { "epoch": 0.68, "grad_norm": 0.3984335191402875, "learning_rate": 4.807974938335009e-06, "loss": 0.2774, "step": 14880 }, { "epoch": 0.68, "grad_norm": 0.34053603385071507, "learning_rate": 4.806703343787924e-06, "loss": 0.2616, "step": 14881 }, { "epoch": 0.68, "grad_norm": 0.4098913168836748, "learning_rate": 4.8054318642154294e-06, "loss": 0.2442, "step": 14882 }, { "epoch": 0.68, "grad_norm": 0.543330196384016, "learning_rate": 4.804160499645667e-06, "loss": 0.3039, "step": 14883 }, { "epoch": 0.68, "grad_norm": 0.24514524321959258, "learning_rate": 4.8028892501067795e-06, "loss": 0.099, "step": 14884 }, { "epoch": 0.68, "grad_norm": 0.4497937285775187, "learning_rate": 4.801618115626915e-06, "loss": 0.2633, "step": 14885 }, { "epoch": 0.68, "grad_norm": 0.3519854854192787, "learning_rate": 4.800347096234215e-06, "loss": 0.2749, "step": 14886 }, { "epoch": 0.68, "grad_norm": 1.066305748079328, "learning_rate": 4.799076191956822e-06, "loss": 0.4689, "step": 14887 }, { "epoch": 0.68, "grad_norm": 0.4616407808136488, "learning_rate": 4.797805402822866e-06, "loss": 0.2836, "step": 14888 }, { "epoch": 0.68, "grad_norm": 0.5130236686582241, "learning_rate": 4.796534728860489e-06, "loss": 0.3527, "step": 14889 }, { "epoch": 0.68, "grad_norm": 0.2216244169018214, "learning_rate": 4.795264170097813e-06, "loss": 0.1634, "step": 14890 }, { "epoch": 0.68, "grad_norm": 0.44659515724180926, "learning_rate": 4.7939937265629725e-06, "loss": 0.2635, "step": 14891 }, { "epoch": 0.68, "grad_norm": 0.6164796242337653, "learning_rate": 4.792723398284097e-06, "loss": 0.3666, "step": 14892 }, { "epoch": 0.68, "grad_norm": 0.4740551068924641, "learning_rate": 4.791453185289302e-06, "loss": 0.3352, "step": 14893 }, { "epoch": 0.68, "grad_norm": 0.34189852179608027, "learning_rate": 4.790183087606715e-06, "loss": 0.221, "step": 14894 }, { "epoch": 0.68, "grad_norm": 0.5038264781010451, "learning_rate": 4.788913105264455e-06, "loss": 0.3478, "step": 14895 }, { "epoch": 0.68, "grad_norm": 0.3425412868835726, "learning_rate": 4.787643238290635e-06, "loss": 0.197, "step": 14896 }, { "epoch": 0.68, "grad_norm": 0.30041639802726205, "learning_rate": 4.786373486713371e-06, "loss": 0.0696, "step": 14897 }, { "epoch": 0.68, "grad_norm": 0.28530990357442765, "learning_rate": 4.7851038505607675e-06, "loss": 0.2877, "step": 14898 }, { "epoch": 0.68, "grad_norm": 0.6508530150422616, "learning_rate": 4.783834329860946e-06, "loss": 0.3997, "step": 14899 }, { "epoch": 0.68, "grad_norm": 0.4840382972907225, "learning_rate": 4.782564924642e-06, "loss": 0.1946, "step": 14900 }, { "epoch": 0.68, "grad_norm": 0.3329086261550118, "learning_rate": 4.781295634932042e-06, "loss": 0.2969, "step": 14901 }, { "epoch": 0.68, "grad_norm": 0.5356908643287572, "learning_rate": 4.780026460759171e-06, "loss": 0.3358, "step": 14902 }, { "epoch": 0.68, "grad_norm": 0.2654297276694022, "learning_rate": 4.7787574021514725e-06, "loss": 0.1259, "step": 14903 }, { "epoch": 0.68, "grad_norm": 0.6186378450816218, "learning_rate": 4.777488459137062e-06, "loss": 0.3699, "step": 14904 }, { "epoch": 0.68, "grad_norm": 0.465319163108568, "learning_rate": 4.77621963174402e-06, "loss": 0.3016, "step": 14905 }, { "epoch": 0.68, "grad_norm": 0.3815498312607489, "learning_rate": 4.774950920000444e-06, "loss": 0.2664, "step": 14906 }, { "epoch": 0.68, "grad_norm": 0.40187978997198653, "learning_rate": 4.773682323934415e-06, "loss": 0.2531, "step": 14907 }, { "epoch": 0.68, "grad_norm": 0.5957791936281502, "learning_rate": 4.7724138435740204e-06, "loss": 0.2858, "step": 14908 }, { "epoch": 0.68, "grad_norm": 0.3001487981441562, "learning_rate": 4.771145478947351e-06, "loss": 0.2146, "step": 14909 }, { "epoch": 0.68, "grad_norm": 0.2934836827513517, "learning_rate": 4.769877230082476e-06, "loss": 0.2151, "step": 14910 }, { "epoch": 0.68, "grad_norm": 0.7318927919111695, "learning_rate": 4.768609097007478e-06, "loss": 0.3761, "step": 14911 }, { "epoch": 0.69, "grad_norm": 0.6152835401895149, "learning_rate": 4.767341079750437e-06, "loss": 0.3981, "step": 14912 }, { "epoch": 0.69, "grad_norm": 0.43050859800303304, "learning_rate": 4.766073178339418e-06, "loss": 0.2325, "step": 14913 }, { "epoch": 0.69, "grad_norm": 0.38559529778896723, "learning_rate": 4.764805392802497e-06, "loss": 0.2953, "step": 14914 }, { "epoch": 0.69, "grad_norm": 0.2681506144282517, "learning_rate": 4.763537723167733e-06, "loss": 0.168, "step": 14915 }, { "epoch": 0.69, "grad_norm": 0.44967457284459617, "learning_rate": 4.762270169463202e-06, "loss": 0.2329, "step": 14916 }, { "epoch": 0.69, "grad_norm": 0.526003252277324, "learning_rate": 4.76100273171696e-06, "loss": 0.2964, "step": 14917 }, { "epoch": 0.69, "grad_norm": 0.8599136580628743, "learning_rate": 4.759735409957069e-06, "loss": 0.4567, "step": 14918 }, { "epoch": 0.69, "grad_norm": 0.3747516610530139, "learning_rate": 4.7584682042115834e-06, "loss": 0.2739, "step": 14919 }, { "epoch": 0.69, "grad_norm": 0.8507403581316986, "learning_rate": 4.75720111450856e-06, "loss": 0.2521, "step": 14920 }, { "epoch": 0.69, "grad_norm": 0.2157055735795716, "learning_rate": 4.755934140876054e-06, "loss": 0.1981, "step": 14921 }, { "epoch": 0.69, "grad_norm": 0.37414458891707814, "learning_rate": 4.7546672833421085e-06, "loss": 0.2599, "step": 14922 }, { "epoch": 0.69, "grad_norm": 0.7728127803175406, "learning_rate": 4.753400541934777e-06, "loss": 0.2996, "step": 14923 }, { "epoch": 0.69, "grad_norm": 0.6429291354655791, "learning_rate": 4.752133916682098e-06, "loss": 0.3732, "step": 14924 }, { "epoch": 0.69, "grad_norm": 0.35540801022069973, "learning_rate": 4.750867407612116e-06, "loss": 0.3039, "step": 14925 }, { "epoch": 0.69, "grad_norm": 0.470031815820617, "learning_rate": 4.749601014752872e-06, "loss": 0.2199, "step": 14926 }, { "epoch": 0.69, "grad_norm": 0.3902044476832956, "learning_rate": 4.748334738132399e-06, "loss": 0.2116, "step": 14927 }, { "epoch": 0.69, "grad_norm": 0.4395030620168188, "learning_rate": 4.747068577778734e-06, "loss": 0.3006, "step": 14928 }, { "epoch": 0.69, "grad_norm": 0.31866687351232026, "learning_rate": 4.745802533719908e-06, "loss": 0.2199, "step": 14929 }, { "epoch": 0.69, "grad_norm": 0.6565316245308129, "learning_rate": 4.744536605983952e-06, "loss": 0.3488, "step": 14930 }, { "epoch": 0.69, "grad_norm": 0.42421880774806314, "learning_rate": 4.743270794598891e-06, "loss": 0.3168, "step": 14931 }, { "epoch": 0.69, "grad_norm": 1.167132244905609, "learning_rate": 4.742005099592745e-06, "loss": 0.4966, "step": 14932 }, { "epoch": 0.69, "grad_norm": 0.34823517142034527, "learning_rate": 4.740739520993538e-06, "loss": 0.2648, "step": 14933 }, { "epoch": 0.69, "grad_norm": 0.34879116638724783, "learning_rate": 4.739474058829288e-06, "loss": 0.2876, "step": 14934 }, { "epoch": 0.69, "grad_norm": 0.3298183326037904, "learning_rate": 4.7382087131280176e-06, "loss": 0.1926, "step": 14935 }, { "epoch": 0.69, "grad_norm": 0.46072625884738644, "learning_rate": 4.7369434839177295e-06, "loss": 0.2487, "step": 14936 }, { "epoch": 0.69, "grad_norm": 0.30763665806783286, "learning_rate": 4.7356783712264405e-06, "loss": 0.285, "step": 14937 }, { "epoch": 0.69, "grad_norm": 0.5602712407012543, "learning_rate": 4.734413375082163e-06, "loss": 0.338, "step": 14938 }, { "epoch": 0.69, "grad_norm": 0.5639676348188166, "learning_rate": 4.7331484955128944e-06, "loss": 0.2567, "step": 14939 }, { "epoch": 0.69, "grad_norm": 0.36510447638858545, "learning_rate": 4.731883732546646e-06, "loss": 0.2795, "step": 14940 }, { "epoch": 0.69, "grad_norm": 0.27023229684251304, "learning_rate": 4.73061908621141e-06, "loss": 0.1978, "step": 14941 }, { "epoch": 0.69, "grad_norm": 0.42939707732208476, "learning_rate": 4.729354556535188e-06, "loss": 0.2673, "step": 14942 }, { "epoch": 0.69, "grad_norm": 0.3729610804199242, "learning_rate": 4.728090143545981e-06, "loss": 0.2648, "step": 14943 }, { "epoch": 0.69, "grad_norm": 0.8126709685486809, "learning_rate": 4.7268258472717735e-06, "loss": 0.4806, "step": 14944 }, { "epoch": 0.69, "grad_norm": 0.3366754608428277, "learning_rate": 4.725561667740559e-06, "loss": 0.2872, "step": 14945 }, { "epoch": 0.69, "grad_norm": 0.3582941860702632, "learning_rate": 4.7242976049803255e-06, "loss": 0.2198, "step": 14946 }, { "epoch": 0.69, "grad_norm": 0.26985403521956675, "learning_rate": 4.723033659019061e-06, "loss": 0.1546, "step": 14947 }, { "epoch": 0.69, "grad_norm": 0.8429704431916867, "learning_rate": 4.721769829884747e-06, "loss": 0.5846, "step": 14948 }, { "epoch": 0.69, "grad_norm": 0.29705912970266835, "learning_rate": 4.7205061176053575e-06, "loss": 0.2198, "step": 14949 }, { "epoch": 0.69, "grad_norm": 0.5354843818470001, "learning_rate": 4.7192425222088745e-06, "loss": 0.3008, "step": 14950 }, { "epoch": 0.69, "grad_norm": 0.7131002694524821, "learning_rate": 4.717979043723271e-06, "loss": 0.3765, "step": 14951 }, { "epoch": 0.69, "grad_norm": 0.332799838036901, "learning_rate": 4.716715682176526e-06, "loss": 0.1918, "step": 14952 }, { "epoch": 0.69, "grad_norm": 0.24951375927210165, "learning_rate": 4.7154524375965985e-06, "loss": 0.2029, "step": 14953 }, { "epoch": 0.69, "grad_norm": 1.3899141597283207, "learning_rate": 4.714189310011461e-06, "loss": 0.7985, "step": 14954 }, { "epoch": 0.69, "grad_norm": 0.3154425660497833, "learning_rate": 4.7129262994490825e-06, "loss": 0.2014, "step": 14955 }, { "epoch": 0.69, "grad_norm": 0.8401163018603621, "learning_rate": 4.711663405937416e-06, "loss": 0.3884, "step": 14956 }, { "epoch": 0.69, "grad_norm": 0.36472791637023144, "learning_rate": 4.710400629504427e-06, "loss": 0.2884, "step": 14957 }, { "epoch": 0.69, "grad_norm": 0.3608850594886287, "learning_rate": 4.709137970178067e-06, "loss": 0.2323, "step": 14958 }, { "epoch": 0.69, "grad_norm": 0.3355383047222562, "learning_rate": 4.707875427986294e-06, "loss": 0.0886, "step": 14959 }, { "epoch": 0.69, "grad_norm": 0.5443886521465415, "learning_rate": 4.70661300295706e-06, "loss": 0.404, "step": 14960 }, { "epoch": 0.69, "grad_norm": 0.3098650552302095, "learning_rate": 4.7053506951183104e-06, "loss": 0.2659, "step": 14961 }, { "epoch": 0.69, "grad_norm": 0.7811553672545108, "learning_rate": 4.704088504497996e-06, "loss": 0.2241, "step": 14962 }, { "epoch": 0.69, "grad_norm": 0.8146791331884439, "learning_rate": 4.702826431124051e-06, "loss": 0.4254, "step": 14963 }, { "epoch": 0.69, "grad_norm": 0.4647002561811673, "learning_rate": 4.70156447502443e-06, "loss": 0.2772, "step": 14964 }, { "epoch": 0.69, "grad_norm": 0.2867654002298698, "learning_rate": 4.700302636227062e-06, "loss": 0.2334, "step": 14965 }, { "epoch": 0.69, "grad_norm": 0.7998249713367785, "learning_rate": 4.6990409147598896e-06, "loss": 0.4733, "step": 14966 }, { "epoch": 0.69, "grad_norm": 0.38721610540968415, "learning_rate": 4.697779310650837e-06, "loss": 0.2819, "step": 14967 }, { "epoch": 0.69, "grad_norm": 0.6496715338990422, "learning_rate": 4.696517823927842e-06, "loss": 0.1916, "step": 14968 }, { "epoch": 0.69, "grad_norm": 0.3495899240535417, "learning_rate": 4.695256454618834e-06, "loss": 0.2806, "step": 14969 }, { "epoch": 0.69, "grad_norm": 0.3630608488856457, "learning_rate": 4.693995202751731e-06, "loss": 0.2759, "step": 14970 }, { "epoch": 0.69, "grad_norm": 1.1535030215993571, "learning_rate": 4.69273406835446e-06, "loss": 0.515, "step": 14971 }, { "epoch": 0.69, "grad_norm": 0.4603608934653564, "learning_rate": 4.691473051454945e-06, "loss": 0.2884, "step": 14972 }, { "epoch": 0.69, "grad_norm": 0.2860082973052143, "learning_rate": 4.690212152081099e-06, "loss": 0.2415, "step": 14973 }, { "epoch": 0.69, "grad_norm": 0.4700411634956343, "learning_rate": 4.6889513702608395e-06, "loss": 0.259, "step": 14974 }, { "epoch": 0.69, "grad_norm": 0.4173174697538321, "learning_rate": 4.687690706022071e-06, "loss": 0.0944, "step": 14975 }, { "epoch": 0.69, "grad_norm": 0.3783910850717987, "learning_rate": 4.686430159392718e-06, "loss": 0.2619, "step": 14976 }, { "epoch": 0.69, "grad_norm": 0.40697252076071944, "learning_rate": 4.685169730400679e-06, "loss": 0.3412, "step": 14977 }, { "epoch": 0.69, "grad_norm": 0.8308828651110298, "learning_rate": 4.683909419073858e-06, "loss": 0.3495, "step": 14978 }, { "epoch": 0.69, "grad_norm": 0.36814447659125005, "learning_rate": 4.68264922544016e-06, "loss": 0.2708, "step": 14979 }, { "epoch": 0.69, "grad_norm": 0.8995159860811099, "learning_rate": 4.681389149527478e-06, "loss": 0.5217, "step": 14980 }, { "epoch": 0.69, "grad_norm": 0.2510983669284052, "learning_rate": 4.68012919136372e-06, "loss": 0.174, "step": 14981 }, { "epoch": 0.69, "grad_norm": 0.4244888236716084, "learning_rate": 4.6788693509767715e-06, "loss": 0.2682, "step": 14982 }, { "epoch": 0.69, "grad_norm": 0.5875702953154892, "learning_rate": 4.677609628394529e-06, "loss": 0.3488, "step": 14983 }, { "epoch": 0.69, "grad_norm": 0.43257039636052735, "learning_rate": 4.676350023644878e-06, "loss": 0.2949, "step": 14984 }, { "epoch": 0.69, "grad_norm": 0.3984165724902577, "learning_rate": 4.675090536755706e-06, "loss": 0.1888, "step": 14985 }, { "epoch": 0.69, "grad_norm": 0.5199818749548794, "learning_rate": 4.6738311677549e-06, "loss": 0.3758, "step": 14986 }, { "epoch": 0.69, "grad_norm": 0.33890102354294693, "learning_rate": 4.672571916670335e-06, "loss": 0.1939, "step": 14987 }, { "epoch": 0.69, "grad_norm": 0.2943609665776541, "learning_rate": 4.6713127835298945e-06, "loss": 0.1899, "step": 14988 }, { "epoch": 0.69, "grad_norm": 0.37263026537227945, "learning_rate": 4.670053768361456e-06, "loss": 0.3176, "step": 14989 }, { "epoch": 0.69, "grad_norm": 0.7312784937929746, "learning_rate": 4.668794871192885e-06, "loss": 0.4069, "step": 14990 }, { "epoch": 0.69, "grad_norm": 0.38486801444559954, "learning_rate": 4.667536092052063e-06, "loss": 0.2215, "step": 14991 }, { "epoch": 0.69, "grad_norm": 0.509465112216622, "learning_rate": 4.666277430966848e-06, "loss": 0.3465, "step": 14992 }, { "epoch": 0.69, "grad_norm": 0.2584303745510036, "learning_rate": 4.665018887965109e-06, "loss": 0.1957, "step": 14993 }, { "epoch": 0.69, "grad_norm": 0.34439544387004356, "learning_rate": 4.663760463074711e-06, "loss": 0.1873, "step": 14994 }, { "epoch": 0.69, "grad_norm": 1.2085880005113516, "learning_rate": 4.662502156323517e-06, "loss": 0.5904, "step": 14995 }, { "epoch": 0.69, "grad_norm": 0.4845266201666869, "learning_rate": 4.6612439677393804e-06, "loss": 0.3083, "step": 14996 }, { "epoch": 0.69, "grad_norm": 0.32110136929403593, "learning_rate": 4.65998589735015e-06, "loss": 0.2666, "step": 14997 }, { "epoch": 0.69, "grad_norm": 0.5389516032796184, "learning_rate": 4.658727945183692e-06, "loss": 0.261, "step": 14998 }, { "epoch": 0.69, "grad_norm": 0.26504972786021364, "learning_rate": 4.657470111267846e-06, "loss": 0.1581, "step": 14999 }, { "epoch": 0.69, "grad_norm": 0.4140625448415751, "learning_rate": 4.656212395630465e-06, "loss": 0.2511, "step": 15000 }, { "epoch": 0.69, "grad_norm": 0.3248407675881738, "learning_rate": 4.654954798299388e-06, "loss": 0.2461, "step": 15001 }, { "epoch": 0.69, "grad_norm": 0.6745325017232855, "learning_rate": 4.653697319302461e-06, "loss": 0.4109, "step": 15002 }, { "epoch": 0.69, "grad_norm": 0.6034207022820928, "learning_rate": 4.652439958667526e-06, "loss": 0.3047, "step": 15003 }, { "epoch": 0.69, "grad_norm": 0.3248430277759104, "learning_rate": 4.651182716422412e-06, "loss": 0.2291, "step": 15004 }, { "epoch": 0.69, "grad_norm": 0.2953053313839936, "learning_rate": 4.6499255925949575e-06, "loss": 0.2145, "step": 15005 }, { "epoch": 0.69, "grad_norm": 0.6595842507948835, "learning_rate": 4.648668587212998e-06, "loss": 0.3212, "step": 15006 }, { "epoch": 0.69, "grad_norm": 0.3872264679609567, "learning_rate": 4.647411700304354e-06, "loss": 0.3035, "step": 15007 }, { "epoch": 0.69, "grad_norm": 0.8399595443490007, "learning_rate": 4.64615493189686e-06, "loss": 0.2677, "step": 15008 }, { "epoch": 0.69, "grad_norm": 0.3391506503121247, "learning_rate": 4.644898282018333e-06, "loss": 0.2427, "step": 15009 }, { "epoch": 0.69, "grad_norm": 0.6346358599678377, "learning_rate": 4.643641750696596e-06, "loss": 0.4105, "step": 15010 }, { "epoch": 0.69, "grad_norm": 0.1565622222737067, "learning_rate": 4.6423853379594675e-06, "loss": 0.068, "step": 15011 }, { "epoch": 0.69, "grad_norm": 0.3564921552422663, "learning_rate": 4.641129043834768e-06, "loss": 0.2714, "step": 15012 }, { "epoch": 0.69, "grad_norm": 0.38050554602880915, "learning_rate": 4.639872868350307e-06, "loss": 0.2893, "step": 15013 }, { "epoch": 0.69, "grad_norm": 0.7129760998602115, "learning_rate": 4.638616811533886e-06, "loss": 0.3344, "step": 15014 }, { "epoch": 0.69, "grad_norm": 0.40206576290701596, "learning_rate": 4.637360873413331e-06, "loss": 0.2923, "step": 15015 }, { "epoch": 0.69, "grad_norm": 0.5891769972521699, "learning_rate": 4.636105054016431e-06, "loss": 0.3817, "step": 15016 }, { "epoch": 0.69, "grad_norm": 0.24451825613786068, "learning_rate": 4.634849353371e-06, "loss": 0.1801, "step": 15017 }, { "epoch": 0.69, "grad_norm": 0.5998182158721055, "learning_rate": 4.63359377150483e-06, "loss": 0.3017, "step": 15018 }, { "epoch": 0.69, "grad_norm": 0.41838165711364644, "learning_rate": 4.632338308445723e-06, "loss": 0.3069, "step": 15019 }, { "epoch": 0.69, "grad_norm": 0.3846078401207333, "learning_rate": 4.631082964221475e-06, "loss": 0.3131, "step": 15020 }, { "epoch": 0.69, "grad_norm": 0.4252564919971829, "learning_rate": 4.629827738859871e-06, "loss": 0.1581, "step": 15021 }, { "epoch": 0.69, "grad_norm": 0.4279115400867005, "learning_rate": 4.62857263238871e-06, "loss": 0.3104, "step": 15022 }, { "epoch": 0.69, "grad_norm": 0.45828710742152173, "learning_rate": 4.627317644835766e-06, "loss": 0.2549, "step": 15023 }, { "epoch": 0.69, "grad_norm": 0.38347991503033996, "learning_rate": 4.626062776228839e-06, "loss": 0.1972, "step": 15024 }, { "epoch": 0.69, "grad_norm": 0.34558795598347114, "learning_rate": 4.624808026595702e-06, "loss": 0.2748, "step": 15025 }, { "epoch": 0.69, "grad_norm": 0.49215343863086497, "learning_rate": 4.623553395964131e-06, "loss": 0.2695, "step": 15026 }, { "epoch": 0.69, "grad_norm": 0.5584687447420302, "learning_rate": 4.622298884361905e-06, "loss": 0.2213, "step": 15027 }, { "epoch": 0.69, "grad_norm": 0.3039480794746229, "learning_rate": 4.621044491816801e-06, "loss": 0.2718, "step": 15028 }, { "epoch": 0.69, "grad_norm": 1.1641608643765324, "learning_rate": 4.619790218356589e-06, "loss": 0.5415, "step": 15029 }, { "epoch": 0.69, "grad_norm": 0.4302613455630632, "learning_rate": 4.618536064009034e-06, "loss": 0.2245, "step": 15030 }, { "epoch": 0.69, "grad_norm": 0.5022495209187914, "learning_rate": 4.6172820288019025e-06, "loss": 0.3425, "step": 15031 }, { "epoch": 0.69, "grad_norm": 0.23196792778911693, "learning_rate": 4.616028112762964e-06, "loss": 0.1946, "step": 15032 }, { "epoch": 0.69, "grad_norm": 0.6072750056322345, "learning_rate": 4.614774315919969e-06, "loss": 0.3832, "step": 15033 }, { "epoch": 0.69, "grad_norm": 0.4122991477652811, "learning_rate": 4.6135206383006845e-06, "loss": 0.2311, "step": 15034 }, { "epoch": 0.69, "grad_norm": 0.6298830197590604, "learning_rate": 4.612267079932858e-06, "loss": 0.3817, "step": 15035 }, { "epoch": 0.69, "grad_norm": 0.3437834898698803, "learning_rate": 4.611013640844245e-06, "loss": 0.2805, "step": 15036 }, { "epoch": 0.69, "grad_norm": 0.3642050901893921, "learning_rate": 4.609760321062601e-06, "loss": 0.198, "step": 15037 }, { "epoch": 0.69, "grad_norm": 0.28778494438632246, "learning_rate": 4.608507120615664e-06, "loss": 0.1966, "step": 15038 }, { "epoch": 0.69, "grad_norm": 1.5428744841090298, "learning_rate": 4.607254039531186e-06, "loss": 0.724, "step": 15039 }, { "epoch": 0.69, "grad_norm": 0.24173747007712018, "learning_rate": 4.606001077836899e-06, "loss": 0.2062, "step": 15040 }, { "epoch": 0.69, "grad_norm": 0.6693224925171881, "learning_rate": 4.604748235560557e-06, "loss": 0.3767, "step": 15041 }, { "epoch": 0.69, "grad_norm": 0.7878288549864655, "learning_rate": 4.603495512729889e-06, "loss": 0.4192, "step": 15042 }, { "epoch": 0.69, "grad_norm": 0.26175333907545395, "learning_rate": 4.602242909372625e-06, "loss": 0.1657, "step": 15043 }, { "epoch": 0.69, "grad_norm": 0.3676481122087531, "learning_rate": 4.6009904255165e-06, "loss": 0.3234, "step": 15044 }, { "epoch": 0.69, "grad_norm": 0.33019661994118316, "learning_rate": 4.599738061189244e-06, "loss": 0.1924, "step": 15045 }, { "epoch": 0.69, "grad_norm": 0.42358690607095756, "learning_rate": 4.598485816418586e-06, "loss": 0.2992, "step": 15046 }, { "epoch": 0.69, "grad_norm": 0.869084084008399, "learning_rate": 4.597233691232244e-06, "loss": 0.3035, "step": 15047 }, { "epoch": 0.69, "grad_norm": 0.3934394188217789, "learning_rate": 4.595981685657939e-06, "loss": 0.3038, "step": 15048 }, { "epoch": 0.69, "grad_norm": 0.4069058457299068, "learning_rate": 4.594729799723395e-06, "loss": 0.2936, "step": 15049 }, { "epoch": 0.69, "grad_norm": 0.36806272989234623, "learning_rate": 4.59347803345632e-06, "loss": 0.159, "step": 15050 }, { "epoch": 0.69, "grad_norm": 0.34629022487535427, "learning_rate": 4.592226386884434e-06, "loss": 0.1902, "step": 15051 }, { "epoch": 0.69, "grad_norm": 0.4361584723758657, "learning_rate": 4.5909748600354395e-06, "loss": 0.3088, "step": 15052 }, { "epoch": 0.69, "grad_norm": 0.3950850631882616, "learning_rate": 4.589723452937049e-06, "loss": 0.2677, "step": 15053 }, { "epoch": 0.69, "grad_norm": 0.9255034087534584, "learning_rate": 4.58847216561697e-06, "loss": 0.4551, "step": 15054 }, { "epoch": 0.69, "grad_norm": 0.3810109154178935, "learning_rate": 4.587220998102899e-06, "loss": 0.2618, "step": 15055 }, { "epoch": 0.69, "grad_norm": 0.3418873385509642, "learning_rate": 4.585969950422542e-06, "loss": 0.2593, "step": 15056 }, { "epoch": 0.69, "grad_norm": 0.24905243433512392, "learning_rate": 4.584719022603583e-06, "loss": 0.153, "step": 15057 }, { "epoch": 0.69, "grad_norm": 0.35401164199015245, "learning_rate": 4.583468214673734e-06, "loss": 0.2652, "step": 15058 }, { "epoch": 0.69, "grad_norm": 0.7774822584185109, "learning_rate": 4.582217526660675e-06, "loss": 0.3546, "step": 15059 }, { "epoch": 0.69, "grad_norm": 0.35526367550610033, "learning_rate": 4.580966958592101e-06, "loss": 0.2323, "step": 15060 }, { "epoch": 0.69, "grad_norm": 0.3297168215369546, "learning_rate": 4.579716510495692e-06, "loss": 0.2405, "step": 15061 }, { "epoch": 0.69, "grad_norm": 1.3965273513544676, "learning_rate": 4.578466182399136e-06, "loss": 0.7704, "step": 15062 }, { "epoch": 0.69, "grad_norm": 0.6136862048623709, "learning_rate": 4.577215974330117e-06, "loss": 0.2814, "step": 15063 }, { "epoch": 0.69, "grad_norm": 0.27213986912525256, "learning_rate": 4.575965886316305e-06, "loss": 0.2527, "step": 15064 }, { "epoch": 0.69, "grad_norm": 0.4717733553515794, "learning_rate": 4.574715918385382e-06, "loss": 0.2758, "step": 15065 }, { "epoch": 0.69, "grad_norm": 0.36621624599969715, "learning_rate": 4.573466070565022e-06, "loss": 0.118, "step": 15066 }, { "epoch": 0.69, "grad_norm": 0.39435549379720786, "learning_rate": 4.572216342882891e-06, "loss": 0.29, "step": 15067 }, { "epoch": 0.69, "grad_norm": 0.3753782789915386, "learning_rate": 4.570966735366661e-06, "loss": 0.3287, "step": 15068 }, { "epoch": 0.69, "grad_norm": 0.8835055894430496, "learning_rate": 4.569717248043991e-06, "loss": 0.2909, "step": 15069 }, { "epoch": 0.69, "grad_norm": 0.3818858122866555, "learning_rate": 4.568467880942548e-06, "loss": 0.2582, "step": 15070 }, { "epoch": 0.69, "grad_norm": 0.2855388232861323, "learning_rate": 4.567218634089995e-06, "loss": 0.1952, "step": 15071 }, { "epoch": 0.69, "grad_norm": 0.347337322275765, "learning_rate": 4.565969507513981e-06, "loss": 0.272, "step": 15072 }, { "epoch": 0.69, "grad_norm": 0.38926275079838873, "learning_rate": 4.5647205012421695e-06, "loss": 0.2097, "step": 15073 }, { "epoch": 0.69, "grad_norm": 1.0642639894620527, "learning_rate": 4.563471615302198e-06, "loss": 0.6302, "step": 15074 }, { "epoch": 0.69, "grad_norm": 0.808615050259147, "learning_rate": 4.562222849721735e-06, "loss": 0.3708, "step": 15075 }, { "epoch": 0.69, "grad_norm": 0.24709329285994466, "learning_rate": 4.560974204528412e-06, "loss": 0.2252, "step": 15076 }, { "epoch": 0.69, "grad_norm": 0.32699945775896205, "learning_rate": 4.559725679749883e-06, "loss": 0.2016, "step": 15077 }, { "epoch": 0.69, "grad_norm": 1.7301375251451545, "learning_rate": 4.5584772754137785e-06, "loss": 0.6738, "step": 15078 }, { "epoch": 0.69, "grad_norm": 0.34774945525877665, "learning_rate": 4.557228991547743e-06, "loss": 0.2125, "step": 15079 }, { "epoch": 0.69, "grad_norm": 0.3743729986406214, "learning_rate": 4.555980828179416e-06, "loss": 0.2971, "step": 15080 }, { "epoch": 0.69, "grad_norm": 0.9762390097429827, "learning_rate": 4.5547327853364224e-06, "loss": 0.3857, "step": 15081 }, { "epoch": 0.69, "grad_norm": 0.33564905696697234, "learning_rate": 4.553484863046401e-06, "loss": 0.1981, "step": 15082 }, { "epoch": 0.69, "grad_norm": 0.33717141997507216, "learning_rate": 4.552237061336972e-06, "loss": 0.1748, "step": 15083 }, { "epoch": 0.69, "grad_norm": 0.40328610858347463, "learning_rate": 4.550989380235762e-06, "loss": 0.2909, "step": 15084 }, { "epoch": 0.69, "grad_norm": 0.36586554698754253, "learning_rate": 4.5497418197704e-06, "loss": 0.2654, "step": 15085 }, { "epoch": 0.69, "grad_norm": 0.9392539468708888, "learning_rate": 4.548494379968498e-06, "loss": 0.3351, "step": 15086 }, { "epoch": 0.69, "grad_norm": 0.38618541158994074, "learning_rate": 4.5472470608576745e-06, "loss": 0.2983, "step": 15087 }, { "epoch": 0.69, "grad_norm": 0.41676256011097607, "learning_rate": 4.545999862465548e-06, "loss": 0.2498, "step": 15088 }, { "epoch": 0.69, "grad_norm": 0.2965360007063014, "learning_rate": 4.54475278481973e-06, "loss": 0.1602, "step": 15089 }, { "epoch": 0.69, "grad_norm": 0.5585336256320842, "learning_rate": 4.543505827947827e-06, "loss": 0.2681, "step": 15090 }, { "epoch": 0.69, "grad_norm": 0.4230718777618868, "learning_rate": 4.5422589918774394e-06, "loss": 0.283, "step": 15091 }, { "epoch": 0.69, "grad_norm": 0.34965667524658567, "learning_rate": 4.5410122766361856e-06, "loss": 0.244, "step": 15092 }, { "epoch": 0.69, "grad_norm": 0.8598517455741441, "learning_rate": 4.539765682251654e-06, "loss": 0.4211, "step": 15093 }, { "epoch": 0.69, "grad_norm": 0.3321640258970063, "learning_rate": 4.538519208751452e-06, "loss": 0.2394, "step": 15094 }, { "epoch": 0.69, "grad_norm": 0.3544791254783406, "learning_rate": 4.537272856163166e-06, "loss": 0.2296, "step": 15095 }, { "epoch": 0.69, "grad_norm": 0.3370683532149129, "learning_rate": 4.536026624514395e-06, "loss": 0.1919, "step": 15096 }, { "epoch": 0.69, "grad_norm": 0.3519816524708853, "learning_rate": 4.534780513832732e-06, "loss": 0.256, "step": 15097 }, { "epoch": 0.69, "grad_norm": 0.7893360944159631, "learning_rate": 4.533534524145756e-06, "loss": 0.3928, "step": 15098 }, { "epoch": 0.69, "grad_norm": 0.35207603650473746, "learning_rate": 4.532288655481062e-06, "loss": 0.2638, "step": 15099 }, { "epoch": 0.69, "grad_norm": 0.3302165087591495, "learning_rate": 4.531042907866222e-06, "loss": 0.2405, "step": 15100 }, { "epoch": 0.69, "grad_norm": 1.397893228547607, "learning_rate": 4.5297972813288224e-06, "loss": 0.5013, "step": 15101 }, { "epoch": 0.69, "grad_norm": 0.29250874127762, "learning_rate": 4.528551775896442e-06, "loss": 0.1265, "step": 15102 }, { "epoch": 0.69, "grad_norm": 0.42699088595363477, "learning_rate": 4.527306391596649e-06, "loss": 0.2846, "step": 15103 }, { "epoch": 0.69, "grad_norm": 0.3408772024733025, "learning_rate": 4.526061128457017e-06, "loss": 0.2753, "step": 15104 }, { "epoch": 0.69, "grad_norm": 0.8400971633950676, "learning_rate": 4.524815986505116e-06, "loss": 0.3779, "step": 15105 }, { "epoch": 0.69, "grad_norm": 0.4446202030645186, "learning_rate": 4.5235709657685145e-06, "loss": 0.2713, "step": 15106 }, { "epoch": 0.69, "grad_norm": 0.6187808255775611, "learning_rate": 4.522326066274775e-06, "loss": 0.3508, "step": 15107 }, { "epoch": 0.69, "grad_norm": 0.20849948140057645, "learning_rate": 4.5210812880514485e-06, "loss": 0.1751, "step": 15108 }, { "epoch": 0.69, "grad_norm": 0.39445035142795265, "learning_rate": 4.5198366311261096e-06, "loss": 0.2509, "step": 15109 }, { "epoch": 0.69, "grad_norm": 0.4992766067966705, "learning_rate": 4.518592095526303e-06, "loss": 0.3234, "step": 15110 }, { "epoch": 0.69, "grad_norm": 0.3534522977642188, "learning_rate": 4.5173476812795865e-06, "loss": 0.3152, "step": 15111 }, { "epoch": 0.69, "grad_norm": 0.3364226341316586, "learning_rate": 4.516103388413506e-06, "loss": 0.1766, "step": 15112 }, { "epoch": 0.69, "grad_norm": 0.5815771648254908, "learning_rate": 4.514859216955611e-06, "loss": 0.34, "step": 15113 }, { "epoch": 0.69, "grad_norm": 0.29780907026965675, "learning_rate": 4.5136151669334486e-06, "loss": 0.1976, "step": 15114 }, { "epoch": 0.69, "grad_norm": 0.3409025764998092, "learning_rate": 4.512371238374556e-06, "loss": 0.218, "step": 15115 }, { "epoch": 0.69, "grad_norm": 0.36585498521769144, "learning_rate": 4.511127431306478e-06, "loss": 0.305, "step": 15116 }, { "epoch": 0.69, "grad_norm": 1.2031420307941076, "learning_rate": 4.509883745756745e-06, "loss": 0.784, "step": 15117 }, { "epoch": 0.69, "grad_norm": 0.3259677576015253, "learning_rate": 4.508640181752893e-06, "loss": 0.1907, "step": 15118 }, { "epoch": 0.69, "grad_norm": 1.3912817301305562, "learning_rate": 4.507396739322461e-06, "loss": 0.6477, "step": 15119 }, { "epoch": 0.69, "grad_norm": 0.3650590828810978, "learning_rate": 4.506153418492967e-06, "loss": 0.299, "step": 15120 }, { "epoch": 0.69, "grad_norm": 0.4426771133591469, "learning_rate": 4.504910219291941e-06, "loss": 0.2988, "step": 15121 }, { "epoch": 0.69, "grad_norm": 0.24926609078377604, "learning_rate": 4.503667141746906e-06, "loss": 0.131, "step": 15122 }, { "epoch": 0.69, "grad_norm": 0.38515243333087734, "learning_rate": 4.502424185885387e-06, "loss": 0.3258, "step": 15123 }, { "epoch": 0.69, "grad_norm": 0.8928850506600022, "learning_rate": 4.501181351734893e-06, "loss": 0.4836, "step": 15124 }, { "epoch": 0.69, "grad_norm": 0.37394642927086474, "learning_rate": 4.499938639322946e-06, "loss": 0.1998, "step": 15125 }, { "epoch": 0.69, "grad_norm": 0.6374029954078039, "learning_rate": 4.4986960486770596e-06, "loss": 0.3806, "step": 15126 }, { "epoch": 0.69, "grad_norm": 0.3830519819529997, "learning_rate": 4.4974535798247365e-06, "loss": 0.2544, "step": 15127 }, { "epoch": 0.69, "grad_norm": 0.23511853335918828, "learning_rate": 4.4962112327934915e-06, "loss": 0.1776, "step": 15128 }, { "epoch": 0.7, "grad_norm": 1.261271615580231, "learning_rate": 4.494969007610821e-06, "loss": 0.735, "step": 15129 }, { "epoch": 0.7, "grad_norm": 0.6270693890376513, "learning_rate": 4.493726904304232e-06, "loss": 0.3243, "step": 15130 }, { "epoch": 0.7, "grad_norm": 0.26182362537610016, "learning_rate": 4.492484922901226e-06, "loss": 0.2302, "step": 15131 }, { "epoch": 0.7, "grad_norm": 0.711115629860134, "learning_rate": 4.49124306342929e-06, "loss": 0.4191, "step": 15132 }, { "epoch": 0.7, "grad_norm": 0.5668940989141893, "learning_rate": 4.49000132591593e-06, "loss": 0.297, "step": 15133 }, { "epoch": 0.7, "grad_norm": 0.28806013384021917, "learning_rate": 4.4887597103886194e-06, "loss": 0.1918, "step": 15134 }, { "epoch": 0.7, "grad_norm": 0.3300699088710153, "learning_rate": 4.487518216874866e-06, "loss": 0.2474, "step": 15135 }, { "epoch": 0.7, "grad_norm": 0.43497766612974953, "learning_rate": 4.486276845402147e-06, "loss": 0.2773, "step": 15136 }, { "epoch": 0.7, "grad_norm": 0.5812307601851208, "learning_rate": 4.4850355959979385e-06, "loss": 0.3283, "step": 15137 }, { "epoch": 0.7, "grad_norm": 0.8597001226251257, "learning_rate": 4.483794468689728e-06, "loss": 0.302, "step": 15138 }, { "epoch": 0.7, "grad_norm": 0.3070103517755589, "learning_rate": 4.482553463504991e-06, "loss": 0.2743, "step": 15139 }, { "epoch": 0.7, "grad_norm": 0.5568791781738436, "learning_rate": 4.481312580471208e-06, "loss": 0.302, "step": 15140 }, { "epoch": 0.7, "grad_norm": 0.22182741282696428, "learning_rate": 4.48007181961584e-06, "loss": 0.1232, "step": 15141 }, { "epoch": 0.7, "grad_norm": 0.9159255627133426, "learning_rate": 4.478831180966366e-06, "loss": 0.4672, "step": 15142 }, { "epoch": 0.7, "grad_norm": 0.33375505148038936, "learning_rate": 4.477590664550243e-06, "loss": 0.2759, "step": 15143 }, { "epoch": 0.7, "grad_norm": 0.6061394122079311, "learning_rate": 4.476350270394942e-06, "loss": 0.2691, "step": 15144 }, { "epoch": 0.7, "grad_norm": 0.9824000046889556, "learning_rate": 4.475109998527926e-06, "loss": 0.4374, "step": 15145 }, { "epoch": 0.7, "grad_norm": 0.30675684260001135, "learning_rate": 4.473869848976644e-06, "loss": 0.2207, "step": 15146 }, { "epoch": 0.7, "grad_norm": 0.3585500081526208, "learning_rate": 4.472629821768559e-06, "loss": 0.2931, "step": 15147 }, { "epoch": 0.7, "grad_norm": 0.27269290635672777, "learning_rate": 4.471389916931126e-06, "loss": 0.1073, "step": 15148 }, { "epoch": 0.7, "grad_norm": 0.4446624915191592, "learning_rate": 4.470150134491789e-06, "loss": 0.2956, "step": 15149 }, { "epoch": 0.7, "grad_norm": 1.025443726870926, "learning_rate": 4.4689104744779995e-06, "loss": 0.4788, "step": 15150 }, { "epoch": 0.7, "grad_norm": 0.3364710987324278, "learning_rate": 4.467670936917195e-06, "loss": 0.2164, "step": 15151 }, { "epoch": 0.7, "grad_norm": 0.41522700490765285, "learning_rate": 4.466431521836832e-06, "loss": 0.3277, "step": 15152 }, { "epoch": 0.7, "grad_norm": 1.6657370420442392, "learning_rate": 4.465192229264337e-06, "loss": 0.8457, "step": 15153 }, { "epoch": 0.7, "grad_norm": 0.4090978091081507, "learning_rate": 4.463953059227155e-06, "loss": 0.1666, "step": 15154 }, { "epoch": 0.7, "grad_norm": 0.340852726846153, "learning_rate": 4.462714011752715e-06, "loss": 0.2967, "step": 15155 }, { "epoch": 0.7, "grad_norm": 0.44549323991784995, "learning_rate": 4.461475086868448e-06, "loss": 0.2552, "step": 15156 }, { "epoch": 0.7, "grad_norm": 0.432364113961003, "learning_rate": 4.460236284601788e-06, "loss": 0.1771, "step": 15157 }, { "epoch": 0.7, "grad_norm": 0.571732574401387, "learning_rate": 4.4589976049801545e-06, "loss": 0.3902, "step": 15158 }, { "epoch": 0.7, "grad_norm": 0.3730914811006373, "learning_rate": 4.4577590480309764e-06, "loss": 0.3091, "step": 15159 }, { "epoch": 0.7, "grad_norm": 0.6605139238714878, "learning_rate": 4.456520613781669e-06, "loss": 0.3784, "step": 15160 }, { "epoch": 0.7, "grad_norm": 0.3670746179168251, "learning_rate": 4.45528230225965e-06, "loss": 0.2478, "step": 15161 }, { "epoch": 0.7, "grad_norm": 0.347323995347443, "learning_rate": 4.454044113492343e-06, "loss": 0.1832, "step": 15162 }, { "epoch": 0.7, "grad_norm": 0.36546359607808315, "learning_rate": 4.452806047507149e-06, "loss": 0.313, "step": 15163 }, { "epoch": 0.7, "grad_norm": 0.32087868159019306, "learning_rate": 4.451568104331483e-06, "loss": 0.2035, "step": 15164 }, { "epoch": 0.7, "grad_norm": 0.8272147742954485, "learning_rate": 4.450330283992755e-06, "loss": 0.5227, "step": 15165 }, { "epoch": 0.7, "grad_norm": 0.4658055544899943, "learning_rate": 4.4490925865183625e-06, "loss": 0.3229, "step": 15166 }, { "epoch": 0.7, "grad_norm": 0.24941655836853, "learning_rate": 4.447855011935714e-06, "loss": 0.1937, "step": 15167 }, { "epoch": 0.7, "grad_norm": 0.29781053769894394, "learning_rate": 4.446617560272195e-06, "loss": 0.1612, "step": 15168 }, { "epoch": 0.7, "grad_norm": 0.7789594669533275, "learning_rate": 4.44538023155522e-06, "loss": 0.3679, "step": 15169 }, { "epoch": 0.7, "grad_norm": 0.328207736666487, "learning_rate": 4.444143025812169e-06, "loss": 0.2383, "step": 15170 }, { "epoch": 0.7, "grad_norm": 0.3693174230696078, "learning_rate": 4.4429059430704404e-06, "loss": 0.285, "step": 15171 }, { "epoch": 0.7, "grad_norm": 0.6970780739660067, "learning_rate": 4.441668983357417e-06, "loss": 0.3241, "step": 15172 }, { "epoch": 0.7, "grad_norm": 0.415308730160176, "learning_rate": 4.4404321467004795e-06, "loss": 0.302, "step": 15173 }, { "epoch": 0.7, "grad_norm": 0.2499223827371469, "learning_rate": 4.439195433127022e-06, "loss": 0.0821, "step": 15174 }, { "epoch": 0.7, "grad_norm": 0.3091369780692317, "learning_rate": 4.437958842664415e-06, "loss": 0.2636, "step": 15175 }, { "epoch": 0.7, "grad_norm": 0.3821456762521314, "learning_rate": 4.436722375340042e-06, "loss": 0.2706, "step": 15176 }, { "epoch": 0.7, "grad_norm": 0.8334632285193361, "learning_rate": 4.435486031181271e-06, "loss": 0.339, "step": 15177 }, { "epoch": 0.7, "grad_norm": 0.4382959379682711, "learning_rate": 4.434249810215474e-06, "loss": 0.3194, "step": 15178 }, { "epoch": 0.7, "grad_norm": 0.3287157071968033, "learning_rate": 4.4330137124700266e-06, "loss": 0.254, "step": 15179 }, { "epoch": 0.7, "grad_norm": 0.28609541468971095, "learning_rate": 4.431777737972287e-06, "loss": 0.0975, "step": 15180 }, { "epoch": 0.7, "grad_norm": 1.2223946804272083, "learning_rate": 4.430541886749621e-06, "loss": 0.5811, "step": 15181 }, { "epoch": 0.7, "grad_norm": 0.3606548208352364, "learning_rate": 4.429306158829394e-06, "loss": 0.2969, "step": 15182 }, { "epoch": 0.7, "grad_norm": 0.41762959200884686, "learning_rate": 4.4280705542389545e-06, "loss": 0.2617, "step": 15183 }, { "epoch": 0.7, "grad_norm": 0.9977943084934451, "learning_rate": 4.426835073005668e-06, "loss": 0.4398, "step": 15184 }, { "epoch": 0.7, "grad_norm": 0.36809478866697803, "learning_rate": 4.425599715156873e-06, "loss": 0.2834, "step": 15185 }, { "epoch": 0.7, "grad_norm": 0.2718645316486167, "learning_rate": 4.424364480719935e-06, "loss": 0.1746, "step": 15186 }, { "epoch": 0.7, "grad_norm": 0.35608749242325954, "learning_rate": 4.42312936972219e-06, "loss": 0.2466, "step": 15187 }, { "epoch": 0.7, "grad_norm": 0.5909139021603691, "learning_rate": 4.421894382190989e-06, "loss": 0.2439, "step": 15188 }, { "epoch": 0.7, "grad_norm": 0.7648167561706752, "learning_rate": 4.420659518153667e-06, "loss": 0.402, "step": 15189 }, { "epoch": 0.7, "grad_norm": 0.38322427692784544, "learning_rate": 4.419424777637565e-06, "loss": 0.242, "step": 15190 }, { "epoch": 0.7, "grad_norm": 0.3684128637202067, "learning_rate": 4.418190160670025e-06, "loss": 0.2678, "step": 15191 }, { "epoch": 0.7, "grad_norm": 0.3783977894820084, "learning_rate": 4.416955667278371e-06, "loss": 0.2546, "step": 15192 }, { "epoch": 0.7, "grad_norm": 0.4155417356424887, "learning_rate": 4.4157212974899395e-06, "loss": 0.1577, "step": 15193 }, { "epoch": 0.7, "grad_norm": 0.36875080693216306, "learning_rate": 4.414487051332055e-06, "loss": 0.2681, "step": 15194 }, { "epoch": 0.7, "grad_norm": 0.39276530892772926, "learning_rate": 4.413252928832042e-06, "loss": 0.3069, "step": 15195 }, { "epoch": 0.7, "grad_norm": 1.241198645915899, "learning_rate": 4.412018930017229e-06, "loss": 0.4091, "step": 15196 }, { "epoch": 0.7, "grad_norm": 0.3544435422459911, "learning_rate": 4.410785054914928e-06, "loss": 0.2626, "step": 15197 }, { "epoch": 0.7, "grad_norm": 0.297902272210225, "learning_rate": 4.409551303552457e-06, "loss": 0.2099, "step": 15198 }, { "epoch": 0.7, "grad_norm": 0.5250432468484927, "learning_rate": 4.408317675957134e-06, "loss": 0.3443, "step": 15199 }, { "epoch": 0.7, "grad_norm": 0.41159212794932065, "learning_rate": 4.407084172156271e-06, "loss": 0.2098, "step": 15200 }, { "epoch": 0.7, "grad_norm": 0.8147864667367514, "learning_rate": 4.4058507921771746e-06, "loss": 0.4439, "step": 15201 }, { "epoch": 0.7, "grad_norm": 0.5850096465388983, "learning_rate": 4.404617536047145e-06, "loss": 0.3615, "step": 15202 }, { "epoch": 0.7, "grad_norm": 0.2830120917119553, "learning_rate": 4.4033844037934915e-06, "loss": 0.2028, "step": 15203 }, { "epoch": 0.7, "grad_norm": 1.2862854861268136, "learning_rate": 4.402151395443513e-06, "loss": 0.577, "step": 15204 }, { "epoch": 0.7, "grad_norm": 0.4766336887280671, "learning_rate": 4.400918511024511e-06, "loss": 0.2348, "step": 15205 }, { "epoch": 0.7, "grad_norm": 0.3037825367387269, "learning_rate": 4.399685750563772e-06, "loss": 0.2114, "step": 15206 }, { "epoch": 0.7, "grad_norm": 0.5241600431443554, "learning_rate": 4.398453114088595e-06, "loss": 0.3615, "step": 15207 }, { "epoch": 0.7, "grad_norm": 1.2410112459262153, "learning_rate": 4.397220601626269e-06, "loss": 0.69, "step": 15208 }, { "epoch": 0.7, "grad_norm": 0.3785205854962509, "learning_rate": 4.395988213204075e-06, "loss": 0.1711, "step": 15209 }, { "epoch": 0.7, "grad_norm": 0.3262711131310727, "learning_rate": 4.394755948849305e-06, "loss": 0.2204, "step": 15210 }, { "epoch": 0.7, "grad_norm": 0.4013708139699758, "learning_rate": 4.393523808589233e-06, "loss": 0.2782, "step": 15211 }, { "epoch": 0.7, "grad_norm": 0.4123241730500596, "learning_rate": 4.39229179245114e-06, "loss": 0.267, "step": 15212 }, { "epoch": 0.7, "grad_norm": 1.0177646815586514, "learning_rate": 4.391059900462305e-06, "loss": 0.2697, "step": 15213 }, { "epoch": 0.7, "grad_norm": 0.36693782192574476, "learning_rate": 4.389828132649995e-06, "loss": 0.2817, "step": 15214 }, { "epoch": 0.7, "grad_norm": 0.43986466957214104, "learning_rate": 4.388596489041483e-06, "loss": 0.2441, "step": 15215 }, { "epoch": 0.7, "grad_norm": 0.6462477854929174, "learning_rate": 4.387364969664034e-06, "loss": 0.2609, "step": 15216 }, { "epoch": 0.7, "grad_norm": 1.102230604291616, "learning_rate": 4.386133574544921e-06, "loss": 0.3755, "step": 15217 }, { "epoch": 0.7, "grad_norm": 0.24842763666477394, "learning_rate": 4.384902303711396e-06, "loss": 0.1861, "step": 15218 }, { "epoch": 0.7, "grad_norm": 0.29150297808990167, "learning_rate": 4.383671157190725e-06, "loss": 0.2437, "step": 15219 }, { "epoch": 0.7, "grad_norm": 1.6465350819487814, "learning_rate": 4.382440135010159e-06, "loss": 0.7005, "step": 15220 }, { "epoch": 0.7, "grad_norm": 0.3656908091192963, "learning_rate": 4.381209237196953e-06, "loss": 0.257, "step": 15221 }, { "epoch": 0.7, "grad_norm": 0.5116965473359221, "learning_rate": 4.379978463778364e-06, "loss": 0.2515, "step": 15222 }, { "epoch": 0.7, "grad_norm": 0.48409627170912534, "learning_rate": 4.378747814781629e-06, "loss": 0.3307, "step": 15223 }, { "epoch": 0.7, "grad_norm": 0.36413435363247004, "learning_rate": 4.377517290234002e-06, "loss": 0.2552, "step": 15224 }, { "epoch": 0.7, "grad_norm": 0.4612892019653575, "learning_rate": 4.3762868901627265e-06, "loss": 0.2807, "step": 15225 }, { "epoch": 0.7, "grad_norm": 0.2588200211905363, "learning_rate": 4.375056614595035e-06, "loss": 0.2088, "step": 15226 }, { "epoch": 0.7, "grad_norm": 0.43369271773208273, "learning_rate": 4.373826463558173e-06, "loss": 0.2585, "step": 15227 }, { "epoch": 0.7, "grad_norm": 0.5453599704955445, "learning_rate": 4.372596437079362e-06, "loss": 0.3387, "step": 15228 }, { "epoch": 0.7, "grad_norm": 0.4316132246014325, "learning_rate": 4.3713665351858505e-06, "loss": 0.205, "step": 15229 }, { "epoch": 0.7, "grad_norm": 0.4251608638368021, "learning_rate": 4.370136757904858e-06, "loss": 0.2816, "step": 15230 }, { "epoch": 0.7, "grad_norm": 0.3331584160039521, "learning_rate": 4.368907105263608e-06, "loss": 0.2923, "step": 15231 }, { "epoch": 0.7, "grad_norm": 0.4084735270283578, "learning_rate": 4.367677577289331e-06, "loss": 0.1432, "step": 15232 }, { "epoch": 0.7, "grad_norm": 0.434937037567238, "learning_rate": 4.366448174009237e-06, "loss": 0.2471, "step": 15233 }, { "epoch": 0.7, "grad_norm": 0.29841579164175874, "learning_rate": 4.365218895450558e-06, "loss": 0.2759, "step": 15234 }, { "epoch": 0.7, "grad_norm": 0.9832731348807893, "learning_rate": 4.363989741640498e-06, "loss": 0.2984, "step": 15235 }, { "epoch": 0.7, "grad_norm": 0.46552729126392167, "learning_rate": 4.362760712606278e-06, "loss": 0.2725, "step": 15236 }, { "epoch": 0.7, "grad_norm": 0.5754906200760573, "learning_rate": 4.3615318083750965e-06, "loss": 0.3869, "step": 15237 }, { "epoch": 0.7, "grad_norm": 0.2890323118812972, "learning_rate": 4.3603030289741675e-06, "loss": 0.2278, "step": 15238 }, { "epoch": 0.7, "grad_norm": 0.2867669971959479, "learning_rate": 4.359074374430698e-06, "loss": 0.1718, "step": 15239 }, { "epoch": 0.7, "grad_norm": 0.45979811478350824, "learning_rate": 4.357845844771881e-06, "loss": 0.3222, "step": 15240 }, { "epoch": 0.7, "grad_norm": 0.9335988604252408, "learning_rate": 4.356617440024919e-06, "loss": 0.4251, "step": 15241 }, { "epoch": 0.7, "grad_norm": 0.2794366968806048, "learning_rate": 4.355389160217012e-06, "loss": 0.2131, "step": 15242 }, { "epoch": 0.7, "grad_norm": 0.5344644367526443, "learning_rate": 4.354161005375344e-06, "loss": 0.4025, "step": 15243 }, { "epoch": 0.7, "grad_norm": 0.30748066704275, "learning_rate": 4.352932975527113e-06, "loss": 0.2049, "step": 15244 }, { "epoch": 0.7, "grad_norm": 0.39625816507850115, "learning_rate": 4.351705070699498e-06, "loss": 0.2202, "step": 15245 }, { "epoch": 0.7, "grad_norm": 0.38004122383950417, "learning_rate": 4.3504772909196945e-06, "loss": 0.3019, "step": 15246 }, { "epoch": 0.7, "grad_norm": 0.5385787284656914, "learning_rate": 4.3492496362148786e-06, "loss": 0.3345, "step": 15247 }, { "epoch": 0.7, "grad_norm": 0.8627607386966181, "learning_rate": 4.348022106612226e-06, "loss": 0.4528, "step": 15248 }, { "epoch": 0.7, "grad_norm": 0.3384202699824737, "learning_rate": 4.346794702138921e-06, "loss": 0.2327, "step": 15249 }, { "epoch": 0.7, "grad_norm": 0.28862575366069737, "learning_rate": 4.345567422822124e-06, "loss": 0.2534, "step": 15250 }, { "epoch": 0.7, "grad_norm": 0.5965613187556168, "learning_rate": 4.344340268689023e-06, "loss": 0.2927, "step": 15251 }, { "epoch": 0.7, "grad_norm": 0.31121967977888754, "learning_rate": 4.343113239766774e-06, "loss": 0.2083, "step": 15252 }, { "epoch": 0.7, "grad_norm": 1.540221731632707, "learning_rate": 4.34188633608255e-06, "loss": 0.6718, "step": 15253 }, { "epoch": 0.7, "grad_norm": 0.3067809960612206, "learning_rate": 4.3406595576635024e-06, "loss": 0.2528, "step": 15254 }, { "epoch": 0.7, "grad_norm": 0.3898160306616826, "learning_rate": 4.3394329045368e-06, "loss": 0.2737, "step": 15255 }, { "epoch": 0.7, "grad_norm": 0.7241511996085377, "learning_rate": 4.3382063767296e-06, "loss": 0.4212, "step": 15256 }, { "epoch": 0.7, "grad_norm": 0.5570099951119658, "learning_rate": 4.336979974269051e-06, "loss": 0.3305, "step": 15257 }, { "epoch": 0.7, "grad_norm": 0.24273102202897184, "learning_rate": 4.335753697182308e-06, "loss": 0.1966, "step": 15258 }, { "epoch": 0.7, "grad_norm": 0.3712279567200119, "learning_rate": 4.334527545496521e-06, "loss": 0.2603, "step": 15259 }, { "epoch": 0.7, "grad_norm": 0.6284518683071875, "learning_rate": 4.333301519238831e-06, "loss": 0.3791, "step": 15260 }, { "epoch": 0.7, "grad_norm": 0.4077154996296781, "learning_rate": 4.332075618436387e-06, "loss": 0.3386, "step": 15261 }, { "epoch": 0.7, "grad_norm": 0.3946061288676655, "learning_rate": 4.3308498431163186e-06, "loss": 0.2689, "step": 15262 }, { "epoch": 0.7, "grad_norm": 0.5036357230123738, "learning_rate": 4.329624193305778e-06, "loss": 0.2652, "step": 15263 }, { "epoch": 0.7, "grad_norm": 0.3655351573272514, "learning_rate": 4.328398669031889e-06, "loss": 0.2299, "step": 15264 }, { "epoch": 0.7, "grad_norm": 0.3474090068782338, "learning_rate": 4.327173270321791e-06, "loss": 0.1746, "step": 15265 }, { "epoch": 0.7, "grad_norm": 0.4313793028765869, "learning_rate": 4.3259479972026085e-06, "loss": 0.2686, "step": 15266 }, { "epoch": 0.7, "grad_norm": 0.3565267077601619, "learning_rate": 4.3247228497014615e-06, "loss": 0.2799, "step": 15267 }, { "epoch": 0.7, "grad_norm": 1.0055404634180432, "learning_rate": 4.323497827845489e-06, "loss": 0.315, "step": 15268 }, { "epoch": 0.7, "grad_norm": 0.8738501619775048, "learning_rate": 4.322272931661798e-06, "loss": 0.396, "step": 15269 }, { "epoch": 0.7, "grad_norm": 0.27585104631782204, "learning_rate": 4.321048161177518e-06, "loss": 0.2645, "step": 15270 }, { "epoch": 0.7, "grad_norm": 0.3066696254851739, "learning_rate": 4.319823516419753e-06, "loss": 0.0973, "step": 15271 }, { "epoch": 0.7, "grad_norm": 0.620490167366543, "learning_rate": 4.318598997415621e-06, "loss": 0.3449, "step": 15272 }, { "epoch": 0.7, "grad_norm": 0.42353675525848405, "learning_rate": 4.317374604192236e-06, "loss": 0.326, "step": 15273 }, { "epoch": 0.7, "grad_norm": 0.3852153855241123, "learning_rate": 4.316150336776696e-06, "loss": 0.3238, "step": 15274 }, { "epoch": 0.7, "grad_norm": 0.6077263572317136, "learning_rate": 4.314926195196109e-06, "loss": 0.2004, "step": 15275 }, { "epoch": 0.7, "grad_norm": 0.27750099715007726, "learning_rate": 4.31370217947758e-06, "loss": 0.2107, "step": 15276 }, { "epoch": 0.7, "grad_norm": 0.5058900067838109, "learning_rate": 4.312478289648202e-06, "loss": 0.2397, "step": 15277 }, { "epoch": 0.7, "grad_norm": 0.30815081350458856, "learning_rate": 4.311254525735075e-06, "loss": 0.226, "step": 15278 }, { "epoch": 0.7, "grad_norm": 0.4477222467726712, "learning_rate": 4.310030887765288e-06, "loss": 0.3157, "step": 15279 }, { "epoch": 0.7, "grad_norm": 0.7484973045031489, "learning_rate": 4.308807375765932e-06, "loss": 0.3812, "step": 15280 }, { "epoch": 0.7, "grad_norm": 0.4744347175962742, "learning_rate": 4.307583989764094e-06, "loss": 0.2174, "step": 15281 }, { "epoch": 0.7, "grad_norm": 0.31054964720380523, "learning_rate": 4.306360729786867e-06, "loss": 0.2752, "step": 15282 }, { "epoch": 0.7, "grad_norm": 0.24985228829383402, "learning_rate": 4.30513759586132e-06, "loss": 0.1202, "step": 15283 }, { "epoch": 0.7, "grad_norm": 0.6048465249555531, "learning_rate": 4.303914588014538e-06, "loss": 0.2514, "step": 15284 }, { "epoch": 0.7, "grad_norm": 0.36516203421122423, "learning_rate": 4.3026917062736015e-06, "loss": 0.2985, "step": 15285 }, { "epoch": 0.7, "grad_norm": 0.3485125409525159, "learning_rate": 4.301468950665575e-06, "loss": 0.3194, "step": 15286 }, { "epoch": 0.7, "grad_norm": 1.6112054817186665, "learning_rate": 4.300246321217538e-06, "loss": 0.7396, "step": 15287 }, { "epoch": 0.7, "grad_norm": 0.36182032890410737, "learning_rate": 4.29902381795655e-06, "loss": 0.2009, "step": 15288 }, { "epoch": 0.7, "grad_norm": 0.31005876782673997, "learning_rate": 4.29780144090968e-06, "loss": 0.1824, "step": 15289 }, { "epoch": 0.7, "grad_norm": 0.37821869657334084, "learning_rate": 4.296579190103993e-06, "loss": 0.3167, "step": 15290 }, { "epoch": 0.7, "grad_norm": 0.3202556017833389, "learning_rate": 4.295357065566543e-06, "loss": 0.2149, "step": 15291 }, { "epoch": 0.7, "grad_norm": 1.3071745847842602, "learning_rate": 4.294135067324389e-06, "loss": 0.7354, "step": 15292 }, { "epoch": 0.7, "grad_norm": 0.5090919673158785, "learning_rate": 4.292913195404587e-06, "loss": 0.325, "step": 15293 }, { "epoch": 0.7, "grad_norm": 0.28805711218767444, "learning_rate": 4.291691449834187e-06, "loss": 0.2055, "step": 15294 }, { "epoch": 0.7, "grad_norm": 0.31950104136296664, "learning_rate": 4.290469830640238e-06, "loss": 0.1844, "step": 15295 }, { "epoch": 0.7, "grad_norm": 0.6678118621798554, "learning_rate": 4.2892483378497806e-06, "loss": 0.3673, "step": 15296 }, { "epoch": 0.7, "grad_norm": 0.374113349985378, "learning_rate": 4.288026971489861e-06, "loss": 0.2311, "step": 15297 }, { "epoch": 0.7, "grad_norm": 0.37379184730111825, "learning_rate": 4.286805731587519e-06, "loss": 0.2901, "step": 15298 }, { "epoch": 0.7, "grad_norm": 0.9464913903225278, "learning_rate": 4.2855846181697945e-06, "loss": 0.4045, "step": 15299 }, { "epoch": 0.7, "grad_norm": 0.37999810361032943, "learning_rate": 4.284363631263716e-06, "loss": 0.2759, "step": 15300 }, { "epoch": 0.7, "grad_norm": 0.2649797518446157, "learning_rate": 4.283142770896318e-06, "loss": 0.1754, "step": 15301 }, { "epoch": 0.7, "grad_norm": 0.5086918035465757, "learning_rate": 4.281922037094632e-06, "loss": 0.3281, "step": 15302 }, { "epoch": 0.7, "grad_norm": 0.3510898679729995, "learning_rate": 4.2807014298856775e-06, "loss": 0.2862, "step": 15303 }, { "epoch": 0.7, "grad_norm": 1.1363080873174627, "learning_rate": 4.279480949296485e-06, "loss": 0.4166, "step": 15304 }, { "epoch": 0.7, "grad_norm": 0.39447667770067746, "learning_rate": 4.278260595354067e-06, "loss": 0.3177, "step": 15305 }, { "epoch": 0.7, "grad_norm": 0.34960040932559466, "learning_rate": 4.277040368085444e-06, "loss": 0.2787, "step": 15306 }, { "epoch": 0.7, "grad_norm": 0.3953989447219193, "learning_rate": 4.275820267517636e-06, "loss": 0.1433, "step": 15307 }, { "epoch": 0.7, "grad_norm": 0.5208892383127405, "learning_rate": 4.2746002936776465e-06, "loss": 0.297, "step": 15308 }, { "epoch": 0.7, "grad_norm": 0.3281915526683968, "learning_rate": 4.273380446592492e-06, "loss": 0.2552, "step": 15309 }, { "epoch": 0.7, "grad_norm": 0.39651922581499216, "learning_rate": 4.272160726289167e-06, "loss": 0.2585, "step": 15310 }, { "epoch": 0.7, "grad_norm": 1.242398122481821, "learning_rate": 4.270941132794691e-06, "loss": 0.4497, "step": 15311 }, { "epoch": 0.7, "grad_norm": 0.3531697437808292, "learning_rate": 4.269721666136053e-06, "loss": 0.2438, "step": 15312 }, { "epoch": 0.7, "grad_norm": 0.85249707842036, "learning_rate": 4.268502326340258e-06, "loss": 0.4163, "step": 15313 }, { "epoch": 0.7, "grad_norm": 0.3004810278797122, "learning_rate": 4.267283113434293e-06, "loss": 0.2111, "step": 15314 }, { "epoch": 0.7, "grad_norm": 0.4020644499421195, "learning_rate": 4.2660640274451545e-06, "loss": 0.2596, "step": 15315 }, { "epoch": 0.7, "grad_norm": 0.47869915659433593, "learning_rate": 4.264845068399837e-06, "loss": 0.3198, "step": 15316 }, { "epoch": 0.7, "grad_norm": 0.41671310147512824, "learning_rate": 4.2636262363253174e-06, "loss": 0.2298, "step": 15317 }, { "epoch": 0.7, "grad_norm": 0.42851345268305224, "learning_rate": 4.262407531248585e-06, "loss": 0.29, "step": 15318 }, { "epoch": 0.7, "grad_norm": 0.7433373801554091, "learning_rate": 4.261188953196622e-06, "loss": 0.3993, "step": 15319 }, { "epoch": 0.7, "grad_norm": 0.4824341391987971, "learning_rate": 4.259970502196402e-06, "loss": 0.1741, "step": 15320 }, { "epoch": 0.7, "grad_norm": 0.3402527677811564, "learning_rate": 4.258752178274906e-06, "loss": 0.2543, "step": 15321 }, { "epoch": 0.7, "grad_norm": 0.39359682227474996, "learning_rate": 4.257533981459097e-06, "loss": 0.3179, "step": 15322 }, { "epoch": 0.7, "grad_norm": 0.44511844007639406, "learning_rate": 4.256315911775957e-06, "loss": 0.0956, "step": 15323 }, { "epoch": 0.7, "grad_norm": 0.3528584713468951, "learning_rate": 4.255097969252448e-06, "loss": 0.2592, "step": 15324 }, { "epoch": 0.7, "grad_norm": 0.6230787044164713, "learning_rate": 4.253880153915527e-06, "loss": 0.3441, "step": 15325 }, { "epoch": 0.7, "grad_norm": 0.5722242935330832, "learning_rate": 4.252662465792167e-06, "loss": 0.3249, "step": 15326 }, { "epoch": 0.7, "grad_norm": 0.27571447859213316, "learning_rate": 4.2514449049093135e-06, "loss": 0.1762, "step": 15327 }, { "epoch": 0.7, "grad_norm": 1.2593432048010116, "learning_rate": 4.2502274712939355e-06, "loss": 0.7172, "step": 15328 }, { "epoch": 0.7, "grad_norm": 0.275134740221021, "learning_rate": 4.249010164972976e-06, "loss": 0.2248, "step": 15329 }, { "epoch": 0.7, "grad_norm": 0.333883935709833, "learning_rate": 4.247792985973392e-06, "loss": 0.2134, "step": 15330 }, { "epoch": 0.7, "grad_norm": 0.7890296897483274, "learning_rate": 4.246575934322124e-06, "loss": 0.3778, "step": 15331 }, { "epoch": 0.7, "grad_norm": 1.3482788625239726, "learning_rate": 4.24535901004612e-06, "loss": 0.7107, "step": 15332 }, { "epoch": 0.7, "grad_norm": 0.29484148588811754, "learning_rate": 4.244142213172325e-06, "loss": 0.1989, "step": 15333 }, { "epoch": 0.7, "grad_norm": 0.38927109355643214, "learning_rate": 4.2429255437276696e-06, "loss": 0.3424, "step": 15334 }, { "epoch": 0.7, "grad_norm": 0.40408135239629644, "learning_rate": 4.241709001739094e-06, "loss": 0.236, "step": 15335 }, { "epoch": 0.7, "grad_norm": 0.3215587394549872, "learning_rate": 4.240492587233534e-06, "loss": 0.1837, "step": 15336 }, { "epoch": 0.7, "grad_norm": 0.3868909270445274, "learning_rate": 4.239276300237916e-06, "loss": 0.282, "step": 15337 }, { "epoch": 0.7, "grad_norm": 1.401761542437, "learning_rate": 4.23806014077917e-06, "loss": 0.6074, "step": 15338 }, { "epoch": 0.7, "grad_norm": 0.39714586362890947, "learning_rate": 4.236844108884215e-06, "loss": 0.2626, "step": 15339 }, { "epoch": 0.7, "grad_norm": 0.22237809552599133, "learning_rate": 4.235628204579978e-06, "loss": 0.1532, "step": 15340 }, { "epoch": 0.7, "grad_norm": 0.4123042403345458, "learning_rate": 4.23441242789338e-06, "loss": 0.3367, "step": 15341 }, { "epoch": 0.7, "grad_norm": 0.4098401997782652, "learning_rate": 4.2331967788513295e-06, "loss": 0.2837, "step": 15342 }, { "epoch": 0.7, "grad_norm": 0.5357102564055954, "learning_rate": 4.231981257480749e-06, "loss": 0.2517, "step": 15343 }, { "epoch": 0.7, "grad_norm": 1.1471298984080842, "learning_rate": 4.230765863808537e-06, "loss": 0.5929, "step": 15344 }, { "epoch": 0.7, "grad_norm": 0.301588946012003, "learning_rate": 4.229550597861615e-06, "loss": 0.274, "step": 15345 }, { "epoch": 0.7, "grad_norm": 0.5204227914579626, "learning_rate": 4.2283354596668765e-06, "loss": 0.2712, "step": 15346 }, { "epoch": 0.71, "grad_norm": 0.5353373222632442, "learning_rate": 4.227120449251233e-06, "loss": 0.2951, "step": 15347 }, { "epoch": 0.71, "grad_norm": 0.3597781096970731, "learning_rate": 4.225905566641575e-06, "loss": 0.2743, "step": 15348 }, { "epoch": 0.71, "grad_norm": 0.27071760758827923, "learning_rate": 4.224690811864803e-06, "loss": 0.1835, "step": 15349 }, { "epoch": 0.71, "grad_norm": 1.438326998759821, "learning_rate": 4.223476184947813e-06, "loss": 0.5187, "step": 15350 }, { "epoch": 0.71, "grad_norm": 0.6135019919869543, "learning_rate": 4.222261685917489e-06, "loss": 0.311, "step": 15351 }, { "epoch": 0.71, "grad_norm": 0.4365307279227308, "learning_rate": 4.221047314800723e-06, "loss": 0.3268, "step": 15352 }, { "epoch": 0.71, "grad_norm": 0.4219390302659312, "learning_rate": 4.219833071624404e-06, "loss": 0.2778, "step": 15353 }, { "epoch": 0.71, "grad_norm": 0.6347707610036952, "learning_rate": 4.218618956415406e-06, "loss": 0.3517, "step": 15354 }, { "epoch": 0.71, "grad_norm": 0.24197152271275132, "learning_rate": 4.217404969200615e-06, "loss": 0.2025, "step": 15355 }, { "epoch": 0.71, "grad_norm": 1.4884478240152692, "learning_rate": 4.2161911100069005e-06, "loss": 0.3149, "step": 15356 }, { "epoch": 0.71, "grad_norm": 0.30795386481110154, "learning_rate": 4.214977378861141e-06, "loss": 0.2648, "step": 15357 }, { "epoch": 0.71, "grad_norm": 0.39820627860602775, "learning_rate": 4.213763775790207e-06, "loss": 0.3269, "step": 15358 }, { "epoch": 0.71, "grad_norm": 0.7240613409280784, "learning_rate": 4.21255030082097e-06, "loss": 0.2814, "step": 15359 }, { "epoch": 0.71, "grad_norm": 0.3081369324763965, "learning_rate": 4.21133695398029e-06, "loss": 0.207, "step": 15360 }, { "epoch": 0.71, "grad_norm": 0.24893359969260237, "learning_rate": 4.210123735295025e-06, "loss": 0.2571, "step": 15361 }, { "epoch": 0.71, "grad_norm": 1.5268217748586561, "learning_rate": 4.208910644792047e-06, "loss": 0.661, "step": 15362 }, { "epoch": 0.71, "grad_norm": 0.33942435288790757, "learning_rate": 4.207697682498202e-06, "loss": 0.2024, "step": 15363 }, { "epoch": 0.71, "grad_norm": 0.587004253994649, "learning_rate": 4.206484848440351e-06, "loss": 0.3671, "step": 15364 }, { "epoch": 0.71, "grad_norm": 0.46543224748117207, "learning_rate": 4.205272142645338e-06, "loss": 0.3079, "step": 15365 }, { "epoch": 0.71, "grad_norm": 0.44760822139769224, "learning_rate": 4.204059565140015e-06, "loss": 0.1927, "step": 15366 }, { "epoch": 0.71, "grad_norm": 0.25538643422192153, "learning_rate": 4.20284711595123e-06, "loss": 0.1812, "step": 15367 }, { "epoch": 0.71, "grad_norm": 1.1585266365412363, "learning_rate": 4.201634795105819e-06, "loss": 0.5725, "step": 15368 }, { "epoch": 0.71, "grad_norm": 0.2781257863193359, "learning_rate": 4.20042260263063e-06, "loss": 0.2028, "step": 15369 }, { "epoch": 0.71, "grad_norm": 0.3864657368699799, "learning_rate": 4.199210538552489e-06, "loss": 0.3064, "step": 15370 }, { "epoch": 0.71, "grad_norm": 0.8209636737292687, "learning_rate": 4.197998602898238e-06, "loss": 0.4517, "step": 15371 }, { "epoch": 0.71, "grad_norm": 0.17154239896339205, "learning_rate": 4.196786795694708e-06, "loss": 0.0639, "step": 15372 }, { "epoch": 0.71, "grad_norm": 0.24529799628577972, "learning_rate": 4.195575116968722e-06, "loss": 0.2418, "step": 15373 }, { "epoch": 0.71, "grad_norm": 1.3180280157563797, "learning_rate": 4.1943635667471095e-06, "loss": 0.5779, "step": 15374 }, { "epoch": 0.71, "grad_norm": 0.6785745788672353, "learning_rate": 4.1931521450566905e-06, "loss": 0.3442, "step": 15375 }, { "epoch": 0.71, "grad_norm": 0.4211542838609887, "learning_rate": 4.191940851924291e-06, "loss": 0.2396, "step": 15376 }, { "epoch": 0.71, "grad_norm": 0.40857305891502943, "learning_rate": 4.190729687376719e-06, "loss": 0.3136, "step": 15377 }, { "epoch": 0.71, "grad_norm": 0.6590719459801646, "learning_rate": 4.189518651440793e-06, "loss": 0.2766, "step": 15378 }, { "epoch": 0.71, "grad_norm": 0.2509263728055345, "learning_rate": 4.188307744143328e-06, "loss": 0.1544, "step": 15379 }, { "epoch": 0.71, "grad_norm": 1.2898751786942517, "learning_rate": 4.1870969655111226e-06, "loss": 0.6735, "step": 15380 }, { "epoch": 0.71, "grad_norm": 0.34253351753028477, "learning_rate": 4.185886315570993e-06, "loss": 0.2432, "step": 15381 }, { "epoch": 0.71, "grad_norm": 0.606438819113309, "learning_rate": 4.184675794349733e-06, "loss": 0.251, "step": 15382 }, { "epoch": 0.71, "grad_norm": 0.9874635922233967, "learning_rate": 4.1834654018741465e-06, "loss": 0.4552, "step": 15383 }, { "epoch": 0.71, "grad_norm": 0.5172631443327038, "learning_rate": 4.182255138171032e-06, "loss": 0.2206, "step": 15384 }, { "epoch": 0.71, "grad_norm": 0.2532865362489919, "learning_rate": 4.181045003267179e-06, "loss": 0.2291, "step": 15385 }, { "epoch": 0.71, "grad_norm": 0.3095413238829962, "learning_rate": 4.179834997189385e-06, "loss": 0.2067, "step": 15386 }, { "epoch": 0.71, "grad_norm": 0.6613421214412327, "learning_rate": 4.178625119964427e-06, "loss": 0.3376, "step": 15387 }, { "epoch": 0.71, "grad_norm": 0.3784133215651523, "learning_rate": 4.177415371619105e-06, "loss": 0.2773, "step": 15388 }, { "epoch": 0.71, "grad_norm": 0.39666457897435925, "learning_rate": 4.176205752180195e-06, "loss": 0.247, "step": 15389 }, { "epoch": 0.71, "grad_norm": 0.9223204238789361, "learning_rate": 4.174996261674473e-06, "loss": 0.4359, "step": 15390 }, { "epoch": 0.71, "grad_norm": 0.30045969276387835, "learning_rate": 4.17378690012872e-06, "loss": 0.2161, "step": 15391 }, { "epoch": 0.71, "grad_norm": 0.3236437430833355, "learning_rate": 4.172577667569709e-06, "loss": 0.2095, "step": 15392 }, { "epoch": 0.71, "grad_norm": 0.44752825766252413, "learning_rate": 4.171368564024216e-06, "loss": 0.3164, "step": 15393 }, { "epoch": 0.71, "grad_norm": 0.3802611335532879, "learning_rate": 4.170159589519002e-06, "loss": 0.272, "step": 15394 }, { "epoch": 0.71, "grad_norm": 0.871832960552936, "learning_rate": 4.168950744080835e-06, "loss": 0.3656, "step": 15395 }, { "epoch": 0.71, "grad_norm": 0.4664369864390812, "learning_rate": 4.1677420277364825e-06, "loss": 0.2829, "step": 15396 }, { "epoch": 0.71, "grad_norm": 0.3139361129527563, "learning_rate": 4.166533440512696e-06, "loss": 0.2915, "step": 15397 }, { "epoch": 0.71, "grad_norm": 0.3306167116052387, "learning_rate": 4.165324982436242e-06, "loss": 0.1479, "step": 15398 }, { "epoch": 0.71, "grad_norm": 0.6097009457182574, "learning_rate": 4.164116653533864e-06, "loss": 0.3207, "step": 15399 }, { "epoch": 0.71, "grad_norm": 0.41968822457352534, "learning_rate": 4.16290845383232e-06, "loss": 0.3001, "step": 15400 }, { "epoch": 0.71, "grad_norm": 0.3993425681926469, "learning_rate": 4.161700383358359e-06, "loss": 0.3501, "step": 15401 }, { "epoch": 0.71, "grad_norm": 0.2394965038905432, "learning_rate": 4.160492442138722e-06, "loss": 0.0629, "step": 15402 }, { "epoch": 0.71, "grad_norm": 0.44358453520715546, "learning_rate": 4.159284630200158e-06, "loss": 0.2908, "step": 15403 }, { "epoch": 0.71, "grad_norm": 0.37662599930425195, "learning_rate": 4.1580769475693946e-06, "loss": 0.2618, "step": 15404 }, { "epoch": 0.71, "grad_norm": 0.2931631349319821, "learning_rate": 4.156869394273186e-06, "loss": 0.1786, "step": 15405 }, { "epoch": 0.71, "grad_norm": 0.4016340172324641, "learning_rate": 4.1556619703382564e-06, "loss": 0.3059, "step": 15406 }, { "epoch": 0.71, "grad_norm": 1.43793740258636, "learning_rate": 4.154454675791333e-06, "loss": 0.632, "step": 15407 }, { "epoch": 0.71, "grad_norm": 0.35903608230692824, "learning_rate": 4.153247510659151e-06, "loss": 0.2356, "step": 15408 }, { "epoch": 0.71, "grad_norm": 0.3487241411859696, "learning_rate": 4.1520404749684325e-06, "loss": 0.2438, "step": 15409 }, { "epoch": 0.71, "grad_norm": 0.6142249218718109, "learning_rate": 4.1508335687459065e-06, "loss": 0.3685, "step": 15410 }, { "epoch": 0.71, "grad_norm": 0.24225801761870494, "learning_rate": 4.149626792018283e-06, "loss": 0.1132, "step": 15411 }, { "epoch": 0.71, "grad_norm": 0.391653656709187, "learning_rate": 4.1484201448122845e-06, "loss": 0.308, "step": 15412 }, { "epoch": 0.71, "grad_norm": 0.2899521043333745, "learning_rate": 4.147213627154627e-06, "loss": 0.2276, "step": 15413 }, { "epoch": 0.71, "grad_norm": 1.2284209751430595, "learning_rate": 4.146007239072016e-06, "loss": 0.5107, "step": 15414 }, { "epoch": 0.71, "grad_norm": 0.3620178294227037, "learning_rate": 4.144800980591166e-06, "loss": 0.2105, "step": 15415 }, { "epoch": 0.71, "grad_norm": 0.4992675960901863, "learning_rate": 4.143594851738775e-06, "loss": 0.3574, "step": 15416 }, { "epoch": 0.71, "grad_norm": 0.4902339546530649, "learning_rate": 4.14238885254155e-06, "loss": 0.3668, "step": 15417 }, { "epoch": 0.71, "grad_norm": 0.33032145933374635, "learning_rate": 4.141182983026194e-06, "loss": 0.1801, "step": 15418 }, { "epoch": 0.71, "grad_norm": 0.25376341153793225, "learning_rate": 4.139977243219395e-06, "loss": 0.1738, "step": 15419 }, { "epoch": 0.71, "grad_norm": 0.3655073687889778, "learning_rate": 4.138771633147856e-06, "loss": 0.2622, "step": 15420 }, { "epoch": 0.71, "grad_norm": 0.35894312552821245, "learning_rate": 4.1375661528382586e-06, "loss": 0.2221, "step": 15421 }, { "epoch": 0.71, "grad_norm": 0.650551733587448, "learning_rate": 4.136360802317301e-06, "loss": 0.3856, "step": 15422 }, { "epoch": 0.71, "grad_norm": 0.7632758325395279, "learning_rate": 4.1351555816116615e-06, "loss": 0.4784, "step": 15423 }, { "epoch": 0.71, "grad_norm": 0.3443741227322228, "learning_rate": 4.133950490748028e-06, "loss": 0.2125, "step": 15424 }, { "epoch": 0.71, "grad_norm": 0.2683293507697789, "learning_rate": 4.132745529753073e-06, "loss": 0.2085, "step": 15425 }, { "epoch": 0.71, "grad_norm": 0.8176282824944413, "learning_rate": 4.131540698653478e-06, "loss": 0.4114, "step": 15426 }, { "epoch": 0.71, "grad_norm": 0.3227500736794906, "learning_rate": 4.130335997475918e-06, "loss": 0.2658, "step": 15427 }, { "epoch": 0.71, "grad_norm": 0.3695082738187614, "learning_rate": 4.1291314262470595e-06, "loss": 0.2474, "step": 15428 }, { "epoch": 0.71, "grad_norm": 1.3294382987845506, "learning_rate": 4.127926984993575e-06, "loss": 0.6307, "step": 15429 }, { "epoch": 0.71, "grad_norm": 0.35900728279375327, "learning_rate": 4.126722673742125e-06, "loss": 0.2596, "step": 15430 }, { "epoch": 0.71, "grad_norm": 0.2600538022535034, "learning_rate": 4.125518492519375e-06, "loss": 0.1061, "step": 15431 }, { "epoch": 0.71, "grad_norm": 0.41285078363524563, "learning_rate": 4.124314441351985e-06, "loss": 0.3172, "step": 15432 }, { "epoch": 0.71, "grad_norm": 0.3497006973371404, "learning_rate": 4.123110520266609e-06, "loss": 0.2662, "step": 15433 }, { "epoch": 0.71, "grad_norm": 0.854000184449874, "learning_rate": 4.121906729289901e-06, "loss": 0.2759, "step": 15434 }, { "epoch": 0.71, "grad_norm": 1.0324275323190373, "learning_rate": 4.120703068448515e-06, "loss": 0.7029, "step": 15435 }, { "epoch": 0.71, "grad_norm": 0.3837020625415799, "learning_rate": 4.119499537769094e-06, "loss": 0.2517, "step": 15436 }, { "epoch": 0.71, "grad_norm": 0.21799785704754804, "learning_rate": 4.118296137278289e-06, "loss": 0.1689, "step": 15437 }, { "epoch": 0.71, "grad_norm": 0.6904432865575739, "learning_rate": 4.117092867002731e-06, "loss": 0.3665, "step": 15438 }, { "epoch": 0.71, "grad_norm": 0.4418088612818454, "learning_rate": 4.115889726969075e-06, "loss": 0.2674, "step": 15439 }, { "epoch": 0.71, "grad_norm": 0.4133079117370439, "learning_rate": 4.114686717203945e-06, "loss": 0.3097, "step": 15440 }, { "epoch": 0.71, "grad_norm": 0.47971156359274614, "learning_rate": 4.113483837733982e-06, "loss": 0.2939, "step": 15441 }, { "epoch": 0.71, "grad_norm": 0.3467174072778784, "learning_rate": 4.112281088585811e-06, "loss": 0.2453, "step": 15442 }, { "epoch": 0.71, "grad_norm": 0.7640903404353571, "learning_rate": 4.111078469786062e-06, "loss": 0.4084, "step": 15443 }, { "epoch": 0.71, "grad_norm": 0.4200625396089796, "learning_rate": 4.109875981361363e-06, "loss": 0.263, "step": 15444 }, { "epoch": 0.71, "grad_norm": 0.27572537125406327, "learning_rate": 4.1086736233383285e-06, "loss": 0.2005, "step": 15445 }, { "epoch": 0.71, "grad_norm": 0.557137537019506, "learning_rate": 4.107471395743586e-06, "loss": 0.2743, "step": 15446 }, { "epoch": 0.71, "grad_norm": 1.02162319936494, "learning_rate": 4.106269298603744e-06, "loss": 0.5312, "step": 15447 }, { "epoch": 0.71, "grad_norm": 0.2934550001426496, "learning_rate": 4.105067331945419e-06, "loss": 0.2416, "step": 15448 }, { "epoch": 0.71, "grad_norm": 0.5467428592087814, "learning_rate": 4.103865495795225e-06, "loss": 0.3415, "step": 15449 }, { "epoch": 0.71, "grad_norm": 0.5080358849625232, "learning_rate": 4.102663790179764e-06, "loss": 0.2317, "step": 15450 }, { "epoch": 0.71, "grad_norm": 0.26077960174848236, "learning_rate": 4.1014622151256415e-06, "loss": 0.1878, "step": 15451 }, { "epoch": 0.71, "grad_norm": 0.5545864511694208, "learning_rate": 4.100260770659461e-06, "loss": 0.3488, "step": 15452 }, { "epoch": 0.71, "grad_norm": 0.5275921970827687, "learning_rate": 4.0990594568078235e-06, "loss": 0.3706, "step": 15453 }, { "epoch": 0.71, "grad_norm": 0.3254175683471316, "learning_rate": 4.0978582735973225e-06, "loss": 0.1988, "step": 15454 }, { "epoch": 0.71, "grad_norm": 0.8330488177349571, "learning_rate": 4.0966572210545445e-06, "loss": 0.4093, "step": 15455 }, { "epoch": 0.71, "grad_norm": 0.42780915546688275, "learning_rate": 4.095456299206092e-06, "loss": 0.2922, "step": 15456 }, { "epoch": 0.71, "grad_norm": 0.2642948363764565, "learning_rate": 4.094255508078544e-06, "loss": 0.1304, "step": 15457 }, { "epoch": 0.71, "grad_norm": 0.38788023576313707, "learning_rate": 4.093054847698489e-06, "loss": 0.2502, "step": 15458 }, { "epoch": 0.71, "grad_norm": 0.5080623078672286, "learning_rate": 4.091854318092504e-06, "loss": 0.3538, "step": 15459 }, { "epoch": 0.71, "grad_norm": 0.31306630256084184, "learning_rate": 4.0906539192871695e-06, "loss": 0.1995, "step": 15460 }, { "epoch": 0.71, "grad_norm": 0.6696240704661317, "learning_rate": 4.0894536513090655e-06, "loss": 0.3289, "step": 15461 }, { "epoch": 0.71, "grad_norm": 1.3234777470300474, "learning_rate": 4.0882535141847566e-06, "loss": 0.5747, "step": 15462 }, { "epoch": 0.71, "grad_norm": 0.2432404743648824, "learning_rate": 4.087053507940823e-06, "loss": 0.1579, "step": 15463 }, { "epoch": 0.71, "grad_norm": 0.3150487963413936, "learning_rate": 4.08585363260382e-06, "loss": 0.2539, "step": 15464 }, { "epoch": 0.71, "grad_norm": 1.3590932860804303, "learning_rate": 4.084653888200319e-06, "loss": 0.7879, "step": 15465 }, { "epoch": 0.71, "grad_norm": 0.42716404632076094, "learning_rate": 4.083454274756881e-06, "loss": 0.2983, "step": 15466 }, { "epoch": 0.71, "grad_norm": 0.5275065167875603, "learning_rate": 4.082254792300061e-06, "loss": 0.2613, "step": 15467 }, { "epoch": 0.71, "grad_norm": 0.3870188429135705, "learning_rate": 4.0810554408564154e-06, "loss": 0.3063, "step": 15468 }, { "epoch": 0.71, "grad_norm": 0.29796304654025724, "learning_rate": 4.079856220452498e-06, "loss": 0.2143, "step": 15469 }, { "epoch": 0.71, "grad_norm": 0.26241400572761425, "learning_rate": 4.078657131114861e-06, "loss": 0.1331, "step": 15470 }, { "epoch": 0.71, "grad_norm": 0.4821450322626492, "learning_rate": 4.07745817287005e-06, "loss": 0.3647, "step": 15471 }, { "epoch": 0.71, "grad_norm": 0.33132628009783577, "learning_rate": 4.0762593457445975e-06, "loss": 0.2662, "step": 15472 }, { "epoch": 0.71, "grad_norm": 0.6355957250246723, "learning_rate": 4.075060649765062e-06, "loss": 0.2624, "step": 15473 }, { "epoch": 0.71, "grad_norm": 1.359888500724607, "learning_rate": 4.07386208495797e-06, "loss": 0.526, "step": 15474 }, { "epoch": 0.71, "grad_norm": 0.3749773361850086, "learning_rate": 4.072663651349862e-06, "loss": 0.2257, "step": 15475 }, { "epoch": 0.71, "grad_norm": 0.23467209216856855, "learning_rate": 4.071465348967265e-06, "loss": 0.2058, "step": 15476 }, { "epoch": 0.71, "grad_norm": 0.644663419329526, "learning_rate": 4.070267177836712e-06, "loss": 0.3821, "step": 15477 }, { "epoch": 0.71, "grad_norm": 0.5885374550508444, "learning_rate": 4.069069137984732e-06, "loss": 0.304, "step": 15478 }, { "epoch": 0.71, "grad_norm": 0.4216145601135524, "learning_rate": 4.06787122943784e-06, "loss": 0.3328, "step": 15479 }, { "epoch": 0.71, "grad_norm": 0.375932711090284, "learning_rate": 4.066673452222566e-06, "loss": 0.2428, "step": 15480 }, { "epoch": 0.71, "grad_norm": 0.6334867103430766, "learning_rate": 4.065475806365415e-06, "loss": 0.321, "step": 15481 }, { "epoch": 0.71, "grad_norm": 0.2270104480960671, "learning_rate": 4.064278291892918e-06, "loss": 0.1974, "step": 15482 }, { "epoch": 0.71, "grad_norm": 0.5895690351772647, "learning_rate": 4.063080908831578e-06, "loss": 0.282, "step": 15483 }, { "epoch": 0.71, "grad_norm": 0.3362356554604297, "learning_rate": 4.061883657207902e-06, "loss": 0.2687, "step": 15484 }, { "epoch": 0.71, "grad_norm": 0.5616330307499463, "learning_rate": 4.060686537048398e-06, "loss": 0.3591, "step": 15485 }, { "epoch": 0.71, "grad_norm": 1.2092849394172236, "learning_rate": 4.05948954837957e-06, "loss": 0.2435, "step": 15486 }, { "epoch": 0.71, "grad_norm": 0.280041169211778, "learning_rate": 4.058292691227922e-06, "loss": 0.197, "step": 15487 }, { "epoch": 0.71, "grad_norm": 0.32558278272001023, "learning_rate": 4.057095965619943e-06, "loss": 0.2887, "step": 15488 }, { "epoch": 0.71, "grad_norm": 0.5709600545498836, "learning_rate": 4.0558993715821335e-06, "loss": 0.2716, "step": 15489 }, { "epoch": 0.71, "grad_norm": 0.40553544049211937, "learning_rate": 4.054702909140982e-06, "loss": 0.2216, "step": 15490 }, { "epoch": 0.71, "grad_norm": 0.5976325876193552, "learning_rate": 4.053506578322976e-06, "loss": 0.3674, "step": 15491 }, { "epoch": 0.71, "grad_norm": 0.3837044891064192, "learning_rate": 4.052310379154607e-06, "loss": 0.28, "step": 15492 }, { "epoch": 0.71, "grad_norm": 0.8479501477857144, "learning_rate": 4.051114311662351e-06, "loss": 0.1418, "step": 15493 }, { "epoch": 0.71, "grad_norm": 0.32227656789213904, "learning_rate": 4.04991837587269e-06, "loss": 0.2179, "step": 15494 }, { "epoch": 0.71, "grad_norm": 0.5199745669126264, "learning_rate": 4.048722571812105e-06, "loss": 0.3449, "step": 15495 }, { "epoch": 0.71, "grad_norm": 0.39538358329729445, "learning_rate": 4.047526899507063e-06, "loss": 0.2135, "step": 15496 }, { "epoch": 0.71, "grad_norm": 0.2966929622529681, "learning_rate": 4.0463313589840415e-06, "loss": 0.2312, "step": 15497 }, { "epoch": 0.71, "grad_norm": 1.3594233955769435, "learning_rate": 4.0451359502694986e-06, "loss": 0.5296, "step": 15498 }, { "epoch": 0.71, "grad_norm": 0.42253517739176183, "learning_rate": 4.043940673389913e-06, "loss": 0.1968, "step": 15499 }, { "epoch": 0.71, "grad_norm": 0.2702509296339678, "learning_rate": 4.042745528371741e-06, "loss": 0.2456, "step": 15500 }, { "epoch": 0.71, "grad_norm": 0.6860244995469101, "learning_rate": 4.041550515241438e-06, "loss": 0.3879, "step": 15501 }, { "epoch": 0.71, "grad_norm": 0.8255041134032126, "learning_rate": 4.040355634025463e-06, "loss": 0.5485, "step": 15502 }, { "epoch": 0.71, "grad_norm": 0.2775937366121965, "learning_rate": 4.039160884750271e-06, "loss": 0.1719, "step": 15503 }, { "epoch": 0.71, "grad_norm": 0.37843353911066774, "learning_rate": 4.0379662674423145e-06, "loss": 0.2971, "step": 15504 }, { "epoch": 0.71, "grad_norm": 0.656048816343922, "learning_rate": 4.036771782128036e-06, "loss": 0.3371, "step": 15505 }, { "epoch": 0.71, "grad_norm": 0.4527974437626401, "learning_rate": 4.035577428833886e-06, "loss": 0.253, "step": 15506 }, { "epoch": 0.71, "grad_norm": 0.5442523370270513, "learning_rate": 4.034383207586299e-06, "loss": 0.3453, "step": 15507 }, { "epoch": 0.71, "grad_norm": 0.38580761851004725, "learning_rate": 4.033189118411719e-06, "loss": 0.273, "step": 15508 }, { "epoch": 0.71, "grad_norm": 0.2630748879034027, "learning_rate": 4.031995161336584e-06, "loss": 0.1789, "step": 15509 }, { "epoch": 0.71, "grad_norm": 0.6297927967147808, "learning_rate": 4.030801336387321e-06, "loss": 0.2493, "step": 15510 }, { "epoch": 0.71, "grad_norm": 0.36574754655553965, "learning_rate": 4.029607643590363e-06, "loss": 0.2982, "step": 15511 }, { "epoch": 0.71, "grad_norm": 0.37282857909359957, "learning_rate": 4.028414082972141e-06, "loss": 0.2222, "step": 15512 }, { "epoch": 0.71, "grad_norm": 0.8702055671141345, "learning_rate": 4.027220654559072e-06, "loss": 0.5004, "step": 15513 }, { "epoch": 0.71, "grad_norm": 1.2799421300732114, "learning_rate": 4.026027358377584e-06, "loss": 0.6382, "step": 15514 }, { "epoch": 0.71, "grad_norm": 0.2674578567095945, "learning_rate": 4.024834194454086e-06, "loss": 0.218, "step": 15515 }, { "epoch": 0.71, "grad_norm": 0.2564183407134996, "learning_rate": 4.023641162815007e-06, "loss": 0.1997, "step": 15516 }, { "epoch": 0.71, "grad_norm": 0.6201332092579396, "learning_rate": 4.02244826348675e-06, "loss": 0.4014, "step": 15517 }, { "epoch": 0.71, "grad_norm": 0.3882574468204005, "learning_rate": 4.02125549649573e-06, "loss": 0.2737, "step": 15518 }, { "epoch": 0.71, "grad_norm": 0.5117669534890628, "learning_rate": 4.02006286186835e-06, "loss": 0.2448, "step": 15519 }, { "epoch": 0.71, "grad_norm": 0.42503277751157154, "learning_rate": 4.0188703596310085e-06, "loss": 0.3146, "step": 15520 }, { "epoch": 0.71, "grad_norm": 0.3056705572460837, "learning_rate": 4.01767798981012e-06, "loss": 0.2222, "step": 15521 }, { "epoch": 0.71, "grad_norm": 0.4228458858952225, "learning_rate": 4.016485752432071e-06, "loss": 0.1496, "step": 15522 }, { "epoch": 0.71, "grad_norm": 0.3307591699716963, "learning_rate": 4.015293647523264e-06, "loss": 0.3087, "step": 15523 }, { "epoch": 0.71, "grad_norm": 0.3916166873677584, "learning_rate": 4.0141016751100834e-06, "loss": 0.2905, "step": 15524 }, { "epoch": 0.71, "grad_norm": 1.010457837401049, "learning_rate": 4.012909835218924e-06, "loss": 0.3168, "step": 15525 }, { "epoch": 0.71, "grad_norm": 0.6782563653088336, "learning_rate": 4.011718127876173e-06, "loss": 0.4637, "step": 15526 }, { "epoch": 0.71, "grad_norm": 0.43052354792375336, "learning_rate": 4.010526553108207e-06, "loss": 0.2835, "step": 15527 }, { "epoch": 0.71, "grad_norm": 0.2511407380468412, "learning_rate": 4.0093351109414115e-06, "loss": 0.2085, "step": 15528 }, { "epoch": 0.71, "grad_norm": 0.7155107171680686, "learning_rate": 4.008143801402166e-06, "loss": 0.3062, "step": 15529 }, { "epoch": 0.71, "grad_norm": 0.3480921423162831, "learning_rate": 4.0069526245168375e-06, "loss": 0.2434, "step": 15530 }, { "epoch": 0.71, "grad_norm": 0.4010881115565821, "learning_rate": 4.005761580311805e-06, "loss": 0.3214, "step": 15531 }, { "epoch": 0.71, "grad_norm": 0.970331530175825, "learning_rate": 4.004570668813427e-06, "loss": 0.5249, "step": 15532 }, { "epoch": 0.71, "grad_norm": 0.3529049650920527, "learning_rate": 4.0033798900480845e-06, "loss": 0.2742, "step": 15533 }, { "epoch": 0.71, "grad_norm": 0.33612861577292935, "learning_rate": 4.002189244042126e-06, "loss": 0.2142, "step": 15534 }, { "epoch": 0.71, "grad_norm": 0.37984712109441304, "learning_rate": 4.000998730821922e-06, "loss": 0.259, "step": 15535 }, { "epoch": 0.71, "grad_norm": 0.35571527283470394, "learning_rate": 3.999808350413823e-06, "loss": 0.2463, "step": 15536 }, { "epoch": 0.71, "grad_norm": 1.5241663590526002, "learning_rate": 3.998618102844178e-06, "loss": 0.7123, "step": 15537 }, { "epoch": 0.71, "grad_norm": 0.5970884686858505, "learning_rate": 3.997427988139351e-06, "loss": 0.2873, "step": 15538 }, { "epoch": 0.71, "grad_norm": 0.2660817022507961, "learning_rate": 3.996238006325679e-06, "loss": 0.2633, "step": 15539 }, { "epoch": 0.71, "grad_norm": 1.042325415826318, "learning_rate": 3.995048157429514e-06, "loss": 0.3831, "step": 15540 }, { "epoch": 0.71, "grad_norm": 0.3491319425514608, "learning_rate": 3.993858441477193e-06, "loss": 0.2155, "step": 15541 }, { "epoch": 0.71, "grad_norm": 0.3055290460243411, "learning_rate": 3.992668858495055e-06, "loss": 0.1898, "step": 15542 }, { "epoch": 0.71, "grad_norm": 0.38152533496642127, "learning_rate": 3.991479408509444e-06, "loss": 0.2871, "step": 15543 }, { "epoch": 0.71, "grad_norm": 1.2423997802425857, "learning_rate": 3.990290091546685e-06, "loss": 0.7798, "step": 15544 }, { "epoch": 0.71, "grad_norm": 0.3374350948541978, "learning_rate": 3.98910090763311e-06, "loss": 0.1889, "step": 15545 }, { "epoch": 0.71, "grad_norm": 0.696889465161503, "learning_rate": 3.987911856795047e-06, "loss": 0.3564, "step": 15546 }, { "epoch": 0.71, "grad_norm": 0.28336323671469765, "learning_rate": 3.9867229390588245e-06, "loss": 0.2354, "step": 15547 }, { "epoch": 0.71, "grad_norm": 0.2553198634753402, "learning_rate": 3.985534154450762e-06, "loss": 0.1648, "step": 15548 }, { "epoch": 0.71, "grad_norm": 1.1638800324545835, "learning_rate": 3.9843455029971715e-06, "loss": 0.6614, "step": 15549 }, { "epoch": 0.71, "grad_norm": 1.2431847610865912, "learning_rate": 3.983156984724374e-06, "loss": 0.7071, "step": 15550 }, { "epoch": 0.71, "grad_norm": 0.29859397787766245, "learning_rate": 3.981968599658682e-06, "loss": 0.2336, "step": 15551 }, { "epoch": 0.71, "grad_norm": 0.4689919660796311, "learning_rate": 3.980780347826409e-06, "loss": 0.3588, "step": 15552 }, { "epoch": 0.71, "grad_norm": 0.24446450545567375, "learning_rate": 3.979592229253853e-06, "loss": 0.1407, "step": 15553 }, { "epoch": 0.71, "grad_norm": 0.348068369693218, "learning_rate": 3.978404243967323e-06, "loss": 0.2633, "step": 15554 }, { "epoch": 0.71, "grad_norm": 0.3618088096735549, "learning_rate": 3.977216391993123e-06, "loss": 0.2389, "step": 15555 }, { "epoch": 0.71, "grad_norm": 0.8783487316889272, "learning_rate": 3.9760286733575435e-06, "loss": 0.5293, "step": 15556 }, { "epoch": 0.71, "grad_norm": 0.33048320946368565, "learning_rate": 3.974841088086887e-06, "loss": 0.2394, "step": 15557 }, { "epoch": 0.71, "grad_norm": 0.7233561189048227, "learning_rate": 3.973653636207437e-06, "loss": 0.2841, "step": 15558 }, { "epoch": 0.71, "grad_norm": 0.3039144161797102, "learning_rate": 3.972466317745489e-06, "loss": 0.2658, "step": 15559 }, { "epoch": 0.71, "grad_norm": 0.4171875442895504, "learning_rate": 3.971279132727329e-06, "loss": 0.2461, "step": 15560 }, { "epoch": 0.71, "grad_norm": 0.2868310384490601, "learning_rate": 3.970092081179236e-06, "loss": 0.1683, "step": 15561 }, { "epoch": 0.71, "grad_norm": 0.5101438684893173, "learning_rate": 3.968905163127493e-06, "loss": 0.3279, "step": 15562 }, { "epoch": 0.71, "grad_norm": 0.40796046306245215, "learning_rate": 3.967718378598377e-06, "loss": 0.2522, "step": 15563 }, { "epoch": 0.71, "grad_norm": 0.3896019094271313, "learning_rate": 3.966531727618165e-06, "loss": 0.2555, "step": 15564 }, { "epoch": 0.72, "grad_norm": 0.2985226605443229, "learning_rate": 3.965345210213125e-06, "loss": 0.1793, "step": 15565 }, { "epoch": 0.72, "grad_norm": 0.4611526218924979, "learning_rate": 3.964158826409523e-06, "loss": 0.2603, "step": 15566 }, { "epoch": 0.72, "grad_norm": 0.29972742686438963, "learning_rate": 3.9629725762336266e-06, "loss": 0.2661, "step": 15567 }, { "epoch": 0.72, "grad_norm": 0.765697572376828, "learning_rate": 3.961786459711699e-06, "loss": 0.3202, "step": 15568 }, { "epoch": 0.72, "grad_norm": 0.34647881300648886, "learning_rate": 3.960600476870003e-06, "loss": 0.2481, "step": 15569 }, { "epoch": 0.72, "grad_norm": 0.8966314841487539, "learning_rate": 3.959414627734789e-06, "loss": 0.4436, "step": 15570 }, { "epoch": 0.72, "grad_norm": 0.3648837881450559, "learning_rate": 3.958228912332312e-06, "loss": 0.2348, "step": 15571 }, { "epoch": 0.72, "grad_norm": 0.25766799622912173, "learning_rate": 3.9570433306888265e-06, "loss": 0.1973, "step": 15572 }, { "epoch": 0.72, "grad_norm": 0.43460842536034927, "learning_rate": 3.955857882830576e-06, "loss": 0.2153, "step": 15573 }, { "epoch": 0.72, "grad_norm": 0.48359823649628614, "learning_rate": 3.954672568783809e-06, "loss": 0.2949, "step": 15574 }, { "epoch": 0.72, "grad_norm": 0.3108944990482073, "learning_rate": 3.95348738857476e-06, "loss": 0.2503, "step": 15575 }, { "epoch": 0.72, "grad_norm": 1.3620705737252354, "learning_rate": 3.952302342229674e-06, "loss": 0.7562, "step": 15576 }, { "epoch": 0.72, "grad_norm": 0.4647474504029795, "learning_rate": 3.951117429774789e-06, "loss": 0.105, "step": 15577 }, { "epoch": 0.72, "grad_norm": 0.31476660559122266, "learning_rate": 3.94993265123633e-06, "loss": 0.2062, "step": 15578 }, { "epoch": 0.72, "grad_norm": 0.28814180412122126, "learning_rate": 3.948748006640535e-06, "loss": 0.2771, "step": 15579 }, { "epoch": 0.72, "grad_norm": 0.6687811015986703, "learning_rate": 3.94756349601362e-06, "loss": 0.4041, "step": 15580 }, { "epoch": 0.72, "grad_norm": 0.32427902404722525, "learning_rate": 3.946379119381822e-06, "loss": 0.1731, "step": 15581 }, { "epoch": 0.72, "grad_norm": 0.5709409312348593, "learning_rate": 3.945194876771352e-06, "loss": 0.3628, "step": 15582 }, { "epoch": 0.72, "grad_norm": 0.3762588365469927, "learning_rate": 3.944010768208436e-06, "loss": 0.3002, "step": 15583 }, { "epoch": 0.72, "grad_norm": 0.24383970423917428, "learning_rate": 3.942826793719281e-06, "loss": 0.1258, "step": 15584 }, { "epoch": 0.72, "grad_norm": 0.3365459096654575, "learning_rate": 3.941642953330102e-06, "loss": 0.2813, "step": 15585 }, { "epoch": 0.72, "grad_norm": 0.8001471991624923, "learning_rate": 3.9404592470671145e-06, "loss": 0.4252, "step": 15586 }, { "epoch": 0.72, "grad_norm": 0.27872356397792214, "learning_rate": 3.939275674956514e-06, "loss": 0.2216, "step": 15587 }, { "epoch": 0.72, "grad_norm": 0.5953289136224065, "learning_rate": 3.938092237024509e-06, "loss": 0.3667, "step": 15588 }, { "epoch": 0.72, "grad_norm": 1.2982026881980373, "learning_rate": 3.936908933297302e-06, "loss": 0.52, "step": 15589 }, { "epoch": 0.72, "grad_norm": 0.23631750740416335, "learning_rate": 3.935725763801085e-06, "loss": 0.1499, "step": 15590 }, { "epoch": 0.72, "grad_norm": 0.3922146685168824, "learning_rate": 3.934542728562058e-06, "loss": 0.3256, "step": 15591 }, { "epoch": 0.72, "grad_norm": 0.7871682133919862, "learning_rate": 3.933359827606402e-06, "loss": 0.4197, "step": 15592 }, { "epoch": 0.72, "grad_norm": 0.4361744491301722, "learning_rate": 3.932177060960319e-06, "loss": 0.284, "step": 15593 }, { "epoch": 0.72, "grad_norm": 0.3009296709869533, "learning_rate": 3.930994428649989e-06, "loss": 0.1501, "step": 15594 }, { "epoch": 0.72, "grad_norm": 0.392340226070996, "learning_rate": 3.929811930701588e-06, "loss": 0.3034, "step": 15595 }, { "epoch": 0.72, "grad_norm": 0.6018259196858956, "learning_rate": 3.928629567141305e-06, "loss": 0.3023, "step": 15596 }, { "epoch": 0.72, "grad_norm": 0.40882440878038734, "learning_rate": 3.9274473379953035e-06, "loss": 0.2651, "step": 15597 }, { "epoch": 0.72, "grad_norm": 0.28988786927200716, "learning_rate": 3.926265243289773e-06, "loss": 0.2645, "step": 15598 }, { "epoch": 0.72, "grad_norm": 0.876440240611924, "learning_rate": 3.9250832830508715e-06, "loss": 0.4933, "step": 15599 }, { "epoch": 0.72, "grad_norm": 0.30477597904492615, "learning_rate": 3.9239014573047755e-06, "loss": 0.1748, "step": 15600 }, { "epoch": 0.72, "grad_norm": 1.5686087651270901, "learning_rate": 3.922719766077642e-06, "loss": 0.5161, "step": 15601 }, { "epoch": 0.72, "grad_norm": 0.5292112152332294, "learning_rate": 3.921538209395634e-06, "loss": 0.2963, "step": 15602 }, { "epoch": 0.72, "grad_norm": 0.2782706412515793, "learning_rate": 3.9203567872849154e-06, "loss": 0.2886, "step": 15603 }, { "epoch": 0.72, "grad_norm": 0.8956216113323225, "learning_rate": 3.919175499771635e-06, "loss": 0.3308, "step": 15604 }, { "epoch": 0.72, "grad_norm": 0.28405768422545374, "learning_rate": 3.9179943468819485e-06, "loss": 0.1661, "step": 15605 }, { "epoch": 0.72, "grad_norm": 0.3284563351104206, "learning_rate": 3.916813328642008e-06, "loss": 0.226, "step": 15606 }, { "epoch": 0.72, "grad_norm": 0.35804816186725547, "learning_rate": 3.915632445077955e-06, "loss": 0.2371, "step": 15607 }, { "epoch": 0.72, "grad_norm": 0.5579065259712115, "learning_rate": 3.914451696215937e-06, "loss": 0.287, "step": 15608 }, { "epoch": 0.72, "grad_norm": 0.4045019348095558, "learning_rate": 3.91327108208209e-06, "loss": 0.303, "step": 15609 }, { "epoch": 0.72, "grad_norm": 0.3455352213924204, "learning_rate": 3.912090602702556e-06, "loss": 0.2421, "step": 15610 }, { "epoch": 0.72, "grad_norm": 0.3902891227336239, "learning_rate": 3.910910258103468e-06, "loss": 0.2919, "step": 15611 }, { "epoch": 0.72, "grad_norm": 0.36551291193798424, "learning_rate": 3.9097300483109625e-06, "loss": 0.22, "step": 15612 }, { "epoch": 0.72, "grad_norm": 0.44767097775473574, "learning_rate": 3.908549973351164e-06, "loss": 0.2112, "step": 15613 }, { "epoch": 0.72, "grad_norm": 0.3945377141070083, "learning_rate": 3.907370033250188e-06, "loss": 0.2978, "step": 15614 }, { "epoch": 0.72, "grad_norm": 0.3424204936232039, "learning_rate": 3.906190228034177e-06, "loss": 0.2886, "step": 15615 }, { "epoch": 0.72, "grad_norm": 1.321649278791491, "learning_rate": 3.905010557729238e-06, "loss": 0.7187, "step": 15616 }, { "epoch": 0.72, "grad_norm": 0.3895072694694036, "learning_rate": 3.903831022361493e-06, "loss": 0.1172, "step": 15617 }, { "epoch": 0.72, "grad_norm": 0.28549428771090135, "learning_rate": 3.9026516219570495e-06, "loss": 0.2207, "step": 15618 }, { "epoch": 0.72, "grad_norm": 0.3647009625656791, "learning_rate": 3.901472356542023e-06, "loss": 0.2987, "step": 15619 }, { "epoch": 0.72, "grad_norm": 0.5975779603878336, "learning_rate": 3.9002932261425255e-06, "loss": 0.2161, "step": 15620 }, { "epoch": 0.72, "grad_norm": 0.44850355468851294, "learning_rate": 3.899114230784652e-06, "loss": 0.3005, "step": 15621 }, { "epoch": 0.72, "grad_norm": 0.4889422367028318, "learning_rate": 3.89793537049451e-06, "loss": 0.3313, "step": 15622 }, { "epoch": 0.72, "grad_norm": 0.3309322500891111, "learning_rate": 3.896756645298201e-06, "loss": 0.1944, "step": 15623 }, { "epoch": 0.72, "grad_norm": 0.2566648621492292, "learning_rate": 3.8955780552218135e-06, "loss": 0.1925, "step": 15624 }, { "epoch": 0.72, "grad_norm": 0.7542194753727185, "learning_rate": 3.8943996002914485e-06, "loss": 0.3993, "step": 15625 }, { "epoch": 0.72, "grad_norm": 0.3427039037583272, "learning_rate": 3.893221280533188e-06, "loss": 0.2214, "step": 15626 }, { "epoch": 0.72, "grad_norm": 0.41548833197526674, "learning_rate": 3.892043095973123e-06, "loss": 0.2888, "step": 15627 }, { "epoch": 0.72, "grad_norm": 1.1570078418309857, "learning_rate": 3.8908650466373355e-06, "loss": 0.6182, "step": 15628 }, { "epoch": 0.72, "grad_norm": 0.41029184416798337, "learning_rate": 3.889687132551913e-06, "loss": 0.1997, "step": 15629 }, { "epoch": 0.72, "grad_norm": 0.34696278064543573, "learning_rate": 3.888509353742927e-06, "loss": 0.2145, "step": 15630 }, { "epoch": 0.72, "grad_norm": 0.3673221662424565, "learning_rate": 3.887331710236447e-06, "loss": 0.2936, "step": 15631 }, { "epoch": 0.72, "grad_norm": 0.36259148784163087, "learning_rate": 3.886154202058559e-06, "loss": 0.2076, "step": 15632 }, { "epoch": 0.72, "grad_norm": 0.389142242977095, "learning_rate": 3.88497682923532e-06, "loss": 0.2227, "step": 15633 }, { "epoch": 0.72, "grad_norm": 0.3768931109329765, "learning_rate": 3.883799591792804e-06, "loss": 0.3017, "step": 15634 }, { "epoch": 0.72, "grad_norm": 1.3819381680781753, "learning_rate": 3.882622489757067e-06, "loss": 0.5256, "step": 15635 }, { "epoch": 0.72, "grad_norm": 0.3709581734938562, "learning_rate": 3.881445523154172e-06, "loss": 0.2047, "step": 15636 }, { "epoch": 0.72, "grad_norm": 0.5765386956424883, "learning_rate": 3.880268692010178e-06, "loss": 0.1995, "step": 15637 }, { "epoch": 0.72, "grad_norm": 0.39150010812542857, "learning_rate": 3.879091996351135e-06, "loss": 0.3072, "step": 15638 }, { "epoch": 0.72, "grad_norm": 0.2971497210331202, "learning_rate": 3.877915436203099e-06, "loss": 0.1804, "step": 15639 }, { "epoch": 0.72, "grad_norm": 1.4254436596557116, "learning_rate": 3.876739011592112e-06, "loss": 0.8242, "step": 15640 }, { "epoch": 0.72, "grad_norm": 0.7009587751537443, "learning_rate": 3.87556272254422e-06, "loss": 0.3502, "step": 15641 }, { "epoch": 0.72, "grad_norm": 0.2942149539446829, "learning_rate": 3.874386569085471e-06, "loss": 0.2727, "step": 15642 }, { "epoch": 0.72, "grad_norm": 0.4845702885926688, "learning_rate": 3.873210551241896e-06, "loss": 0.2854, "step": 15643 }, { "epoch": 0.72, "grad_norm": 0.2625763648955105, "learning_rate": 3.872034669039534e-06, "loss": 0.1543, "step": 15644 }, { "epoch": 0.72, "grad_norm": 0.3943947073440283, "learning_rate": 3.8708589225044195e-06, "loss": 0.2894, "step": 15645 }, { "epoch": 0.72, "grad_norm": 0.32194494396560275, "learning_rate": 3.869683311662582e-06, "loss": 0.2435, "step": 15646 }, { "epoch": 0.72, "grad_norm": 0.7082241396307494, "learning_rate": 3.8685078365400465e-06, "loss": 0.3448, "step": 15647 }, { "epoch": 0.72, "grad_norm": 0.4126253595397479, "learning_rate": 3.867332497162836e-06, "loss": 0.2703, "step": 15648 }, { "epoch": 0.72, "grad_norm": 0.4334016351378079, "learning_rate": 3.866157293556978e-06, "loss": 0.1809, "step": 15649 }, { "epoch": 0.72, "grad_norm": 0.2778540957382801, "learning_rate": 3.864982225748481e-06, "loss": 0.2515, "step": 15650 }, { "epoch": 0.72, "grad_norm": 0.3776989518825572, "learning_rate": 3.863807293763368e-06, "loss": 0.2627, "step": 15651 }, { "epoch": 0.72, "grad_norm": 1.1482531528535536, "learning_rate": 3.862632497627645e-06, "loss": 0.397, "step": 15652 }, { "epoch": 0.72, "grad_norm": 0.7083691691322206, "learning_rate": 3.861457837367324e-06, "loss": 0.3826, "step": 15653 }, { "epoch": 0.72, "grad_norm": 0.2937339625804799, "learning_rate": 3.860283313008412e-06, "loss": 0.255, "step": 15654 }, { "epoch": 0.72, "grad_norm": 0.5169200025397416, "learning_rate": 3.859108924576906e-06, "loss": 0.3697, "step": 15655 }, { "epoch": 0.72, "grad_norm": 0.25990425762859615, "learning_rate": 3.857934672098815e-06, "loss": 0.0935, "step": 15656 }, { "epoch": 0.72, "grad_norm": 0.3778646118737523, "learning_rate": 3.856760555600122e-06, "loss": 0.258, "step": 15657 }, { "epoch": 0.72, "grad_norm": 0.3784369445637982, "learning_rate": 3.855586575106838e-06, "loss": 0.3239, "step": 15658 }, { "epoch": 0.72, "grad_norm": 0.7680662081639719, "learning_rate": 3.8544127306449446e-06, "loss": 0.3065, "step": 15659 }, { "epoch": 0.72, "grad_norm": 0.37300712850379814, "learning_rate": 3.8532390222404245e-06, "loss": 0.2512, "step": 15660 }, { "epoch": 0.72, "grad_norm": 1.3817453729144542, "learning_rate": 3.852065449919271e-06, "loss": 0.5215, "step": 15661 }, { "epoch": 0.72, "grad_norm": 0.20511682523674526, "learning_rate": 3.850892013707461e-06, "loss": 0.1566, "step": 15662 }, { "epoch": 0.72, "grad_norm": 0.3901461597766473, "learning_rate": 3.84971871363098e-06, "loss": 0.2696, "step": 15663 }, { "epoch": 0.72, "grad_norm": 0.8933753289728933, "learning_rate": 3.848545549715795e-06, "loss": 0.5159, "step": 15664 }, { "epoch": 0.72, "grad_norm": 0.48799684387146985, "learning_rate": 3.847372521987883e-06, "loss": 0.2574, "step": 15665 }, { "epoch": 0.72, "grad_norm": 0.38524738078674114, "learning_rate": 3.846199630473216e-06, "loss": 0.2446, "step": 15666 }, { "epoch": 0.72, "grad_norm": 0.5486793187087692, "learning_rate": 3.845026875197755e-06, "loss": 0.3006, "step": 15667 }, { "epoch": 0.72, "grad_norm": 0.33325406962915516, "learning_rate": 3.84385425618747e-06, "loss": 0.2185, "step": 15668 }, { "epoch": 0.72, "grad_norm": 0.3107367560350991, "learning_rate": 3.842681773468316e-06, "loss": 0.1941, "step": 15669 }, { "epoch": 0.72, "grad_norm": 0.4015495218843103, "learning_rate": 3.841509427066252e-06, "loss": 0.3003, "step": 15670 }, { "epoch": 0.72, "grad_norm": 0.8115963857670337, "learning_rate": 3.840337217007238e-06, "loss": 0.4071, "step": 15671 }, { "epoch": 0.72, "grad_norm": 0.3243923276799991, "learning_rate": 3.839165143317217e-06, "loss": 0.188, "step": 15672 }, { "epoch": 0.72, "grad_norm": 1.237999659161016, "learning_rate": 3.837993206022146e-06, "loss": 0.5433, "step": 15673 }, { "epoch": 0.72, "grad_norm": 0.40305880319537346, "learning_rate": 3.836821405147959e-06, "loss": 0.2913, "step": 15674 }, { "epoch": 0.72, "grad_norm": 0.24133924331697731, "learning_rate": 3.835649740720613e-06, "loss": 0.1634, "step": 15675 }, { "epoch": 0.72, "grad_norm": 0.7559151875857146, "learning_rate": 3.834478212766036e-06, "loss": 0.3962, "step": 15676 }, { "epoch": 0.72, "grad_norm": 0.4852086887700415, "learning_rate": 3.8333068213101744e-06, "loss": 0.3541, "step": 15677 }, { "epoch": 0.72, "grad_norm": 0.22273746221949853, "learning_rate": 3.8321355663789504e-06, "loss": 0.1711, "step": 15678 }, { "epoch": 0.72, "grad_norm": 1.498957527909359, "learning_rate": 3.830964447998302e-06, "loss": 0.6366, "step": 15679 }, { "epoch": 0.72, "grad_norm": 0.380552626234912, "learning_rate": 3.8297934661941586e-06, "loss": 0.2046, "step": 15680 }, { "epoch": 0.72, "grad_norm": 0.35829599918961436, "learning_rate": 3.828622620992436e-06, "loss": 0.2537, "step": 15681 }, { "epoch": 0.72, "grad_norm": 0.4029262343448055, "learning_rate": 3.827451912419062e-06, "loss": 0.2539, "step": 15682 }, { "epoch": 0.72, "grad_norm": 0.5144365403223831, "learning_rate": 3.826281340499957e-06, "loss": 0.3457, "step": 15683 }, { "epoch": 0.72, "grad_norm": 0.43416946729881545, "learning_rate": 3.825110905261028e-06, "loss": 0.2554, "step": 15684 }, { "epoch": 0.72, "grad_norm": 0.6713422374588778, "learning_rate": 3.823940606728196e-06, "loss": 0.2313, "step": 15685 }, { "epoch": 0.72, "grad_norm": 0.3935484239194369, "learning_rate": 3.822770444927363e-06, "loss": 0.3042, "step": 15686 }, { "epoch": 0.72, "grad_norm": 0.4715434427901114, "learning_rate": 3.8216004198844395e-06, "loss": 0.3121, "step": 15687 }, { "epoch": 0.72, "grad_norm": 0.47420930449002086, "learning_rate": 3.8204305316253295e-06, "loss": 0.2842, "step": 15688 }, { "epoch": 0.72, "grad_norm": 0.4655929402782526, "learning_rate": 3.819260780175929e-06, "loss": 0.2778, "step": 15689 }, { "epoch": 0.72, "grad_norm": 0.2908665632089152, "learning_rate": 3.818091165562142e-06, "loss": 0.2586, "step": 15690 }, { "epoch": 0.72, "grad_norm": 0.3741701081638742, "learning_rate": 3.816921687809851e-06, "loss": 0.1652, "step": 15691 }, { "epoch": 0.72, "grad_norm": 0.8736376588969164, "learning_rate": 3.815752346944962e-06, "loss": 0.4414, "step": 15692 }, { "epoch": 0.72, "grad_norm": 0.41785909512710767, "learning_rate": 3.8145831429933523e-06, "loss": 0.2921, "step": 15693 }, { "epoch": 0.72, "grad_norm": 0.32548528205580224, "learning_rate": 3.8134140759809126e-06, "loss": 0.2858, "step": 15694 }, { "epoch": 0.72, "grad_norm": 0.3136029210610828, "learning_rate": 3.8122451459335195e-06, "loss": 0.1695, "step": 15695 }, { "epoch": 0.72, "grad_norm": 0.26026711916928963, "learning_rate": 3.8110763528770543e-06, "loss": 0.1964, "step": 15696 }, { "epoch": 0.72, "grad_norm": 1.3996266118335197, "learning_rate": 3.809907696837398e-06, "loss": 0.4759, "step": 15697 }, { "epoch": 0.72, "grad_norm": 0.5615102118614147, "learning_rate": 3.808739177840416e-06, "loss": 0.2631, "step": 15698 }, { "epoch": 0.72, "grad_norm": 0.3486638604311125, "learning_rate": 3.8075707959119845e-06, "loss": 0.2705, "step": 15699 }, { "epoch": 0.72, "grad_norm": 0.9652546826954341, "learning_rate": 3.8064025510779636e-06, "loss": 0.4812, "step": 15700 }, { "epoch": 0.72, "grad_norm": 0.3485619267211182, "learning_rate": 3.805234443364221e-06, "loss": 0.2588, "step": 15701 }, { "epoch": 0.72, "grad_norm": 0.29300597530626005, "learning_rate": 3.80406647279662e-06, "loss": 0.1689, "step": 15702 }, { "epoch": 0.72, "grad_norm": 0.3004916062130135, "learning_rate": 3.8028986394010124e-06, "loss": 0.2323, "step": 15703 }, { "epoch": 0.72, "grad_norm": 0.7437665926195619, "learning_rate": 3.8017309432032566e-06, "loss": 0.2994, "step": 15704 }, { "epoch": 0.72, "grad_norm": 0.3822550541202694, "learning_rate": 3.8005633842292065e-06, "loss": 0.236, "step": 15705 }, { "epoch": 0.72, "grad_norm": 0.34684852686513096, "learning_rate": 3.799395962504705e-06, "loss": 0.2891, "step": 15706 }, { "epoch": 0.72, "grad_norm": 1.274273200512106, "learning_rate": 3.7982286780556043e-06, "loss": 0.6597, "step": 15707 }, { "epoch": 0.72, "grad_norm": 0.25567467420149104, "learning_rate": 3.7970615309077364e-06, "loss": 0.1358, "step": 15708 }, { "epoch": 0.72, "grad_norm": 0.2920397335743465, "learning_rate": 3.7958945210869546e-06, "loss": 0.2267, "step": 15709 }, { "epoch": 0.72, "grad_norm": 0.48707749842562026, "learning_rate": 3.7947276486190843e-06, "loss": 0.329, "step": 15710 }, { "epoch": 0.72, "grad_norm": 0.40741947185938127, "learning_rate": 3.7935609135299677e-06, "loss": 0.2258, "step": 15711 }, { "epoch": 0.72, "grad_norm": 0.6537090350026551, "learning_rate": 3.7923943158454267e-06, "loss": 0.358, "step": 15712 }, { "epoch": 0.72, "grad_norm": 0.5366485143662795, "learning_rate": 3.791227855591293e-06, "loss": 0.3598, "step": 15713 }, { "epoch": 0.72, "grad_norm": 0.24107228739404737, "learning_rate": 3.790061532793393e-06, "loss": 0.1686, "step": 15714 }, { "epoch": 0.72, "grad_norm": 0.37434731596704085, "learning_rate": 3.7888953474775424e-06, "loss": 0.2461, "step": 15715 }, { "epoch": 0.72, "grad_norm": 0.6857064605693761, "learning_rate": 3.787729299669566e-06, "loss": 0.3571, "step": 15716 }, { "epoch": 0.72, "grad_norm": 0.3127842792926238, "learning_rate": 3.7865633893952725e-06, "loss": 0.1892, "step": 15717 }, { "epoch": 0.72, "grad_norm": 0.3514624005747015, "learning_rate": 3.7853976166804762e-06, "loss": 0.2894, "step": 15718 }, { "epoch": 0.72, "grad_norm": 1.1509287829486181, "learning_rate": 3.784231981550991e-06, "loss": 0.6316, "step": 15719 }, { "epoch": 0.72, "grad_norm": 0.475808753749805, "learning_rate": 3.783066484032615e-06, "loss": 0.2222, "step": 15720 }, { "epoch": 0.72, "grad_norm": 0.2543662719721029, "learning_rate": 3.781901124151155e-06, "loss": 0.2014, "step": 15721 }, { "epoch": 0.72, "grad_norm": 0.4531757019950255, "learning_rate": 3.7807359019324107e-06, "loss": 0.3197, "step": 15722 }, { "epoch": 0.72, "grad_norm": 0.8477756234937067, "learning_rate": 3.779570817402184e-06, "loss": 0.2629, "step": 15723 }, { "epoch": 0.72, "grad_norm": 0.3552710276476677, "learning_rate": 3.7784058705862624e-06, "loss": 0.2274, "step": 15724 }, { "epoch": 0.72, "grad_norm": 0.3890133209647503, "learning_rate": 3.777241061510433e-06, "loss": 0.3176, "step": 15725 }, { "epoch": 0.72, "grad_norm": 0.6872313548256496, "learning_rate": 3.776076390200495e-06, "loss": 0.3484, "step": 15726 }, { "epoch": 0.72, "grad_norm": 0.3531996267818355, "learning_rate": 3.774911856682224e-06, "loss": 0.2132, "step": 15727 }, { "epoch": 0.72, "grad_norm": 0.49536312758731754, "learning_rate": 3.7737474609814086e-06, "loss": 0.2708, "step": 15728 }, { "epoch": 0.72, "grad_norm": 0.4078905754827501, "learning_rate": 3.7725832031238187e-06, "loss": 0.2697, "step": 15729 }, { "epoch": 0.72, "grad_norm": 0.3409810135075762, "learning_rate": 3.771419083135236e-06, "loss": 0.257, "step": 15730 }, { "epoch": 0.72, "grad_norm": 1.4315413263477226, "learning_rate": 3.7702551010414333e-06, "loss": 0.4046, "step": 15731 }, { "epoch": 0.72, "grad_norm": 0.6336688246350893, "learning_rate": 3.769091256868177e-06, "loss": 0.3158, "step": 15732 }, { "epoch": 0.72, "grad_norm": 0.4044531896693863, "learning_rate": 3.767927550641237e-06, "loss": 0.2844, "step": 15733 }, { "epoch": 0.72, "grad_norm": 0.2361510393424766, "learning_rate": 3.766763982386371e-06, "loss": 0.1766, "step": 15734 }, { "epoch": 0.72, "grad_norm": 0.6185799716474243, "learning_rate": 3.765600552129344e-06, "loss": 0.3196, "step": 15735 }, { "epoch": 0.72, "grad_norm": 0.4171507387718398, "learning_rate": 3.764437259895913e-06, "loss": 0.2855, "step": 15736 }, { "epoch": 0.72, "grad_norm": 0.3809399995176105, "learning_rate": 3.7632741057118304e-06, "loss": 0.2624, "step": 15737 }, { "epoch": 0.72, "grad_norm": 0.7217694008642399, "learning_rate": 3.7621110896028467e-06, "loss": 0.3815, "step": 15738 }, { "epoch": 0.72, "grad_norm": 0.4033870229440153, "learning_rate": 3.7609482115947115e-06, "loss": 0.2681, "step": 15739 }, { "epoch": 0.72, "grad_norm": 0.3325661066863811, "learning_rate": 3.7597854717131733e-06, "loss": 0.1437, "step": 15740 }, { "epoch": 0.72, "grad_norm": 0.3356718459499345, "learning_rate": 3.7586228699839666e-06, "loss": 0.2491, "step": 15741 }, { "epoch": 0.72, "grad_norm": 0.3656771310244979, "learning_rate": 3.7574604064328336e-06, "loss": 0.2749, "step": 15742 }, { "epoch": 0.72, "grad_norm": 0.9700776302059902, "learning_rate": 3.7562980810855144e-06, "loss": 0.4917, "step": 15743 }, { "epoch": 0.72, "grad_norm": 0.6505888843461077, "learning_rate": 3.755135893967735e-06, "loss": 0.2754, "step": 15744 }, { "epoch": 0.72, "grad_norm": 0.3332711068042054, "learning_rate": 3.753973845105231e-06, "loss": 0.2568, "step": 15745 }, { "epoch": 0.72, "grad_norm": 0.5501707667046273, "learning_rate": 3.7528119345237224e-06, "loss": 0.3566, "step": 15746 }, { "epoch": 0.72, "grad_norm": 0.294511522410627, "learning_rate": 3.7516501622489365e-06, "loss": 0.0985, "step": 15747 }, { "epoch": 0.72, "grad_norm": 0.3988610014438023, "learning_rate": 3.750488528306598e-06, "loss": 0.314, "step": 15748 }, { "epoch": 0.72, "grad_norm": 0.3690077445223635, "learning_rate": 3.7493270327224162e-06, "loss": 0.2936, "step": 15749 }, { "epoch": 0.72, "grad_norm": 0.6160611994065603, "learning_rate": 3.748165675522113e-06, "loss": 0.2441, "step": 15750 }, { "epoch": 0.72, "grad_norm": 0.4135960420662529, "learning_rate": 3.747004456731389e-06, "loss": 0.2783, "step": 15751 }, { "epoch": 0.72, "grad_norm": 0.51599501583834, "learning_rate": 3.745843376375966e-06, "loss": 0.2443, "step": 15752 }, { "epoch": 0.72, "grad_norm": 0.24411267553542826, "learning_rate": 3.7446824344815437e-06, "loss": 0.176, "step": 15753 }, { "epoch": 0.72, "grad_norm": 0.41036095577992754, "learning_rate": 3.74352163107382e-06, "loss": 0.2969, "step": 15754 }, { "epoch": 0.72, "grad_norm": 0.7474440896137018, "learning_rate": 3.7423609661784965e-06, "loss": 0.4069, "step": 15755 }, { "epoch": 0.72, "grad_norm": 0.7842154866045788, "learning_rate": 3.7412004398212707e-06, "loss": 0.3861, "step": 15756 }, { "epoch": 0.72, "grad_norm": 0.2918373051885403, "learning_rate": 3.740040052027838e-06, "loss": 0.2171, "step": 15757 }, { "epoch": 0.72, "grad_norm": 0.35582706141741727, "learning_rate": 3.7388798028238815e-06, "loss": 0.2321, "step": 15758 }, { "epoch": 0.72, "grad_norm": 0.5944188150613753, "learning_rate": 3.7377196922350924e-06, "loss": 0.2547, "step": 15759 }, { "epoch": 0.72, "grad_norm": 0.3155941710886627, "learning_rate": 3.7365597202871564e-06, "loss": 0.2007, "step": 15760 }, { "epoch": 0.72, "grad_norm": 0.4152677361671202, "learning_rate": 3.7353998870057484e-06, "loss": 0.3229, "step": 15761 }, { "epoch": 0.72, "grad_norm": 0.8374256117287706, "learning_rate": 3.7342401924165516e-06, "loss": 0.4548, "step": 15762 }, { "epoch": 0.72, "grad_norm": 0.30023624381686526, "learning_rate": 3.7330806365452355e-06, "loss": 0.1855, "step": 15763 }, { "epoch": 0.72, "grad_norm": 1.3322611881831874, "learning_rate": 3.7319212194174727e-06, "loss": 0.5595, "step": 15764 }, { "epoch": 0.72, "grad_norm": 0.24675612719136056, "learning_rate": 3.730761941058938e-06, "loss": 0.2198, "step": 15765 }, { "epoch": 0.72, "grad_norm": 0.32197215281158237, "learning_rate": 3.7296028014952866e-06, "loss": 0.2036, "step": 15766 }, { "epoch": 0.72, "grad_norm": 0.7283613496903513, "learning_rate": 3.7284438007521896e-06, "loss": 0.3575, "step": 15767 }, { "epoch": 0.72, "grad_norm": 0.8579840306669612, "learning_rate": 3.727284938855296e-06, "loss": 0.4802, "step": 15768 }, { "epoch": 0.72, "grad_norm": 0.322197402899127, "learning_rate": 3.7261262158302745e-06, "loss": 0.2619, "step": 15769 }, { "epoch": 0.72, "grad_norm": 0.5213025328064339, "learning_rate": 3.7249676317027683e-06, "loss": 0.2477, "step": 15770 }, { "epoch": 0.72, "grad_norm": 0.4197641753216645, "learning_rate": 3.723809186498434e-06, "loss": 0.2342, "step": 15771 }, { "epoch": 0.72, "grad_norm": 0.3821969383249988, "learning_rate": 3.7226508802429118e-06, "loss": 0.2792, "step": 15772 }, { "epoch": 0.72, "grad_norm": 0.3574092542502935, "learning_rate": 3.7214927129618496e-06, "loss": 0.2605, "step": 15773 }, { "epoch": 0.72, "grad_norm": 0.48505651992622695, "learning_rate": 3.7203346846808898e-06, "loss": 0.2928, "step": 15774 }, { "epoch": 0.72, "grad_norm": 0.34122618850255426, "learning_rate": 3.719176795425665e-06, "loss": 0.2384, "step": 15775 }, { "epoch": 0.72, "grad_norm": 0.5318886614037652, "learning_rate": 3.7180190452218157e-06, "loss": 0.2524, "step": 15776 }, { "epoch": 0.72, "grad_norm": 0.3132705936196789, "learning_rate": 3.7168614340949672e-06, "loss": 0.2297, "step": 15777 }, { "epoch": 0.72, "grad_norm": 0.3519567583977024, "learning_rate": 3.71570396207075e-06, "loss": 0.266, "step": 15778 }, { "epoch": 0.72, "grad_norm": 0.7179153877037063, "learning_rate": 3.7145466291747935e-06, "loss": 0.3297, "step": 15779 }, { "epoch": 0.72, "grad_norm": 0.34588722640204356, "learning_rate": 3.7133894354327138e-06, "loss": 0.233, "step": 15780 }, { "epoch": 0.72, "grad_norm": 0.30956707917714277, "learning_rate": 3.7122323808701323e-06, "loss": 0.2683, "step": 15781 }, { "epoch": 0.73, "grad_norm": 1.4853247723345238, "learning_rate": 3.7110754655126703e-06, "loss": 0.6, "step": 15782 }, { "epoch": 0.73, "grad_norm": 0.7941554650957486, "learning_rate": 3.7099186893859317e-06, "loss": 0.2792, "step": 15783 }, { "epoch": 0.73, "grad_norm": 0.3490487265792109, "learning_rate": 3.7087620525155343e-06, "loss": 0.2705, "step": 15784 }, { "epoch": 0.73, "grad_norm": 0.38084321739689575, "learning_rate": 3.707605554927074e-06, "loss": 0.2916, "step": 15785 }, { "epoch": 0.73, "grad_norm": 0.25196420904020306, "learning_rate": 3.70644919664617e-06, "loss": 0.1134, "step": 15786 }, { "epoch": 0.73, "grad_norm": 0.4452361526463336, "learning_rate": 3.7052929776984114e-06, "loss": 0.265, "step": 15787 }, { "epoch": 0.73, "grad_norm": 0.606185339522962, "learning_rate": 3.704136898109403e-06, "loss": 0.3266, "step": 15788 }, { "epoch": 0.73, "grad_norm": 0.3560367420634984, "learning_rate": 3.7029809579047314e-06, "loss": 0.2319, "step": 15789 }, { "epoch": 0.73, "grad_norm": 0.3799742381189488, "learning_rate": 3.7018251571099927e-06, "loss": 0.2596, "step": 15790 }, { "epoch": 0.73, "grad_norm": 0.7120201456841941, "learning_rate": 3.7006694957507782e-06, "loss": 0.4296, "step": 15791 }, { "epoch": 0.73, "grad_norm": 0.2261287645050398, "learning_rate": 3.6995139738526662e-06, "loss": 0.1713, "step": 15792 }, { "epoch": 0.73, "grad_norm": 0.36994019557106134, "learning_rate": 3.6983585914412456e-06, "loss": 0.2833, "step": 15793 }, { "epoch": 0.73, "grad_norm": 1.2552418355706043, "learning_rate": 3.697203348542089e-06, "loss": 0.3983, "step": 15794 }, { "epoch": 0.73, "grad_norm": 0.6763381095527005, "learning_rate": 3.6960482451807757e-06, "loss": 0.3836, "step": 15795 }, { "epoch": 0.73, "grad_norm": 0.3249436654470026, "learning_rate": 3.694893281382881e-06, "loss": 0.1999, "step": 15796 }, { "epoch": 0.73, "grad_norm": 0.354363746677199, "learning_rate": 3.69373845717397e-06, "loss": 0.2905, "step": 15797 }, { "epoch": 0.73, "grad_norm": 0.43969182656759415, "learning_rate": 3.69258377257961e-06, "loss": 0.2683, "step": 15798 }, { "epoch": 0.73, "grad_norm": 0.26544143492733324, "learning_rate": 3.6914292276253705e-06, "loss": 0.1584, "step": 15799 }, { "epoch": 0.73, "grad_norm": 1.1023576773084665, "learning_rate": 3.6902748223368044e-06, "loss": 0.4443, "step": 15800 }, { "epoch": 0.73, "grad_norm": 0.4113269380573955, "learning_rate": 3.689120556739475e-06, "loss": 0.2979, "step": 15801 }, { "epoch": 0.73, "grad_norm": 0.3405148327620765, "learning_rate": 3.687966430858928e-06, "loss": 0.1877, "step": 15802 }, { "epoch": 0.73, "grad_norm": 1.3077987249900687, "learning_rate": 3.6868124447207266e-06, "loss": 0.7262, "step": 15803 }, { "epoch": 0.73, "grad_norm": 0.4129017821500772, "learning_rate": 3.68565859835041e-06, "loss": 0.3271, "step": 15804 }, { "epoch": 0.73, "grad_norm": 0.24827872916603902, "learning_rate": 3.6845048917735292e-06, "loss": 0.1267, "step": 15805 }, { "epoch": 0.73, "grad_norm": 0.37638856557267275, "learning_rate": 3.6833513250156207e-06, "loss": 0.2391, "step": 15806 }, { "epoch": 0.73, "grad_norm": 0.8960439361717315, "learning_rate": 3.6821978981022245e-06, "loss": 0.4086, "step": 15807 }, { "epoch": 0.73, "grad_norm": 0.7040881807631726, "learning_rate": 3.6810446110588825e-06, "loss": 0.3676, "step": 15808 }, { "epoch": 0.73, "grad_norm": 0.2917073581584961, "learning_rate": 3.6798914639111184e-06, "loss": 0.2337, "step": 15809 }, { "epoch": 0.73, "grad_norm": 0.3127535072909122, "learning_rate": 3.6787384566844685e-06, "loss": 0.1901, "step": 15810 }, { "epoch": 0.73, "grad_norm": 0.4021240546886768, "learning_rate": 3.6775855894044543e-06, "loss": 0.2556, "step": 15811 }, { "epoch": 0.73, "grad_norm": 0.43761701293147537, "learning_rate": 3.6764328620966016e-06, "loss": 0.2507, "step": 15812 }, { "epoch": 0.73, "grad_norm": 0.5049424195519188, "learning_rate": 3.6752802747864337e-06, "loss": 0.3609, "step": 15813 }, { "epoch": 0.73, "grad_norm": 0.45985937754675626, "learning_rate": 3.6741278274994605e-06, "loss": 0.2628, "step": 15814 }, { "epoch": 0.73, "grad_norm": 0.5559645561162057, "learning_rate": 3.6729755202612004e-06, "loss": 0.2916, "step": 15815 }, { "epoch": 0.73, "grad_norm": 0.3815890687164551, "learning_rate": 3.6718233530971657e-06, "loss": 0.2996, "step": 15816 }, { "epoch": 0.73, "grad_norm": 0.29075647309702546, "learning_rate": 3.670671326032865e-06, "loss": 0.1852, "step": 15817 }, { "epoch": 0.73, "grad_norm": 0.4918043179184085, "learning_rate": 3.6695194390938018e-06, "loss": 0.249, "step": 15818 }, { "epoch": 0.73, "grad_norm": 0.45891643493861695, "learning_rate": 3.668367692305469e-06, "loss": 0.2742, "step": 15819 }, { "epoch": 0.73, "grad_norm": 0.32350958090533904, "learning_rate": 3.667216085693379e-06, "loss": 0.2479, "step": 15820 }, { "epoch": 0.73, "grad_norm": 0.3703779075935711, "learning_rate": 3.6660646192830196e-06, "loss": 0.2964, "step": 15821 }, { "epoch": 0.73, "grad_norm": 0.2927145114658251, "learning_rate": 3.6649132930998877e-06, "loss": 0.1264, "step": 15822 }, { "epoch": 0.73, "grad_norm": 0.4550638169654456, "learning_rate": 3.663762107169466e-06, "loss": 0.2777, "step": 15823 }, { "epoch": 0.73, "grad_norm": 0.60642794675878, "learning_rate": 3.6626110615172437e-06, "loss": 0.3405, "step": 15824 }, { "epoch": 0.73, "grad_norm": 0.2929962274729631, "learning_rate": 3.661460156168709e-06, "loss": 0.1956, "step": 15825 }, { "epoch": 0.73, "grad_norm": 0.66972801630549, "learning_rate": 3.660309391149334e-06, "loss": 0.3371, "step": 15826 }, { "epoch": 0.73, "grad_norm": 0.40566993629851367, "learning_rate": 3.659158766484601e-06, "loss": 0.309, "step": 15827 }, { "epoch": 0.73, "grad_norm": 0.35935895421850533, "learning_rate": 3.6580082821999787e-06, "loss": 0.2473, "step": 15828 }, { "epoch": 0.73, "grad_norm": 0.5976165895307988, "learning_rate": 3.6568579383209414e-06, "loss": 0.2809, "step": 15829 }, { "epoch": 0.73, "grad_norm": 0.37345924081679094, "learning_rate": 3.6557077348729576e-06, "loss": 0.2964, "step": 15830 }, { "epoch": 0.73, "grad_norm": 0.2412155392794255, "learning_rate": 3.654557671881487e-06, "loss": 0.0899, "step": 15831 }, { "epoch": 0.73, "grad_norm": 0.3573669701331559, "learning_rate": 3.6534077493719945e-06, "loss": 0.2777, "step": 15832 }, { "epoch": 0.73, "grad_norm": 0.3806852589436953, "learning_rate": 3.6522579673699364e-06, "loss": 0.3183, "step": 15833 }, { "epoch": 0.73, "grad_norm": 0.9189779753233617, "learning_rate": 3.651108325900773e-06, "loss": 0.4512, "step": 15834 }, { "epoch": 0.73, "grad_norm": 0.40914614001581373, "learning_rate": 3.6499588249899485e-06, "loss": 0.2366, "step": 15835 }, { "epoch": 0.73, "grad_norm": 0.4038414880021001, "learning_rate": 3.648809464662919e-06, "loss": 0.2811, "step": 15836 }, { "epoch": 0.73, "grad_norm": 0.2624489219011203, "learning_rate": 3.6476602449451228e-06, "loss": 0.203, "step": 15837 }, { "epoch": 0.73, "grad_norm": 0.35678308729895347, "learning_rate": 3.6465111658620067e-06, "loss": 0.1871, "step": 15838 }, { "epoch": 0.73, "grad_norm": 0.5707720403581364, "learning_rate": 3.645362227439013e-06, "loss": 0.387, "step": 15839 }, { "epoch": 0.73, "grad_norm": 0.3789732517908, "learning_rate": 3.644213429701571e-06, "loss": 0.2802, "step": 15840 }, { "epoch": 0.73, "grad_norm": 0.667087200712018, "learning_rate": 3.6430647726751187e-06, "loss": 0.2347, "step": 15841 }, { "epoch": 0.73, "grad_norm": 0.4240838509844322, "learning_rate": 3.6419162563850886e-06, "loss": 0.3019, "step": 15842 }, { "epoch": 0.73, "grad_norm": 0.24809770756173508, "learning_rate": 3.640767880856901e-06, "loss": 0.1566, "step": 15843 }, { "epoch": 0.73, "grad_norm": 0.35348859222886875, "learning_rate": 3.6396196461159874e-06, "loss": 0.2927, "step": 15844 }, { "epoch": 0.73, "grad_norm": 0.3396110022707792, "learning_rate": 3.638471552187757e-06, "loss": 0.2166, "step": 15845 }, { "epoch": 0.73, "grad_norm": 0.730663059887459, "learning_rate": 3.6373235990976418e-06, "loss": 0.3652, "step": 15846 }, { "epoch": 0.73, "grad_norm": 0.917159125747844, "learning_rate": 3.63617578687105e-06, "loss": 0.4188, "step": 15847 }, { "epoch": 0.73, "grad_norm": 0.2573768433462493, "learning_rate": 3.63502811553339e-06, "loss": 0.2218, "step": 15848 }, { "epoch": 0.73, "grad_norm": 0.490454631024778, "learning_rate": 3.633880585110072e-06, "loss": 0.227, "step": 15849 }, { "epoch": 0.73, "grad_norm": 0.3835325518134792, "learning_rate": 3.6327331956265035e-06, "loss": 0.2395, "step": 15850 }, { "epoch": 0.73, "grad_norm": 0.33927108437763553, "learning_rate": 3.6315859471080874e-06, "loss": 0.2391, "step": 15851 }, { "epoch": 0.73, "grad_norm": 0.37452129282462276, "learning_rate": 3.630438839580217e-06, "loss": 0.2818, "step": 15852 }, { "epoch": 0.73, "grad_norm": 0.6158234640347121, "learning_rate": 3.6292918730682948e-06, "loss": 0.3163, "step": 15853 }, { "epoch": 0.73, "grad_norm": 0.36019077752292356, "learning_rate": 3.6281450475977076e-06, "loss": 0.2053, "step": 15854 }, { "epoch": 0.73, "grad_norm": 0.2899562078186164, "learning_rate": 3.6269983631938476e-06, "loss": 0.2086, "step": 15855 }, { "epoch": 0.73, "grad_norm": 0.32998516287621893, "learning_rate": 3.6258518198821045e-06, "loss": 0.2588, "step": 15856 }, { "epoch": 0.73, "grad_norm": 0.44121283494998537, "learning_rate": 3.624705417687856e-06, "loss": 0.3152, "step": 15857 }, { "epoch": 0.73, "grad_norm": 0.7894060501039268, "learning_rate": 3.6235591566364847e-06, "loss": 0.3089, "step": 15858 }, { "epoch": 0.73, "grad_norm": 0.6983215888741472, "learning_rate": 3.6224130367533715e-06, "loss": 0.286, "step": 15859 }, { "epoch": 0.73, "grad_norm": 0.3412941104745918, "learning_rate": 3.6212670580638833e-06, "loss": 0.2805, "step": 15860 }, { "epoch": 0.73, "grad_norm": 0.3177221756540167, "learning_rate": 3.6201212205933976e-06, "loss": 0.1812, "step": 15861 }, { "epoch": 0.73, "grad_norm": 0.9081695792132801, "learning_rate": 3.618975524367272e-06, "loss": 0.515, "step": 15862 }, { "epoch": 0.73, "grad_norm": 0.35209521859603016, "learning_rate": 3.617829969410885e-06, "loss": 0.2509, "step": 15863 }, { "epoch": 0.73, "grad_norm": 0.2877598011211002, "learning_rate": 3.6166845557495924e-06, "loss": 0.2371, "step": 15864 }, { "epoch": 0.73, "grad_norm": 0.984732311909354, "learning_rate": 3.615539283408748e-06, "loss": 0.3628, "step": 15865 }, { "epoch": 0.73, "grad_norm": 0.3686174434423414, "learning_rate": 3.6143941524137125e-06, "loss": 0.2629, "step": 15866 }, { "epoch": 0.73, "grad_norm": 0.8081670212701979, "learning_rate": 3.6132491627898305e-06, "loss": 0.2407, "step": 15867 }, { "epoch": 0.73, "grad_norm": 0.3653937531059826, "learning_rate": 3.6121043145624624e-06, "loss": 0.3286, "step": 15868 }, { "epoch": 0.73, "grad_norm": 0.3950692333407417, "learning_rate": 3.610959607756944e-06, "loss": 0.2812, "step": 15869 }, { "epoch": 0.73, "grad_norm": 0.9335088565241662, "learning_rate": 3.6098150423986267e-06, "loss": 0.5228, "step": 15870 }, { "epoch": 0.73, "grad_norm": 0.2308588434369723, "learning_rate": 3.608670618512842e-06, "loss": 0.1352, "step": 15871 }, { "epoch": 0.73, "grad_norm": 0.3041436772183461, "learning_rate": 3.607526336124929e-06, "loss": 0.2552, "step": 15872 }, { "epoch": 0.73, "grad_norm": 1.2169527996671472, "learning_rate": 3.6063821952602252e-06, "loss": 0.5171, "step": 15873 }, { "epoch": 0.73, "grad_norm": 0.5664899401649722, "learning_rate": 3.605238195944054e-06, "loss": 0.2455, "step": 15874 }, { "epoch": 0.73, "grad_norm": 0.4388004220364488, "learning_rate": 3.6040943382017467e-06, "loss": 0.3057, "step": 15875 }, { "epoch": 0.73, "grad_norm": 0.3401544019265543, "learning_rate": 3.6029506220586285e-06, "loss": 0.2917, "step": 15876 }, { "epoch": 0.73, "grad_norm": 0.14703735860297892, "learning_rate": 3.601807047540016e-06, "loss": 0.07, "step": 15877 }, { "epoch": 0.73, "grad_norm": 0.4377622607620792, "learning_rate": 3.6006636146712304e-06, "loss": 0.2907, "step": 15878 }, { "epoch": 0.73, "grad_norm": 1.0200807972955561, "learning_rate": 3.599520323477579e-06, "loss": 0.465, "step": 15879 }, { "epoch": 0.73, "grad_norm": 0.33282652038509297, "learning_rate": 3.5983771739843855e-06, "loss": 0.2438, "step": 15880 }, { "epoch": 0.73, "grad_norm": 0.31979100952939926, "learning_rate": 3.5972341662169473e-06, "loss": 0.2605, "step": 15881 }, { "epoch": 0.73, "grad_norm": 0.4663040668798485, "learning_rate": 3.596091300200578e-06, "loss": 0.2674, "step": 15882 }, { "epoch": 0.73, "grad_norm": 0.4439548926474651, "learning_rate": 3.594948575960574e-06, "loss": 0.2242, "step": 15883 }, { "epoch": 0.73, "grad_norm": 0.24975242026354447, "learning_rate": 3.593805993522229e-06, "loss": 0.2202, "step": 15884 }, { "epoch": 0.73, "grad_norm": 0.8738545750893891, "learning_rate": 3.592663552910852e-06, "loss": 0.4477, "step": 15885 }, { "epoch": 0.73, "grad_norm": 0.6513038967583082, "learning_rate": 3.5915212541517253e-06, "loss": 0.3944, "step": 15886 }, { "epoch": 0.73, "grad_norm": 0.3108793223415059, "learning_rate": 3.5903790972701445e-06, "loss": 0.2033, "step": 15887 }, { "epoch": 0.73, "grad_norm": 0.3776517319181402, "learning_rate": 3.589237082291389e-06, "loss": 0.2905, "step": 15888 }, { "epoch": 0.73, "grad_norm": 0.2707656722460186, "learning_rate": 3.588095209240746e-06, "loss": 0.1451, "step": 15889 }, { "epoch": 0.73, "grad_norm": 0.30505701099863564, "learning_rate": 3.586953478143499e-06, "loss": 0.1938, "step": 15890 }, { "epoch": 0.73, "grad_norm": 0.7868764213755611, "learning_rate": 3.585811889024917e-06, "loss": 0.3981, "step": 15891 }, { "epoch": 0.73, "grad_norm": 0.3709444659775492, "learning_rate": 3.5846704419102783e-06, "loss": 0.3149, "step": 15892 }, { "epoch": 0.73, "grad_norm": 0.3353333619229525, "learning_rate": 3.583529136824856e-06, "loss": 0.1954, "step": 15893 }, { "epoch": 0.73, "grad_norm": 1.1488152496581867, "learning_rate": 3.5823879737939114e-06, "loss": 0.5255, "step": 15894 }, { "epoch": 0.73, "grad_norm": 0.2533530937268239, "learning_rate": 3.581246952842714e-06, "loss": 0.1876, "step": 15895 }, { "epoch": 0.73, "grad_norm": 0.386237148308448, "learning_rate": 3.58010607399652e-06, "loss": 0.2882, "step": 15896 }, { "epoch": 0.73, "grad_norm": 0.5358586153510079, "learning_rate": 3.5789653372805897e-06, "loss": 0.2635, "step": 15897 }, { "epoch": 0.73, "grad_norm": 0.8312343062887351, "learning_rate": 3.5778247427201784e-06, "loss": 0.4185, "step": 15898 }, { "epoch": 0.73, "grad_norm": 0.34437869024038903, "learning_rate": 3.5766842903405407e-06, "loss": 0.2624, "step": 15899 }, { "epoch": 0.73, "grad_norm": 0.35466906186404656, "learning_rate": 3.575543980166919e-06, "loss": 0.2505, "step": 15900 }, { "epoch": 0.73, "grad_norm": 0.25668708925688655, "learning_rate": 3.5744038122245606e-06, "loss": 0.143, "step": 15901 }, { "epoch": 0.73, "grad_norm": 0.3516455352547877, "learning_rate": 3.5732637865387133e-06, "loss": 0.2559, "step": 15902 }, { "epoch": 0.73, "grad_norm": 0.4789143868316892, "learning_rate": 3.5721239031346067e-06, "loss": 0.2743, "step": 15903 }, { "epoch": 0.73, "grad_norm": 0.5427761939456859, "learning_rate": 3.5709841620374864e-06, "loss": 0.3668, "step": 15904 }, { "epoch": 0.73, "grad_norm": 0.3519099646132903, "learning_rate": 3.5698445632725766e-06, "loss": 0.248, "step": 15905 }, { "epoch": 0.73, "grad_norm": 1.2820194999232484, "learning_rate": 3.5687051068651102e-06, "loss": 0.3294, "step": 15906 }, { "epoch": 0.73, "grad_norm": 0.24558789546550086, "learning_rate": 3.5675657928403185e-06, "loss": 0.2165, "step": 15907 }, { "epoch": 0.73, "grad_norm": 0.3334280296589789, "learning_rate": 3.5664266212234157e-06, "loss": 0.2321, "step": 15908 }, { "epoch": 0.73, "grad_norm": 0.7626541481911457, "learning_rate": 3.565287592039628e-06, "loss": 0.3785, "step": 15909 }, { "epoch": 0.73, "grad_norm": 0.7874453980946099, "learning_rate": 3.564148705314171e-06, "loss": 0.3373, "step": 15910 }, { "epoch": 0.73, "grad_norm": 0.4313019464585188, "learning_rate": 3.5630099610722613e-06, "loss": 0.2782, "step": 15911 }, { "epoch": 0.73, "grad_norm": 0.32402061692861933, "learning_rate": 3.5618713593391076e-06, "loss": 0.3103, "step": 15912 }, { "epoch": 0.73, "grad_norm": 0.5398902674076549, "learning_rate": 3.5607329001399137e-06, "loss": 0.1864, "step": 15913 }, { "epoch": 0.73, "grad_norm": 0.3972068529717908, "learning_rate": 3.5595945834998868e-06, "loss": 0.2455, "step": 15914 }, { "epoch": 0.73, "grad_norm": 0.33091121159075143, "learning_rate": 3.5584564094442286e-06, "loss": 0.2495, "step": 15915 }, { "epoch": 0.73, "grad_norm": 0.5081384629121154, "learning_rate": 3.55731837799814e-06, "loss": 0.2561, "step": 15916 }, { "epoch": 0.73, "grad_norm": 0.34233883619897165, "learning_rate": 3.55618048918681e-06, "loss": 0.2556, "step": 15917 }, { "epoch": 0.73, "grad_norm": 1.0241393751380483, "learning_rate": 3.555042743035434e-06, "loss": 0.4684, "step": 15918 }, { "epoch": 0.73, "grad_norm": 0.3529665680172745, "learning_rate": 3.5539051395692024e-06, "loss": 0.2501, "step": 15919 }, { "epoch": 0.73, "grad_norm": 0.34517162234723736, "learning_rate": 3.5527676788132947e-06, "loss": 0.2686, "step": 15920 }, { "epoch": 0.73, "grad_norm": 0.36500721147743675, "learning_rate": 3.5516303607929004e-06, "loss": 0.1955, "step": 15921 }, { "epoch": 0.73, "grad_norm": 1.4273199284142934, "learning_rate": 3.5504931855331914e-06, "loss": 0.6747, "step": 15922 }, { "epoch": 0.73, "grad_norm": 0.33890116153177063, "learning_rate": 3.5493561530593477e-06, "loss": 0.2064, "step": 15923 }, { "epoch": 0.73, "grad_norm": 0.37412767685142456, "learning_rate": 3.548219263396544e-06, "loss": 0.32, "step": 15924 }, { "epoch": 0.73, "grad_norm": 0.8452482295136036, "learning_rate": 3.547082516569945e-06, "loss": 0.4046, "step": 15925 }, { "epoch": 0.73, "grad_norm": 0.37840655645816307, "learning_rate": 3.5459459126047226e-06, "loss": 0.2189, "step": 15926 }, { "epoch": 0.73, "grad_norm": 0.3555086220862457, "learning_rate": 3.544809451526031e-06, "loss": 0.2382, "step": 15927 }, { "epoch": 0.73, "grad_norm": 0.4050517731528359, "learning_rate": 3.5436731333590423e-06, "loss": 0.2529, "step": 15928 }, { "epoch": 0.73, "grad_norm": 0.404287575542151, "learning_rate": 3.5425369581289082e-06, "loss": 0.1704, "step": 15929 }, { "epoch": 0.73, "grad_norm": 0.5933353685389976, "learning_rate": 3.5414009258607794e-06, "loss": 0.3572, "step": 15930 }, { "epoch": 0.73, "grad_norm": 0.45358261516233567, "learning_rate": 3.5402650365798085e-06, "loss": 0.2862, "step": 15931 }, { "epoch": 0.73, "grad_norm": 0.4437945034926596, "learning_rate": 3.539129290311144e-06, "loss": 0.197, "step": 15932 }, { "epoch": 0.73, "grad_norm": 0.41548132994529546, "learning_rate": 3.5379936870799327e-06, "loss": 0.2435, "step": 15933 }, { "epoch": 0.73, "grad_norm": 0.5770255063890887, "learning_rate": 3.5368582269113107e-06, "loss": 0.3, "step": 15934 }, { "epoch": 0.73, "grad_norm": 0.4182096740428568, "learning_rate": 3.535722909830417e-06, "loss": 0.3041, "step": 15935 }, { "epoch": 0.73, "grad_norm": 0.8069506946065207, "learning_rate": 3.5345877358623914e-06, "loss": 0.2516, "step": 15936 }, { "epoch": 0.73, "grad_norm": 0.773040329493093, "learning_rate": 3.5334527050323596e-06, "loss": 0.3747, "step": 15937 }, { "epoch": 0.73, "grad_norm": 0.45323681551243933, "learning_rate": 3.5323178173654547e-06, "loss": 0.2993, "step": 15938 }, { "epoch": 0.73, "grad_norm": 0.29006369350188915, "learning_rate": 3.5311830728867967e-06, "loss": 0.176, "step": 15939 }, { "epoch": 0.73, "grad_norm": 0.341647903831361, "learning_rate": 3.53004847162151e-06, "loss": 0.2508, "step": 15940 }, { "epoch": 0.73, "grad_norm": 0.3910270778763466, "learning_rate": 3.5289140135947185e-06, "loss": 0.2877, "step": 15941 }, { "epoch": 0.73, "grad_norm": 0.5607472076924694, "learning_rate": 3.5277796988315303e-06, "loss": 0.2466, "step": 15942 }, { "epoch": 0.73, "grad_norm": 0.42549760041008766, "learning_rate": 3.5266455273570654e-06, "loss": 0.3073, "step": 15943 }, { "epoch": 0.73, "grad_norm": 0.4456841470349993, "learning_rate": 3.525511499196422e-06, "loss": 0.2848, "step": 15944 }, { "epoch": 0.73, "grad_norm": 0.5814306663680847, "learning_rate": 3.524377614374721e-06, "loss": 0.2171, "step": 15945 }, { "epoch": 0.73, "grad_norm": 0.262207270275035, "learning_rate": 3.523243872917055e-06, "loss": 0.1867, "step": 15946 }, { "epoch": 0.73, "grad_norm": 0.3642399082130117, "learning_rate": 3.5221102748485304e-06, "loss": 0.2797, "step": 15947 }, { "epoch": 0.73, "grad_norm": 0.3555925069991194, "learning_rate": 3.5209768201942374e-06, "loss": 0.2728, "step": 15948 }, { "epoch": 0.73, "grad_norm": 0.7754554940258331, "learning_rate": 3.5198435089792726e-06, "loss": 0.314, "step": 15949 }, { "epoch": 0.73, "grad_norm": 0.7419342943759069, "learning_rate": 3.5187103412287302e-06, "loss": 0.3106, "step": 15950 }, { "epoch": 0.73, "grad_norm": 0.3501436320336586, "learning_rate": 3.517577316967692e-06, "loss": 0.2735, "step": 15951 }, { "epoch": 0.73, "grad_norm": 0.2506862773427205, "learning_rate": 3.5164444362212435e-06, "loss": 0.1598, "step": 15952 }, { "epoch": 0.73, "grad_norm": 0.5777289646120276, "learning_rate": 3.5153116990144697e-06, "loss": 0.3273, "step": 15953 }, { "epoch": 0.73, "grad_norm": 0.6083268172832598, "learning_rate": 3.5141791053724405e-06, "loss": 0.3162, "step": 15954 }, { "epoch": 0.73, "grad_norm": 0.3705204969192318, "learning_rate": 3.513046655320239e-06, "loss": 0.2478, "step": 15955 }, { "epoch": 0.73, "grad_norm": 0.6889725031008458, "learning_rate": 3.51191434888293e-06, "loss": 0.3799, "step": 15956 }, { "epoch": 0.73, "grad_norm": 0.46082763960038525, "learning_rate": 3.510782186085583e-06, "loss": 0.3349, "step": 15957 }, { "epoch": 0.73, "grad_norm": 0.2800832968841728, "learning_rate": 3.509650166953267e-06, "loss": 0.1424, "step": 15958 }, { "epoch": 0.73, "grad_norm": 0.2673179504775796, "learning_rate": 3.5085182915110373e-06, "loss": 0.27, "step": 15959 }, { "epoch": 0.73, "grad_norm": 0.522675125364779, "learning_rate": 3.507386559783961e-06, "loss": 0.3266, "step": 15960 }, { "epoch": 0.73, "grad_norm": 0.49469698159294917, "learning_rate": 3.5062549717970796e-06, "loss": 0.3019, "step": 15961 }, { "epoch": 0.73, "grad_norm": 0.41181943279439326, "learning_rate": 3.5051235275754623e-06, "loss": 0.1775, "step": 15962 }, { "epoch": 0.73, "grad_norm": 0.39701158716181434, "learning_rate": 3.5039922271441473e-06, "loss": 0.2975, "step": 15963 }, { "epoch": 0.73, "grad_norm": 0.5182699046311277, "learning_rate": 3.5028610705281864e-06, "loss": 0.3525, "step": 15964 }, { "epoch": 0.73, "grad_norm": 0.6098331511668046, "learning_rate": 3.501730057752616e-06, "loss": 0.2617, "step": 15965 }, { "epoch": 0.73, "grad_norm": 0.37395907463308353, "learning_rate": 3.5005991888424793e-06, "loss": 0.2713, "step": 15966 }, { "epoch": 0.73, "grad_norm": 0.23326878257361724, "learning_rate": 3.4994684638228148e-06, "loss": 0.2099, "step": 15967 }, { "epoch": 0.73, "grad_norm": 0.6929078652579116, "learning_rate": 3.498337882718651e-06, "loss": 0.132, "step": 15968 }, { "epoch": 0.73, "grad_norm": 0.4225568386414838, "learning_rate": 3.49720744555502e-06, "loss": 0.2847, "step": 15969 }, { "epoch": 0.73, "grad_norm": 0.5077890939177351, "learning_rate": 3.4960771523569515e-06, "loss": 0.3473, "step": 15970 }, { "epoch": 0.73, "grad_norm": 0.4318510997081486, "learning_rate": 3.4949470031494625e-06, "loss": 0.3059, "step": 15971 }, { "epoch": 0.73, "grad_norm": 0.34764467668991683, "learning_rate": 3.493816997957582e-06, "loss": 0.2362, "step": 15972 }, { "epoch": 0.73, "grad_norm": 0.30149092289962215, "learning_rate": 3.4926871368063177e-06, "loss": 0.2097, "step": 15973 }, { "epoch": 0.73, "grad_norm": 0.8910870102133231, "learning_rate": 3.491557419720689e-06, "loss": 0.3643, "step": 15974 }, { "epoch": 0.73, "grad_norm": 0.26275790862669485, "learning_rate": 3.4904278467257057e-06, "loss": 0.2216, "step": 15975 }, { "epoch": 0.73, "grad_norm": 0.931847972943403, "learning_rate": 3.4892984178463797e-06, "loss": 0.4693, "step": 15976 }, { "epoch": 0.73, "grad_norm": 0.6117167828880529, "learning_rate": 3.4881691331077117e-06, "loss": 0.3512, "step": 15977 }, { "epoch": 0.73, "grad_norm": 0.3472049775735102, "learning_rate": 3.4870399925346955e-06, "loss": 0.2012, "step": 15978 }, { "epoch": 0.73, "grad_norm": 0.27577459534910365, "learning_rate": 3.485910996152344e-06, "loss": 0.2341, "step": 15979 }, { "epoch": 0.73, "grad_norm": 0.39745666577191496, "learning_rate": 3.484782143985641e-06, "loss": 0.2076, "step": 15980 }, { "epoch": 0.73, "grad_norm": 0.4279330621104455, "learning_rate": 3.4836534360595852e-06, "loss": 0.2152, "step": 15981 }, { "epoch": 0.73, "grad_norm": 0.4798386238157052, "learning_rate": 3.482524872399159e-06, "loss": 0.3091, "step": 15982 }, { "epoch": 0.73, "grad_norm": 0.43937690985847544, "learning_rate": 3.4813964530293497e-06, "loss": 0.3256, "step": 15983 }, { "epoch": 0.73, "grad_norm": 0.386533857834818, "learning_rate": 3.4802681779751433e-06, "loss": 0.2898, "step": 15984 }, { "epoch": 0.73, "grad_norm": 0.2588281581813228, "learning_rate": 3.4791400472615133e-06, "loss": 0.1132, "step": 15985 }, { "epoch": 0.73, "grad_norm": 0.5480010795667939, "learning_rate": 3.4780120609134404e-06, "loss": 0.3348, "step": 15986 }, { "epoch": 0.73, "grad_norm": 0.31419664065496866, "learning_rate": 3.4768842189558918e-06, "loss": 0.2513, "step": 15987 }, { "epoch": 0.73, "grad_norm": 0.6477000617616433, "learning_rate": 3.475756521413839e-06, "loss": 0.2794, "step": 15988 }, { "epoch": 0.73, "grad_norm": 0.7417110251271716, "learning_rate": 3.4746289683122525e-06, "loss": 0.4314, "step": 15989 }, { "epoch": 0.73, "grad_norm": 0.4080169469670278, "learning_rate": 3.473501559676088e-06, "loss": 0.2958, "step": 15990 }, { "epoch": 0.73, "grad_norm": 0.41224255354791584, "learning_rate": 3.4723742955303087e-06, "loss": 0.2471, "step": 15991 }, { "epoch": 0.73, "grad_norm": 0.25961155693087923, "learning_rate": 3.47124717589987e-06, "loss": 0.1513, "step": 15992 }, { "epoch": 0.73, "grad_norm": 0.3572193766172659, "learning_rate": 3.4701202008097313e-06, "loss": 0.2709, "step": 15993 }, { "epoch": 0.73, "grad_norm": 0.7497176985419289, "learning_rate": 3.4689933702848365e-06, "loss": 0.2943, "step": 15994 }, { "epoch": 0.73, "grad_norm": 0.31931789406963673, "learning_rate": 3.4678666843501276e-06, "loss": 0.2731, "step": 15995 }, { "epoch": 0.73, "grad_norm": 0.3519319825450123, "learning_rate": 3.466740143030561e-06, "loss": 0.2663, "step": 15996 }, { "epoch": 0.73, "grad_norm": 1.2058378481887444, "learning_rate": 3.4656137463510676e-06, "loss": 0.5383, "step": 15997 }, { "epoch": 0.73, "grad_norm": 0.20477227340452706, "learning_rate": 3.464487494336591e-06, "loss": 0.1546, "step": 15998 }, { "epoch": 0.73, "grad_norm": 0.3945061938130286, "learning_rate": 3.4633613870120596e-06, "loss": 0.2643, "step": 15999 }, { "epoch": 0.74, "grad_norm": 0.6923559988319632, "learning_rate": 3.462235424402407e-06, "loss": 0.3858, "step": 16000 }, { "epoch": 0.74, "grad_norm": 0.7080862989221754, "learning_rate": 3.4611096065325644e-06, "loss": 0.235, "step": 16001 }, { "epoch": 0.74, "grad_norm": 0.394599354931304, "learning_rate": 3.4599839334274488e-06, "loss": 0.3005, "step": 16002 }, { "epoch": 0.74, "grad_norm": 0.38462109574640807, "learning_rate": 3.458858405111989e-06, "loss": 0.2802, "step": 16003 }, { "epoch": 0.74, "grad_norm": 0.3013967479084854, "learning_rate": 3.4577330216110925e-06, "loss": 0.138, "step": 16004 }, { "epoch": 0.74, "grad_norm": 0.33797329034160273, "learning_rate": 3.4566077829496892e-06, "loss": 0.2524, "step": 16005 }, { "epoch": 0.74, "grad_norm": 0.4865594372464824, "learning_rate": 3.4554826891526828e-06, "loss": 0.3378, "step": 16006 }, { "epoch": 0.74, "grad_norm": 0.49078257441504675, "learning_rate": 3.454357740244978e-06, "loss": 0.2894, "step": 16007 }, { "epoch": 0.74, "grad_norm": 0.33117981759534654, "learning_rate": 3.453232936251485e-06, "loss": 0.2449, "step": 16008 }, { "epoch": 0.74, "grad_norm": 1.4009861010774025, "learning_rate": 3.452108277197104e-06, "loss": 0.542, "step": 16009 }, { "epoch": 0.74, "grad_norm": 0.30514851762621314, "learning_rate": 3.450983763106739e-06, "loss": 0.2599, "step": 16010 }, { "epoch": 0.74, "grad_norm": 0.3378193693971455, "learning_rate": 3.449859394005277e-06, "loss": 0.2239, "step": 16011 }, { "epoch": 0.74, "grad_norm": 0.4679353510900042, "learning_rate": 3.4487351699176155e-06, "loss": 0.2659, "step": 16012 }, { "epoch": 0.74, "grad_norm": 1.2158495681457318, "learning_rate": 3.4476110908686467e-06, "loss": 0.7061, "step": 16013 }, { "epoch": 0.74, "grad_norm": 0.33809495012038887, "learning_rate": 3.446487156883249e-06, "loss": 0.1942, "step": 16014 }, { "epoch": 0.74, "grad_norm": 0.3690940110294595, "learning_rate": 3.4453633679863142e-06, "loss": 0.2756, "step": 16015 }, { "epoch": 0.74, "grad_norm": 0.639223639505482, "learning_rate": 3.4442397242027116e-06, "loss": 0.3782, "step": 16016 }, { "epoch": 0.74, "grad_norm": 0.3230169504719691, "learning_rate": 3.443116225557325e-06, "loss": 0.183, "step": 16017 }, { "epoch": 0.74, "grad_norm": 0.2992873209342639, "learning_rate": 3.4419928720750274e-06, "loss": 0.1996, "step": 16018 }, { "epoch": 0.74, "grad_norm": 0.5662242530883589, "learning_rate": 3.4408696637806837e-06, "loss": 0.3602, "step": 16019 }, { "epoch": 0.74, "grad_norm": 0.3210874080940691, "learning_rate": 3.4397466006991676e-06, "loss": 0.1839, "step": 16020 }, { "epoch": 0.74, "grad_norm": 0.9924887460510491, "learning_rate": 3.438623682855332e-06, "loss": 0.3814, "step": 16021 }, { "epoch": 0.74, "grad_norm": 0.4994724426068747, "learning_rate": 3.437500910274052e-06, "loss": 0.341, "step": 16022 }, { "epoch": 0.74, "grad_norm": 0.2861738993342746, "learning_rate": 3.436378282980175e-06, "loss": 0.2485, "step": 16023 }, { "epoch": 0.74, "grad_norm": 0.35692002166363995, "learning_rate": 3.435255800998555e-06, "loss": 0.1218, "step": 16024 }, { "epoch": 0.74, "grad_norm": 1.3224771398885873, "learning_rate": 3.434133464354044e-06, "loss": 0.7636, "step": 16025 }, { "epoch": 0.74, "grad_norm": 0.33826544814868903, "learning_rate": 3.433011273071488e-06, "loss": 0.2387, "step": 16026 }, { "epoch": 0.74, "grad_norm": 0.399478138512981, "learning_rate": 3.4318892271757387e-06, "loss": 0.24, "step": 16027 }, { "epoch": 0.74, "grad_norm": 1.1847104330800728, "learning_rate": 3.4307673266916275e-06, "loss": 0.3578, "step": 16028 }, { "epoch": 0.74, "grad_norm": 0.3506241009764794, "learning_rate": 3.4296455716439957e-06, "loss": 0.231, "step": 16029 }, { "epoch": 0.74, "grad_norm": 0.2426353904403282, "learning_rate": 3.4285239620576814e-06, "loss": 0.142, "step": 16030 }, { "epoch": 0.74, "grad_norm": 0.5237174179937928, "learning_rate": 3.4274024979575107e-06, "loss": 0.3844, "step": 16031 }, { "epoch": 0.74, "grad_norm": 0.36734531906511436, "learning_rate": 3.426281179368317e-06, "loss": 0.2765, "step": 16032 }, { "epoch": 0.74, "grad_norm": 0.8252836836871129, "learning_rate": 3.425160006314918e-06, "loss": 0.2886, "step": 16033 }, { "epoch": 0.74, "grad_norm": 0.405705796355882, "learning_rate": 3.4240389788221407e-06, "loss": 0.299, "step": 16034 }, { "epoch": 0.74, "grad_norm": 0.4145311487230021, "learning_rate": 3.4229180969148048e-06, "loss": 0.2374, "step": 16035 }, { "epoch": 0.74, "grad_norm": 0.2683601079381109, "learning_rate": 3.42179736061772e-06, "loss": 0.1894, "step": 16036 }, { "epoch": 0.74, "grad_norm": 0.546523079567316, "learning_rate": 3.420676769955705e-06, "loss": 0.3027, "step": 16037 }, { "epoch": 0.74, "grad_norm": 0.44651271253850955, "learning_rate": 3.419556324953558e-06, "loss": 0.286, "step": 16038 }, { "epoch": 0.74, "grad_norm": 0.3930930404730781, "learning_rate": 3.418436025636099e-06, "loss": 0.2944, "step": 16039 }, { "epoch": 0.74, "grad_norm": 0.9511791100046201, "learning_rate": 3.4173158720281197e-06, "loss": 0.2714, "step": 16040 }, { "epoch": 0.74, "grad_norm": 0.43363196530128345, "learning_rate": 3.416195864154426e-06, "loss": 0.2828, "step": 16041 }, { "epoch": 0.74, "grad_norm": 0.24930233478609265, "learning_rate": 3.4150760020398056e-06, "loss": 0.2328, "step": 16042 }, { "epoch": 0.74, "grad_norm": 0.9286879525441522, "learning_rate": 3.4139562857090568e-06, "loss": 0.5221, "step": 16043 }, { "epoch": 0.74, "grad_norm": 0.28858501914574175, "learning_rate": 3.412836715186971e-06, "loss": 0.2167, "step": 16044 }, { "epoch": 0.74, "grad_norm": 0.7911303205032798, "learning_rate": 3.41171729049833e-06, "loss": 0.4124, "step": 16045 }, { "epoch": 0.74, "grad_norm": 0.4298460707940331, "learning_rate": 3.4105980116679195e-06, "loss": 0.2596, "step": 16046 }, { "epoch": 0.74, "grad_norm": 0.3880275859438745, "learning_rate": 3.409478878720516e-06, "loss": 0.2662, "step": 16047 }, { "epoch": 0.74, "grad_norm": 0.5288520393918172, "learning_rate": 3.408359891680897e-06, "loss": 0.2392, "step": 16048 }, { "epoch": 0.74, "grad_norm": 0.9013495980025912, "learning_rate": 3.407241050573841e-06, "loss": 0.5594, "step": 16049 }, { "epoch": 0.74, "grad_norm": 0.32491269761548563, "learning_rate": 3.406122355424111e-06, "loss": 0.2146, "step": 16050 }, { "epoch": 0.74, "grad_norm": 0.41222003668588764, "learning_rate": 3.405003806256476e-06, "loss": 0.3178, "step": 16051 }, { "epoch": 0.74, "grad_norm": 0.5623822527495987, "learning_rate": 3.4038854030957035e-06, "loss": 0.251, "step": 16052 }, { "epoch": 0.74, "grad_norm": 0.3479438946069456, "learning_rate": 3.402767145966548e-06, "loss": 0.1561, "step": 16053 }, { "epoch": 0.74, "grad_norm": 0.3238819764586656, "learning_rate": 3.4016490348937735e-06, "loss": 0.2795, "step": 16054 }, { "epoch": 0.74, "grad_norm": 0.8053159834172191, "learning_rate": 3.400531069902122e-06, "loss": 0.4176, "step": 16055 }, { "epoch": 0.74, "grad_norm": 0.4021190846936983, "learning_rate": 3.399413251016359e-06, "loss": 0.1897, "step": 16056 }, { "epoch": 0.74, "grad_norm": 0.2786882476644866, "learning_rate": 3.3982955782612216e-06, "loss": 0.2098, "step": 16057 }, { "epoch": 0.74, "grad_norm": 0.3766644020809064, "learning_rate": 3.3971780516614607e-06, "loss": 0.2802, "step": 16058 }, { "epoch": 0.74, "grad_norm": 0.42466331913823124, "learning_rate": 3.39606067124181e-06, "loss": 0.1671, "step": 16059 }, { "epoch": 0.74, "grad_norm": 0.6052575643851409, "learning_rate": 3.394943437027011e-06, "loss": 0.3635, "step": 16060 }, { "epoch": 0.74, "grad_norm": 0.8312299966613697, "learning_rate": 3.393826349041802e-06, "loss": 0.4673, "step": 16061 }, { "epoch": 0.74, "grad_norm": 0.32446423251763346, "learning_rate": 3.3927094073109077e-06, "loss": 0.2673, "step": 16062 }, { "epoch": 0.74, "grad_norm": 0.43832093772407266, "learning_rate": 3.3915926118590615e-06, "loss": 0.2352, "step": 16063 }, { "epoch": 0.74, "grad_norm": 0.26441527666778475, "learning_rate": 3.3904759627109828e-06, "loss": 0.1579, "step": 16064 }, { "epoch": 0.74, "grad_norm": 0.42712135772762283, "learning_rate": 3.389359459891396e-06, "loss": 0.2717, "step": 16065 }, { "epoch": 0.74, "grad_norm": 0.5522817385320041, "learning_rate": 3.388243103425022e-06, "loss": 0.235, "step": 16066 }, { "epoch": 0.74, "grad_norm": 0.6604081093102742, "learning_rate": 3.3871268933365696e-06, "loss": 0.4264, "step": 16067 }, { "epoch": 0.74, "grad_norm": 0.41344009539807464, "learning_rate": 3.3860108296507556e-06, "loss": 0.2831, "step": 16068 }, { "epoch": 0.74, "grad_norm": 0.31462677688752494, "learning_rate": 3.3848949123922857e-06, "loss": 0.1608, "step": 16069 }, { "epoch": 0.74, "grad_norm": 0.32579844595315455, "learning_rate": 3.3837791415858712e-06, "loss": 0.2454, "step": 16070 }, { "epoch": 0.74, "grad_norm": 0.89233648424047, "learning_rate": 3.3826635172562096e-06, "loss": 0.4376, "step": 16071 }, { "epoch": 0.74, "grad_norm": 0.42476969851014046, "learning_rate": 3.3815480394279922e-06, "loss": 0.2245, "step": 16072 }, { "epoch": 0.74, "grad_norm": 0.4633335775134509, "learning_rate": 3.3804327081259304e-06, "loss": 0.3422, "step": 16073 }, { "epoch": 0.74, "grad_norm": 0.40415755581022905, "learning_rate": 3.3793175233747034e-06, "loss": 0.2856, "step": 16074 }, { "epoch": 0.74, "grad_norm": 0.41891745721233753, "learning_rate": 3.37820248519901e-06, "loss": 0.3054, "step": 16075 }, { "epoch": 0.74, "grad_norm": 0.1750573597262636, "learning_rate": 3.377087593623527e-06, "loss": 0.0703, "step": 16076 }, { "epoch": 0.74, "grad_norm": 0.6656876433274187, "learning_rate": 3.375972848672943e-06, "loss": 0.2693, "step": 16077 }, { "epoch": 0.74, "grad_norm": 0.29691160905151925, "learning_rate": 3.3748582503719373e-06, "loss": 0.2873, "step": 16078 }, { "epoch": 0.74, "grad_norm": 0.6789729851827108, "learning_rate": 3.3737437987451826e-06, "loss": 0.2966, "step": 16079 }, { "epoch": 0.74, "grad_norm": 0.6291131170262131, "learning_rate": 3.3726294938173566e-06, "loss": 0.3915, "step": 16080 }, { "epoch": 0.74, "grad_norm": 0.3480741757014308, "learning_rate": 3.3715153356131223e-06, "loss": 0.2457, "step": 16081 }, { "epoch": 0.74, "grad_norm": 0.2662859751938957, "learning_rate": 3.370401324157151e-06, "loss": 0.1932, "step": 16082 }, { "epoch": 0.74, "grad_norm": 0.6777872034971865, "learning_rate": 3.3692874594741064e-06, "loss": 0.2889, "step": 16083 }, { "epoch": 0.74, "grad_norm": 0.4325174015223367, "learning_rate": 3.3681737415886453e-06, "loss": 0.3149, "step": 16084 }, { "epoch": 0.74, "grad_norm": 0.7309351079965748, "learning_rate": 3.3670601705254235e-06, "loss": 0.3985, "step": 16085 }, { "epoch": 0.74, "grad_norm": 0.2790770173588824, "learning_rate": 3.3659467463090978e-06, "loss": 0.2104, "step": 16086 }, { "epoch": 0.74, "grad_norm": 0.6304181808535578, "learning_rate": 3.3648334689643214e-06, "loss": 0.3596, "step": 16087 }, { "epoch": 0.74, "grad_norm": 0.41725011910956933, "learning_rate": 3.363720338515736e-06, "loss": 0.1934, "step": 16088 }, { "epoch": 0.74, "grad_norm": 0.639307048280286, "learning_rate": 3.362607354987979e-06, "loss": 0.2318, "step": 16089 }, { "epoch": 0.74, "grad_norm": 0.26523923206571404, "learning_rate": 3.361494518405705e-06, "loss": 0.2558, "step": 16090 }, { "epoch": 0.74, "grad_norm": 0.8076612069886973, "learning_rate": 3.360381828793541e-06, "loss": 0.4288, "step": 16091 }, { "epoch": 0.74, "grad_norm": 0.7366596725361553, "learning_rate": 3.359269286176127e-06, "loss": 0.2168, "step": 16092 }, { "epoch": 0.74, "grad_norm": 0.44630013796267093, "learning_rate": 3.358156890578088e-06, "loss": 0.2792, "step": 16093 }, { "epoch": 0.74, "grad_norm": 0.2930026884835982, "learning_rate": 3.3570446420240534e-06, "loss": 0.2464, "step": 16094 }, { "epoch": 0.74, "grad_norm": 0.3258605922665766, "learning_rate": 3.3559325405386513e-06, "loss": 0.1281, "step": 16095 }, { "epoch": 0.74, "grad_norm": 0.41513553547822357, "learning_rate": 3.3548205861464956e-06, "loss": 0.322, "step": 16096 }, { "epoch": 0.74, "grad_norm": 1.3430851241228432, "learning_rate": 3.35370877887221e-06, "loss": 0.7711, "step": 16097 }, { "epoch": 0.74, "grad_norm": 0.3756029673686868, "learning_rate": 3.352597118740404e-06, "loss": 0.2844, "step": 16098 }, { "epoch": 0.74, "grad_norm": 0.3620577419249336, "learning_rate": 3.3514856057756905e-06, "loss": 0.1953, "step": 16099 }, { "epoch": 0.74, "grad_norm": 0.33957096426189765, "learning_rate": 3.3503742400026816e-06, "loss": 0.1712, "step": 16100 }, { "epoch": 0.74, "grad_norm": 0.527868012226301, "learning_rate": 3.349263021445974e-06, "loss": 0.3216, "step": 16101 }, { "epoch": 0.74, "grad_norm": 0.2945789989140119, "learning_rate": 3.348151950130174e-06, "loss": 0.2104, "step": 16102 }, { "epoch": 0.74, "grad_norm": 0.9830299557255137, "learning_rate": 3.347041026079878e-06, "loss": 0.4528, "step": 16103 }, { "epoch": 0.74, "grad_norm": 0.9078949383299475, "learning_rate": 3.345930249319684e-06, "loss": 0.3626, "step": 16104 }, { "epoch": 0.74, "grad_norm": 0.33762431493824857, "learning_rate": 3.344819619874179e-06, "loss": 0.1985, "step": 16105 }, { "epoch": 0.74, "grad_norm": 0.3758323617145274, "learning_rate": 3.3437091377679563e-06, "loss": 0.3184, "step": 16106 }, { "epoch": 0.74, "grad_norm": 0.7922945877591712, "learning_rate": 3.342598803025595e-06, "loss": 0.4255, "step": 16107 }, { "epoch": 0.74, "grad_norm": 0.19907840941663313, "learning_rate": 3.3414886156716785e-06, "loss": 0.1386, "step": 16108 }, { "epoch": 0.74, "grad_norm": 0.3598736035420362, "learning_rate": 3.3403785757307905e-06, "loss": 0.3183, "step": 16109 }, { "epoch": 0.74, "grad_norm": 1.474513499818245, "learning_rate": 3.339268683227499e-06, "loss": 0.6325, "step": 16110 }, { "epoch": 0.74, "grad_norm": 0.3508818339312539, "learning_rate": 3.338158938186379e-06, "loss": 0.2598, "step": 16111 }, { "epoch": 0.74, "grad_norm": 0.7879045170250311, "learning_rate": 3.3370493406320024e-06, "loss": 0.2743, "step": 16112 }, { "epoch": 0.74, "grad_norm": 0.36733307798601805, "learning_rate": 3.3359398905889295e-06, "loss": 0.3142, "step": 16113 }, { "epoch": 0.74, "grad_norm": 0.27124368251898295, "learning_rate": 3.3348305880817266e-06, "loss": 0.199, "step": 16114 }, { "epoch": 0.74, "grad_norm": 0.23460667931874093, "learning_rate": 3.3337214331349443e-06, "loss": 0.1194, "step": 16115 }, { "epoch": 0.74, "grad_norm": 0.8757412830881607, "learning_rate": 3.3326124257731506e-06, "loss": 0.3882, "step": 16116 }, { "epoch": 0.74, "grad_norm": 0.4075015078449814, "learning_rate": 3.3315035660208914e-06, "loss": 0.2338, "step": 16117 }, { "epoch": 0.74, "grad_norm": 0.3297661244973122, "learning_rate": 3.330394853902714e-06, "loss": 0.2377, "step": 16118 }, { "epoch": 0.74, "grad_norm": 0.5905474520742258, "learning_rate": 3.3292862894431653e-06, "loss": 0.3733, "step": 16119 }, { "epoch": 0.74, "grad_norm": 0.3844957154891131, "learning_rate": 3.328177872666789e-06, "loss": 0.2696, "step": 16120 }, { "epoch": 0.74, "grad_norm": 0.21910926818725007, "learning_rate": 3.3270696035981275e-06, "loss": 0.1792, "step": 16121 }, { "epoch": 0.74, "grad_norm": 0.8632624732793321, "learning_rate": 3.32596148226171e-06, "loss": 0.4857, "step": 16122 }, { "epoch": 0.74, "grad_norm": 0.39270958489206576, "learning_rate": 3.3248535086820776e-06, "loss": 0.2763, "step": 16123 }, { "epoch": 0.74, "grad_norm": 0.7315780509249727, "learning_rate": 3.32374568288375e-06, "loss": 0.338, "step": 16124 }, { "epoch": 0.74, "grad_norm": 0.38121667954284166, "learning_rate": 3.3226380048912586e-06, "loss": 0.2374, "step": 16125 }, { "epoch": 0.74, "grad_norm": 0.39145930572575766, "learning_rate": 3.32153047472913e-06, "loss": 0.2768, "step": 16126 }, { "epoch": 0.74, "grad_norm": 0.3538415504293478, "learning_rate": 3.320423092421876e-06, "loss": 0.1988, "step": 16127 }, { "epoch": 0.74, "grad_norm": 1.044541027306054, "learning_rate": 3.3193158579940164e-06, "loss": 0.3959, "step": 16128 }, { "epoch": 0.74, "grad_norm": 0.35601210923626375, "learning_rate": 3.3182087714700694e-06, "loss": 0.2668, "step": 16129 }, { "epoch": 0.74, "grad_norm": 0.3682407844886599, "learning_rate": 3.3171018328745364e-06, "loss": 0.2926, "step": 16130 }, { "epoch": 0.74, "grad_norm": 2.297854303456897, "learning_rate": 3.315995042231931e-06, "loss": 0.2099, "step": 16131 }, { "epoch": 0.74, "grad_norm": 0.26947889323034246, "learning_rate": 3.3148883995667457e-06, "loss": 0.2147, "step": 16132 }, { "epoch": 0.74, "grad_norm": 0.4334930096280682, "learning_rate": 3.3137819049034957e-06, "loss": 0.2516, "step": 16133 }, { "epoch": 0.74, "grad_norm": 0.3368574989949426, "learning_rate": 3.312675558266667e-06, "loss": 0.2664, "step": 16134 }, { "epoch": 0.74, "grad_norm": 0.32667901451206866, "learning_rate": 3.3115693596807584e-06, "loss": 0.2501, "step": 16135 }, { "epoch": 0.74, "grad_norm": 0.8003012011282131, "learning_rate": 3.310463309170259e-06, "loss": 0.4116, "step": 16136 }, { "epoch": 0.74, "grad_norm": 0.3861248574902254, "learning_rate": 3.309357406759647e-06, "loss": 0.329, "step": 16137 }, { "epoch": 0.74, "grad_norm": 0.30955351571782025, "learning_rate": 3.30825165247342e-06, "loss": 0.1786, "step": 16138 }, { "epoch": 0.74, "grad_norm": 0.5188374830472456, "learning_rate": 3.3071460463360485e-06, "loss": 0.2468, "step": 16139 }, { "epoch": 0.74, "grad_norm": 0.7149262720806215, "learning_rate": 3.306040588372017e-06, "loss": 0.4317, "step": 16140 }, { "epoch": 0.74, "grad_norm": 0.358192855929158, "learning_rate": 3.304935278605791e-06, "loss": 0.1965, "step": 16141 }, { "epoch": 0.74, "grad_norm": 0.29734969951174195, "learning_rate": 3.303830117061846e-06, "loss": 0.2347, "step": 16142 }, { "epoch": 0.74, "grad_norm": 1.2724837161518323, "learning_rate": 3.3027251037646504e-06, "loss": 0.5205, "step": 16143 }, { "epoch": 0.74, "grad_norm": 0.317019692791806, "learning_rate": 3.301620238738664e-06, "loss": 0.1878, "step": 16144 }, { "epoch": 0.74, "grad_norm": 0.2936610601476582, "learning_rate": 3.3005155220083485e-06, "loss": 0.2479, "step": 16145 }, { "epoch": 0.74, "grad_norm": 0.9829195222449634, "learning_rate": 3.2994109535981666e-06, "loss": 0.5056, "step": 16146 }, { "epoch": 0.74, "grad_norm": 0.3818905881398835, "learning_rate": 3.2983065335325636e-06, "loss": 0.2155, "step": 16147 }, { "epoch": 0.74, "grad_norm": 0.5160790801911431, "learning_rate": 3.297202261835999e-06, "loss": 0.2506, "step": 16148 }, { "epoch": 0.74, "grad_norm": 0.4499971818936267, "learning_rate": 3.2960981385329094e-06, "loss": 0.3013, "step": 16149 }, { "epoch": 0.74, "grad_norm": 0.359607336901922, "learning_rate": 3.2949941636477523e-06, "loss": 0.2383, "step": 16150 }, { "epoch": 0.74, "grad_norm": 0.9870576953504104, "learning_rate": 3.293890337204959e-06, "loss": 0.3757, "step": 16151 }, { "epoch": 0.74, "grad_norm": 0.9284451036292902, "learning_rate": 3.2927866592289725e-06, "loss": 0.3442, "step": 16152 }, { "epoch": 0.74, "grad_norm": 0.30458095187051076, "learning_rate": 3.2916831297442255e-06, "loss": 0.2601, "step": 16153 }, { "epoch": 0.74, "grad_norm": 0.22536001354157525, "learning_rate": 3.2905797487751424e-06, "loss": 0.1459, "step": 16154 }, { "epoch": 0.74, "grad_norm": 1.1420696542265183, "learning_rate": 3.289476516346163e-06, "loss": 0.4822, "step": 16155 }, { "epoch": 0.74, "grad_norm": 0.4525238884397799, "learning_rate": 3.288373432481703e-06, "loss": 0.2864, "step": 16156 }, { "epoch": 0.74, "grad_norm": 0.30112622202575445, "learning_rate": 3.2872704972061884e-06, "loss": 0.2486, "step": 16157 }, { "epoch": 0.74, "grad_norm": 0.7537088589824122, "learning_rate": 3.2861677105440335e-06, "loss": 0.3805, "step": 16158 }, { "epoch": 0.74, "grad_norm": 0.3980597895663537, "learning_rate": 3.2850650725196543e-06, "loss": 0.2373, "step": 16159 }, { "epoch": 0.74, "grad_norm": 0.2313688817433475, "learning_rate": 3.2839625831574653e-06, "loss": 0.135, "step": 16160 }, { "epoch": 0.74, "grad_norm": 0.3854610936153916, "learning_rate": 3.2828602424818677e-06, "loss": 0.2898, "step": 16161 }, { "epoch": 0.74, "grad_norm": 0.4087413720311877, "learning_rate": 3.2817580505172717e-06, "loss": 0.2605, "step": 16162 }, { "epoch": 0.74, "grad_norm": 0.5659023460892615, "learning_rate": 3.28065600728808e-06, "loss": 0.3543, "step": 16163 }, { "epoch": 0.74, "grad_norm": 0.7191671636883915, "learning_rate": 3.2795541128186848e-06, "loss": 0.3054, "step": 16164 }, { "epoch": 0.74, "grad_norm": 0.2765105041215185, "learning_rate": 3.278452367133488e-06, "loss": 0.2576, "step": 16165 }, { "epoch": 0.74, "grad_norm": 0.2611474019458847, "learning_rate": 3.277350770256873e-06, "loss": 0.1902, "step": 16166 }, { "epoch": 0.74, "grad_norm": 1.6018837010269276, "learning_rate": 3.2762493222132342e-06, "loss": 0.2351, "step": 16167 }, { "epoch": 0.74, "grad_norm": 0.41364887796998884, "learning_rate": 3.275148023026954e-06, "loss": 0.2613, "step": 16168 }, { "epoch": 0.74, "grad_norm": 0.41165226246115383, "learning_rate": 3.2740468727224184e-06, "loss": 0.3007, "step": 16169 }, { "epoch": 0.74, "grad_norm": 0.5959572703682788, "learning_rate": 3.272945871323999e-06, "loss": 0.298, "step": 16170 }, { "epoch": 0.74, "grad_norm": 0.3267540750924954, "learning_rate": 3.271845018856075e-06, "loss": 0.2509, "step": 16171 }, { "epoch": 0.74, "grad_norm": 0.5570804574953516, "learning_rate": 3.2707443153430206e-06, "loss": 0.2465, "step": 16172 }, { "epoch": 0.74, "grad_norm": 0.2727921489608086, "learning_rate": 3.269643760809198e-06, "loss": 0.1916, "step": 16173 }, { "epoch": 0.74, "grad_norm": 0.4463482475968113, "learning_rate": 3.268543355278979e-06, "loss": 0.2669, "step": 16174 }, { "epoch": 0.74, "grad_norm": 0.47338766101770396, "learning_rate": 3.267443098776719e-06, "loss": 0.307, "step": 16175 }, { "epoch": 0.74, "grad_norm": 0.4711178204213357, "learning_rate": 3.2663429913267795e-06, "loss": 0.3602, "step": 16176 }, { "epoch": 0.74, "grad_norm": 0.33058592293801603, "learning_rate": 3.26524303295352e-06, "loss": 0.19, "step": 16177 }, { "epoch": 0.74, "grad_norm": 0.3022331547532038, "learning_rate": 3.2641432236812855e-06, "loss": 0.2438, "step": 16178 }, { "epoch": 0.74, "grad_norm": 0.5042215389006428, "learning_rate": 3.2630435635344283e-06, "loss": 0.257, "step": 16179 }, { "epoch": 0.74, "grad_norm": 0.39676000572111725, "learning_rate": 3.2619440525372927e-06, "loss": 0.1999, "step": 16180 }, { "epoch": 0.74, "grad_norm": 0.3088026956957434, "learning_rate": 3.2608446907142244e-06, "loss": 0.2939, "step": 16181 }, { "epoch": 0.74, "grad_norm": 0.8325122734946345, "learning_rate": 3.25974547808956e-06, "loss": 0.5128, "step": 16182 }, { "epoch": 0.74, "grad_norm": 0.3482061112546673, "learning_rate": 3.258646414687632e-06, "loss": 0.1943, "step": 16183 }, { "epoch": 0.74, "grad_norm": 0.2761731327942557, "learning_rate": 3.257547500532774e-06, "loss": 0.1905, "step": 16184 }, { "epoch": 0.74, "grad_norm": 0.35580695012609526, "learning_rate": 3.2564487356493157e-06, "loss": 0.2877, "step": 16185 }, { "epoch": 0.74, "grad_norm": 0.37528078305626134, "learning_rate": 3.2553501200615858e-06, "loss": 0.2211, "step": 16186 }, { "epoch": 0.74, "grad_norm": 0.5215771798823283, "learning_rate": 3.2542516537939005e-06, "loss": 0.3201, "step": 16187 }, { "epoch": 0.74, "grad_norm": 1.060089521871366, "learning_rate": 3.2531533368705828e-06, "loss": 0.6665, "step": 16188 }, { "epoch": 0.74, "grad_norm": 0.31100061257219114, "learning_rate": 3.252055169315951e-06, "loss": 0.2582, "step": 16189 }, { "epoch": 0.74, "grad_norm": 0.39891706900082424, "learning_rate": 3.250957151154309e-06, "loss": 0.2199, "step": 16190 }, { "epoch": 0.74, "grad_norm": 0.3428219125410267, "learning_rate": 3.249859282409976e-06, "loss": 0.2199, "step": 16191 }, { "epoch": 0.74, "grad_norm": 0.5703773592059471, "learning_rate": 3.248761563107249e-06, "loss": 0.3302, "step": 16192 }, { "epoch": 0.74, "grad_norm": 0.2669964828374416, "learning_rate": 3.2476639932704335e-06, "loss": 0.2284, "step": 16193 }, { "epoch": 0.74, "grad_norm": 1.230687750757218, "learning_rate": 3.246566572923833e-06, "loss": 0.8005, "step": 16194 }, { "epoch": 0.74, "grad_norm": 0.8898841504817586, "learning_rate": 3.245469302091735e-06, "loss": 0.455, "step": 16195 }, { "epoch": 0.74, "grad_norm": 0.26300311296790924, "learning_rate": 3.244372180798441e-06, "loss": 0.1681, "step": 16196 }, { "epoch": 0.74, "grad_norm": 0.3113994946352287, "learning_rate": 3.2432752090682286e-06, "loss": 0.2479, "step": 16197 }, { "epoch": 0.74, "grad_norm": 0.7435028702934103, "learning_rate": 3.2421783869253985e-06, "loss": 0.3297, "step": 16198 }, { "epoch": 0.74, "grad_norm": 0.3689499863966752, "learning_rate": 3.2410817143942207e-06, "loss": 0.2076, "step": 16199 }, { "epoch": 0.74, "grad_norm": 1.349573231309148, "learning_rate": 3.2399851914989842e-06, "loss": 0.7598, "step": 16200 }, { "epoch": 0.74, "grad_norm": 0.31551649890808037, "learning_rate": 3.2388888182639566e-06, "loss": 0.2445, "step": 16201 }, { "epoch": 0.74, "grad_norm": 0.472714425087857, "learning_rate": 3.2377925947134137e-06, "loss": 0.3129, "step": 16202 }, { "epoch": 0.74, "grad_norm": 0.8052972015991857, "learning_rate": 3.236696520871628e-06, "loss": 0.2973, "step": 16203 }, { "epoch": 0.74, "grad_norm": 0.3036751973221981, "learning_rate": 3.23560059676286e-06, "loss": 0.2193, "step": 16204 }, { "epoch": 0.74, "grad_norm": 0.27859658895535727, "learning_rate": 3.2345048224113764e-06, "loss": 0.2303, "step": 16205 }, { "epoch": 0.74, "grad_norm": 0.9480435756547749, "learning_rate": 3.233409197841437e-06, "loss": 0.5324, "step": 16206 }, { "epoch": 0.74, "grad_norm": 0.6439513062890482, "learning_rate": 3.2323137230772937e-06, "loss": 0.2633, "step": 16207 }, { "epoch": 0.74, "grad_norm": 0.4078880036330967, "learning_rate": 3.231218398143204e-06, "loss": 0.2929, "step": 16208 }, { "epoch": 0.74, "grad_norm": 0.4691604734785131, "learning_rate": 3.2301232230634104e-06, "loss": 0.27, "step": 16209 }, { "epoch": 0.74, "grad_norm": 0.3091572642571224, "learning_rate": 3.2290281978621695e-06, "loss": 0.166, "step": 16210 }, { "epoch": 0.74, "grad_norm": 0.40168543540251894, "learning_rate": 3.227933322563718e-06, "loss": 0.2813, "step": 16211 }, { "epoch": 0.74, "grad_norm": 0.49317599406421814, "learning_rate": 3.226838597192292e-06, "loss": 0.3843, "step": 16212 }, { "epoch": 0.74, "grad_norm": 0.4895074202392746, "learning_rate": 3.225744021772136e-06, "loss": 0.2281, "step": 16213 }, { "epoch": 0.74, "grad_norm": 0.386088206921874, "learning_rate": 3.2246495963274713e-06, "loss": 0.2599, "step": 16214 }, { "epoch": 0.74, "grad_norm": 0.7846604284698759, "learning_rate": 3.2235553208825398e-06, "loss": 0.3185, "step": 16215 }, { "epoch": 0.74, "grad_norm": 0.2870618483940645, "learning_rate": 3.22246119546156e-06, "loss": 0.1692, "step": 16216 }, { "epoch": 0.74, "grad_norm": 0.25390974017381585, "learning_rate": 3.22136722008876e-06, "loss": 0.2158, "step": 16217 }, { "epoch": 0.75, "grad_norm": 1.3354459556541762, "learning_rate": 3.2202733947883536e-06, "loss": 0.7886, "step": 16218 }, { "epoch": 0.75, "grad_norm": 0.7585343414335117, "learning_rate": 3.2191797195845597e-06, "loss": 0.2318, "step": 16219 }, { "epoch": 0.75, "grad_norm": 0.31746914090406975, "learning_rate": 3.218086194501595e-06, "loss": 0.2543, "step": 16220 }, { "epoch": 0.75, "grad_norm": 0.494010222246306, "learning_rate": 3.2169928195636612e-06, "loss": 0.3474, "step": 16221 }, { "epoch": 0.75, "grad_norm": 0.6252414722084878, "learning_rate": 3.21589959479497e-06, "loss": 0.1265, "step": 16222 }, { "epoch": 0.75, "grad_norm": 0.3141381262047527, "learning_rate": 3.2148065202197255e-06, "loss": 0.2307, "step": 16223 }, { "epoch": 0.75, "grad_norm": 0.52698450431804, "learning_rate": 3.2137135958621225e-06, "loss": 0.4001, "step": 16224 }, { "epoch": 0.75, "grad_norm": 0.31190476523618244, "learning_rate": 3.212620821746362e-06, "loss": 0.2172, "step": 16225 }, { "epoch": 0.75, "grad_norm": 0.3681224993063705, "learning_rate": 3.2115281978966316e-06, "loss": 0.2066, "step": 16226 }, { "epoch": 0.75, "grad_norm": 0.7846341275647539, "learning_rate": 3.2104357243371252e-06, "loss": 0.4236, "step": 16227 }, { "epoch": 0.75, "grad_norm": 0.3708710206772514, "learning_rate": 3.20934340109203e-06, "loss": 0.2855, "step": 16228 }, { "epoch": 0.75, "grad_norm": 0.23886403852464788, "learning_rate": 3.2082512281855247e-06, "loss": 0.155, "step": 16229 }, { "epoch": 0.75, "grad_norm": 0.4364437519522074, "learning_rate": 3.2071592056417944e-06, "loss": 0.3, "step": 16230 }, { "epoch": 0.75, "grad_norm": 0.7605167157607732, "learning_rate": 3.2060673334850056e-06, "loss": 0.3923, "step": 16231 }, { "epoch": 0.75, "grad_norm": 0.2771228307827486, "learning_rate": 3.2049756117393437e-06, "loss": 0.2217, "step": 16232 }, { "epoch": 0.75, "grad_norm": 0.5539491551945313, "learning_rate": 3.2038840404289706e-06, "loss": 0.3297, "step": 16233 }, { "epoch": 0.75, "grad_norm": 1.4625646902945728, "learning_rate": 3.202792619578057e-06, "loss": 0.4737, "step": 16234 }, { "epoch": 0.75, "grad_norm": 0.3215110002947949, "learning_rate": 3.2017013492107608e-06, "loss": 0.2073, "step": 16235 }, { "epoch": 0.75, "grad_norm": 0.5354099619295052, "learning_rate": 3.2006102293512443e-06, "loss": 0.3519, "step": 16236 }, { "epoch": 0.75, "grad_norm": 0.36175366057772973, "learning_rate": 3.199519260023667e-06, "loss": 0.2581, "step": 16237 }, { "epoch": 0.75, "grad_norm": 0.35628914783933086, "learning_rate": 3.198428441252176e-06, "loss": 0.2629, "step": 16238 }, { "epoch": 0.75, "grad_norm": 0.45489883960552546, "learning_rate": 3.1973377730609233e-06, "loss": 0.1047, "step": 16239 }, { "epoch": 0.75, "grad_norm": 0.38334324049362095, "learning_rate": 3.1962472554740598e-06, "loss": 0.2789, "step": 16240 }, { "epoch": 0.75, "grad_norm": 0.36390072735771056, "learning_rate": 3.19515688851572e-06, "loss": 0.2786, "step": 16241 }, { "epoch": 0.75, "grad_norm": 0.8324471952602299, "learning_rate": 3.1940666722100534e-06, "loss": 0.3545, "step": 16242 }, { "epoch": 0.75, "grad_norm": 0.3889448092239731, "learning_rate": 3.1929766065811864e-06, "loss": 0.2609, "step": 16243 }, { "epoch": 0.75, "grad_norm": 0.41634608796716804, "learning_rate": 3.1918866916532564e-06, "loss": 0.2746, "step": 16244 }, { "epoch": 0.75, "grad_norm": 0.34839782341010006, "learning_rate": 3.1907969274503945e-06, "loss": 0.2046, "step": 16245 }, { "epoch": 0.75, "grad_norm": 0.9107890104900368, "learning_rate": 3.189707313996728e-06, "loss": 0.3565, "step": 16246 }, { "epoch": 0.75, "grad_norm": 0.40345173366511333, "learning_rate": 3.1886178513163777e-06, "loss": 0.2946, "step": 16247 }, { "epoch": 0.75, "grad_norm": 0.35850114949904954, "learning_rate": 3.1875285394334575e-06, "loss": 0.2651, "step": 16248 }, { "epoch": 0.75, "grad_norm": 0.8772466115033867, "learning_rate": 3.186439378372096e-06, "loss": 0.42, "step": 16249 }, { "epoch": 0.75, "grad_norm": 0.352078792253974, "learning_rate": 3.1853503681563957e-06, "loss": 0.2611, "step": 16250 }, { "epoch": 0.75, "grad_norm": 0.2530405286896428, "learning_rate": 3.1842615088104744e-06, "loss": 0.1279, "step": 16251 }, { "epoch": 0.75, "grad_norm": 0.39104832882246765, "learning_rate": 3.1831728003584308e-06, "loss": 0.237, "step": 16252 }, { "epoch": 0.75, "grad_norm": 0.3536058828209409, "learning_rate": 3.1820842428243704e-06, "loss": 0.2605, "step": 16253 }, { "epoch": 0.75, "grad_norm": 1.0349105712453652, "learning_rate": 3.1809958362323977e-06, "loss": 0.4249, "step": 16254 }, { "epoch": 0.75, "grad_norm": 1.0926383859800037, "learning_rate": 3.1799075806066016e-06, "loss": 0.314, "step": 16255 }, { "epoch": 0.75, "grad_norm": 0.2862033288147034, "learning_rate": 3.178819475971078e-06, "loss": 0.2402, "step": 16256 }, { "epoch": 0.75, "grad_norm": 0.23609206964331267, "learning_rate": 3.1777315223499193e-06, "loss": 0.1803, "step": 16257 }, { "epoch": 0.75, "grad_norm": 1.203041357848546, "learning_rate": 3.1766437197672074e-06, "loss": 0.3383, "step": 16258 }, { "epoch": 0.75, "grad_norm": 0.33612138284106663, "learning_rate": 3.17555606824703e-06, "loss": 0.2411, "step": 16259 }, { "epoch": 0.75, "grad_norm": 0.45627768613669034, "learning_rate": 3.174468567813461e-06, "loss": 0.2937, "step": 16260 }, { "epoch": 0.75, "grad_norm": 1.0737717862634149, "learning_rate": 3.173381218490579e-06, "loss": 0.3331, "step": 16261 }, { "epoch": 0.75, "grad_norm": 0.41803195388632486, "learning_rate": 3.1722940203024564e-06, "loss": 0.2787, "step": 16262 }, { "epoch": 0.75, "grad_norm": 0.2359905162798226, "learning_rate": 3.1712069732731677e-06, "loss": 0.1631, "step": 16263 }, { "epoch": 0.75, "grad_norm": 0.384828706295368, "learning_rate": 3.1701200774267714e-06, "loss": 0.3102, "step": 16264 }, { "epoch": 0.75, "grad_norm": 0.31486668099464993, "learning_rate": 3.1690333327873348e-06, "loss": 0.1823, "step": 16265 }, { "epoch": 0.75, "grad_norm": 0.6747652748737898, "learning_rate": 3.1679467393789185e-06, "loss": 0.3744, "step": 16266 }, { "epoch": 0.75, "grad_norm": 1.1603402025582956, "learning_rate": 3.1668602972255733e-06, "loss": 0.5156, "step": 16267 }, { "epoch": 0.75, "grad_norm": 0.2746123462215477, "learning_rate": 3.1657740063513596e-06, "loss": 0.1977, "step": 16268 }, { "epoch": 0.75, "grad_norm": 0.2807841200168824, "learning_rate": 3.1646878667803183e-06, "loss": 0.194, "step": 16269 }, { "epoch": 0.75, "grad_norm": 1.4841156706818384, "learning_rate": 3.1636018785364996e-06, "loss": 0.7687, "step": 16270 }, { "epoch": 0.75, "grad_norm": 0.38761758464745655, "learning_rate": 3.1625160416439503e-06, "loss": 0.2115, "step": 16271 }, { "epoch": 0.75, "grad_norm": 0.4083625476888615, "learning_rate": 3.1614303561267025e-06, "loss": 0.3168, "step": 16272 }, { "epoch": 0.75, "grad_norm": 1.358679343070449, "learning_rate": 3.1603448220087975e-06, "loss": 0.7661, "step": 16273 }, { "epoch": 0.75, "grad_norm": 0.3133154506054117, "learning_rate": 3.1592594393142606e-06, "loss": 0.1822, "step": 16274 }, { "epoch": 0.75, "grad_norm": 0.3653618659027755, "learning_rate": 3.158174208067133e-06, "loss": 0.1805, "step": 16275 }, { "epoch": 0.75, "grad_norm": 0.38146547103855466, "learning_rate": 3.157089128291434e-06, "loss": 0.2909, "step": 16276 }, { "epoch": 0.75, "grad_norm": 0.337836460064673, "learning_rate": 3.1560042000111833e-06, "loss": 0.2575, "step": 16277 }, { "epoch": 0.75, "grad_norm": 0.7545227231503971, "learning_rate": 3.154919423250403e-06, "loss": 0.3293, "step": 16278 }, { "epoch": 0.75, "grad_norm": 0.5563110687884437, "learning_rate": 3.1538347980331097e-06, "loss": 0.3406, "step": 16279 }, { "epoch": 0.75, "grad_norm": 0.3853232477254108, "learning_rate": 3.152750324383318e-06, "loss": 0.2637, "step": 16280 }, { "epoch": 0.75, "grad_norm": 0.23044809791306656, "learning_rate": 3.1516660023250323e-06, "loss": 0.1553, "step": 16281 }, { "epoch": 0.75, "grad_norm": 0.6493105216468711, "learning_rate": 3.15058183188226e-06, "loss": 0.407, "step": 16282 }, { "epoch": 0.75, "grad_norm": 0.5396976234288307, "learning_rate": 3.1494978130790088e-06, "loss": 0.2526, "step": 16283 }, { "epoch": 0.75, "grad_norm": 0.26920451758654373, "learning_rate": 3.148413945939269e-06, "loss": 0.2372, "step": 16284 }, { "epoch": 0.75, "grad_norm": 1.4291774908862382, "learning_rate": 3.1473302304870445e-06, "loss": 0.6303, "step": 16285 }, { "epoch": 0.75, "grad_norm": 0.4960315488451702, "learning_rate": 3.146246666746321e-06, "loss": 0.2994, "step": 16286 }, { "epoch": 0.75, "grad_norm": 0.24110582009741974, "learning_rate": 3.1451632547410906e-06, "loss": 0.1567, "step": 16287 }, { "epoch": 0.75, "grad_norm": 0.4766538464147961, "learning_rate": 3.1440799944953416e-06, "loss": 0.3154, "step": 16288 }, { "epoch": 0.75, "grad_norm": 0.41841794223725176, "learning_rate": 3.1429968860330505e-06, "loss": 0.2915, "step": 16289 }, { "epoch": 0.75, "grad_norm": 0.5757099067643165, "learning_rate": 3.141913929378203e-06, "loss": 0.3783, "step": 16290 }, { "epoch": 0.75, "grad_norm": 0.547374316905116, "learning_rate": 3.140831124554765e-06, "loss": 0.275, "step": 16291 }, { "epoch": 0.75, "grad_norm": 0.3341651304373114, "learning_rate": 3.139748471586721e-06, "loss": 0.2533, "step": 16292 }, { "epoch": 0.75, "grad_norm": 0.6385625139870543, "learning_rate": 3.1386659704980305e-06, "loss": 0.4236, "step": 16293 }, { "epoch": 0.75, "grad_norm": 0.335912281455225, "learning_rate": 3.1375836213126653e-06, "loss": 0.1831, "step": 16294 }, { "epoch": 0.75, "grad_norm": 0.4176407771660147, "learning_rate": 3.136501424054582e-06, "loss": 0.2869, "step": 16295 }, { "epoch": 0.75, "grad_norm": 0.3188860843585173, "learning_rate": 3.1354193787477428e-06, "loss": 0.2844, "step": 16296 }, { "epoch": 0.75, "grad_norm": 1.1090137715507415, "learning_rate": 3.1343374854161046e-06, "loss": 0.3907, "step": 16297 }, { "epoch": 0.75, "grad_norm": 0.6734148894015325, "learning_rate": 3.133255744083614e-06, "loss": 0.3135, "step": 16298 }, { "epoch": 0.75, "grad_norm": 0.40030865225891, "learning_rate": 3.1321741547742236e-06, "loss": 0.2746, "step": 16299 }, { "epoch": 0.75, "grad_norm": 0.31305484089135516, "learning_rate": 3.131092717511881e-06, "loss": 0.2277, "step": 16300 }, { "epoch": 0.75, "grad_norm": 0.33292710062757813, "learning_rate": 3.130011432320522e-06, "loss": 0.1907, "step": 16301 }, { "epoch": 0.75, "grad_norm": 0.4505201344827788, "learning_rate": 3.128930299224092e-06, "loss": 0.3272, "step": 16302 }, { "epoch": 0.75, "grad_norm": 0.5691089812626369, "learning_rate": 3.1278493182465187e-06, "loss": 0.3862, "step": 16303 }, { "epoch": 0.75, "grad_norm": 0.32952246426544163, "learning_rate": 3.126768489411739e-06, "loss": 0.1953, "step": 16304 }, { "epoch": 0.75, "grad_norm": 0.42341394819454375, "learning_rate": 3.125687812743683e-06, "loss": 0.2967, "step": 16305 }, { "epoch": 0.75, "grad_norm": 0.7190048663580556, "learning_rate": 3.12460728826627e-06, "loss": 0.4242, "step": 16306 }, { "epoch": 0.75, "grad_norm": 0.20516648425918738, "learning_rate": 3.123526916003429e-06, "loss": 0.1279, "step": 16307 }, { "epoch": 0.75, "grad_norm": 0.322470069226777, "learning_rate": 3.1224466959790676e-06, "loss": 0.2994, "step": 16308 }, { "epoch": 0.75, "grad_norm": 1.4061965462645851, "learning_rate": 3.121366628217114e-06, "loss": 0.6254, "step": 16309 }, { "epoch": 0.75, "grad_norm": 0.4219804750964464, "learning_rate": 3.1202867127414703e-06, "loss": 0.1984, "step": 16310 }, { "epoch": 0.75, "grad_norm": 0.5561840173192308, "learning_rate": 3.1192069495760525e-06, "loss": 0.3241, "step": 16311 }, { "epoch": 0.75, "grad_norm": 0.39812558548430166, "learning_rate": 3.1181273387447564e-06, "loss": 0.3002, "step": 16312 }, { "epoch": 0.75, "grad_norm": 0.2810504180759767, "learning_rate": 3.117047880271489e-06, "loss": 0.144, "step": 16313 }, { "epoch": 0.75, "grad_norm": 0.5882950289785216, "learning_rate": 3.115968574180149e-06, "loss": 0.379, "step": 16314 }, { "epoch": 0.75, "grad_norm": 0.3033553642302104, "learning_rate": 3.114889420494629e-06, "loss": 0.2445, "step": 16315 }, { "epoch": 0.75, "grad_norm": 0.7844651054596269, "learning_rate": 3.1138104192388196e-06, "loss": 0.3285, "step": 16316 }, { "epoch": 0.75, "grad_norm": 0.3879251671714471, "learning_rate": 3.1127315704366144e-06, "loss": 0.2318, "step": 16317 }, { "epoch": 0.75, "grad_norm": 0.9117075604763524, "learning_rate": 3.111652874111891e-06, "loss": 0.4072, "step": 16318 }, { "epoch": 0.75, "grad_norm": 0.43466547473984385, "learning_rate": 3.1105743302885373e-06, "loss": 0.3148, "step": 16319 }, { "epoch": 0.75, "grad_norm": 0.30857637111789443, "learning_rate": 3.1094959389904245e-06, "loss": 0.2271, "step": 16320 }, { "epoch": 0.75, "grad_norm": 0.2482075362776378, "learning_rate": 3.1084177002414307e-06, "loss": 0.1585, "step": 16321 }, { "epoch": 0.75, "grad_norm": 0.6315452501079256, "learning_rate": 3.10733961406543e-06, "loss": 0.3411, "step": 16322 }, { "epoch": 0.75, "grad_norm": 0.30804980204368937, "learning_rate": 3.1062616804862834e-06, "loss": 0.2262, "step": 16323 }, { "epoch": 0.75, "grad_norm": 0.5492817497873844, "learning_rate": 3.1051838995278617e-06, "loss": 0.3062, "step": 16324 }, { "epoch": 0.75, "grad_norm": 0.8798894324188794, "learning_rate": 3.104106271214018e-06, "loss": 0.4908, "step": 16325 }, { "epoch": 0.75, "grad_norm": 0.26086137333049136, "learning_rate": 3.10302879556862e-06, "loss": 0.1879, "step": 16326 }, { "epoch": 0.75, "grad_norm": 0.349581128577903, "learning_rate": 3.1019514726155154e-06, "loss": 0.2308, "step": 16327 }, { "epoch": 0.75, "grad_norm": 0.4333805052954599, "learning_rate": 3.1008743023785593e-06, "loss": 0.2838, "step": 16328 }, { "epoch": 0.75, "grad_norm": 0.4343381791551161, "learning_rate": 3.0997972848815926e-06, "loss": 0.3014, "step": 16329 }, { "epoch": 0.75, "grad_norm": 0.8405894053742556, "learning_rate": 3.0987204201484646e-06, "loss": 0.281, "step": 16330 }, { "epoch": 0.75, "grad_norm": 0.32025642665563875, "learning_rate": 3.0976437082030185e-06, "loss": 0.2605, "step": 16331 }, { "epoch": 0.75, "grad_norm": 0.45844415955387186, "learning_rate": 3.096567149069084e-06, "loss": 0.3327, "step": 16332 }, { "epoch": 0.75, "grad_norm": 0.5211204326550954, "learning_rate": 3.0954907427705026e-06, "loss": 0.2092, "step": 16333 }, { "epoch": 0.75, "grad_norm": 0.4250827514266838, "learning_rate": 3.094414489331099e-06, "loss": 0.243, "step": 16334 }, { "epoch": 0.75, "grad_norm": 0.3968554017943291, "learning_rate": 3.0933383887747014e-06, "loss": 0.2561, "step": 16335 }, { "epoch": 0.75, "grad_norm": 0.3566525686649253, "learning_rate": 3.0922624411251403e-06, "loss": 0.2358, "step": 16336 }, { "epoch": 0.75, "grad_norm": 0.8786999480380613, "learning_rate": 3.0911866464062266e-06, "loss": 0.4897, "step": 16337 }, { "epoch": 0.75, "grad_norm": 0.3869213178669692, "learning_rate": 3.0901110046417816e-06, "loss": 0.3031, "step": 16338 }, { "epoch": 0.75, "grad_norm": 0.33507264413712967, "learning_rate": 3.0890355158556195e-06, "loss": 0.2915, "step": 16339 }, { "epoch": 0.75, "grad_norm": 0.431369114924861, "learning_rate": 3.087960180071553e-06, "loss": 0.1726, "step": 16340 }, { "epoch": 0.75, "grad_norm": 0.29841795431933993, "learning_rate": 3.0868849973133875e-06, "loss": 0.2141, "step": 16341 }, { "epoch": 0.75, "grad_norm": 1.579432844699005, "learning_rate": 3.085809967604917e-06, "loss": 0.6987, "step": 16342 }, { "epoch": 0.75, "grad_norm": 0.3538186319487211, "learning_rate": 3.084735090969958e-06, "loss": 0.232, "step": 16343 }, { "epoch": 0.75, "grad_norm": 0.3730761161190041, "learning_rate": 3.083660367432294e-06, "loss": 0.2836, "step": 16344 }, { "epoch": 0.75, "grad_norm": 0.7996858324781417, "learning_rate": 3.082585797015728e-06, "loss": 0.4347, "step": 16345 }, { "epoch": 0.75, "grad_norm": 0.3250360845854394, "learning_rate": 3.081511379744042e-06, "loss": 0.1491, "step": 16346 }, { "epoch": 0.75, "grad_norm": 0.4167383314821497, "learning_rate": 3.080437115641025e-06, "loss": 0.2961, "step": 16347 }, { "epoch": 0.75, "grad_norm": 0.39644347428887583, "learning_rate": 3.0793630047304657e-06, "loss": 0.2951, "step": 16348 }, { "epoch": 0.75, "grad_norm": 0.5889262046595498, "learning_rate": 3.078289047036135e-06, "loss": 0.1752, "step": 16349 }, { "epoch": 0.75, "grad_norm": 0.43751791325336575, "learning_rate": 3.0772152425818167e-06, "loss": 0.3049, "step": 16350 }, { "epoch": 0.75, "grad_norm": 0.3699520337744608, "learning_rate": 3.0761415913912783e-06, "loss": 0.3203, "step": 16351 }, { "epoch": 0.75, "grad_norm": 0.9671519000344597, "learning_rate": 3.0750680934882914e-06, "loss": 0.4952, "step": 16352 }, { "epoch": 0.75, "grad_norm": 0.2717899328697984, "learning_rate": 3.073994748896626e-06, "loss": 0.1617, "step": 16353 }, { "epoch": 0.75, "grad_norm": 0.5955640628442219, "learning_rate": 3.0729215576400384e-06, "loss": 0.2598, "step": 16354 }, { "epoch": 0.75, "grad_norm": 0.353707756894835, "learning_rate": 3.071848519742291e-06, "loss": 0.314, "step": 16355 }, { "epoch": 0.75, "grad_norm": 0.3420955633928122, "learning_rate": 3.0707756352271388e-06, "loss": 0.2078, "step": 16356 }, { "epoch": 0.75, "grad_norm": 0.6760074103336285, "learning_rate": 3.06970290411834e-06, "loss": 0.4008, "step": 16357 }, { "epoch": 0.75, "grad_norm": 0.4878105804687725, "learning_rate": 3.0686303264396353e-06, "loss": 0.2284, "step": 16358 }, { "epoch": 0.75, "grad_norm": 0.23483456169127176, "learning_rate": 3.0675579022147763e-06, "loss": 0.2007, "step": 16359 }, { "epoch": 0.75, "grad_norm": 0.40365651413994263, "learning_rate": 3.0664856314675053e-06, "loss": 0.1797, "step": 16360 }, { "epoch": 0.75, "grad_norm": 0.9281598055528028, "learning_rate": 3.0654135142215567e-06, "loss": 0.4307, "step": 16361 }, { "epoch": 0.75, "grad_norm": 0.33718025458948453, "learning_rate": 3.0643415505006733e-06, "loss": 0.2115, "step": 16362 }, { "epoch": 0.75, "grad_norm": 0.3537468097262199, "learning_rate": 3.063269740328579e-06, "loss": 0.2999, "step": 16363 }, { "epoch": 0.75, "grad_norm": 1.1218175021389905, "learning_rate": 3.062198083729008e-06, "loss": 0.5458, "step": 16364 }, { "epoch": 0.75, "grad_norm": 0.35184066118736007, "learning_rate": 3.0611265807256875e-06, "loss": 0.2518, "step": 16365 }, { "epoch": 0.75, "grad_norm": 0.3073132588673357, "learning_rate": 3.060055231342334e-06, "loss": 0.0859, "step": 16366 }, { "epoch": 0.75, "grad_norm": 0.3473053146928505, "learning_rate": 3.058984035602671e-06, "loss": 0.3093, "step": 16367 }, { "epoch": 0.75, "grad_norm": 0.44133787331267116, "learning_rate": 3.0579129935304065e-06, "loss": 0.2881, "step": 16368 }, { "epoch": 0.75, "grad_norm": 0.49389009289683866, "learning_rate": 3.0568421051492623e-06, "loss": 0.2929, "step": 16369 }, { "epoch": 0.75, "grad_norm": 0.5767997043061357, "learning_rate": 3.055771370482944e-06, "loss": 0.3548, "step": 16370 }, { "epoch": 0.75, "grad_norm": 0.3456769543682887, "learning_rate": 3.0547007895551496e-06, "loss": 0.2657, "step": 16371 }, { "epoch": 0.75, "grad_norm": 0.26100477799453764, "learning_rate": 3.053630362389587e-06, "loss": 0.1467, "step": 16372 }, { "epoch": 0.75, "grad_norm": 0.7185061882388731, "learning_rate": 3.052560089009953e-06, "loss": 0.397, "step": 16373 }, { "epoch": 0.75, "grad_norm": 0.3431879589747441, "learning_rate": 3.0514899694399445e-06, "loss": 0.2718, "step": 16374 }, { "epoch": 0.75, "grad_norm": 0.3501777859924704, "learning_rate": 3.0504200037032494e-06, "loss": 0.2753, "step": 16375 }, { "epoch": 0.75, "grad_norm": 1.7556192165480307, "learning_rate": 3.049350191823557e-06, "loss": 0.7727, "step": 16376 }, { "epoch": 0.75, "grad_norm": 0.3662298702539251, "learning_rate": 3.0482805338245545e-06, "loss": 0.2545, "step": 16377 }, { "epoch": 0.75, "grad_norm": 0.34018285075066207, "learning_rate": 3.0472110297299183e-06, "loss": 0.1657, "step": 16378 }, { "epoch": 0.75, "grad_norm": 0.38452548875421755, "learning_rate": 3.0461416795633316e-06, "loss": 0.254, "step": 16379 }, { "epoch": 0.75, "grad_norm": 0.31847985555688063, "learning_rate": 3.0450724833484635e-06, "loss": 0.2618, "step": 16380 }, { "epoch": 0.75, "grad_norm": 1.1672268529401792, "learning_rate": 3.044003441108987e-06, "loss": 0.6476, "step": 16381 }, { "epoch": 0.75, "grad_norm": 0.35443302054874587, "learning_rate": 3.0429345528685727e-06, "loss": 0.2408, "step": 16382 }, { "epoch": 0.75, "grad_norm": 0.36599661895380536, "learning_rate": 3.0418658186508787e-06, "loss": 0.2874, "step": 16383 }, { "epoch": 0.75, "grad_norm": 0.33048692150046277, "learning_rate": 3.0407972384795736e-06, "loss": 0.1546, "step": 16384 }, { "epoch": 0.75, "grad_norm": 0.6334949107599004, "learning_rate": 3.039728812378303e-06, "loss": 0.2956, "step": 16385 }, { "epoch": 0.75, "grad_norm": 0.41657746064304524, "learning_rate": 3.0386605403707347e-06, "loss": 0.2757, "step": 16386 }, { "epoch": 0.75, "grad_norm": 0.3143340567043007, "learning_rate": 3.037592422480512e-06, "loss": 0.2782, "step": 16387 }, { "epoch": 0.75, "grad_norm": 1.2500536836247713, "learning_rate": 3.0365244587312804e-06, "loss": 0.2475, "step": 16388 }, { "epoch": 0.75, "grad_norm": 0.4510531034035246, "learning_rate": 3.035456649146685e-06, "loss": 0.2931, "step": 16389 }, { "epoch": 0.75, "grad_norm": 0.38730443085608907, "learning_rate": 3.0343889937503677e-06, "loss": 0.2655, "step": 16390 }, { "epoch": 0.75, "grad_norm": 0.3695970947522675, "learning_rate": 3.033321492565967e-06, "loss": 0.3007, "step": 16391 }, { "epoch": 0.75, "grad_norm": 0.27058404527592655, "learning_rate": 3.0322541456171115e-06, "loss": 0.1565, "step": 16392 }, { "epoch": 0.75, "grad_norm": 1.2230822445057148, "learning_rate": 3.0311869529274363e-06, "loss": 0.8803, "step": 16393 }, { "epoch": 0.75, "grad_norm": 1.4433662321864855, "learning_rate": 3.030119914520562e-06, "loss": 0.4755, "step": 16394 }, { "epoch": 0.75, "grad_norm": 0.242304341416268, "learning_rate": 3.029053030420115e-06, "loss": 0.2087, "step": 16395 }, { "epoch": 0.75, "grad_norm": 0.9194181153506501, "learning_rate": 3.027986300649719e-06, "loss": 0.4386, "step": 16396 }, { "epoch": 0.75, "grad_norm": 0.39885791308839486, "learning_rate": 3.026919725232983e-06, "loss": 0.2355, "step": 16397 }, { "epoch": 0.75, "grad_norm": 0.3450698031315827, "learning_rate": 3.0258533041935234e-06, "loss": 0.2135, "step": 16398 }, { "epoch": 0.75, "grad_norm": 0.40249996903549046, "learning_rate": 3.0247870375549537e-06, "loss": 0.3111, "step": 16399 }, { "epoch": 0.75, "grad_norm": 1.2188928579883807, "learning_rate": 3.0237209253408727e-06, "loss": 0.5941, "step": 16400 }, { "epoch": 0.75, "grad_norm": 0.31033871957649317, "learning_rate": 3.0226549675748894e-06, "loss": 0.1895, "step": 16401 }, { "epoch": 0.75, "grad_norm": 0.6872494138288879, "learning_rate": 3.0215891642805937e-06, "loss": 0.3519, "step": 16402 }, { "epoch": 0.75, "grad_norm": 0.24000829920705616, "learning_rate": 3.020523515481595e-06, "loss": 0.2045, "step": 16403 }, { "epoch": 0.75, "grad_norm": 0.5845462286113416, "learning_rate": 3.019458021201476e-06, "loss": 0.3199, "step": 16404 }, { "epoch": 0.75, "grad_norm": 0.43585553896310303, "learning_rate": 3.018392681463831e-06, "loss": 0.2541, "step": 16405 }, { "epoch": 0.75, "grad_norm": 0.3643621740706527, "learning_rate": 3.0173274962922396e-06, "loss": 0.3064, "step": 16406 }, { "epoch": 0.75, "grad_norm": 0.6920576889412565, "learning_rate": 3.016262465710288e-06, "loss": 0.302, "step": 16407 }, { "epoch": 0.75, "grad_norm": 0.4139706464033087, "learning_rate": 3.0151975897415574e-06, "loss": 0.26, "step": 16408 }, { "epoch": 0.75, "grad_norm": 0.6095672401529331, "learning_rate": 3.014132868409617e-06, "loss": 0.2867, "step": 16409 }, { "epoch": 0.75, "grad_norm": 0.414872772309864, "learning_rate": 3.0130683017380445e-06, "loss": 0.304, "step": 16410 }, { "epoch": 0.75, "grad_norm": 0.24624566130833486, "learning_rate": 3.012003889750403e-06, "loss": 0.1936, "step": 16411 }, { "epoch": 0.75, "grad_norm": 1.5713270389594192, "learning_rate": 3.01093963247026e-06, "loss": 0.5456, "step": 16412 }, { "epoch": 0.75, "grad_norm": 0.5112708818750812, "learning_rate": 3.009875529921181e-06, "loss": 0.2923, "step": 16413 }, { "epoch": 0.75, "grad_norm": 0.5561618773329151, "learning_rate": 3.008811582126717e-06, "loss": 0.2533, "step": 16414 }, { "epoch": 0.75, "grad_norm": 0.39359320774152307, "learning_rate": 3.007747789110427e-06, "loss": 0.2933, "step": 16415 }, { "epoch": 0.75, "grad_norm": 0.4250944807555506, "learning_rate": 3.0066841508958642e-06, "loss": 0.295, "step": 16416 }, { "epoch": 0.75, "grad_norm": 0.5870015501540505, "learning_rate": 3.005620667506571e-06, "loss": 0.3475, "step": 16417 }, { "epoch": 0.75, "grad_norm": 0.2471418725997406, "learning_rate": 3.0045573389660987e-06, "loss": 0.1836, "step": 16418 }, { "epoch": 0.75, "grad_norm": 0.5268144407869432, "learning_rate": 3.0034941652979786e-06, "loss": 0.3022, "step": 16419 }, { "epoch": 0.75, "grad_norm": 0.46476752311174324, "learning_rate": 3.0024311465257592e-06, "loss": 0.2846, "step": 16420 }, { "epoch": 0.75, "grad_norm": 1.5023710955760372, "learning_rate": 3.0013682826729686e-06, "loss": 0.3623, "step": 16421 }, { "epoch": 0.75, "grad_norm": 0.647681170257252, "learning_rate": 3.0003055737631404e-06, "loss": 0.3421, "step": 16422 }, { "epoch": 0.75, "grad_norm": 0.27390564203419276, "learning_rate": 2.9992430198197973e-06, "loss": 0.2626, "step": 16423 }, { "epoch": 0.75, "grad_norm": 0.3821929577096597, "learning_rate": 2.9981806208664676e-06, "loss": 0.1857, "step": 16424 }, { "epoch": 0.75, "grad_norm": 0.6654849922132344, "learning_rate": 2.997118376926672e-06, "loss": 0.3882, "step": 16425 }, { "epoch": 0.75, "grad_norm": 0.3319571885779992, "learning_rate": 2.9960562880239243e-06, "loss": 0.276, "step": 16426 }, { "epoch": 0.75, "grad_norm": 0.5830536577400622, "learning_rate": 2.994994354181743e-06, "loss": 0.2265, "step": 16427 }, { "epoch": 0.75, "grad_norm": 0.6431029928089156, "learning_rate": 2.9939325754236316e-06, "loss": 0.4137, "step": 16428 }, { "epoch": 0.75, "grad_norm": 0.3096305372605152, "learning_rate": 2.9928709517731005e-06, "loss": 0.2473, "step": 16429 }, { "epoch": 0.75, "grad_norm": 0.4741315376233661, "learning_rate": 2.9918094832536547e-06, "loss": 0.3367, "step": 16430 }, { "epoch": 0.75, "grad_norm": 0.27866133220471845, "learning_rate": 2.990748169888791e-06, "loss": 0.1783, "step": 16431 }, { "epoch": 0.75, "grad_norm": 0.449529093149855, "learning_rate": 2.9896870117020073e-06, "loss": 0.2834, "step": 16432 }, { "epoch": 0.75, "grad_norm": 1.1970140723890708, "learning_rate": 2.9886260087167952e-06, "loss": 0.5961, "step": 16433 }, { "epoch": 0.75, "grad_norm": 0.3030313867360394, "learning_rate": 2.9875651609566503e-06, "loss": 0.2209, "step": 16434 }, { "epoch": 0.76, "grad_norm": 0.37263404072457224, "learning_rate": 2.986504468445053e-06, "loss": 0.2699, "step": 16435 }, { "epoch": 0.76, "grad_norm": 0.8098807160075733, "learning_rate": 2.9854439312054805e-06, "loss": 0.3607, "step": 16436 }, { "epoch": 0.76, "grad_norm": 0.2030841114563129, "learning_rate": 2.984383549261426e-06, "loss": 0.0711, "step": 16437 }, { "epoch": 0.76, "grad_norm": 0.3329399025573987, "learning_rate": 2.9833233226363547e-06, "loss": 0.2917, "step": 16438 }, { "epoch": 0.76, "grad_norm": 0.5522802318404808, "learning_rate": 2.982263251353745e-06, "loss": 0.3182, "step": 16439 }, { "epoch": 0.76, "grad_norm": 0.5498526069053413, "learning_rate": 2.9812033354370595e-06, "loss": 0.243, "step": 16440 }, { "epoch": 0.76, "grad_norm": 0.41540113748382146, "learning_rate": 2.9801435749097684e-06, "loss": 0.2694, "step": 16441 }, { "epoch": 0.76, "grad_norm": 0.3680730477452568, "learning_rate": 2.9790839697953357e-06, "loss": 0.2794, "step": 16442 }, { "epoch": 0.76, "grad_norm": 0.40782841194675556, "learning_rate": 2.978024520117213e-06, "loss": 0.1723, "step": 16443 }, { "epoch": 0.76, "grad_norm": 0.25419564615532436, "learning_rate": 2.9769652258988633e-06, "loss": 0.1576, "step": 16444 }, { "epoch": 0.76, "grad_norm": 1.4132042359971555, "learning_rate": 2.9759060871637314e-06, "loss": 0.6667, "step": 16445 }, { "epoch": 0.76, "grad_norm": 0.33516870011082367, "learning_rate": 2.974847103935269e-06, "loss": 0.2892, "step": 16446 }, { "epoch": 0.76, "grad_norm": 0.4907978212875139, "learning_rate": 2.973788276236924e-06, "loss": 0.2299, "step": 16447 }, { "epoch": 0.76, "grad_norm": 0.8105001080107226, "learning_rate": 2.9727296040921315e-06, "loss": 0.4354, "step": 16448 }, { "epoch": 0.76, "grad_norm": 0.3011798492925412, "learning_rate": 2.9716710875243326e-06, "loss": 0.1806, "step": 16449 }, { "epoch": 0.76, "grad_norm": 0.3332671340503776, "learning_rate": 2.9706127265569616e-06, "loss": 0.2226, "step": 16450 }, { "epoch": 0.76, "grad_norm": 0.5621658552968646, "learning_rate": 2.9695545212134523e-06, "loss": 0.3606, "step": 16451 }, { "epoch": 0.76, "grad_norm": 0.6157271902144869, "learning_rate": 2.9684964715172306e-06, "loss": 0.369, "step": 16452 }, { "epoch": 0.76, "grad_norm": 0.39379598675732985, "learning_rate": 2.967438577491717e-06, "loss": 0.2702, "step": 16453 }, { "epoch": 0.76, "grad_norm": 0.39420820610033264, "learning_rate": 2.9663808391603354e-06, "loss": 0.2707, "step": 16454 }, { "epoch": 0.76, "grad_norm": 0.37204250415070306, "learning_rate": 2.9653232565465017e-06, "loss": 0.1773, "step": 16455 }, { "epoch": 0.76, "grad_norm": 0.31408871748549827, "learning_rate": 2.964265829673636e-06, "loss": 0.2424, "step": 16456 }, { "epoch": 0.76, "grad_norm": 0.9295425691535901, "learning_rate": 2.9632085585651393e-06, "loss": 0.3333, "step": 16457 }, { "epoch": 0.76, "grad_norm": 0.3582046866134283, "learning_rate": 2.962151443244423e-06, "loss": 0.2959, "step": 16458 }, { "epoch": 0.76, "grad_norm": 0.36575814476445256, "learning_rate": 2.961094483734894e-06, "loss": 0.2582, "step": 16459 }, { "epoch": 0.76, "grad_norm": 0.8513982645084819, "learning_rate": 2.960037680059946e-06, "loss": 0.3196, "step": 16460 }, { "epoch": 0.76, "grad_norm": 0.2742952386711195, "learning_rate": 2.9589810322429813e-06, "loss": 0.1585, "step": 16461 }, { "epoch": 0.76, "grad_norm": 0.2982649759450033, "learning_rate": 2.957924540307384e-06, "loss": 0.2377, "step": 16462 }, { "epoch": 0.76, "grad_norm": 0.5084747286381631, "learning_rate": 2.956868204276556e-06, "loss": 0.2566, "step": 16463 }, { "epoch": 0.76, "grad_norm": 0.7405019058125024, "learning_rate": 2.9558120241738786e-06, "loss": 0.3499, "step": 16464 }, { "epoch": 0.76, "grad_norm": 0.3927707471178668, "learning_rate": 2.9547560000227303e-06, "loss": 0.2992, "step": 16465 }, { "epoch": 0.76, "grad_norm": 0.36843102828714186, "learning_rate": 2.953700131846494e-06, "loss": 0.2936, "step": 16466 }, { "epoch": 0.76, "grad_norm": 0.429806417654301, "learning_rate": 2.9526444196685455e-06, "loss": 0.1195, "step": 16467 }, { "epoch": 0.76, "grad_norm": 0.2848051806041437, "learning_rate": 2.9515888635122603e-06, "loss": 0.2213, "step": 16468 }, { "epoch": 0.76, "grad_norm": 1.0797897133530532, "learning_rate": 2.950533463401001e-06, "loss": 0.4425, "step": 16469 }, { "epoch": 0.76, "grad_norm": 0.3876924458894122, "learning_rate": 2.9494782193581397e-06, "loss": 0.2457, "step": 16470 }, { "epoch": 0.76, "grad_norm": 0.3773487854202467, "learning_rate": 2.948423131407032e-06, "loss": 0.2633, "step": 16471 }, { "epoch": 0.76, "grad_norm": 1.1367868776431203, "learning_rate": 2.947368199571039e-06, "loss": 0.6018, "step": 16472 }, { "epoch": 0.76, "grad_norm": 0.3362863604780619, "learning_rate": 2.9463134238735215e-06, "loss": 0.1684, "step": 16473 }, { "epoch": 0.76, "grad_norm": 0.37933820128583157, "learning_rate": 2.9452588043378218e-06, "loss": 0.2652, "step": 16474 }, { "epoch": 0.76, "grad_norm": 0.4211902282732409, "learning_rate": 2.9442043409872933e-06, "loss": 0.2578, "step": 16475 }, { "epoch": 0.76, "grad_norm": 0.952417529851729, "learning_rate": 2.9431500338452833e-06, "loss": 0.3406, "step": 16476 }, { "epoch": 0.76, "grad_norm": 0.34427445046750477, "learning_rate": 2.9420958829351263e-06, "loss": 0.2736, "step": 16477 }, { "epoch": 0.76, "grad_norm": 0.37420444517583357, "learning_rate": 2.9410418882801682e-06, "loss": 0.305, "step": 16478 }, { "epoch": 0.76, "grad_norm": 1.6499133216650348, "learning_rate": 2.9399880499037325e-06, "loss": 0.6196, "step": 16479 }, { "epoch": 0.76, "grad_norm": 0.22550749683892404, "learning_rate": 2.9389343678291624e-06, "loss": 0.1574, "step": 16480 }, { "epoch": 0.76, "grad_norm": 0.5380962950228091, "learning_rate": 2.9378808420797812e-06, "loss": 0.2673, "step": 16481 }, { "epoch": 0.76, "grad_norm": 0.3665603374802665, "learning_rate": 2.936827472678908e-06, "loss": 0.2989, "step": 16482 }, { "epoch": 0.76, "grad_norm": 0.3361089992495192, "learning_rate": 2.9357742596498693e-06, "loss": 0.2125, "step": 16483 }, { "epoch": 0.76, "grad_norm": 1.1650946264732804, "learning_rate": 2.9347212030159746e-06, "loss": 0.4847, "step": 16484 }, { "epoch": 0.76, "grad_norm": 0.5677710605416404, "learning_rate": 2.9336683028005486e-06, "loss": 0.3544, "step": 16485 }, { "epoch": 0.76, "grad_norm": 0.21551013054115184, "learning_rate": 2.9326155590268936e-06, "loss": 0.169, "step": 16486 }, { "epoch": 0.76, "grad_norm": 0.7618742957165453, "learning_rate": 2.9315629717183204e-06, "loss": 0.3947, "step": 16487 }, { "epoch": 0.76, "grad_norm": 0.45705718341468793, "learning_rate": 2.930510540898127e-06, "loss": 0.2446, "step": 16488 }, { "epoch": 0.76, "grad_norm": 0.30524552988355325, "learning_rate": 2.9294582665896176e-06, "loss": 0.1904, "step": 16489 }, { "epoch": 0.76, "grad_norm": 0.39649074582154, "learning_rate": 2.9284061488160896e-06, "loss": 0.3056, "step": 16490 }, { "epoch": 0.76, "grad_norm": 0.917118667781435, "learning_rate": 2.9273541876008315e-06, "loss": 0.4316, "step": 16491 }, { "epoch": 0.76, "grad_norm": 0.4392835149742781, "learning_rate": 2.9263023829671357e-06, "loss": 0.2575, "step": 16492 }, { "epoch": 0.76, "grad_norm": 0.386448379835768, "learning_rate": 2.9252507349382884e-06, "loss": 0.2613, "step": 16493 }, { "epoch": 0.76, "grad_norm": 0.3270134388747646, "learning_rate": 2.92419924353757e-06, "loss": 0.248, "step": 16494 }, { "epoch": 0.76, "grad_norm": 0.39686432791917525, "learning_rate": 2.923147908788263e-06, "loss": 0.264, "step": 16495 }, { "epoch": 0.76, "grad_norm": 0.3584657804945487, "learning_rate": 2.922096730713634e-06, "loss": 0.1711, "step": 16496 }, { "epoch": 0.76, "grad_norm": 0.5692829015945852, "learning_rate": 2.921045709336968e-06, "loss": 0.3238, "step": 16497 }, { "epoch": 0.76, "grad_norm": 0.2779637867452247, "learning_rate": 2.919994844681524e-06, "loss": 0.2504, "step": 16498 }, { "epoch": 0.76, "grad_norm": 1.0317487955384954, "learning_rate": 2.918944136770574e-06, "loss": 0.3014, "step": 16499 }, { "epoch": 0.76, "grad_norm": 0.43710011670991133, "learning_rate": 2.917893585627375e-06, "loss": 0.2669, "step": 16500 }, { "epoch": 0.76, "grad_norm": 0.35784923338863056, "learning_rate": 2.9168431912751805e-06, "loss": 0.2664, "step": 16501 }, { "epoch": 0.76, "grad_norm": 0.2693327361282062, "learning_rate": 2.9157929537372577e-06, "loss": 0.1929, "step": 16502 }, { "epoch": 0.76, "grad_norm": 0.9708979208811768, "learning_rate": 2.914742873036848e-06, "loss": 0.3924, "step": 16503 }, { "epoch": 0.76, "grad_norm": 0.4536809152731579, "learning_rate": 2.9136929491972044e-06, "loss": 0.2469, "step": 16504 }, { "epoch": 0.76, "grad_norm": 0.9155742549471608, "learning_rate": 2.9126431822415658e-06, "loss": 0.4448, "step": 16505 }, { "epoch": 0.76, "grad_norm": 0.36295944201790226, "learning_rate": 2.9115935721931766e-06, "loss": 0.2668, "step": 16506 }, { "epoch": 0.76, "grad_norm": 0.4635464661092478, "learning_rate": 2.910544119075277e-06, "loss": 0.3123, "step": 16507 }, { "epoch": 0.76, "grad_norm": 0.2866690306469791, "learning_rate": 2.9094948229110952e-06, "loss": 0.1847, "step": 16508 }, { "epoch": 0.76, "grad_norm": 0.45805280713826424, "learning_rate": 2.908445683723864e-06, "loss": 0.2663, "step": 16509 }, { "epoch": 0.76, "grad_norm": 0.41591185005957104, "learning_rate": 2.907396701536813e-06, "loss": 0.2496, "step": 16510 }, { "epoch": 0.76, "grad_norm": 0.5808348129969879, "learning_rate": 2.90634787637316e-06, "loss": 0.3289, "step": 16511 }, { "epoch": 0.76, "grad_norm": 0.9505693483730848, "learning_rate": 2.9052992082561314e-06, "loss": 0.4571, "step": 16512 }, { "epoch": 0.76, "grad_norm": 0.4324496263522261, "learning_rate": 2.904250697208937e-06, "loss": 0.2794, "step": 16513 }, { "epoch": 0.76, "grad_norm": 0.20934092649358407, "learning_rate": 2.9032023432547927e-06, "loss": 0.2024, "step": 16514 }, { "epoch": 0.76, "grad_norm": 0.9657513927505617, "learning_rate": 2.90215414641691e-06, "loss": 0.2842, "step": 16515 }, { "epoch": 0.76, "grad_norm": 0.4383857468575562, "learning_rate": 2.9011061067184952e-06, "loss": 0.2624, "step": 16516 }, { "epoch": 0.76, "grad_norm": 0.42076545986855673, "learning_rate": 2.9000582241827504e-06, "loss": 0.315, "step": 16517 }, { "epoch": 0.76, "grad_norm": 0.5367414523551842, "learning_rate": 2.899010498832866e-06, "loss": 0.3862, "step": 16518 }, { "epoch": 0.76, "grad_norm": 0.3453438563422978, "learning_rate": 2.897962930692052e-06, "loss": 0.218, "step": 16519 }, { "epoch": 0.76, "grad_norm": 0.3296601959703274, "learning_rate": 2.896915519783491e-06, "loss": 0.1494, "step": 16520 }, { "epoch": 0.76, "grad_norm": 0.5211773041965411, "learning_rate": 2.8958682661303774e-06, "loss": 0.2994, "step": 16521 }, { "epoch": 0.76, "grad_norm": 0.39868423446807105, "learning_rate": 2.894821169755889e-06, "loss": 0.2115, "step": 16522 }, { "epoch": 0.76, "grad_norm": 1.4668791941985382, "learning_rate": 2.893774230683213e-06, "loss": 0.5821, "step": 16523 }, { "epoch": 0.76, "grad_norm": 1.2660858894239166, "learning_rate": 2.8927274489355296e-06, "loss": 0.7786, "step": 16524 }, { "epoch": 0.76, "grad_norm": 0.36184081604962254, "learning_rate": 2.891680824536007e-06, "loss": 0.2014, "step": 16525 }, { "epoch": 0.76, "grad_norm": 0.338881300278615, "learning_rate": 2.89063435750782e-06, "loss": 0.2974, "step": 16526 }, { "epoch": 0.76, "grad_norm": 0.3828896905278202, "learning_rate": 2.8895880478741357e-06, "loss": 0.2425, "step": 16527 }, { "epoch": 0.76, "grad_norm": 0.4433167756198432, "learning_rate": 2.8885418956581226e-06, "loss": 0.1754, "step": 16528 }, { "epoch": 0.76, "grad_norm": 0.40868910817760534, "learning_rate": 2.8874959008829372e-06, "loss": 0.2973, "step": 16529 }, { "epoch": 0.76, "grad_norm": 0.5422769469302408, "learning_rate": 2.886450063571735e-06, "loss": 0.3736, "step": 16530 }, { "epoch": 0.76, "grad_norm": 0.6031179373067898, "learning_rate": 2.885404383747672e-06, "loss": 0.3471, "step": 16531 }, { "epoch": 0.76, "grad_norm": 0.43487003035821237, "learning_rate": 2.884358861433899e-06, "loss": 0.251, "step": 16532 }, { "epoch": 0.76, "grad_norm": 0.34554071098732797, "learning_rate": 2.8833134966535658e-06, "loss": 0.2558, "step": 16533 }, { "epoch": 0.76, "grad_norm": 0.3068776341201295, "learning_rate": 2.8822682894298095e-06, "loss": 0.1981, "step": 16534 }, { "epoch": 0.76, "grad_norm": 0.45438020977869265, "learning_rate": 2.881223239785772e-06, "loss": 0.2251, "step": 16535 }, { "epoch": 0.76, "grad_norm": 1.306898103716611, "learning_rate": 2.8801783477445956e-06, "loss": 0.7305, "step": 16536 }, { "epoch": 0.76, "grad_norm": 0.3188042951860328, "learning_rate": 2.8791336133294047e-06, "loss": 0.268, "step": 16537 }, { "epoch": 0.76, "grad_norm": 0.37195990488388075, "learning_rate": 2.878089036563335e-06, "loss": 0.2363, "step": 16538 }, { "epoch": 0.76, "grad_norm": 0.5621084973668745, "learning_rate": 2.8770446174695067e-06, "loss": 0.2931, "step": 16539 }, { "epoch": 0.76, "grad_norm": 0.32366480160379035, "learning_rate": 2.876000356071046e-06, "loss": 0.2278, "step": 16540 }, { "epoch": 0.76, "grad_norm": 0.4375931423443274, "learning_rate": 2.8749562523910744e-06, "loss": 0.2405, "step": 16541 }, { "epoch": 0.76, "grad_norm": 0.5513827025180599, "learning_rate": 2.8739123064527007e-06, "loss": 0.3727, "step": 16542 }, { "epoch": 0.76, "grad_norm": 0.5655571202289628, "learning_rate": 2.872868518279044e-06, "loss": 0.3014, "step": 16543 }, { "epoch": 0.76, "grad_norm": 0.43001945437126393, "learning_rate": 2.871824887893202e-06, "loss": 0.3054, "step": 16544 }, { "epoch": 0.76, "grad_norm": 0.3616648253108674, "learning_rate": 2.8707814153182935e-06, "loss": 0.26, "step": 16545 }, { "epoch": 0.76, "grad_norm": 0.2648527893966305, "learning_rate": 2.8697381005774126e-06, "loss": 0.1399, "step": 16546 }, { "epoch": 0.76, "grad_norm": 0.4187983498661776, "learning_rate": 2.868694943693655e-06, "loss": 0.2603, "step": 16547 }, { "epoch": 0.76, "grad_norm": 0.778209187746288, "learning_rate": 2.8676519446901187e-06, "loss": 0.321, "step": 16548 }, { "epoch": 0.76, "grad_norm": 0.30508090710873276, "learning_rate": 2.8666091035898935e-06, "loss": 0.2364, "step": 16549 }, { "epoch": 0.76, "grad_norm": 0.4004046834903637, "learning_rate": 2.8655664204160718e-06, "loss": 0.3095, "step": 16550 }, { "epoch": 0.76, "grad_norm": 1.4446799760631164, "learning_rate": 2.8645238951917287e-06, "loss": 0.278, "step": 16551 }, { "epoch": 0.76, "grad_norm": 0.2633997565884859, "learning_rate": 2.8634815279399497e-06, "loss": 0.1412, "step": 16552 }, { "epoch": 0.76, "grad_norm": 0.2844974422348613, "learning_rate": 2.8624393186838152e-06, "loss": 0.2618, "step": 16553 }, { "epoch": 0.76, "grad_norm": 1.0553809986614044, "learning_rate": 2.8613972674463908e-06, "loss": 0.3193, "step": 16554 }, { "epoch": 0.76, "grad_norm": 0.5527285769856833, "learning_rate": 2.860355374250755e-06, "loss": 0.304, "step": 16555 }, { "epoch": 0.76, "grad_norm": 0.40881719124444355, "learning_rate": 2.859313639119966e-06, "loss": 0.2762, "step": 16556 }, { "epoch": 0.76, "grad_norm": 0.3851466268295038, "learning_rate": 2.858272062077091e-06, "loss": 0.3088, "step": 16557 }, { "epoch": 0.76, "grad_norm": 0.1585462992668782, "learning_rate": 2.8572306431451914e-06, "loss": 0.0724, "step": 16558 }, { "epoch": 0.76, "grad_norm": 0.43936903534590815, "learning_rate": 2.8561893823473188e-06, "loss": 0.2858, "step": 16559 }, { "epoch": 0.76, "grad_norm": 0.9961653666244219, "learning_rate": 2.8551482797065312e-06, "loss": 0.3809, "step": 16560 }, { "epoch": 0.76, "grad_norm": 0.33979836562281085, "learning_rate": 2.854107335245868e-06, "loss": 0.2321, "step": 16561 }, { "epoch": 0.76, "grad_norm": 0.4472726076861613, "learning_rate": 2.8530665489883867e-06, "loss": 0.3363, "step": 16562 }, { "epoch": 0.76, "grad_norm": 1.5969552984003739, "learning_rate": 2.8520259209571222e-06, "loss": 0.6396, "step": 16563 }, { "epoch": 0.76, "grad_norm": 0.2401999395175981, "learning_rate": 2.8509854511751166e-06, "loss": 0.0805, "step": 16564 }, { "epoch": 0.76, "grad_norm": 0.262943268238002, "learning_rate": 2.8499451396654e-06, "loss": 0.253, "step": 16565 }, { "epoch": 0.76, "grad_norm": 0.8429604173971407, "learning_rate": 2.8489049864510053e-06, "loss": 0.3883, "step": 16566 }, { "epoch": 0.76, "grad_norm": 0.6585996025071122, "learning_rate": 2.8478649915549663e-06, "loss": 0.2605, "step": 16567 }, { "epoch": 0.76, "grad_norm": 0.35127952965274456, "learning_rate": 2.8468251550003e-06, "loss": 0.2611, "step": 16568 }, { "epoch": 0.76, "grad_norm": 0.3650854904187938, "learning_rate": 2.84578547681003e-06, "loss": 0.2928, "step": 16569 }, { "epoch": 0.76, "grad_norm": 0.4908636684855845, "learning_rate": 2.844745957007178e-06, "loss": 0.1911, "step": 16570 }, { "epoch": 0.76, "grad_norm": 0.2843779950599411, "learning_rate": 2.84370659561475e-06, "loss": 0.1772, "step": 16571 }, { "epoch": 0.76, "grad_norm": 0.6646684203090293, "learning_rate": 2.8426673926557646e-06, "loss": 0.3829, "step": 16572 }, { "epoch": 0.76, "grad_norm": 0.3874664238666174, "learning_rate": 2.8416283481532214e-06, "loss": 0.2762, "step": 16573 }, { "epoch": 0.76, "grad_norm": 0.3544642621591675, "learning_rate": 2.8405894621301276e-06, "loss": 0.214, "step": 16574 }, { "epoch": 0.76, "grad_norm": 1.2150027008016253, "learning_rate": 2.839550734609485e-06, "loss": 0.4971, "step": 16575 }, { "epoch": 0.76, "grad_norm": 0.29534037613152425, "learning_rate": 2.8385121656142856e-06, "loss": 0.2098, "step": 16576 }, { "epoch": 0.76, "grad_norm": 0.2988305251242387, "learning_rate": 2.837473755167528e-06, "loss": 0.2174, "step": 16577 }, { "epoch": 0.76, "grad_norm": 0.45515509273025667, "learning_rate": 2.836435503292191e-06, "loss": 0.2732, "step": 16578 }, { "epoch": 0.76, "grad_norm": 0.7282177041243947, "learning_rate": 2.8353974100112737e-06, "loss": 0.4261, "step": 16579 }, { "epoch": 0.76, "grad_norm": 0.36821536275426064, "learning_rate": 2.8343594753477498e-06, "loss": 0.2831, "step": 16580 }, { "epoch": 0.76, "grad_norm": 0.3698198260820381, "learning_rate": 2.833321699324604e-06, "loss": 0.2307, "step": 16581 }, { "epoch": 0.76, "grad_norm": 0.46603132521891044, "learning_rate": 2.832284081964806e-06, "loss": 0.2342, "step": 16582 }, { "epoch": 0.76, "grad_norm": 0.35819382942409606, "learning_rate": 2.8312466232913284e-06, "loss": 0.2452, "step": 16583 }, { "epoch": 0.76, "grad_norm": 0.3257399878017025, "learning_rate": 2.8302093233271454e-06, "loss": 0.2357, "step": 16584 }, { "epoch": 0.76, "grad_norm": 0.5048493698585362, "learning_rate": 2.8291721820952146e-06, "loss": 0.3577, "step": 16585 }, { "epoch": 0.76, "grad_norm": 0.3421487850399439, "learning_rate": 2.828135199618499e-06, "loss": 0.2613, "step": 16586 }, { "epoch": 0.76, "grad_norm": 1.3481064742908084, "learning_rate": 2.827098375919962e-06, "loss": 0.1767, "step": 16587 }, { "epoch": 0.76, "grad_norm": 0.4774937426962428, "learning_rate": 2.8260617110225506e-06, "loss": 0.3301, "step": 16588 }, { "epoch": 0.76, "grad_norm": 0.2966956709622009, "learning_rate": 2.825025204949222e-06, "loss": 0.2452, "step": 16589 }, { "epoch": 0.76, "grad_norm": 0.4947360398641383, "learning_rate": 2.8239888577229156e-06, "loss": 0.1834, "step": 16590 }, { "epoch": 0.76, "grad_norm": 0.7870808134103285, "learning_rate": 2.822952669366581e-06, "loss": 0.4666, "step": 16591 }, { "epoch": 0.76, "grad_norm": 0.2989686619789008, "learning_rate": 2.821916639903156e-06, "loss": 0.233, "step": 16592 }, { "epoch": 0.76, "grad_norm": 0.4481966754326238, "learning_rate": 2.820880769355582e-06, "loss": 0.3085, "step": 16593 }, { "epoch": 0.76, "grad_norm": 0.6442032863755881, "learning_rate": 2.8198450577467882e-06, "loss": 0.267, "step": 16594 }, { "epoch": 0.76, "grad_norm": 0.39940322690931307, "learning_rate": 2.8188095050996976e-06, "loss": 0.2648, "step": 16595 }, { "epoch": 0.76, "grad_norm": 0.4696527092936459, "learning_rate": 2.8177741114372504e-06, "loss": 0.2955, "step": 16596 }, { "epoch": 0.76, "grad_norm": 0.3552202605287625, "learning_rate": 2.816738876782359e-06, "loss": 0.2597, "step": 16597 }, { "epoch": 0.76, "grad_norm": 0.3555467813028991, "learning_rate": 2.8157038011579485e-06, "loss": 0.2357, "step": 16598 }, { "epoch": 0.76, "grad_norm": 0.7354285436811204, "learning_rate": 2.8146688845869287e-06, "loss": 0.2673, "step": 16599 }, { "epoch": 0.76, "grad_norm": 0.5202925517063207, "learning_rate": 2.813634127092213e-06, "loss": 0.272, "step": 16600 }, { "epoch": 0.76, "grad_norm": 0.30189947934203837, "learning_rate": 2.8125995286967155e-06, "loss": 0.2372, "step": 16601 }, { "epoch": 0.76, "grad_norm": 1.005801220163093, "learning_rate": 2.811565089423335e-06, "loss": 0.4833, "step": 16602 }, { "epoch": 0.76, "grad_norm": 0.29886796492084367, "learning_rate": 2.810530809294977e-06, "loss": 0.1424, "step": 16603 }, { "epoch": 0.76, "grad_norm": 0.2565281610730216, "learning_rate": 2.809496688334534e-06, "loss": 0.2009, "step": 16604 }, { "epoch": 0.76, "grad_norm": 0.3712917777889735, "learning_rate": 2.8084627265649057e-06, "loss": 0.2994, "step": 16605 }, { "epoch": 0.76, "grad_norm": 0.665379295913935, "learning_rate": 2.8074289240089835e-06, "loss": 0.347, "step": 16606 }, { "epoch": 0.76, "grad_norm": 0.357936723707498, "learning_rate": 2.806395280689649e-06, "loss": 0.1877, "step": 16607 }, { "epoch": 0.76, "grad_norm": 1.356453048193744, "learning_rate": 2.8053617966297908e-06, "loss": 0.4811, "step": 16608 }, { "epoch": 0.76, "grad_norm": 0.3788025160831487, "learning_rate": 2.804328471852288e-06, "loss": 0.3244, "step": 16609 }, { "epoch": 0.76, "grad_norm": 0.3097997971083851, "learning_rate": 2.8032953063800192e-06, "loss": 0.1827, "step": 16610 }, { "epoch": 0.76, "grad_norm": 0.32356151705666597, "learning_rate": 2.8022623002358575e-06, "loss": 0.1908, "step": 16611 }, { "epoch": 0.76, "grad_norm": 0.3594711604917871, "learning_rate": 2.8012294534426645e-06, "loss": 0.2887, "step": 16612 }, { "epoch": 0.76, "grad_norm": 0.3288007572383773, "learning_rate": 2.80019676602332e-06, "loss": 0.189, "step": 16613 }, { "epoch": 0.76, "grad_norm": 1.3344141207946567, "learning_rate": 2.7991642380006754e-06, "loss": 0.492, "step": 16614 }, { "epoch": 0.76, "grad_norm": 0.43172421875337313, "learning_rate": 2.7981318693975988e-06, "loss": 0.2544, "step": 16615 }, { "epoch": 0.76, "grad_norm": 0.3053720627459669, "learning_rate": 2.797099660236937e-06, "loss": 0.1848, "step": 16616 }, { "epoch": 0.76, "grad_norm": 0.28341131538950487, "learning_rate": 2.7960676105415474e-06, "loss": 0.2485, "step": 16617 }, { "epoch": 0.76, "grad_norm": 0.8107387239783755, "learning_rate": 2.79503572033428e-06, "loss": 0.3569, "step": 16618 }, { "epoch": 0.76, "grad_norm": 0.42585142859728004, "learning_rate": 2.7940039896379757e-06, "loss": 0.2673, "step": 16619 }, { "epoch": 0.76, "grad_norm": 0.334215290189977, "learning_rate": 2.79297241847548e-06, "loss": 0.246, "step": 16620 }, { "epoch": 0.76, "grad_norm": 1.3968604133136437, "learning_rate": 2.791941006869626e-06, "loss": 0.8431, "step": 16621 }, { "epoch": 0.76, "grad_norm": 0.46752884394421024, "learning_rate": 2.790909754843251e-06, "loss": 0.2392, "step": 16622 }, { "epoch": 0.76, "grad_norm": 0.4654964680462339, "learning_rate": 2.7898786624191878e-06, "loss": 0.2599, "step": 16623 }, { "epoch": 0.76, "grad_norm": 0.2608099775844682, "learning_rate": 2.78884772962026e-06, "loss": 0.2181, "step": 16624 }, { "epoch": 0.76, "grad_norm": 0.3219723620114302, "learning_rate": 2.7878169564692926e-06, "loss": 0.2487, "step": 16625 }, { "epoch": 0.76, "grad_norm": 1.734205897890928, "learning_rate": 2.786786342989106e-06, "loss": 0.2772, "step": 16626 }, { "epoch": 0.76, "grad_norm": 0.9248608356272644, "learning_rate": 2.7857558892025227e-06, "loss": 0.5727, "step": 16627 }, { "epoch": 0.76, "grad_norm": 0.37553281069516226, "learning_rate": 2.784725595132346e-06, "loss": 0.2349, "step": 16628 }, { "epoch": 0.76, "grad_norm": 0.35762631415032536, "learning_rate": 2.783695460801391e-06, "loss": 0.2438, "step": 16629 }, { "epoch": 0.76, "grad_norm": 0.3758488152278506, "learning_rate": 2.7826654862324665e-06, "loss": 0.1943, "step": 16630 }, { "epoch": 0.76, "grad_norm": 0.7231118314822784, "learning_rate": 2.7816356714483685e-06, "loss": 0.305, "step": 16631 }, { "epoch": 0.76, "grad_norm": 0.31842760778053686, "learning_rate": 2.7806060164719027e-06, "loss": 0.2604, "step": 16632 }, { "epoch": 0.76, "grad_norm": 0.5101585576180016, "learning_rate": 2.7795765213258585e-06, "loss": 0.2823, "step": 16633 }, { "epoch": 0.76, "grad_norm": 0.5009212629755045, "learning_rate": 2.7785471860330313e-06, "loss": 0.2698, "step": 16634 }, { "epoch": 0.76, "grad_norm": 0.48819682283973637, "learning_rate": 2.7775180106162126e-06, "loss": 0.3425, "step": 16635 }, { "epoch": 0.76, "grad_norm": 0.2514496933622202, "learning_rate": 2.776488995098181e-06, "loss": 0.1901, "step": 16636 }, { "epoch": 0.76, "grad_norm": 0.3129090758384421, "learning_rate": 2.7754601395017233e-06, "loss": 0.1944, "step": 16637 }, { "epoch": 0.76, "grad_norm": 0.5758389032633211, "learning_rate": 2.774431443849609e-06, "loss": 0.3476, "step": 16638 }, { "epoch": 0.76, "grad_norm": 0.8367311955566802, "learning_rate": 2.773402908164625e-06, "loss": 0.3065, "step": 16639 }, { "epoch": 0.76, "grad_norm": 0.3082929500626861, "learning_rate": 2.772374532469535e-06, "loss": 0.2377, "step": 16640 }, { "epoch": 0.76, "grad_norm": 0.3662318333441316, "learning_rate": 2.7713463167871036e-06, "loss": 0.2848, "step": 16641 }, { "epoch": 0.76, "grad_norm": 0.2296830271231695, "learning_rate": 2.770318261140098e-06, "loss": 0.0825, "step": 16642 }, { "epoch": 0.76, "grad_norm": 0.4095104529086831, "learning_rate": 2.7692903655512783e-06, "loss": 0.2569, "step": 16643 }, { "epoch": 0.76, "grad_norm": 0.5750583955930172, "learning_rate": 2.7682626300434037e-06, "loss": 0.3812, "step": 16644 }, { "epoch": 0.76, "grad_norm": 0.3880356314569871, "learning_rate": 2.7672350546392213e-06, "loss": 0.3098, "step": 16645 }, { "epoch": 0.76, "grad_norm": 0.35144556799420035, "learning_rate": 2.7662076393614846e-06, "loss": 0.2156, "step": 16646 }, { "epoch": 0.76, "grad_norm": 1.4221263041241423, "learning_rate": 2.7651803842329406e-06, "loss": 0.605, "step": 16647 }, { "epoch": 0.76, "grad_norm": 0.2959753898217581, "learning_rate": 2.7641532892763267e-06, "loss": 0.2371, "step": 16648 }, { "epoch": 0.76, "grad_norm": 0.3105532073877698, "learning_rate": 2.7631263545143895e-06, "loss": 0.0878, "step": 16649 }, { "epoch": 0.76, "grad_norm": 0.4237932618064866, "learning_rate": 2.7620995799698557e-06, "loss": 0.3003, "step": 16650 }, { "epoch": 0.76, "grad_norm": 0.6064180142677038, "learning_rate": 2.761072965665461e-06, "loss": 0.376, "step": 16651 }, { "epoch": 0.76, "grad_norm": 0.26896087832692145, "learning_rate": 2.7600465116239373e-06, "loss": 0.1809, "step": 16652 }, { "epoch": 0.77, "grad_norm": 0.4731513670379963, "learning_rate": 2.7590202178680035e-06, "loss": 0.2827, "step": 16653 }, { "epoch": 0.77, "grad_norm": 0.48030946361538474, "learning_rate": 2.7579940844203857e-06, "loss": 0.2337, "step": 16654 }, { "epoch": 0.77, "grad_norm": 0.2533427280532958, "learning_rate": 2.7569681113037914e-06, "loss": 0.1315, "step": 16655 }, { "epoch": 0.77, "grad_norm": 0.31692548136498766, "learning_rate": 2.75594229854095e-06, "loss": 0.2913, "step": 16656 }, { "epoch": 0.77, "grad_norm": 0.6914481942381092, "learning_rate": 2.7549166461545608e-06, "loss": 0.3203, "step": 16657 }, { "epoch": 0.77, "grad_norm": 0.5499335805219313, "learning_rate": 2.7538911541673376e-06, "loss": 0.3347, "step": 16658 }, { "epoch": 0.77, "grad_norm": 0.4419902387599507, "learning_rate": 2.752865822601977e-06, "loss": 0.221, "step": 16659 }, { "epoch": 0.77, "grad_norm": 0.41338232842479383, "learning_rate": 2.7518406514811815e-06, "loss": 0.2956, "step": 16660 }, { "epoch": 0.77, "grad_norm": 0.2844506904536113, "learning_rate": 2.750815640827652e-06, "loss": 0.1498, "step": 16661 }, { "epoch": 0.77, "grad_norm": 0.40864717677829043, "learning_rate": 2.749790790664074e-06, "loss": 0.2411, "step": 16662 }, { "epoch": 0.77, "grad_norm": 0.6833746519196006, "learning_rate": 2.748766101013143e-06, "loss": 0.4332, "step": 16663 }, { "epoch": 0.77, "grad_norm": 0.30424384838326585, "learning_rate": 2.7477415718975387e-06, "loss": 0.274, "step": 16664 }, { "epoch": 0.77, "grad_norm": 0.4688709404107074, "learning_rate": 2.746717203339946e-06, "loss": 0.2287, "step": 16665 }, { "epoch": 0.77, "grad_norm": 0.3984056091544771, "learning_rate": 2.745692995363047e-06, "loss": 0.2095, "step": 16666 }, { "epoch": 0.77, "grad_norm": 0.3516878251970094, "learning_rate": 2.7446689479895105e-06, "loss": 0.1843, "step": 16667 }, { "epoch": 0.77, "grad_norm": 0.25471891974360444, "learning_rate": 2.7436450612420098e-06, "loss": 0.2263, "step": 16668 }, { "epoch": 0.77, "grad_norm": 0.6999031771674309, "learning_rate": 2.7426213351432174e-06, "loss": 0.4218, "step": 16669 }, { "epoch": 0.77, "grad_norm": 0.9338050303878667, "learning_rate": 2.7415977697157903e-06, "loss": 0.4605, "step": 16670 }, { "epoch": 0.77, "grad_norm": 0.4134699646373108, "learning_rate": 2.7405743649823967e-06, "loss": 0.2669, "step": 16671 }, { "epoch": 0.77, "grad_norm": 0.4063789576578708, "learning_rate": 2.7395511209656833e-06, "loss": 0.252, "step": 16672 }, { "epoch": 0.77, "grad_norm": 0.39139710941021477, "learning_rate": 2.738528037688316e-06, "loss": 0.2196, "step": 16673 }, { "epoch": 0.77, "grad_norm": 0.3749867210685117, "learning_rate": 2.737505115172937e-06, "loss": 0.2817, "step": 16674 }, { "epoch": 0.77, "grad_norm": 0.854438367201808, "learning_rate": 2.736482353442198e-06, "loss": 0.3603, "step": 16675 }, { "epoch": 0.77, "grad_norm": 0.30159227535861494, "learning_rate": 2.7354597525187365e-06, "loss": 0.2649, "step": 16676 }, { "epoch": 0.77, "grad_norm": 0.33685320228283155, "learning_rate": 2.7344373124251934e-06, "loss": 0.2341, "step": 16677 }, { "epoch": 0.77, "grad_norm": 1.570008510523658, "learning_rate": 2.733415033184209e-06, "loss": 0.237, "step": 16678 }, { "epoch": 0.77, "grad_norm": 0.5498174152184994, "learning_rate": 2.7323929148184094e-06, "loss": 0.2974, "step": 16679 }, { "epoch": 0.77, "grad_norm": 0.3442415737565152, "learning_rate": 2.7313709573504288e-06, "loss": 0.2671, "step": 16680 }, { "epoch": 0.77, "grad_norm": 0.3065745704001493, "learning_rate": 2.7303491608028864e-06, "loss": 0.2003, "step": 16681 }, { "epoch": 0.77, "grad_norm": 0.32807840181264986, "learning_rate": 2.7293275251984074e-06, "loss": 0.181, "step": 16682 }, { "epoch": 0.77, "grad_norm": 0.45344050212510834, "learning_rate": 2.7283060505596126e-06, "loss": 0.3201, "step": 16683 }, { "epoch": 0.77, "grad_norm": 0.40792247051837116, "learning_rate": 2.7272847369091093e-06, "loss": 0.318, "step": 16684 }, { "epoch": 0.77, "grad_norm": 0.6731072566515036, "learning_rate": 2.726263584269513e-06, "loss": 0.2704, "step": 16685 }, { "epoch": 0.77, "grad_norm": 0.3259355884528992, "learning_rate": 2.725242592663434e-06, "loss": 0.2422, "step": 16686 }, { "epoch": 0.77, "grad_norm": 0.4702929102769375, "learning_rate": 2.724221762113468e-06, "loss": 0.3512, "step": 16687 }, { "epoch": 0.77, "grad_norm": 0.2675273269647348, "learning_rate": 2.7232010926422235e-06, "loss": 0.1697, "step": 16688 }, { "epoch": 0.77, "grad_norm": 0.2916902860540976, "learning_rate": 2.7221805842722883e-06, "loss": 0.1984, "step": 16689 }, { "epoch": 0.77, "grad_norm": 1.0363499183287534, "learning_rate": 2.7211602370262656e-06, "loss": 0.4611, "step": 16690 }, { "epoch": 0.77, "grad_norm": 0.42280121707017404, "learning_rate": 2.7201400509267373e-06, "loss": 0.2531, "step": 16691 }, { "epoch": 0.77, "grad_norm": 0.3186983401970785, "learning_rate": 2.7191200259962938e-06, "loss": 0.2721, "step": 16692 }, { "epoch": 0.77, "grad_norm": 1.3277241274196392, "learning_rate": 2.718100162257513e-06, "loss": 0.6723, "step": 16693 }, { "epoch": 0.77, "grad_norm": 0.3168171624385611, "learning_rate": 2.717080459732977e-06, "loss": 0.1914, "step": 16694 }, { "epoch": 0.77, "grad_norm": 0.382763041741232, "learning_rate": 2.7160609184452624e-06, "loss": 0.2013, "step": 16695 }, { "epoch": 0.77, "grad_norm": 0.35890682633987075, "learning_rate": 2.715041538416936e-06, "loss": 0.3125, "step": 16696 }, { "epoch": 0.77, "grad_norm": 0.5623659516809043, "learning_rate": 2.7140223196705718e-06, "loss": 0.3373, "step": 16697 }, { "epoch": 0.77, "grad_norm": 0.33339247750900636, "learning_rate": 2.713003262228727e-06, "loss": 0.1911, "step": 16698 }, { "epoch": 0.77, "grad_norm": 1.0844939041607131, "learning_rate": 2.7119843661139677e-06, "loss": 0.6947, "step": 16699 }, { "epoch": 0.77, "grad_norm": 0.2811720132306387, "learning_rate": 2.710965631348853e-06, "loss": 0.229, "step": 16700 }, { "epoch": 0.77, "grad_norm": 0.23936624313079055, "learning_rate": 2.7099470579559317e-06, "loss": 0.167, "step": 16701 }, { "epoch": 0.77, "grad_norm": 0.6169389169633458, "learning_rate": 2.708928645957756e-06, "loss": 0.3691, "step": 16702 }, { "epoch": 0.77, "grad_norm": 0.361309840440058, "learning_rate": 2.7079103953768725e-06, "loss": 0.3012, "step": 16703 }, { "epoch": 0.77, "grad_norm": 0.3218830754681622, "learning_rate": 2.7068923062358276e-06, "loss": 0.1881, "step": 16704 }, { "epoch": 0.77, "grad_norm": 1.3702658836215624, "learning_rate": 2.7058743785571573e-06, "loss": 0.8038, "step": 16705 }, { "epoch": 0.77, "grad_norm": 0.49255027363258974, "learning_rate": 2.7048566123633935e-06, "loss": 0.2612, "step": 16706 }, { "epoch": 0.77, "grad_norm": 0.2665275606448107, "learning_rate": 2.7038390076770783e-06, "loss": 0.2084, "step": 16707 }, { "epoch": 0.77, "grad_norm": 0.37230596275630823, "learning_rate": 2.702821564520732e-06, "loss": 0.2515, "step": 16708 }, { "epoch": 0.77, "grad_norm": 0.9665570766730617, "learning_rate": 2.7018042829168867e-06, "loss": 0.4389, "step": 16709 }, { "epoch": 0.77, "grad_norm": 0.33355686024761394, "learning_rate": 2.7007871628880567e-06, "loss": 0.2382, "step": 16710 }, { "epoch": 0.77, "grad_norm": 1.2250471208907658, "learning_rate": 2.6997702044567654e-06, "loss": 0.5396, "step": 16711 }, { "epoch": 0.77, "grad_norm": 0.31793069580916333, "learning_rate": 2.6987534076455267e-06, "loss": 0.242, "step": 16712 }, { "epoch": 0.77, "grad_norm": 0.34769197644072003, "learning_rate": 2.6977367724768486e-06, "loss": 0.2647, "step": 16713 }, { "epoch": 0.77, "grad_norm": 0.4580150002971441, "learning_rate": 2.6967202989732443e-06, "loss": 0.1866, "step": 16714 }, { "epoch": 0.77, "grad_norm": 0.36281873438293033, "learning_rate": 2.695703987157209e-06, "loss": 0.2809, "step": 16715 }, { "epoch": 0.77, "grad_norm": 0.4003142238087575, "learning_rate": 2.6946878370512485e-06, "loss": 0.267, "step": 16716 }, { "epoch": 0.77, "grad_norm": 0.5918643598493024, "learning_rate": 2.693671848677861e-06, "loss": 0.2926, "step": 16717 }, { "epoch": 0.77, "grad_norm": 0.8692842552197624, "learning_rate": 2.6926560220595333e-06, "loss": 0.3715, "step": 16718 }, { "epoch": 0.77, "grad_norm": 0.4122053278070936, "learning_rate": 2.691640357218759e-06, "loss": 0.2936, "step": 16719 }, { "epoch": 0.77, "grad_norm": 0.38337913227734677, "learning_rate": 2.6906248541780233e-06, "loss": 0.2998, "step": 16720 }, { "epoch": 0.77, "grad_norm": 0.22519551808218163, "learning_rate": 2.689609512959811e-06, "loss": 0.0687, "step": 16721 }, { "epoch": 0.77, "grad_norm": 0.3362320913505971, "learning_rate": 2.6885943335865962e-06, "loss": 0.2512, "step": 16722 }, { "epoch": 0.77, "grad_norm": 0.5268567130523985, "learning_rate": 2.6875793160808584e-06, "loss": 0.3913, "step": 16723 }, { "epoch": 0.77, "grad_norm": 0.474324344012607, "learning_rate": 2.686564460465063e-06, "loss": 0.2438, "step": 16724 }, { "epoch": 0.77, "grad_norm": 0.35752894287736386, "learning_rate": 2.6855497667616824e-06, "loss": 0.2663, "step": 16725 }, { "epoch": 0.77, "grad_norm": 0.32089442098345655, "learning_rate": 2.684535234993183e-06, "loss": 0.1985, "step": 16726 }, { "epoch": 0.77, "grad_norm": 0.38417528649390204, "learning_rate": 2.6835208651820198e-06, "loss": 0.2419, "step": 16727 }, { "epoch": 0.77, "grad_norm": 0.35323582924758223, "learning_rate": 2.6825066573506543e-06, "loss": 0.26, "step": 16728 }, { "epoch": 0.77, "grad_norm": 1.348758012229431, "learning_rate": 2.68149261152154e-06, "loss": 0.7943, "step": 16729 }, { "epoch": 0.77, "grad_norm": 0.6751072821756354, "learning_rate": 2.680478727717123e-06, "loss": 0.2947, "step": 16730 }, { "epoch": 0.77, "grad_norm": 0.29463980600617223, "learning_rate": 2.679465005959856e-06, "loss": 0.2437, "step": 16731 }, { "epoch": 0.77, "grad_norm": 0.5413395913268767, "learning_rate": 2.6784514462721713e-06, "loss": 0.3311, "step": 16732 }, { "epoch": 0.77, "grad_norm": 0.33764951866060244, "learning_rate": 2.6774380486765205e-06, "loss": 0.1694, "step": 16733 }, { "epoch": 0.77, "grad_norm": 0.40874906671519173, "learning_rate": 2.676424813195335e-06, "loss": 0.1772, "step": 16734 }, { "epoch": 0.77, "grad_norm": 0.41951171391912384, "learning_rate": 2.6754117398510417e-06, "loss": 0.3384, "step": 16735 }, { "epoch": 0.77, "grad_norm": 0.5044259299138488, "learning_rate": 2.674398828666074e-06, "loss": 0.3566, "step": 16736 }, { "epoch": 0.77, "grad_norm": 0.42734949853882725, "learning_rate": 2.6733860796628542e-06, "loss": 0.1977, "step": 16737 }, { "epoch": 0.77, "grad_norm": 0.7110794778170559, "learning_rate": 2.672373492863809e-06, "loss": 0.3281, "step": 16738 }, { "epoch": 0.77, "grad_norm": 0.2542412579935446, "learning_rate": 2.6713610682913494e-06, "loss": 0.2153, "step": 16739 }, { "epoch": 0.77, "grad_norm": 0.292968086213938, "learning_rate": 2.6703488059678952e-06, "loss": 0.1757, "step": 16740 }, { "epoch": 0.77, "grad_norm": 0.947033983372503, "learning_rate": 2.6693367059158515e-06, "loss": 0.5632, "step": 16741 }, { "epoch": 0.77, "grad_norm": 0.7288273209144358, "learning_rate": 2.6683247681576265e-06, "loss": 0.3812, "step": 16742 }, { "epoch": 0.77, "grad_norm": 0.2851944491258738, "learning_rate": 2.6673129927156285e-06, "loss": 0.2111, "step": 16743 }, { "epoch": 0.77, "grad_norm": 0.5491525185607987, "learning_rate": 2.6663013796122505e-06, "loss": 0.3089, "step": 16744 }, { "epoch": 0.77, "grad_norm": 0.310065639023231, "learning_rate": 2.665289928869892e-06, "loss": 0.2087, "step": 16745 }, { "epoch": 0.77, "grad_norm": 0.40509558450595917, "learning_rate": 2.6642786405109477e-06, "loss": 0.2831, "step": 16746 }, { "epoch": 0.77, "grad_norm": 0.3949452787414176, "learning_rate": 2.6632675145578e-06, "loss": 0.2471, "step": 16747 }, { "epoch": 0.77, "grad_norm": 0.45650040969671, "learning_rate": 2.6622565510328436e-06, "loss": 0.3358, "step": 16748 }, { "epoch": 0.77, "grad_norm": 0.331421059659242, "learning_rate": 2.6612457499584477e-06, "loss": 0.2487, "step": 16749 }, { "epoch": 0.77, "grad_norm": 1.4135166675940847, "learning_rate": 2.6602351113570036e-06, "loss": 0.2702, "step": 16750 }, { "epoch": 0.77, "grad_norm": 0.2313632774435852, "learning_rate": 2.6592246352508767e-06, "loss": 0.2019, "step": 16751 }, { "epoch": 0.77, "grad_norm": 0.4468857926201969, "learning_rate": 2.6582143216624445e-06, "loss": 0.2935, "step": 16752 }, { "epoch": 0.77, "grad_norm": 0.4943364869972546, "learning_rate": 2.6572041706140682e-06, "loss": 0.2753, "step": 16753 }, { "epoch": 0.77, "grad_norm": 0.7067609635089999, "learning_rate": 2.6561941821281145e-06, "loss": 0.4327, "step": 16754 }, { "epoch": 0.77, "grad_norm": 0.3827729668520749, "learning_rate": 2.6551843562269477e-06, "loss": 0.2779, "step": 16755 }, { "epoch": 0.77, "grad_norm": 0.32694520773190744, "learning_rate": 2.6541746929329158e-06, "loss": 0.2354, "step": 16756 }, { "epoch": 0.77, "grad_norm": 0.314747302287425, "learning_rate": 2.65316519226838e-06, "loss": 0.1601, "step": 16757 }, { "epoch": 0.77, "grad_norm": 0.5691618930737959, "learning_rate": 2.6521558542556815e-06, "loss": 0.3506, "step": 16758 }, { "epoch": 0.77, "grad_norm": 0.2716421589380168, "learning_rate": 2.6511466789171715e-06, "loss": 0.2698, "step": 16759 }, { "epoch": 0.77, "grad_norm": 0.8871597053015742, "learning_rate": 2.650137666275194e-06, "loss": 0.3295, "step": 16760 }, { "epoch": 0.77, "grad_norm": 0.4506441184723755, "learning_rate": 2.6491288163520825e-06, "loss": 0.2877, "step": 16761 }, { "epoch": 0.77, "grad_norm": 0.5771436153690723, "learning_rate": 2.648120129170173e-06, "loss": 0.3266, "step": 16762 }, { "epoch": 0.77, "grad_norm": 0.2725425251040875, "learning_rate": 2.647111604751801e-06, "loss": 0.1954, "step": 16763 }, { "epoch": 0.77, "grad_norm": 0.3089732205930462, "learning_rate": 2.646103243119289e-06, "loss": 0.2086, "step": 16764 }, { "epoch": 0.77, "grad_norm": 0.4605881138208105, "learning_rate": 2.6450950442949654e-06, "loss": 0.3316, "step": 16765 }, { "epoch": 0.77, "grad_norm": 0.9425963414084222, "learning_rate": 2.644087008301144e-06, "loss": 0.5267, "step": 16766 }, { "epoch": 0.77, "grad_norm": 0.30503558167830364, "learning_rate": 2.6430791351601514e-06, "loss": 0.2648, "step": 16767 }, { "epoch": 0.77, "grad_norm": 0.6165019630165293, "learning_rate": 2.6420714248942938e-06, "loss": 0.3469, "step": 16768 }, { "epoch": 0.77, "grad_norm": 0.46061170221758924, "learning_rate": 2.6410638775258856e-06, "loss": 0.1598, "step": 16769 }, { "epoch": 0.77, "grad_norm": 0.40919998550475734, "learning_rate": 2.640056493077231e-06, "loss": 0.2861, "step": 16770 }, { "epoch": 0.77, "grad_norm": 0.3080257520500706, "learning_rate": 2.6390492715706264e-06, "loss": 0.2658, "step": 16771 }, { "epoch": 0.77, "grad_norm": 0.928025723021502, "learning_rate": 2.6380422130283812e-06, "loss": 0.5588, "step": 16772 }, { "epoch": 0.77, "grad_norm": 0.2904725193917328, "learning_rate": 2.6370353174727837e-06, "loss": 0.0846, "step": 16773 }, { "epoch": 0.77, "grad_norm": 0.4444469749584699, "learning_rate": 2.6360285849261303e-06, "loss": 0.2748, "step": 16774 }, { "epoch": 0.77, "grad_norm": 0.36258214661021765, "learning_rate": 2.6350220154107044e-06, "loss": 0.2846, "step": 16775 }, { "epoch": 0.77, "grad_norm": 0.5987837160918604, "learning_rate": 2.6340156089487912e-06, "loss": 0.2089, "step": 16776 }, { "epoch": 0.77, "grad_norm": 0.29346213528411325, "learning_rate": 2.6330093655626777e-06, "loss": 0.2292, "step": 16777 }, { "epoch": 0.77, "grad_norm": 1.2883965086248017, "learning_rate": 2.6320032852746326e-06, "loss": 0.7519, "step": 16778 }, { "epoch": 0.77, "grad_norm": 0.24275415218346255, "learning_rate": 2.6309973681069333e-06, "loss": 0.1763, "step": 16779 }, { "epoch": 0.77, "grad_norm": 0.43919299240527665, "learning_rate": 2.6299916140818527e-06, "loss": 0.2865, "step": 16780 }, { "epoch": 0.77, "grad_norm": 0.7603436078300181, "learning_rate": 2.628986023221651e-06, "loss": 0.4036, "step": 16781 }, { "epoch": 0.77, "grad_norm": 0.4474138613994775, "learning_rate": 2.627980595548599e-06, "loss": 0.229, "step": 16782 }, { "epoch": 0.77, "grad_norm": 0.39574076643155853, "learning_rate": 2.6269753310849443e-06, "loss": 0.2848, "step": 16783 }, { "epoch": 0.77, "grad_norm": 0.5298559210314184, "learning_rate": 2.6259702298529565e-06, "loss": 0.4018, "step": 16784 }, { "epoch": 0.77, "grad_norm": 0.2487329921280947, "learning_rate": 2.624965291874877e-06, "loss": 0.1425, "step": 16785 }, { "epoch": 0.77, "grad_norm": 0.41049115428712835, "learning_rate": 2.62396051717296e-06, "loss": 0.2099, "step": 16786 }, { "epoch": 0.77, "grad_norm": 0.3699195197291354, "learning_rate": 2.6229559057694466e-06, "loss": 0.2819, "step": 16787 }, { "epoch": 0.77, "grad_norm": 0.5156963593446017, "learning_rate": 2.621951457686578e-06, "loss": 0.292, "step": 16788 }, { "epoch": 0.77, "grad_norm": 0.4446364903914271, "learning_rate": 2.6209471729465964e-06, "loss": 0.2423, "step": 16789 }, { "epoch": 0.77, "grad_norm": 0.3873617995527452, "learning_rate": 2.6199430515717296e-06, "loss": 0.299, "step": 16790 }, { "epoch": 0.77, "grad_norm": 0.33792533150489, "learning_rate": 2.618939093584214e-06, "loss": 0.2287, "step": 16791 }, { "epoch": 0.77, "grad_norm": 0.38006130785460696, "learning_rate": 2.617935299006269e-06, "loss": 0.199, "step": 16792 }, { "epoch": 0.77, "grad_norm": 0.5628717039689143, "learning_rate": 2.616931667860123e-06, "loss": 0.3339, "step": 16793 }, { "epoch": 0.77, "grad_norm": 0.9254852193896228, "learning_rate": 2.6159282001679955e-06, "loss": 0.316, "step": 16794 }, { "epoch": 0.77, "grad_norm": 0.25300454347975904, "learning_rate": 2.614924895952099e-06, "loss": 0.2189, "step": 16795 }, { "epoch": 0.77, "grad_norm": 1.2691693027468753, "learning_rate": 2.6139217552346462e-06, "loss": 0.7178, "step": 16796 }, { "epoch": 0.77, "grad_norm": 0.3165158474442954, "learning_rate": 2.6129187780378473e-06, "loss": 0.154, "step": 16797 }, { "epoch": 0.77, "grad_norm": 0.3861589315444167, "learning_rate": 2.6119159643839107e-06, "loss": 0.3078, "step": 16798 }, { "epoch": 0.77, "grad_norm": 0.348322252760024, "learning_rate": 2.610913314295034e-06, "loss": 0.2505, "step": 16799 }, { "epoch": 0.77, "grad_norm": 0.8675081368059038, "learning_rate": 2.6099108277934105e-06, "loss": 0.3754, "step": 16800 }, { "epoch": 0.77, "grad_norm": 0.36584028807440316, "learning_rate": 2.6089085049012397e-06, "loss": 0.2616, "step": 16801 }, { "epoch": 0.77, "grad_norm": 0.4776039407152915, "learning_rate": 2.6079063456407106e-06, "loss": 0.2612, "step": 16802 }, { "epoch": 0.77, "grad_norm": 0.2903685272727473, "learning_rate": 2.606904350034013e-06, "loss": 0.1837, "step": 16803 }, { "epoch": 0.77, "grad_norm": 0.3447715261288622, "learning_rate": 2.605902518103325e-06, "loss": 0.2508, "step": 16804 }, { "epoch": 0.77, "grad_norm": 0.7532184132320382, "learning_rate": 2.6049008498708285e-06, "loss": 0.2954, "step": 16805 }, { "epoch": 0.77, "grad_norm": 0.47185618134577817, "learning_rate": 2.6038993453587034e-06, "loss": 0.2624, "step": 16806 }, { "epoch": 0.77, "grad_norm": 0.3288253028536059, "learning_rate": 2.602898004589115e-06, "loss": 0.2647, "step": 16807 }, { "epoch": 0.77, "grad_norm": 1.1210583169858463, "learning_rate": 2.601896827584238e-06, "loss": 0.5075, "step": 16808 }, { "epoch": 0.77, "grad_norm": 0.7531327890121325, "learning_rate": 2.6008958143662323e-06, "loss": 0.2299, "step": 16809 }, { "epoch": 0.77, "grad_norm": 0.34352398359066944, "learning_rate": 2.5998949649572614e-06, "loss": 0.2664, "step": 16810 }, { "epoch": 0.77, "grad_norm": 0.296096905744487, "learning_rate": 2.5988942793794868e-06, "loss": 0.2398, "step": 16811 }, { "epoch": 0.77, "grad_norm": 1.0210335682212848, "learning_rate": 2.5978937576550566e-06, "loss": 0.115, "step": 16812 }, { "epoch": 0.77, "grad_norm": 0.38414739216715077, "learning_rate": 2.596893399806124e-06, "loss": 0.2886, "step": 16813 }, { "epoch": 0.77, "grad_norm": 0.6846475995523693, "learning_rate": 2.595893205854837e-06, "loss": 0.3651, "step": 16814 }, { "epoch": 0.77, "grad_norm": 0.5194245999026923, "learning_rate": 2.594893175823341e-06, "loss": 0.2443, "step": 16815 }, { "epoch": 0.77, "grad_norm": 0.38345637001489674, "learning_rate": 2.5938933097337703e-06, "loss": 0.2705, "step": 16816 }, { "epoch": 0.77, "grad_norm": 0.2653099810231982, "learning_rate": 2.5928936076082666e-06, "loss": 0.154, "step": 16817 }, { "epoch": 0.77, "grad_norm": 0.3914538779858025, "learning_rate": 2.5918940694689552e-06, "loss": 0.2318, "step": 16818 }, { "epoch": 0.77, "grad_norm": 0.3575021810706745, "learning_rate": 2.590894695337971e-06, "loss": 0.2604, "step": 16819 }, { "epoch": 0.77, "grad_norm": 1.328414747237804, "learning_rate": 2.589895485237439e-06, "loss": 0.7878, "step": 16820 }, { "epoch": 0.77, "grad_norm": 0.838495162454866, "learning_rate": 2.588896439189477e-06, "loss": 0.3845, "step": 16821 }, { "epoch": 0.77, "grad_norm": 0.3361392972626814, "learning_rate": 2.5878975572162036e-06, "loss": 0.1911, "step": 16822 }, { "epoch": 0.77, "grad_norm": 0.25127939616321776, "learning_rate": 2.5868988393397376e-06, "loss": 0.2064, "step": 16823 }, { "epoch": 0.77, "grad_norm": 0.8623133443556457, "learning_rate": 2.5859002855821837e-06, "loss": 0.465, "step": 16824 }, { "epoch": 0.77, "grad_norm": 0.35890523857090056, "learning_rate": 2.5849018959656546e-06, "loss": 0.2181, "step": 16825 }, { "epoch": 0.77, "grad_norm": 0.3674442642755873, "learning_rate": 2.5839036705122456e-06, "loss": 0.2838, "step": 16826 }, { "epoch": 0.77, "grad_norm": 0.6401368583816887, "learning_rate": 2.5829056092440664e-06, "loss": 0.3714, "step": 16827 }, { "epoch": 0.77, "grad_norm": 0.3431464510168847, "learning_rate": 2.5819077121832092e-06, "loss": 0.2032, "step": 16828 }, { "epoch": 0.77, "grad_norm": 0.3101542705025432, "learning_rate": 2.5809099793517622e-06, "loss": 0.1666, "step": 16829 }, { "epoch": 0.77, "grad_norm": 0.37664066821061204, "learning_rate": 2.579912410771821e-06, "loss": 0.2685, "step": 16830 }, { "epoch": 0.77, "grad_norm": 0.3165874385171127, "learning_rate": 2.578915006465461e-06, "loss": 0.1987, "step": 16831 }, { "epoch": 0.77, "grad_norm": 0.8323456064244357, "learning_rate": 2.577917766454776e-06, "loss": 0.4931, "step": 16832 }, { "epoch": 0.77, "grad_norm": 0.8071155434145634, "learning_rate": 2.576920690761836e-06, "loss": 0.4146, "step": 16833 }, { "epoch": 0.77, "grad_norm": 0.3717142235434635, "learning_rate": 2.57592377940872e-06, "loss": 0.2166, "step": 16834 }, { "epoch": 0.77, "grad_norm": 0.3662039525766404, "learning_rate": 2.5749270324174923e-06, "loss": 0.2346, "step": 16835 }, { "epoch": 0.77, "grad_norm": 0.3465580723974107, "learning_rate": 2.5739304498102246e-06, "loss": 0.1981, "step": 16836 }, { "epoch": 0.77, "grad_norm": 0.32879939392495067, "learning_rate": 2.5729340316089822e-06, "loss": 0.2416, "step": 16837 }, { "epoch": 0.77, "grad_norm": 0.6138178753468019, "learning_rate": 2.57193777783582e-06, "loss": 0.2638, "step": 16838 }, { "epoch": 0.77, "grad_norm": 0.5413704457977427, "learning_rate": 2.570941688512795e-06, "loss": 0.31, "step": 16839 }, { "epoch": 0.77, "grad_norm": 0.42851077043210345, "learning_rate": 2.569945763661964e-06, "loss": 0.2526, "step": 16840 }, { "epoch": 0.77, "grad_norm": 0.24815332113259747, "learning_rate": 2.5689500033053705e-06, "loss": 0.1304, "step": 16841 }, { "epoch": 0.77, "grad_norm": 0.3622352266125836, "learning_rate": 2.567954407465063e-06, "loss": 0.3172, "step": 16842 }, { "epoch": 0.77, "grad_norm": 0.4271984038934308, "learning_rate": 2.5669589761630776e-06, "loss": 0.2664, "step": 16843 }, { "epoch": 0.77, "grad_norm": 0.4874050682279112, "learning_rate": 2.5659637094214616e-06, "loss": 0.2766, "step": 16844 }, { "epoch": 0.77, "grad_norm": 1.4597999135020407, "learning_rate": 2.5649686072622437e-06, "loss": 0.6212, "step": 16845 }, { "epoch": 0.77, "grad_norm": 0.3524427806300477, "learning_rate": 2.5639736697074525e-06, "loss": 0.2415, "step": 16846 }, { "epoch": 0.77, "grad_norm": 0.3778868995479082, "learning_rate": 2.5629788967791203e-06, "loss": 0.2729, "step": 16847 }, { "epoch": 0.77, "grad_norm": 0.1934119961692052, "learning_rate": 2.5619842884992607e-06, "loss": 0.1098, "step": 16848 }, { "epoch": 0.77, "grad_norm": 0.34287014610942307, "learning_rate": 2.5609898448899073e-06, "loss": 0.2636, "step": 16849 }, { "epoch": 0.77, "grad_norm": 0.502287193825031, "learning_rate": 2.5599955659730646e-06, "loss": 0.3489, "step": 16850 }, { "epoch": 0.77, "grad_norm": 0.5000799048716301, "learning_rate": 2.559001451770753e-06, "loss": 0.2656, "step": 16851 }, { "epoch": 0.77, "grad_norm": 0.3808480942109761, "learning_rate": 2.5580075023049744e-06, "loss": 0.2555, "step": 16852 }, { "epoch": 0.77, "grad_norm": 1.3923487116574238, "learning_rate": 2.557013717597737e-06, "loss": 0.5902, "step": 16853 }, { "epoch": 0.77, "grad_norm": 0.2156226855148095, "learning_rate": 2.556020097671046e-06, "loss": 0.1835, "step": 16854 }, { "epoch": 0.77, "grad_norm": 0.3346579729252059, "learning_rate": 2.555026642546892e-06, "loss": 0.2521, "step": 16855 }, { "epoch": 0.77, "grad_norm": 0.6987108622580954, "learning_rate": 2.5540333522472717e-06, "loss": 0.3875, "step": 16856 }, { "epoch": 0.77, "grad_norm": 0.49850009008590246, "learning_rate": 2.55304022679418e-06, "loss": 0.2672, "step": 16857 }, { "epoch": 0.77, "grad_norm": 0.4267393320348157, "learning_rate": 2.5520472662095975e-06, "loss": 0.2845, "step": 16858 }, { "epoch": 0.77, "grad_norm": 0.4083958525798289, "learning_rate": 2.5510544705155125e-06, "loss": 0.2966, "step": 16859 }, { "epoch": 0.77, "grad_norm": 0.46755526510637707, "learning_rate": 2.5500618397339004e-06, "loss": 0.2531, "step": 16860 }, { "epoch": 0.77, "grad_norm": 0.41370677999482647, "learning_rate": 2.5490693738867377e-06, "loss": 0.1827, "step": 16861 }, { "epoch": 0.77, "grad_norm": 0.34722232862944413, "learning_rate": 2.548077072995998e-06, "loss": 0.2865, "step": 16862 }, { "epoch": 0.77, "grad_norm": 0.4193843076091362, "learning_rate": 2.5470849370836526e-06, "loss": 0.279, "step": 16863 }, { "epoch": 0.77, "grad_norm": 0.37035555149855964, "learning_rate": 2.5460929661716637e-06, "loss": 0.1568, "step": 16864 }, { "epoch": 0.77, "grad_norm": 0.6591113577096058, "learning_rate": 2.5451011602819866e-06, "loss": 0.2979, "step": 16865 }, { "epoch": 0.77, "grad_norm": 0.36267197400145623, "learning_rate": 2.5441095194365894e-06, "loss": 0.2842, "step": 16866 }, { "epoch": 0.77, "grad_norm": 0.3414372005557124, "learning_rate": 2.5431180436574197e-06, "loss": 0.2125, "step": 16867 }, { "epoch": 0.77, "grad_norm": 0.9238118130279691, "learning_rate": 2.542126732966432e-06, "loss": 0.5234, "step": 16868 }, { "epoch": 0.77, "grad_norm": 0.3769564465805755, "learning_rate": 2.5411355873855683e-06, "loss": 0.2167, "step": 16869 }, { "epoch": 0.77, "grad_norm": 0.23081800478675196, "learning_rate": 2.5401446069367717e-06, "loss": 0.2052, "step": 16870 }, { "epoch": 0.78, "grad_norm": 1.4512488532012031, "learning_rate": 2.5391537916419883e-06, "loss": 0.5289, "step": 16871 }, { "epoch": 0.78, "grad_norm": 0.7515583571731129, "learning_rate": 2.5381631415231455e-06, "loss": 0.3521, "step": 16872 }, { "epoch": 0.78, "grad_norm": 0.4185299488064976, "learning_rate": 2.5371726566021794e-06, "loss": 0.3148, "step": 16873 }, { "epoch": 0.78, "grad_norm": 0.30117332410518094, "learning_rate": 2.536182336901021e-06, "loss": 0.2274, "step": 16874 }, { "epoch": 0.78, "grad_norm": 0.28983120390795447, "learning_rate": 2.535192182441588e-06, "loss": 0.1837, "step": 16875 }, { "epoch": 0.78, "grad_norm": 0.5925742843759656, "learning_rate": 2.5342021932458094e-06, "loss": 0.2979, "step": 16876 }, { "epoch": 0.78, "grad_norm": 0.41149221992558704, "learning_rate": 2.533212369335595e-06, "loss": 0.2367, "step": 16877 }, { "epoch": 0.78, "grad_norm": 0.3697593010579812, "learning_rate": 2.5322227107328623e-06, "loss": 0.3104, "step": 16878 }, { "epoch": 0.78, "grad_norm": 0.6798879024518325, "learning_rate": 2.531233217459521e-06, "loss": 0.3743, "step": 16879 }, { "epoch": 0.78, "grad_norm": 0.35705572783131817, "learning_rate": 2.5302438895374816e-06, "loss": 0.2497, "step": 16880 }, { "epoch": 0.78, "grad_norm": 0.2629687351516884, "learning_rate": 2.529254726988639e-06, "loss": 0.1871, "step": 16881 }, { "epoch": 0.78, "grad_norm": 0.4080779476275008, "learning_rate": 2.5282657298348968e-06, "loss": 0.2728, "step": 16882 }, { "epoch": 0.78, "grad_norm": 0.43083255903203216, "learning_rate": 2.527276898098153e-06, "loss": 0.2379, "step": 16883 }, { "epoch": 0.78, "grad_norm": 0.822864454535755, "learning_rate": 2.5262882318002933e-06, "loss": 0.4453, "step": 16884 }, { "epoch": 0.78, "grad_norm": 0.6661796210757384, "learning_rate": 2.5252997309632123e-06, "loss": 0.4124, "step": 16885 }, { "epoch": 0.78, "grad_norm": 0.298500243893485, "learning_rate": 2.524311395608787e-06, "loss": 0.2732, "step": 16886 }, { "epoch": 0.78, "grad_norm": 0.2682300631141981, "learning_rate": 2.5233232257589037e-06, "loss": 0.0997, "step": 16887 }, { "epoch": 0.78, "grad_norm": 0.41674266254070313, "learning_rate": 2.5223352214354403e-06, "loss": 0.2584, "step": 16888 }, { "epoch": 0.78, "grad_norm": 0.597721082965969, "learning_rate": 2.5213473826602643e-06, "loss": 0.3151, "step": 16889 }, { "epoch": 0.78, "grad_norm": 0.3815100005426821, "learning_rate": 2.5203597094552534e-06, "loss": 0.2606, "step": 16890 }, { "epoch": 0.78, "grad_norm": 0.4368068267935512, "learning_rate": 2.5193722018422627e-06, "loss": 0.2705, "step": 16891 }, { "epoch": 0.78, "grad_norm": 0.5683527987148197, "learning_rate": 2.518384859843168e-06, "loss": 0.3579, "step": 16892 }, { "epoch": 0.78, "grad_norm": 0.2745876981304326, "learning_rate": 2.517397683479822e-06, "loss": 0.1575, "step": 16893 }, { "epoch": 0.78, "grad_norm": 0.5382777320927795, "learning_rate": 2.5164106727740754e-06, "loss": 0.3447, "step": 16894 }, { "epoch": 0.78, "grad_norm": 0.39568200048256325, "learning_rate": 2.515423827747785e-06, "loss": 0.2861, "step": 16895 }, { "epoch": 0.78, "grad_norm": 0.761951256315499, "learning_rate": 2.514437148422797e-06, "loss": 0.2979, "step": 16896 }, { "epoch": 0.78, "grad_norm": 0.5866253034442265, "learning_rate": 2.5134506348209588e-06, "loss": 0.31, "step": 16897 }, { "epoch": 0.78, "grad_norm": 0.27683190165021193, "learning_rate": 2.5124642869641047e-06, "loss": 0.2752, "step": 16898 }, { "epoch": 0.78, "grad_norm": 0.5070257434932641, "learning_rate": 2.5114781048740743e-06, "loss": 0.2275, "step": 16899 }, { "epoch": 0.78, "grad_norm": 0.5858175782984074, "learning_rate": 2.510492088572705e-06, "loss": 0.1949, "step": 16900 }, { "epoch": 0.78, "grad_norm": 0.3436963448845202, "learning_rate": 2.509506238081818e-06, "loss": 0.2528, "step": 16901 }, { "epoch": 0.78, "grad_norm": 0.39483053342906405, "learning_rate": 2.508520553423248e-06, "loss": 0.2826, "step": 16902 }, { "epoch": 0.78, "grad_norm": 0.5527862700215229, "learning_rate": 2.5075350346188088e-06, "loss": 0.1774, "step": 16903 }, { "epoch": 0.78, "grad_norm": 0.42214610893828297, "learning_rate": 2.5065496816903223e-06, "loss": 0.3151, "step": 16904 }, { "epoch": 0.78, "grad_norm": 0.5442473586267601, "learning_rate": 2.505564494659607e-06, "loss": 0.2434, "step": 16905 }, { "epoch": 0.78, "grad_norm": 0.3324821989307847, "learning_rate": 2.5045794735484675e-06, "loss": 0.2379, "step": 16906 }, { "epoch": 0.78, "grad_norm": 0.42777671943191353, "learning_rate": 2.5035946183787175e-06, "loss": 0.2876, "step": 16907 }, { "epoch": 0.78, "grad_norm": 0.4753776837958063, "learning_rate": 2.5026099291721517e-06, "loss": 0.2498, "step": 16908 }, { "epoch": 0.78, "grad_norm": 0.308651720522031, "learning_rate": 2.501625405950582e-06, "loss": 0.2118, "step": 16909 }, { "epoch": 0.78, "grad_norm": 0.3896609660569872, "learning_rate": 2.500641048735798e-06, "loss": 0.3212, "step": 16910 }, { "epoch": 0.78, "grad_norm": 0.9037952051072807, "learning_rate": 2.4996568575495906e-06, "loss": 0.4453, "step": 16911 }, { "epoch": 0.78, "grad_norm": 0.6756230401967475, "learning_rate": 2.498672832413751e-06, "loss": 0.345, "step": 16912 }, { "epoch": 0.78, "grad_norm": 0.256250434238195, "learning_rate": 2.4976889733500664e-06, "loss": 0.1615, "step": 16913 }, { "epoch": 0.78, "grad_norm": 0.2910456418231704, "learning_rate": 2.496705280380318e-06, "loss": 0.2491, "step": 16914 }, { "epoch": 0.78, "grad_norm": 1.3657642000799581, "learning_rate": 2.4957217535262824e-06, "loss": 0.8027, "step": 16915 }, { "epoch": 0.78, "grad_norm": 0.37933924434434824, "learning_rate": 2.4947383928097325e-06, "loss": 0.2068, "step": 16916 }, { "epoch": 0.78, "grad_norm": 0.5566496674036311, "learning_rate": 2.4937551982524443e-06, "loss": 0.3536, "step": 16917 }, { "epoch": 0.78, "grad_norm": 0.523803963235113, "learning_rate": 2.4927721698761796e-06, "loss": 0.3222, "step": 16918 }, { "epoch": 0.78, "grad_norm": 0.27523480750231805, "learning_rate": 2.4917893077027056e-06, "loss": 0.1669, "step": 16919 }, { "epoch": 0.78, "grad_norm": 0.49927242210888956, "learning_rate": 2.4908066117537766e-06, "loss": 0.2544, "step": 16920 }, { "epoch": 0.78, "grad_norm": 0.45955856928829436, "learning_rate": 2.489824082051152e-06, "loss": 0.3463, "step": 16921 }, { "epoch": 0.78, "grad_norm": 0.2985869777066035, "learning_rate": 2.4888417186165868e-06, "loss": 0.2231, "step": 16922 }, { "epoch": 0.78, "grad_norm": 0.9341184370967512, "learning_rate": 2.4878595214718236e-06, "loss": 0.3727, "step": 16923 }, { "epoch": 0.78, "grad_norm": 0.6890972966248986, "learning_rate": 2.486877490638613e-06, "loss": 0.3352, "step": 16924 }, { "epoch": 0.78, "grad_norm": 0.38828034988312793, "learning_rate": 2.485895626138688e-06, "loss": 0.2774, "step": 16925 }, { "epoch": 0.78, "grad_norm": 0.2372726782202294, "learning_rate": 2.4849139279937974e-06, "loss": 0.1658, "step": 16926 }, { "epoch": 0.78, "grad_norm": 0.7224570332482729, "learning_rate": 2.4839323962256668e-06, "loss": 0.4275, "step": 16927 }, { "epoch": 0.78, "grad_norm": 0.37822772641834596, "learning_rate": 2.482951030856031e-06, "loss": 0.2662, "step": 16928 }, { "epoch": 0.78, "grad_norm": 0.3997958380044706, "learning_rate": 2.481969831906612e-06, "loss": 0.2641, "step": 16929 }, { "epoch": 0.78, "grad_norm": 0.727595993338251, "learning_rate": 2.4809887993991344e-06, "loss": 0.3833, "step": 16930 }, { "epoch": 0.78, "grad_norm": 0.36846946846261613, "learning_rate": 2.4800079333553217e-06, "loss": 0.2704, "step": 16931 }, { "epoch": 0.78, "grad_norm": 0.31810168383484183, "learning_rate": 2.4790272337968813e-06, "loss": 0.1014, "step": 16932 }, { "epoch": 0.78, "grad_norm": 0.35266221344412063, "learning_rate": 2.47804670074553e-06, "loss": 0.3013, "step": 16933 }, { "epoch": 0.78, "grad_norm": 0.3476636841227362, "learning_rate": 2.4770663342229785e-06, "loss": 0.2716, "step": 16934 }, { "epoch": 0.78, "grad_norm": 0.7093547275713081, "learning_rate": 2.4760861342509235e-06, "loss": 0.3828, "step": 16935 }, { "epoch": 0.78, "grad_norm": 1.509486048090909, "learning_rate": 2.4751061008510736e-06, "loss": 0.2192, "step": 16936 }, { "epoch": 0.78, "grad_norm": 0.2666250661033359, "learning_rate": 2.4741262340451187e-06, "loss": 0.2376, "step": 16937 }, { "epoch": 0.78, "grad_norm": 0.39426939935170785, "learning_rate": 2.4731465338547556e-06, "loss": 0.2327, "step": 16938 }, { "epoch": 0.78, "grad_norm": 0.37709619473443684, "learning_rate": 2.4721670003016762e-06, "loss": 0.1428, "step": 16939 }, { "epoch": 0.78, "grad_norm": 0.37448939671037723, "learning_rate": 2.4711876334075623e-06, "loss": 0.2805, "step": 16940 }, { "epoch": 0.78, "grad_norm": 0.5090748656575231, "learning_rate": 2.4702084331941002e-06, "loss": 0.3063, "step": 16941 }, { "epoch": 0.78, "grad_norm": 0.45839736240020623, "learning_rate": 2.4692293996829597e-06, "loss": 0.2258, "step": 16942 }, { "epoch": 0.78, "grad_norm": 0.36634293993376316, "learning_rate": 2.4682505328958283e-06, "loss": 0.3026, "step": 16943 }, { "epoch": 0.78, "grad_norm": 0.2643927495902993, "learning_rate": 2.467271832854368e-06, "loss": 0.1573, "step": 16944 }, { "epoch": 0.78, "grad_norm": 0.3050608651365397, "learning_rate": 2.4662932995802514e-06, "loss": 0.2366, "step": 16945 }, { "epoch": 0.78, "grad_norm": 0.35779832033210524, "learning_rate": 2.4653149330951377e-06, "loss": 0.2444, "step": 16946 }, { "epoch": 0.78, "grad_norm": 0.6983783111526196, "learning_rate": 2.464336733420689e-06, "loss": 0.4104, "step": 16947 }, { "epoch": 0.78, "grad_norm": 1.330399029731723, "learning_rate": 2.4633587005785664e-06, "loss": 0.5298, "step": 16948 }, { "epoch": 0.78, "grad_norm": 0.30397420918511087, "learning_rate": 2.4623808345904142e-06, "loss": 0.1796, "step": 16949 }, { "epoch": 0.78, "grad_norm": 0.24412385153374297, "learning_rate": 2.461403135477888e-06, "loss": 0.2256, "step": 16950 }, { "epoch": 0.78, "grad_norm": 0.7826359483147893, "learning_rate": 2.4604256032626285e-06, "loss": 0.4204, "step": 16951 }, { "epoch": 0.78, "grad_norm": 0.3216460244693477, "learning_rate": 2.4594482379662787e-06, "loss": 0.2106, "step": 16952 }, { "epoch": 0.78, "grad_norm": 0.35846615674132337, "learning_rate": 2.4584710396104807e-06, "loss": 0.3144, "step": 16953 }, { "epoch": 0.78, "grad_norm": 1.6664077463195153, "learning_rate": 2.457494008216862e-06, "loss": 0.5844, "step": 16954 }, { "epoch": 0.78, "grad_norm": 0.3588565556874185, "learning_rate": 2.456517143807057e-06, "loss": 0.1951, "step": 16955 }, { "epoch": 0.78, "grad_norm": 0.5834727707629743, "learning_rate": 2.455540446402691e-06, "loss": 0.2488, "step": 16956 }, { "epoch": 0.78, "grad_norm": 0.375479290949618, "learning_rate": 2.454563916025392e-06, "loss": 0.2879, "step": 16957 }, { "epoch": 0.78, "grad_norm": 0.3419722792136791, "learning_rate": 2.4535875526967747e-06, "loss": 0.2217, "step": 16958 }, { "epoch": 0.78, "grad_norm": 1.2990383928409102, "learning_rate": 2.4526113564384502e-06, "loss": 0.8104, "step": 16959 }, { "epoch": 0.78, "grad_norm": 0.5795741853625167, "learning_rate": 2.451635327272042e-06, "loss": 0.248, "step": 16960 }, { "epoch": 0.78, "grad_norm": 0.3530166032328826, "learning_rate": 2.4506594652191485e-06, "loss": 0.2571, "step": 16961 }, { "epoch": 0.78, "grad_norm": 0.3809267507522062, "learning_rate": 2.449683770301382e-06, "loss": 0.2347, "step": 16962 }, { "epoch": 0.78, "grad_norm": 0.6809355621931512, "learning_rate": 2.4487082425403376e-06, "loss": 0.3997, "step": 16963 }, { "epoch": 0.78, "grad_norm": 0.4678938035625304, "learning_rate": 2.447732881957614e-06, "loss": 0.3057, "step": 16964 }, { "epoch": 0.78, "grad_norm": 0.2966454338947615, "learning_rate": 2.446757688574808e-06, "loss": 0.2432, "step": 16965 }, { "epoch": 0.78, "grad_norm": 0.3547027720305422, "learning_rate": 2.445782662413504e-06, "loss": 0.1865, "step": 16966 }, { "epoch": 0.78, "grad_norm": 0.4271673021146245, "learning_rate": 2.444807803495294e-06, "loss": 0.2669, "step": 16967 }, { "epoch": 0.78, "grad_norm": 0.6938130855430237, "learning_rate": 2.443833111841755e-06, "loss": 0.2592, "step": 16968 }, { "epoch": 0.78, "grad_norm": 0.36401897641925773, "learning_rate": 2.4428585874744682e-06, "loss": 0.302, "step": 16969 }, { "epoch": 0.78, "grad_norm": 0.4286795573552825, "learning_rate": 2.44188423041501e-06, "loss": 0.269, "step": 16970 }, { "epoch": 0.78, "grad_norm": 0.21988824195135032, "learning_rate": 2.4409100406849496e-06, "loss": 0.1497, "step": 16971 }, { "epoch": 0.78, "grad_norm": 1.4887848221505096, "learning_rate": 2.439936018305854e-06, "loss": 0.7312, "step": 16972 }, { "epoch": 0.78, "grad_norm": 0.28257214089767047, "learning_rate": 2.438962163299289e-06, "loss": 0.2418, "step": 16973 }, { "epoch": 0.78, "grad_norm": 0.5532610360652411, "learning_rate": 2.4379884756868167e-06, "loss": 0.3201, "step": 16974 }, { "epoch": 0.78, "grad_norm": 0.7413047015911038, "learning_rate": 2.4370149554899915e-06, "loss": 0.3019, "step": 16975 }, { "epoch": 0.78, "grad_norm": 0.337918537603653, "learning_rate": 2.43604160273036e-06, "loss": 0.2579, "step": 16976 }, { "epoch": 0.78, "grad_norm": 0.513233075978404, "learning_rate": 2.4350684174294824e-06, "loss": 0.3607, "step": 16977 }, { "epoch": 0.78, "grad_norm": 0.2969367169992921, "learning_rate": 2.434095399608897e-06, "loss": 0.157, "step": 16978 }, { "epoch": 0.78, "grad_norm": 0.4527418489354421, "learning_rate": 2.43312254929015e-06, "loss": 0.2462, "step": 16979 }, { "epoch": 0.78, "grad_norm": 0.5878091041638095, "learning_rate": 2.432149866494774e-06, "loss": 0.3401, "step": 16980 }, { "epoch": 0.78, "grad_norm": 0.3554207469994959, "learning_rate": 2.431177351244305e-06, "loss": 0.2586, "step": 16981 }, { "epoch": 0.78, "grad_norm": 0.4366120763728419, "learning_rate": 2.4302050035602785e-06, "loss": 0.2609, "step": 16982 }, { "epoch": 0.78, "grad_norm": 0.5805507902654186, "learning_rate": 2.4292328234642136e-06, "loss": 0.3985, "step": 16983 }, { "epoch": 0.78, "grad_norm": 0.21104397541424086, "learning_rate": 2.428260810977641e-06, "loss": 0.1549, "step": 16984 }, { "epoch": 0.78, "grad_norm": 0.4054415599968635, "learning_rate": 2.427288966122069e-06, "loss": 0.2559, "step": 16985 }, { "epoch": 0.78, "grad_norm": 0.49504710822478665, "learning_rate": 2.4263172889190278e-06, "loss": 0.3275, "step": 16986 }, { "epoch": 0.78, "grad_norm": 1.5769819907829796, "learning_rate": 2.4253457793900214e-06, "loss": 0.4378, "step": 16987 }, { "epoch": 0.78, "grad_norm": 0.3696904977715235, "learning_rate": 2.424374437556557e-06, "loss": 0.2147, "step": 16988 }, { "epoch": 0.78, "grad_norm": 0.34217947780265673, "learning_rate": 2.4234032634401404e-06, "loss": 0.2791, "step": 16989 }, { "epoch": 0.78, "grad_norm": 0.3810004631549262, "learning_rate": 2.4224322570622725e-06, "loss": 0.2031, "step": 16990 }, { "epoch": 0.78, "grad_norm": 0.44729662107919405, "learning_rate": 2.421461418444455e-06, "loss": 0.2074, "step": 16991 }, { "epoch": 0.78, "grad_norm": 0.5177051573339955, "learning_rate": 2.420490747608174e-06, "loss": 0.3445, "step": 16992 }, { "epoch": 0.78, "grad_norm": 0.40571388332829544, "learning_rate": 2.4195202445749232e-06, "loss": 0.3125, "step": 16993 }, { "epoch": 0.78, "grad_norm": 0.3163997904965117, "learning_rate": 2.41854990936619e-06, "loss": 0.2104, "step": 16994 }, { "epoch": 0.78, "grad_norm": 1.3112090192314, "learning_rate": 2.417579742003453e-06, "loss": 0.7431, "step": 16995 }, { "epoch": 0.78, "grad_norm": 0.31658930059476303, "learning_rate": 2.4166097425081946e-06, "loss": 0.2283, "step": 16996 }, { "epoch": 0.78, "grad_norm": 0.3161142110992237, "learning_rate": 2.4156399109018846e-06, "loss": 0.2167, "step": 16997 }, { "epoch": 0.78, "grad_norm": 0.39037493545955315, "learning_rate": 2.414670247205997e-06, "loss": 0.2413, "step": 16998 }, { "epoch": 0.78, "grad_norm": 1.3526551900662174, "learning_rate": 2.413700751442003e-06, "loss": 0.6451, "step": 16999 }, { "epoch": 0.78, "grad_norm": 0.7714883264961339, "learning_rate": 2.4127314236313593e-06, "loss": 0.3926, "step": 17000 }, { "epoch": 0.78, "grad_norm": 0.2806740618592558, "learning_rate": 2.4117622637955316e-06, "loss": 0.2224, "step": 17001 }, { "epoch": 0.78, "grad_norm": 0.6288195870800063, "learning_rate": 2.410793271955968e-06, "loss": 0.3762, "step": 17002 }, { "epoch": 0.78, "grad_norm": 0.6299350910753708, "learning_rate": 2.4098244481341327e-06, "loss": 0.3056, "step": 17003 }, { "epoch": 0.78, "grad_norm": 0.23840795090350542, "learning_rate": 2.4088557923514688e-06, "loss": 0.1529, "step": 17004 }, { "epoch": 0.78, "grad_norm": 0.39644351455361826, "learning_rate": 2.4078873046294183e-06, "loss": 0.2983, "step": 17005 }, { "epoch": 0.78, "grad_norm": 0.6133725305313518, "learning_rate": 2.406918984989426e-06, "loss": 0.414, "step": 17006 }, { "epoch": 0.78, "grad_norm": 0.41183547091232275, "learning_rate": 2.405950833452928e-06, "loss": 0.234, "step": 17007 }, { "epoch": 0.78, "grad_norm": 0.49390893702471617, "learning_rate": 2.404982850041363e-06, "loss": 0.3352, "step": 17008 }, { "epoch": 0.78, "grad_norm": 0.4074755524145822, "learning_rate": 2.4040150347761535e-06, "loss": 0.2637, "step": 17009 }, { "epoch": 0.78, "grad_norm": 0.31834719830685027, "learning_rate": 2.403047387678734e-06, "loss": 0.1829, "step": 17010 }, { "epoch": 0.78, "grad_norm": 0.5380632403560895, "learning_rate": 2.4020799087705203e-06, "loss": 0.2548, "step": 17011 }, { "epoch": 0.78, "grad_norm": 0.4004229048679349, "learning_rate": 2.4011125980729346e-06, "loss": 0.2837, "step": 17012 }, { "epoch": 0.78, "grad_norm": 0.33190351679353536, "learning_rate": 2.4001454556073946e-06, "loss": 0.2598, "step": 17013 }, { "epoch": 0.78, "grad_norm": 0.7329640788829259, "learning_rate": 2.399178481395307e-06, "loss": 0.2985, "step": 17014 }, { "epoch": 0.78, "grad_norm": 0.47025874711866006, "learning_rate": 2.3982116754580808e-06, "loss": 0.2752, "step": 17015 }, { "epoch": 0.78, "grad_norm": 0.26092192533856434, "learning_rate": 2.3972450378171254e-06, "loss": 0.1709, "step": 17016 }, { "epoch": 0.78, "grad_norm": 0.35230653997915773, "learning_rate": 2.3962785684938338e-06, "loss": 0.2472, "step": 17017 }, { "epoch": 0.78, "grad_norm": 0.8274106759767784, "learning_rate": 2.3953122675096096e-06, "loss": 0.4546, "step": 17018 }, { "epoch": 0.78, "grad_norm": 0.3851232653091529, "learning_rate": 2.3943461348858367e-06, "loss": 0.2866, "step": 17019 }, { "epoch": 0.78, "grad_norm": 0.3554613360869296, "learning_rate": 2.3933801706439154e-06, "loss": 0.2572, "step": 17020 }, { "epoch": 0.78, "grad_norm": 1.0635957910207081, "learning_rate": 2.392414374805222e-06, "loss": 0.417, "step": 17021 }, { "epoch": 0.78, "grad_norm": 0.30596608925244795, "learning_rate": 2.3914487473911463e-06, "loss": 0.2363, "step": 17022 }, { "epoch": 0.78, "grad_norm": 0.24436471871516222, "learning_rate": 2.3904832884230576e-06, "loss": 0.1132, "step": 17023 }, { "epoch": 0.78, "grad_norm": 0.3591843841107055, "learning_rate": 2.389517997922336e-06, "loss": 0.259, "step": 17024 }, { "epoch": 0.78, "grad_norm": 0.33134611909798173, "learning_rate": 2.388552875910354e-06, "loss": 0.2682, "step": 17025 }, { "epoch": 0.78, "grad_norm": 0.6710787006465406, "learning_rate": 2.3875879224084717e-06, "loss": 0.3887, "step": 17026 }, { "epoch": 0.78, "grad_norm": 0.6284728320169134, "learning_rate": 2.386623137438059e-06, "loss": 0.132, "step": 17027 }, { "epoch": 0.78, "grad_norm": 0.3248587771771781, "learning_rate": 2.3856585210204695e-06, "loss": 0.2409, "step": 17028 }, { "epoch": 0.78, "grad_norm": 0.2935524378890023, "learning_rate": 2.3846940731770606e-06, "loss": 0.2417, "step": 17029 }, { "epoch": 0.78, "grad_norm": 0.5715982008755994, "learning_rate": 2.3837297939291893e-06, "loss": 0.2246, "step": 17030 }, { "epoch": 0.78, "grad_norm": 0.40105583114530163, "learning_rate": 2.382765683298196e-06, "loss": 0.3011, "step": 17031 }, { "epoch": 0.78, "grad_norm": 0.46717818540006123, "learning_rate": 2.3818017413054296e-06, "loss": 0.3235, "step": 17032 }, { "epoch": 0.78, "grad_norm": 0.45870153525228036, "learning_rate": 2.380837967972233e-06, "loss": 0.1911, "step": 17033 }, { "epoch": 0.78, "grad_norm": 0.44201459612917576, "learning_rate": 2.3798743633199363e-06, "loss": 0.3167, "step": 17034 }, { "epoch": 0.78, "grad_norm": 0.3318049435041043, "learning_rate": 2.378910927369881e-06, "loss": 0.2125, "step": 17035 }, { "epoch": 0.78, "grad_norm": 0.3001763653230405, "learning_rate": 2.377947660143386e-06, "loss": 0.223, "step": 17036 }, { "epoch": 0.78, "grad_norm": 0.3325359961912638, "learning_rate": 2.3769845616617895e-06, "loss": 0.2649, "step": 17037 }, { "epoch": 0.78, "grad_norm": 0.9273895352271897, "learning_rate": 2.3760216319464047e-06, "loss": 0.403, "step": 17038 }, { "epoch": 0.78, "grad_norm": 1.710105467800304, "learning_rate": 2.375058871018555e-06, "loss": 0.7954, "step": 17039 }, { "epoch": 0.78, "grad_norm": 0.2882441236405574, "learning_rate": 2.3740962788995512e-06, "loss": 0.2094, "step": 17040 }, { "epoch": 0.78, "grad_norm": 0.36850261192660166, "learning_rate": 2.373133855610705e-06, "loss": 0.2679, "step": 17041 }, { "epoch": 0.78, "grad_norm": 0.7261630554160161, "learning_rate": 2.3721716011733285e-06, "loss": 0.3997, "step": 17042 }, { "epoch": 0.78, "grad_norm": 0.36082796126550604, "learning_rate": 2.371209515608718e-06, "loss": 0.2283, "step": 17043 }, { "epoch": 0.78, "grad_norm": 0.3623755025998804, "learning_rate": 2.3702475989381778e-06, "loss": 0.2353, "step": 17044 }, { "epoch": 0.78, "grad_norm": 0.5173006028378963, "learning_rate": 2.3692858511829997e-06, "loss": 0.2737, "step": 17045 }, { "epoch": 0.78, "grad_norm": 0.38109127130423665, "learning_rate": 2.3683242723644785e-06, "loss": 0.2041, "step": 17046 }, { "epoch": 0.78, "grad_norm": 0.4203378925697086, "learning_rate": 2.3673628625039047e-06, "loss": 0.2558, "step": 17047 }, { "epoch": 0.78, "grad_norm": 0.3243558761593813, "learning_rate": 2.3664016216225584e-06, "loss": 0.2958, "step": 17048 }, { "epoch": 0.78, "grad_norm": 0.37404677451811263, "learning_rate": 2.365440549741722e-06, "loss": 0.2247, "step": 17049 }, { "epoch": 0.78, "grad_norm": 0.5301435387152867, "learning_rate": 2.364479646882675e-06, "loss": 0.2637, "step": 17050 }, { "epoch": 0.78, "grad_norm": 0.8986516359301125, "learning_rate": 2.3635189130666914e-06, "loss": 0.2728, "step": 17051 }, { "epoch": 0.78, "grad_norm": 0.3941178563055379, "learning_rate": 2.3625583483150384e-06, "loss": 0.2673, "step": 17052 }, { "epoch": 0.78, "grad_norm": 0.359800135648748, "learning_rate": 2.3615979526489773e-06, "loss": 0.2573, "step": 17053 }, { "epoch": 0.78, "grad_norm": 0.7914490253622418, "learning_rate": 2.360637726089782e-06, "loss": 0.4397, "step": 17054 }, { "epoch": 0.78, "grad_norm": 0.35233103321829007, "learning_rate": 2.359677668658701e-06, "loss": 0.2657, "step": 17055 }, { "epoch": 0.78, "grad_norm": 0.21773616778191962, "learning_rate": 2.3587177803769945e-06, "loss": 0.1759, "step": 17056 }, { "epoch": 0.78, "grad_norm": 1.1736885065220493, "learning_rate": 2.3577580612659102e-06, "loss": 0.4617, "step": 17057 }, { "epoch": 0.78, "grad_norm": 0.35292321350770617, "learning_rate": 2.3567985113466963e-06, "loss": 0.2475, "step": 17058 }, { "epoch": 0.78, "grad_norm": 0.7317400272212284, "learning_rate": 2.3558391306405994e-06, "loss": 0.2975, "step": 17059 }, { "epoch": 0.78, "grad_norm": 0.3522989578142284, "learning_rate": 2.354879919168854e-06, "loss": 0.2966, "step": 17060 }, { "epoch": 0.78, "grad_norm": 0.34447940721456605, "learning_rate": 2.353920876952701e-06, "loss": 0.2366, "step": 17061 }, { "epoch": 0.78, "grad_norm": 0.43918982122979916, "learning_rate": 2.3529620040133683e-06, "loss": 0.2487, "step": 17062 }, { "epoch": 0.78, "grad_norm": 0.36047363796906834, "learning_rate": 2.3520033003720865e-06, "loss": 0.1767, "step": 17063 }, { "epoch": 0.78, "grad_norm": 0.29012177370671866, "learning_rate": 2.3510447660500825e-06, "loss": 0.2535, "step": 17064 }, { "epoch": 0.78, "grad_norm": 0.8468750588177727, "learning_rate": 2.350086401068573e-06, "loss": 0.4055, "step": 17065 }, { "epoch": 0.78, "grad_norm": 0.6602919427195721, "learning_rate": 2.3491282054487773e-06, "loss": 0.2758, "step": 17066 }, { "epoch": 0.78, "grad_norm": 0.3703074420279087, "learning_rate": 2.348170179211909e-06, "loss": 0.2743, "step": 17067 }, { "epoch": 0.78, "grad_norm": 0.2963957637076691, "learning_rate": 2.347212322379181e-06, "loss": 0.2201, "step": 17068 }, { "epoch": 0.78, "grad_norm": 0.38544552143432687, "learning_rate": 2.346254634971796e-06, "loss": 0.0896, "step": 17069 }, { "epoch": 0.78, "grad_norm": 0.3573189466180187, "learning_rate": 2.345297117010954e-06, "loss": 0.266, "step": 17070 }, { "epoch": 0.78, "grad_norm": 0.7373158887140219, "learning_rate": 2.344339768517857e-06, "loss": 0.34, "step": 17071 }, { "epoch": 0.78, "grad_norm": 0.36826743272041745, "learning_rate": 2.3433825895136977e-06, "loss": 0.2539, "step": 17072 }, { "epoch": 0.78, "grad_norm": 0.3617206081674422, "learning_rate": 2.3424255800196718e-06, "loss": 0.2567, "step": 17073 }, { "epoch": 0.78, "grad_norm": 0.31262599127317553, "learning_rate": 2.34146874005696e-06, "loss": 0.1804, "step": 17074 }, { "epoch": 0.78, "grad_norm": 1.2874694852176902, "learning_rate": 2.3405120696467485e-06, "loss": 0.4419, "step": 17075 }, { "epoch": 0.78, "grad_norm": 0.2743640354141252, "learning_rate": 2.339555568810221e-06, "loss": 0.231, "step": 17076 }, { "epoch": 0.78, "grad_norm": 0.6318625079890814, "learning_rate": 2.338599237568547e-06, "loss": 0.365, "step": 17077 }, { "epoch": 0.78, "grad_norm": 1.562500845358626, "learning_rate": 2.3376430759429047e-06, "loss": 0.7569, "step": 17078 }, { "epoch": 0.78, "grad_norm": 0.29404639353693074, "learning_rate": 2.3366870839544565e-06, "loss": 0.1938, "step": 17079 }, { "epoch": 0.78, "grad_norm": 0.3716199289723143, "learning_rate": 2.3357312616243697e-06, "loss": 0.292, "step": 17080 }, { "epoch": 0.78, "grad_norm": 0.35648859292470936, "learning_rate": 2.3347756089738093e-06, "loss": 0.1918, "step": 17081 }, { "epoch": 0.78, "grad_norm": 0.3059382487577487, "learning_rate": 2.333820126023927e-06, "loss": 0.2174, "step": 17082 }, { "epoch": 0.78, "grad_norm": 0.9372930811664, "learning_rate": 2.3328648127958776e-06, "loss": 0.4395, "step": 17083 }, { "epoch": 0.78, "grad_norm": 0.3742317252521186, "learning_rate": 2.331909669310811e-06, "loss": 0.3189, "step": 17084 }, { "epoch": 0.78, "grad_norm": 0.3606563030370136, "learning_rate": 2.3309546955898774e-06, "loss": 0.1731, "step": 17085 }, { "epoch": 0.78, "grad_norm": 0.3721473241197668, "learning_rate": 2.329999891654212e-06, "loss": 0.2489, "step": 17086 }, { "epoch": 0.78, "grad_norm": 0.37280523250104886, "learning_rate": 2.32904525752496e-06, "loss": 0.2686, "step": 17087 }, { "epoch": 0.79, "grad_norm": 0.4302570620411634, "learning_rate": 2.328090793223249e-06, "loss": 0.2604, "step": 17088 }, { "epoch": 0.79, "grad_norm": 0.41250084686869154, "learning_rate": 2.327136498770214e-06, "loss": 0.2468, "step": 17089 }, { "epoch": 0.79, "grad_norm": 1.2509651302445617, "learning_rate": 2.326182374186984e-06, "loss": 0.6774, "step": 17090 }, { "epoch": 0.79, "grad_norm": 0.4417184673691241, "learning_rate": 2.3252284194946783e-06, "loss": 0.2502, "step": 17091 }, { "epoch": 0.79, "grad_norm": 0.3143739606602177, "learning_rate": 2.3242746347144173e-06, "loss": 0.2231, "step": 17092 }, { "epoch": 0.79, "grad_norm": 0.6476153938642922, "learning_rate": 2.323321019867322e-06, "loss": 0.2388, "step": 17093 }, { "epoch": 0.79, "grad_norm": 0.44938530916048725, "learning_rate": 2.322367574974497e-06, "loss": 0.2779, "step": 17094 }, { "epoch": 0.79, "grad_norm": 0.2974743383959936, "learning_rate": 2.3214143000570567e-06, "loss": 0.1467, "step": 17095 }, { "epoch": 0.79, "grad_norm": 0.3475706667457637, "learning_rate": 2.3204611951360966e-06, "loss": 0.3164, "step": 17096 }, { "epoch": 0.79, "grad_norm": 0.34567835659424406, "learning_rate": 2.319508260232731e-06, "loss": 0.2469, "step": 17097 }, { "epoch": 0.79, "grad_norm": 1.515879941080919, "learning_rate": 2.31855549536805e-06, "loss": 0.2257, "step": 17098 }, { "epoch": 0.79, "grad_norm": 0.4459190336607469, "learning_rate": 2.317602900563143e-06, "loss": 0.3252, "step": 17099 }, { "epoch": 0.79, "grad_norm": 0.2824478728728605, "learning_rate": 2.3166504758391075e-06, "loss": 0.2558, "step": 17100 }, { "epoch": 0.79, "grad_norm": 0.3180683048293316, "learning_rate": 2.3156982212170187e-06, "loss": 0.1897, "step": 17101 }, { "epoch": 0.79, "grad_norm": 1.0425945800789964, "learning_rate": 2.3147461367179702e-06, "loss": 0.5229, "step": 17102 }, { "epoch": 0.79, "grad_norm": 0.71084973938481, "learning_rate": 2.3137942223630326e-06, "loss": 0.3683, "step": 17103 }, { "epoch": 0.79, "grad_norm": 0.2726141481229837, "learning_rate": 2.3128424781732863e-06, "loss": 0.2569, "step": 17104 }, { "epoch": 0.79, "grad_norm": 0.63819085184092, "learning_rate": 2.3118909041697957e-06, "loss": 0.2915, "step": 17105 }, { "epoch": 0.79, "grad_norm": 0.37101634914380033, "learning_rate": 2.31093950037363e-06, "loss": 0.1975, "step": 17106 }, { "epoch": 0.79, "grad_norm": 0.31801777304923756, "learning_rate": 2.309988266805856e-06, "loss": 0.2378, "step": 17107 }, { "epoch": 0.79, "grad_norm": 0.38403987422240415, "learning_rate": 2.3090372034875274e-06, "loss": 0.2779, "step": 17108 }, { "epoch": 0.79, "grad_norm": 0.5013916052041003, "learning_rate": 2.308086310439702e-06, "loss": 0.3036, "step": 17109 }, { "epoch": 0.79, "grad_norm": 0.5187925738845816, "learning_rate": 2.3071355876834357e-06, "loss": 0.2967, "step": 17110 }, { "epoch": 0.79, "grad_norm": 0.45077246538650934, "learning_rate": 2.3061850352397697e-06, "loss": 0.2518, "step": 17111 }, { "epoch": 0.79, "grad_norm": 0.33520045133619686, "learning_rate": 2.3052346531297542e-06, "loss": 0.2577, "step": 17112 }, { "epoch": 0.79, "grad_norm": 0.2776784349295418, "learning_rate": 2.3042844413744223e-06, "loss": 0.1939, "step": 17113 }, { "epoch": 0.79, "grad_norm": 1.370734979080085, "learning_rate": 2.303334399994821e-06, "loss": 0.7497, "step": 17114 }, { "epoch": 0.79, "grad_norm": 0.40821383346713336, "learning_rate": 2.302384529011975e-06, "loss": 0.1967, "step": 17115 }, { "epoch": 0.79, "grad_norm": 0.30902475061405105, "learning_rate": 2.301434828446919e-06, "loss": 0.2799, "step": 17116 }, { "epoch": 0.79, "grad_norm": 0.70340382644846, "learning_rate": 2.300485298320676e-06, "loss": 0.3983, "step": 17117 }, { "epoch": 0.79, "grad_norm": 0.36015960717053686, "learning_rate": 2.2995359386542625e-06, "loss": 0.1989, "step": 17118 }, { "epoch": 0.79, "grad_norm": 0.26000985536088944, "learning_rate": 2.2985867494687065e-06, "loss": 0.1571, "step": 17119 }, { "epoch": 0.79, "grad_norm": 0.3848284667719156, "learning_rate": 2.297637730785015e-06, "loss": 0.3138, "step": 17120 }, { "epoch": 0.79, "grad_norm": 0.667179532048464, "learning_rate": 2.296688882624203e-06, "loss": 0.1717, "step": 17121 }, { "epoch": 0.79, "grad_norm": 0.4079319134771883, "learning_rate": 2.2957402050072717e-06, "loss": 0.2785, "step": 17122 }, { "epoch": 0.79, "grad_norm": 0.3917199888762605, "learning_rate": 2.2947916979552265e-06, "loss": 0.2922, "step": 17123 }, { "epoch": 0.79, "grad_norm": 0.5775566247225241, "learning_rate": 2.2938433614890696e-06, "loss": 0.0951, "step": 17124 }, { "epoch": 0.79, "grad_norm": 0.3078485501239748, "learning_rate": 2.2928951956297907e-06, "loss": 0.2334, "step": 17125 }, { "epoch": 0.79, "grad_norm": 0.45069433396455005, "learning_rate": 2.2919472003983843e-06, "loss": 0.3179, "step": 17126 }, { "epoch": 0.79, "grad_norm": 0.45394170867305567, "learning_rate": 2.290999375815841e-06, "loss": 0.3074, "step": 17127 }, { "epoch": 0.79, "grad_norm": 0.30916602513827823, "learning_rate": 2.2900517219031383e-06, "loss": 0.234, "step": 17128 }, { "epoch": 0.79, "grad_norm": 1.7794223862372456, "learning_rate": 2.289104238681261e-06, "loss": 0.5018, "step": 17129 }, { "epoch": 0.79, "grad_norm": 0.738548153583518, "learning_rate": 2.288156926171182e-06, "loss": 0.3116, "step": 17130 }, { "epoch": 0.79, "grad_norm": 0.22264537689307137, "learning_rate": 2.287209784393877e-06, "loss": 0.2071, "step": 17131 }, { "epoch": 0.79, "grad_norm": 1.1944300992778474, "learning_rate": 2.2862628133703123e-06, "loss": 0.6965, "step": 17132 }, { "epoch": 0.79, "grad_norm": 0.5490869534171477, "learning_rate": 2.285316013121458e-06, "loss": 0.3031, "step": 17133 }, { "epoch": 0.79, "grad_norm": 0.3225210198201399, "learning_rate": 2.2843693836682714e-06, "loss": 0.191, "step": 17134 }, { "epoch": 0.79, "grad_norm": 0.39377404124638016, "learning_rate": 2.283422925031704e-06, "loss": 0.3232, "step": 17135 }, { "epoch": 0.79, "grad_norm": 0.7841319356189883, "learning_rate": 2.2824766372327223e-06, "loss": 0.3097, "step": 17136 }, { "epoch": 0.79, "grad_norm": 0.29212967386276545, "learning_rate": 2.2815305202922664e-06, "loss": 0.1793, "step": 17137 }, { "epoch": 0.79, "grad_norm": 0.6916170384877859, "learning_rate": 2.2805845742312882e-06, "loss": 0.4053, "step": 17138 }, { "epoch": 0.79, "grad_norm": 0.3833139120205697, "learning_rate": 2.279638799070726e-06, "loss": 0.2807, "step": 17139 }, { "epoch": 0.79, "grad_norm": 0.34013155265780776, "learning_rate": 2.2786931948315182e-06, "loss": 0.2956, "step": 17140 }, { "epoch": 0.79, "grad_norm": 0.567241433355386, "learning_rate": 2.2777477615346046e-06, "loss": 0.1188, "step": 17141 }, { "epoch": 0.79, "grad_norm": 0.9734823823247243, "learning_rate": 2.2768024992009097e-06, "loss": 0.366, "step": 17142 }, { "epoch": 0.79, "grad_norm": 0.2865073903836222, "learning_rate": 2.275857407851364e-06, "loss": 0.2203, "step": 17143 }, { "epoch": 0.79, "grad_norm": 0.4066187143781802, "learning_rate": 2.274912487506893e-06, "loss": 0.2723, "step": 17144 }, { "epoch": 0.79, "grad_norm": 0.7497357805028236, "learning_rate": 2.2739677381884117e-06, "loss": 0.3645, "step": 17145 }, { "epoch": 0.79, "grad_norm": 0.2760260297470873, "learning_rate": 2.2730231599168407e-06, "loss": 0.2156, "step": 17146 }, { "epoch": 0.79, "grad_norm": 0.4401887119945976, "learning_rate": 2.272078752713087e-06, "loss": 0.2502, "step": 17147 }, { "epoch": 0.79, "grad_norm": 0.8590114232034153, "learning_rate": 2.2711345165980616e-06, "loss": 0.4194, "step": 17148 }, { "epoch": 0.79, "grad_norm": 0.3834674172716536, "learning_rate": 2.2701904515926686e-06, "loss": 0.2664, "step": 17149 }, { "epoch": 0.79, "grad_norm": 0.6514056367495175, "learning_rate": 2.2692465577178113e-06, "loss": 0.2979, "step": 17150 }, { "epoch": 0.79, "grad_norm": 0.30747226299518776, "learning_rate": 2.2683028349943814e-06, "loss": 0.2493, "step": 17151 }, { "epoch": 0.79, "grad_norm": 0.415937319740506, "learning_rate": 2.2673592834432755e-06, "loss": 0.3372, "step": 17152 }, { "epoch": 0.79, "grad_norm": 0.3318510185028204, "learning_rate": 2.266415903085385e-06, "loss": 0.1462, "step": 17153 }, { "epoch": 0.79, "grad_norm": 0.7697280925485774, "learning_rate": 2.2654726939415895e-06, "loss": 0.1012, "step": 17154 }, { "epoch": 0.79, "grad_norm": 0.4381525126065141, "learning_rate": 2.264529656032777e-06, "loss": 0.3045, "step": 17155 }, { "epoch": 0.79, "grad_norm": 0.3570085240881158, "learning_rate": 2.263586789379819e-06, "loss": 0.3059, "step": 17156 }, { "epoch": 0.79, "grad_norm": 0.6003243779595991, "learning_rate": 2.262644094003594e-06, "loss": 0.2939, "step": 17157 }, { "epoch": 0.79, "grad_norm": 0.388779645884081, "learning_rate": 2.2617015699249735e-06, "loss": 0.2496, "step": 17158 }, { "epoch": 0.79, "grad_norm": 0.2512278806174626, "learning_rate": 2.2607592171648197e-06, "loss": 0.2097, "step": 17159 }, { "epoch": 0.79, "grad_norm": 0.833050622020885, "learning_rate": 2.259817035744e-06, "loss": 0.1108, "step": 17160 }, { "epoch": 0.79, "grad_norm": 0.3996464493257746, "learning_rate": 2.258875025683366e-06, "loss": 0.2804, "step": 17161 }, { "epoch": 0.79, "grad_norm": 0.6186153074205543, "learning_rate": 2.2579331870037822e-06, "loss": 0.4017, "step": 17162 }, { "epoch": 0.79, "grad_norm": 0.3067014989793992, "learning_rate": 2.2569915197260974e-06, "loss": 0.2243, "step": 17163 }, { "epoch": 0.79, "grad_norm": 0.38978267599642796, "learning_rate": 2.2560500238711534e-06, "loss": 0.2869, "step": 17164 }, { "epoch": 0.79, "grad_norm": 0.28318044096502587, "learning_rate": 2.2551086994597993e-06, "loss": 0.1696, "step": 17165 }, { "epoch": 0.79, "grad_norm": 0.777214384642687, "learning_rate": 2.254167546512873e-06, "loss": 0.3822, "step": 17166 }, { "epoch": 0.79, "grad_norm": 0.2498515895672066, "learning_rate": 2.2532265650512154e-06, "loss": 0.2158, "step": 17167 }, { "epoch": 0.79, "grad_norm": 0.8831558900009088, "learning_rate": 2.252285755095652e-06, "loss": 0.382, "step": 17168 }, { "epoch": 0.79, "grad_norm": 1.2803222159960825, "learning_rate": 2.251345116667014e-06, "loss": 0.6776, "step": 17169 }, { "epoch": 0.79, "grad_norm": 0.3164006888531394, "learning_rate": 2.2504046497861308e-06, "loss": 0.186, "step": 17170 }, { "epoch": 0.79, "grad_norm": 0.23475518441000343, "learning_rate": 2.249464354473816e-06, "loss": 0.1976, "step": 17171 }, { "epoch": 0.79, "grad_norm": 0.64297109713618, "learning_rate": 2.2485242307508936e-06, "loss": 0.3276, "step": 17172 }, { "epoch": 0.79, "grad_norm": 0.35440729949926225, "learning_rate": 2.247584278638171e-06, "loss": 0.2119, "step": 17173 }, { "epoch": 0.79, "grad_norm": 0.9972870600536571, "learning_rate": 2.2466444981564593e-06, "loss": 0.4353, "step": 17174 }, { "epoch": 0.79, "grad_norm": 0.3429775771133243, "learning_rate": 2.24570488932657e-06, "loss": 0.2984, "step": 17175 }, { "epoch": 0.79, "grad_norm": 0.3828628644203816, "learning_rate": 2.2447654521692975e-06, "loss": 0.2876, "step": 17176 }, { "epoch": 0.79, "grad_norm": 0.4170889793197809, "learning_rate": 2.243826186705446e-06, "loss": 0.0963, "step": 17177 }, { "epoch": 0.79, "grad_norm": 0.7400022987857334, "learning_rate": 2.2428870929558012e-06, "loss": 0.3557, "step": 17178 }, { "epoch": 0.79, "grad_norm": 0.2917936422072651, "learning_rate": 2.241948170941165e-06, "loss": 0.2369, "step": 17179 }, { "epoch": 0.79, "grad_norm": 0.5830577035725261, "learning_rate": 2.2410094206823173e-06, "loss": 0.2882, "step": 17180 }, { "epoch": 0.79, "grad_norm": 1.3682210912564259, "learning_rate": 2.240070842200045e-06, "loss": 0.7568, "step": 17181 }, { "epoch": 0.79, "grad_norm": 0.3461618045279277, "learning_rate": 2.239132435515122e-06, "loss": 0.2298, "step": 17182 }, { "epoch": 0.79, "grad_norm": 0.2795679108220238, "learning_rate": 2.238194200648328e-06, "loss": 0.1994, "step": 17183 }, { "epoch": 0.79, "grad_norm": 0.6689037242336835, "learning_rate": 2.237256137620436e-06, "loss": 0.2748, "step": 17184 }, { "epoch": 0.79, "grad_norm": 0.37406500837620066, "learning_rate": 2.236318246452208e-06, "loss": 0.2352, "step": 17185 }, { "epoch": 0.79, "grad_norm": 1.7144741753791801, "learning_rate": 2.2353805271644112e-06, "loss": 0.3476, "step": 17186 }, { "epoch": 0.79, "grad_norm": 0.40751667690425686, "learning_rate": 2.234442979777809e-06, "loss": 0.3069, "step": 17187 }, { "epoch": 0.79, "grad_norm": 0.3288656288229183, "learning_rate": 2.233505604313152e-06, "loss": 0.229, "step": 17188 }, { "epoch": 0.79, "grad_norm": 1.1228124576551153, "learning_rate": 2.2325684007911984e-06, "loss": 0.3886, "step": 17189 }, { "epoch": 0.79, "grad_norm": 0.2888745069202256, "learning_rate": 2.2316313692326907e-06, "loss": 0.217, "step": 17190 }, { "epoch": 0.79, "grad_norm": 0.46530315681055856, "learning_rate": 2.2306945096583775e-06, "loss": 0.2968, "step": 17191 }, { "epoch": 0.79, "grad_norm": 0.38386773205438335, "learning_rate": 2.2297578220890027e-06, "loss": 0.2457, "step": 17192 }, { "epoch": 0.79, "grad_norm": 0.6087834094924855, "learning_rate": 2.228821306545298e-06, "loss": 0.2449, "step": 17193 }, { "epoch": 0.79, "grad_norm": 0.391062903415256, "learning_rate": 2.2278849630480014e-06, "loss": 0.2682, "step": 17194 }, { "epoch": 0.79, "grad_norm": 0.379961351873939, "learning_rate": 2.2269487916178354e-06, "loss": 0.2922, "step": 17195 }, { "epoch": 0.79, "grad_norm": 1.265700092716687, "learning_rate": 2.2260127922755383e-06, "loss": 0.2115, "step": 17196 }, { "epoch": 0.79, "grad_norm": 0.27227524860969315, "learning_rate": 2.2250769650418213e-06, "loss": 0.2078, "step": 17197 }, { "epoch": 0.79, "grad_norm": 1.4414669491274652, "learning_rate": 2.22414130993741e-06, "loss": 0.6629, "step": 17198 }, { "epoch": 0.79, "grad_norm": 0.3479028958319379, "learning_rate": 2.2232058269830126e-06, "loss": 0.2432, "step": 17199 }, { "epoch": 0.79, "grad_norm": 0.35582280626712265, "learning_rate": 2.222270516199343e-06, "loss": 0.2586, "step": 17200 }, { "epoch": 0.79, "grad_norm": 0.7060125509241995, "learning_rate": 2.221335377607111e-06, "loss": 0.3715, "step": 17201 }, { "epoch": 0.79, "grad_norm": 0.519950536283211, "learning_rate": 2.220400411227014e-06, "loss": 0.3525, "step": 17202 }, { "epoch": 0.79, "grad_norm": 0.24539616972428374, "learning_rate": 2.2194656170797534e-06, "loss": 0.1826, "step": 17203 }, { "epoch": 0.79, "grad_norm": 0.4555420081351435, "learning_rate": 2.218530995186028e-06, "loss": 0.2236, "step": 17204 }, { "epoch": 0.79, "grad_norm": 1.2060774057892782, "learning_rate": 2.2175965455665225e-06, "loss": 0.6888, "step": 17205 }, { "epoch": 0.79, "grad_norm": 0.3534622409077698, "learning_rate": 2.2166622682419327e-06, "loss": 0.2019, "step": 17206 }, { "epoch": 0.79, "grad_norm": 0.3566074483303219, "learning_rate": 2.2157281632329353e-06, "loss": 0.2856, "step": 17207 }, { "epoch": 0.79, "grad_norm": 0.500638730218545, "learning_rate": 2.2147942305602144e-06, "loss": 0.2393, "step": 17208 }, { "epoch": 0.79, "grad_norm": 0.24283930002033238, "learning_rate": 2.213860470244448e-06, "loss": 0.1247, "step": 17209 }, { "epoch": 0.79, "grad_norm": 0.5768618166887786, "learning_rate": 2.2129268823063044e-06, "loss": 0.3832, "step": 17210 }, { "epoch": 0.79, "grad_norm": 0.3833830699173495, "learning_rate": 2.2119934667664555e-06, "loss": 0.2724, "step": 17211 }, { "epoch": 0.79, "grad_norm": 0.36551798767054433, "learning_rate": 2.211060223645561e-06, "loss": 0.1981, "step": 17212 }, { "epoch": 0.79, "grad_norm": 0.4884727495171653, "learning_rate": 2.2101271529642907e-06, "loss": 0.3079, "step": 17213 }, { "epoch": 0.79, "grad_norm": 0.5216664869888064, "learning_rate": 2.209194254743295e-06, "loss": 0.3577, "step": 17214 }, { "epoch": 0.79, "grad_norm": 0.3292120838414288, "learning_rate": 2.208261529003233e-06, "loss": 0.2811, "step": 17215 }, { "epoch": 0.79, "grad_norm": 0.24862638985651028, "learning_rate": 2.2073289757647477e-06, "loss": 0.1552, "step": 17216 }, { "epoch": 0.79, "grad_norm": 0.8830509652435399, "learning_rate": 2.2063965950484878e-06, "loss": 0.4498, "step": 17217 }, { "epoch": 0.79, "grad_norm": 0.38044774479665894, "learning_rate": 2.205464386875099e-06, "loss": 0.2661, "step": 17218 }, { "epoch": 0.79, "grad_norm": 0.30842645976079736, "learning_rate": 2.2045323512652128e-06, "loss": 0.2386, "step": 17219 }, { "epoch": 0.79, "grad_norm": 1.4772492118941674, "learning_rate": 2.2036004882394702e-06, "loss": 0.448, "step": 17220 }, { "epoch": 0.79, "grad_norm": 0.31301625706923664, "learning_rate": 2.202668797818496e-06, "loss": 0.1964, "step": 17221 }, { "epoch": 0.79, "grad_norm": 0.5322213749102688, "learning_rate": 2.2017372800229188e-06, "loss": 0.2786, "step": 17222 }, { "epoch": 0.79, "grad_norm": 0.3548614363570148, "learning_rate": 2.200805934873366e-06, "loss": 0.3016, "step": 17223 }, { "epoch": 0.79, "grad_norm": 0.35404673328192504, "learning_rate": 2.199874762390449e-06, "loss": 0.2496, "step": 17224 }, { "epoch": 0.79, "grad_norm": 0.47912460005126994, "learning_rate": 2.1989437625947873e-06, "loss": 0.1455, "step": 17225 }, { "epoch": 0.79, "grad_norm": 0.3572813072432284, "learning_rate": 2.198012935506991e-06, "loss": 0.2849, "step": 17226 }, { "epoch": 0.79, "grad_norm": 0.6022720433058638, "learning_rate": 2.197082281147673e-06, "loss": 0.2781, "step": 17227 }, { "epoch": 0.79, "grad_norm": 0.2955726209037957, "learning_rate": 2.1961517995374314e-06, "loss": 0.2334, "step": 17228 }, { "epoch": 0.79, "grad_norm": 0.6409940322879456, "learning_rate": 2.195221490696863e-06, "loss": 0.318, "step": 17229 }, { "epoch": 0.79, "grad_norm": 0.37250328819602857, "learning_rate": 2.194291354646574e-06, "loss": 0.2447, "step": 17230 }, { "epoch": 0.79, "grad_norm": 0.305204730973577, "learning_rate": 2.1933613914071474e-06, "loss": 0.2411, "step": 17231 }, { "epoch": 0.79, "grad_norm": 1.5849246082045143, "learning_rate": 2.1924316009991785e-06, "loss": 0.2396, "step": 17232 }, { "epoch": 0.79, "grad_norm": 0.7409587010773275, "learning_rate": 2.191501983443247e-06, "loss": 0.3488, "step": 17233 }, { "epoch": 0.79, "grad_norm": 0.3320531174829815, "learning_rate": 2.1905725387599355e-06, "loss": 0.2898, "step": 17234 }, { "epoch": 0.79, "grad_norm": 0.4880637009581049, "learning_rate": 2.1896432669698233e-06, "loss": 0.2668, "step": 17235 }, { "epoch": 0.79, "grad_norm": 0.5733790939152533, "learning_rate": 2.1887141680934786e-06, "loss": 0.2775, "step": 17236 }, { "epoch": 0.79, "grad_norm": 0.2676681366583005, "learning_rate": 2.1877852421514767e-06, "loss": 0.164, "step": 17237 }, { "epoch": 0.79, "grad_norm": 0.37885207014500677, "learning_rate": 2.186856489164377e-06, "loss": 0.2473, "step": 17238 }, { "epoch": 0.79, "grad_norm": 0.42500782696313366, "learning_rate": 2.185927909152745e-06, "loss": 0.2744, "step": 17239 }, { "epoch": 0.79, "grad_norm": 0.5438787531877634, "learning_rate": 2.1849995021371405e-06, "loss": 0.3417, "step": 17240 }, { "epoch": 0.79, "grad_norm": 0.6333724182358521, "learning_rate": 2.1840712681381116e-06, "loss": 0.3434, "step": 17241 }, { "epoch": 0.79, "grad_norm": 0.26762002505843974, "learning_rate": 2.1831432071762117e-06, "loss": 0.2289, "step": 17242 }, { "epoch": 0.79, "grad_norm": 0.24573068064034145, "learning_rate": 2.1822153192719876e-06, "loss": 0.1728, "step": 17243 }, { "epoch": 0.79, "grad_norm": 1.3793883546075032, "learning_rate": 2.181287604445984e-06, "loss": 0.4307, "step": 17244 }, { "epoch": 0.79, "grad_norm": 0.31935228572104957, "learning_rate": 2.180360062718734e-06, "loss": 0.1139, "step": 17245 }, { "epoch": 0.79, "grad_norm": 0.3335514511464505, "learning_rate": 2.179432694110776e-06, "loss": 0.2834, "step": 17246 }, { "epoch": 0.79, "grad_norm": 0.5280062565778948, "learning_rate": 2.1785054986426424e-06, "loss": 0.3451, "step": 17247 }, { "epoch": 0.79, "grad_norm": 0.30446234044287324, "learning_rate": 2.1775784763348575e-06, "loss": 0.1114, "step": 17248 }, { "epoch": 0.79, "grad_norm": 0.27398955412267384, "learning_rate": 2.1766516272079472e-06, "loss": 0.186, "step": 17249 }, { "epoch": 0.79, "grad_norm": 0.3619742392211366, "learning_rate": 2.1757249512824276e-06, "loss": 0.2803, "step": 17250 }, { "epoch": 0.79, "grad_norm": 0.36101096734417387, "learning_rate": 2.1747984485788155e-06, "loss": 0.113, "step": 17251 }, { "epoch": 0.79, "grad_norm": 0.5164446698839823, "learning_rate": 2.1738721191176273e-06, "loss": 0.2901, "step": 17252 }, { "epoch": 0.79, "grad_norm": 0.9602371705017546, "learning_rate": 2.1729459629193637e-06, "loss": 0.4584, "step": 17253 }, { "epoch": 0.79, "grad_norm": 0.391896258464414, "learning_rate": 2.1720199800045373e-06, "loss": 0.2914, "step": 17254 }, { "epoch": 0.79, "grad_norm": 0.29647344408756765, "learning_rate": 2.171094170393637e-06, "loss": 0.1686, "step": 17255 }, { "epoch": 0.79, "grad_norm": 0.6143951393809346, "learning_rate": 2.170168534107172e-06, "loss": 0.301, "step": 17256 }, { "epoch": 0.79, "grad_norm": 0.7414084135711221, "learning_rate": 2.169243071165629e-06, "loss": 0.2686, "step": 17257 }, { "epoch": 0.79, "grad_norm": 0.3192675350380784, "learning_rate": 2.168317781589494e-06, "loss": 0.2362, "step": 17258 }, { "epoch": 0.79, "grad_norm": 0.5075742922603717, "learning_rate": 2.167392665399256e-06, "loss": 0.3606, "step": 17259 }, { "epoch": 0.79, "grad_norm": 0.626177383963506, "learning_rate": 2.166467722615394e-06, "loss": 0.3506, "step": 17260 }, { "epoch": 0.79, "grad_norm": 0.22050588551725933, "learning_rate": 2.1655429532583905e-06, "loss": 0.1484, "step": 17261 }, { "epoch": 0.79, "grad_norm": 0.3888544501937455, "learning_rate": 2.164618357348711e-06, "loss": 0.2861, "step": 17262 }, { "epoch": 0.79, "grad_norm": 0.8746329846516026, "learning_rate": 2.163693934906831e-06, "loss": 0.3618, "step": 17263 }, { "epoch": 0.79, "grad_norm": 0.34879099767399935, "learning_rate": 2.1627696859532156e-06, "loss": 0.2188, "step": 17264 }, { "epoch": 0.79, "grad_norm": 1.1975194116332641, "learning_rate": 2.1618456105083242e-06, "loss": 0.5909, "step": 17265 }, { "epoch": 0.79, "grad_norm": 0.36712964440380685, "learning_rate": 2.160921708592618e-06, "loss": 0.3, "step": 17266 }, { "epoch": 0.79, "grad_norm": 0.45488473971069426, "learning_rate": 2.1599979802265482e-06, "loss": 0.3152, "step": 17267 }, { "epoch": 0.79, "grad_norm": 0.36838475416437205, "learning_rate": 2.1590744254305664e-06, "loss": 0.1185, "step": 17268 }, { "epoch": 0.79, "grad_norm": 0.6698052348074903, "learning_rate": 2.158151044225122e-06, "loss": 0.3521, "step": 17269 }, { "epoch": 0.79, "grad_norm": 0.25842929885787885, "learning_rate": 2.1572278366306533e-06, "loss": 0.2577, "step": 17270 }, { "epoch": 0.79, "grad_norm": 1.021456009233583, "learning_rate": 2.1563048026676037e-06, "loss": 0.4566, "step": 17271 }, { "epoch": 0.79, "grad_norm": 0.8742436816836272, "learning_rate": 2.1553819423564006e-06, "loss": 0.3677, "step": 17272 }, { "epoch": 0.79, "grad_norm": 0.23091128634988273, "learning_rate": 2.154459255717486e-06, "loss": 0.1863, "step": 17273 }, { "epoch": 0.79, "grad_norm": 0.35388327549937704, "learning_rate": 2.1535367427712784e-06, "loss": 0.2405, "step": 17274 }, { "epoch": 0.79, "grad_norm": 0.6064043977184346, "learning_rate": 2.152614403538209e-06, "loss": 0.3318, "step": 17275 }, { "epoch": 0.79, "grad_norm": 0.4268634805552709, "learning_rate": 2.1516922380386896e-06, "loss": 0.3133, "step": 17276 }, { "epoch": 0.79, "grad_norm": 1.2198094642755963, "learning_rate": 2.15077024629314e-06, "loss": 0.4009, "step": 17277 }, { "epoch": 0.79, "grad_norm": 0.3359046724505899, "learning_rate": 2.1498484283219747e-06, "loss": 0.2567, "step": 17278 }, { "epoch": 0.79, "grad_norm": 0.47742485412065594, "learning_rate": 2.148926784145596e-06, "loss": 0.3181, "step": 17279 }, { "epoch": 0.79, "grad_norm": 0.34615696417918396, "learning_rate": 2.1480053137844115e-06, "loss": 0.1963, "step": 17280 }, { "epoch": 0.79, "grad_norm": 0.9039904632146665, "learning_rate": 2.1470840172588246e-06, "loss": 0.2932, "step": 17281 }, { "epoch": 0.79, "grad_norm": 0.27634976831947505, "learning_rate": 2.1461628945892255e-06, "loss": 0.2302, "step": 17282 }, { "epoch": 0.79, "grad_norm": 0.5008727349554704, "learning_rate": 2.145241945796014e-06, "loss": 0.3812, "step": 17283 }, { "epoch": 0.79, "grad_norm": 2.57558298423319, "learning_rate": 2.1443211708995713e-06, "loss": 0.2064, "step": 17284 }, { "epoch": 0.79, "grad_norm": 0.3467532279561505, "learning_rate": 2.1434005699202877e-06, "loss": 0.2464, "step": 17285 }, { "epoch": 0.79, "grad_norm": 0.44929405051722027, "learning_rate": 2.1424801428785447e-06, "loss": 0.3041, "step": 17286 }, { "epoch": 0.79, "grad_norm": 0.2765644815979597, "learning_rate": 2.1415598897947164e-06, "loss": 0.118, "step": 17287 }, { "epoch": 0.79, "grad_norm": 0.4359138263904072, "learning_rate": 2.14063981068918e-06, "loss": 0.2993, "step": 17288 }, { "epoch": 0.79, "grad_norm": 1.352131963319057, "learning_rate": 2.139719905582298e-06, "loss": 0.7674, "step": 17289 }, { "epoch": 0.79, "grad_norm": 0.34526150294995045, "learning_rate": 2.1388001744944476e-06, "loss": 0.2139, "step": 17290 }, { "epoch": 0.79, "grad_norm": 0.3555498688023054, "learning_rate": 2.137880617445982e-06, "loss": 0.2633, "step": 17291 }, { "epoch": 0.79, "grad_norm": 0.6544705038833118, "learning_rate": 2.136961234457264e-06, "loss": 0.3704, "step": 17292 }, { "epoch": 0.79, "grad_norm": 0.2703776554245429, "learning_rate": 2.1360420255486426e-06, "loss": 0.1846, "step": 17293 }, { "epoch": 0.79, "grad_norm": 0.30865671460859356, "learning_rate": 2.1351229907404727e-06, "loss": 0.189, "step": 17294 }, { "epoch": 0.79, "grad_norm": 0.5242197007365211, "learning_rate": 2.1342041300531015e-06, "loss": 0.3761, "step": 17295 }, { "epoch": 0.79, "grad_norm": 1.0520448351560139, "learning_rate": 2.133285443506866e-06, "loss": 0.3963, "step": 17296 }, { "epoch": 0.79, "grad_norm": 0.31075686426074234, "learning_rate": 2.132366931122113e-06, "loss": 0.1849, "step": 17297 }, { "epoch": 0.79, "grad_norm": 0.37502535454705554, "learning_rate": 2.1314485929191698e-06, "loss": 0.295, "step": 17298 }, { "epoch": 0.79, "grad_norm": 0.26864979036440173, "learning_rate": 2.1305304289183714e-06, "loss": 0.1796, "step": 17299 }, { "epoch": 0.79, "grad_norm": 0.30988644192529574, "learning_rate": 2.1296124391400466e-06, "loss": 0.1978, "step": 17300 }, { "epoch": 0.79, "grad_norm": 0.545476977420377, "learning_rate": 2.128694623604515e-06, "loss": 0.3468, "step": 17301 }, { "epoch": 0.79, "grad_norm": 0.6007790819916371, "learning_rate": 2.127776982332097e-06, "loss": 0.3502, "step": 17302 }, { "epoch": 0.79, "grad_norm": 0.3337522800038289, "learning_rate": 2.126859515343113e-06, "loss": 0.259, "step": 17303 }, { "epoch": 0.79, "grad_norm": 0.6950721214055331, "learning_rate": 2.1259422226578675e-06, "loss": 0.2826, "step": 17304 }, { "epoch": 0.79, "grad_norm": 0.25783983102580255, "learning_rate": 2.1250251042966754e-06, "loss": 0.1865, "step": 17305 }, { "epoch": 0.8, "grad_norm": 0.29279831461136036, "learning_rate": 2.124108160279832e-06, "loss": 0.2455, "step": 17306 }, { "epoch": 0.8, "grad_norm": 1.0705471809371274, "learning_rate": 2.123191390627648e-06, "loss": 0.5253, "step": 17307 }, { "epoch": 0.8, "grad_norm": 0.7171836891691942, "learning_rate": 2.122274795360412e-06, "loss": 0.3772, "step": 17308 }, { "epoch": 0.8, "grad_norm": 0.3436653191901018, "learning_rate": 2.1213583744984223e-06, "loss": 0.2426, "step": 17309 }, { "epoch": 0.8, "grad_norm": 0.3615830002710391, "learning_rate": 2.1204421280619626e-06, "loss": 0.2467, "step": 17310 }, { "epoch": 0.8, "grad_norm": 0.4515786219581236, "learning_rate": 2.119526056071319e-06, "loss": 0.2151, "step": 17311 }, { "epoch": 0.8, "grad_norm": 0.35518488076041216, "learning_rate": 2.118610158546777e-06, "loss": 0.2577, "step": 17312 }, { "epoch": 0.8, "grad_norm": 0.2947321801697784, "learning_rate": 2.117694435508606e-06, "loss": 0.1775, "step": 17313 }, { "epoch": 0.8, "grad_norm": 0.4489838462552549, "learning_rate": 2.116778886977087e-06, "loss": 0.3228, "step": 17314 }, { "epoch": 0.8, "grad_norm": 0.43106549031142327, "learning_rate": 2.115863512972481e-06, "loss": 0.2824, "step": 17315 }, { "epoch": 0.8, "grad_norm": 0.5817303253146339, "learning_rate": 2.1149483135150597e-06, "loss": 0.3208, "step": 17316 }, { "epoch": 0.8, "grad_norm": 0.39077009237253246, "learning_rate": 2.1140332886250845e-06, "loss": 0.2327, "step": 17317 }, { "epoch": 0.8, "grad_norm": 0.43206135027873166, "learning_rate": 2.1131184383228097e-06, "loss": 0.2717, "step": 17318 }, { "epoch": 0.8, "grad_norm": 0.33102179999394077, "learning_rate": 2.112203762628491e-06, "loss": 0.261, "step": 17319 }, { "epoch": 0.8, "grad_norm": 0.692490159501316, "learning_rate": 2.1112892615623794e-06, "loss": 0.2744, "step": 17320 }, { "epoch": 0.8, "grad_norm": 0.28547886872361417, "learning_rate": 2.1103749351447223e-06, "loss": 0.2288, "step": 17321 }, { "epoch": 0.8, "grad_norm": 0.4224554267902391, "learning_rate": 2.1094607833957592e-06, "loss": 0.322, "step": 17322 }, { "epoch": 0.8, "grad_norm": 1.0238128016070904, "learning_rate": 2.108546806335725e-06, "loss": 0.2367, "step": 17323 }, { "epoch": 0.8, "grad_norm": 0.3226758423603047, "learning_rate": 2.1076330039848638e-06, "loss": 0.245, "step": 17324 }, { "epoch": 0.8, "grad_norm": 0.9276839364685976, "learning_rate": 2.106719376363399e-06, "loss": 0.4944, "step": 17325 }, { "epoch": 0.8, "grad_norm": 0.3059998765790841, "learning_rate": 2.105805923491562e-06, "loss": 0.2193, "step": 17326 }, { "epoch": 0.8, "grad_norm": 0.26167768850355283, "learning_rate": 2.10489264538957e-06, "loss": 0.1922, "step": 17327 }, { "epoch": 0.8, "grad_norm": 1.476242381854582, "learning_rate": 2.1039795420776456e-06, "loss": 0.5137, "step": 17328 }, { "epoch": 0.8, "grad_norm": 0.3932570132212613, "learning_rate": 2.103066613576007e-06, "loss": 0.3042, "step": 17329 }, { "epoch": 0.8, "grad_norm": 0.33706098740830437, "learning_rate": 2.1021538599048594e-06, "loss": 0.1914, "step": 17330 }, { "epoch": 0.8, "grad_norm": 1.0459027514122232, "learning_rate": 2.101241281084416e-06, "loss": 0.5199, "step": 17331 }, { "epoch": 0.8, "grad_norm": 0.44510510620090654, "learning_rate": 2.1003288771348752e-06, "loss": 0.2713, "step": 17332 }, { "epoch": 0.8, "grad_norm": 0.25855313632729365, "learning_rate": 2.099416648076439e-06, "loss": 0.1381, "step": 17333 }, { "epoch": 0.8, "grad_norm": 0.35798193479571144, "learning_rate": 2.098504593929306e-06, "loss": 0.2742, "step": 17334 }, { "epoch": 0.8, "grad_norm": 1.4086137523918492, "learning_rate": 2.097592714713663e-06, "loss": 0.4637, "step": 17335 }, { "epoch": 0.8, "grad_norm": 0.35853693881712917, "learning_rate": 2.0966810104497013e-06, "loss": 0.1791, "step": 17336 }, { "epoch": 0.8, "grad_norm": 0.3810430625241888, "learning_rate": 2.0957694811576058e-06, "loss": 0.2917, "step": 17337 }, { "epoch": 0.8, "grad_norm": 0.53963062082349, "learning_rate": 2.0948581268575565e-06, "loss": 0.3568, "step": 17338 }, { "epoch": 0.8, "grad_norm": 0.18836258489777002, "learning_rate": 2.093946947569727e-06, "loss": 0.1191, "step": 17339 }, { "epoch": 0.8, "grad_norm": 0.66899144139605, "learning_rate": 2.0930359433142934e-06, "loss": 0.3673, "step": 17340 }, { "epoch": 0.8, "grad_norm": 0.3852583892701345, "learning_rate": 2.092125114111425e-06, "loss": 0.3037, "step": 17341 }, { "epoch": 0.8, "grad_norm": 0.417185990965552, "learning_rate": 2.091214459981282e-06, "loss": 0.2665, "step": 17342 }, { "epoch": 0.8, "grad_norm": 0.5041915982005274, "learning_rate": 2.0903039809440307e-06, "loss": 0.2554, "step": 17343 }, { "epoch": 0.8, "grad_norm": 0.9557109828478421, "learning_rate": 2.0893936770198232e-06, "loss": 0.555, "step": 17344 }, { "epoch": 0.8, "grad_norm": 0.24948702098933245, "learning_rate": 2.088483548228816e-06, "loss": 0.2168, "step": 17345 }, { "epoch": 0.8, "grad_norm": 0.2859304880433394, "learning_rate": 2.0875735945911602e-06, "loss": 0.1917, "step": 17346 }, { "epoch": 0.8, "grad_norm": 1.372868635654554, "learning_rate": 2.086663816126996e-06, "loss": 0.4619, "step": 17347 }, { "epoch": 0.8, "grad_norm": 0.5881613621774432, "learning_rate": 2.0857542128564714e-06, "loss": 0.3262, "step": 17348 }, { "epoch": 0.8, "grad_norm": 0.31741885742638487, "learning_rate": 2.0848447847997145e-06, "loss": 0.2464, "step": 17349 }, { "epoch": 0.8, "grad_norm": 0.4918403554399696, "learning_rate": 2.0839355319768707e-06, "loss": 0.341, "step": 17350 }, { "epoch": 0.8, "grad_norm": 0.6901650884608159, "learning_rate": 2.0830264544080647e-06, "loss": 0.3278, "step": 17351 }, { "epoch": 0.8, "grad_norm": 0.22056904682657288, "learning_rate": 2.0821175521134208e-06, "loss": 0.1516, "step": 17352 }, { "epoch": 0.8, "grad_norm": 0.3831559226883887, "learning_rate": 2.0812088251130613e-06, "loss": 0.3066, "step": 17353 }, { "epoch": 0.8, "grad_norm": 0.6284276953454587, "learning_rate": 2.0803002734271073e-06, "loss": 0.3071, "step": 17354 }, { "epoch": 0.8, "grad_norm": 0.4204141780757606, "learning_rate": 2.0793918970756744e-06, "loss": 0.2975, "step": 17355 }, { "epoch": 0.8, "grad_norm": 1.4002266808187778, "learning_rate": 2.078483696078869e-06, "loss": 0.4217, "step": 17356 }, { "epoch": 0.8, "grad_norm": 0.29855785191741985, "learning_rate": 2.0775756704568018e-06, "loss": 0.2532, "step": 17357 }, { "epoch": 0.8, "grad_norm": 0.2390048461833621, "learning_rate": 2.0766678202295698e-06, "loss": 0.1962, "step": 17358 }, { "epoch": 0.8, "grad_norm": 0.8597073465455748, "learning_rate": 2.075760145417277e-06, "loss": 0.2887, "step": 17359 }, { "epoch": 0.8, "grad_norm": 0.5543420747943056, "learning_rate": 2.07485264604002e-06, "loss": 0.306, "step": 17360 }, { "epoch": 0.8, "grad_norm": 0.31285872491835925, "learning_rate": 2.073945322117884e-06, "loss": 0.2829, "step": 17361 }, { "epoch": 0.8, "grad_norm": 0.475938418923415, "learning_rate": 2.0730381736709583e-06, "loss": 0.2773, "step": 17362 }, { "epoch": 0.8, "grad_norm": 0.46531018788621603, "learning_rate": 2.072131200719332e-06, "loss": 0.2707, "step": 17363 }, { "epoch": 0.8, "grad_norm": 0.2679988474653125, "learning_rate": 2.0712244032830753e-06, "loss": 0.1984, "step": 17364 }, { "epoch": 0.8, "grad_norm": 0.3740904092582829, "learning_rate": 2.070317781382272e-06, "loss": 0.2638, "step": 17365 }, { "epoch": 0.8, "grad_norm": 0.5904689367034499, "learning_rate": 2.069411335036985e-06, "loss": 0.3044, "step": 17366 }, { "epoch": 0.8, "grad_norm": 0.4334117765879798, "learning_rate": 2.068505064267292e-06, "loss": 0.2765, "step": 17367 }, { "epoch": 0.8, "grad_norm": 1.2341946595608295, "learning_rate": 2.067598969093254e-06, "loss": 0.6385, "step": 17368 }, { "epoch": 0.8, "grad_norm": 0.3162363582527675, "learning_rate": 2.0666930495349256e-06, "loss": 0.2198, "step": 17369 }, { "epoch": 0.8, "grad_norm": 0.22186722137650894, "learning_rate": 2.065787305612367e-06, "loss": 0.1868, "step": 17370 }, { "epoch": 0.8, "grad_norm": 0.7838027176673482, "learning_rate": 2.064881737345631e-06, "loss": 0.3543, "step": 17371 }, { "epoch": 0.8, "grad_norm": 0.6665989802906895, "learning_rate": 2.063976344754768e-06, "loss": 0.12, "step": 17372 }, { "epoch": 0.8, "grad_norm": 0.26990211701147754, "learning_rate": 2.0630711278598157e-06, "loss": 0.2515, "step": 17373 }, { "epoch": 0.8, "grad_norm": 1.2288354676713495, "learning_rate": 2.0621660866808234e-06, "loss": 0.7528, "step": 17374 }, { "epoch": 0.8, "grad_norm": 0.7783017086036176, "learning_rate": 2.06126122123782e-06, "loss": 0.1582, "step": 17375 }, { "epoch": 0.8, "grad_norm": 0.3634845520035248, "learning_rate": 2.060356531550841e-06, "loss": 0.228, "step": 17376 }, { "epoch": 0.8, "grad_norm": 0.36361766080818464, "learning_rate": 2.05945201763992e-06, "loss": 0.3022, "step": 17377 }, { "epoch": 0.8, "grad_norm": 0.335279076976714, "learning_rate": 2.0585476795250746e-06, "loss": 0.1209, "step": 17378 }, { "epoch": 0.8, "grad_norm": 0.43699484879874745, "learning_rate": 2.05764351722633e-06, "loss": 0.3317, "step": 17379 }, { "epoch": 0.8, "grad_norm": 1.5145912152108578, "learning_rate": 2.056739530763705e-06, "loss": 0.7775, "step": 17380 }, { "epoch": 0.8, "grad_norm": 0.37731700057212597, "learning_rate": 2.0558357201572087e-06, "loss": 0.2797, "step": 17381 }, { "epoch": 0.8, "grad_norm": 0.373771349704236, "learning_rate": 2.054932085426856e-06, "loss": 0.2007, "step": 17382 }, { "epoch": 0.8, "grad_norm": 0.5366801705462236, "learning_rate": 2.0540286265926436e-06, "loss": 0.2819, "step": 17383 }, { "epoch": 0.8, "grad_norm": 0.347940533028942, "learning_rate": 2.0531253436745847e-06, "loss": 0.185, "step": 17384 }, { "epoch": 0.8, "grad_norm": 0.2778848135405373, "learning_rate": 2.052222236692668e-06, "loss": 0.2276, "step": 17385 }, { "epoch": 0.8, "grad_norm": 1.3247673049408129, "learning_rate": 2.0513193056668934e-06, "loss": 0.7172, "step": 17386 }, { "epoch": 0.8, "grad_norm": 1.031726448291546, "learning_rate": 2.050416550617249e-06, "loss": 0.4367, "step": 17387 }, { "epoch": 0.8, "grad_norm": 0.35984121014359366, "learning_rate": 2.049513971563715e-06, "loss": 0.2136, "step": 17388 }, { "epoch": 0.8, "grad_norm": 0.2813935644392902, "learning_rate": 2.0486115685262842e-06, "loss": 0.2473, "step": 17389 }, { "epoch": 0.8, "grad_norm": 0.4305641805375185, "learning_rate": 2.047709341524926e-06, "loss": 0.2664, "step": 17390 }, { "epoch": 0.8, "grad_norm": 0.3108062215438227, "learning_rate": 2.046807290579622e-06, "loss": 0.2031, "step": 17391 }, { "epoch": 0.8, "grad_norm": 1.2247628670865973, "learning_rate": 2.0459054157103363e-06, "loss": 0.5286, "step": 17392 }, { "epoch": 0.8, "grad_norm": 0.33863638603051954, "learning_rate": 2.0450037169370385e-06, "loss": 0.2765, "step": 17393 }, { "epoch": 0.8, "grad_norm": 0.392985576819022, "learning_rate": 2.0441021942796947e-06, "loss": 0.2709, "step": 17394 }, { "epoch": 0.8, "grad_norm": 1.1381327449050558, "learning_rate": 2.0432008477582567e-06, "loss": 0.2797, "step": 17395 }, { "epoch": 0.8, "grad_norm": 0.2652105388769732, "learning_rate": 2.0422996773926827e-06, "loss": 0.198, "step": 17396 }, { "epoch": 0.8, "grad_norm": 0.3169588578398246, "learning_rate": 2.0413986832029275e-06, "loss": 0.2733, "step": 17397 }, { "epoch": 0.8, "grad_norm": 1.4296217784095149, "learning_rate": 2.0404978652089325e-06, "loss": 0.3216, "step": 17398 }, { "epoch": 0.8, "grad_norm": 0.6051495106446253, "learning_rate": 2.039597223430645e-06, "loss": 0.3606, "step": 17399 }, { "epoch": 0.8, "grad_norm": 0.35464213854329785, "learning_rate": 2.038696757887998e-06, "loss": 0.2389, "step": 17400 }, { "epoch": 0.8, "grad_norm": 0.37505675196446625, "learning_rate": 2.0377964686009365e-06, "loss": 0.2499, "step": 17401 }, { "epoch": 0.8, "grad_norm": 0.24735264295867063, "learning_rate": 2.036896355589385e-06, "loss": 0.1626, "step": 17402 }, { "epoch": 0.8, "grad_norm": 0.379914912674075, "learning_rate": 2.035996418873275e-06, "loss": 0.2727, "step": 17403 }, { "epoch": 0.8, "grad_norm": 0.4808952807181051, "learning_rate": 2.0350966584725264e-06, "loss": 0.2907, "step": 17404 }, { "epoch": 0.8, "grad_norm": 0.48827431644018576, "learning_rate": 2.0341970744070617e-06, "loss": 0.3197, "step": 17405 }, { "epoch": 0.8, "grad_norm": 0.3322675775688114, "learning_rate": 2.0332976666967976e-06, "loss": 0.25, "step": 17406 }, { "epoch": 0.8, "grad_norm": 1.2767194385236558, "learning_rate": 2.0323984353616434e-06, "loss": 0.3931, "step": 17407 }, { "epoch": 0.8, "grad_norm": 0.27123829868822, "learning_rate": 2.03149938042151e-06, "loss": 0.2006, "step": 17408 }, { "epoch": 0.8, "grad_norm": 0.36092390400886815, "learning_rate": 2.030600501896298e-06, "loss": 0.2653, "step": 17409 }, { "epoch": 0.8, "grad_norm": 0.5127604782881632, "learning_rate": 2.02970179980591e-06, "loss": 0.2776, "step": 17410 }, { "epoch": 0.8, "grad_norm": 0.8898886575590601, "learning_rate": 2.0288032741702458e-06, "loss": 0.2824, "step": 17411 }, { "epoch": 0.8, "grad_norm": 0.3654653765481926, "learning_rate": 2.027904925009191e-06, "loss": 0.2556, "step": 17412 }, { "epoch": 0.8, "grad_norm": 0.3699120191868951, "learning_rate": 2.0270067523426373e-06, "loss": 0.2808, "step": 17413 }, { "epoch": 0.8, "grad_norm": 0.8827602258936752, "learning_rate": 2.0261087561904693e-06, "loss": 0.225, "step": 17414 }, { "epoch": 0.8, "grad_norm": 0.3577938529300041, "learning_rate": 2.0252109365725714e-06, "loss": 0.2651, "step": 17415 }, { "epoch": 0.8, "grad_norm": 0.4758614845370199, "learning_rate": 2.024313293508817e-06, "loss": 0.3013, "step": 17416 }, { "epoch": 0.8, "grad_norm": 0.324313946411488, "learning_rate": 2.0234158270190763e-06, "loss": 0.2702, "step": 17417 }, { "epoch": 0.8, "grad_norm": 0.331918932437627, "learning_rate": 2.0225185371232216e-06, "loss": 0.1809, "step": 17418 }, { "epoch": 0.8, "grad_norm": 1.4443944649547569, "learning_rate": 2.021621423841117e-06, "loss": 0.3842, "step": 17419 }, { "epoch": 0.8, "grad_norm": 0.360850154943085, "learning_rate": 2.020724487192628e-06, "loss": 0.3013, "step": 17420 }, { "epoch": 0.8, "grad_norm": 0.32511069378390905, "learning_rate": 2.019827727197605e-06, "loss": 0.1821, "step": 17421 }, { "epoch": 0.8, "grad_norm": 0.833136595530928, "learning_rate": 2.018931143875905e-06, "loss": 0.3602, "step": 17422 }, { "epoch": 0.8, "grad_norm": 0.3331160603570943, "learning_rate": 2.0180347372473807e-06, "loss": 0.1818, "step": 17423 }, { "epoch": 0.8, "grad_norm": 0.3258139671351841, "learning_rate": 2.0171385073318706e-06, "loss": 0.191, "step": 17424 }, { "epoch": 0.8, "grad_norm": 0.38792618992702615, "learning_rate": 2.016242454149223e-06, "loss": 0.2833, "step": 17425 }, { "epoch": 0.8, "grad_norm": 1.5232113291413734, "learning_rate": 2.0153465777192693e-06, "loss": 0.6892, "step": 17426 }, { "epoch": 0.8, "grad_norm": 0.33815351975224206, "learning_rate": 2.0144508780618476e-06, "loss": 0.2171, "step": 17427 }, { "epoch": 0.8, "grad_norm": 0.4855658940058251, "learning_rate": 2.013555355196789e-06, "loss": 0.3486, "step": 17428 }, { "epoch": 0.8, "grad_norm": 0.35448757872947984, "learning_rate": 2.012660009143914e-06, "loss": 0.2705, "step": 17429 }, { "epoch": 0.8, "grad_norm": 0.2574363949041688, "learning_rate": 2.0117648399230495e-06, "loss": 0.1952, "step": 17430 }, { "epoch": 0.8, "grad_norm": 1.408941787360122, "learning_rate": 2.0108698475540113e-06, "loss": 0.1934, "step": 17431 }, { "epoch": 0.8, "grad_norm": 0.3607938086379944, "learning_rate": 2.009975032056618e-06, "loss": 0.3028, "step": 17432 }, { "epoch": 0.8, "grad_norm": 0.32996958182908287, "learning_rate": 2.0090803934506765e-06, "loss": 0.2378, "step": 17433 }, { "epoch": 0.8, "grad_norm": 0.8021500559513837, "learning_rate": 2.0081859317559905e-06, "loss": 0.2919, "step": 17434 }, { "epoch": 0.8, "grad_norm": 0.3003686828398488, "learning_rate": 2.0072916469923654e-06, "loss": 0.1751, "step": 17435 }, { "epoch": 0.8, "grad_norm": 0.47067385584566673, "learning_rate": 2.0063975391796e-06, "loss": 0.2813, "step": 17436 }, { "epoch": 0.8, "grad_norm": 0.31775988447841064, "learning_rate": 2.00550360833749e-06, "loss": 0.2356, "step": 17437 }, { "epoch": 0.8, "grad_norm": 0.7170778500232188, "learning_rate": 2.004609854485824e-06, "loss": 0.4203, "step": 17438 }, { "epoch": 0.8, "grad_norm": 0.4580394069391533, "learning_rate": 2.0037162776443884e-06, "loss": 0.289, "step": 17439 }, { "epoch": 0.8, "grad_norm": 0.3278380748749609, "learning_rate": 2.0028228778329718e-06, "loss": 0.2535, "step": 17440 }, { "epoch": 0.8, "grad_norm": 1.3783771167464154, "learning_rate": 2.001929655071345e-06, "loss": 0.6034, "step": 17441 }, { "epoch": 0.8, "grad_norm": 0.2414993943618676, "learning_rate": 2.00103660937929e-06, "loss": 0.1652, "step": 17442 }, { "epoch": 0.8, "grad_norm": 0.6017706833867376, "learning_rate": 2.00014374077657e-06, "loss": 0.2956, "step": 17443 }, { "epoch": 0.8, "grad_norm": 0.34581204942754123, "learning_rate": 1.999251049282962e-06, "loss": 0.2326, "step": 17444 }, { "epoch": 0.8, "grad_norm": 0.3279163157451626, "learning_rate": 1.9983585349182243e-06, "loss": 0.2521, "step": 17445 }, { "epoch": 0.8, "grad_norm": 0.8872247240986167, "learning_rate": 1.997466197702115e-06, "loss": 0.4435, "step": 17446 }, { "epoch": 0.8, "grad_norm": 0.47384088010556785, "learning_rate": 1.996574037654393e-06, "loss": 0.112, "step": 17447 }, { "epoch": 0.8, "grad_norm": 0.24271663892932163, "learning_rate": 1.995682054794803e-06, "loss": 0.2005, "step": 17448 }, { "epoch": 0.8, "grad_norm": 0.3782285680227093, "learning_rate": 1.9947902491431024e-06, "loss": 0.2967, "step": 17449 }, { "epoch": 0.8, "grad_norm": 0.8288543411104783, "learning_rate": 1.9938986207190282e-06, "loss": 0.2872, "step": 17450 }, { "epoch": 0.8, "grad_norm": 0.3614207369776317, "learning_rate": 1.9930071695423246e-06, "loss": 0.2559, "step": 17451 }, { "epoch": 0.8, "grad_norm": 0.5737329133688324, "learning_rate": 1.9921158956327214e-06, "loss": 0.3997, "step": 17452 }, { "epoch": 0.8, "grad_norm": 0.40917583698030513, "learning_rate": 1.9912247990099556e-06, "loss": 0.1884, "step": 17453 }, { "epoch": 0.8, "grad_norm": 0.305413311915621, "learning_rate": 1.9903338796937556e-06, "loss": 0.1515, "step": 17454 }, { "epoch": 0.8, "grad_norm": 0.4203046627360346, "learning_rate": 1.9894431377038417e-06, "loss": 0.3039, "step": 17455 }, { "epoch": 0.8, "grad_norm": 0.3919339890330523, "learning_rate": 1.9885525730599353e-06, "loss": 0.2932, "step": 17456 }, { "epoch": 0.8, "grad_norm": 0.40935336394126803, "learning_rate": 1.9876621857817568e-06, "loss": 0.1971, "step": 17457 }, { "epoch": 0.8, "grad_norm": 0.5710417856378557, "learning_rate": 1.9867719758890113e-06, "loss": 0.3906, "step": 17458 }, { "epoch": 0.8, "grad_norm": 1.2540797655344296, "learning_rate": 1.9858819434014154e-06, "loss": 0.4473, "step": 17459 }, { "epoch": 0.8, "grad_norm": 0.24701172875408506, "learning_rate": 1.984992088338663e-06, "loss": 0.1634, "step": 17460 }, { "epoch": 0.8, "grad_norm": 0.29464905353549137, "learning_rate": 1.9841024107204653e-06, "loss": 0.2416, "step": 17461 }, { "epoch": 0.8, "grad_norm": 0.7213230863925465, "learning_rate": 1.9832129105665155e-06, "loss": 0.3825, "step": 17462 }, { "epoch": 0.8, "grad_norm": 0.4734300424148013, "learning_rate": 1.982323587896502e-06, "loss": 0.2009, "step": 17463 }, { "epoch": 0.8, "grad_norm": 0.3063852316533994, "learning_rate": 1.981434442730119e-06, "loss": 0.2682, "step": 17464 }, { "epoch": 0.8, "grad_norm": 1.410067136578374, "learning_rate": 1.9805454750870447e-06, "loss": 0.5576, "step": 17465 }, { "epoch": 0.8, "grad_norm": 0.25363496060548246, "learning_rate": 1.979656684986969e-06, "loss": 0.1579, "step": 17466 }, { "epoch": 0.8, "grad_norm": 0.5247362310008205, "learning_rate": 1.9787680724495617e-06, "loss": 0.3061, "step": 17467 }, { "epoch": 0.8, "grad_norm": 0.36013017745065956, "learning_rate": 1.977879637494502e-06, "loss": 0.3104, "step": 17468 }, { "epoch": 0.8, "grad_norm": 0.486336595394454, "learning_rate": 1.976991380141451e-06, "loss": 0.3004, "step": 17469 }, { "epoch": 0.8, "grad_norm": 0.48765146042070706, "learning_rate": 1.9761033004100793e-06, "loss": 0.2682, "step": 17470 }, { "epoch": 0.8, "grad_norm": 1.6648495767312834, "learning_rate": 1.9752153983200483e-06, "loss": 0.7302, "step": 17471 }, { "epoch": 0.8, "grad_norm": 0.32426312479842956, "learning_rate": 1.9743276738910124e-06, "loss": 0.2519, "step": 17472 }, { "epoch": 0.8, "grad_norm": 0.3655750512152428, "learning_rate": 1.9734401271426264e-06, "loss": 0.1953, "step": 17473 }, { "epoch": 0.8, "grad_norm": 0.4601058516110566, "learning_rate": 1.9725527580945423e-06, "loss": 0.2617, "step": 17474 }, { "epoch": 0.8, "grad_norm": 1.0275764714853401, "learning_rate": 1.971665566766401e-06, "loss": 0.5164, "step": 17475 }, { "epoch": 0.8, "grad_norm": 0.25298315728783144, "learning_rate": 1.970778553177849e-06, "loss": 0.2182, "step": 17476 }, { "epoch": 0.8, "grad_norm": 1.334227765110419, "learning_rate": 1.9698917173485175e-06, "loss": 0.5366, "step": 17477 }, { "epoch": 0.8, "grad_norm": 0.6222915606895938, "learning_rate": 1.9690050592980446e-06, "loss": 0.3011, "step": 17478 }, { "epoch": 0.8, "grad_norm": 0.3847951488804628, "learning_rate": 1.96811857904606e-06, "loss": 0.258, "step": 17479 }, { "epoch": 0.8, "grad_norm": 0.26787077752122934, "learning_rate": 1.96723227661219e-06, "loss": 0.2351, "step": 17480 }, { "epoch": 0.8, "grad_norm": 0.26408275735407316, "learning_rate": 1.9663461520160566e-06, "loss": 0.1623, "step": 17481 }, { "epoch": 0.8, "grad_norm": 0.41770340117450855, "learning_rate": 1.9654602052772708e-06, "loss": 0.2906, "step": 17482 }, { "epoch": 0.8, "grad_norm": 1.3825284046774293, "learning_rate": 1.964574436415457e-06, "loss": 0.245, "step": 17483 }, { "epoch": 0.8, "grad_norm": 0.32738337319671307, "learning_rate": 1.963688845450218e-06, "loss": 0.2802, "step": 17484 }, { "epoch": 0.8, "grad_norm": 0.40984477353611803, "learning_rate": 1.9628034324011656e-06, "loss": 0.3245, "step": 17485 }, { "epoch": 0.8, "grad_norm": 0.3783802727193963, "learning_rate": 1.9619181972878955e-06, "loss": 0.1714, "step": 17486 }, { "epoch": 0.8, "grad_norm": 0.45054050363413284, "learning_rate": 1.9610331401300097e-06, "loss": 0.2694, "step": 17487 }, { "epoch": 0.8, "grad_norm": 0.376053589384803, "learning_rate": 1.9601482609471055e-06, "loss": 0.2703, "step": 17488 }, { "epoch": 0.8, "grad_norm": 0.515977393730018, "learning_rate": 1.9592635597587663e-06, "loss": 0.2439, "step": 17489 }, { "epoch": 0.8, "grad_norm": 0.5442247071199551, "learning_rate": 1.9583790365845823e-06, "loss": 0.2811, "step": 17490 }, { "epoch": 0.8, "grad_norm": 0.45430634565116684, "learning_rate": 1.9574946914441386e-06, "loss": 0.3195, "step": 17491 }, { "epoch": 0.8, "grad_norm": 0.3176632687542709, "learning_rate": 1.956610524357009e-06, "loss": 0.2037, "step": 17492 }, { "epoch": 0.8, "grad_norm": 0.8302930529551902, "learning_rate": 1.9557265353427713e-06, "loss": 0.5089, "step": 17493 }, { "epoch": 0.8, "grad_norm": 0.28472935579914227, "learning_rate": 1.9548427244209935e-06, "loss": 0.2203, "step": 17494 }, { "epoch": 0.8, "grad_norm": 0.5076610014203942, "learning_rate": 1.953959091611243e-06, "loss": 0.3163, "step": 17495 }, { "epoch": 0.8, "grad_norm": 0.45042996515250217, "learning_rate": 1.953075636933084e-06, "loss": 0.2383, "step": 17496 }, { "epoch": 0.8, "grad_norm": 0.3935109041073421, "learning_rate": 1.9521923604060764e-06, "loss": 0.2746, "step": 17497 }, { "epoch": 0.8, "grad_norm": 0.6177315686089517, "learning_rate": 1.9513092620497744e-06, "loss": 0.2441, "step": 17498 }, { "epoch": 0.8, "grad_norm": 0.42099789110376307, "learning_rate": 1.950426341883721e-06, "loss": 0.2268, "step": 17499 }, { "epoch": 0.8, "grad_norm": 0.24177242806990507, "learning_rate": 1.949543599927477e-06, "loss": 0.2421, "step": 17500 }, { "epoch": 0.8, "grad_norm": 1.12207951305231, "learning_rate": 1.9486610362005755e-06, "loss": 0.4064, "step": 17501 }, { "epoch": 0.8, "grad_norm": 0.5349777401284257, "learning_rate": 1.9477786507225615e-06, "loss": 0.2325, "step": 17502 }, { "epoch": 0.8, "grad_norm": 0.40638855020972686, "learning_rate": 1.9468964435129643e-06, "loss": 0.2663, "step": 17503 }, { "epoch": 0.8, "grad_norm": 0.39493515937639534, "learning_rate": 1.9460144145913184e-06, "loss": 0.3103, "step": 17504 }, { "epoch": 0.8, "grad_norm": 0.3806628556932682, "learning_rate": 1.9451325639771536e-06, "loss": 0.2172, "step": 17505 }, { "epoch": 0.8, "grad_norm": 0.29098101249067376, "learning_rate": 1.944250891689987e-06, "loss": 0.2235, "step": 17506 }, { "epoch": 0.8, "grad_norm": 0.7035975919880475, "learning_rate": 1.9433693977493452e-06, "loss": 0.2585, "step": 17507 }, { "epoch": 0.8, "grad_norm": 0.45294384337207344, "learning_rate": 1.942488082174734e-06, "loss": 0.2724, "step": 17508 }, { "epoch": 0.8, "grad_norm": 0.40266647084085155, "learning_rate": 1.9416069449856757e-06, "loss": 0.2155, "step": 17509 }, { "epoch": 0.8, "grad_norm": 1.2128396971873856, "learning_rate": 1.9407259862016725e-06, "loss": 0.5199, "step": 17510 }, { "epoch": 0.8, "grad_norm": 0.51488299829181, "learning_rate": 1.939845205842227e-06, "loss": 0.3482, "step": 17511 }, { "epoch": 0.8, "grad_norm": 0.28071829781234403, "learning_rate": 1.9389646039268396e-06, "loss": 0.1924, "step": 17512 }, { "epoch": 0.8, "grad_norm": 0.5193625505869319, "learning_rate": 1.9380841804750063e-06, "loss": 0.2952, "step": 17513 }, { "epoch": 0.8, "grad_norm": 0.4496699728681639, "learning_rate": 1.9372039355062223e-06, "loss": 0.2016, "step": 17514 }, { "epoch": 0.8, "grad_norm": 0.4245173445923174, "learning_rate": 1.936323869039969e-06, "loss": 0.1986, "step": 17515 }, { "epoch": 0.8, "grad_norm": 0.3981202066122882, "learning_rate": 1.9354439810957324e-06, "loss": 0.3008, "step": 17516 }, { "epoch": 0.8, "grad_norm": 1.162600591650766, "learning_rate": 1.934564271692998e-06, "loss": 0.7564, "step": 17517 }, { "epoch": 0.8, "grad_norm": 0.3538151150066641, "learning_rate": 1.933684740851233e-06, "loss": 0.213, "step": 17518 }, { "epoch": 0.8, "grad_norm": 0.5533524651474537, "learning_rate": 1.9328053885899165e-06, "loss": 0.3407, "step": 17519 }, { "epoch": 0.8, "grad_norm": 0.2790711227466672, "learning_rate": 1.9319262149285113e-06, "loss": 0.1817, "step": 17520 }, { "epoch": 0.8, "grad_norm": 0.3924844737130819, "learning_rate": 1.9310472198864828e-06, "loss": 0.2763, "step": 17521 }, { "epoch": 0.8, "grad_norm": 1.075692128855626, "learning_rate": 1.9301684034832946e-06, "loss": 0.3564, "step": 17522 }, { "epoch": 0.8, "grad_norm": 0.35324513382275907, "learning_rate": 1.929289765738398e-06, "loss": 0.3012, "step": 17523 }, { "epoch": 0.81, "grad_norm": 0.4342231180145792, "learning_rate": 1.9284113066712496e-06, "loss": 0.2994, "step": 17524 }, { "epoch": 0.81, "grad_norm": 0.7615587218670656, "learning_rate": 1.9275330263012904e-06, "loss": 0.2774, "step": 17525 }, { "epoch": 0.81, "grad_norm": 0.390166261679381, "learning_rate": 1.9266549246479748e-06, "loss": 0.2073, "step": 17526 }, { "epoch": 0.81, "grad_norm": 0.3502451486351875, "learning_rate": 1.9257770017307376e-06, "loss": 0.2613, "step": 17527 }, { "epoch": 0.81, "grad_norm": 0.3449869514597588, "learning_rate": 1.924899257569014e-06, "loss": 0.2539, "step": 17528 }, { "epoch": 0.81, "grad_norm": 0.8325282952862263, "learning_rate": 1.9240216921822362e-06, "loss": 0.4312, "step": 17529 }, { "epoch": 0.81, "grad_norm": 0.32747449145830604, "learning_rate": 1.9231443055898356e-06, "loss": 0.2687, "step": 17530 }, { "epoch": 0.81, "grad_norm": 0.3639249307050056, "learning_rate": 1.922267097811238e-06, "loss": 0.2618, "step": 17531 }, { "epoch": 0.81, "grad_norm": 0.2998660442774641, "learning_rate": 1.9213900688658594e-06, "loss": 0.1679, "step": 17532 }, { "epoch": 0.81, "grad_norm": 0.34227602463217105, "learning_rate": 1.920513218773117e-06, "loss": 0.2561, "step": 17533 }, { "epoch": 0.81, "grad_norm": 1.358261670695232, "learning_rate": 1.919636547552428e-06, "loss": 0.5247, "step": 17534 }, { "epoch": 0.81, "grad_norm": 0.3524418163740295, "learning_rate": 1.9187600552231955e-06, "loss": 0.2626, "step": 17535 }, { "epoch": 0.81, "grad_norm": 0.3418491359690452, "learning_rate": 1.917883741804829e-06, "loss": 0.2628, "step": 17536 }, { "epoch": 0.81, "grad_norm": 1.0174230888653388, "learning_rate": 1.9170076073167245e-06, "loss": 0.5034, "step": 17537 }, { "epoch": 0.81, "grad_norm": 0.27207880604318396, "learning_rate": 1.9161316517782813e-06, "loss": 0.0752, "step": 17538 }, { "epoch": 0.81, "grad_norm": 0.35416998769814395, "learning_rate": 1.9152558752088947e-06, "loss": 0.2722, "step": 17539 }, { "epoch": 0.81, "grad_norm": 0.4158770867296729, "learning_rate": 1.9143802776279476e-06, "loss": 0.3069, "step": 17540 }, { "epoch": 0.81, "grad_norm": 0.5285166259902728, "learning_rate": 1.913504859054831e-06, "loss": 0.2643, "step": 17541 }, { "epoch": 0.81, "grad_norm": 0.36326564101602266, "learning_rate": 1.9126296195089165e-06, "loss": 0.2767, "step": 17542 }, { "epoch": 0.81, "grad_norm": 0.4920461312583416, "learning_rate": 1.9117545590095944e-06, "loss": 0.357, "step": 17543 }, { "epoch": 0.81, "grad_norm": 0.2610761530461123, "learning_rate": 1.9108796775762285e-06, "loss": 0.1854, "step": 17544 }, { "epoch": 0.81, "grad_norm": 0.31279081033228084, "learning_rate": 1.9100049752281914e-06, "loss": 0.1837, "step": 17545 }, { "epoch": 0.81, "grad_norm": 0.8053603385662662, "learning_rate": 1.9091304519848453e-06, "loss": 0.3883, "step": 17546 }, { "epoch": 0.81, "grad_norm": 0.37222084607605005, "learning_rate": 1.9082561078655513e-06, "loss": 0.2812, "step": 17547 }, { "epoch": 0.81, "grad_norm": 0.31919839224868163, "learning_rate": 1.9073819428896722e-06, "loss": 0.2156, "step": 17548 }, { "epoch": 0.81, "grad_norm": 1.199841510371724, "learning_rate": 1.9065079570765542e-06, "loss": 0.5346, "step": 17549 }, { "epoch": 0.81, "grad_norm": 0.26354155349447855, "learning_rate": 1.90563415044555e-06, "loss": 0.1439, "step": 17550 }, { "epoch": 0.81, "grad_norm": 0.24554603082839396, "learning_rate": 1.904760523016006e-06, "loss": 0.1959, "step": 17551 }, { "epoch": 0.81, "grad_norm": 0.7324894732626335, "learning_rate": 1.903887074807259e-06, "loss": 0.3096, "step": 17552 }, { "epoch": 0.81, "grad_norm": 0.6413775070583715, "learning_rate": 1.9030138058386526e-06, "loss": 0.3716, "step": 17553 }, { "epoch": 0.81, "grad_norm": 0.3089041575235801, "learning_rate": 1.9021407161295135e-06, "loss": 0.2039, "step": 17554 }, { "epoch": 0.81, "grad_norm": 0.5485958338362673, "learning_rate": 1.901267805699174e-06, "loss": 0.3183, "step": 17555 }, { "epoch": 0.81, "grad_norm": 0.4637767631811345, "learning_rate": 1.900395074566962e-06, "loss": 0.2561, "step": 17556 }, { "epoch": 0.81, "grad_norm": 0.2489765597238874, "learning_rate": 1.899522522752194e-06, "loss": 0.2098, "step": 17557 }, { "epoch": 0.81, "grad_norm": 0.6925201621952362, "learning_rate": 1.8986501502741928e-06, "loss": 0.2784, "step": 17558 }, { "epoch": 0.81, "grad_norm": 0.3813530178695753, "learning_rate": 1.8977779571522648e-06, "loss": 0.2902, "step": 17559 }, { "epoch": 0.81, "grad_norm": 0.6325370494304092, "learning_rate": 1.896905943405728e-06, "loss": 0.3512, "step": 17560 }, { "epoch": 0.81, "grad_norm": 0.4301766405134946, "learning_rate": 1.8960341090538813e-06, "loss": 0.2409, "step": 17561 }, { "epoch": 0.81, "grad_norm": 0.27525747680230406, "learning_rate": 1.8951624541160306e-06, "loss": 0.2114, "step": 17562 }, { "epoch": 0.81, "grad_norm": 0.40946861749814945, "learning_rate": 1.8942909786114704e-06, "loss": 0.2265, "step": 17563 }, { "epoch": 0.81, "grad_norm": 0.3832965487754755, "learning_rate": 1.8934196825594943e-06, "loss": 0.2658, "step": 17564 }, { "epoch": 0.81, "grad_norm": 1.3318345898818984, "learning_rate": 1.8925485659793962e-06, "loss": 0.7466, "step": 17565 }, { "epoch": 0.81, "grad_norm": 0.4120415828291118, "learning_rate": 1.8916776288904571e-06, "loss": 0.2684, "step": 17566 }, { "epoch": 0.81, "grad_norm": 0.33026009982688637, "learning_rate": 1.890806871311962e-06, "loss": 0.2466, "step": 17567 }, { "epoch": 0.81, "grad_norm": 1.6850958213452547, "learning_rate": 1.889936293263185e-06, "loss": 0.4766, "step": 17568 }, { "epoch": 0.81, "grad_norm": 0.3117092275357189, "learning_rate": 1.8890658947634011e-06, "loss": 0.221, "step": 17569 }, { "epoch": 0.81, "grad_norm": 0.49985617177540004, "learning_rate": 1.8881956758318843e-06, "loss": 0.3137, "step": 17570 }, { "epoch": 0.81, "grad_norm": 0.2806918368869582, "learning_rate": 1.8873256364878933e-06, "loss": 0.202, "step": 17571 }, { "epoch": 0.81, "grad_norm": 0.3564302915869451, "learning_rate": 1.8864557767506952e-06, "loss": 0.2565, "step": 17572 }, { "epoch": 0.81, "grad_norm": 1.2460651118824557, "learning_rate": 1.8855860966395446e-06, "loss": 0.6823, "step": 17573 }, { "epoch": 0.81, "grad_norm": 0.4614061719103919, "learning_rate": 1.8847165961737013e-06, "loss": 0.2423, "step": 17574 }, { "epoch": 0.81, "grad_norm": 0.3242832224904078, "learning_rate": 1.883847275372409e-06, "loss": 0.2608, "step": 17575 }, { "epoch": 0.81, "grad_norm": 0.509538939701538, "learning_rate": 1.8829781342549126e-06, "loss": 0.3456, "step": 17576 }, { "epoch": 0.81, "grad_norm": 0.1856604763509889, "learning_rate": 1.8821091728404606e-06, "loss": 0.1071, "step": 17577 }, { "epoch": 0.81, "grad_norm": 0.5731658151146986, "learning_rate": 1.8812403911482858e-06, "loss": 0.3192, "step": 17578 }, { "epoch": 0.81, "grad_norm": 0.2729378209838868, "learning_rate": 1.8803717891976258e-06, "loss": 0.2697, "step": 17579 }, { "epoch": 0.81, "grad_norm": 0.8600119752296859, "learning_rate": 1.8795033670077057e-06, "loss": 0.2674, "step": 17580 }, { "epoch": 0.81, "grad_norm": 0.617089758974944, "learning_rate": 1.8786351245977542e-06, "loss": 0.3226, "step": 17581 }, { "epoch": 0.81, "grad_norm": 0.3955971868870838, "learning_rate": 1.877767061986997e-06, "loss": 0.2409, "step": 17582 }, { "epoch": 0.81, "grad_norm": 0.2772408848818341, "learning_rate": 1.8768991791946455e-06, "loss": 0.2425, "step": 17583 }, { "epoch": 0.81, "grad_norm": 0.3581113003990737, "learning_rate": 1.8760314762399201e-06, "loss": 0.1867, "step": 17584 }, { "epoch": 0.81, "grad_norm": 0.6032035538232708, "learning_rate": 1.8751639531420253e-06, "loss": 0.3695, "step": 17585 }, { "epoch": 0.81, "grad_norm": 0.7395764529522229, "learning_rate": 1.8742966099201699e-06, "loss": 0.3641, "step": 17586 }, { "epoch": 0.81, "grad_norm": 0.28398213665811234, "learning_rate": 1.8734294465935577e-06, "loss": 0.2238, "step": 17587 }, { "epoch": 0.81, "grad_norm": 0.5730798864434758, "learning_rate": 1.8725624631813832e-06, "loss": 0.3429, "step": 17588 }, { "epoch": 0.81, "grad_norm": 0.4748127894089518, "learning_rate": 1.8716956597028424e-06, "loss": 0.2316, "step": 17589 }, { "epoch": 0.81, "grad_norm": 0.26270059850402633, "learning_rate": 1.8708290361771252e-06, "loss": 0.1537, "step": 17590 }, { "epoch": 0.81, "grad_norm": 0.320854453730702, "learning_rate": 1.869962592623421e-06, "loss": 0.2716, "step": 17591 }, { "epoch": 0.81, "grad_norm": 0.695734926807146, "learning_rate": 1.8690963290609088e-06, "loss": 0.3732, "step": 17592 }, { "epoch": 0.81, "grad_norm": 0.38804762656312597, "learning_rate": 1.868230245508762e-06, "loss": 0.1967, "step": 17593 }, { "epoch": 0.81, "grad_norm": 0.7283062085230795, "learning_rate": 1.8673643419861664e-06, "loss": 0.3575, "step": 17594 }, { "epoch": 0.81, "grad_norm": 0.3021653377429519, "learning_rate": 1.8664986185122825e-06, "loss": 0.2559, "step": 17595 }, { "epoch": 0.81, "grad_norm": 0.8650064720085949, "learning_rate": 1.8656330751062823e-06, "loss": 0.494, "step": 17596 }, { "epoch": 0.81, "grad_norm": 0.28306597193737143, "learning_rate": 1.864767711787323e-06, "loss": 0.1834, "step": 17597 }, { "epoch": 0.81, "grad_norm": 0.537904614011536, "learning_rate": 1.863902528574566e-06, "loss": 0.3352, "step": 17598 }, { "epoch": 0.81, "grad_norm": 0.4699291854499806, "learning_rate": 1.8630375254871679e-06, "loss": 0.3007, "step": 17599 }, { "epoch": 0.81, "grad_norm": 0.3759686947192036, "learning_rate": 1.8621727025442748e-06, "loss": 0.2047, "step": 17600 }, { "epoch": 0.81, "grad_norm": 1.375949633678169, "learning_rate": 1.8613080597650368e-06, "loss": 0.6975, "step": 17601 }, { "epoch": 0.81, "grad_norm": 0.32977058949190263, "learning_rate": 1.8604435971685908e-06, "loss": 0.2261, "step": 17602 }, { "epoch": 0.81, "grad_norm": 0.24327225669155564, "learning_rate": 1.8595793147740794e-06, "loss": 0.1926, "step": 17603 }, { "epoch": 0.81, "grad_norm": 0.6520274095589002, "learning_rate": 1.8587152126006391e-06, "loss": 0.3807, "step": 17604 }, { "epoch": 0.81, "grad_norm": 0.8124243394893229, "learning_rate": 1.857851290667394e-06, "loss": 0.281, "step": 17605 }, { "epoch": 0.81, "grad_norm": 0.3862330682042005, "learning_rate": 1.856987548993474e-06, "loss": 0.2092, "step": 17606 }, { "epoch": 0.81, "grad_norm": 0.40002263327388365, "learning_rate": 1.8561239875980008e-06, "loss": 0.3314, "step": 17607 }, { "epoch": 0.81, "grad_norm": 0.5813189470888308, "learning_rate": 1.8552606065000966e-06, "loss": 0.356, "step": 17608 }, { "epoch": 0.81, "grad_norm": 0.4173438402334249, "learning_rate": 1.8543974057188697e-06, "loss": 0.3019, "step": 17609 }, { "epoch": 0.81, "grad_norm": 0.30681560467746144, "learning_rate": 1.8535343852734333e-06, "loss": 0.1892, "step": 17610 }, { "epoch": 0.81, "grad_norm": 0.4057348371811246, "learning_rate": 1.852671545182897e-06, "loss": 0.2977, "step": 17611 }, { "epoch": 0.81, "grad_norm": 0.4447448855177972, "learning_rate": 1.8518088854663574e-06, "loss": 0.28, "step": 17612 }, { "epoch": 0.81, "grad_norm": 1.1981182192921929, "learning_rate": 1.8509464061429183e-06, "loss": 0.3597, "step": 17613 }, { "epoch": 0.81, "grad_norm": 0.6079404410432365, "learning_rate": 1.850084107231669e-06, "loss": 0.3618, "step": 17614 }, { "epoch": 0.81, "grad_norm": 0.27985795859004264, "learning_rate": 1.8492219887517027e-06, "loss": 0.2536, "step": 17615 }, { "epoch": 0.81, "grad_norm": 0.2781975370107386, "learning_rate": 1.8483600507221077e-06, "loss": 0.1385, "step": 17616 }, { "epoch": 0.81, "grad_norm": 0.9998037920830309, "learning_rate": 1.8474982931619622e-06, "loss": 0.4669, "step": 17617 }, { "epoch": 0.81, "grad_norm": 0.3767826015994102, "learning_rate": 1.846636716090351e-06, "loss": 0.2596, "step": 17618 }, { "epoch": 0.81, "grad_norm": 0.33381330252185515, "learning_rate": 1.8457753195263373e-06, "loss": 0.2577, "step": 17619 }, { "epoch": 0.81, "grad_norm": 0.6359837568264348, "learning_rate": 1.844914103489005e-06, "loss": 0.3563, "step": 17620 }, { "epoch": 0.81, "grad_norm": 0.4094552905805184, "learning_rate": 1.8440530679974145e-06, "loss": 0.2718, "step": 17621 }, { "epoch": 0.81, "grad_norm": 0.42374139769724245, "learning_rate": 1.8431922130706258e-06, "loss": 0.2056, "step": 17622 }, { "epoch": 0.81, "grad_norm": 0.2664617510745276, "learning_rate": 1.8423315387276997e-06, "loss": 0.1902, "step": 17623 }, { "epoch": 0.81, "grad_norm": 0.4051462342640805, "learning_rate": 1.8414710449876915e-06, "loss": 0.2704, "step": 17624 }, { "epoch": 0.81, "grad_norm": 1.2899799686356217, "learning_rate": 1.840610731869653e-06, "loss": 0.7544, "step": 17625 }, { "epoch": 0.81, "grad_norm": 0.43095812949829815, "learning_rate": 1.8397505993926256e-06, "loss": 0.2308, "step": 17626 }, { "epoch": 0.81, "grad_norm": 0.332475645188263, "learning_rate": 1.8388906475756586e-06, "loss": 0.2745, "step": 17627 }, { "epoch": 0.81, "grad_norm": 0.5786262372291936, "learning_rate": 1.8380308764377841e-06, "loss": 0.2808, "step": 17628 }, { "epoch": 0.81, "grad_norm": 0.3113781234420406, "learning_rate": 1.8371712859980395e-06, "loss": 0.0813, "step": 17629 }, { "epoch": 0.81, "grad_norm": 0.43830813118130924, "learning_rate": 1.8363118762754572e-06, "loss": 0.3035, "step": 17630 }, { "epoch": 0.81, "grad_norm": 0.3770260451958353, "learning_rate": 1.8354526472890588e-06, "loss": 0.2968, "step": 17631 }, { "epoch": 0.81, "grad_norm": 0.766503638281388, "learning_rate": 1.8345935990578711e-06, "loss": 0.2904, "step": 17632 }, { "epoch": 0.81, "grad_norm": 0.34067391108770506, "learning_rate": 1.8337347316009125e-06, "loss": 0.2491, "step": 17633 }, { "epoch": 0.81, "grad_norm": 0.38789054286669283, "learning_rate": 1.832876044937194e-06, "loss": 0.2791, "step": 17634 }, { "epoch": 0.81, "grad_norm": 0.31904860438619576, "learning_rate": 1.832017539085731e-06, "loss": 0.1525, "step": 17635 }, { "epoch": 0.81, "grad_norm": 0.39947217155886117, "learning_rate": 1.831159214065522e-06, "loss": 0.1945, "step": 17636 }, { "epoch": 0.81, "grad_norm": 0.7922670148544848, "learning_rate": 1.8303010698955803e-06, "loss": 0.4028, "step": 17637 }, { "epoch": 0.81, "grad_norm": 0.3877451575425467, "learning_rate": 1.829443106594896e-06, "loss": 0.3004, "step": 17638 }, { "epoch": 0.81, "grad_norm": 0.3293443173643252, "learning_rate": 1.8285853241824692e-06, "loss": 0.2045, "step": 17639 }, { "epoch": 0.81, "grad_norm": 1.4909580145794479, "learning_rate": 1.8277277226772849e-06, "loss": 0.6504, "step": 17640 }, { "epoch": 0.81, "grad_norm": 0.3411677281238788, "learning_rate": 1.8268703020983326e-06, "loss": 0.1877, "step": 17641 }, { "epoch": 0.81, "grad_norm": 0.2836506165340174, "learning_rate": 1.8260130624645956e-06, "loss": 0.2143, "step": 17642 }, { "epoch": 0.81, "grad_norm": 0.4737190555143232, "learning_rate": 1.82515600379505e-06, "loss": 0.3337, "step": 17643 }, { "epoch": 0.81, "grad_norm": 0.7701869782744053, "learning_rate": 1.824299126108674e-06, "loss": 0.4217, "step": 17644 }, { "epoch": 0.81, "grad_norm": 0.3183819996858973, "learning_rate": 1.8234424294244324e-06, "loss": 0.1866, "step": 17645 }, { "epoch": 0.81, "grad_norm": 0.3906354973821418, "learning_rate": 1.8225859137612945e-06, "loss": 0.2953, "step": 17646 }, { "epoch": 0.81, "grad_norm": 0.2805326123236495, "learning_rate": 1.8217295791382261e-06, "loss": 0.1528, "step": 17647 }, { "epoch": 0.81, "grad_norm": 0.4352743875620717, "learning_rate": 1.82087342557418e-06, "loss": 0.2808, "step": 17648 }, { "epoch": 0.81, "grad_norm": 0.7790169323226602, "learning_rate": 1.8200174530881133e-06, "loss": 0.3017, "step": 17649 }, { "epoch": 0.81, "grad_norm": 0.3379083772434371, "learning_rate": 1.8191616616989782e-06, "loss": 0.2896, "step": 17650 }, { "epoch": 0.81, "grad_norm": 0.3211337965088575, "learning_rate": 1.8183060514257167e-06, "loss": 0.2487, "step": 17651 }, { "epoch": 0.81, "grad_norm": 1.5420266416958888, "learning_rate": 1.8174506222872767e-06, "loss": 0.2489, "step": 17652 }, { "epoch": 0.81, "grad_norm": 0.2655946380497727, "learning_rate": 1.8165953743025878e-06, "loss": 0.1493, "step": 17653 }, { "epoch": 0.81, "grad_norm": 0.332874780506257, "learning_rate": 1.8157403074905956e-06, "loss": 0.2723, "step": 17654 }, { "epoch": 0.81, "grad_norm": 0.362002043392259, "learning_rate": 1.8148854218702217e-06, "loss": 0.2535, "step": 17655 }, { "epoch": 0.81, "grad_norm": 1.2143009580367556, "learning_rate": 1.8140307174603989e-06, "loss": 0.7832, "step": 17656 }, { "epoch": 0.81, "grad_norm": 0.33763140540346065, "learning_rate": 1.8131761942800453e-06, "loss": 0.2583, "step": 17657 }, { "epoch": 0.81, "grad_norm": 0.5831521119803689, "learning_rate": 1.8123218523480758e-06, "loss": 0.3685, "step": 17658 }, { "epoch": 0.81, "grad_norm": 0.27092322695470505, "learning_rate": 1.8114676916834139e-06, "loss": 0.1438, "step": 17659 }, { "epoch": 0.81, "grad_norm": 0.35463327345565193, "learning_rate": 1.8106137123049628e-06, "loss": 0.263, "step": 17660 }, { "epoch": 0.81, "grad_norm": 0.8584246436877984, "learning_rate": 1.8097599142316335e-06, "loss": 0.388, "step": 17661 }, { "epoch": 0.81, "grad_norm": 0.3678811728474368, "learning_rate": 1.8089062974823235e-06, "loss": 0.2694, "step": 17662 }, { "epoch": 0.81, "grad_norm": 0.44958617200454826, "learning_rate": 1.808052862075933e-06, "loss": 0.2798, "step": 17663 }, { "epoch": 0.81, "grad_norm": 0.6386959312097973, "learning_rate": 1.8071996080313602e-06, "loss": 0.37, "step": 17664 }, { "epoch": 0.81, "grad_norm": 0.5092444346293096, "learning_rate": 1.806346535367488e-06, "loss": 0.2577, "step": 17665 }, { "epoch": 0.81, "grad_norm": 0.41635235272326143, "learning_rate": 1.8054936441032067e-06, "loss": 0.2653, "step": 17666 }, { "epoch": 0.81, "grad_norm": 0.2507749002943778, "learning_rate": 1.8046409342574011e-06, "loss": 0.2176, "step": 17667 }, { "epoch": 0.81, "grad_norm": 0.9539739279532073, "learning_rate": 1.803788405848944e-06, "loss": 0.5225, "step": 17668 }, { "epoch": 0.81, "grad_norm": 0.36349225309323085, "learning_rate": 1.8029360588967138e-06, "loss": 0.2642, "step": 17669 }, { "epoch": 0.81, "grad_norm": 0.41191893278888075, "learning_rate": 1.802083893419574e-06, "loss": 0.2988, "step": 17670 }, { "epoch": 0.81, "grad_norm": 1.0710021315237246, "learning_rate": 1.8012319094364005e-06, "loss": 0.4873, "step": 17671 }, { "epoch": 0.81, "grad_norm": 0.3678836032807035, "learning_rate": 1.8003801069660487e-06, "loss": 0.2172, "step": 17672 }, { "epoch": 0.81, "grad_norm": 0.49340961338601536, "learning_rate": 1.7995284860273798e-06, "loss": 0.2462, "step": 17673 }, { "epoch": 0.81, "grad_norm": 0.3792381083854042, "learning_rate": 1.7986770466392445e-06, "loss": 0.2787, "step": 17674 }, { "epoch": 0.81, "grad_norm": 0.2504452864294809, "learning_rate": 1.7978257888204953e-06, "loss": 0.1634, "step": 17675 }, { "epoch": 0.81, "grad_norm": 1.3514811993508609, "learning_rate": 1.7969747125899795e-06, "loss": 0.5379, "step": 17676 }, { "epoch": 0.81, "grad_norm": 0.7561772260861458, "learning_rate": 1.7961238179665353e-06, "loss": 0.3655, "step": 17677 }, { "epoch": 0.81, "grad_norm": 0.2431029040003532, "learning_rate": 1.7952731049690053e-06, "loss": 0.2137, "step": 17678 }, { "epoch": 0.81, "grad_norm": 0.46466035807893097, "learning_rate": 1.7944225736162192e-06, "loss": 0.241, "step": 17679 }, { "epoch": 0.81, "grad_norm": 0.542753328383535, "learning_rate": 1.793572223927007e-06, "loss": 0.3495, "step": 17680 }, { "epoch": 0.81, "grad_norm": 0.3700206169260214, "learning_rate": 1.7927220559201997e-06, "loss": 0.1599, "step": 17681 }, { "epoch": 0.81, "grad_norm": 0.34075565819934583, "learning_rate": 1.791872069614613e-06, "loss": 0.2592, "step": 17682 }, { "epoch": 0.81, "grad_norm": 0.7511123225351447, "learning_rate": 1.7910222650290688e-06, "loss": 0.3698, "step": 17683 }, { "epoch": 0.81, "grad_norm": 0.6765649088301964, "learning_rate": 1.7901726421823784e-06, "loss": 0.2942, "step": 17684 }, { "epoch": 0.81, "grad_norm": 0.38042974452598977, "learning_rate": 1.789323201093356e-06, "loss": 0.2388, "step": 17685 }, { "epoch": 0.81, "grad_norm": 0.39006188683304643, "learning_rate": 1.788473941780804e-06, "loss": 0.3299, "step": 17686 }, { "epoch": 0.81, "grad_norm": 0.2374283900611283, "learning_rate": 1.787624864263522e-06, "loss": 0.1398, "step": 17687 }, { "epoch": 0.81, "grad_norm": 0.47006743557224434, "learning_rate": 1.7867759685603115e-06, "loss": 0.2423, "step": 17688 }, { "epoch": 0.81, "grad_norm": 0.8669306379113703, "learning_rate": 1.785927254689963e-06, "loss": 0.3892, "step": 17689 }, { "epoch": 0.81, "grad_norm": 0.29801453495126184, "learning_rate": 1.785078722671273e-06, "loss": 0.2685, "step": 17690 }, { "epoch": 0.81, "grad_norm": 0.5950983629138429, "learning_rate": 1.7842303725230181e-06, "loss": 0.2448, "step": 17691 }, { "epoch": 0.81, "grad_norm": 0.4139756080363069, "learning_rate": 1.7833822042639848e-06, "loss": 0.2329, "step": 17692 }, { "epoch": 0.81, "grad_norm": 0.40393589143278463, "learning_rate": 1.7825342179129535e-06, "loss": 0.2232, "step": 17693 }, { "epoch": 0.81, "grad_norm": 0.3186701093334031, "learning_rate": 1.7816864134886914e-06, "loss": 0.233, "step": 17694 }, { "epoch": 0.81, "grad_norm": 0.822582450428089, "learning_rate": 1.7808387910099733e-06, "loss": 0.3864, "step": 17695 }, { "epoch": 0.81, "grad_norm": 0.4742008085026481, "learning_rate": 1.7799913504955614e-06, "loss": 0.2775, "step": 17696 }, { "epoch": 0.81, "grad_norm": 0.6102588593108411, "learning_rate": 1.7791440919642178e-06, "loss": 0.3671, "step": 17697 }, { "epoch": 0.81, "grad_norm": 0.3570115560508427, "learning_rate": 1.7782970154347025e-06, "loss": 0.2452, "step": 17698 }, { "epoch": 0.81, "grad_norm": 0.28491422470308564, "learning_rate": 1.7774501209257655e-06, "loss": 0.1967, "step": 17699 }, { "epoch": 0.81, "grad_norm": 0.48524914918459794, "learning_rate": 1.7766034084561568e-06, "loss": 0.306, "step": 17700 }, { "epoch": 0.81, "grad_norm": 0.2789205940870507, "learning_rate": 1.7757568780446232e-06, "loss": 0.198, "step": 17701 }, { "epoch": 0.81, "grad_norm": 0.6781115126906824, "learning_rate": 1.774910529709909e-06, "loss": 0.2569, "step": 17702 }, { "epoch": 0.81, "grad_norm": 0.41084639765848374, "learning_rate": 1.7740643634707454e-06, "loss": 0.294, "step": 17703 }, { "epoch": 0.81, "grad_norm": 1.2267774066712085, "learning_rate": 1.7732183793458701e-06, "loss": 0.3534, "step": 17704 }, { "epoch": 0.81, "grad_norm": 0.5808180902445617, "learning_rate": 1.772372577354009e-06, "loss": 0.3055, "step": 17705 }, { "epoch": 0.81, "grad_norm": 0.31183530555021993, "learning_rate": 1.7715269575138893e-06, "loss": 0.2675, "step": 17706 }, { "epoch": 0.81, "grad_norm": 0.26560221229782, "learning_rate": 1.770681519844235e-06, "loss": 0.118, "step": 17707 }, { "epoch": 0.81, "grad_norm": 0.9536070241645741, "learning_rate": 1.7698362643637568e-06, "loss": 0.3295, "step": 17708 }, { "epoch": 0.81, "grad_norm": 0.42477865285789024, "learning_rate": 1.7689911910911717e-06, "loss": 0.3309, "step": 17709 }, { "epoch": 0.81, "grad_norm": 0.3701407932523433, "learning_rate": 1.7681463000451914e-06, "loss": 0.3182, "step": 17710 }, { "epoch": 0.81, "grad_norm": 0.381768361429879, "learning_rate": 1.7673015912445157e-06, "loss": 0.2085, "step": 17711 }, { "epoch": 0.81, "grad_norm": 0.4984345860922187, "learning_rate": 1.7664570647078494e-06, "loss": 0.3371, "step": 17712 }, { "epoch": 0.81, "grad_norm": 0.30531401311383954, "learning_rate": 1.7656127204538842e-06, "loss": 0.1962, "step": 17713 }, { "epoch": 0.81, "grad_norm": 0.31020131915483506, "learning_rate": 1.7647685585013208e-06, "loss": 0.1945, "step": 17714 }, { "epoch": 0.81, "grad_norm": 0.5578501156965504, "learning_rate": 1.7639245788688453e-06, "loss": 0.3381, "step": 17715 }, { "epoch": 0.81, "grad_norm": 0.8049611765017568, "learning_rate": 1.7630807815751394e-06, "loss": 0.448, "step": 17716 }, { "epoch": 0.81, "grad_norm": 0.35950937631050794, "learning_rate": 1.762237166638887e-06, "loss": 0.2077, "step": 17717 }, { "epoch": 0.81, "grad_norm": 0.3525961280225382, "learning_rate": 1.7613937340787602e-06, "loss": 0.2845, "step": 17718 }, { "epoch": 0.81, "grad_norm": 0.2984720246151478, "learning_rate": 1.7605504839134414e-06, "loss": 0.1776, "step": 17719 }, { "epoch": 0.81, "grad_norm": 0.6027460774310784, "learning_rate": 1.75970741616159e-06, "loss": 0.1002, "step": 17720 }, { "epoch": 0.81, "grad_norm": 0.41835754601797753, "learning_rate": 1.7588645308418771e-06, "loss": 0.3284, "step": 17721 }, { "epoch": 0.81, "grad_norm": 0.3765028793068553, "learning_rate": 1.7580218279729578e-06, "loss": 0.2721, "step": 17722 }, { "epoch": 0.81, "grad_norm": 0.87360421727514, "learning_rate": 1.7571793075734922e-06, "loss": 0.3258, "step": 17723 }, { "epoch": 0.81, "grad_norm": 0.42520324197174275, "learning_rate": 1.7563369696621335e-06, "loss": 0.2123, "step": 17724 }, { "epoch": 0.81, "grad_norm": 0.26501758817724974, "learning_rate": 1.7554948142575279e-06, "loss": 0.1611, "step": 17725 }, { "epoch": 0.81, "grad_norm": 0.36652803384918403, "learning_rate": 1.7546528413783203e-06, "loss": 0.3135, "step": 17726 }, { "epoch": 0.81, "grad_norm": 0.331152527603275, "learning_rate": 1.7538110510431538e-06, "loss": 0.2282, "step": 17727 }, { "epoch": 0.81, "grad_norm": 0.8301649562967964, "learning_rate": 1.7529694432706611e-06, "loss": 0.3804, "step": 17728 }, { "epoch": 0.81, "grad_norm": 0.3442238028190519, "learning_rate": 1.7521280180794787e-06, "loss": 0.2934, "step": 17729 }, { "epoch": 0.81, "grad_norm": 0.35564982321423017, "learning_rate": 1.7512867754882269e-06, "loss": 0.1959, "step": 17730 }, { "epoch": 0.81, "grad_norm": 0.36965325054843795, "learning_rate": 1.7504457155155419e-06, "loss": 0.1923, "step": 17731 }, { "epoch": 0.81, "grad_norm": 0.7467330382911693, "learning_rate": 1.7496048381800347e-06, "loss": 0.3111, "step": 17732 }, { "epoch": 0.81, "grad_norm": 0.42375208239538686, "learning_rate": 1.7487641435003266e-06, "loss": 0.2467, "step": 17733 }, { "epoch": 0.81, "grad_norm": 0.37151685221926545, "learning_rate": 1.7479236314950275e-06, "loss": 0.2957, "step": 17734 }, { "epoch": 0.81, "grad_norm": 0.5784508107773185, "learning_rate": 1.7470833021827416e-06, "loss": 0.3641, "step": 17735 }, { "epoch": 0.81, "grad_norm": 0.3970732111408475, "learning_rate": 1.7462431555820824e-06, "loss": 0.2713, "step": 17736 }, { "epoch": 0.81, "grad_norm": 0.32116470295510374, "learning_rate": 1.745403191711641e-06, "loss": 0.1974, "step": 17737 }, { "epoch": 0.81, "grad_norm": 0.4897202963031287, "learning_rate": 1.7445634105900199e-06, "loss": 0.2247, "step": 17738 }, { "epoch": 0.81, "grad_norm": 0.33021291379656426, "learning_rate": 1.7437238122358058e-06, "loss": 0.258, "step": 17739 }, { "epoch": 0.81, "grad_norm": 0.701785775494587, "learning_rate": 1.742884396667589e-06, "loss": 0.2929, "step": 17740 }, { "epoch": 0.82, "grad_norm": 0.3327058984740834, "learning_rate": 1.742045163903956e-06, "loss": 0.285, "step": 17741 }, { "epoch": 0.82, "grad_norm": 0.3956223045956301, "learning_rate": 1.7412061139634818e-06, "loss": 0.3056, "step": 17742 }, { "epoch": 0.82, "grad_norm": 0.471995110324767, "learning_rate": 1.7403672468647436e-06, "loss": 0.087, "step": 17743 }, { "epoch": 0.82, "grad_norm": 0.5372273395369969, "learning_rate": 1.739528562626317e-06, "loss": 0.2153, "step": 17744 }, { "epoch": 0.82, "grad_norm": 0.3114627146732643, "learning_rate": 1.7386900612667635e-06, "loss": 0.2664, "step": 17745 }, { "epoch": 0.82, "grad_norm": 0.48873881175607214, "learning_rate": 1.7378517428046527e-06, "loss": 0.2785, "step": 17746 }, { "epoch": 0.82, "grad_norm": 0.7945965131010365, "learning_rate": 1.7370136072585354e-06, "loss": 0.4964, "step": 17747 }, { "epoch": 0.82, "grad_norm": 0.3731840462490614, "learning_rate": 1.7361756546469788e-06, "loss": 0.2641, "step": 17748 }, { "epoch": 0.82, "grad_norm": 0.5807694834893935, "learning_rate": 1.7353378849885249e-06, "loss": 0.3019, "step": 17749 }, { "epoch": 0.82, "grad_norm": 0.2505972280302774, "learning_rate": 1.7345002983017278e-06, "loss": 0.1621, "step": 17750 }, { "epoch": 0.82, "grad_norm": 0.37292998741116357, "learning_rate": 1.733662894605127e-06, "loss": 0.2549, "step": 17751 }, { "epoch": 0.82, "grad_norm": 0.8601488311666952, "learning_rate": 1.7328256739172577e-06, "loss": 0.4516, "step": 17752 }, { "epoch": 0.82, "grad_norm": 0.33941799053567356, "learning_rate": 1.7319886362566662e-06, "loss": 0.2413, "step": 17753 }, { "epoch": 0.82, "grad_norm": 0.40243529887413604, "learning_rate": 1.7311517816418732e-06, "loss": 0.2761, "step": 17754 }, { "epoch": 0.82, "grad_norm": 0.62470320810335, "learning_rate": 1.7303151100914139e-06, "loss": 0.221, "step": 17755 }, { "epoch": 0.82, "grad_norm": 0.4145387171024202, "learning_rate": 1.7294786216238046e-06, "loss": 0.1958, "step": 17756 }, { "epoch": 0.82, "grad_norm": 0.3133999132752345, "learning_rate": 1.7286423162575684e-06, "loss": 0.2616, "step": 17757 }, { "epoch": 0.82, "grad_norm": 0.4741955775317056, "learning_rate": 1.72780619401122e-06, "loss": 0.3301, "step": 17758 }, { "epoch": 0.82, "grad_norm": 1.6644666431323134, "learning_rate": 1.7269702549032686e-06, "loss": 0.3669, "step": 17759 }, { "epoch": 0.82, "grad_norm": 0.33208576014068175, "learning_rate": 1.7261344989522212e-06, "loss": 0.2272, "step": 17760 }, { "epoch": 0.82, "grad_norm": 0.5425651696241437, "learning_rate": 1.725298926176584e-06, "loss": 0.3627, "step": 17761 }, { "epoch": 0.82, "grad_norm": 0.4447944214580623, "learning_rate": 1.7244635365948514e-06, "loss": 0.297, "step": 17762 }, { "epoch": 0.82, "grad_norm": 0.26362233352828157, "learning_rate": 1.723628330225523e-06, "loss": 0.1692, "step": 17763 }, { "epoch": 0.82, "grad_norm": 0.43735889080312873, "learning_rate": 1.7227933070870828e-06, "loss": 0.2835, "step": 17764 }, { "epoch": 0.82, "grad_norm": 0.402055229788881, "learning_rate": 1.7219584671980217e-06, "loss": 0.3007, "step": 17765 }, { "epoch": 0.82, "grad_norm": 0.301676342548234, "learning_rate": 1.7211238105768213e-06, "loss": 0.1775, "step": 17766 }, { "epoch": 0.82, "grad_norm": 0.9555714589395403, "learning_rate": 1.7202893372419637e-06, "loss": 0.4004, "step": 17767 }, { "epoch": 0.82, "grad_norm": 0.4346699915048842, "learning_rate": 1.7194550472119165e-06, "loss": 0.3271, "step": 17768 }, { "epoch": 0.82, "grad_norm": 0.3212821604057173, "learning_rate": 1.7186209405051547e-06, "loss": 0.1764, "step": 17769 }, { "epoch": 0.82, "grad_norm": 0.39396947312885783, "learning_rate": 1.7177870171401455e-06, "loss": 0.2937, "step": 17770 }, { "epoch": 0.82, "grad_norm": 0.4598056144970062, "learning_rate": 1.716953277135347e-06, "loss": 0.2349, "step": 17771 }, { "epoch": 0.82, "grad_norm": 0.32399864414097207, "learning_rate": 1.7161197205092217e-06, "loss": 0.1825, "step": 17772 }, { "epoch": 0.82, "grad_norm": 0.34941272587109506, "learning_rate": 1.7152863472802195e-06, "loss": 0.2834, "step": 17773 }, { "epoch": 0.82, "grad_norm": 0.7536597952490434, "learning_rate": 1.7144531574667934e-06, "loss": 0.3791, "step": 17774 }, { "epoch": 0.82, "grad_norm": 0.3561009957030318, "learning_rate": 1.7136201510873896e-06, "loss": 0.2582, "step": 17775 }, { "epoch": 0.82, "grad_norm": 0.2636845213632994, "learning_rate": 1.7127873281604479e-06, "loss": 0.1654, "step": 17776 }, { "epoch": 0.82, "grad_norm": 0.26706442081592396, "learning_rate": 1.711954688704407e-06, "loss": 0.2247, "step": 17777 }, { "epoch": 0.82, "grad_norm": 0.3627392289694131, "learning_rate": 1.7111222327377009e-06, "loss": 0.2465, "step": 17778 }, { "epoch": 0.82, "grad_norm": 0.8064578675692119, "learning_rate": 1.7102899602787625e-06, "loss": 0.2554, "step": 17779 }, { "epoch": 0.82, "grad_norm": 0.799560787314202, "learning_rate": 1.7094578713460154e-06, "loss": 0.4069, "step": 17780 }, { "epoch": 0.82, "grad_norm": 0.30559273316010704, "learning_rate": 1.7086259659578764e-06, "loss": 0.2619, "step": 17781 }, { "epoch": 0.82, "grad_norm": 0.4439632432382285, "learning_rate": 1.7077942441327689e-06, "loss": 0.2421, "step": 17782 }, { "epoch": 0.82, "grad_norm": 0.26649800993200773, "learning_rate": 1.7069627058891036e-06, "loss": 0.1688, "step": 17783 }, { "epoch": 0.82, "grad_norm": 0.36905973293316047, "learning_rate": 1.7061313512452937e-06, "loss": 0.265, "step": 17784 }, { "epoch": 0.82, "grad_norm": 0.44073531968664775, "learning_rate": 1.7053001802197388e-06, "loss": 0.3191, "step": 17785 }, { "epoch": 0.82, "grad_norm": 0.9134309249204701, "learning_rate": 1.7044691928308442e-06, "loss": 0.278, "step": 17786 }, { "epoch": 0.82, "grad_norm": 0.4370964119941887, "learning_rate": 1.7036383890970087e-06, "loss": 0.2717, "step": 17787 }, { "epoch": 0.82, "grad_norm": 0.576710861000084, "learning_rate": 1.7028077690366208e-06, "loss": 0.388, "step": 17788 }, { "epoch": 0.82, "grad_norm": 0.2197662206213494, "learning_rate": 1.7019773326680745e-06, "loss": 0.1826, "step": 17789 }, { "epoch": 0.82, "grad_norm": 0.40852248639628297, "learning_rate": 1.7011470800097496e-06, "loss": 0.2431, "step": 17790 }, { "epoch": 0.82, "grad_norm": 0.5575406774301187, "learning_rate": 1.7003170110800294e-06, "loss": 0.3108, "step": 17791 }, { "epoch": 0.82, "grad_norm": 1.5156084449621376, "learning_rate": 1.6994871258972944e-06, "loss": 0.2233, "step": 17792 }, { "epoch": 0.82, "grad_norm": 0.2918184532838313, "learning_rate": 1.6986574244799114e-06, "loss": 0.2438, "step": 17793 }, { "epoch": 0.82, "grad_norm": 0.47466923834266783, "learning_rate": 1.6978279068462544e-06, "loss": 0.3495, "step": 17794 }, { "epoch": 0.82, "grad_norm": 0.4108204334536216, "learning_rate": 1.696998573014682e-06, "loss": 0.1442, "step": 17795 }, { "epoch": 0.82, "grad_norm": 0.36090114187058847, "learning_rate": 1.696169423003563e-06, "loss": 0.2652, "step": 17796 }, { "epoch": 0.82, "grad_norm": 0.37175798937258103, "learning_rate": 1.6953404568312458e-06, "loss": 0.2998, "step": 17797 }, { "epoch": 0.82, "grad_norm": 0.5324461850004761, "learning_rate": 1.6945116745160906e-06, "loss": 0.2142, "step": 17798 }, { "epoch": 0.82, "grad_norm": 0.35554553130911054, "learning_rate": 1.69368307607644e-06, "loss": 0.1887, "step": 17799 }, { "epoch": 0.82, "grad_norm": 1.2674762491920029, "learning_rate": 1.6928546615306396e-06, "loss": 0.7728, "step": 17800 }, { "epoch": 0.82, "grad_norm": 0.40724388384717003, "learning_rate": 1.6920264308970325e-06, "loss": 0.2916, "step": 17801 }, { "epoch": 0.82, "grad_norm": 0.3825915363083655, "learning_rate": 1.6911983841939516e-06, "loss": 0.2198, "step": 17802 }, { "epoch": 0.82, "grad_norm": 0.48858148806069507, "learning_rate": 1.6903705214397292e-06, "loss": 0.2055, "step": 17803 }, { "epoch": 0.82, "grad_norm": 0.36413767013245807, "learning_rate": 1.6895428426526972e-06, "loss": 0.2113, "step": 17804 }, { "epoch": 0.82, "grad_norm": 0.305373038986419, "learning_rate": 1.6887153478511753e-06, "loss": 0.1904, "step": 17805 }, { "epoch": 0.82, "grad_norm": 0.5423464201197744, "learning_rate": 1.6878880370534866e-06, "loss": 0.3924, "step": 17806 }, { "epoch": 0.82, "grad_norm": 0.650601090977567, "learning_rate": 1.6870609102779411e-06, "loss": 0.3759, "step": 17807 }, { "epoch": 0.82, "grad_norm": 0.4105255267071267, "learning_rate": 1.6862339675428595e-06, "loss": 0.1963, "step": 17808 }, { "epoch": 0.82, "grad_norm": 0.2741041889130225, "learning_rate": 1.6854072088665453e-06, "loss": 0.246, "step": 17809 }, { "epoch": 0.82, "grad_norm": 0.6111350692786954, "learning_rate": 1.6845806342672988e-06, "loss": 0.2589, "step": 17810 }, { "epoch": 0.82, "grad_norm": 1.545052669473046, "learning_rate": 1.6837542437634257e-06, "loss": 0.3738, "step": 17811 }, { "epoch": 0.82, "grad_norm": 0.3231036589608299, "learning_rate": 1.6829280373732126e-06, "loss": 0.2486, "step": 17812 }, { "epoch": 0.82, "grad_norm": 0.5226476089498232, "learning_rate": 1.6821020151149624e-06, "loss": 0.3186, "step": 17813 }, { "epoch": 0.82, "grad_norm": 0.44556821877797, "learning_rate": 1.6812761770069541e-06, "loss": 0.2752, "step": 17814 }, { "epoch": 0.82, "grad_norm": 0.403442173435759, "learning_rate": 1.680450523067475e-06, "loss": 0.1572, "step": 17815 }, { "epoch": 0.82, "grad_norm": 0.3736816746893032, "learning_rate": 1.6796250533148018e-06, "loss": 0.2502, "step": 17816 }, { "epoch": 0.82, "grad_norm": 0.33994970836641814, "learning_rate": 1.6787997677672096e-06, "loss": 0.2656, "step": 17817 }, { "epoch": 0.82, "grad_norm": 0.4869682761866183, "learning_rate": 1.6779746664429731e-06, "loss": 0.2679, "step": 17818 }, { "epoch": 0.82, "grad_norm": 0.5703996382305457, "learning_rate": 1.677149749360355e-06, "loss": 0.3701, "step": 17819 }, { "epoch": 0.82, "grad_norm": 0.42672419321492977, "learning_rate": 1.6763250165376189e-06, "loss": 0.2422, "step": 17820 }, { "epoch": 0.82, "grad_norm": 0.2800836265997891, "learning_rate": 1.6755004679930275e-06, "loss": 0.2236, "step": 17821 }, { "epoch": 0.82, "grad_norm": 0.37172475002008415, "learning_rate": 1.674676103744829e-06, "loss": 0.1847, "step": 17822 }, { "epoch": 0.82, "grad_norm": 0.8152765973584594, "learning_rate": 1.6738519238112816e-06, "loss": 0.469, "step": 17823 }, { "epoch": 0.82, "grad_norm": 0.43457656158432384, "learning_rate": 1.6730279282106243e-06, "loss": 0.2772, "step": 17824 }, { "epoch": 0.82, "grad_norm": 0.3272671552209532, "learning_rate": 1.6722041169611026e-06, "loss": 0.2461, "step": 17825 }, { "epoch": 0.82, "grad_norm": 0.7240647603038967, "learning_rate": 1.6713804900809583e-06, "loss": 0.3525, "step": 17826 }, { "epoch": 0.82, "grad_norm": 0.45592062277616324, "learning_rate": 1.6705570475884203e-06, "loss": 0.2847, "step": 17827 }, { "epoch": 0.82, "grad_norm": 0.22970093704553485, "learning_rate": 1.669733789501724e-06, "loss": 0.1682, "step": 17828 }, { "epoch": 0.82, "grad_norm": 0.6226835195021604, "learning_rate": 1.6689107158390872e-06, "loss": 0.3588, "step": 17829 }, { "epoch": 0.82, "grad_norm": 0.41021067152568763, "learning_rate": 1.6680878266187428e-06, "loss": 0.3012, "step": 17830 }, { "epoch": 0.82, "grad_norm": 0.7869382915379514, "learning_rate": 1.6672651218589008e-06, "loss": 0.315, "step": 17831 }, { "epoch": 0.82, "grad_norm": 0.4323179329286414, "learning_rate": 1.6664426015777801e-06, "loss": 0.292, "step": 17832 }, { "epoch": 0.82, "grad_norm": 0.33096288125583884, "learning_rate": 1.6656202657935872e-06, "loss": 0.2668, "step": 17833 }, { "epoch": 0.82, "grad_norm": 0.2932062281169387, "learning_rate": 1.6647981145245273e-06, "loss": 0.087, "step": 17834 }, { "epoch": 0.82, "grad_norm": 0.4590536239644486, "learning_rate": 1.663976147788806e-06, "loss": 0.2593, "step": 17835 }, { "epoch": 0.82, "grad_norm": 0.5055173210434944, "learning_rate": 1.6631543656046167e-06, "loss": 0.3035, "step": 17836 }, { "epoch": 0.82, "grad_norm": 0.37766134460218065, "learning_rate": 1.6623327679901547e-06, "loss": 0.2997, "step": 17837 }, { "epoch": 0.82, "grad_norm": 0.38606130663615457, "learning_rate": 1.661511354963612e-06, "loss": 0.1785, "step": 17838 }, { "epoch": 0.82, "grad_norm": 0.5937270701780326, "learning_rate": 1.6606901265431675e-06, "loss": 0.3357, "step": 17839 }, { "epoch": 0.82, "grad_norm": 0.22344837592968383, "learning_rate": 1.6598690827470088e-06, "loss": 0.2021, "step": 17840 }, { "epoch": 0.82, "grad_norm": 0.860713818733959, "learning_rate": 1.659048223593308e-06, "loss": 0.2428, "step": 17841 }, { "epoch": 0.82, "grad_norm": 0.41166046914139925, "learning_rate": 1.6582275491002408e-06, "loss": 0.2868, "step": 17842 }, { "epoch": 0.82, "grad_norm": 0.992824793993233, "learning_rate": 1.657407059285976e-06, "loss": 0.5139, "step": 17843 }, { "epoch": 0.82, "grad_norm": 0.3412459915659717, "learning_rate": 1.6565867541686798e-06, "loss": 0.2212, "step": 17844 }, { "epoch": 0.82, "grad_norm": 0.37897388800693155, "learning_rate": 1.6557666337665124e-06, "loss": 0.2815, "step": 17845 }, { "epoch": 0.82, "grad_norm": 0.34853020672805135, "learning_rate": 1.6549466980976237e-06, "loss": 0.1765, "step": 17846 }, { "epoch": 0.82, "grad_norm": 0.563761401117358, "learning_rate": 1.6541269471801768e-06, "loss": 0.2291, "step": 17847 }, { "epoch": 0.82, "grad_norm": 0.33143211204399237, "learning_rate": 1.6533073810323142e-06, "loss": 0.289, "step": 17848 }, { "epoch": 0.82, "grad_norm": 0.517674384679477, "learning_rate": 1.652487999672182e-06, "loss": 0.37, "step": 17849 }, { "epoch": 0.82, "grad_norm": 0.8859703552231626, "learning_rate": 1.6516688031179195e-06, "loss": 0.4325, "step": 17850 }, { "epoch": 0.82, "grad_norm": 0.3909297581171697, "learning_rate": 1.650849791387662e-06, "loss": 0.2075, "step": 17851 }, { "epoch": 0.82, "grad_norm": 0.2902579496865952, "learning_rate": 1.6500309644995472e-06, "loss": 0.2392, "step": 17852 }, { "epoch": 0.82, "grad_norm": 0.5754277019411785, "learning_rate": 1.649212322471695e-06, "loss": 0.3288, "step": 17853 }, { "epoch": 0.82, "grad_norm": 0.26326085751444434, "learning_rate": 1.6483938653222364e-06, "loss": 0.1597, "step": 17854 }, { "epoch": 0.82, "grad_norm": 1.255700334265664, "learning_rate": 1.647575593069286e-06, "loss": 0.7306, "step": 17855 }, { "epoch": 0.82, "grad_norm": 0.31558366344555205, "learning_rate": 1.6467575057309614e-06, "loss": 0.2488, "step": 17856 }, { "epoch": 0.82, "grad_norm": 0.42782537834153983, "learning_rate": 1.6459396033253784e-06, "loss": 0.216, "step": 17857 }, { "epoch": 0.82, "grad_norm": 0.6761606614578485, "learning_rate": 1.6451218858706374e-06, "loss": 0.3944, "step": 17858 }, { "epoch": 0.82, "grad_norm": 0.4598650116273263, "learning_rate": 1.6443043533848446e-06, "loss": 0.2522, "step": 17859 }, { "epoch": 0.82, "grad_norm": 0.28346926439006015, "learning_rate": 1.6434870058861009e-06, "loss": 0.2183, "step": 17860 }, { "epoch": 0.82, "grad_norm": 0.3349176063453951, "learning_rate": 1.6426698433925038e-06, "loss": 0.2566, "step": 17861 }, { "epoch": 0.82, "grad_norm": 1.7624453742411625, "learning_rate": 1.6418528659221378e-06, "loss": 0.7206, "step": 17862 }, { "epoch": 0.82, "grad_norm": 0.3992551923522384, "learning_rate": 1.6410360734930942e-06, "loss": 0.2487, "step": 17863 }, { "epoch": 0.82, "grad_norm": 0.41513380094620994, "learning_rate": 1.6402194661234583e-06, "loss": 0.2543, "step": 17864 }, { "epoch": 0.82, "grad_norm": 0.7469366810898178, "learning_rate": 1.6394030438313025e-06, "loss": 0.4091, "step": 17865 }, { "epoch": 0.82, "grad_norm": 0.37338571349379807, "learning_rate": 1.6385868066347088e-06, "loss": 0.293, "step": 17866 }, { "epoch": 0.82, "grad_norm": 0.2879213692108565, "learning_rate": 1.6377707545517418e-06, "loss": 0.1464, "step": 17867 }, { "epoch": 0.82, "grad_norm": 0.28329991872660953, "learning_rate": 1.6369548876004704e-06, "loss": 0.2464, "step": 17868 }, { "epoch": 0.82, "grad_norm": 0.40121960531534223, "learning_rate": 1.63613920579896e-06, "loss": 0.2621, "step": 17869 }, { "epoch": 0.82, "grad_norm": 0.7076156079393486, "learning_rate": 1.6353237091652641e-06, "loss": 0.2909, "step": 17870 }, { "epoch": 0.82, "grad_norm": 0.8118680037477919, "learning_rate": 1.6345083977174414e-06, "loss": 0.3401, "step": 17871 }, { "epoch": 0.82, "grad_norm": 0.3655126024601235, "learning_rate": 1.6336932714735354e-06, "loss": 0.2501, "step": 17872 }, { "epoch": 0.82, "grad_norm": 0.2679541697813635, "learning_rate": 1.6328783304516016e-06, "loss": 0.2059, "step": 17873 }, { "epoch": 0.82, "grad_norm": 0.4775629973981503, "learning_rate": 1.6320635746696768e-06, "loss": 0.1873, "step": 17874 }, { "epoch": 0.82, "grad_norm": 0.39955514583744095, "learning_rate": 1.6312490041457973e-06, "loss": 0.2956, "step": 17875 }, { "epoch": 0.82, "grad_norm": 0.3997085373400413, "learning_rate": 1.6304346188979992e-06, "loss": 0.2877, "step": 17876 }, { "epoch": 0.82, "grad_norm": 0.7344360028403284, "learning_rate": 1.6296204189443121e-06, "loss": 0.2303, "step": 17877 }, { "epoch": 0.82, "grad_norm": 0.38044339631923324, "learning_rate": 1.628806404302763e-06, "loss": 0.2834, "step": 17878 }, { "epoch": 0.82, "grad_norm": 0.613118892334737, "learning_rate": 1.6279925749913693e-06, "loss": 0.3369, "step": 17879 }, { "epoch": 0.82, "grad_norm": 0.28436663447557947, "learning_rate": 1.6271789310281515e-06, "loss": 0.1805, "step": 17880 }, { "epoch": 0.82, "grad_norm": 0.3611682736987512, "learning_rate": 1.626365472431125e-06, "loss": 0.2432, "step": 17881 }, { "epoch": 0.82, "grad_norm": 0.7542239247415545, "learning_rate": 1.6255521992182942e-06, "loss": 0.3695, "step": 17882 }, { "epoch": 0.82, "grad_norm": 0.4510411912447935, "learning_rate": 1.6247391114076683e-06, "loss": 0.2421, "step": 17883 }, { "epoch": 0.82, "grad_norm": 0.2963030192975179, "learning_rate": 1.6239262090172436e-06, "loss": 0.2467, "step": 17884 }, { "epoch": 0.82, "grad_norm": 1.19114947788652, "learning_rate": 1.6231134920650193e-06, "loss": 0.6559, "step": 17885 }, { "epoch": 0.82, "grad_norm": 0.3267237945608854, "learning_rate": 1.622300960568992e-06, "loss": 0.1688, "step": 17886 }, { "epoch": 0.82, "grad_norm": 0.3474136105216397, "learning_rate": 1.6214886145471442e-06, "loss": 0.2554, "step": 17887 }, { "epoch": 0.82, "grad_norm": 0.40883381295824295, "learning_rate": 1.6206764540174657e-06, "loss": 0.2664, "step": 17888 }, { "epoch": 0.82, "grad_norm": 1.3823921732088091, "learning_rate": 1.619864478997929e-06, "loss": 0.5244, "step": 17889 }, { "epoch": 0.82, "grad_norm": 0.3028639446923094, "learning_rate": 1.6190526895065205e-06, "loss": 0.1794, "step": 17890 }, { "epoch": 0.82, "grad_norm": 1.3006765277034078, "learning_rate": 1.6182410855612085e-06, "loss": 0.7605, "step": 17891 }, { "epoch": 0.82, "grad_norm": 0.311274194836303, "learning_rate": 1.6174296671799571e-06, "loss": 0.2478, "step": 17892 }, { "epoch": 0.82, "grad_norm": 0.3127615542952781, "learning_rate": 1.6166184343807346e-06, "loss": 0.1825, "step": 17893 }, { "epoch": 0.82, "grad_norm": 0.49404070419757556, "learning_rate": 1.6158073871814995e-06, "loss": 0.2602, "step": 17894 }, { "epoch": 0.82, "grad_norm": 0.40027603779981735, "learning_rate": 1.614996525600211e-06, "loss": 0.294, "step": 17895 }, { "epoch": 0.82, "grad_norm": 0.3310231112688102, "learning_rate": 1.6141858496548147e-06, "loss": 0.1816, "step": 17896 }, { "epoch": 0.82, "grad_norm": 1.1545558204261583, "learning_rate": 1.6133753593632617e-06, "loss": 0.5579, "step": 17897 }, { "epoch": 0.82, "grad_norm": 0.7037685351075302, "learning_rate": 1.6125650547434979e-06, "loss": 0.3521, "step": 17898 }, { "epoch": 0.82, "grad_norm": 0.3716704635905693, "learning_rate": 1.6117549358134566e-06, "loss": 0.2536, "step": 17899 }, { "epoch": 0.82, "grad_norm": 0.2240276948034991, "learning_rate": 1.61094500259108e-06, "loss": 0.1668, "step": 17900 }, { "epoch": 0.82, "grad_norm": 1.5080092059926637, "learning_rate": 1.6101352550942916e-06, "loss": 0.7888, "step": 17901 }, { "epoch": 0.82, "grad_norm": 0.32581939872473537, "learning_rate": 1.609325693341024e-06, "loss": 0.2406, "step": 17902 }, { "epoch": 0.82, "grad_norm": 0.9279723157649599, "learning_rate": 1.6085163173492003e-06, "loss": 0.2958, "step": 17903 }, { "epoch": 0.82, "grad_norm": 0.3523750429180116, "learning_rate": 1.607707127136734e-06, "loss": 0.2902, "step": 17904 }, { "epoch": 0.82, "grad_norm": 0.34463953375991896, "learning_rate": 1.6068981227215475e-06, "loss": 0.2261, "step": 17905 }, { "epoch": 0.82, "grad_norm": 0.16085512172830718, "learning_rate": 1.6060893041215409e-06, "loss": 0.0695, "step": 17906 }, { "epoch": 0.82, "grad_norm": 0.38735692700757773, "learning_rate": 1.6052806713546321e-06, "loss": 0.3113, "step": 17907 }, { "epoch": 0.82, "grad_norm": 0.39905830246245694, "learning_rate": 1.6044722244387168e-06, "loss": 0.2612, "step": 17908 }, { "epoch": 0.82, "grad_norm": 0.496069079335485, "learning_rate": 1.6036639633916962e-06, "loss": 0.2734, "step": 17909 }, { "epoch": 0.82, "grad_norm": 0.6833809446227312, "learning_rate": 1.6028558882314604e-06, "loss": 0.3682, "step": 17910 }, { "epoch": 0.82, "grad_norm": 0.41285248307055894, "learning_rate": 1.602047998975903e-06, "loss": 0.2364, "step": 17911 }, { "epoch": 0.82, "grad_norm": 0.2767450941096929, "learning_rate": 1.6012402956429107e-06, "loss": 0.2414, "step": 17912 }, { "epoch": 0.82, "grad_norm": 0.47754686609990277, "learning_rate": 1.6004327782503603e-06, "loss": 0.1269, "step": 17913 }, { "epoch": 0.82, "grad_norm": 0.4124364330820471, "learning_rate": 1.5996254468161364e-06, "loss": 0.2695, "step": 17914 }, { "epoch": 0.82, "grad_norm": 0.48550868702886113, "learning_rate": 1.5988183013581048e-06, "loss": 0.3566, "step": 17915 }, { "epoch": 0.82, "grad_norm": 0.33709795998759534, "learning_rate": 1.59801134189414e-06, "loss": 0.2433, "step": 17916 }, { "epoch": 0.82, "grad_norm": 0.45992753936688874, "learning_rate": 1.5972045684421078e-06, "loss": 0.2539, "step": 17917 }, { "epoch": 0.82, "grad_norm": 0.2862187329897796, "learning_rate": 1.596397981019866e-06, "loss": 0.1682, "step": 17918 }, { "epoch": 0.82, "grad_norm": 0.5779185592869104, "learning_rate": 1.5955915796452736e-06, "loss": 0.2747, "step": 17919 }, { "epoch": 0.82, "grad_norm": 0.30298275328984553, "learning_rate": 1.5947853643361844e-06, "loss": 0.246, "step": 17920 }, { "epoch": 0.82, "grad_norm": 1.0690317283049726, "learning_rate": 1.5939793351104448e-06, "loss": 0.4099, "step": 17921 }, { "epoch": 0.82, "grad_norm": 0.7684602263393544, "learning_rate": 1.5931734919859033e-06, "loss": 0.2886, "step": 17922 }, { "epoch": 0.82, "grad_norm": 0.34379290855285777, "learning_rate": 1.5923678349803928e-06, "loss": 0.2549, "step": 17923 }, { "epoch": 0.82, "grad_norm": 0.3655213771276033, "learning_rate": 1.5915623641117605e-06, "loss": 0.2695, "step": 17924 }, { "epoch": 0.82, "grad_norm": 0.3069460237471101, "learning_rate": 1.5907570793978312e-06, "loss": 0.1915, "step": 17925 }, { "epoch": 0.82, "grad_norm": 0.4045490548556977, "learning_rate": 1.5899519808564368e-06, "loss": 0.2111, "step": 17926 }, { "epoch": 0.82, "grad_norm": 0.5325397743430399, "learning_rate": 1.589147068505398e-06, "loss": 0.3416, "step": 17927 }, { "epoch": 0.82, "grad_norm": 0.3751408190321858, "learning_rate": 1.5883423423625356e-06, "loss": 0.3053, "step": 17928 }, { "epoch": 0.82, "grad_norm": 0.9431345751184442, "learning_rate": 1.58753780244567e-06, "loss": 0.1269, "step": 17929 }, { "epoch": 0.82, "grad_norm": 0.31636453520879515, "learning_rate": 1.586733448772606e-06, "loss": 0.2335, "step": 17930 }, { "epoch": 0.82, "grad_norm": 0.303927579614148, "learning_rate": 1.5859292813611583e-06, "loss": 0.2493, "step": 17931 }, { "epoch": 0.82, "grad_norm": 0.3920819278512701, "learning_rate": 1.5851253002291234e-06, "loss": 0.197, "step": 17932 }, { "epoch": 0.82, "grad_norm": 0.4697931796273447, "learning_rate": 1.584321505394304e-06, "loss": 0.3398, "step": 17933 }, { "epoch": 0.82, "grad_norm": 1.325875353619506, "learning_rate": 1.583517896874498e-06, "loss": 0.5778, "step": 17934 }, { "epoch": 0.82, "grad_norm": 0.3481727455095071, "learning_rate": 1.5827144746874912e-06, "loss": 0.2024, "step": 17935 }, { "epoch": 0.82, "grad_norm": 0.31377243391835863, "learning_rate": 1.5819112388510739e-06, "loss": 0.2377, "step": 17936 }, { "epoch": 0.82, "grad_norm": 0.5327879998446331, "learning_rate": 1.5811081893830272e-06, "loss": 0.279, "step": 17937 }, { "epoch": 0.82, "grad_norm": 0.5507448121471459, "learning_rate": 1.5803053263011348e-06, "loss": 0.3464, "step": 17938 }, { "epoch": 0.82, "grad_norm": 0.37863610365997197, "learning_rate": 1.5795026496231658e-06, "loss": 0.235, "step": 17939 }, { "epoch": 0.82, "grad_norm": 0.36617158167496144, "learning_rate": 1.5787001593668882e-06, "loss": 0.3091, "step": 17940 }, { "epoch": 0.82, "grad_norm": 0.48393546036733776, "learning_rate": 1.577897855550078e-06, "loss": 0.2663, "step": 17941 }, { "epoch": 0.82, "grad_norm": 0.679040152182578, "learning_rate": 1.5770957381904894e-06, "loss": 0.2081, "step": 17942 }, { "epoch": 0.82, "grad_norm": 0.23420657091476496, "learning_rate": 1.5762938073058853e-06, "loss": 0.2155, "step": 17943 }, { "epoch": 0.82, "grad_norm": 0.4405390369853548, "learning_rate": 1.5754920629140146e-06, "loss": 0.2705, "step": 17944 }, { "epoch": 0.82, "grad_norm": 0.5301000889683434, "learning_rate": 1.57469050503263e-06, "loss": 0.2776, "step": 17945 }, { "epoch": 0.82, "grad_norm": 1.205504121421561, "learning_rate": 1.5738891336794805e-06, "loss": 0.5954, "step": 17946 }, { "epoch": 0.82, "grad_norm": 0.4681851802628399, "learning_rate": 1.5730879488723005e-06, "loss": 0.333, "step": 17947 }, { "epoch": 0.82, "grad_norm": 0.29761587772035697, "learning_rate": 1.5722869506288352e-06, "loss": 0.2243, "step": 17948 }, { "epoch": 0.82, "grad_norm": 0.48632445179820405, "learning_rate": 1.5714861389668113e-06, "loss": 0.2964, "step": 17949 }, { "epoch": 0.82, "grad_norm": 0.3787466845174593, "learning_rate": 1.57068551390396e-06, "loss": 0.2215, "step": 17950 }, { "epoch": 0.82, "grad_norm": 0.31654961738205817, "learning_rate": 1.5698850754580108e-06, "loss": 0.256, "step": 17951 }, { "epoch": 0.82, "grad_norm": 0.5546867662513121, "learning_rate": 1.569084823646677e-06, "loss": 0.2546, "step": 17952 }, { "epoch": 0.82, "grad_norm": 0.8726533284776277, "learning_rate": 1.5682847584876803e-06, "loss": 0.2997, "step": 17953 }, { "epoch": 0.82, "grad_norm": 0.4124420279017348, "learning_rate": 1.567484879998733e-06, "loss": 0.288, "step": 17954 }, { "epoch": 0.82, "grad_norm": 0.3617669801682913, "learning_rate": 1.5666851881975453e-06, "loss": 0.246, "step": 17955 }, { "epoch": 0.82, "grad_norm": 0.36132392601742513, "learning_rate": 1.5658856831018188e-06, "loss": 0.206, "step": 17956 }, { "epoch": 0.82, "grad_norm": 0.4166474962705854, "learning_rate": 1.5650863647292491e-06, "loss": 0.2949, "step": 17957 }, { "epoch": 0.82, "grad_norm": 0.4880196400892418, "learning_rate": 1.5642872330975434e-06, "loss": 0.1472, "step": 17958 }, { "epoch": 0.83, "grad_norm": 0.3211861643900025, "learning_rate": 1.5634882882243852e-06, "loss": 0.265, "step": 17959 }, { "epoch": 0.83, "grad_norm": 0.4263384774921326, "learning_rate": 1.562689530127468e-06, "loss": 0.306, "step": 17960 }, { "epoch": 0.83, "grad_norm": 0.576198421213249, "learning_rate": 1.561890958824469e-06, "loss": 0.2882, "step": 17961 }, { "epoch": 0.83, "grad_norm": 0.4535275798234041, "learning_rate": 1.561092574333073e-06, "loss": 0.3038, "step": 17962 }, { "epoch": 0.83, "grad_norm": 0.37847337169368267, "learning_rate": 1.5602943766709543e-06, "loss": 0.3051, "step": 17963 }, { "epoch": 0.83, "grad_norm": 0.25625915474582045, "learning_rate": 1.5594963658557827e-06, "loss": 0.202, "step": 17964 }, { "epoch": 0.83, "grad_norm": 0.9363420086752337, "learning_rate": 1.558698541905229e-06, "loss": 0.1394, "step": 17965 }, { "epoch": 0.83, "grad_norm": 0.37323717393903505, "learning_rate": 1.5579009048369486e-06, "loss": 0.2702, "step": 17966 }, { "epoch": 0.83, "grad_norm": 0.3726239705999417, "learning_rate": 1.5571034546686102e-06, "loss": 0.3107, "step": 17967 }, { "epoch": 0.83, "grad_norm": 0.4787336475324272, "learning_rate": 1.5563061914178646e-06, "loss": 0.133, "step": 17968 }, { "epoch": 0.83, "grad_norm": 0.37590661824669397, "learning_rate": 1.5555091151023594e-06, "loss": 0.2732, "step": 17969 }, { "epoch": 0.83, "grad_norm": 0.2979672090426112, "learning_rate": 1.554712225739743e-06, "loss": 0.1843, "step": 17970 }, { "epoch": 0.83, "grad_norm": 0.3365451715714415, "learning_rate": 1.5539155233476576e-06, "loss": 0.2238, "step": 17971 }, { "epoch": 0.83, "grad_norm": 0.3736541990261076, "learning_rate": 1.5531190079437453e-06, "loss": 0.2744, "step": 17972 }, { "epoch": 0.83, "grad_norm": 1.0442771677003688, "learning_rate": 1.5523226795456349e-06, "loss": 0.4375, "step": 17973 }, { "epoch": 0.83, "grad_norm": 0.6537473178825682, "learning_rate": 1.5515265381709598e-06, "loss": 0.1686, "step": 17974 }, { "epoch": 0.83, "grad_norm": 0.3234888021798698, "learning_rate": 1.5507305838373432e-06, "loss": 0.2741, "step": 17975 }, { "epoch": 0.83, "grad_norm": 0.3566459997427665, "learning_rate": 1.5499348165624073e-06, "loss": 0.2476, "step": 17976 }, { "epoch": 0.83, "grad_norm": 0.5569656167947696, "learning_rate": 1.5491392363637724e-06, "loss": 0.244, "step": 17977 }, { "epoch": 0.83, "grad_norm": 0.36361025316090034, "learning_rate": 1.548343843259048e-06, "loss": 0.217, "step": 17978 }, { "epoch": 0.83, "grad_norm": 0.3673220953253236, "learning_rate": 1.5475486372658444e-06, "loss": 0.3047, "step": 17979 }, { "epoch": 0.83, "grad_norm": 1.1491715145411694, "learning_rate": 1.5467536184017696e-06, "loss": 0.4374, "step": 17980 }, { "epoch": 0.83, "grad_norm": 0.3494019559362568, "learning_rate": 1.5459587866844205e-06, "loss": 0.2069, "step": 17981 }, { "epoch": 0.83, "grad_norm": 0.3439186344714828, "learning_rate": 1.545164142131399e-06, "loss": 0.2052, "step": 17982 }, { "epoch": 0.83, "grad_norm": 0.35360798071161786, "learning_rate": 1.5443696847602884e-06, "loss": 0.2704, "step": 17983 }, { "epoch": 0.83, "grad_norm": 0.35986972487194413, "learning_rate": 1.5435754145886882e-06, "loss": 0.2224, "step": 17984 }, { "epoch": 0.83, "grad_norm": 1.4340539270322243, "learning_rate": 1.5427813316341789e-06, "loss": 0.535, "step": 17985 }, { "epoch": 0.83, "grad_norm": 1.2279919830980677, "learning_rate": 1.5419874359143361e-06, "loss": 0.6776, "step": 17986 }, { "epoch": 0.83, "grad_norm": 0.24583901102301525, "learning_rate": 1.5411937274467404e-06, "loss": 0.2037, "step": 17987 }, { "epoch": 0.83, "grad_norm": 1.0286371821761173, "learning_rate": 1.5404002062489631e-06, "loss": 0.4332, "step": 17988 }, { "epoch": 0.83, "grad_norm": 0.4459414528282251, "learning_rate": 1.5396068723385737e-06, "loss": 0.2678, "step": 17989 }, { "epoch": 0.83, "grad_norm": 0.2494730044153878, "learning_rate": 1.5388137257331315e-06, "loss": 0.2031, "step": 17990 }, { "epoch": 0.83, "grad_norm": 0.41660481137766436, "learning_rate": 1.5380207664502e-06, "loss": 0.2275, "step": 17991 }, { "epoch": 0.83, "grad_norm": 1.314096869717473, "learning_rate": 1.537227994507332e-06, "loss": 0.7665, "step": 17992 }, { "epoch": 0.83, "grad_norm": 0.3719516520908303, "learning_rate": 1.536435409922079e-06, "loss": 0.2661, "step": 17993 }, { "epoch": 0.83, "grad_norm": 0.8486301665467313, "learning_rate": 1.5356430127119915e-06, "loss": 0.2801, "step": 17994 }, { "epoch": 0.83, "grad_norm": 0.3566478625770529, "learning_rate": 1.5348508028946063e-06, "loss": 0.2793, "step": 17995 }, { "epoch": 0.83, "grad_norm": 0.3905255356993447, "learning_rate": 1.5340587804874662e-06, "loss": 0.2722, "step": 17996 }, { "epoch": 0.83, "grad_norm": 0.3409118201937726, "learning_rate": 1.5332669455081074e-06, "loss": 0.0953, "step": 17997 }, { "epoch": 0.83, "grad_norm": 0.4478902739668914, "learning_rate": 1.5324752979740566e-06, "loss": 0.3501, "step": 17998 }, { "epoch": 0.83, "grad_norm": 0.39586152571393807, "learning_rate": 1.5316838379028431e-06, "loss": 0.2712, "step": 17999 }, { "epoch": 0.83, "grad_norm": 0.49855811886793444, "learning_rate": 1.5308925653119822e-06, "loss": 0.2667, "step": 18000 }, { "epoch": 0.83, "grad_norm": 1.0649299231188127, "learning_rate": 1.5301014802190027e-06, "loss": 0.4559, "step": 18001 }, { "epoch": 0.83, "grad_norm": 0.27059790379208454, "learning_rate": 1.5293105826414112e-06, "loss": 0.2079, "step": 18002 }, { "epoch": 0.83, "grad_norm": 0.28536241244279165, "learning_rate": 1.5285198725967209e-06, "loss": 0.2355, "step": 18003 }, { "epoch": 0.83, "grad_norm": 0.9910399167787206, "learning_rate": 1.5277293501024359e-06, "loss": 0.465, "step": 18004 }, { "epoch": 0.83, "grad_norm": 0.3205322066269283, "learning_rate": 1.526939015176052e-06, "loss": 0.2508, "step": 18005 }, { "epoch": 0.83, "grad_norm": 0.6658483011912187, "learning_rate": 1.5261488678350777e-06, "loss": 0.3527, "step": 18006 }, { "epoch": 0.83, "grad_norm": 0.3445343693624867, "learning_rate": 1.5253589080969976e-06, "loss": 0.2354, "step": 18007 }, { "epoch": 0.83, "grad_norm": 0.36352895318265754, "learning_rate": 1.5245691359793058e-06, "loss": 0.2554, "step": 18008 }, { "epoch": 0.83, "grad_norm": 0.3055318767201801, "learning_rate": 1.5237795514994813e-06, "loss": 0.1422, "step": 18009 }, { "epoch": 0.83, "grad_norm": 0.34368424022846283, "learning_rate": 1.5229901546750092e-06, "loss": 0.2529, "step": 18010 }, { "epoch": 0.83, "grad_norm": 0.3982156341068539, "learning_rate": 1.5222009455233666e-06, "loss": 0.2449, "step": 18011 }, { "epoch": 0.83, "grad_norm": 0.6770964671289202, "learning_rate": 1.5214119240620217e-06, "loss": 0.3806, "step": 18012 }, { "epoch": 0.83, "grad_norm": 0.5238136152779747, "learning_rate": 1.5206230903084451e-06, "loss": 0.1592, "step": 18013 }, { "epoch": 0.83, "grad_norm": 0.46736831158562875, "learning_rate": 1.5198344442801028e-06, "loss": 0.2597, "step": 18014 }, { "epoch": 0.83, "grad_norm": 0.2558877036628446, "learning_rate": 1.5190459859944506e-06, "loss": 0.2408, "step": 18015 }, { "epoch": 0.83, "grad_norm": 0.9354042653910232, "learning_rate": 1.518257715468948e-06, "loss": 0.4837, "step": 18016 }, { "epoch": 0.83, "grad_norm": 0.30470435785872035, "learning_rate": 1.5174696327210415e-06, "loss": 0.1751, "step": 18017 }, { "epoch": 0.83, "grad_norm": 0.452660651772885, "learning_rate": 1.5166817377681854e-06, "loss": 0.3215, "step": 18018 }, { "epoch": 0.83, "grad_norm": 0.6388633506409068, "learning_rate": 1.515894030627817e-06, "loss": 0.3009, "step": 18019 }, { "epoch": 0.83, "grad_norm": 0.32695794633277264, "learning_rate": 1.5151065113173802e-06, "loss": 0.1755, "step": 18020 }, { "epoch": 0.83, "grad_norm": 0.3555260365897052, "learning_rate": 1.5143191798543056e-06, "loss": 0.1909, "step": 18021 }, { "epoch": 0.83, "grad_norm": 0.5848815781353452, "learning_rate": 1.5135320362560246e-06, "loss": 0.3377, "step": 18022 }, { "epoch": 0.83, "grad_norm": 0.285677614637501, "learning_rate": 1.512745080539968e-06, "loss": 0.2122, "step": 18023 }, { "epoch": 0.83, "grad_norm": 0.7221017003516694, "learning_rate": 1.5119583127235525e-06, "loss": 0.4568, "step": 18024 }, { "epoch": 0.83, "grad_norm": 1.559993018203462, "learning_rate": 1.5111717328242016e-06, "loss": 0.5341, "step": 18025 }, { "epoch": 0.83, "grad_norm": 0.3520882283861581, "learning_rate": 1.510385340859325e-06, "loss": 0.2597, "step": 18026 }, { "epoch": 0.83, "grad_norm": 0.27273425987896144, "learning_rate": 1.5095991368463337e-06, "loss": 0.1894, "step": 18027 }, { "epoch": 0.83, "grad_norm": 0.4523832687249976, "learning_rate": 1.5088131208026368e-06, "loss": 0.2688, "step": 18028 }, { "epoch": 0.83, "grad_norm": 0.4187177055394945, "learning_rate": 1.5080272927456318e-06, "loss": 0.2554, "step": 18029 }, { "epoch": 0.83, "grad_norm": 1.0009966271589186, "learning_rate": 1.507241652692718e-06, "loss": 0.3331, "step": 18030 }, { "epoch": 0.83, "grad_norm": 0.40522175018924755, "learning_rate": 1.5064562006612882e-06, "loss": 0.2717, "step": 18031 }, { "epoch": 0.83, "grad_norm": 0.7981159163469557, "learning_rate": 1.5056709366687339e-06, "loss": 0.3055, "step": 18032 }, { "epoch": 0.83, "grad_norm": 0.21443565623901853, "learning_rate": 1.504885860732438e-06, "loss": 0.1507, "step": 18033 }, { "epoch": 0.83, "grad_norm": 0.37373059700156125, "learning_rate": 1.5041009728697797e-06, "loss": 0.3167, "step": 18034 }, { "epoch": 0.83, "grad_norm": 0.5916156265659752, "learning_rate": 1.5033162730981376e-06, "loss": 0.3042, "step": 18035 }, { "epoch": 0.83, "grad_norm": 0.3726830230623231, "learning_rate": 1.5025317614348834e-06, "loss": 0.245, "step": 18036 }, { "epoch": 0.83, "grad_norm": 1.3153526595682334, "learning_rate": 1.5017474378973884e-06, "loss": 0.532, "step": 18037 }, { "epoch": 0.83, "grad_norm": 0.42551576049934886, "learning_rate": 1.5009633025030124e-06, "loss": 0.2431, "step": 18038 }, { "epoch": 0.83, "grad_norm": 0.26485758372611823, "learning_rate": 1.5001793552691168e-06, "loss": 0.2507, "step": 18039 }, { "epoch": 0.83, "grad_norm": 0.7826060995368694, "learning_rate": 1.4993955962130613e-06, "loss": 0.2769, "step": 18040 }, { "epoch": 0.83, "grad_norm": 0.40234363583005434, "learning_rate": 1.4986120253521919e-06, "loss": 0.2682, "step": 18041 }, { "epoch": 0.83, "grad_norm": 0.2904331226856138, "learning_rate": 1.4978286427038602e-06, "loss": 0.2369, "step": 18042 }, { "epoch": 0.83, "grad_norm": 0.5847555653994496, "learning_rate": 1.4970454482854058e-06, "loss": 0.2513, "step": 18043 }, { "epoch": 0.83, "grad_norm": 0.5166471218360574, "learning_rate": 1.4962624421141702e-06, "loss": 0.2498, "step": 18044 }, { "epoch": 0.83, "grad_norm": 0.3255879149508685, "learning_rate": 1.4954796242074897e-06, "loss": 0.2014, "step": 18045 }, { "epoch": 0.83, "grad_norm": 0.3853528154455466, "learning_rate": 1.4946969945826917e-06, "loss": 0.2634, "step": 18046 }, { "epoch": 0.83, "grad_norm": 0.35635141303676876, "learning_rate": 1.4939145532571054e-06, "loss": 0.2669, "step": 18047 }, { "epoch": 0.83, "grad_norm": 1.2512292168062669, "learning_rate": 1.4931323002480513e-06, "loss": 0.7513, "step": 18048 }, { "epoch": 0.83, "grad_norm": 0.6742257733064705, "learning_rate": 1.4923502355728525e-06, "loss": 0.1055, "step": 18049 }, { "epoch": 0.83, "grad_norm": 0.35602087466771853, "learning_rate": 1.4915683592488195e-06, "loss": 0.2303, "step": 18050 }, { "epoch": 0.83, "grad_norm": 0.35863239655841717, "learning_rate": 1.4907866712932596e-06, "loss": 0.2949, "step": 18051 }, { "epoch": 0.83, "grad_norm": 0.7233417999120992, "learning_rate": 1.4900051717234821e-06, "loss": 0.4053, "step": 18052 }, { "epoch": 0.83, "grad_norm": 0.400665555981816, "learning_rate": 1.4892238605567876e-06, "loss": 0.152, "step": 18053 }, { "epoch": 0.83, "grad_norm": 0.270502520225025, "learning_rate": 1.488442737810476e-06, "loss": 0.2393, "step": 18054 }, { "epoch": 0.83, "grad_norm": 0.4099639613242563, "learning_rate": 1.4876618035018376e-06, "loss": 0.2125, "step": 18055 }, { "epoch": 0.83, "grad_norm": 0.4991277658667054, "learning_rate": 1.4868810576481618e-06, "loss": 0.1625, "step": 18056 }, { "epoch": 0.83, "grad_norm": 0.37640319142899137, "learning_rate": 1.4861005002667361e-06, "loss": 0.2983, "step": 18057 }, { "epoch": 0.83, "grad_norm": 0.5097789753440797, "learning_rate": 1.4853201313748378e-06, "loss": 0.2985, "step": 18058 }, { "epoch": 0.83, "grad_norm": 0.3163494801727335, "learning_rate": 1.4845399509897474e-06, "loss": 0.2143, "step": 18059 }, { "epoch": 0.83, "grad_norm": 0.3421998547284436, "learning_rate": 1.4837599591287333e-06, "loss": 0.2367, "step": 18060 }, { "epoch": 0.83, "grad_norm": 0.4367527611450621, "learning_rate": 1.482980155809065e-06, "loss": 0.1856, "step": 18061 }, { "epoch": 0.83, "grad_norm": 0.293045678911986, "learning_rate": 1.48220054104801e-06, "loss": 0.2084, "step": 18062 }, { "epoch": 0.83, "grad_norm": 0.5322901345206392, "learning_rate": 1.4814211148628232e-06, "loss": 0.3239, "step": 18063 }, { "epoch": 0.83, "grad_norm": 0.7937118042218305, "learning_rate": 1.4806418772707643e-06, "loss": 0.3923, "step": 18064 }, { "epoch": 0.83, "grad_norm": 0.5655585247370369, "learning_rate": 1.4798628282890793e-06, "loss": 0.3356, "step": 18065 }, { "epoch": 0.83, "grad_norm": 0.2876697829025247, "learning_rate": 1.4790839679350243e-06, "loss": 0.209, "step": 18066 }, { "epoch": 0.83, "grad_norm": 0.27601628748425405, "learning_rate": 1.478305296225835e-06, "loss": 0.1973, "step": 18067 }, { "epoch": 0.83, "grad_norm": 0.6251837183312858, "learning_rate": 1.4775268131787547e-06, "loss": 0.2853, "step": 18068 }, { "epoch": 0.83, "grad_norm": 0.4030026236550037, "learning_rate": 1.4767485188110154e-06, "loss": 0.2367, "step": 18069 }, { "epoch": 0.83, "grad_norm": 0.3917628035290433, "learning_rate": 1.4759704131398488e-06, "loss": 0.3003, "step": 18070 }, { "epoch": 0.83, "grad_norm": 0.875164925102872, "learning_rate": 1.4751924961824837e-06, "loss": 0.504, "step": 18071 }, { "epoch": 0.83, "grad_norm": 0.37961358627610026, "learning_rate": 1.4744147679561383e-06, "loss": 0.2134, "step": 18072 }, { "epoch": 0.83, "grad_norm": 0.3641219544739669, "learning_rate": 1.473637228478032e-06, "loss": 0.2219, "step": 18073 }, { "epoch": 0.83, "grad_norm": 0.333099635656096, "learning_rate": 1.4728598777653836e-06, "loss": 0.2645, "step": 18074 }, { "epoch": 0.83, "grad_norm": 0.3955044838376945, "learning_rate": 1.4720827158353957e-06, "loss": 0.2212, "step": 18075 }, { "epoch": 0.83, "grad_norm": 1.435336710258183, "learning_rate": 1.471305742705279e-06, "loss": 0.7051, "step": 18076 }, { "epoch": 0.83, "grad_norm": 0.8789767992020964, "learning_rate": 1.47052895839223e-06, "loss": 0.4428, "step": 18077 }, { "epoch": 0.83, "grad_norm": 0.26481279736533686, "learning_rate": 1.4697523629134525e-06, "loss": 0.2529, "step": 18078 }, { "epoch": 0.83, "grad_norm": 0.37738612554590467, "learning_rate": 1.468975956286136e-06, "loss": 0.1205, "step": 18079 }, { "epoch": 0.83, "grad_norm": 0.8914120619079094, "learning_rate": 1.4681997385274683e-06, "loss": 0.296, "step": 18080 }, { "epoch": 0.83, "grad_norm": 0.4649643378005454, "learning_rate": 1.4674237096546362e-06, "loss": 0.3031, "step": 18081 }, { "epoch": 0.83, "grad_norm": 0.3757900680460659, "learning_rate": 1.4666478696848153e-06, "loss": 0.2519, "step": 18082 }, { "epoch": 0.83, "grad_norm": 0.6442614776195555, "learning_rate": 1.4658722186351915e-06, "loss": 0.3919, "step": 18083 }, { "epoch": 0.83, "grad_norm": 0.4380821704380783, "learning_rate": 1.465096756522928e-06, "loss": 0.3013, "step": 18084 }, { "epoch": 0.83, "grad_norm": 0.7296760125446096, "learning_rate": 1.4643214833651997e-06, "loss": 0.2855, "step": 18085 }, { "epoch": 0.83, "grad_norm": 0.2856415638136337, "learning_rate": 1.4635463991791633e-06, "loss": 0.2304, "step": 18086 }, { "epoch": 0.83, "grad_norm": 0.28391649823352666, "learning_rate": 1.4627715039819834e-06, "loss": 0.2252, "step": 18087 }, { "epoch": 0.83, "grad_norm": 1.237433276976469, "learning_rate": 1.4619967977908157e-06, "loss": 0.316, "step": 18088 }, { "epoch": 0.83, "grad_norm": 0.9321338248744325, "learning_rate": 1.461222280622807e-06, "loss": 0.4907, "step": 18089 }, { "epoch": 0.83, "grad_norm": 0.2629161887373475, "learning_rate": 1.4604479524951087e-06, "loss": 0.2538, "step": 18090 }, { "epoch": 0.83, "grad_norm": 0.6145896257695453, "learning_rate": 1.4596738134248634e-06, "loss": 0.3659, "step": 18091 }, { "epoch": 0.83, "grad_norm": 0.22289655846947887, "learning_rate": 1.4588998634292062e-06, "loss": 0.0719, "step": 18092 }, { "epoch": 0.83, "grad_norm": 0.30505171459499464, "learning_rate": 1.4581261025252768e-06, "loss": 0.2107, "step": 18093 }, { "epoch": 0.83, "grad_norm": 0.446118266906929, "learning_rate": 1.4573525307302006e-06, "loss": 0.3178, "step": 18094 }, { "epoch": 0.83, "grad_norm": 0.600178524504717, "learning_rate": 1.4565791480611057e-06, "loss": 0.2868, "step": 18095 }, { "epoch": 0.83, "grad_norm": 0.4358986192548637, "learning_rate": 1.4558059545351144e-06, "loss": 0.2729, "step": 18096 }, { "epoch": 0.83, "grad_norm": 0.6839326754043568, "learning_rate": 1.4550329501693462e-06, "loss": 0.3732, "step": 18097 }, { "epoch": 0.83, "grad_norm": 0.27348523128154995, "learning_rate": 1.4542601349809127e-06, "loss": 0.1855, "step": 18098 }, { "epoch": 0.83, "grad_norm": 0.3732775907676523, "learning_rate": 1.4534875089869183e-06, "loss": 0.2887, "step": 18099 }, { "epoch": 0.83, "grad_norm": 0.651653375066412, "learning_rate": 1.4527150722044781e-06, "loss": 0.236, "step": 18100 }, { "epoch": 0.83, "grad_norm": 0.41340286753494937, "learning_rate": 1.4519428246506862e-06, "loss": 0.2025, "step": 18101 }, { "epoch": 0.83, "grad_norm": 0.31518018199604475, "learning_rate": 1.4511707663426443e-06, "loss": 0.2658, "step": 18102 }, { "epoch": 0.83, "grad_norm": 0.599798533573514, "learning_rate": 1.4503988972974393e-06, "loss": 0.349, "step": 18103 }, { "epoch": 0.83, "grad_norm": 0.5156347768948834, "learning_rate": 1.4496272175321624e-06, "loss": 0.1895, "step": 18104 }, { "epoch": 0.83, "grad_norm": 0.2525570623700407, "learning_rate": 1.4488557270639004e-06, "loss": 0.162, "step": 18105 }, { "epoch": 0.83, "grad_norm": 0.3940395636866371, "learning_rate": 1.448084425909728e-06, "loss": 0.3165, "step": 18106 }, { "epoch": 0.83, "grad_norm": 0.9382063778504801, "learning_rate": 1.4473133140867246e-06, "loss": 0.5166, "step": 18107 }, { "epoch": 0.83, "grad_norm": 0.3441388844486491, "learning_rate": 1.4465423916119637e-06, "loss": 0.2136, "step": 18108 }, { "epoch": 0.83, "grad_norm": 0.5137079514396238, "learning_rate": 1.4457716585025073e-06, "loss": 0.3186, "step": 18109 }, { "epoch": 0.83, "grad_norm": 0.5630580512911609, "learning_rate": 1.445001114775425e-06, "loss": 0.3059, "step": 18110 }, { "epoch": 0.83, "grad_norm": 0.19213778733029777, "learning_rate": 1.444230760447769e-06, "loss": 0.1304, "step": 18111 }, { "epoch": 0.83, "grad_norm": 1.403089291346263, "learning_rate": 1.4434605955365989e-06, "loss": 0.8025, "step": 18112 }, { "epoch": 0.83, "grad_norm": 0.4783803605202183, "learning_rate": 1.4426906200589641e-06, "loss": 0.3468, "step": 18113 }, { "epoch": 0.83, "grad_norm": 0.27485766458240046, "learning_rate": 1.4419208340319135e-06, "loss": 0.2218, "step": 18114 }, { "epoch": 0.83, "grad_norm": 1.059629114851713, "learning_rate": 1.4411512374724867e-06, "loss": 0.4605, "step": 18115 }, { "epoch": 0.83, "grad_norm": 0.6572806040901578, "learning_rate": 1.440381830397719e-06, "loss": 0.2446, "step": 18116 }, { "epoch": 0.83, "grad_norm": 0.3364706895953859, "learning_rate": 1.4396126128246513e-06, "loss": 0.2572, "step": 18117 }, { "epoch": 0.83, "grad_norm": 0.24749313718943855, "learning_rate": 1.4388435847703074e-06, "loss": 0.1898, "step": 18118 }, { "epoch": 0.83, "grad_norm": 0.605769595142131, "learning_rate": 1.4380747462517186e-06, "loss": 0.3636, "step": 18119 }, { "epoch": 0.83, "grad_norm": 0.4261243477600186, "learning_rate": 1.4373060972858999e-06, "loss": 0.2749, "step": 18120 }, { "epoch": 0.83, "grad_norm": 0.44562528835396936, "learning_rate": 1.436537637889871e-06, "loss": 0.2448, "step": 18121 }, { "epoch": 0.83, "grad_norm": 0.4576172087306867, "learning_rate": 1.4357693680806485e-06, "loss": 0.2642, "step": 18122 }, { "epoch": 0.83, "grad_norm": 0.3676033939427828, "learning_rate": 1.435001287875234e-06, "loss": 0.2648, "step": 18123 }, { "epoch": 0.83, "grad_norm": 0.40861945150740275, "learning_rate": 1.4342333972906398e-06, "loss": 0.1862, "step": 18124 }, { "epoch": 0.83, "grad_norm": 0.45210078882973675, "learning_rate": 1.4334656963438587e-06, "loss": 0.3017, "step": 18125 }, { "epoch": 0.83, "grad_norm": 0.28604394473350964, "learning_rate": 1.4326981850518917e-06, "loss": 0.2377, "step": 18126 }, { "epoch": 0.83, "grad_norm": 0.527125257948193, "learning_rate": 1.4319308634317308e-06, "loss": 0.147, "step": 18127 }, { "epoch": 0.83, "grad_norm": 1.6034416230158934, "learning_rate": 1.4311637315003612e-06, "loss": 0.5589, "step": 18128 }, { "epoch": 0.83, "grad_norm": 0.23994646247124496, "learning_rate": 1.4303967892747684e-06, "loss": 0.1879, "step": 18129 }, { "epoch": 0.83, "grad_norm": 0.3917087939770791, "learning_rate": 1.4296300367719297e-06, "loss": 0.2998, "step": 18130 }, { "epoch": 0.83, "grad_norm": 0.6456164270902304, "learning_rate": 1.4288634740088247e-06, "loss": 0.2958, "step": 18131 }, { "epoch": 0.83, "grad_norm": 0.3588479412569209, "learning_rate": 1.4280971010024192e-06, "loss": 0.2494, "step": 18132 }, { "epoch": 0.83, "grad_norm": 0.5757243419839844, "learning_rate": 1.4273309177696826e-06, "loss": 0.2695, "step": 18133 }, { "epoch": 0.83, "grad_norm": 0.3575877226696018, "learning_rate": 1.4265649243275782e-06, "loss": 0.2358, "step": 18134 }, { "epoch": 0.83, "grad_norm": 0.3155418247159494, "learning_rate": 1.4257991206930622e-06, "loss": 0.2262, "step": 18135 }, { "epoch": 0.83, "grad_norm": 0.7649864707231198, "learning_rate": 1.4250335068830913e-06, "loss": 0.444, "step": 18136 }, { "epoch": 0.83, "grad_norm": 0.3333589848411431, "learning_rate": 1.4242680829146117e-06, "loss": 0.2635, "step": 18137 }, { "epoch": 0.83, "grad_norm": 0.4197667395846607, "learning_rate": 1.423502848804571e-06, "loss": 0.2733, "step": 18138 }, { "epoch": 0.83, "grad_norm": 0.2987100474482119, "learning_rate": 1.4227378045699137e-06, "loss": 0.2054, "step": 18139 }, { "epoch": 0.83, "grad_norm": 1.7471159853752192, "learning_rate": 1.4219729502275726e-06, "loss": 0.2068, "step": 18140 }, { "epoch": 0.83, "grad_norm": 0.4343917243897533, "learning_rate": 1.4212082857944842e-06, "loss": 0.2771, "step": 18141 }, { "epoch": 0.83, "grad_norm": 0.3174130452203371, "learning_rate": 1.420443811287572e-06, "loss": 0.2663, "step": 18142 }, { "epoch": 0.83, "grad_norm": 0.7838966221708615, "learning_rate": 1.4196795267237695e-06, "loss": 0.405, "step": 18143 }, { "epoch": 0.83, "grad_norm": 0.3083155685183564, "learning_rate": 1.4189154321199917e-06, "loss": 0.2031, "step": 18144 }, { "epoch": 0.83, "grad_norm": 0.2444757528386469, "learning_rate": 1.4181515274931545e-06, "loss": 0.2244, "step": 18145 }, { "epoch": 0.83, "grad_norm": 1.6604570659717346, "learning_rate": 1.4173878128601704e-06, "loss": 0.7331, "step": 18146 }, { "epoch": 0.83, "grad_norm": 0.3385861190731466, "learning_rate": 1.4166242882379478e-06, "loss": 0.1875, "step": 18147 }, { "epoch": 0.83, "grad_norm": 0.6228128037246986, "learning_rate": 1.4158609536433944e-06, "loss": 0.3693, "step": 18148 }, { "epoch": 0.83, "grad_norm": 0.4296787036514854, "learning_rate": 1.415097809093402e-06, "loss": 0.2949, "step": 18149 }, { "epoch": 0.83, "grad_norm": 0.3258243293170314, "learning_rate": 1.4143348546048706e-06, "loss": 0.2118, "step": 18150 }, { "epoch": 0.83, "grad_norm": 0.28464229829898596, "learning_rate": 1.4135720901946936e-06, "loss": 0.1836, "step": 18151 }, { "epoch": 0.83, "grad_norm": 0.951724281644622, "learning_rate": 1.4128095158797517e-06, "loss": 0.3999, "step": 18152 }, { "epoch": 0.83, "grad_norm": 0.34178031465150255, "learning_rate": 1.4120471316769324e-06, "loss": 0.2618, "step": 18153 }, { "epoch": 0.83, "grad_norm": 0.3564088210299669, "learning_rate": 1.4112849376031112e-06, "loss": 0.2528, "step": 18154 }, { "epoch": 0.83, "grad_norm": 1.1312117733385187, "learning_rate": 1.4105229336751636e-06, "loss": 0.6136, "step": 18155 }, { "epoch": 0.83, "grad_norm": 0.7060319574947277, "learning_rate": 1.4097611199099615e-06, "loss": 0.3621, "step": 18156 }, { "epoch": 0.83, "grad_norm": 0.23496095416094162, "learning_rate": 1.4089994963243658e-06, "loss": 0.1992, "step": 18157 }, { "epoch": 0.83, "grad_norm": 0.5434280550063493, "learning_rate": 1.4082380629352444e-06, "loss": 0.2149, "step": 18158 }, { "epoch": 0.83, "grad_norm": 0.6130912822260625, "learning_rate": 1.407476819759447e-06, "loss": 0.2963, "step": 18159 }, { "epoch": 0.83, "grad_norm": 0.40356686162362815, "learning_rate": 1.4067157668138354e-06, "loss": 0.2322, "step": 18160 }, { "epoch": 0.83, "grad_norm": 0.3671287068229323, "learning_rate": 1.405954904115252e-06, "loss": 0.2774, "step": 18161 }, { "epoch": 0.83, "grad_norm": 0.4133176366336819, "learning_rate": 1.4051942316805468e-06, "loss": 0.2912, "step": 18162 }, { "epoch": 0.83, "grad_norm": 0.28347598842355753, "learning_rate": 1.4044337495265548e-06, "loss": 0.1314, "step": 18163 }, { "epoch": 0.83, "grad_norm": 0.8814561652303995, "learning_rate": 1.403673457670115e-06, "loss": 0.4168, "step": 18164 }, { "epoch": 0.83, "grad_norm": 0.39075223866866426, "learning_rate": 1.4029133561280618e-06, "loss": 0.2564, "step": 18165 }, { "epoch": 0.83, "grad_norm": 0.40799242059498464, "learning_rate": 1.402153444917218e-06, "loss": 0.2832, "step": 18166 }, { "epoch": 0.83, "grad_norm": 1.1512475970083706, "learning_rate": 1.4013937240544118e-06, "loss": 0.4557, "step": 18167 }, { "epoch": 0.83, "grad_norm": 0.43182953910728566, "learning_rate": 1.4006341935564628e-06, "loss": 0.2709, "step": 18168 }, { "epoch": 0.83, "grad_norm": 0.3970760613398073, "learning_rate": 1.3998748534401817e-06, "loss": 0.3015, "step": 18169 }, { "epoch": 0.83, "grad_norm": 0.2873751927451588, "learning_rate": 1.3991157037223857e-06, "loss": 0.1833, "step": 18170 }, { "epoch": 0.83, "grad_norm": 0.4562357676599878, "learning_rate": 1.3983567444198753e-06, "loss": 0.2731, "step": 18171 }, { "epoch": 0.83, "grad_norm": 0.6760248742599947, "learning_rate": 1.3975979755494562e-06, "loss": 0.3514, "step": 18172 }, { "epoch": 0.83, "grad_norm": 0.3747274074919487, "learning_rate": 1.3968393971279293e-06, "loss": 0.2551, "step": 18173 }, { "epoch": 0.83, "grad_norm": 0.546961268567488, "learning_rate": 1.3960810091720844e-06, "loss": 0.2915, "step": 18174 }, { "epoch": 0.83, "grad_norm": 0.4519380026360848, "learning_rate": 1.395322811698715e-06, "loss": 0.2887, "step": 18175 }, { "epoch": 0.83, "grad_norm": 0.3205983095772739, "learning_rate": 1.3945648047246007e-06, "loss": 0.2146, "step": 18176 }, { "epoch": 0.84, "grad_norm": 0.38675624263691544, "learning_rate": 1.3938069882665327e-06, "loss": 0.2545, "step": 18177 }, { "epoch": 0.84, "grad_norm": 0.32222129147567685, "learning_rate": 1.3930493623412812e-06, "loss": 0.23, "step": 18178 }, { "epoch": 0.84, "grad_norm": 1.2987602195937102, "learning_rate": 1.3922919269656232e-06, "loss": 0.7506, "step": 18179 }, { "epoch": 0.84, "grad_norm": 0.3554013338677597, "learning_rate": 1.3915346821563235e-06, "loss": 0.1111, "step": 18180 }, { "epoch": 0.84, "grad_norm": 0.28835058650507406, "learning_rate": 1.3907776279301488e-06, "loss": 0.2735, "step": 18181 }, { "epoch": 0.84, "grad_norm": 0.7427304773208124, "learning_rate": 1.390020764303862e-06, "loss": 0.3889, "step": 18182 }, { "epoch": 0.84, "grad_norm": 0.22662791360786488, "learning_rate": 1.3892640912942146e-06, "loss": 0.1397, "step": 18183 }, { "epoch": 0.84, "grad_norm": 0.6607942540776736, "learning_rate": 1.388507608917964e-06, "loss": 0.3414, "step": 18184 }, { "epoch": 0.84, "grad_norm": 0.3838073883376752, "learning_rate": 1.387751317191852e-06, "loss": 0.3136, "step": 18185 }, { "epoch": 0.84, "grad_norm": 0.4429494689130015, "learning_rate": 1.3869952161326261e-06, "loss": 0.1927, "step": 18186 }, { "epoch": 0.84, "grad_norm": 0.6194883777514623, "learning_rate": 1.3862393057570267e-06, "loss": 0.3467, "step": 18187 }, { "epoch": 0.84, "grad_norm": 0.8265362452908632, "learning_rate": 1.385483586081785e-06, "loss": 0.3535, "step": 18188 }, { "epoch": 0.84, "grad_norm": 0.21992358605925325, "learning_rate": 1.3847280571236332e-06, "loss": 0.1835, "step": 18189 }, { "epoch": 0.84, "grad_norm": 0.3469621347449507, "learning_rate": 1.3839727188993014e-06, "loss": 0.2266, "step": 18190 }, { "epoch": 0.84, "grad_norm": 1.2293026612426468, "learning_rate": 1.3832175714255068e-06, "loss": 0.6376, "step": 18191 }, { "epoch": 0.84, "grad_norm": 0.9990717145302259, "learning_rate": 1.3824626147189734e-06, "loss": 0.4017, "step": 18192 }, { "epoch": 0.84, "grad_norm": 0.25546697148001346, "learning_rate": 1.3817078487964063e-06, "loss": 0.2133, "step": 18193 }, { "epoch": 0.84, "grad_norm": 0.7205622639168112, "learning_rate": 1.3809532736745269e-06, "loss": 0.3943, "step": 18194 }, { "epoch": 0.84, "grad_norm": 0.2421382207965262, "learning_rate": 1.3801988893700312e-06, "loss": 0.1338, "step": 18195 }, { "epoch": 0.84, "grad_norm": 0.37414188194655623, "learning_rate": 1.3794446958996277e-06, "loss": 0.2089, "step": 18196 }, { "epoch": 0.84, "grad_norm": 0.3529886736063192, "learning_rate": 1.3786906932800071e-06, "loss": 0.3069, "step": 18197 }, { "epoch": 0.84, "grad_norm": 0.7977665578803115, "learning_rate": 1.3779368815278648e-06, "loss": 0.3434, "step": 18198 }, { "epoch": 0.84, "grad_norm": 0.39558928471104043, "learning_rate": 1.3771832606598935e-06, "loss": 0.2478, "step": 18199 }, { "epoch": 0.84, "grad_norm": 1.687070817392689, "learning_rate": 1.3764298306927703e-06, "loss": 0.693, "step": 18200 }, { "epoch": 0.84, "grad_norm": 0.26921954219390976, "learning_rate": 1.3756765916431825e-06, "loss": 0.2195, "step": 18201 }, { "epoch": 0.84, "grad_norm": 0.252331106557988, "learning_rate": 1.3749235435277997e-06, "loss": 0.1608, "step": 18202 }, { "epoch": 0.84, "grad_norm": 1.3867589701883567, "learning_rate": 1.3741706863632976e-06, "loss": 0.7759, "step": 18203 }, { "epoch": 0.84, "grad_norm": 0.5517134184872348, "learning_rate": 1.373418020166344e-06, "loss": 0.2868, "step": 18204 }, { "epoch": 0.84, "grad_norm": 0.3155788234593152, "learning_rate": 1.3726655449535998e-06, "loss": 0.2704, "step": 18205 }, { "epoch": 0.84, "grad_norm": 0.5725567306673096, "learning_rate": 1.371913260741724e-06, "loss": 0.2418, "step": 18206 }, { "epoch": 0.84, "grad_norm": 0.37124600479101655, "learning_rate": 1.3711611675473734e-06, "loss": 0.2206, "step": 18207 }, { "epoch": 0.84, "grad_norm": 0.31257267594450916, "learning_rate": 1.3704092653872002e-06, "loss": 0.24, "step": 18208 }, { "epoch": 0.84, "grad_norm": 0.35362463421338136, "learning_rate": 1.369657554277849e-06, "loss": 0.275, "step": 18209 }, { "epoch": 0.84, "grad_norm": 0.7638492512110073, "learning_rate": 1.3689060342359573e-06, "loss": 0.3737, "step": 18210 }, { "epoch": 0.84, "grad_norm": 0.37406343765717726, "learning_rate": 1.3681547052781707e-06, "loss": 0.2575, "step": 18211 }, { "epoch": 0.84, "grad_norm": 0.4005267723409763, "learning_rate": 1.3674035674211183e-06, "loss": 0.2284, "step": 18212 }, { "epoch": 0.84, "grad_norm": 0.46746327980802027, "learning_rate": 1.3666526206814323e-06, "loss": 0.2459, "step": 18213 }, { "epoch": 0.84, "grad_norm": 0.27692313763712756, "learning_rate": 1.3659018650757339e-06, "loss": 0.217, "step": 18214 }, { "epoch": 0.84, "grad_norm": 0.7301021531223986, "learning_rate": 1.3651513006206463e-06, "loss": 0.3219, "step": 18215 }, { "epoch": 0.84, "grad_norm": 0.4769601690616754, "learning_rate": 1.3644009273327896e-06, "loss": 0.2828, "step": 18216 }, { "epoch": 0.84, "grad_norm": 0.31912991999539886, "learning_rate": 1.3636507452287706e-06, "loss": 0.2759, "step": 18217 }, { "epoch": 0.84, "grad_norm": 1.703766412369911, "learning_rate": 1.3629007543252027e-06, "loss": 0.5994, "step": 18218 }, { "epoch": 0.84, "grad_norm": 0.5271275157347987, "learning_rate": 1.3621509546386847e-06, "loss": 0.1165, "step": 18219 }, { "epoch": 0.84, "grad_norm": 0.36199526360939055, "learning_rate": 1.36140134618582e-06, "loss": 0.2894, "step": 18220 }, { "epoch": 0.84, "grad_norm": 0.3527667643222183, "learning_rate": 1.3606519289832054e-06, "loss": 0.2759, "step": 18221 }, { "epoch": 0.84, "grad_norm": 0.7862873046353133, "learning_rate": 1.3599027030474288e-06, "loss": 0.2847, "step": 18222 }, { "epoch": 0.84, "grad_norm": 0.3865830703537873, "learning_rate": 1.3591536683950779e-06, "loss": 0.256, "step": 18223 }, { "epoch": 0.84, "grad_norm": 0.33921484896926096, "learning_rate": 1.3584048250427373e-06, "loss": 0.2456, "step": 18224 }, { "epoch": 0.84, "grad_norm": 0.43069135953057586, "learning_rate": 1.357656173006987e-06, "loss": 0.1911, "step": 18225 }, { "epoch": 0.84, "grad_norm": 0.3053350783472205, "learning_rate": 1.3569077123043973e-06, "loss": 0.2232, "step": 18226 }, { "epoch": 0.84, "grad_norm": 0.7038154611902642, "learning_rate": 1.3561594429515412e-06, "loss": 0.3929, "step": 18227 }, { "epoch": 0.84, "grad_norm": 0.3789085172540409, "learning_rate": 1.3554113649649847e-06, "loss": 0.2529, "step": 18228 }, { "epoch": 0.84, "grad_norm": 0.3520233126767513, "learning_rate": 1.3546634783612877e-06, "loss": 0.2636, "step": 18229 }, { "epoch": 0.84, "grad_norm": 0.6479573296094628, "learning_rate": 1.3539157831570105e-06, "loss": 0.2158, "step": 18230 }, { "epoch": 0.84, "grad_norm": 1.6480518991265372, "learning_rate": 1.353168279368703e-06, "loss": 0.7475, "step": 18231 }, { "epoch": 0.84, "grad_norm": 0.531421930917357, "learning_rate": 1.3524209670129152e-06, "loss": 0.2177, "step": 18232 }, { "epoch": 0.84, "grad_norm": 0.3701187304070856, "learning_rate": 1.3516738461061952e-06, "loss": 0.2891, "step": 18233 }, { "epoch": 0.84, "grad_norm": 0.9626978686329741, "learning_rate": 1.3509269166650785e-06, "loss": 0.5051, "step": 18234 }, { "epoch": 0.84, "grad_norm": 0.18356981311313056, "learning_rate": 1.3501801787061065e-06, "loss": 0.1388, "step": 18235 }, { "epoch": 0.84, "grad_norm": 1.599429372054145, "learning_rate": 1.3494336322458034e-06, "loss": 0.7666, "step": 18236 }, { "epoch": 0.84, "grad_norm": 0.38936045921397605, "learning_rate": 1.3486872773007064e-06, "loss": 0.2811, "step": 18237 }, { "epoch": 0.84, "grad_norm": 0.3244927225438379, "learning_rate": 1.3479411138873354e-06, "loss": 0.2103, "step": 18238 }, { "epoch": 0.84, "grad_norm": 0.7711852978203485, "learning_rate": 1.3471951420222075e-06, "loss": 0.4033, "step": 18239 }, { "epoch": 0.84, "grad_norm": 0.3710787579507663, "learning_rate": 1.3464493617218389e-06, "loss": 0.3277, "step": 18240 }, { "epoch": 0.84, "grad_norm": 0.3189683918993497, "learning_rate": 1.3457037730027411e-06, "loss": 0.1873, "step": 18241 }, { "epoch": 0.84, "grad_norm": 0.35512600113006515, "learning_rate": 1.3449583758814222e-06, "loss": 0.1853, "step": 18242 }, { "epoch": 0.84, "grad_norm": 0.7719686941697731, "learning_rate": 1.3442131703743821e-06, "loss": 0.4014, "step": 18243 }, { "epoch": 0.84, "grad_norm": 0.4041083189067108, "learning_rate": 1.343468156498121e-06, "loss": 0.2476, "step": 18244 }, { "epoch": 0.84, "grad_norm": 0.34984070214113927, "learning_rate": 1.3427233342691293e-06, "loss": 0.2521, "step": 18245 }, { "epoch": 0.84, "grad_norm": 1.4983053330361942, "learning_rate": 1.3419787037039e-06, "loss": 0.6536, "step": 18246 }, { "epoch": 0.84, "grad_norm": 0.32960600849565425, "learning_rate": 1.3412342648189192e-06, "loss": 0.2389, "step": 18247 }, { "epoch": 0.84, "grad_norm": 0.21041651600051087, "learning_rate": 1.3404900176306635e-06, "loss": 0.1605, "step": 18248 }, { "epoch": 0.84, "grad_norm": 0.971889729259226, "learning_rate": 1.339745962155613e-06, "loss": 0.405, "step": 18249 }, { "epoch": 0.84, "grad_norm": 0.3648475922367184, "learning_rate": 1.3390020984102426e-06, "loss": 0.266, "step": 18250 }, { "epoch": 0.84, "grad_norm": 0.9237508915682208, "learning_rate": 1.3382584264110165e-06, "loss": 0.3108, "step": 18251 }, { "epoch": 0.84, "grad_norm": 0.3507495396155283, "learning_rate": 1.3375149461744019e-06, "loss": 0.2928, "step": 18252 }, { "epoch": 0.84, "grad_norm": 0.36671409344922407, "learning_rate": 1.3367716577168531e-06, "loss": 0.2423, "step": 18253 }, { "epoch": 0.84, "grad_norm": 0.2963961798190344, "learning_rate": 1.3360285610548341e-06, "loss": 0.0779, "step": 18254 }, { "epoch": 0.84, "grad_norm": 0.6487567304498819, "learning_rate": 1.3352856562047912e-06, "loss": 0.3594, "step": 18255 }, { "epoch": 0.84, "grad_norm": 0.2911878953322162, "learning_rate": 1.3345429431831735e-06, "loss": 0.238, "step": 18256 }, { "epoch": 0.84, "grad_norm": 0.5556880500075052, "learning_rate": 1.3338004220064227e-06, "loss": 0.3615, "step": 18257 }, { "epoch": 0.84, "grad_norm": 1.2389806677470196, "learning_rate": 1.3330580926909765e-06, "loss": 0.3901, "step": 18258 }, { "epoch": 0.84, "grad_norm": 0.4337604997757784, "learning_rate": 1.3323159552532738e-06, "loss": 0.2651, "step": 18259 }, { "epoch": 0.84, "grad_norm": 0.2587123152428974, "learning_rate": 1.3315740097097386e-06, "loss": 0.2064, "step": 18260 }, { "epoch": 0.84, "grad_norm": 0.46082695451759753, "learning_rate": 1.3308322560768038e-06, "loss": 0.2689, "step": 18261 }, { "epoch": 0.84, "grad_norm": 0.42415794228714326, "learning_rate": 1.3300906943708836e-06, "loss": 0.2412, "step": 18262 }, { "epoch": 0.84, "grad_norm": 0.5802999529238541, "learning_rate": 1.329349324608401e-06, "loss": 0.3678, "step": 18263 }, { "epoch": 0.84, "grad_norm": 0.33773388141461513, "learning_rate": 1.3286081468057689e-06, "loss": 0.2429, "step": 18264 }, { "epoch": 0.84, "grad_norm": 0.4996610573554146, "learning_rate": 1.3278671609793915e-06, "loss": 0.2491, "step": 18265 }, { "epoch": 0.84, "grad_norm": 0.29291532265947956, "learning_rate": 1.327126367145678e-06, "loss": 0.2025, "step": 18266 }, { "epoch": 0.84, "grad_norm": 0.7046132345240388, "learning_rate": 1.32638576532103e-06, "loss": 0.3679, "step": 18267 }, { "epoch": 0.84, "grad_norm": 0.26820225104211287, "learning_rate": 1.3256453555218385e-06, "loss": 0.2006, "step": 18268 }, { "epoch": 0.84, "grad_norm": 0.6488939108170363, "learning_rate": 1.3249051377645018e-06, "loss": 0.3285, "step": 18269 }, { "epoch": 0.84, "grad_norm": 1.3610509276466798, "learning_rate": 1.3241651120653986e-06, "loss": 0.6752, "step": 18270 }, { "epoch": 0.84, "grad_norm": 0.3166872234356702, "learning_rate": 1.323425278440923e-06, "loss": 0.1888, "step": 18271 }, { "epoch": 0.84, "grad_norm": 0.49699065927766595, "learning_rate": 1.322685636907447e-06, "loss": 0.335, "step": 18272 }, { "epoch": 0.84, "grad_norm": 0.34568880395028856, "learning_rate": 1.3219461874813489e-06, "loss": 0.2614, "step": 18273 }, { "epoch": 0.84, "grad_norm": 0.27129210885590926, "learning_rate": 1.3212069301789966e-06, "loss": 0.1516, "step": 18274 }, { "epoch": 0.84, "grad_norm": 1.341592904412576, "learning_rate": 1.320467865016759e-06, "loss": 0.6056, "step": 18275 }, { "epoch": 0.84, "grad_norm": 0.3963328010129757, "learning_rate": 1.3197289920109991e-06, "loss": 0.3146, "step": 18276 }, { "epoch": 0.84, "grad_norm": 0.35776597043376046, "learning_rate": 1.31899031117807e-06, "loss": 0.0731, "step": 18277 }, { "epoch": 0.84, "grad_norm": 0.4458153789723602, "learning_rate": 1.3182518225343322e-06, "loss": 0.2877, "step": 18278 }, { "epoch": 0.84, "grad_norm": 0.2992467474875995, "learning_rate": 1.317513526096128e-06, "loss": 0.2161, "step": 18279 }, { "epoch": 0.84, "grad_norm": 0.37573354572968565, "learning_rate": 1.3167754218798067e-06, "loss": 0.2469, "step": 18280 }, { "epoch": 0.84, "grad_norm": 0.45212814681935537, "learning_rate": 1.3160375099017108e-06, "loss": 0.2403, "step": 18281 }, { "epoch": 0.84, "grad_norm": 1.173705423779554, "learning_rate": 1.3152997901781717e-06, "loss": 0.4724, "step": 18282 }, { "epoch": 0.84, "grad_norm": 0.5982283479752917, "learning_rate": 1.314562262725526e-06, "loss": 0.2895, "step": 18283 }, { "epoch": 0.84, "grad_norm": 0.2622329488110608, "learning_rate": 1.3138249275601024e-06, "loss": 0.2383, "step": 18284 }, { "epoch": 0.84, "grad_norm": 0.9048839619647755, "learning_rate": 1.3130877846982204e-06, "loss": 0.4223, "step": 18285 }, { "epoch": 0.84, "grad_norm": 0.22927404048248723, "learning_rate": 1.3123508341562052e-06, "loss": 0.1417, "step": 18286 }, { "epoch": 0.84, "grad_norm": 0.38274260976284213, "learning_rate": 1.3116140759503648e-06, "loss": 0.2588, "step": 18287 }, { "epoch": 0.84, "grad_norm": 0.3697216507581401, "learning_rate": 1.3108775100970183e-06, "loss": 0.2864, "step": 18288 }, { "epoch": 0.84, "grad_norm": 0.4037774986981472, "learning_rate": 1.3101411366124682e-06, "loss": 0.2873, "step": 18289 }, { "epoch": 0.84, "grad_norm": 0.535087843370923, "learning_rate": 1.3094049555130195e-06, "loss": 0.2584, "step": 18290 }, { "epoch": 0.84, "grad_norm": 0.3860786489524906, "learning_rate": 1.3086689668149665e-06, "loss": 0.2024, "step": 18291 }, { "epoch": 0.84, "grad_norm": 0.2748769232022338, "learning_rate": 1.3079331705346055e-06, "loss": 0.2421, "step": 18292 }, { "epoch": 0.84, "grad_norm": 0.6193863712694652, "learning_rate": 1.3071975666882297e-06, "loss": 0.3448, "step": 18293 }, { "epoch": 0.84, "grad_norm": 1.1877291402041013, "learning_rate": 1.3064621552921185e-06, "loss": 0.348, "step": 18294 }, { "epoch": 0.84, "grad_norm": 0.5644565979741699, "learning_rate": 1.305726936362559e-06, "loss": 0.3117, "step": 18295 }, { "epoch": 0.84, "grad_norm": 0.2795727052023342, "learning_rate": 1.3049919099158236e-06, "loss": 0.2676, "step": 18296 }, { "epoch": 0.84, "grad_norm": 0.5399081304840454, "learning_rate": 1.3042570759681862e-06, "loss": 0.1559, "step": 18297 }, { "epoch": 0.84, "grad_norm": 0.39383334333303627, "learning_rate": 1.3035224345359188e-06, "loss": 0.236, "step": 18298 }, { "epoch": 0.84, "grad_norm": 0.35798682720556085, "learning_rate": 1.3027879856352798e-06, "loss": 0.2865, "step": 18299 }, { "epoch": 0.84, "grad_norm": 0.3725868116203345, "learning_rate": 1.302053729282533e-06, "loss": 0.2502, "step": 18300 }, { "epoch": 0.84, "grad_norm": 0.6089947177018004, "learning_rate": 1.3013196654939341e-06, "loss": 0.3097, "step": 18301 }, { "epoch": 0.84, "grad_norm": 0.4538844863251005, "learning_rate": 1.3005857942857358e-06, "loss": 0.2789, "step": 18302 }, { "epoch": 0.84, "grad_norm": 3.075166625570616, "learning_rate": 1.2998521156741828e-06, "loss": 0.1584, "step": 18303 }, { "epoch": 0.84, "grad_norm": 0.2448033283016496, "learning_rate": 1.2991186296755142e-06, "loss": 0.2031, "step": 18304 }, { "epoch": 0.84, "grad_norm": 0.4092702064317997, "learning_rate": 1.2983853363059785e-06, "loss": 0.3315, "step": 18305 }, { "epoch": 0.84, "grad_norm": 0.83165876715656, "learning_rate": 1.297652235581801e-06, "loss": 0.4004, "step": 18306 }, { "epoch": 0.84, "grad_norm": 0.2920359543645724, "learning_rate": 1.2969193275192193e-06, "loss": 0.1855, "step": 18307 }, { "epoch": 0.84, "grad_norm": 0.3232254794346485, "learning_rate": 1.2961866121344524e-06, "loss": 0.2597, "step": 18308 }, { "epoch": 0.84, "grad_norm": 1.7246716482091662, "learning_rate": 1.2954540894437251e-06, "loss": 0.4892, "step": 18309 }, { "epoch": 0.84, "grad_norm": 0.2525896875108071, "learning_rate": 1.2947217594632577e-06, "loss": 0.1357, "step": 18310 }, { "epoch": 0.84, "grad_norm": 0.592869668101887, "learning_rate": 1.2939896222092574e-06, "loss": 0.3979, "step": 18311 }, { "epoch": 0.84, "grad_norm": 0.38455032480939805, "learning_rate": 1.2932576776979379e-06, "loss": 0.2811, "step": 18312 }, { "epoch": 0.84, "grad_norm": 0.3803467492881652, "learning_rate": 1.2925259259455004e-06, "loss": 0.1617, "step": 18313 }, { "epoch": 0.84, "grad_norm": 0.4728424813820251, "learning_rate": 1.2917943669681455e-06, "loss": 0.2913, "step": 18314 }, { "epoch": 0.84, "grad_norm": 0.6000021143131452, "learning_rate": 1.2910630007820734e-06, "loss": 0.3354, "step": 18315 }, { "epoch": 0.84, "grad_norm": 0.39788999903280187, "learning_rate": 1.2903318274034692e-06, "loss": 0.2078, "step": 18316 }, { "epoch": 0.84, "grad_norm": 0.3584796959032887, "learning_rate": 1.2896008468485254e-06, "loss": 0.2783, "step": 18317 }, { "epoch": 0.84, "grad_norm": 0.7011700070386414, "learning_rate": 1.2888700591334225e-06, "loss": 0.3873, "step": 18318 }, { "epoch": 0.84, "grad_norm": 0.24871554069145616, "learning_rate": 1.2881394642743438e-06, "loss": 0.175, "step": 18319 }, { "epoch": 0.84, "grad_norm": 0.2729341860865139, "learning_rate": 1.287409062287458e-06, "loss": 0.2104, "step": 18320 }, { "epoch": 0.84, "grad_norm": 1.4375318188899784, "learning_rate": 1.2866788531889406e-06, "loss": 0.5366, "step": 18321 }, { "epoch": 0.84, "grad_norm": 0.82105341222846, "learning_rate": 1.2859488369949524e-06, "loss": 0.4634, "step": 18322 }, { "epoch": 0.84, "grad_norm": 0.29825471980416135, "learning_rate": 1.2852190137216592e-06, "loss": 0.2221, "step": 18323 }, { "epoch": 0.84, "grad_norm": 0.5440056084717765, "learning_rate": 1.28448938338522e-06, "loss": 0.3199, "step": 18324 }, { "epoch": 0.84, "grad_norm": 0.2779128093552639, "learning_rate": 1.2837599460017824e-06, "loss": 0.1844, "step": 18325 }, { "epoch": 0.84, "grad_norm": 0.38617741696885105, "learning_rate": 1.2830307015874999e-06, "loss": 0.1997, "step": 18326 }, { "epoch": 0.84, "grad_norm": 0.581865896020572, "learning_rate": 1.2823016501585172e-06, "loss": 0.2991, "step": 18327 }, { "epoch": 0.84, "grad_norm": 0.464265025004806, "learning_rate": 1.2815727917309727e-06, "loss": 0.3087, "step": 18328 }, { "epoch": 0.84, "grad_norm": 0.38639436173357034, "learning_rate": 1.2808441263210059e-06, "loss": 0.2169, "step": 18329 }, { "epoch": 0.84, "grad_norm": 0.7372489939508887, "learning_rate": 1.2801156539447413e-06, "loss": 0.4114, "step": 18330 }, { "epoch": 0.84, "grad_norm": 0.3046190780578103, "learning_rate": 1.2793873746183162e-06, "loss": 0.1886, "step": 18331 }, { "epoch": 0.84, "grad_norm": 0.31389461261787965, "learning_rate": 1.2786592883578497e-06, "loss": 0.2736, "step": 18332 }, { "epoch": 0.84, "grad_norm": 0.4342942100588863, "learning_rate": 1.2779313951794591e-06, "loss": 0.0837, "step": 18333 }, { "epoch": 0.84, "grad_norm": 0.7217141381624553, "learning_rate": 1.2772036950992606e-06, "loss": 0.4407, "step": 18334 }, { "epoch": 0.84, "grad_norm": 0.3358247453152375, "learning_rate": 1.2764761881333653e-06, "loss": 0.2589, "step": 18335 }, { "epoch": 0.84, "grad_norm": 0.3263993836636742, "learning_rate": 1.2757488742978818e-06, "loss": 0.2615, "step": 18336 }, { "epoch": 0.84, "grad_norm": 0.5077049445081057, "learning_rate": 1.275021753608907e-06, "loss": 0.1941, "step": 18337 }, { "epoch": 0.84, "grad_norm": 0.2614110612438898, "learning_rate": 1.2742948260825439e-06, "loss": 0.1986, "step": 18338 }, { "epoch": 0.84, "grad_norm": 0.5720392109357952, "learning_rate": 1.2735680917348802e-06, "loss": 0.2299, "step": 18339 }, { "epoch": 0.84, "grad_norm": 0.5056769507258967, "learning_rate": 1.2728415505820091e-06, "loss": 0.32, "step": 18340 }, { "epoch": 0.84, "grad_norm": 0.3825077598483137, "learning_rate": 1.2721152026400174e-06, "loss": 0.2643, "step": 18341 }, { "epoch": 0.84, "grad_norm": 0.9133805992207336, "learning_rate": 1.2713890479249803e-06, "loss": 0.5113, "step": 18342 }, { "epoch": 0.84, "grad_norm": 0.35374790470991097, "learning_rate": 1.2706630864529768e-06, "loss": 0.2657, "step": 18343 }, { "epoch": 0.84, "grad_norm": 0.27519954123799933, "learning_rate": 1.2699373182400821e-06, "loss": 0.2188, "step": 18344 }, { "epoch": 0.84, "grad_norm": 0.6159065762042707, "learning_rate": 1.2692117433023577e-06, "loss": 0.2876, "step": 18345 }, { "epoch": 0.84, "grad_norm": 0.5287466580115844, "learning_rate": 1.2684863616558728e-06, "loss": 0.2646, "step": 18346 }, { "epoch": 0.84, "grad_norm": 0.3716601014966549, "learning_rate": 1.2677611733166807e-06, "loss": 0.2721, "step": 18347 }, { "epoch": 0.84, "grad_norm": 0.41349985685475216, "learning_rate": 1.2670361783008446e-06, "loss": 0.3039, "step": 18348 }, { "epoch": 0.84, "grad_norm": 0.42237216986654635, "learning_rate": 1.2663113766244094e-06, "loss": 0.1289, "step": 18349 }, { "epoch": 0.84, "grad_norm": 0.37579516416562303, "learning_rate": 1.2655867683034217e-06, "loss": 0.2729, "step": 18350 }, { "epoch": 0.84, "grad_norm": 0.28142624528551063, "learning_rate": 1.2648623533539262e-06, "loss": 0.218, "step": 18351 }, { "epoch": 0.84, "grad_norm": 0.6212086793523379, "learning_rate": 1.2641381317919542e-06, "loss": 0.2815, "step": 18352 }, { "epoch": 0.84, "grad_norm": 0.3515714150728949, "learning_rate": 1.263414103633549e-06, "loss": 0.252, "step": 18353 }, { "epoch": 0.84, "grad_norm": 1.1936057687721544, "learning_rate": 1.262690268894734e-06, "loss": 0.6659, "step": 18354 }, { "epoch": 0.84, "grad_norm": 0.3774875132235774, "learning_rate": 1.2619666275915366e-06, "loss": 0.2317, "step": 18355 }, { "epoch": 0.84, "grad_norm": 0.28050655471654656, "learning_rate": 1.2612431797399738e-06, "loss": 0.208, "step": 18356 }, { "epoch": 0.84, "grad_norm": 0.5790296303316192, "learning_rate": 1.260519925356064e-06, "loss": 0.2843, "step": 18357 }, { "epoch": 0.84, "grad_norm": 0.8369948805204668, "learning_rate": 1.2597968644558234e-06, "loss": 0.4782, "step": 18358 }, { "epoch": 0.84, "grad_norm": 0.28237020385287975, "learning_rate": 1.2590739970552534e-06, "loss": 0.2009, "step": 18359 }, { "epoch": 0.84, "grad_norm": 0.5793622160038471, "learning_rate": 1.2583513231703604e-06, "loss": 0.3698, "step": 18360 }, { "epoch": 0.84, "grad_norm": 1.1874080773087419, "learning_rate": 1.2576288428171467e-06, "loss": 0.4375, "step": 18361 }, { "epoch": 0.84, "grad_norm": 0.23865158244412135, "learning_rate": 1.2569065560116012e-06, "loss": 0.1534, "step": 18362 }, { "epoch": 0.84, "grad_norm": 0.2872731439473153, "learning_rate": 1.2561844627697205e-06, "loss": 0.2461, "step": 18363 }, { "epoch": 0.84, "grad_norm": 0.653137734886242, "learning_rate": 1.2554625631074846e-06, "loss": 0.3894, "step": 18364 }, { "epoch": 0.84, "grad_norm": 0.2940214831802127, "learning_rate": 1.2547408570408826e-06, "loss": 0.1825, "step": 18365 }, { "epoch": 0.84, "grad_norm": 1.2804171550079295, "learning_rate": 1.2540193445858883e-06, "loss": 0.7555, "step": 18366 }, { "epoch": 0.84, "grad_norm": 0.3872638429881968, "learning_rate": 1.253298025758477e-06, "loss": 0.2691, "step": 18367 }, { "epoch": 0.84, "grad_norm": 0.31454952072195236, "learning_rate": 1.252576900574618e-06, "loss": 0.1804, "step": 18368 }, { "epoch": 0.84, "grad_norm": 0.6171198312176669, "learning_rate": 1.251855969050272e-06, "loss": 0.3252, "step": 18369 }, { "epoch": 0.84, "grad_norm": 0.3328572402900212, "learning_rate": 1.2511352312014068e-06, "loss": 0.1682, "step": 18370 }, { "epoch": 0.84, "grad_norm": 0.35593740927673934, "learning_rate": 1.2504146870439726e-06, "loss": 0.2678, "step": 18371 }, { "epoch": 0.84, "grad_norm": 0.38895650973657475, "learning_rate": 1.2496943365939273e-06, "loss": 0.2735, "step": 18372 }, { "epoch": 0.84, "grad_norm": 1.6649132214142823, "learning_rate": 1.248974179867214e-06, "loss": 0.7678, "step": 18373 }, { "epoch": 0.84, "grad_norm": 0.34190150944456876, "learning_rate": 1.2482542168797772e-06, "loss": 0.2408, "step": 18374 }, { "epoch": 0.84, "grad_norm": 0.34079964623374936, "learning_rate": 1.2475344476475593e-06, "loss": 0.2487, "step": 18375 }, { "epoch": 0.84, "grad_norm": 0.35654738734408165, "learning_rate": 1.2468148721864904e-06, "loss": 0.1736, "step": 18376 }, { "epoch": 0.84, "grad_norm": 0.36699083278516226, "learning_rate": 1.2460954905125045e-06, "loss": 0.2786, "step": 18377 }, { "epoch": 0.84, "grad_norm": 1.127813046447214, "learning_rate": 1.245376302641529e-06, "loss": 0.5096, "step": 18378 }, { "epoch": 0.84, "grad_norm": 0.43460507228742423, "learning_rate": 1.244657308589482e-06, "loss": 0.3015, "step": 18379 }, { "epoch": 0.84, "grad_norm": 0.3490033485265696, "learning_rate": 1.2439385083722866e-06, "loss": 0.2437, "step": 18380 }, { "epoch": 0.84, "grad_norm": 0.7161084328908159, "learning_rate": 1.243219902005851e-06, "loss": 0.2815, "step": 18381 }, { "epoch": 0.84, "grad_norm": 0.2883059202100981, "learning_rate": 1.2425014895060871e-06, "loss": 0.1833, "step": 18382 }, { "epoch": 0.84, "grad_norm": 0.5917663070907673, "learning_rate": 1.2417832708888988e-06, "loss": 0.2576, "step": 18383 }, { "epoch": 0.84, "grad_norm": 0.39891384710206995, "learning_rate": 1.2410652461701899e-06, "loss": 0.3189, "step": 18384 }, { "epoch": 0.84, "grad_norm": 0.7750257887709735, "learning_rate": 1.2403474153658534e-06, "loss": 0.291, "step": 18385 }, { "epoch": 0.84, "grad_norm": 0.4226853018298562, "learning_rate": 1.239629778491781e-06, "loss": 0.2643, "step": 18386 }, { "epoch": 0.84, "grad_norm": 0.30198170067587116, "learning_rate": 1.2389123355638655e-06, "loss": 0.275, "step": 18387 }, { "epoch": 0.84, "grad_norm": 0.19785658811124057, "learning_rate": 1.2381950865979841e-06, "loss": 0.0726, "step": 18388 }, { "epoch": 0.84, "grad_norm": 0.43604604000047653, "learning_rate": 1.237478031610021e-06, "loss": 0.2834, "step": 18389 }, { "epoch": 0.84, "grad_norm": 0.5858892931742264, "learning_rate": 1.2367611706158467e-06, "loss": 0.4341, "step": 18390 }, { "epoch": 0.84, "grad_norm": 0.34141748132359667, "learning_rate": 1.236044503631333e-06, "loss": 0.2531, "step": 18391 }, { "epoch": 0.84, "grad_norm": 0.40263935702471865, "learning_rate": 1.2353280306723503e-06, "loss": 0.2555, "step": 18392 }, { "epoch": 0.84, "grad_norm": 0.747953255506472, "learning_rate": 1.234611751754755e-06, "loss": 0.3262, "step": 18393 }, { "epoch": 0.85, "grad_norm": 0.3850009499023764, "learning_rate": 1.233895666894408e-06, "loss": 0.193, "step": 18394 }, { "epoch": 0.85, "grad_norm": 0.26063635051246775, "learning_rate": 1.2331797761071618e-06, "loss": 0.2105, "step": 18395 }, { "epoch": 0.85, "grad_norm": 1.2837809213226825, "learning_rate": 1.2324640794088671e-06, "loss": 0.675, "step": 18396 }, { "epoch": 0.85, "grad_norm": 0.6673461970980067, "learning_rate": 1.2317485768153681e-06, "loss": 0.3818, "step": 18397 }, { "epoch": 0.85, "grad_norm": 0.3355873691697014, "learning_rate": 1.231033268342503e-06, "loss": 0.2015, "step": 18398 }, { "epoch": 0.85, "grad_norm": 0.38211933202847975, "learning_rate": 1.2303181540061083e-06, "loss": 0.2927, "step": 18399 }, { "epoch": 0.85, "grad_norm": 0.6422682583427372, "learning_rate": 1.2296032338220188e-06, "loss": 0.2135, "step": 18400 }, { "epoch": 0.85, "grad_norm": 0.3774542282538439, "learning_rate": 1.2288885078060619e-06, "loss": 0.1593, "step": 18401 }, { "epoch": 0.85, "grad_norm": 0.38730285842565143, "learning_rate": 1.2281739759740575e-06, "loss": 0.2825, "step": 18402 }, { "epoch": 0.85, "grad_norm": 0.3656620418719611, "learning_rate": 1.227459638341828e-06, "loss": 0.3124, "step": 18403 }, { "epoch": 0.85, "grad_norm": 0.36613403173972836, "learning_rate": 1.2267454949251877e-06, "loss": 0.0835, "step": 18404 }, { "epoch": 0.85, "grad_norm": 0.48136195962327094, "learning_rate": 1.2260315457399453e-06, "loss": 0.2718, "step": 18405 }, { "epoch": 0.85, "grad_norm": 0.3663269618951646, "learning_rate": 1.22531779080191e-06, "loss": 0.2322, "step": 18406 }, { "epoch": 0.85, "grad_norm": 0.36647094127241453, "learning_rate": 1.224604230126879e-06, "loss": 0.2558, "step": 18407 }, { "epoch": 0.85, "grad_norm": 0.5431615183956858, "learning_rate": 1.2238908637306534e-06, "loss": 0.2537, "step": 18408 }, { "epoch": 0.85, "grad_norm": 0.8529799629959973, "learning_rate": 1.2231776916290273e-06, "loss": 0.4398, "step": 18409 }, { "epoch": 0.85, "grad_norm": 0.3409475009270906, "learning_rate": 1.2224647138377854e-06, "loss": 0.1921, "step": 18410 }, { "epoch": 0.85, "grad_norm": 0.25114416065541095, "learning_rate": 1.2217519303727165e-06, "loss": 0.2237, "step": 18411 }, { "epoch": 0.85, "grad_norm": 1.4371853581217737, "learning_rate": 1.2210393412495958e-06, "loss": 0.439, "step": 18412 }, { "epoch": 0.85, "grad_norm": 0.6216958555918276, "learning_rate": 1.2203269464842071e-06, "loss": 0.3551, "step": 18413 }, { "epoch": 0.85, "grad_norm": 0.3996980767578418, "learning_rate": 1.219614746092318e-06, "loss": 0.2302, "step": 18414 }, { "epoch": 0.85, "grad_norm": 0.3464015156164392, "learning_rate": 1.2189027400896935e-06, "loss": 0.2825, "step": 18415 }, { "epoch": 0.85, "grad_norm": 0.26568681649238823, "learning_rate": 1.218190928492099e-06, "loss": 0.2043, "step": 18416 }, { "epoch": 0.85, "grad_norm": 0.6667343485944556, "learning_rate": 1.2174793113152928e-06, "loss": 0.2094, "step": 18417 }, { "epoch": 0.85, "grad_norm": 0.38563511906128456, "learning_rate": 1.2167678885750322e-06, "loss": 0.2642, "step": 18418 }, { "epoch": 0.85, "grad_norm": 0.8779205264619279, "learning_rate": 1.216056660287064e-06, "loss": 0.4627, "step": 18419 }, { "epoch": 0.85, "grad_norm": 0.3666020684536555, "learning_rate": 1.2153456264671337e-06, "loss": 0.2801, "step": 18420 }, { "epoch": 0.85, "grad_norm": 0.3011981537559982, "learning_rate": 1.2146347871309882e-06, "loss": 0.1666, "step": 18421 }, { "epoch": 0.85, "grad_norm": 0.4236458103987891, "learning_rate": 1.2139241422943582e-06, "loss": 0.2915, "step": 18422 }, { "epoch": 0.85, "grad_norm": 0.3151596465324451, "learning_rate": 1.213213691972981e-06, "loss": 0.2702, "step": 18423 }, { "epoch": 0.85, "grad_norm": 1.846776675659577, "learning_rate": 1.2125034361825805e-06, "loss": 0.2163, "step": 18424 }, { "epoch": 0.85, "grad_norm": 0.6079796256047143, "learning_rate": 1.2117933749388889e-06, "loss": 0.3397, "step": 18425 }, { "epoch": 0.85, "grad_norm": 0.3888254720613999, "learning_rate": 1.211083508257621e-06, "loss": 0.268, "step": 18426 }, { "epoch": 0.85, "grad_norm": 0.3544895414106576, "learning_rate": 1.2103738361544914e-06, "loss": 0.2573, "step": 18427 }, { "epoch": 0.85, "grad_norm": 0.24196975633846807, "learning_rate": 1.209664358645216e-06, "loss": 0.1442, "step": 18428 }, { "epoch": 0.85, "grad_norm": 0.41865785788101456, "learning_rate": 1.2089550757454948e-06, "loss": 0.2709, "step": 18429 }, { "epoch": 0.85, "grad_norm": 0.43089496441061653, "learning_rate": 1.2082459874710405e-06, "loss": 0.2354, "step": 18430 }, { "epoch": 0.85, "grad_norm": 0.3945539937173343, "learning_rate": 1.207537093837543e-06, "loss": 0.2673, "step": 18431 }, { "epoch": 0.85, "grad_norm": 0.4417466505795868, "learning_rate": 1.206828394860703e-06, "loss": 0.3009, "step": 18432 }, { "epoch": 0.85, "grad_norm": 0.9144225172149618, "learning_rate": 1.2061198905562043e-06, "loss": 0.5229, "step": 18433 }, { "epoch": 0.85, "grad_norm": 0.27560003886959195, "learning_rate": 1.2054115809397371e-06, "loss": 0.1815, "step": 18434 }, { "epoch": 0.85, "grad_norm": 0.2983885965929589, "learning_rate": 1.2047034660269818e-06, "loss": 0.2222, "step": 18435 }, { "epoch": 0.85, "grad_norm": 1.0792518302069471, "learning_rate": 1.203995545833614e-06, "loss": 0.4479, "step": 18436 }, { "epoch": 0.85, "grad_norm": 0.5819984334549985, "learning_rate": 1.2032878203753062e-06, "loss": 0.2647, "step": 18437 }, { "epoch": 0.85, "grad_norm": 0.34653831927836926, "learning_rate": 1.2025802896677297e-06, "loss": 0.2613, "step": 18438 }, { "epoch": 0.85, "grad_norm": 0.408647250261592, "learning_rate": 1.201872953726544e-06, "loss": 0.3144, "step": 18439 }, { "epoch": 0.85, "grad_norm": 0.2955337592511455, "learning_rate": 1.2011658125674141e-06, "loss": 0.078, "step": 18440 }, { "epoch": 0.85, "grad_norm": 0.4266059742784034, "learning_rate": 1.2004588662059913e-06, "loss": 0.2686, "step": 18441 }, { "epoch": 0.85, "grad_norm": 0.35470911800902205, "learning_rate": 1.1997521146579272e-06, "loss": 0.2912, "step": 18442 }, { "epoch": 0.85, "grad_norm": 0.6660419688271784, "learning_rate": 1.1990455579388715e-06, "loss": 0.2425, "step": 18443 }, { "epoch": 0.85, "grad_norm": 0.36518878123619075, "learning_rate": 1.198339196064463e-06, "loss": 0.2698, "step": 18444 }, { "epoch": 0.85, "grad_norm": 1.3555596696696948, "learning_rate": 1.1976330290503434e-06, "loss": 0.7061, "step": 18445 }, { "epoch": 0.85, "grad_norm": 0.33828814233024784, "learning_rate": 1.1969270569121406e-06, "loss": 0.2644, "step": 18446 }, { "epoch": 0.85, "grad_norm": 0.313220307387397, "learning_rate": 1.1962212796654927e-06, "loss": 0.1659, "step": 18447 }, { "epoch": 0.85, "grad_norm": 0.8882947173598601, "learning_rate": 1.1955156973260184e-06, "loss": 0.3488, "step": 18448 }, { "epoch": 0.85, "grad_norm": 0.7134463739868675, "learning_rate": 1.1948103099093422e-06, "loss": 0.3772, "step": 18449 }, { "epoch": 0.85, "grad_norm": 0.3530218438364439, "learning_rate": 1.1941051174310768e-06, "loss": 0.2048, "step": 18450 }, { "epoch": 0.85, "grad_norm": 0.4085928759120029, "learning_rate": 1.193400119906838e-06, "loss": 0.3069, "step": 18451 }, { "epoch": 0.85, "grad_norm": 0.23021038491267404, "learning_rate": 1.1926953173522337e-06, "loss": 0.1281, "step": 18452 }, { "epoch": 0.85, "grad_norm": 0.34971224361402575, "learning_rate": 1.1919907097828654e-06, "loss": 0.1986, "step": 18453 }, { "epoch": 0.85, "grad_norm": 0.37305292626635445, "learning_rate": 1.1912862972143325e-06, "loss": 0.2933, "step": 18454 }, { "epoch": 0.85, "grad_norm": 1.1901580456930732, "learning_rate": 1.1905820796622336e-06, "loss": 0.4686, "step": 18455 }, { "epoch": 0.85, "grad_norm": 0.2882285808753656, "learning_rate": 1.1898780571421554e-06, "loss": 0.1883, "step": 18456 }, { "epoch": 0.85, "grad_norm": 1.4396089483610623, "learning_rate": 1.1891742296696873e-06, "loss": 0.6281, "step": 18457 }, { "epoch": 0.85, "grad_norm": 0.42424674443129645, "learning_rate": 1.188470597260407e-06, "loss": 0.2773, "step": 18458 }, { "epoch": 0.85, "grad_norm": 0.2570280054579128, "learning_rate": 1.1877671599298957e-06, "loss": 0.202, "step": 18459 }, { "epoch": 0.85, "grad_norm": 0.46903292085253917, "learning_rate": 1.187063917693726e-06, "loss": 0.2065, "step": 18460 }, { "epoch": 0.85, "grad_norm": 1.3451293997278146, "learning_rate": 1.186360870567469e-06, "loss": 0.7321, "step": 18461 }, { "epoch": 0.85, "grad_norm": 0.2989824796331753, "learning_rate": 1.1856580185666878e-06, "loss": 0.2538, "step": 18462 }, { "epoch": 0.85, "grad_norm": 0.46328141452638744, "learning_rate": 1.1849553617069386e-06, "loss": 0.2562, "step": 18463 }, { "epoch": 0.85, "grad_norm": 1.0501533551359017, "learning_rate": 1.184252900003786e-06, "loss": 0.3843, "step": 18464 }, { "epoch": 0.85, "grad_norm": 0.3474449293072234, "learning_rate": 1.1835506334727754e-06, "loss": 0.2698, "step": 18465 }, { "epoch": 0.85, "grad_norm": 0.3659375824110046, "learning_rate": 1.1828485621294583e-06, "loss": 0.2465, "step": 18466 }, { "epoch": 0.85, "grad_norm": 0.39388991680878294, "learning_rate": 1.1821466859893738e-06, "loss": 0.1863, "step": 18467 }, { "epoch": 0.85, "grad_norm": 0.3827593734476382, "learning_rate": 1.1814450050680626e-06, "loss": 0.2705, "step": 18468 }, { "epoch": 0.85, "grad_norm": 1.267487868262863, "learning_rate": 1.1807435193810623e-06, "loss": 0.3009, "step": 18469 }, { "epoch": 0.85, "grad_norm": 0.35637710741488415, "learning_rate": 1.1800422289438984e-06, "loss": 0.2851, "step": 18470 }, { "epoch": 0.85, "grad_norm": 0.3520532584793097, "learning_rate": 1.1793411337720994e-06, "loss": 0.2447, "step": 18471 }, { "epoch": 0.85, "grad_norm": 0.49041255804908956, "learning_rate": 1.1786402338811853e-06, "loss": 0.2563, "step": 18472 }, { "epoch": 0.85, "grad_norm": 0.2924966326105871, "learning_rate": 1.1779395292866746e-06, "loss": 0.1689, "step": 18473 }, { "epoch": 0.85, "grad_norm": 0.35869819876629216, "learning_rate": 1.1772390200040817e-06, "loss": 0.2769, "step": 18474 }, { "epoch": 0.85, "grad_norm": 0.5236479125020699, "learning_rate": 1.176538706048911e-06, "loss": 0.3476, "step": 18475 }, { "epoch": 0.85, "grad_norm": 0.7615581066627264, "learning_rate": 1.1758385874366696e-06, "loss": 0.2677, "step": 18476 }, { "epoch": 0.85, "grad_norm": 0.34906258249323385, "learning_rate": 1.1751386641828567e-06, "loss": 0.259, "step": 18477 }, { "epoch": 0.85, "grad_norm": 0.4076652830941242, "learning_rate": 1.1744389363029707e-06, "loss": 0.2771, "step": 18478 }, { "epoch": 0.85, "grad_norm": 0.1865067892014689, "learning_rate": 1.1737394038124994e-06, "loss": 0.099, "step": 18479 }, { "epoch": 0.85, "grad_norm": 0.3854405676527759, "learning_rate": 1.1730400667269282e-06, "loss": 0.2569, "step": 18480 }, { "epoch": 0.85, "grad_norm": 1.659534759891341, "learning_rate": 1.1723409250617456e-06, "loss": 0.5704, "step": 18481 }, { "epoch": 0.85, "grad_norm": 0.3917114573579003, "learning_rate": 1.1716419788324252e-06, "loss": 0.251, "step": 18482 }, { "epoch": 0.85, "grad_norm": 0.3418924829861694, "learning_rate": 1.170943228054444e-06, "loss": 0.2494, "step": 18483 }, { "epoch": 0.85, "grad_norm": 1.69043375622048, "learning_rate": 1.1702446727432681e-06, "loss": 0.5016, "step": 18484 }, { "epoch": 0.85, "grad_norm": 0.30023143315625417, "learning_rate": 1.1695463129143647e-06, "loss": 0.2242, "step": 18485 }, { "epoch": 0.85, "grad_norm": 0.2694071417644677, "learning_rate": 1.1688481485831982e-06, "loss": 0.2039, "step": 18486 }, { "epoch": 0.85, "grad_norm": 0.48177585407850515, "learning_rate": 1.1681501797652194e-06, "loss": 0.3259, "step": 18487 }, { "epoch": 0.85, "grad_norm": 0.7888364205142923, "learning_rate": 1.1674524064758851e-06, "loss": 0.3538, "step": 18488 }, { "epoch": 0.85, "grad_norm": 0.5074515066881172, "learning_rate": 1.1667548287306373e-06, "loss": 0.1677, "step": 18489 }, { "epoch": 0.85, "grad_norm": 0.3374891337953125, "learning_rate": 1.1660574465449293e-06, "loss": 0.2741, "step": 18490 }, { "epoch": 0.85, "grad_norm": 0.4321489689657549, "learning_rate": 1.165360259934194e-06, "loss": 0.3, "step": 18491 }, { "epoch": 0.85, "grad_norm": 0.3102724751799246, "learning_rate": 1.164663268913866e-06, "loss": 0.18, "step": 18492 }, { "epoch": 0.85, "grad_norm": 0.4619999679870109, "learning_rate": 1.1639664734993783e-06, "loss": 0.2922, "step": 18493 }, { "epoch": 0.85, "grad_norm": 0.3656184367885025, "learning_rate": 1.1632698737061553e-06, "loss": 0.2843, "step": 18494 }, { "epoch": 0.85, "grad_norm": 0.303024305594965, "learning_rate": 1.162573469549624e-06, "loss": 0.1751, "step": 18495 }, { "epoch": 0.85, "grad_norm": 1.486096433897814, "learning_rate": 1.1618772610451956e-06, "loss": 0.4545, "step": 18496 }, { "epoch": 0.85, "grad_norm": 1.2467100525061277, "learning_rate": 1.1611812482082862e-06, "loss": 0.7977, "step": 18497 }, { "epoch": 0.85, "grad_norm": 0.27766831295425964, "learning_rate": 1.1604854310543068e-06, "loss": 0.242, "step": 18498 }, { "epoch": 0.85, "grad_norm": 0.34487378670716096, "learning_rate": 1.1597898095986582e-06, "loss": 0.1768, "step": 18499 }, { "epoch": 0.85, "grad_norm": 0.5153320010009277, "learning_rate": 1.159094383856746e-06, "loss": 0.291, "step": 18500 }, { "epoch": 0.85, "grad_norm": 0.33930731204218867, "learning_rate": 1.15839915384396e-06, "loss": 0.2343, "step": 18501 }, { "epoch": 0.85, "grad_norm": 0.37880821878352244, "learning_rate": 1.1577041195756954e-06, "loss": 0.2204, "step": 18502 }, { "epoch": 0.85, "grad_norm": 1.1606668863354124, "learning_rate": 1.1570092810673417e-06, "loss": 0.5956, "step": 18503 }, { "epoch": 0.85, "grad_norm": 0.4004540540598084, "learning_rate": 1.156314638334277e-06, "loss": 0.2616, "step": 18504 }, { "epoch": 0.85, "grad_norm": 0.45435963373724036, "learning_rate": 1.1556201913918852e-06, "loss": 0.2604, "step": 18505 }, { "epoch": 0.85, "grad_norm": 0.24849755773060228, "learning_rate": 1.1549259402555336e-06, "loss": 0.1974, "step": 18506 }, { "epoch": 0.85, "grad_norm": 0.6358200294507124, "learning_rate": 1.1542318849406008e-06, "loss": 0.294, "step": 18507 }, { "epoch": 0.85, "grad_norm": 0.4442642138296424, "learning_rate": 1.1535380254624485e-06, "loss": 0.3227, "step": 18508 }, { "epoch": 0.85, "grad_norm": 0.3333682951005025, "learning_rate": 1.152844361836435e-06, "loss": 0.258, "step": 18509 }, { "epoch": 0.85, "grad_norm": 0.43541507800716234, "learning_rate": 1.1521508940779214e-06, "loss": 0.2654, "step": 18510 }, { "epoch": 0.85, "grad_norm": 0.45583122652456276, "learning_rate": 1.1514576222022589e-06, "loss": 0.3023, "step": 18511 }, { "epoch": 0.85, "grad_norm": 0.25991239478952716, "learning_rate": 1.1507645462247985e-06, "loss": 0.0945, "step": 18512 }, { "epoch": 0.85, "grad_norm": 0.4177212449984254, "learning_rate": 1.15007166616088e-06, "loss": 0.2316, "step": 18513 }, { "epoch": 0.85, "grad_norm": 0.3317148581622481, "learning_rate": 1.149378982025845e-06, "loss": 0.291, "step": 18514 }, { "epoch": 0.85, "grad_norm": 1.1109337001691912, "learning_rate": 1.1486864938350317e-06, "loss": 0.4646, "step": 18515 }, { "epoch": 0.85, "grad_norm": 0.41470232740767804, "learning_rate": 1.147994201603766e-06, "loss": 0.305, "step": 18516 }, { "epoch": 0.85, "grad_norm": 0.5199809022853263, "learning_rate": 1.1473021053473787e-06, "loss": 0.3363, "step": 18517 }, { "epoch": 0.85, "grad_norm": 0.2259227779112923, "learning_rate": 1.146610205081189e-06, "loss": 0.1725, "step": 18518 }, { "epoch": 0.85, "grad_norm": 0.519393208762347, "learning_rate": 1.1459185008205154e-06, "loss": 0.2577, "step": 18519 }, { "epoch": 0.85, "grad_norm": 0.6156440578777506, "learning_rate": 1.1452269925806757e-06, "loss": 0.3412, "step": 18520 }, { "epoch": 0.85, "grad_norm": 0.4908314101488441, "learning_rate": 1.1445356803769736e-06, "loss": 0.3236, "step": 18521 }, { "epoch": 0.85, "grad_norm": 0.3056714739910209, "learning_rate": 1.1438445642247177e-06, "loss": 0.1986, "step": 18522 }, { "epoch": 0.85, "grad_norm": 0.5222418531004269, "learning_rate": 1.1431536441392045e-06, "loss": 0.3577, "step": 18523 }, { "epoch": 0.85, "grad_norm": 0.3275356557577568, "learning_rate": 1.1424629201357373e-06, "loss": 0.1731, "step": 18524 }, { "epoch": 0.85, "grad_norm": 0.31921402795743786, "learning_rate": 1.1417723922296008e-06, "loss": 0.0656, "step": 18525 }, { "epoch": 0.85, "grad_norm": 0.2915461046143253, "learning_rate": 1.1410820604360895e-06, "loss": 0.2662, "step": 18526 }, { "epoch": 0.85, "grad_norm": 0.6702393546228886, "learning_rate": 1.1403919247704799e-06, "loss": 0.363, "step": 18527 }, { "epoch": 0.85, "grad_norm": 0.7699350196582578, "learning_rate": 1.139701985248055e-06, "loss": 0.201, "step": 18528 }, { "epoch": 0.85, "grad_norm": 0.3486030571751547, "learning_rate": 1.1390122418840899e-06, "loss": 0.3031, "step": 18529 }, { "epoch": 0.85, "grad_norm": 0.5443888738535169, "learning_rate": 1.1383226946938508e-06, "loss": 0.3448, "step": 18530 }, { "epoch": 0.85, "grad_norm": 0.23996665621455848, "learning_rate": 1.137633343692609e-06, "loss": 0.1162, "step": 18531 }, { "epoch": 0.85, "grad_norm": 0.6117798410059235, "learning_rate": 1.1369441888956212e-06, "loss": 0.3571, "step": 18532 }, { "epoch": 0.85, "grad_norm": 0.5651548999230909, "learning_rate": 1.1362552303181473e-06, "loss": 0.3046, "step": 18533 }, { "epoch": 0.85, "grad_norm": 0.4433352793103084, "learning_rate": 1.1355664679754408e-06, "loss": 0.2676, "step": 18534 }, { "epoch": 0.85, "grad_norm": 0.37247945709177277, "learning_rate": 1.1348779018827472e-06, "loss": 0.2424, "step": 18535 }, { "epoch": 0.85, "grad_norm": 0.5865115031748358, "learning_rate": 1.1341895320553132e-06, "loss": 0.2615, "step": 18536 }, { "epoch": 0.85, "grad_norm": 0.31227244792105735, "learning_rate": 1.1335013585083797e-06, "loss": 0.2259, "step": 18537 }, { "epoch": 0.85, "grad_norm": 0.3225702320974326, "learning_rate": 1.1328133812571784e-06, "loss": 0.2181, "step": 18538 }, { "epoch": 0.85, "grad_norm": 0.6715692697466935, "learning_rate": 1.1321256003169445e-06, "loss": 0.392, "step": 18539 }, { "epoch": 0.85, "grad_norm": 0.575511639653504, "learning_rate": 1.131438015702898e-06, "loss": 0.316, "step": 18540 }, { "epoch": 0.85, "grad_norm": 0.4264359841903588, "learning_rate": 1.1307506274302715e-06, "loss": 0.2303, "step": 18541 }, { "epoch": 0.85, "grad_norm": 0.3889750839928973, "learning_rate": 1.1300634355142748e-06, "loss": 0.2894, "step": 18542 }, { "epoch": 0.85, "grad_norm": 0.2564367414375854, "learning_rate": 1.1293764399701269e-06, "loss": 0.1589, "step": 18543 }, { "epoch": 0.85, "grad_norm": 0.3522245713995217, "learning_rate": 1.1286896408130333e-06, "loss": 0.2197, "step": 18544 }, { "epoch": 0.85, "grad_norm": 0.3890666027816536, "learning_rate": 1.1280030380582007e-06, "loss": 0.2945, "step": 18545 }, { "epoch": 0.85, "grad_norm": 0.9348965771651383, "learning_rate": 1.1273166317208317e-06, "loss": 0.416, "step": 18546 }, { "epoch": 0.85, "grad_norm": 0.36063396892950506, "learning_rate": 1.1266304218161195e-06, "loss": 0.2831, "step": 18547 }, { "epoch": 0.85, "grad_norm": 0.9122101632942758, "learning_rate": 1.1259444083592585e-06, "loss": 0.2347, "step": 18548 }, { "epoch": 0.85, "grad_norm": 0.21611097335453472, "learning_rate": 1.1252585913654347e-06, "loss": 0.1902, "step": 18549 }, { "epoch": 0.85, "grad_norm": 0.4079413680629914, "learning_rate": 1.124572970849831e-06, "loss": 0.2586, "step": 18550 }, { "epoch": 0.85, "grad_norm": 0.7992349647426591, "learning_rate": 1.12388754682763e-06, "loss": 0.2708, "step": 18551 }, { "epoch": 0.85, "grad_norm": 0.6392882487613913, "learning_rate": 1.1232023193140018e-06, "loss": 0.364, "step": 18552 }, { "epoch": 0.85, "grad_norm": 0.3373874855017208, "learning_rate": 1.1225172883241187e-06, "loss": 0.2665, "step": 18553 }, { "epoch": 0.85, "grad_norm": 0.5934688049772899, "learning_rate": 1.1218324538731462e-06, "loss": 0.2202, "step": 18554 }, { "epoch": 0.85, "grad_norm": 0.4488962022313426, "learning_rate": 1.121147815976248e-06, "loss": 0.2197, "step": 18555 }, { "epoch": 0.85, "grad_norm": 0.4192671480643173, "learning_rate": 1.1204633746485806e-06, "loss": 0.3262, "step": 18556 }, { "epoch": 0.85, "grad_norm": 0.2931286517846229, "learning_rate": 1.1197791299052907e-06, "loss": 0.2039, "step": 18557 }, { "epoch": 0.85, "grad_norm": 0.7557320222532726, "learning_rate": 1.1190950817615375e-06, "loss": 0.3379, "step": 18558 }, { "epoch": 0.85, "grad_norm": 0.41521819051202913, "learning_rate": 1.1184112302324568e-06, "loss": 0.2919, "step": 18559 }, { "epoch": 0.85, "grad_norm": 1.415985480299404, "learning_rate": 1.1177275753331928e-06, "loss": 0.5429, "step": 18560 }, { "epoch": 0.85, "grad_norm": 0.35862678103296897, "learning_rate": 1.1170441170788782e-06, "loss": 0.2417, "step": 18561 }, { "epoch": 0.85, "grad_norm": 0.36005089519746364, "learning_rate": 1.116360855484645e-06, "loss": 0.2496, "step": 18562 }, { "epoch": 0.85, "grad_norm": 0.3429337690560491, "learning_rate": 1.1156777905656224e-06, "loss": 0.2259, "step": 18563 }, { "epoch": 0.85, "grad_norm": 0.5491816733173454, "learning_rate": 1.1149949223369282e-06, "loss": 0.2545, "step": 18564 }, { "epoch": 0.85, "grad_norm": 0.29465767492852074, "learning_rate": 1.1143122508136861e-06, "loss": 0.2559, "step": 18565 }, { "epoch": 0.85, "grad_norm": 0.5897542357407781, "learning_rate": 1.1136297760110038e-06, "loss": 0.3093, "step": 18566 }, { "epoch": 0.85, "grad_norm": 0.6711588156960568, "learning_rate": 1.1129474979439937e-06, "loss": 0.2528, "step": 18567 }, { "epoch": 0.85, "grad_norm": 0.3843502712966087, "learning_rate": 1.1122654166277624e-06, "loss": 0.2674, "step": 18568 }, { "epoch": 0.85, "grad_norm": 0.2714603605856902, "learning_rate": 1.111583532077407e-06, "loss": 0.1984, "step": 18569 }, { "epoch": 0.85, "grad_norm": 0.4865320618229059, "learning_rate": 1.1109018443080256e-06, "loss": 0.2508, "step": 18570 }, { "epoch": 0.85, "grad_norm": 0.39454255962482615, "learning_rate": 1.1102203533347089e-06, "loss": 0.2685, "step": 18571 }, { "epoch": 0.85, "grad_norm": 0.9876325015848909, "learning_rate": 1.1095390591725485e-06, "loss": 0.4191, "step": 18572 }, { "epoch": 0.85, "grad_norm": 0.3488915733101975, "learning_rate": 1.1088579618366235e-06, "loss": 0.2918, "step": 18573 }, { "epoch": 0.85, "grad_norm": 0.3382613753915631, "learning_rate": 1.1081770613420107e-06, "loss": 0.2211, "step": 18574 }, { "epoch": 0.85, "grad_norm": 0.40911927874198656, "learning_rate": 1.1074963577037912e-06, "loss": 0.1648, "step": 18575 }, { "epoch": 0.85, "grad_norm": 0.7227061940851612, "learning_rate": 1.1068158509370309e-06, "loss": 0.4664, "step": 18576 }, { "epoch": 0.85, "grad_norm": 0.28891944316542145, "learning_rate": 1.1061355410567965e-06, "loss": 0.2013, "step": 18577 }, { "epoch": 0.85, "grad_norm": 0.4777330129494525, "learning_rate": 1.1054554280781483e-06, "loss": 0.3262, "step": 18578 }, { "epoch": 0.85, "grad_norm": 1.4894182154734759, "learning_rate": 1.1047755120161441e-06, "loss": 0.3644, "step": 18579 }, { "epoch": 0.85, "grad_norm": 0.3323684411387164, "learning_rate": 1.1040957928858386e-06, "loss": 0.1797, "step": 18580 }, { "epoch": 0.85, "grad_norm": 0.24338116117250558, "learning_rate": 1.1034162707022765e-06, "loss": 0.2107, "step": 18581 }, { "epoch": 0.85, "grad_norm": 1.2970215337498758, "learning_rate": 1.1027369454805058e-06, "loss": 0.6948, "step": 18582 }, { "epoch": 0.85, "grad_norm": 0.33714501958101595, "learning_rate": 1.1020578172355611e-06, "loss": 0.1935, "step": 18583 }, { "epoch": 0.85, "grad_norm": 0.6550779644098745, "learning_rate": 1.1013788859824804e-06, "loss": 0.3527, "step": 18584 }, { "epoch": 0.85, "grad_norm": 0.37598662860874915, "learning_rate": 1.1007001517362969e-06, "loss": 0.3118, "step": 18585 }, { "epoch": 0.85, "grad_norm": 0.34706122862601296, "learning_rate": 1.1000216145120324e-06, "loss": 0.2601, "step": 18586 }, { "epoch": 0.85, "grad_norm": 0.43078861772849436, "learning_rate": 1.0993432743247123e-06, "loss": 0.0844, "step": 18587 }, { "epoch": 0.85, "grad_norm": 0.48965528302910083, "learning_rate": 1.0986651311893525e-06, "loss": 0.3587, "step": 18588 }, { "epoch": 0.85, "grad_norm": 0.27665785871164866, "learning_rate": 1.09798718512097e-06, "loss": 0.2205, "step": 18589 }, { "epoch": 0.85, "grad_norm": 0.5931830683625802, "learning_rate": 1.0973094361345694e-06, "loss": 0.2051, "step": 18590 }, { "epoch": 0.85, "grad_norm": 0.9625867285124854, "learning_rate": 1.0966318842451596e-06, "loss": 0.3589, "step": 18591 }, { "epoch": 0.85, "grad_norm": 0.42482197004674255, "learning_rate": 1.0959545294677366e-06, "loss": 0.2463, "step": 18592 }, { "epoch": 0.85, "grad_norm": 0.29328785612137515, "learning_rate": 1.0952773718172982e-06, "loss": 0.2328, "step": 18593 }, { "epoch": 0.85, "grad_norm": 0.8097767129126426, "learning_rate": 1.0946004113088381e-06, "loss": 0.515, "step": 18594 }, { "epoch": 0.85, "grad_norm": 0.38620183909029965, "learning_rate": 1.09392364795734e-06, "loss": 0.246, "step": 18595 }, { "epoch": 0.85, "grad_norm": 0.36582631139372745, "learning_rate": 1.0932470817777884e-06, "loss": 0.184, "step": 18596 }, { "epoch": 0.85, "grad_norm": 0.3932779388703519, "learning_rate": 1.0925707127851648e-06, "loss": 0.2911, "step": 18597 }, { "epoch": 0.85, "grad_norm": 0.34993401479634867, "learning_rate": 1.0918945409944382e-06, "loss": 0.2561, "step": 18598 }, { "epoch": 0.85, "grad_norm": 1.3545386966927844, "learning_rate": 1.0912185664205822e-06, "loss": 0.4351, "step": 18599 }, { "epoch": 0.85, "grad_norm": 0.4910395521720643, "learning_rate": 1.0905427890785569e-06, "loss": 0.2809, "step": 18600 }, { "epoch": 0.85, "grad_norm": 0.2736428517049694, "learning_rate": 1.0898672089833307e-06, "loss": 0.2591, "step": 18601 }, { "epoch": 0.85, "grad_norm": 0.45646161190044654, "learning_rate": 1.089191826149858e-06, "loss": 0.2671, "step": 18602 }, { "epoch": 0.85, "grad_norm": 0.40313714418714025, "learning_rate": 1.088516640593087e-06, "loss": 0.0856, "step": 18603 }, { "epoch": 0.85, "grad_norm": 0.37175668213173024, "learning_rate": 1.087841652327969e-06, "loss": 0.2435, "step": 18604 }, { "epoch": 0.85, "grad_norm": 0.41340277511544443, "learning_rate": 1.0871668613694465e-06, "loss": 0.2999, "step": 18605 }, { "epoch": 0.85, "grad_norm": 0.7825194010053912, "learning_rate": 1.086492267732462e-06, "loss": 0.3346, "step": 18606 }, { "epoch": 0.85, "grad_norm": 0.3235395377144184, "learning_rate": 1.0858178714319457e-06, "loss": 0.2616, "step": 18607 }, { "epoch": 0.85, "grad_norm": 0.9070543684926277, "learning_rate": 1.0851436724828323e-06, "loss": 0.5117, "step": 18608 }, { "epoch": 0.85, "grad_norm": 0.22887653680775552, "learning_rate": 1.0844696709000435e-06, "loss": 0.1607, "step": 18609 }, { "epoch": 0.85, "grad_norm": 0.4379293003026843, "learning_rate": 1.0837958666985038e-06, "loss": 0.2256, "step": 18610 }, { "epoch": 0.85, "grad_norm": 0.6428478315502666, "learning_rate": 1.0831222598931312e-06, "loss": 0.3397, "step": 18611 }, { "epoch": 0.86, "grad_norm": 0.3450514052541221, "learning_rate": 1.082448850498836e-06, "loss": 0.2865, "step": 18612 }, { "epoch": 0.86, "grad_norm": 0.40001107987512025, "learning_rate": 1.0817756385305278e-06, "loss": 0.1969, "step": 18613 }, { "epoch": 0.86, "grad_norm": 0.563998861088998, "learning_rate": 1.0811026240031142e-06, "loss": 0.3492, "step": 18614 }, { "epoch": 0.86, "grad_norm": 0.38194266795406717, "learning_rate": 1.08042980693149e-06, "loss": 0.1906, "step": 18615 }, { "epoch": 0.86, "grad_norm": 0.3059251536968794, "learning_rate": 1.0797571873305557e-06, "loss": 0.1833, "step": 18616 }, { "epoch": 0.86, "grad_norm": 0.3729300571650092, "learning_rate": 1.079084765215196e-06, "loss": 0.3221, "step": 18617 }, { "epoch": 0.86, "grad_norm": 0.7939708432623358, "learning_rate": 1.0784125406003044e-06, "loss": 0.3815, "step": 18618 }, { "epoch": 0.86, "grad_norm": 0.3226179182364412, "learning_rate": 1.077740513500759e-06, "loss": 0.206, "step": 18619 }, { "epoch": 0.86, "grad_norm": 0.494084465689211, "learning_rate": 1.0770686839314415e-06, "loss": 0.3778, "step": 18620 }, { "epoch": 0.86, "grad_norm": 0.2610899797124783, "learning_rate": 1.076397051907222e-06, "loss": 0.2062, "step": 18621 }, { "epoch": 0.86, "grad_norm": 0.36609197532263715, "learning_rate": 1.0757256174429686e-06, "loss": 0.1829, "step": 18622 }, { "epoch": 0.86, "grad_norm": 1.2806371649262305, "learning_rate": 1.075054380553552e-06, "loss": 0.4958, "step": 18623 }, { "epoch": 0.86, "grad_norm": 0.5224213459431699, "learning_rate": 1.0743833412538275e-06, "loss": 0.3136, "step": 18624 }, { "epoch": 0.86, "grad_norm": 0.33866442939322916, "learning_rate": 1.0737124995586556e-06, "loss": 0.2536, "step": 18625 }, { "epoch": 0.86, "grad_norm": 0.6136305449393494, "learning_rate": 1.0730418554828836e-06, "loss": 0.2376, "step": 18626 }, { "epoch": 0.86, "grad_norm": 0.2755812819570444, "learning_rate": 1.0723714090413607e-06, "loss": 0.1539, "step": 18627 }, { "epoch": 0.86, "grad_norm": 0.4479001748828852, "learning_rate": 1.0717011602489324e-06, "loss": 0.2721, "step": 18628 }, { "epoch": 0.86, "grad_norm": 0.3321924008905497, "learning_rate": 1.071031109120433e-06, "loss": 0.2413, "step": 18629 }, { "epoch": 0.86, "grad_norm": 0.6372198850325546, "learning_rate": 1.0703612556706988e-06, "loss": 0.4011, "step": 18630 }, { "epoch": 0.86, "grad_norm": 0.588652120811114, "learning_rate": 1.0696915999145629e-06, "loss": 0.304, "step": 18631 }, { "epoch": 0.86, "grad_norm": 0.3489001048349934, "learning_rate": 1.0690221418668444e-06, "loss": 0.2321, "step": 18632 }, { "epoch": 0.86, "grad_norm": 0.26724561066474967, "learning_rate": 1.0683528815423705e-06, "loss": 0.1756, "step": 18633 }, { "epoch": 0.86, "grad_norm": 0.6235706838075445, "learning_rate": 1.0676838189559524e-06, "loss": 0.3158, "step": 18634 }, { "epoch": 0.86, "grad_norm": 0.46316771451345295, "learning_rate": 1.0670149541224085e-06, "loss": 0.2717, "step": 18635 }, { "epoch": 0.86, "grad_norm": 0.4888647477409954, "learning_rate": 1.066346287056541e-06, "loss": 0.2817, "step": 18636 }, { "epoch": 0.86, "grad_norm": 0.34003879596702186, "learning_rate": 1.0656778177731597e-06, "loss": 0.2727, "step": 18637 }, { "epoch": 0.86, "grad_norm": 0.6221216111175275, "learning_rate": 1.0650095462870602e-06, "loss": 0.3265, "step": 18638 }, { "epoch": 0.86, "grad_norm": 0.1710092640478974, "learning_rate": 1.064341472613033e-06, "loss": 0.0751, "step": 18639 }, { "epoch": 0.86, "grad_norm": 0.3756051190254683, "learning_rate": 1.0636735967658785e-06, "loss": 0.2493, "step": 18640 }, { "epoch": 0.86, "grad_norm": 0.4166444089144006, "learning_rate": 1.0630059187603748e-06, "loss": 0.306, "step": 18641 }, { "epoch": 0.86, "grad_norm": 0.6917105693781808, "learning_rate": 1.0623384386113088e-06, "loss": 0.3175, "step": 18642 }, { "epoch": 0.86, "grad_norm": 0.434312006734094, "learning_rate": 1.0616711563334537e-06, "loss": 0.2624, "step": 18643 }, { "epoch": 0.86, "grad_norm": 0.5482271338488345, "learning_rate": 1.0610040719415838e-06, "loss": 0.3468, "step": 18644 }, { "epoch": 0.86, "grad_norm": 0.2673379206838608, "learning_rate": 1.0603371854504696e-06, "loss": 0.1656, "step": 18645 }, { "epoch": 0.86, "grad_norm": 0.7043577054954441, "learning_rate": 1.0596704968748727e-06, "loss": 0.2791, "step": 18646 }, { "epoch": 0.86, "grad_norm": 0.41984829674476787, "learning_rate": 1.059004006229555e-06, "loss": 0.3107, "step": 18647 }, { "epoch": 0.86, "grad_norm": 0.3777457374221175, "learning_rate": 1.0583377135292728e-06, "loss": 0.2592, "step": 18648 }, { "epoch": 0.86, "grad_norm": 0.36622885453214377, "learning_rate": 1.0576716187887726e-06, "loss": 0.1565, "step": 18649 }, { "epoch": 0.86, "grad_norm": 0.40042926644571364, "learning_rate": 1.057005722022807e-06, "loss": 0.2925, "step": 18650 }, { "epoch": 0.86, "grad_norm": 0.51386520284655, "learning_rate": 1.056340023246113e-06, "loss": 0.3168, "step": 18651 }, { "epoch": 0.86, "grad_norm": 0.3774279752829283, "learning_rate": 1.055674522473431e-06, "loss": 0.2032, "step": 18652 }, { "epoch": 0.86, "grad_norm": 0.3111044716345392, "learning_rate": 1.0550092197194939e-06, "loss": 0.2788, "step": 18653 }, { "epoch": 0.86, "grad_norm": 0.4923543900579858, "learning_rate": 1.054344114999034e-06, "loss": 0.2927, "step": 18654 }, { "epoch": 0.86, "grad_norm": 0.7031399097394464, "learning_rate": 1.053679208326771e-06, "loss": 0.2116, "step": 18655 }, { "epoch": 0.86, "grad_norm": 0.2990698927859586, "learning_rate": 1.0530144997174275e-06, "loss": 0.2706, "step": 18656 }, { "epoch": 0.86, "grad_norm": 1.3921179864236217, "learning_rate": 1.0523499891857226e-06, "loss": 0.5001, "step": 18657 }, { "epoch": 0.86, "grad_norm": 0.4204023472048564, "learning_rate": 1.0516856767463624e-06, "loss": 0.216, "step": 18658 }, { "epoch": 0.86, "grad_norm": 0.5742226738943116, "learning_rate": 1.0510215624140596e-06, "loss": 0.3333, "step": 18659 }, { "epoch": 0.86, "grad_norm": 0.23166906659247136, "learning_rate": 1.0503576462035113e-06, "loss": 0.2067, "step": 18660 }, { "epoch": 0.86, "grad_norm": 0.5574536246054336, "learning_rate": 1.0496939281294193e-06, "loss": 0.3572, "step": 18661 }, { "epoch": 0.86, "grad_norm": 0.4412555692493903, "learning_rate": 1.0490304082064795e-06, "loss": 0.2066, "step": 18662 }, { "epoch": 0.86, "grad_norm": 0.6537779637516484, "learning_rate": 1.0483670864493777e-06, "loss": 0.3094, "step": 18663 }, { "epoch": 0.86, "grad_norm": 0.35007260553789427, "learning_rate": 1.0477039628728002e-06, "loss": 0.2793, "step": 18664 }, { "epoch": 0.86, "grad_norm": 0.3365208261101066, "learning_rate": 1.0470410374914286e-06, "loss": 0.1929, "step": 18665 }, { "epoch": 0.86, "grad_norm": 0.28270174219138705, "learning_rate": 1.046378310319942e-06, "loss": 0.2013, "step": 18666 }, { "epoch": 0.86, "grad_norm": 1.659825124549638, "learning_rate": 1.0457157813730102e-06, "loss": 0.6363, "step": 18667 }, { "epoch": 0.86, "grad_norm": 0.24674942813648695, "learning_rate": 1.0450534506652987e-06, "loss": 0.2024, "step": 18668 }, { "epoch": 0.86, "grad_norm": 0.7016017250668796, "learning_rate": 1.0443913182114717e-06, "loss": 0.3626, "step": 18669 }, { "epoch": 0.86, "grad_norm": 0.7889914087559371, "learning_rate": 1.0437293840261908e-06, "loss": 0.3928, "step": 18670 }, { "epoch": 0.86, "grad_norm": 0.23909162718263413, "learning_rate": 1.0430676481241108e-06, "loss": 0.1554, "step": 18671 }, { "epoch": 0.86, "grad_norm": 0.3612949786482525, "learning_rate": 1.042406110519878e-06, "loss": 0.2979, "step": 18672 }, { "epoch": 0.86, "grad_norm": 0.39391885551147027, "learning_rate": 1.0417447712281403e-06, "loss": 0.2506, "step": 18673 }, { "epoch": 0.86, "grad_norm": 0.42370122899481627, "learning_rate": 1.0410836302635418e-06, "loss": 0.2788, "step": 18674 }, { "epoch": 0.86, "grad_norm": 0.7576182433128071, "learning_rate": 1.0404226876407142e-06, "loss": 0.2873, "step": 18675 }, { "epoch": 0.86, "grad_norm": 0.3436602279063442, "learning_rate": 1.0397619433742955e-06, "loss": 0.2553, "step": 18676 }, { "epoch": 0.86, "grad_norm": 0.4316210695604483, "learning_rate": 1.03910139747891e-06, "loss": 0.2468, "step": 18677 }, { "epoch": 0.86, "grad_norm": 0.49456526780372245, "learning_rate": 1.0384410499691821e-06, "loss": 0.156, "step": 18678 }, { "epoch": 0.86, "grad_norm": 0.39545731892894964, "learning_rate": 1.037780900859735e-06, "loss": 0.1928, "step": 18679 }, { "epoch": 0.86, "grad_norm": 0.40266402838437787, "learning_rate": 1.037120950165178e-06, "loss": 0.282, "step": 18680 }, { "epoch": 0.86, "grad_norm": 0.34984594338121056, "learning_rate": 1.036461197900126e-06, "loss": 0.2542, "step": 18681 }, { "epoch": 0.86, "grad_norm": 0.8954710541362723, "learning_rate": 1.0358016440791818e-06, "loss": 0.4169, "step": 18682 }, { "epoch": 0.86, "grad_norm": 0.39294471400605707, "learning_rate": 1.0351422887169515e-06, "loss": 0.2771, "step": 18683 }, { "epoch": 0.86, "grad_norm": 0.3648169587476222, "learning_rate": 1.03448313182803e-06, "loss": 0.2684, "step": 18684 }, { "epoch": 0.86, "grad_norm": 0.3362345811443943, "learning_rate": 1.0338241734270116e-06, "loss": 0.1983, "step": 18685 }, { "epoch": 0.86, "grad_norm": 0.4341480724136818, "learning_rate": 1.033165413528483e-06, "loss": 0.2504, "step": 18686 }, { "epoch": 0.86, "grad_norm": 0.6709914646444647, "learning_rate": 1.0325068521470294e-06, "loss": 0.3518, "step": 18687 }, { "epoch": 0.86, "grad_norm": 0.36518393518362713, "learning_rate": 1.0318484892972336e-06, "loss": 0.2271, "step": 18688 }, { "epoch": 0.86, "grad_norm": 0.33176595550482674, "learning_rate": 1.031190324993666e-06, "loss": 0.2379, "step": 18689 }, { "epoch": 0.86, "grad_norm": 1.3446727491137787, "learning_rate": 1.030532359250901e-06, "loss": 0.7188, "step": 18690 }, { "epoch": 0.86, "grad_norm": 0.7046058476389304, "learning_rate": 1.0298745920835073e-06, "loss": 0.2483, "step": 18691 }, { "epoch": 0.86, "grad_norm": 0.26533624323119187, "learning_rate": 1.0292170235060417e-06, "loss": 0.242, "step": 18692 }, { "epoch": 0.86, "grad_norm": 0.44979835337151663, "learning_rate": 1.0285596535330667e-06, "loss": 0.2625, "step": 18693 }, { "epoch": 0.86, "grad_norm": 0.4152663737001791, "learning_rate": 1.0279024821791306e-06, "loss": 0.1064, "step": 18694 }, { "epoch": 0.86, "grad_norm": 0.4107879309756198, "learning_rate": 1.0272455094587896e-06, "loss": 0.2779, "step": 18695 }, { "epoch": 0.86, "grad_norm": 0.3818477542731597, "learning_rate": 1.0265887353865856e-06, "loss": 0.3025, "step": 18696 }, { "epoch": 0.86, "grad_norm": 0.5922981129737003, "learning_rate": 1.0259321599770566e-06, "loss": 0.2612, "step": 18697 }, { "epoch": 0.86, "grad_norm": 0.37941957303468116, "learning_rate": 1.0252757832447424e-06, "loss": 0.2739, "step": 18698 }, { "epoch": 0.86, "grad_norm": 0.31556211697239017, "learning_rate": 1.024619605204168e-06, "loss": 0.1895, "step": 18699 }, { "epoch": 0.86, "grad_norm": 0.3344852281024109, "learning_rate": 1.0239636258698683e-06, "loss": 0.2608, "step": 18700 }, { "epoch": 0.86, "grad_norm": 0.41119821361503944, "learning_rate": 1.0233078452563617e-06, "loss": 0.2057, "step": 18701 }, { "epoch": 0.86, "grad_norm": 1.2572827350304026, "learning_rate": 1.0226522633781688e-06, "loss": 0.6748, "step": 18702 }, { "epoch": 0.86, "grad_norm": 0.7157911739472715, "learning_rate": 1.0219968802498004e-06, "loss": 0.3409, "step": 18703 }, { "epoch": 0.86, "grad_norm": 0.23008948592140596, "learning_rate": 1.021341695885768e-06, "loss": 0.2162, "step": 18704 }, { "epoch": 0.86, "grad_norm": 0.3741882240694689, "learning_rate": 1.020686710300579e-06, "loss": 0.187, "step": 18705 }, { "epoch": 0.86, "grad_norm": 1.5313109842837906, "learning_rate": 1.0200319235087297e-06, "loss": 0.7347, "step": 18706 }, { "epoch": 0.86, "grad_norm": 0.34298063130928497, "learning_rate": 1.0193773355247183e-06, "loss": 0.2025, "step": 18707 }, { "epoch": 0.86, "grad_norm": 0.37870020913210695, "learning_rate": 1.01872294636304e-06, "loss": 0.3148, "step": 18708 }, { "epoch": 0.86, "grad_norm": 0.6182895505595318, "learning_rate": 1.0180687560381764e-06, "loss": 0.365, "step": 18709 }, { "epoch": 0.86, "grad_norm": 0.34425286500000357, "learning_rate": 1.017414764564616e-06, "loss": 0.202, "step": 18710 }, { "epoch": 0.86, "grad_norm": 0.32506380196461593, "learning_rate": 1.016760971956834e-06, "loss": 0.1814, "step": 18711 }, { "epoch": 0.86, "grad_norm": 0.4006755358089029, "learning_rate": 1.0161073782293051e-06, "loss": 0.2951, "step": 18712 }, { "epoch": 0.86, "grad_norm": 0.35815340975359067, "learning_rate": 1.0154539833964994e-06, "loss": 0.2658, "step": 18713 }, { "epoch": 0.86, "grad_norm": 0.8393129519555721, "learning_rate": 1.014800787472886e-06, "loss": 0.3144, "step": 18714 }, { "epoch": 0.86, "grad_norm": 0.3909485591427999, "learning_rate": 1.0141477904729225e-06, "loss": 0.3407, "step": 18715 }, { "epoch": 0.86, "grad_norm": 0.4653553348789618, "learning_rate": 1.0134949924110627e-06, "loss": 0.3015, "step": 18716 }, { "epoch": 0.86, "grad_norm": 0.36826369904815975, "learning_rate": 1.0128423933017674e-06, "loss": 0.1621, "step": 18717 }, { "epoch": 0.86, "grad_norm": 0.5138433275092511, "learning_rate": 1.0121899931594758e-06, "loss": 0.2427, "step": 18718 }, { "epoch": 0.86, "grad_norm": 0.4268475825697538, "learning_rate": 1.011537791998638e-06, "loss": 0.2418, "step": 18719 }, { "epoch": 0.86, "grad_norm": 0.3213659931256797, "learning_rate": 1.0108857898336887e-06, "loss": 0.2468, "step": 18720 }, { "epoch": 0.86, "grad_norm": 0.780571705896692, "learning_rate": 1.0102339866790633e-06, "loss": 0.4253, "step": 18721 }, { "epoch": 0.86, "grad_norm": 0.33284583107544924, "learning_rate": 1.0095823825491957e-06, "loss": 0.2841, "step": 18722 }, { "epoch": 0.86, "grad_norm": 0.39106314806121506, "learning_rate": 1.0089309774585066e-06, "loss": 0.2499, "step": 18723 }, { "epoch": 0.86, "grad_norm": 0.3467714150334075, "learning_rate": 1.008279771421421e-06, "loss": 0.1785, "step": 18724 }, { "epoch": 0.86, "grad_norm": 0.358761772292316, "learning_rate": 1.0076287644523552e-06, "loss": 0.2671, "step": 18725 }, { "epoch": 0.86, "grad_norm": 0.7464391080990729, "learning_rate": 1.0069779565657212e-06, "loss": 0.3853, "step": 18726 }, { "epoch": 0.86, "grad_norm": 0.37560015677486686, "learning_rate": 1.0063273477759283e-06, "loss": 0.2619, "step": 18727 }, { "epoch": 0.86, "grad_norm": 0.35161126729687153, "learning_rate": 1.0056769380973785e-06, "loss": 0.2539, "step": 18728 }, { "epoch": 0.86, "grad_norm": 1.4776399258691444, "learning_rate": 1.0050267275444725e-06, "loss": 0.4742, "step": 18729 }, { "epoch": 0.86, "grad_norm": 0.29672215732189927, "learning_rate": 1.0043767161316053e-06, "loss": 0.115, "step": 18730 }, { "epoch": 0.86, "grad_norm": 0.42685329150290496, "learning_rate": 1.0037269038731689e-06, "loss": 0.2683, "step": 18731 }, { "epoch": 0.86, "grad_norm": 0.3035689156763195, "learning_rate": 1.0030772907835484e-06, "loss": 0.2751, "step": 18732 }, { "epoch": 0.86, "grad_norm": 0.9528861895836189, "learning_rate": 1.0024278768771223e-06, "loss": 0.3518, "step": 18733 }, { "epoch": 0.86, "grad_norm": 0.4244016067804892, "learning_rate": 1.0017786621682734e-06, "loss": 0.2457, "step": 18734 }, { "epoch": 0.86, "grad_norm": 0.6498856869623918, "learning_rate": 1.0011296466713717e-06, "loss": 0.3071, "step": 18735 }, { "epoch": 0.86, "grad_norm": 0.2180288729217266, "learning_rate": 1.0004808304007873e-06, "loss": 0.1778, "step": 18736 }, { "epoch": 0.86, "grad_norm": 0.44904866034245694, "learning_rate": 9.998322133708827e-07, "loss": 0.2786, "step": 18737 }, { "epoch": 0.86, "grad_norm": 0.561620195355768, "learning_rate": 9.991837955960171e-07, "loss": 0.331, "step": 18738 }, { "epoch": 0.86, "grad_norm": 0.3642671711563693, "learning_rate": 9.985355770905502e-07, "loss": 0.3028, "step": 18739 }, { "epoch": 0.86, "grad_norm": 0.3803745405446701, "learning_rate": 9.978875578688274e-07, "loss": 0.1789, "step": 18740 }, { "epoch": 0.86, "grad_norm": 0.794093409662379, "learning_rate": 9.972397379452003e-07, "loss": 0.3393, "step": 18741 }, { "epoch": 0.86, "grad_norm": 0.30784399288001274, "learning_rate": 9.965921173340054e-07, "loss": 0.1833, "step": 18742 }, { "epoch": 0.86, "grad_norm": 0.38089923198040615, "learning_rate": 9.959446960495845e-07, "loss": 0.2232, "step": 18743 }, { "epoch": 0.86, "grad_norm": 0.3727951098394824, "learning_rate": 9.952974741062704e-07, "loss": 0.2799, "step": 18744 }, { "epoch": 0.86, "grad_norm": 1.1714135737909928, "learning_rate": 9.946504515183909e-07, "loss": 0.7299, "step": 18745 }, { "epoch": 0.86, "grad_norm": 0.4089982340334295, "learning_rate": 9.940036283002695e-07, "loss": 0.1972, "step": 18746 }, { "epoch": 0.86, "grad_norm": 1.677128203283916, "learning_rate": 9.93357004466229e-07, "loss": 0.5986, "step": 18747 }, { "epoch": 0.86, "grad_norm": 0.3385923689132285, "learning_rate": 9.927105800305858e-07, "loss": 0.2825, "step": 18748 }, { "epoch": 0.86, "grad_norm": 0.4541128933262122, "learning_rate": 9.92064355007646e-07, "loss": 0.2847, "step": 18749 }, { "epoch": 0.86, "grad_norm": 0.25828813050281324, "learning_rate": 9.914183294117197e-07, "loss": 0.1232, "step": 18750 }, { "epoch": 0.86, "grad_norm": 0.37063286003405876, "learning_rate": 9.907725032571113e-07, "loss": 0.3158, "step": 18751 }, { "epoch": 0.86, "grad_norm": 0.8602429738185712, "learning_rate": 9.90126876558114e-07, "loss": 0.4475, "step": 18752 }, { "epoch": 0.86, "grad_norm": 0.3524135840110389, "learning_rate": 9.89481449329026e-07, "loss": 0.1981, "step": 18753 }, { "epoch": 0.86, "grad_norm": 0.6756599718420799, "learning_rate": 9.88836221584133e-07, "loss": 0.3721, "step": 18754 }, { "epoch": 0.86, "grad_norm": 0.4053602365209885, "learning_rate": 9.881911933377197e-07, "loss": 0.2725, "step": 18755 }, { "epoch": 0.86, "grad_norm": 0.24205991513098726, "learning_rate": 9.875463646040706e-07, "loss": 0.1691, "step": 18756 }, { "epoch": 0.86, "grad_norm": 1.3178938304793124, "learning_rate": 9.869017353974563e-07, "loss": 0.7411, "step": 18757 }, { "epoch": 0.86, "grad_norm": 0.6825938190038657, "learning_rate": 9.862573057321535e-07, "loss": 0.285, "step": 18758 }, { "epoch": 0.86, "grad_norm": 0.28339053429332756, "learning_rate": 9.856130756224214e-07, "loss": 0.2393, "step": 18759 }, { "epoch": 0.86, "grad_norm": 0.654179414906362, "learning_rate": 9.84969045082531e-07, "loss": 0.3828, "step": 18760 }, { "epoch": 0.86, "grad_norm": 0.6054461044150481, "learning_rate": 9.84325214126739e-07, "loss": 0.349, "step": 18761 }, { "epoch": 0.86, "grad_norm": 0.2407489920656504, "learning_rate": 9.836815827692936e-07, "loss": 0.2043, "step": 18762 }, { "epoch": 0.86, "grad_norm": 0.35465522223014795, "learning_rate": 9.830381510244491e-07, "loss": 0.2598, "step": 18763 }, { "epoch": 0.86, "grad_norm": 0.5081768619928726, "learning_rate": 9.823949189064486e-07, "loss": 0.2663, "step": 18764 }, { "epoch": 0.86, "grad_norm": 0.7252849209384994, "learning_rate": 9.817518864295362e-07, "loss": 0.3483, "step": 18765 }, { "epoch": 0.86, "grad_norm": 0.7247961035079349, "learning_rate": 9.811090536079426e-07, "loss": 0.2804, "step": 18766 }, { "epoch": 0.86, "grad_norm": 0.2959093990059026, "learning_rate": 9.804664204559012e-07, "loss": 0.2483, "step": 18767 }, { "epoch": 0.86, "grad_norm": 0.5427880511897198, "learning_rate": 9.798239869876435e-07, "loss": 0.3219, "step": 18768 }, { "epoch": 0.86, "grad_norm": 0.1983111592098258, "learning_rate": 9.791817532173864e-07, "loss": 0.1259, "step": 18769 }, { "epoch": 0.86, "grad_norm": 0.9812437413922588, "learning_rate": 9.785397191593527e-07, "loss": 0.4212, "step": 18770 }, { "epoch": 0.86, "grad_norm": 0.30938243251541014, "learning_rate": 9.77897884827752e-07, "loss": 0.2794, "step": 18771 }, { "epoch": 0.86, "grad_norm": 0.41971397759887613, "learning_rate": 9.772562502367976e-07, "loss": 0.2564, "step": 18772 }, { "epoch": 0.86, "grad_norm": 0.970521899034133, "learning_rate": 9.766148154006948e-07, "loss": 0.4832, "step": 18773 }, { "epoch": 0.86, "grad_norm": 0.3058992575342056, "learning_rate": 9.759735803336424e-07, "loss": 0.2197, "step": 18774 }, { "epoch": 0.86, "grad_norm": 0.3813923248200112, "learning_rate": 9.753325450498386e-07, "loss": 0.297, "step": 18775 }, { "epoch": 0.86, "grad_norm": 0.2935622369039107, "learning_rate": 9.7469170956347e-07, "loss": 0.1047, "step": 18776 }, { "epoch": 0.86, "grad_norm": 0.37754125436616504, "learning_rate": 9.740510738887322e-07, "loss": 0.2866, "step": 18777 }, { "epoch": 0.86, "grad_norm": 0.9682928029472468, "learning_rate": 9.734106380398022e-07, "loss": 0.4539, "step": 18778 }, { "epoch": 0.86, "grad_norm": 0.3432623747260534, "learning_rate": 9.727704020308638e-07, "loss": 0.1923, "step": 18779 }, { "epoch": 0.86, "grad_norm": 0.45355040898575505, "learning_rate": 9.72130365876085e-07, "loss": 0.2924, "step": 18780 }, { "epoch": 0.86, "grad_norm": 1.2389473549267436, "learning_rate": 9.714905295896393e-07, "loss": 0.7132, "step": 18781 }, { "epoch": 0.86, "grad_norm": 0.4757175651789911, "learning_rate": 9.70850893185693e-07, "loss": 0.1779, "step": 18782 }, { "epoch": 0.86, "grad_norm": 0.34493713925595115, "learning_rate": 9.702114566784049e-07, "loss": 0.284, "step": 18783 }, { "epoch": 0.86, "grad_norm": 0.36410437011753966, "learning_rate": 9.695722200819301e-07, "loss": 0.2522, "step": 18784 }, { "epoch": 0.86, "grad_norm": 0.4552950027817988, "learning_rate": 9.689331834104266e-07, "loss": 0.1615, "step": 18785 }, { "epoch": 0.86, "grad_norm": 0.574644251361816, "learning_rate": 9.682943466780348e-07, "loss": 0.357, "step": 18786 }, { "epoch": 0.86, "grad_norm": 0.3806069756340322, "learning_rate": 9.676557098989036e-07, "loss": 0.3252, "step": 18787 }, { "epoch": 0.86, "grad_norm": 0.6665581065850456, "learning_rate": 9.670172730871674e-07, "loss": 0.3325, "step": 18788 }, { "epoch": 0.86, "grad_norm": 0.35336607403707654, "learning_rate": 9.663790362569637e-07, "loss": 0.2202, "step": 18789 }, { "epoch": 0.86, "grad_norm": 0.3332132134959122, "learning_rate": 9.65740999422422e-07, "loss": 0.156, "step": 18790 }, { "epoch": 0.86, "grad_norm": 0.3522909716667261, "learning_rate": 9.65103162597666e-07, "loss": 0.2661, "step": 18791 }, { "epoch": 0.86, "grad_norm": 0.2976605399255977, "learning_rate": 9.644655257968204e-07, "loss": 0.1868, "step": 18792 }, { "epoch": 0.86, "grad_norm": 1.2195101490674904, "learning_rate": 9.638280890339945e-07, "loss": 0.5236, "step": 18793 }, { "epoch": 0.86, "grad_norm": 0.46289429444153046, "learning_rate": 9.631908523233102e-07, "loss": 0.3544, "step": 18794 }, { "epoch": 0.86, "grad_norm": 0.28082337963282467, "learning_rate": 9.625538156788683e-07, "loss": 0.2128, "step": 18795 }, { "epoch": 0.86, "grad_norm": 0.3681580811434375, "learning_rate": 9.619169791147775e-07, "loss": 0.1702, "step": 18796 }, { "epoch": 0.86, "grad_norm": 0.9510674321589174, "learning_rate": 9.61280342645131e-07, "loss": 0.3396, "step": 18797 }, { "epoch": 0.86, "grad_norm": 0.3352421777033682, "learning_rate": 9.606439062840256e-07, "loss": 0.2256, "step": 18798 }, { "epoch": 0.86, "grad_norm": 0.3740895714491285, "learning_rate": 9.60007670045554e-07, "loss": 0.2849, "step": 18799 }, { "epoch": 0.86, "grad_norm": 0.7074572732064504, "learning_rate": 9.593716339437986e-07, "loss": 0.3434, "step": 18800 }, { "epoch": 0.86, "grad_norm": 0.3824209631427379, "learning_rate": 9.587357979928414e-07, "loss": 0.2675, "step": 18801 }, { "epoch": 0.86, "grad_norm": 0.29972127657285114, "learning_rate": 9.581001622067609e-07, "loss": 0.0784, "step": 18802 }, { "epoch": 0.86, "grad_norm": 0.34422641886103383, "learning_rate": 9.574647265996272e-07, "loss": 0.2749, "step": 18803 }, { "epoch": 0.86, "grad_norm": 0.3940740043548812, "learning_rate": 9.568294911855102e-07, "loss": 0.2785, "step": 18804 }, { "epoch": 0.86, "grad_norm": 0.6755693768932453, "learning_rate": 9.561944559784708e-07, "loss": 0.3163, "step": 18805 }, { "epoch": 0.86, "grad_norm": 0.4513552119735532, "learning_rate": 9.555596209925687e-07, "loss": 0.3109, "step": 18806 }, { "epoch": 0.86, "grad_norm": 0.298287075164543, "learning_rate": 9.54924986241863e-07, "loss": 0.2463, "step": 18807 }, { "epoch": 0.86, "grad_norm": 0.3326941182035881, "learning_rate": 9.542905517403977e-07, "loss": 0.0807, "step": 18808 }, { "epoch": 0.86, "grad_norm": 1.500076913625921, "learning_rate": 9.536563175022229e-07, "loss": 0.5835, "step": 18809 }, { "epoch": 0.86, "grad_norm": 0.3236400834489422, "learning_rate": 9.530222835413739e-07, "loss": 0.2462, "step": 18810 }, { "epoch": 0.86, "grad_norm": 0.3555063437134827, "learning_rate": 9.523884498718972e-07, "loss": 0.2483, "step": 18811 }, { "epoch": 0.86, "grad_norm": 0.9665254133902232, "learning_rate": 9.517548165078173e-07, "loss": 0.4433, "step": 18812 }, { "epoch": 0.86, "grad_norm": 0.4187777398010818, "learning_rate": 9.51121383463166e-07, "loss": 0.2498, "step": 18813 }, { "epoch": 0.86, "grad_norm": 0.278632084100606, "learning_rate": 9.504881507519658e-07, "loss": 0.1549, "step": 18814 }, { "epoch": 0.86, "grad_norm": 0.3888897105802697, "learning_rate": 9.498551183882343e-07, "loss": 0.2407, "step": 18815 }, { "epoch": 0.86, "grad_norm": 0.33762901189885725, "learning_rate": 9.492222863859912e-07, "loss": 0.2557, "step": 18816 }, { "epoch": 0.86, "grad_norm": 0.6936415221550374, "learning_rate": 9.4858965475924e-07, "loss": 0.36, "step": 18817 }, { "epoch": 0.86, "grad_norm": 0.4046094084143305, "learning_rate": 9.479572235219925e-07, "loss": 0.2572, "step": 18818 }, { "epoch": 0.86, "grad_norm": 0.3495032573166012, "learning_rate": 9.473249926882466e-07, "loss": 0.2336, "step": 18819 }, { "epoch": 0.86, "grad_norm": 0.4100858800528111, "learning_rate": 9.46692962271999e-07, "loss": 0.2387, "step": 18820 }, { "epoch": 0.86, "grad_norm": 0.4299534095699312, "learning_rate": 9.46061132287246e-07, "loss": 0.1493, "step": 18821 }, { "epoch": 0.86, "grad_norm": 0.37438787638157117, "learning_rate": 9.454295027479709e-07, "loss": 0.26, "step": 18822 }, { "epoch": 0.86, "grad_norm": 0.3893512213958239, "learning_rate": 9.447980736681606e-07, "loss": 0.3063, "step": 18823 }, { "epoch": 0.86, "grad_norm": 1.7994330914516738, "learning_rate": 9.441668450617924e-07, "loss": 0.3876, "step": 18824 }, { "epoch": 0.86, "grad_norm": 0.3486993986514949, "learning_rate": 9.435358169428444e-07, "loss": 0.2368, "step": 18825 }, { "epoch": 0.86, "grad_norm": 0.26104371871676996, "learning_rate": 9.429049893252851e-07, "loss": 0.2094, "step": 18826 }, { "epoch": 0.86, "grad_norm": 0.5398834996541773, "learning_rate": 9.422743622230757e-07, "loss": 0.2779, "step": 18827 }, { "epoch": 0.86, "grad_norm": 0.3209426612002473, "learning_rate": 9.416439356501861e-07, "loss": 0.2043, "step": 18828 }, { "epoch": 0.86, "grad_norm": 0.8734088595990264, "learning_rate": 9.410137096205674e-07, "loss": 0.4558, "step": 18829 }, { "epoch": 0.87, "grad_norm": 0.5920457479640567, "learning_rate": 9.40383684148175e-07, "loss": 0.3282, "step": 18830 }, { "epoch": 0.87, "grad_norm": 0.2923753763290717, "learning_rate": 9.397538592469557e-07, "loss": 0.2016, "step": 18831 }, { "epoch": 0.87, "grad_norm": 1.6326765195036865, "learning_rate": 9.391242349308527e-07, "loss": 0.6477, "step": 18832 }, { "epoch": 0.87, "grad_norm": 0.3504124623178383, "learning_rate": 9.384948112138082e-07, "loss": 0.2105, "step": 18833 }, { "epoch": 0.87, "grad_norm": 0.29447065809239237, "learning_rate": 9.37865588109752e-07, "loss": 0.2246, "step": 18834 }, { "epoch": 0.87, "grad_norm": 0.5704340579896158, "learning_rate": 9.37236565632621e-07, "loss": 0.3827, "step": 18835 }, { "epoch": 0.87, "grad_norm": 1.363555127578698, "learning_rate": 9.36607743796335e-07, "loss": 0.6199, "step": 18836 }, { "epoch": 0.87, "grad_norm": 0.3438149296795903, "learning_rate": 9.359791226148185e-07, "loss": 0.1512, "step": 18837 }, { "epoch": 0.87, "grad_norm": 0.3397907413118579, "learning_rate": 9.353507021019892e-07, "loss": 0.2495, "step": 18838 }, { "epoch": 0.87, "grad_norm": 0.368151597078527, "learning_rate": 9.347224822717571e-07, "loss": 0.2392, "step": 18839 }, { "epoch": 0.87, "grad_norm": 0.4287633962978534, "learning_rate": 9.340944631380333e-07, "loss": 0.2851, "step": 18840 }, { "epoch": 0.87, "grad_norm": 0.5944363968693355, "learning_rate": 9.334666447147189e-07, "loss": 0.2857, "step": 18841 }, { "epoch": 0.87, "grad_norm": 0.3630627966509355, "learning_rate": 9.328390270157172e-07, "loss": 0.2856, "step": 18842 }, { "epoch": 0.87, "grad_norm": 0.46366260384704483, "learning_rate": 9.322116100549172e-07, "loss": 0.2621, "step": 18843 }, { "epoch": 0.87, "grad_norm": 0.6342174081457774, "learning_rate": 9.315843938462143e-07, "loss": 0.2254, "step": 18844 }, { "epoch": 0.87, "grad_norm": 0.73671605280944, "learning_rate": 9.309573784034931e-07, "loss": 0.3659, "step": 18845 }, { "epoch": 0.87, "grad_norm": 0.2194343119840035, "learning_rate": 9.303305637406335e-07, "loss": 0.1549, "step": 18846 }, { "epoch": 0.87, "grad_norm": 0.29566095238270096, "learning_rate": 9.297039498715155e-07, "loss": 0.247, "step": 18847 }, { "epoch": 0.87, "grad_norm": 1.6209792244684877, "learning_rate": 9.29077536810008e-07, "loss": 0.7435, "step": 18848 }, { "epoch": 0.87, "grad_norm": 0.36110234246484674, "learning_rate": 9.284513245699823e-07, "loss": 0.2537, "step": 18849 }, { "epoch": 0.87, "grad_norm": 0.5232560925517842, "learning_rate": 9.278253131653014e-07, "loss": 0.2357, "step": 18850 }, { "epoch": 0.87, "grad_norm": 0.5192236217253473, "learning_rate": 9.271995026098224e-07, "loss": 0.292, "step": 18851 }, { "epoch": 0.87, "grad_norm": 0.3830818663878956, "learning_rate": 9.265738929174051e-07, "loss": 0.2662, "step": 18852 }, { "epoch": 0.87, "grad_norm": 0.4647701773957379, "learning_rate": 9.259484841018917e-07, "loss": 0.2933, "step": 18853 }, { "epoch": 0.87, "grad_norm": 0.2918725182806727, "learning_rate": 9.253232761771369e-07, "loss": 0.2143, "step": 18854 }, { "epoch": 0.87, "grad_norm": 0.5885689597253098, "learning_rate": 9.246982691569794e-07, "loss": 0.2528, "step": 18855 }, { "epoch": 0.87, "grad_norm": 0.5247729722126491, "learning_rate": 9.240734630552528e-07, "loss": 0.3224, "step": 18856 }, { "epoch": 0.87, "grad_norm": 0.4494132334443572, "learning_rate": 9.234488578857925e-07, "loss": 0.2013, "step": 18857 }, { "epoch": 0.87, "grad_norm": 0.3930843042539551, "learning_rate": 9.228244536624264e-07, "loss": 0.2753, "step": 18858 }, { "epoch": 0.87, "grad_norm": 0.36024233552190976, "learning_rate": 9.222002503989803e-07, "loss": 0.2915, "step": 18859 }, { "epoch": 0.87, "grad_norm": 0.3598019090739894, "learning_rate": 9.215762481092694e-07, "loss": 0.1219, "step": 18860 }, { "epoch": 0.87, "grad_norm": 0.4244392754975432, "learning_rate": 9.209524468071096e-07, "loss": 0.295, "step": 18861 }, { "epoch": 0.87, "grad_norm": 0.3258787291325904, "learning_rate": 9.203288465063143e-07, "loss": 0.2679, "step": 18862 }, { "epoch": 0.87, "grad_norm": 0.8217004797780705, "learning_rate": 9.197054472206857e-07, "loss": 0.262, "step": 18863 }, { "epoch": 0.87, "grad_norm": 0.4887247900594674, "learning_rate": 9.190822489640294e-07, "loss": 0.2904, "step": 18864 }, { "epoch": 0.87, "grad_norm": 0.8843274438861373, "learning_rate": 9.184592517501367e-07, "loss": 0.4211, "step": 18865 }, { "epoch": 0.87, "grad_norm": 0.28823465783532826, "learning_rate": 9.178364555928043e-07, "loss": 0.2385, "step": 18866 }, { "epoch": 0.87, "grad_norm": 0.320127196856453, "learning_rate": 9.172138605058201e-07, "loss": 0.1644, "step": 18867 }, { "epoch": 0.87, "grad_norm": 0.5093834549822057, "learning_rate": 9.165914665029663e-07, "loss": 0.3173, "step": 18868 }, { "epoch": 0.87, "grad_norm": 0.9993546129032954, "learning_rate": 9.15969273598023e-07, "loss": 0.3777, "step": 18869 }, { "epoch": 0.87, "grad_norm": 0.2638497601368738, "learning_rate": 9.153472818047627e-07, "loss": 0.2049, "step": 18870 }, { "epoch": 0.87, "grad_norm": 0.514858592531016, "learning_rate": 9.147254911369597e-07, "loss": 0.3542, "step": 18871 }, { "epoch": 0.87, "grad_norm": 0.3087457247789488, "learning_rate": 9.141039016083786e-07, "loss": 0.1959, "step": 18872 }, { "epoch": 0.87, "grad_norm": 0.4140340378500285, "learning_rate": 9.134825132327784e-07, "loss": 0.1971, "step": 18873 }, { "epoch": 0.87, "grad_norm": 0.3772149688147411, "learning_rate": 9.128613260239172e-07, "loss": 0.2981, "step": 18874 }, { "epoch": 0.87, "grad_norm": 0.5976708732402051, "learning_rate": 9.122403399955493e-07, "loss": 0.3239, "step": 18875 }, { "epoch": 0.87, "grad_norm": 0.8570607138616448, "learning_rate": 9.116195551614215e-07, "loss": 0.4279, "step": 18876 }, { "epoch": 0.87, "grad_norm": 0.3434920635418428, "learning_rate": 9.109989715352762e-07, "loss": 0.2283, "step": 18877 }, { "epoch": 0.87, "grad_norm": 0.29188599507292157, "learning_rate": 9.103785891308548e-07, "loss": 0.2391, "step": 18878 }, { "epoch": 0.87, "grad_norm": 0.5865646152486707, "learning_rate": 9.097584079618893e-07, "loss": 0.2927, "step": 18879 }, { "epoch": 0.87, "grad_norm": 0.32190440293246897, "learning_rate": 9.091384280421123e-07, "loss": 0.2056, "step": 18880 }, { "epoch": 0.87, "grad_norm": 1.4658020704934267, "learning_rate": 9.085186493852494e-07, "loss": 0.6687, "step": 18881 }, { "epoch": 0.87, "grad_norm": 0.3196103736867, "learning_rate": 9.078990720050196e-07, "loss": 0.2435, "step": 18882 }, { "epoch": 0.87, "grad_norm": 0.3886127094075236, "learning_rate": 9.072796959151409e-07, "loss": 0.2598, "step": 18883 }, { "epoch": 0.87, "grad_norm": 0.7620096921376133, "learning_rate": 9.066605211293278e-07, "loss": 0.3855, "step": 18884 }, { "epoch": 0.87, "grad_norm": 0.652874890761138, "learning_rate": 9.060415476612849e-07, "loss": 0.325, "step": 18885 }, { "epoch": 0.87, "grad_norm": 0.3119538187146621, "learning_rate": 9.054227755247191e-07, "loss": 0.1916, "step": 18886 }, { "epoch": 0.87, "grad_norm": 0.399123611326978, "learning_rate": 9.048042047333239e-07, "loss": 0.2448, "step": 18887 }, { "epoch": 0.87, "grad_norm": 0.5641628087018107, "learning_rate": 9.041858353008015e-07, "loss": 0.3206, "step": 18888 }, { "epoch": 0.87, "grad_norm": 0.43354807046476623, "learning_rate": 9.035676672408367e-07, "loss": 0.3033, "step": 18889 }, { "epoch": 0.87, "grad_norm": 0.37617807192630454, "learning_rate": 9.029497005671173e-07, "loss": 0.2511, "step": 18890 }, { "epoch": 0.87, "grad_norm": 0.4502097985776204, "learning_rate": 9.023319352933225e-07, "loss": 0.3049, "step": 18891 }, { "epoch": 0.87, "grad_norm": 0.3356574198342961, "learning_rate": 9.0171437143313e-07, "loss": 0.1925, "step": 18892 }, { "epoch": 0.87, "grad_norm": 0.41889309019794485, "learning_rate": 9.010970090002135e-07, "loss": 0.1645, "step": 18893 }, { "epoch": 0.87, "grad_norm": 0.4428809998005926, "learning_rate": 9.004798480082388e-07, "loss": 0.2855, "step": 18894 }, { "epoch": 0.87, "grad_norm": 0.3726263492083325, "learning_rate": 8.998628884708705e-07, "loss": 0.3003, "step": 18895 }, { "epoch": 0.87, "grad_norm": 0.6474236434967158, "learning_rate": 8.992461304017663e-07, "loss": 0.2963, "step": 18896 }, { "epoch": 0.87, "grad_norm": 0.9175221644915363, "learning_rate": 8.986295738145812e-07, "loss": 0.371, "step": 18897 }, { "epoch": 0.87, "grad_norm": 0.2763343729730441, "learning_rate": 8.980132187229673e-07, "loss": 0.2467, "step": 18898 }, { "epoch": 0.87, "grad_norm": 0.31078048715961265, "learning_rate": 8.973970651405661e-07, "loss": 0.0818, "step": 18899 }, { "epoch": 0.87, "grad_norm": 0.7217040705181044, "learning_rate": 8.9678111308102e-07, "loss": 0.3481, "step": 18900 }, { "epoch": 0.87, "grad_norm": 0.39691667455029683, "learning_rate": 8.961653625579691e-07, "loss": 0.3002, "step": 18901 }, { "epoch": 0.87, "grad_norm": 0.40179015563465464, "learning_rate": 8.955498135850405e-07, "loss": 0.2882, "step": 18902 }, { "epoch": 0.87, "grad_norm": 0.7449130602274193, "learning_rate": 8.949344661758652e-07, "loss": 0.1774, "step": 18903 }, { "epoch": 0.87, "grad_norm": 0.29920893193675446, "learning_rate": 8.943193203440625e-07, "loss": 0.2331, "step": 18904 }, { "epoch": 0.87, "grad_norm": 0.47564299098732626, "learning_rate": 8.937043761032571e-07, "loss": 0.2199, "step": 18905 }, { "epoch": 0.87, "grad_norm": 0.29436078119338727, "learning_rate": 8.93089633467058e-07, "loss": 0.2204, "step": 18906 }, { "epoch": 0.87, "grad_norm": 0.4103587457819994, "learning_rate": 8.924750924490799e-07, "loss": 0.2833, "step": 18907 }, { "epoch": 0.87, "grad_norm": 0.5726936716283302, "learning_rate": 8.91860753062923e-07, "loss": 0.3424, "step": 18908 }, { "epoch": 0.87, "grad_norm": 0.5670378023407201, "learning_rate": 8.912466153221899e-07, "loss": 0.2116, "step": 18909 }, { "epoch": 0.87, "grad_norm": 0.2921827245634143, "learning_rate": 8.906326792404796e-07, "loss": 0.2404, "step": 18910 }, { "epoch": 0.87, "grad_norm": 0.2855471693231724, "learning_rate": 8.900189448313812e-07, "loss": 0.1683, "step": 18911 }, { "epoch": 0.87, "grad_norm": 0.7998165762495214, "learning_rate": 8.894054121084839e-07, "loss": 0.2552, "step": 18912 }, { "epoch": 0.87, "grad_norm": 0.3732932526263299, "learning_rate": 8.887920810853678e-07, "loss": 0.2805, "step": 18913 }, { "epoch": 0.87, "grad_norm": 0.3678071890655849, "learning_rate": 8.881789517756145e-07, "loss": 0.3198, "step": 18914 }, { "epoch": 0.87, "grad_norm": 1.6643275548060616, "learning_rate": 8.875660241927985e-07, "loss": 0.7905, "step": 18915 }, { "epoch": 0.87, "grad_norm": 0.3975593958959978, "learning_rate": 8.869532983504859e-07, "loss": 0.1977, "step": 18916 }, { "epoch": 0.87, "grad_norm": 0.3665686178618616, "learning_rate": 8.863407742622443e-07, "loss": 0.1575, "step": 18917 }, { "epoch": 0.87, "grad_norm": 0.46384417793004634, "learning_rate": 8.857284519416343e-07, "loss": 0.3051, "step": 18918 }, { "epoch": 0.87, "grad_norm": 0.3618020837454022, "learning_rate": 8.851163314022138e-07, "loss": 0.2158, "step": 18919 }, { "epoch": 0.87, "grad_norm": 1.2669080404141906, "learning_rate": 8.84504412657532e-07, "loss": 0.7385, "step": 18920 }, { "epoch": 0.87, "grad_norm": 0.5050831190506294, "learning_rate": 8.838926957211336e-07, "loss": 0.3386, "step": 18921 }, { "epoch": 0.87, "grad_norm": 0.2967969580370743, "learning_rate": 8.832811806065689e-07, "loss": 0.2076, "step": 18922 }, { "epoch": 0.87, "grad_norm": 0.3390828264274052, "learning_rate": 8.826698673273692e-07, "loss": 0.2051, "step": 18923 }, { "epoch": 0.87, "grad_norm": 0.5588968127604035, "learning_rate": 8.820587558970739e-07, "loss": 0.2998, "step": 18924 }, { "epoch": 0.87, "grad_norm": 0.3684949562092011, "learning_rate": 8.814478463292076e-07, "loss": 0.2156, "step": 18925 }, { "epoch": 0.87, "grad_norm": 0.35456679243741357, "learning_rate": 8.808371386372971e-07, "loss": 0.3077, "step": 18926 }, { "epoch": 0.87, "grad_norm": 0.9361735552321727, "learning_rate": 8.802266328348663e-07, "loss": 0.4029, "step": 18927 }, { "epoch": 0.87, "grad_norm": 0.3882030177387654, "learning_rate": 8.796163289354253e-07, "loss": 0.2702, "step": 18928 }, { "epoch": 0.87, "grad_norm": 0.28570409058523233, "learning_rate": 8.790062269524901e-07, "loss": 0.1609, "step": 18929 }, { "epoch": 0.87, "grad_norm": 0.5087992929157799, "learning_rate": 8.783963268995643e-07, "loss": 0.3295, "step": 18930 }, { "epoch": 0.87, "grad_norm": 0.33976686319062094, "learning_rate": 8.777866287901526e-07, "loss": 0.253, "step": 18931 }, { "epoch": 0.87, "grad_norm": 1.1794297448413247, "learning_rate": 8.771771326377543e-07, "loss": 0.3992, "step": 18932 }, { "epoch": 0.87, "grad_norm": 0.39741501997140877, "learning_rate": 8.765678384558607e-07, "loss": 0.279, "step": 18933 }, { "epoch": 0.87, "grad_norm": 0.3437074860414685, "learning_rate": 8.75958746257961e-07, "loss": 0.2574, "step": 18934 }, { "epoch": 0.87, "grad_norm": 0.4171877711553692, "learning_rate": 8.753498560575402e-07, "loss": 0.1403, "step": 18935 }, { "epoch": 0.87, "grad_norm": 0.4226782786099076, "learning_rate": 8.747411678680817e-07, "loss": 0.2618, "step": 18936 }, { "epoch": 0.87, "grad_norm": 0.34152708157740014, "learning_rate": 8.741326817030594e-07, "loss": 0.2492, "step": 18937 }, { "epoch": 0.87, "grad_norm": 0.3534839711812888, "learning_rate": 8.735243975759411e-07, "loss": 0.2496, "step": 18938 }, { "epoch": 0.87, "grad_norm": 1.3868618994597817, "learning_rate": 8.729163155001975e-07, "loss": 0.4175, "step": 18939 }, { "epoch": 0.87, "grad_norm": 0.36058788097455874, "learning_rate": 8.723084354892886e-07, "loss": 0.261, "step": 18940 }, { "epoch": 0.87, "grad_norm": 1.1027058531891762, "learning_rate": 8.71700757556676e-07, "loss": 0.3553, "step": 18941 }, { "epoch": 0.87, "grad_norm": 0.2745882122097378, "learning_rate": 8.710932817158091e-07, "loss": 0.2023, "step": 18942 }, { "epoch": 0.87, "grad_norm": 0.380078601130948, "learning_rate": 8.704860079801381e-07, "loss": 0.257, "step": 18943 }, { "epoch": 0.87, "grad_norm": 0.42378470403585644, "learning_rate": 8.698789363631088e-07, "loss": 0.2786, "step": 18944 }, { "epoch": 0.87, "grad_norm": 0.37952895638988604, "learning_rate": 8.692720668781596e-07, "loss": 0.2299, "step": 18945 }, { "epoch": 0.87, "grad_norm": 0.37010618611198703, "learning_rate": 8.686653995387273e-07, "loss": 0.2559, "step": 18946 }, { "epoch": 0.87, "grad_norm": 0.6935291674141774, "learning_rate": 8.68058934358239e-07, "loss": 0.3777, "step": 18947 }, { "epoch": 0.87, "grad_norm": 0.43240093940848606, "learning_rate": 8.674526713501286e-07, "loss": 0.1495, "step": 18948 }, { "epoch": 0.87, "grad_norm": 0.3479697041473474, "learning_rate": 8.668466105278128e-07, "loss": 0.2694, "step": 18949 }, { "epoch": 0.87, "grad_norm": 0.3601097343161797, "learning_rate": 8.662407519047089e-07, "loss": 0.3297, "step": 18950 }, { "epoch": 0.87, "grad_norm": 0.4555246551112367, "learning_rate": 8.656350954942328e-07, "loss": 0.0826, "step": 18951 }, { "epoch": 0.87, "grad_norm": 0.3215596246859388, "learning_rate": 8.650296413097903e-07, "loss": 0.2241, "step": 18952 }, { "epoch": 0.87, "grad_norm": 0.4762956326888779, "learning_rate": 8.644243893647897e-07, "loss": 0.3393, "step": 18953 }, { "epoch": 0.87, "grad_norm": 0.6275685048213777, "learning_rate": 8.638193396726257e-07, "loss": 0.308, "step": 18954 }, { "epoch": 0.87, "grad_norm": 0.23710470467552497, "learning_rate": 8.63214492246699e-07, "loss": 0.1631, "step": 18955 }, { "epoch": 0.87, "grad_norm": 1.3023343534560967, "learning_rate": 8.626098471003941e-07, "loss": 0.7332, "step": 18956 }, { "epoch": 0.87, "grad_norm": 0.2913590717654896, "learning_rate": 8.620054042471015e-07, "loss": 0.2716, "step": 18957 }, { "epoch": 0.87, "grad_norm": 0.3400575354588975, "learning_rate": 8.614011637002029e-07, "loss": 0.2093, "step": 18958 }, { "epoch": 0.87, "grad_norm": 0.7299331363516635, "learning_rate": 8.607971254730741e-07, "loss": 0.3653, "step": 18959 }, { "epoch": 0.87, "grad_norm": 1.4538820876940137, "learning_rate": 8.601932895790877e-07, "loss": 0.5566, "step": 18960 }, { "epoch": 0.87, "grad_norm": 0.29500953961414933, "learning_rate": 8.595896560316142e-07, "loss": 0.1941, "step": 18961 }, { "epoch": 0.87, "grad_norm": 0.3801070490627266, "learning_rate": 8.58986224844014e-07, "loss": 0.3098, "step": 18962 }, { "epoch": 0.87, "grad_norm": 0.35607095004376244, "learning_rate": 8.583829960296519e-07, "loss": 0.214, "step": 18963 }, { "epoch": 0.87, "grad_norm": 0.3198778201900577, "learning_rate": 8.57779969601874e-07, "loss": 0.188, "step": 18964 }, { "epoch": 0.87, "grad_norm": 0.3814237324776373, "learning_rate": 8.571771455740407e-07, "loss": 0.2729, "step": 18965 }, { "epoch": 0.87, "grad_norm": 1.9794993125079294, "learning_rate": 8.565745239594936e-07, "loss": 0.3982, "step": 18966 }, { "epoch": 0.87, "grad_norm": 0.4327866528871953, "learning_rate": 8.559721047715719e-07, "loss": 0.2624, "step": 18967 }, { "epoch": 0.87, "grad_norm": 0.21737636530942112, "learning_rate": 8.553698880236172e-07, "loss": 0.1504, "step": 18968 }, { "epoch": 0.87, "grad_norm": 0.3866805763574534, "learning_rate": 8.547678737289556e-07, "loss": 0.3136, "step": 18969 }, { "epoch": 0.87, "grad_norm": 0.41532138717673067, "learning_rate": 8.541660619009217e-07, "loss": 0.2742, "step": 18970 }, { "epoch": 0.87, "grad_norm": 0.5814246492432071, "learning_rate": 8.535644525528353e-07, "loss": 0.2381, "step": 18971 }, { "epoch": 0.87, "grad_norm": 1.4576454644419, "learning_rate": 8.529630456980175e-07, "loss": 0.5314, "step": 18972 }, { "epoch": 0.87, "grad_norm": 0.2641624510333769, "learning_rate": 8.523618413497814e-07, "loss": 0.2404, "step": 18973 }, { "epoch": 0.87, "grad_norm": 0.5452765595102245, "learning_rate": 8.517608395214361e-07, "loss": 0.2631, "step": 18974 }, { "epoch": 0.87, "grad_norm": 0.47618578067920797, "learning_rate": 8.51160040226291e-07, "loss": 0.2403, "step": 18975 }, { "epoch": 0.87, "grad_norm": 0.3312721818217621, "learning_rate": 8.505594434776432e-07, "loss": 0.2361, "step": 18976 }, { "epoch": 0.87, "grad_norm": 0.2647445344264366, "learning_rate": 8.49959049288791e-07, "loss": 0.1885, "step": 18977 }, { "epoch": 0.87, "grad_norm": 1.2865961274945428, "learning_rate": 8.493588576730283e-07, "loss": 0.4625, "step": 18978 }, { "epoch": 0.87, "grad_norm": 0.576051959752146, "learning_rate": 8.487588686436387e-07, "loss": 0.3094, "step": 18979 }, { "epoch": 0.87, "grad_norm": 0.43333879142018555, "learning_rate": 8.481590822139108e-07, "loss": 0.2912, "step": 18980 }, { "epoch": 0.87, "grad_norm": 0.36261927183282977, "learning_rate": 8.475594983971148e-07, "loss": 0.2571, "step": 18981 }, { "epoch": 0.87, "grad_norm": 0.6324646657873435, "learning_rate": 8.469601172065356e-07, "loss": 0.301, "step": 18982 }, { "epoch": 0.87, "grad_norm": 0.2462914965218332, "learning_rate": 8.463609386554339e-07, "loss": 0.1967, "step": 18983 }, { "epoch": 0.87, "grad_norm": 2.1128689960052904, "learning_rate": 8.457619627570824e-07, "loss": 0.2888, "step": 18984 }, { "epoch": 0.87, "grad_norm": 0.34622813444542877, "learning_rate": 8.45163189524737e-07, "loss": 0.2902, "step": 18985 }, { "epoch": 0.87, "grad_norm": 0.42748284655691493, "learning_rate": 8.445646189716506e-07, "loss": 0.3169, "step": 18986 }, { "epoch": 0.87, "grad_norm": 0.8097034959160273, "learning_rate": 8.439662511110846e-07, "loss": 0.2681, "step": 18987 }, { "epoch": 0.87, "grad_norm": 0.3341323654020718, "learning_rate": 8.433680859562787e-07, "loss": 0.2052, "step": 18988 }, { "epoch": 0.87, "grad_norm": 0.2644793304128655, "learning_rate": 8.4277012352048e-07, "loss": 0.2358, "step": 18989 }, { "epoch": 0.87, "grad_norm": 1.6192977165995233, "learning_rate": 8.421723638169222e-07, "loss": 0.7347, "step": 18990 }, { "epoch": 0.87, "grad_norm": 0.37274164176763475, "learning_rate": 8.415748068588425e-07, "loss": 0.2117, "step": 18991 }, { "epoch": 0.87, "grad_norm": 0.6525883417708729, "learning_rate": 8.409774526594716e-07, "loss": 0.3236, "step": 18992 }, { "epoch": 0.87, "grad_norm": 0.3815008517674871, "learning_rate": 8.403803012320311e-07, "loss": 0.2876, "step": 18993 }, { "epoch": 0.87, "grad_norm": 0.5238316435682695, "learning_rate": 8.397833525897415e-07, "loss": 0.172, "step": 18994 }, { "epoch": 0.87, "grad_norm": 0.27184076687514025, "learning_rate": 8.391866067458221e-07, "loss": 0.2086, "step": 18995 }, { "epoch": 0.87, "grad_norm": 1.3120938292607551, "learning_rate": 8.385900637134792e-07, "loss": 0.5248, "step": 18996 }, { "epoch": 0.87, "grad_norm": 0.289955944085589, "learning_rate": 8.379937235059254e-07, "loss": 0.199, "step": 18997 }, { "epoch": 0.87, "grad_norm": 0.41110906995783775, "learning_rate": 8.373975861363582e-07, "loss": 0.2882, "step": 18998 }, { "epoch": 0.87, "grad_norm": 0.864466697011512, "learning_rate": 8.368016516179766e-07, "loss": 0.4581, "step": 18999 }, { "epoch": 0.87, "grad_norm": 0.1891065185586739, "learning_rate": 8.36205919963975e-07, "loss": 0.0701, "step": 19000 }, { "epoch": 0.87, "grad_norm": 0.3238247214464417, "learning_rate": 8.356103911875446e-07, "loss": 0.2423, "step": 19001 }, { "epoch": 0.87, "grad_norm": 1.7312028826365304, "learning_rate": 8.350150653018651e-07, "loss": 0.6667, "step": 19002 }, { "epoch": 0.87, "grad_norm": 0.5933598010889024, "learning_rate": 8.344199423201194e-07, "loss": 0.3277, "step": 19003 }, { "epoch": 0.87, "grad_norm": 0.3640038421313204, "learning_rate": 8.338250222554833e-07, "loss": 0.2359, "step": 19004 }, { "epoch": 0.87, "grad_norm": 0.391795445380038, "learning_rate": 8.332303051211244e-07, "loss": 0.2986, "step": 19005 }, { "epoch": 0.87, "grad_norm": 0.9451842798290248, "learning_rate": 8.326357909302141e-07, "loss": 0.2717, "step": 19006 }, { "epoch": 0.87, "grad_norm": 0.24797767512882987, "learning_rate": 8.320414796959097e-07, "loss": 0.1548, "step": 19007 }, { "epoch": 0.87, "grad_norm": 1.5115961773945388, "learning_rate": 8.31447371431372e-07, "loss": 0.6177, "step": 19008 }, { "epoch": 0.87, "grad_norm": 0.3026605604782784, "learning_rate": 8.308534661497525e-07, "loss": 0.2548, "step": 19009 }, { "epoch": 0.87, "grad_norm": 0.35722717921630737, "learning_rate": 8.302597638641996e-07, "loss": 0.2369, "step": 19010 }, { "epoch": 0.87, "grad_norm": 1.0598073565192736, "learning_rate": 8.296662645878573e-07, "loss": 0.4888, "step": 19011 }, { "epoch": 0.87, "grad_norm": 0.6200732566681314, "learning_rate": 8.290729683338649e-07, "loss": 0.297, "step": 19012 }, { "epoch": 0.87, "grad_norm": 0.2581691729174255, "learning_rate": 8.2847987511536e-07, "loss": 0.2309, "step": 19013 }, { "epoch": 0.87, "grad_norm": 0.33419975573685173, "learning_rate": 8.278869849454718e-07, "loss": 0.202, "step": 19014 }, { "epoch": 0.87, "grad_norm": 0.6462974547577311, "learning_rate": 8.272942978373222e-07, "loss": 0.3232, "step": 19015 }, { "epoch": 0.87, "grad_norm": 0.3678025661468835, "learning_rate": 8.267018138040372e-07, "loss": 0.2827, "step": 19016 }, { "epoch": 0.87, "grad_norm": 0.3920925839484375, "learning_rate": 8.261095328587332e-07, "loss": 0.2555, "step": 19017 }, { "epoch": 0.87, "grad_norm": 0.9873372888059545, "learning_rate": 8.255174550145229e-07, "loss": 0.4414, "step": 19018 }, { "epoch": 0.87, "grad_norm": 0.2738688936877376, "learning_rate": 8.249255802845124e-07, "loss": 0.2045, "step": 19019 }, { "epoch": 0.87, "grad_norm": 0.35347059883707593, "learning_rate": 8.243339086818059e-07, "loss": 0.188, "step": 19020 }, { "epoch": 0.87, "grad_norm": 0.4636443973610433, "learning_rate": 8.23742440219506e-07, "loss": 0.2911, "step": 19021 }, { "epoch": 0.87, "grad_norm": 0.37960958301044795, "learning_rate": 8.231511749107013e-07, "loss": 0.3028, "step": 19022 }, { "epoch": 0.87, "grad_norm": 0.9112937605775095, "learning_rate": 8.225601127684867e-07, "loss": 0.3699, "step": 19023 }, { "epoch": 0.87, "grad_norm": 0.5030961517723759, "learning_rate": 8.219692538059454e-07, "loss": 0.2925, "step": 19024 }, { "epoch": 0.87, "grad_norm": 0.30622602018718664, "learning_rate": 8.213785980361577e-07, "loss": 0.2637, "step": 19025 }, { "epoch": 0.87, "grad_norm": 0.2952368102824688, "learning_rate": 8.207881454722033e-07, "loss": 0.1363, "step": 19026 }, { "epoch": 0.87, "grad_norm": 0.6050218237378773, "learning_rate": 8.201978961271506e-07, "loss": 0.2908, "step": 19027 }, { "epoch": 0.87, "grad_norm": 0.416794538914146, "learning_rate": 8.196078500140703e-07, "loss": 0.3089, "step": 19028 }, { "epoch": 0.87, "grad_norm": 0.3752686152699415, "learning_rate": 8.190180071460218e-07, "loss": 0.2931, "step": 19029 }, { "epoch": 0.87, "grad_norm": 0.2712976163975354, "learning_rate": 8.184283675360683e-07, "loss": 0.0679, "step": 19030 }, { "epoch": 0.87, "grad_norm": 0.43859590879094684, "learning_rate": 8.178389311972612e-07, "loss": 0.2897, "step": 19031 }, { "epoch": 0.87, "grad_norm": 0.40590827281764735, "learning_rate": 8.172496981426492e-07, "loss": 0.268, "step": 19032 }, { "epoch": 0.87, "grad_norm": 0.2833302209088497, "learning_rate": 8.166606683852784e-07, "loss": 0.1813, "step": 19033 }, { "epoch": 0.87, "grad_norm": 0.3514643407549719, "learning_rate": 8.160718419381886e-07, "loss": 0.2558, "step": 19034 }, { "epoch": 0.87, "grad_norm": 1.4597336347787988, "learning_rate": 8.154832188144191e-07, "loss": 0.54, "step": 19035 }, { "epoch": 0.87, "grad_norm": 0.3957739446574237, "learning_rate": 8.148947990269973e-07, "loss": 0.2269, "step": 19036 }, { "epoch": 0.87, "grad_norm": 0.3325904970818053, "learning_rate": 8.143065825889518e-07, "loss": 0.2321, "step": 19037 }, { "epoch": 0.87, "grad_norm": 0.6391397999000846, "learning_rate": 8.137185695133076e-07, "loss": 0.3442, "step": 19038 }, { "epoch": 0.87, "grad_norm": 0.25491727382312285, "learning_rate": 8.131307598130778e-07, "loss": 0.1201, "step": 19039 }, { "epoch": 0.87, "grad_norm": 0.3556588612588018, "learning_rate": 8.125431535012807e-07, "loss": 0.2827, "step": 19040 }, { "epoch": 0.87, "grad_norm": 0.28030210751157425, "learning_rate": 8.119557505909214e-07, "loss": 0.2513, "step": 19041 }, { "epoch": 0.87, "grad_norm": 1.2190992801998461, "learning_rate": 8.113685510950054e-07, "loss": 0.5076, "step": 19042 }, { "epoch": 0.87, "grad_norm": 0.3416880376232932, "learning_rate": 8.107815550265363e-07, "loss": 0.2082, "step": 19043 }, { "epoch": 0.87, "grad_norm": 0.47439585190525857, "learning_rate": 8.101947623985051e-07, "loss": 0.3297, "step": 19044 }, { "epoch": 0.87, "grad_norm": 0.5334662355908194, "learning_rate": 8.096081732239058e-07, "loss": 0.3864, "step": 19045 }, { "epoch": 0.87, "grad_norm": 0.300371633079931, "learning_rate": 8.090217875157203e-07, "loss": 0.1852, "step": 19046 }, { "epoch": 0.88, "grad_norm": 0.2689298491021298, "learning_rate": 8.08435605286938e-07, "loss": 0.1825, "step": 19047 }, { "epoch": 0.88, "grad_norm": 0.37326502490629226, "learning_rate": 8.078496265505309e-07, "loss": 0.2746, "step": 19048 }, { "epoch": 0.88, "grad_norm": 0.35925652137268366, "learning_rate": 8.072638513194752e-07, "loss": 0.2097, "step": 19049 }, { "epoch": 0.88, "grad_norm": 0.6736882157557524, "learning_rate": 8.066782796067351e-07, "loss": 0.3525, "step": 19050 }, { "epoch": 0.88, "grad_norm": 0.9366757556141992, "learning_rate": 8.06092911425278e-07, "loss": 0.4013, "step": 19051 }, { "epoch": 0.88, "grad_norm": 0.33630107910461077, "learning_rate": 8.055077467880645e-07, "loss": 0.2016, "step": 19052 }, { "epoch": 0.88, "grad_norm": 0.25572830619763703, "learning_rate": 8.049227857080455e-07, "loss": 0.2175, "step": 19053 }, { "epoch": 0.88, "grad_norm": 0.769528195731873, "learning_rate": 8.043380281981739e-07, "loss": 0.357, "step": 19054 }, { "epoch": 0.88, "grad_norm": 0.3449461984693298, "learning_rate": 8.03753474271397e-07, "loss": 0.271, "step": 19055 }, { "epoch": 0.88, "grad_norm": 0.3512712186541674, "learning_rate": 8.031691239406536e-07, "loss": 0.2402, "step": 19056 }, { "epoch": 0.88, "grad_norm": 1.3853436884094832, "learning_rate": 8.025849772188831e-07, "loss": 0.5689, "step": 19057 }, { "epoch": 0.88, "grad_norm": 0.35783712998611034, "learning_rate": 8.020010341190154e-07, "loss": 0.2815, "step": 19058 }, { "epoch": 0.88, "grad_norm": 0.2390374312111081, "learning_rate": 8.014172946539789e-07, "loss": 0.0908, "step": 19059 }, { "epoch": 0.88, "grad_norm": 0.38651309132575284, "learning_rate": 8.008337588366999e-07, "loss": 0.288, "step": 19060 }, { "epoch": 0.88, "grad_norm": 0.3503191353526383, "learning_rate": 8.002504266800937e-07, "loss": 0.2696, "step": 19061 }, { "epoch": 0.88, "grad_norm": 0.9987790710210278, "learning_rate": 7.996672981970777e-07, "loss": 0.2243, "step": 19062 }, { "epoch": 0.88, "grad_norm": 1.1867048413839014, "learning_rate": 7.99084373400556e-07, "loss": 0.7392, "step": 19063 }, { "epoch": 0.88, "grad_norm": 0.3513326562858509, "learning_rate": 7.985016523034428e-07, "loss": 0.2618, "step": 19064 }, { "epoch": 0.88, "grad_norm": 0.2200918569227306, "learning_rate": 7.979191349186322e-07, "loss": 0.1663, "step": 19065 }, { "epoch": 0.88, "grad_norm": 0.6783453206394917, "learning_rate": 7.973368212590249e-07, "loss": 0.3526, "step": 19066 }, { "epoch": 0.88, "grad_norm": 0.4298862751791666, "learning_rate": 7.967547113375096e-07, "loss": 0.2798, "step": 19067 }, { "epoch": 0.88, "grad_norm": 0.4282416692434011, "learning_rate": 7.961728051669737e-07, "loss": 0.3158, "step": 19068 }, { "epoch": 0.88, "grad_norm": 0.424678224180368, "learning_rate": 7.955911027603036e-07, "loss": 0.2643, "step": 19069 }, { "epoch": 0.88, "grad_norm": 0.3515285539289351, "learning_rate": 7.950096041303734e-07, "loss": 0.2379, "step": 19070 }, { "epoch": 0.88, "grad_norm": 0.7662790755210265, "learning_rate": 7.944283092900584e-07, "loss": 0.4195, "step": 19071 }, { "epoch": 0.88, "grad_norm": 0.3901816534392451, "learning_rate": 7.938472182522305e-07, "loss": 0.2518, "step": 19072 }, { "epoch": 0.88, "grad_norm": 0.2593516000315393, "learning_rate": 7.932663310297495e-07, "loss": 0.2008, "step": 19073 }, { "epoch": 0.88, "grad_norm": 0.6722491973801278, "learning_rate": 7.926856476354805e-07, "loss": 0.2264, "step": 19074 }, { "epoch": 0.88, "grad_norm": 1.1161784194660844, "learning_rate": 7.921051680822756e-07, "loss": 0.5161, "step": 19075 }, { "epoch": 0.88, "grad_norm": 0.30326251383523245, "learning_rate": 7.915248923829877e-07, "loss": 0.2346, "step": 19076 }, { "epoch": 0.88, "grad_norm": 0.49811330525421477, "learning_rate": 7.909448205504633e-07, "loss": 0.3234, "step": 19077 }, { "epoch": 0.88, "grad_norm": 0.3953999072122668, "learning_rate": 7.903649525975465e-07, "loss": 0.2044, "step": 19078 }, { "epoch": 0.88, "grad_norm": 0.262267232759829, "learning_rate": 7.897852885370727e-07, "loss": 0.2048, "step": 19079 }, { "epoch": 0.88, "grad_norm": 0.6713813033419213, "learning_rate": 7.892058283818727e-07, "loss": 0.3087, "step": 19080 }, { "epoch": 0.88, "grad_norm": 0.5008968674257203, "learning_rate": 7.886265721447816e-07, "loss": 0.3197, "step": 19081 }, { "epoch": 0.88, "grad_norm": 0.30133102887793933, "learning_rate": 7.880475198386195e-07, "loss": 0.1906, "step": 19082 }, { "epoch": 0.88, "grad_norm": 0.6961966223697794, "learning_rate": 7.874686714762069e-07, "loss": 0.3919, "step": 19083 }, { "epoch": 0.88, "grad_norm": 0.40254700995907683, "learning_rate": 7.868900270703572e-07, "loss": 0.2996, "step": 19084 }, { "epoch": 0.88, "grad_norm": 0.26200285918769367, "learning_rate": 7.863115866338833e-07, "loss": 0.1305, "step": 19085 }, { "epoch": 0.88, "grad_norm": 0.3750703519470904, "learning_rate": 7.857333501795927e-07, "loss": 0.2404, "step": 19086 }, { "epoch": 0.88, "grad_norm": 0.46395877063394975, "learning_rate": 7.85155317720282e-07, "loss": 0.3159, "step": 19087 }, { "epoch": 0.88, "grad_norm": 0.30236062210862735, "learning_rate": 7.84577489268753e-07, "loss": 0.2014, "step": 19088 }, { "epoch": 0.88, "grad_norm": 0.5421736601551634, "learning_rate": 7.839998648377956e-07, "loss": 0.3032, "step": 19089 }, { "epoch": 0.88, "grad_norm": 1.6894963417842135, "learning_rate": 7.834224444401983e-07, "loss": 0.6027, "step": 19090 }, { "epoch": 0.88, "grad_norm": 0.2763263006832676, "learning_rate": 7.828452280887466e-07, "loss": 0.1637, "step": 19091 }, { "epoch": 0.88, "grad_norm": 0.292350665131583, "learning_rate": 7.822682157962159e-07, "loss": 0.2323, "step": 19092 }, { "epoch": 0.88, "grad_norm": 1.373045548217957, "learning_rate": 7.816914075753834e-07, "loss": 0.8395, "step": 19093 }, { "epoch": 0.88, "grad_norm": 0.4083229408056449, "learning_rate": 7.811148034390182e-07, "loss": 0.2512, "step": 19094 }, { "epoch": 0.88, "grad_norm": 0.5200554628936158, "learning_rate": 7.805384033998875e-07, "loss": 0.2578, "step": 19095 }, { "epoch": 0.88, "grad_norm": 0.38494575216860555, "learning_rate": 7.799622074707513e-07, "loss": 0.2737, "step": 19096 }, { "epoch": 0.88, "grad_norm": 0.32550826928639337, "learning_rate": 7.793862156643617e-07, "loss": 0.2042, "step": 19097 }, { "epoch": 0.88, "grad_norm": 0.29811316451366265, "learning_rate": 7.788104279934772e-07, "loss": 0.1424, "step": 19098 }, { "epoch": 0.88, "grad_norm": 0.4730283661181188, "learning_rate": 7.782348444708409e-07, "loss": 0.3404, "step": 19099 }, { "epoch": 0.88, "grad_norm": 0.3330903895874114, "learning_rate": 7.776594651091995e-07, "loss": 0.2736, "step": 19100 }, { "epoch": 0.88, "grad_norm": 0.491622494954163, "learning_rate": 7.77084289921286e-07, "loss": 0.256, "step": 19101 }, { "epoch": 0.88, "grad_norm": 1.4193965905033123, "learning_rate": 7.765093189198381e-07, "loss": 0.5064, "step": 19102 }, { "epoch": 0.88, "grad_norm": 0.31727248369951444, "learning_rate": 7.759345521175854e-07, "loss": 0.1464, "step": 19103 }, { "epoch": 0.88, "grad_norm": 0.24008301562319506, "learning_rate": 7.753599895272501e-07, "loss": 0.2053, "step": 19104 }, { "epoch": 0.88, "grad_norm": 0.6683857943026734, "learning_rate": 7.747856311615554e-07, "loss": 0.4289, "step": 19105 }, { "epoch": 0.88, "grad_norm": 0.595364616127965, "learning_rate": 7.742114770332132e-07, "loss": 0.3227, "step": 19106 }, { "epoch": 0.88, "grad_norm": 0.43414959253844804, "learning_rate": 7.736375271549379e-07, "loss": 0.31, "step": 19107 }, { "epoch": 0.88, "grad_norm": 0.40293418441699713, "learning_rate": 7.73063781539437e-07, "loss": 0.234, "step": 19108 }, { "epoch": 0.88, "grad_norm": 0.656618466884473, "learning_rate": 7.724902401994084e-07, "loss": 0.3287, "step": 19109 }, { "epoch": 0.88, "grad_norm": 0.34254417746340204, "learning_rate": 7.719169031475526e-07, "loss": 0.2046, "step": 19110 }, { "epoch": 0.88, "grad_norm": 0.47301847023541294, "learning_rate": 7.713437703965621e-07, "loss": 0.2615, "step": 19111 }, { "epoch": 0.88, "grad_norm": 0.3570579264271897, "learning_rate": 7.707708419591286e-07, "loss": 0.2587, "step": 19112 }, { "epoch": 0.88, "grad_norm": 0.5373763369602805, "learning_rate": 7.701981178479312e-07, "loss": 0.34, "step": 19113 }, { "epoch": 0.88, "grad_norm": 1.9674568654069153, "learning_rate": 7.696255980756506e-07, "loss": 0.2194, "step": 19114 }, { "epoch": 0.88, "grad_norm": 0.32195481722936126, "learning_rate": 7.690532826549657e-07, "loss": 0.2219, "step": 19115 }, { "epoch": 0.88, "grad_norm": 0.32845690795284216, "learning_rate": 7.684811715985429e-07, "loss": 0.2865, "step": 19116 }, { "epoch": 0.88, "grad_norm": 0.506812272437964, "learning_rate": 7.6790926491905e-07, "loss": 0.258, "step": 19117 }, { "epoch": 0.88, "grad_norm": 0.35994821138178995, "learning_rate": 7.673375626291468e-07, "loss": 0.2171, "step": 19118 }, { "epoch": 0.88, "grad_norm": 0.5741611775301507, "learning_rate": 7.66766064741492e-07, "loss": 0.3448, "step": 19119 }, { "epoch": 0.88, "grad_norm": 0.40086705752628304, "learning_rate": 7.661947712687389e-07, "loss": 0.2922, "step": 19120 }, { "epoch": 0.88, "grad_norm": 1.117984088311144, "learning_rate": 7.656236822235318e-07, "loss": 0.1306, "step": 19121 }, { "epoch": 0.88, "grad_norm": 0.29075011889927244, "learning_rate": 7.650527976185174e-07, "loss": 0.219, "step": 19122 }, { "epoch": 0.88, "grad_norm": 0.49223978356667186, "learning_rate": 7.644821174663308e-07, "loss": 0.3372, "step": 19123 }, { "epoch": 0.88, "grad_norm": 0.3517379154038919, "learning_rate": 7.639116417796122e-07, "loss": 0.1986, "step": 19124 }, { "epoch": 0.88, "grad_norm": 0.30887089658244643, "learning_rate": 7.63341370570988e-07, "loss": 0.224, "step": 19125 }, { "epoch": 0.88, "grad_norm": 1.341481118710694, "learning_rate": 7.627713038530815e-07, "loss": 0.4765, "step": 19126 }, { "epoch": 0.88, "grad_norm": 0.4436689636064535, "learning_rate": 7.622014416385148e-07, "loss": 0.1964, "step": 19127 }, { "epoch": 0.88, "grad_norm": 0.26976445472079835, "learning_rate": 7.616317839399057e-07, "loss": 0.2256, "step": 19128 }, { "epoch": 0.88, "grad_norm": 0.7121597900792968, "learning_rate": 7.610623307698662e-07, "loss": 0.3696, "step": 19129 }, { "epoch": 0.88, "grad_norm": 0.7945609456718764, "learning_rate": 7.604930821409995e-07, "loss": 0.4382, "step": 19130 }, { "epoch": 0.88, "grad_norm": 0.3089730277815874, "learning_rate": 7.599240380659123e-07, "loss": 0.164, "step": 19131 }, { "epoch": 0.88, "grad_norm": 0.3758814774626434, "learning_rate": 7.593551985572023e-07, "loss": 0.2914, "step": 19132 }, { "epoch": 0.88, "grad_norm": 0.6269428750560389, "learning_rate": 7.587865636274594e-07, "loss": 0.2706, "step": 19133 }, { "epoch": 0.88, "grad_norm": 0.38462883468165826, "learning_rate": 7.58218133289278e-07, "loss": 0.232, "step": 19134 }, { "epoch": 0.88, "grad_norm": 0.46555698105700166, "learning_rate": 7.57649907555238e-07, "loss": 0.3294, "step": 19135 }, { "epoch": 0.88, "grad_norm": 0.39517994597928713, "learning_rate": 7.570818864379203e-07, "loss": 0.2622, "step": 19136 }, { "epoch": 0.88, "grad_norm": 0.3052914867016372, "learning_rate": 7.56514069949904e-07, "loss": 0.1806, "step": 19137 }, { "epoch": 0.88, "grad_norm": 0.6525958107606444, "learning_rate": 7.559464581037546e-07, "loss": 0.2122, "step": 19138 }, { "epoch": 0.88, "grad_norm": 0.39448062940169276, "learning_rate": 7.553790509120429e-07, "loss": 0.2866, "step": 19139 }, { "epoch": 0.88, "grad_norm": 0.31692685612009375, "learning_rate": 7.548118483873257e-07, "loss": 0.2167, "step": 19140 }, { "epoch": 0.88, "grad_norm": 0.8733360849689117, "learning_rate": 7.542448505421673e-07, "loss": 0.5067, "step": 19141 }, { "epoch": 0.88, "grad_norm": 1.5006315621657527, "learning_rate": 7.536780573891144e-07, "loss": 0.5767, "step": 19142 }, { "epoch": 0.88, "grad_norm": 0.26424488135252666, "learning_rate": 7.531114689407204e-07, "loss": 0.2308, "step": 19143 }, { "epoch": 0.88, "grad_norm": 0.2653388494462439, "learning_rate": 7.525450852095229e-07, "loss": 0.1864, "step": 19144 }, { "epoch": 0.88, "grad_norm": 0.7011935675902602, "learning_rate": 7.519789062080662e-07, "loss": 0.2847, "step": 19145 }, { "epoch": 0.88, "grad_norm": 0.37042884560913886, "learning_rate": 7.514129319488839e-07, "loss": 0.2707, "step": 19146 }, { "epoch": 0.88, "grad_norm": 0.7069425804854719, "learning_rate": 7.508471624445035e-07, "loss": 0.2572, "step": 19147 }, { "epoch": 0.88, "grad_norm": 0.43648140368588034, "learning_rate": 7.50281597707454e-07, "loss": 0.3182, "step": 19148 }, { "epoch": 0.88, "grad_norm": 0.2865351862503098, "learning_rate": 7.497162377502543e-07, "loss": 0.2301, "step": 19149 }, { "epoch": 0.88, "grad_norm": 0.49340578249555367, "learning_rate": 7.491510825854198e-07, "loss": 0.1372, "step": 19150 }, { "epoch": 0.88, "grad_norm": 0.3367042040755629, "learning_rate": 7.485861322254673e-07, "loss": 0.2575, "step": 19151 }, { "epoch": 0.88, "grad_norm": 0.38330447591012007, "learning_rate": 7.480213866828989e-07, "loss": 0.2554, "step": 19152 }, { "epoch": 0.88, "grad_norm": 1.0667690114892183, "learning_rate": 7.474568459702203e-07, "loss": 0.2613, "step": 19153 }, { "epoch": 0.88, "grad_norm": 0.5697743074330679, "learning_rate": 7.468925100999314e-07, "loss": 0.3186, "step": 19154 }, { "epoch": 0.88, "grad_norm": 0.4389947799843306, "learning_rate": 7.463283790845221e-07, "loss": 0.3053, "step": 19155 }, { "epoch": 0.88, "grad_norm": 0.237365384974626, "learning_rate": 7.45764452936485e-07, "loss": 0.2099, "step": 19156 }, { "epoch": 0.88, "grad_norm": 0.7238410983707284, "learning_rate": 7.452007316683007e-07, "loss": 0.2882, "step": 19157 }, { "epoch": 0.88, "grad_norm": 0.3427214504908176, "learning_rate": 7.446372152924552e-07, "loss": 0.2562, "step": 19158 }, { "epoch": 0.88, "grad_norm": 0.4173291447341031, "learning_rate": 7.440739038214195e-07, "loss": 0.2863, "step": 19159 }, { "epoch": 0.88, "grad_norm": 0.9628351843439291, "learning_rate": 7.435107972676691e-07, "loss": 0.5201, "step": 19160 }, { "epoch": 0.88, "grad_norm": 0.33805056365634545, "learning_rate": 7.429478956436653e-07, "loss": 0.2429, "step": 19161 }, { "epoch": 0.88, "grad_norm": 0.4819980918193347, "learning_rate": 7.423851989618735e-07, "loss": 0.193, "step": 19162 }, { "epoch": 0.88, "grad_norm": 0.39664099158765115, "learning_rate": 7.418227072347528e-07, "loss": 0.2473, "step": 19163 }, { "epoch": 0.88, "grad_norm": 0.387638212156763, "learning_rate": 7.412604204747531e-07, "loss": 0.2567, "step": 19164 }, { "epoch": 0.88, "grad_norm": 1.651374651824193, "learning_rate": 7.406983386943245e-07, "loss": 0.6351, "step": 19165 }, { "epoch": 0.88, "grad_norm": 0.5144010401186656, "learning_rate": 7.401364619059093e-07, "loss": 0.2868, "step": 19166 }, { "epoch": 0.88, "grad_norm": 0.2696545386256489, "learning_rate": 7.395747901219474e-07, "loss": 0.2539, "step": 19167 }, { "epoch": 0.88, "grad_norm": 0.9578600893319451, "learning_rate": 7.390133233548768e-07, "loss": 0.4334, "step": 19168 }, { "epoch": 0.88, "grad_norm": 0.35420670559684964, "learning_rate": 7.384520616171232e-07, "loss": 0.2174, "step": 19169 }, { "epoch": 0.88, "grad_norm": 0.31358738871384395, "learning_rate": 7.378910049211152e-07, "loss": 0.1814, "step": 19170 }, { "epoch": 0.88, "grad_norm": 0.39855158148708747, "learning_rate": 7.373301532792754e-07, "loss": 0.2944, "step": 19171 }, { "epoch": 0.88, "grad_norm": 1.2421401723655467, "learning_rate": 7.367695067040159e-07, "loss": 0.8148, "step": 19172 }, { "epoch": 0.88, "grad_norm": 0.34370070717259343, "learning_rate": 7.362090652077536e-07, "loss": 0.1875, "step": 19173 }, { "epoch": 0.88, "grad_norm": 0.9950254136008255, "learning_rate": 7.356488288028907e-07, "loss": 0.3521, "step": 19174 }, { "epoch": 0.88, "grad_norm": 0.32937495265021244, "learning_rate": 7.350887975018362e-07, "loss": 0.2405, "step": 19175 }, { "epoch": 0.88, "grad_norm": 0.23085615553001654, "learning_rate": 7.345289713169856e-07, "loss": 0.1515, "step": 19176 }, { "epoch": 0.88, "grad_norm": 1.4666311198421587, "learning_rate": 7.339693502607337e-07, "loss": 0.6272, "step": 19177 }, { "epoch": 0.88, "grad_norm": 1.4056212885044614, "learning_rate": 7.334099343454692e-07, "loss": 0.6426, "step": 19178 }, { "epoch": 0.88, "grad_norm": 0.27892941341359917, "learning_rate": 7.328507235835769e-07, "loss": 0.203, "step": 19179 }, { "epoch": 0.88, "grad_norm": 0.46047776713148203, "learning_rate": 7.322917179874401e-07, "loss": 0.3211, "step": 19180 }, { "epoch": 0.88, "grad_norm": 0.2745424877795719, "learning_rate": 7.317329175694299e-07, "loss": 0.1669, "step": 19181 }, { "epoch": 0.88, "grad_norm": 0.36522066395350244, "learning_rate": 7.311743223419221e-07, "loss": 0.2566, "step": 19182 }, { "epoch": 0.88, "grad_norm": 0.36905851855195365, "learning_rate": 7.306159323172801e-07, "loss": 0.2219, "step": 19183 }, { "epoch": 0.88, "grad_norm": 1.0051614637708495, "learning_rate": 7.300577475078663e-07, "loss": 0.4976, "step": 19184 }, { "epoch": 0.88, "grad_norm": 0.3662537147358378, "learning_rate": 7.294997679260418e-07, "loss": 0.2646, "step": 19185 }, { "epoch": 0.88, "grad_norm": 0.8873711417767954, "learning_rate": 7.289419935841557e-07, "loss": 0.278, "step": 19186 }, { "epoch": 0.88, "grad_norm": 0.2774295308209739, "learning_rate": 7.283844244945581e-07, "loss": 0.241, "step": 19187 }, { "epoch": 0.88, "grad_norm": 0.44803017485754965, "learning_rate": 7.278270606695937e-07, "loss": 0.2606, "step": 19188 }, { "epoch": 0.88, "grad_norm": 0.32887153838662353, "learning_rate": 7.272699021216034e-07, "loss": 0.1788, "step": 19189 }, { "epoch": 0.88, "grad_norm": 0.4550450680187026, "learning_rate": 7.267129488629199e-07, "loss": 0.2955, "step": 19190 }, { "epoch": 0.88, "grad_norm": 0.4001647215177, "learning_rate": 7.261562009058709e-07, "loss": 0.2653, "step": 19191 }, { "epoch": 0.88, "grad_norm": 0.39976034650706493, "learning_rate": 7.255996582627878e-07, "loss": 0.2528, "step": 19192 }, { "epoch": 0.88, "grad_norm": 0.41842127896002995, "learning_rate": 7.250433209459895e-07, "loss": 0.16, "step": 19193 }, { "epoch": 0.88, "grad_norm": 0.607069803871317, "learning_rate": 7.244871889677929e-07, "loss": 0.2473, "step": 19194 }, { "epoch": 0.88, "grad_norm": 0.31951253536005747, "learning_rate": 7.239312623405092e-07, "loss": 0.2624, "step": 19195 }, { "epoch": 0.88, "grad_norm": 0.767804889107416, "learning_rate": 7.233755410764465e-07, "loss": 0.3147, "step": 19196 }, { "epoch": 0.88, "grad_norm": 0.3526254105148667, "learning_rate": 7.228200251879102e-07, "loss": 0.2664, "step": 19197 }, { "epoch": 0.88, "grad_norm": 0.8594456190346461, "learning_rate": 7.222647146871952e-07, "loss": 0.4196, "step": 19198 }, { "epoch": 0.88, "grad_norm": 0.38647757877160077, "learning_rate": 7.217096095865995e-07, "loss": 0.2292, "step": 19199 }, { "epoch": 0.88, "grad_norm": 0.2576814779809863, "learning_rate": 7.211547098984084e-07, "loss": 0.203, "step": 19200 }, { "epoch": 0.88, "grad_norm": 0.48300131056923046, "learning_rate": 7.206000156349103e-07, "loss": 0.2502, "step": 19201 }, { "epoch": 0.88, "grad_norm": 0.42232358814305526, "learning_rate": 7.20045526808384e-07, "loss": 0.267, "step": 19202 }, { "epoch": 0.88, "grad_norm": 0.2673786884096133, "learning_rate": 7.194912434311052e-07, "loss": 0.2327, "step": 19203 }, { "epoch": 0.88, "grad_norm": 1.4068913733915827, "learning_rate": 7.189371655153455e-07, "loss": 0.6702, "step": 19204 }, { "epoch": 0.88, "grad_norm": 0.5383284123558172, "learning_rate": 7.183832930733714e-07, "loss": 0.0973, "step": 19205 }, { "epoch": 0.88, "grad_norm": 0.3076218016685364, "learning_rate": 7.178296261174467e-07, "loss": 0.1579, "step": 19206 }, { "epoch": 0.88, "grad_norm": 0.27879660090390457, "learning_rate": 7.17276164659827e-07, "loss": 0.2565, "step": 19207 }, { "epoch": 0.88, "grad_norm": 0.6366163414383302, "learning_rate": 7.167229087127669e-07, "loss": 0.366, "step": 19208 }, { "epoch": 0.88, "grad_norm": 0.35744465355295635, "learning_rate": 7.161698582885135e-07, "loss": 0.1758, "step": 19209 }, { "epoch": 0.88, "grad_norm": 0.5852466236818958, "learning_rate": 7.156170133993112e-07, "loss": 0.3306, "step": 19210 }, { "epoch": 0.88, "grad_norm": 0.3895572733872164, "learning_rate": 7.150643740574015e-07, "loss": 0.3003, "step": 19211 }, { "epoch": 0.88, "grad_norm": 0.27339503955572314, "learning_rate": 7.145119402750167e-07, "loss": 0.1307, "step": 19212 }, { "epoch": 0.88, "grad_norm": 0.38054505213932543, "learning_rate": 7.13959712064387e-07, "loss": 0.2498, "step": 19213 }, { "epoch": 0.88, "grad_norm": 0.7886480691958353, "learning_rate": 7.134076894377407e-07, "loss": 0.4153, "step": 19214 }, { "epoch": 0.88, "grad_norm": 0.2597452948511349, "learning_rate": 7.128558724072976e-07, "loss": 0.2119, "step": 19215 }, { "epoch": 0.88, "grad_norm": 0.7401159415146347, "learning_rate": 7.123042609852748e-07, "loss": 0.3857, "step": 19216 }, { "epoch": 0.88, "grad_norm": 1.2879497620174907, "learning_rate": 7.117528551838804e-07, "loss": 0.4913, "step": 19217 }, { "epoch": 0.88, "grad_norm": 0.22557738847132777, "learning_rate": 7.1120165501533e-07, "loss": 0.1511, "step": 19218 }, { "epoch": 0.88, "grad_norm": 0.37139860122979707, "learning_rate": 7.106506604918217e-07, "loss": 0.2852, "step": 19219 }, { "epoch": 0.88, "grad_norm": 0.6687166125061019, "learning_rate": 7.100998716255536e-07, "loss": 0.3651, "step": 19220 }, { "epoch": 0.88, "grad_norm": 0.4159865348403842, "learning_rate": 7.095492884287192e-07, "loss": 0.2821, "step": 19221 }, { "epoch": 0.88, "grad_norm": 0.2865968756505844, "learning_rate": 7.089989109135109e-07, "loss": 0.1349, "step": 19222 }, { "epoch": 0.88, "grad_norm": 0.4241610673735297, "learning_rate": 7.084487390921125e-07, "loss": 0.2796, "step": 19223 }, { "epoch": 0.88, "grad_norm": 0.5922881756689353, "learning_rate": 7.078987729767028e-07, "loss": 0.239, "step": 19224 }, { "epoch": 0.88, "grad_norm": 0.40091204688977466, "learning_rate": 7.073490125794591e-07, "loss": 0.2471, "step": 19225 }, { "epoch": 0.88, "grad_norm": 0.30835857867203303, "learning_rate": 7.067994579125515e-07, "loss": 0.2817, "step": 19226 }, { "epoch": 0.88, "grad_norm": 0.8448069816073958, "learning_rate": 7.062501089881458e-07, "loss": 0.4297, "step": 19227 }, { "epoch": 0.88, "grad_norm": 0.2823904993721932, "learning_rate": 7.057009658184078e-07, "loss": 0.1656, "step": 19228 }, { "epoch": 0.88, "grad_norm": 1.7786339393312804, "learning_rate": 7.051520284154911e-07, "loss": 0.5132, "step": 19229 }, { "epoch": 0.88, "grad_norm": 0.5486923571018723, "learning_rate": 7.046032967915484e-07, "loss": 0.2807, "step": 19230 }, { "epoch": 0.88, "grad_norm": 0.27276447200254633, "learning_rate": 7.040547709587331e-07, "loss": 0.269, "step": 19231 }, { "epoch": 0.88, "grad_norm": 1.0638934942008298, "learning_rate": 7.035064509291833e-07, "loss": 0.3118, "step": 19232 }, { "epoch": 0.88, "grad_norm": 0.3076638423286795, "learning_rate": 7.029583367150416e-07, "loss": 0.1933, "step": 19233 }, { "epoch": 0.88, "grad_norm": 0.34028037205160244, "learning_rate": 7.024104283284394e-07, "loss": 0.228, "step": 19234 }, { "epoch": 0.88, "grad_norm": 0.3435567762262159, "learning_rate": 7.018627257815113e-07, "loss": 0.2148, "step": 19235 }, { "epoch": 0.88, "grad_norm": 0.5297540285894589, "learning_rate": 7.0131522908638e-07, "loss": 0.3039, "step": 19236 }, { "epoch": 0.88, "grad_norm": 0.4210898997043807, "learning_rate": 7.007679382551691e-07, "loss": 0.2801, "step": 19237 }, { "epoch": 0.88, "grad_norm": 0.34473605633110976, "learning_rate": 7.002208532999933e-07, "loss": 0.2479, "step": 19238 }, { "epoch": 0.88, "grad_norm": 0.41438206898807484, "learning_rate": 6.996739742329606e-07, "loss": 0.2883, "step": 19239 }, { "epoch": 0.88, "grad_norm": 0.38184646594687427, "learning_rate": 6.99127301066187e-07, "loss": 0.2342, "step": 19240 }, { "epoch": 0.88, "grad_norm": 0.4988983488168713, "learning_rate": 6.985808338117673e-07, "loss": 0.1964, "step": 19241 }, { "epoch": 0.88, "grad_norm": 0.4503269553335103, "learning_rate": 6.980345724818061e-07, "loss": 0.3175, "step": 19242 }, { "epoch": 0.88, "grad_norm": 0.3250307658892617, "learning_rate": 6.974885170883916e-07, "loss": 0.2619, "step": 19243 }, { "epoch": 0.88, "grad_norm": 1.3439332542989475, "learning_rate": 6.969426676436164e-07, "loss": 0.676, "step": 19244 }, { "epoch": 0.88, "grad_norm": 0.33792103847466226, "learning_rate": 6.963970241595653e-07, "loss": 0.1004, "step": 19245 }, { "epoch": 0.88, "grad_norm": 0.2974932484397419, "learning_rate": 6.958515866483151e-07, "loss": 0.203, "step": 19246 }, { "epoch": 0.88, "grad_norm": 0.38326174916754424, "learning_rate": 6.95306355121943e-07, "loss": 0.2854, "step": 19247 }, { "epoch": 0.88, "grad_norm": 0.5652467202564389, "learning_rate": 6.947613295925226e-07, "loss": 0.2141, "step": 19248 }, { "epoch": 0.88, "grad_norm": 0.518119486740566, "learning_rate": 6.942165100721165e-07, "loss": 0.2971, "step": 19249 }, { "epoch": 0.88, "grad_norm": 1.4755982385474888, "learning_rate": 6.936718965727884e-07, "loss": 0.3463, "step": 19250 }, { "epoch": 0.88, "grad_norm": 0.3470077405772606, "learning_rate": 6.931274891065931e-07, "loss": 0.1976, "step": 19251 }, { "epoch": 0.88, "grad_norm": 0.2562906156583866, "learning_rate": 6.925832876855876e-07, "loss": 0.1906, "step": 19252 }, { "epoch": 0.88, "grad_norm": 0.7807209972048277, "learning_rate": 6.920392923218156e-07, "loss": 0.3955, "step": 19253 }, { "epoch": 0.88, "grad_norm": 0.3278206509188804, "learning_rate": 6.914955030273251e-07, "loss": 0.2133, "step": 19254 }, { "epoch": 0.88, "grad_norm": 0.4536696196560434, "learning_rate": 6.909519198141512e-07, "loss": 0.2941, "step": 19255 }, { "epoch": 0.88, "grad_norm": 1.1606015884512435, "learning_rate": 6.904085426943275e-07, "loss": 0.544, "step": 19256 }, { "epoch": 0.88, "grad_norm": 0.5297817702240754, "learning_rate": 6.898653716798887e-07, "loss": 0.2283, "step": 19257 }, { "epoch": 0.88, "grad_norm": 0.353494199275767, "learning_rate": 6.893224067828552e-07, "loss": 0.2115, "step": 19258 }, { "epoch": 0.88, "grad_norm": 0.35156753718874845, "learning_rate": 6.887796480152531e-07, "loss": 0.3064, "step": 19259 }, { "epoch": 0.88, "grad_norm": 0.32155043145552376, "learning_rate": 6.882370953890927e-07, "loss": 0.1678, "step": 19260 }, { "epoch": 0.88, "grad_norm": 0.4134695919309794, "learning_rate": 6.876947489163877e-07, "loss": 0.2129, "step": 19261 }, { "epoch": 0.88, "grad_norm": 0.3802575637156816, "learning_rate": 6.871526086091473e-07, "loss": 0.3363, "step": 19262 }, { "epoch": 0.88, "grad_norm": 1.4299050991215738, "learning_rate": 6.86610674479371e-07, "loss": 0.5412, "step": 19263 }, { "epoch": 0.88, "grad_norm": 0.31327250090320174, "learning_rate": 6.860689465390591e-07, "loss": 0.1975, "step": 19264 }, { "epoch": 0.89, "grad_norm": 0.34505078454851085, "learning_rate": 6.855274248002042e-07, "loss": 0.2069, "step": 19265 }, { "epoch": 0.89, "grad_norm": 0.3855991376548766, "learning_rate": 6.849861092747934e-07, "loss": 0.292, "step": 19266 }, { "epoch": 0.89, "grad_norm": 0.2968159837478404, "learning_rate": 6.844449999748137e-07, "loss": 0.1831, "step": 19267 }, { "epoch": 0.89, "grad_norm": 1.1974805473349812, "learning_rate": 6.839040969122401e-07, "loss": 0.7142, "step": 19268 }, { "epoch": 0.89, "grad_norm": 0.7956044411792038, "learning_rate": 6.833634000990541e-07, "loss": 0.3191, "step": 19269 }, { "epoch": 0.89, "grad_norm": 0.29852511430722, "learning_rate": 6.828229095472217e-07, "loss": 0.2676, "step": 19270 }, { "epoch": 0.89, "grad_norm": 0.5373356124464185, "learning_rate": 6.822826252687109e-07, "loss": 0.2598, "step": 19271 }, { "epoch": 0.89, "grad_norm": 0.23417208973854192, "learning_rate": 6.817425472754813e-07, "loss": 0.1447, "step": 19272 }, { "epoch": 0.89, "grad_norm": 0.36149216458784794, "learning_rate": 6.812026755794899e-07, "loss": 0.2572, "step": 19273 }, { "epoch": 0.89, "grad_norm": 0.3560949309202671, "learning_rate": 6.806630101926926e-07, "loss": 0.258, "step": 19274 }, { "epoch": 0.89, "grad_norm": 0.7172271712062704, "learning_rate": 6.80123551127031e-07, "loss": 0.3788, "step": 19275 }, { "epoch": 0.89, "grad_norm": 0.35134700708120375, "learning_rate": 6.795842983944545e-07, "loss": 0.2484, "step": 19276 }, { "epoch": 0.89, "grad_norm": 0.5418956978905485, "learning_rate": 6.790452520068957e-07, "loss": 0.1753, "step": 19277 }, { "epoch": 0.89, "grad_norm": 0.2971629890691787, "learning_rate": 6.785064119762919e-07, "loss": 0.2224, "step": 19278 }, { "epoch": 0.89, "grad_norm": 0.3913571097387703, "learning_rate": 6.779677783145732e-07, "loss": 0.271, "step": 19279 }, { "epoch": 0.89, "grad_norm": 1.8469547938644046, "learning_rate": 6.774293510336615e-07, "loss": 0.3485, "step": 19280 }, { "epoch": 0.89, "grad_norm": 0.7234212342784379, "learning_rate": 6.768911301454794e-07, "loss": 0.3479, "step": 19281 }, { "epoch": 0.89, "grad_norm": 0.2872656267700285, "learning_rate": 6.763531156619418e-07, "loss": 0.2371, "step": 19282 }, { "epoch": 0.89, "grad_norm": 0.5619362059418193, "learning_rate": 6.758153075949613e-07, "loss": 0.3137, "step": 19283 }, { "epoch": 0.89, "grad_norm": 0.24475089825449406, "learning_rate": 6.752777059564431e-07, "loss": 0.082, "step": 19284 }, { "epoch": 0.89, "grad_norm": 0.33058254850905905, "learning_rate": 6.747403107582884e-07, "loss": 0.2497, "step": 19285 }, { "epoch": 0.89, "grad_norm": 0.34372700418444846, "learning_rate": 6.742031220123946e-07, "loss": 0.2977, "step": 19286 }, { "epoch": 0.89, "grad_norm": 0.6762834132783561, "learning_rate": 6.736661397306554e-07, "loss": 0.3026, "step": 19287 }, { "epoch": 0.89, "grad_norm": 0.35300511203847806, "learning_rate": 6.731293639249604e-07, "loss": 0.2597, "step": 19288 }, { "epoch": 0.89, "grad_norm": 1.294282014224038, "learning_rate": 6.725927946071908e-07, "loss": 0.4886, "step": 19289 }, { "epoch": 0.89, "grad_norm": 0.2071939987812276, "learning_rate": 6.720564317892275e-07, "loss": 0.1682, "step": 19290 }, { "epoch": 0.89, "grad_norm": 0.38599507229309754, "learning_rate": 6.715202754829453e-07, "loss": 0.28, "step": 19291 }, { "epoch": 0.89, "grad_norm": 0.9158237135644358, "learning_rate": 6.709843257002113e-07, "loss": 0.4875, "step": 19292 }, { "epoch": 0.89, "grad_norm": 0.41436712473059667, "learning_rate": 6.70448582452895e-07, "loss": 0.2551, "step": 19293 }, { "epoch": 0.89, "grad_norm": 0.3720876754585612, "learning_rate": 6.699130457528535e-07, "loss": 0.2765, "step": 19294 }, { "epoch": 0.89, "grad_norm": 0.5220026134805323, "learning_rate": 6.693777156119441e-07, "loss": 0.3011, "step": 19295 }, { "epoch": 0.89, "grad_norm": 0.2976415256316706, "learning_rate": 6.688425920420216e-07, "loss": 0.1703, "step": 19296 }, { "epoch": 0.89, "grad_norm": 0.31437248131197465, "learning_rate": 6.683076750549288e-07, "loss": 0.1831, "step": 19297 }, { "epoch": 0.89, "grad_norm": 0.34576507846748544, "learning_rate": 6.677729646625097e-07, "loss": 0.2907, "step": 19298 }, { "epoch": 0.89, "grad_norm": 0.6383618609597203, "learning_rate": 6.672384608766025e-07, "loss": 0.3769, "step": 19299 }, { "epoch": 0.89, "grad_norm": 0.32125736650602976, "learning_rate": 6.667041637090432e-07, "loss": 0.1962, "step": 19300 }, { "epoch": 0.89, "grad_norm": 1.4590306584945751, "learning_rate": 6.661700731716558e-07, "loss": 0.4416, "step": 19301 }, { "epoch": 0.89, "grad_norm": 0.39300259439950214, "learning_rate": 6.656361892762686e-07, "loss": 0.3239, "step": 19302 }, { "epoch": 0.89, "grad_norm": 0.2403431488918557, "learning_rate": 6.651025120346988e-07, "loss": 0.1607, "step": 19303 }, { "epoch": 0.89, "grad_norm": 0.7266648657689263, "learning_rate": 6.645690414587613e-07, "loss": 0.3765, "step": 19304 }, { "epoch": 0.89, "grad_norm": 0.5240077198371482, "learning_rate": 6.640357775602701e-07, "loss": 0.3791, "step": 19305 }, { "epoch": 0.89, "grad_norm": 0.22311643999491565, "learning_rate": 6.635027203510258e-07, "loss": 0.1758, "step": 19306 }, { "epoch": 0.89, "grad_norm": 1.7238022972978235, "learning_rate": 6.629698698428333e-07, "loss": 0.6118, "step": 19307 }, { "epoch": 0.89, "grad_norm": 0.4614443227540382, "learning_rate": 6.62437226047491e-07, "loss": 0.2642, "step": 19308 }, { "epoch": 0.89, "grad_norm": 0.3362431270685603, "learning_rate": 6.619047889767871e-07, "loss": 0.2429, "step": 19309 }, { "epoch": 0.89, "grad_norm": 0.35626178826564253, "learning_rate": 6.613725586425112e-07, "loss": 0.2497, "step": 19310 }, { "epoch": 0.89, "grad_norm": 0.44925093120466697, "learning_rate": 6.608405350564451e-07, "loss": 0.2624, "step": 19311 }, { "epoch": 0.89, "grad_norm": 0.42915532983785387, "learning_rate": 6.603087182303702e-07, "loss": 0.2924, "step": 19312 }, { "epoch": 0.89, "grad_norm": 0.7596291624774706, "learning_rate": 6.597771081760584e-07, "loss": 0.2097, "step": 19313 }, { "epoch": 0.89, "grad_norm": 0.4377995005949378, "learning_rate": 6.592457049052781e-07, "loss": 0.3171, "step": 19314 }, { "epoch": 0.89, "grad_norm": 0.403123035577979, "learning_rate": 6.587145084297963e-07, "loss": 0.2293, "step": 19315 }, { "epoch": 0.89, "grad_norm": 0.47948769869408847, "learning_rate": 6.581835187613695e-07, "loss": 0.2674, "step": 19316 }, { "epoch": 0.89, "grad_norm": 0.40265252583005723, "learning_rate": 6.57652735911759e-07, "loss": 0.2446, "step": 19317 }, { "epoch": 0.89, "grad_norm": 0.27738093271911946, "learning_rate": 6.571221598927102e-07, "loss": 0.2526, "step": 19318 }, { "epoch": 0.89, "grad_norm": 0.5152921785521215, "learning_rate": 6.565917907159747e-07, "loss": 0.1681, "step": 19319 }, { "epoch": 0.89, "grad_norm": 0.953059922589591, "learning_rate": 6.560616283932897e-07, "loss": 0.4495, "step": 19320 }, { "epoch": 0.89, "grad_norm": 0.4248977661221667, "learning_rate": 6.555316729363937e-07, "loss": 0.2725, "step": 19321 }, { "epoch": 0.89, "grad_norm": 0.31371014057055197, "learning_rate": 6.550019243570227e-07, "loss": 0.2939, "step": 19322 }, { "epoch": 0.89, "grad_norm": 0.2703229457972463, "learning_rate": 6.544723826668998e-07, "loss": 0.168, "step": 19323 }, { "epoch": 0.89, "grad_norm": 0.2570232491692117, "learning_rate": 6.53943047877752e-07, "loss": 0.2044, "step": 19324 }, { "epoch": 0.89, "grad_norm": 1.6515158642725487, "learning_rate": 6.534139200012979e-07, "loss": 0.5292, "step": 19325 }, { "epoch": 0.89, "grad_norm": 0.37650659054123, "learning_rate": 6.528849990492503e-07, "loss": 0.2337, "step": 19326 }, { "epoch": 0.89, "grad_norm": 0.3336105096393883, "learning_rate": 6.523562850333221e-07, "loss": 0.2403, "step": 19327 }, { "epoch": 0.89, "grad_norm": 0.9125595063515183, "learning_rate": 6.518277779652115e-07, "loss": 0.5061, "step": 19328 }, { "epoch": 0.89, "grad_norm": 0.3315793184602384, "learning_rate": 6.512994778566284e-07, "loss": 0.252, "step": 19329 }, { "epoch": 0.89, "grad_norm": 0.3375279406468978, "learning_rate": 6.507713847192643e-07, "loss": 0.1987, "step": 19330 }, { "epoch": 0.89, "grad_norm": 0.3146508357843586, "learning_rate": 6.502434985648098e-07, "loss": 0.24, "step": 19331 }, { "epoch": 0.89, "grad_norm": 0.6461567353632968, "learning_rate": 6.497158194049535e-07, "loss": 0.278, "step": 19332 }, { "epoch": 0.89, "grad_norm": 0.40320234901610724, "learning_rate": 6.491883472513738e-07, "loss": 0.2393, "step": 19333 }, { "epoch": 0.89, "grad_norm": 0.3362351329666341, "learning_rate": 6.486610821157557e-07, "loss": 0.2852, "step": 19334 }, { "epoch": 0.89, "grad_norm": 1.1789486724509293, "learning_rate": 6.481340240097655e-07, "loss": 0.6892, "step": 19335 }, { "epoch": 0.89, "grad_norm": 0.2991461621643354, "learning_rate": 6.476071729450772e-07, "loss": 0.1375, "step": 19336 }, { "epoch": 0.89, "grad_norm": 0.31554517965054824, "learning_rate": 6.470805289333504e-07, "loss": 0.2455, "step": 19337 }, { "epoch": 0.89, "grad_norm": 0.4614306843287431, "learning_rate": 6.465540919862457e-07, "loss": 0.318, "step": 19338 }, { "epoch": 0.89, "grad_norm": 0.4016540047400901, "learning_rate": 6.460278621154203e-07, "loss": 0.2016, "step": 19339 }, { "epoch": 0.89, "grad_norm": 0.6110033753888543, "learning_rate": 6.455018393325218e-07, "loss": 0.3029, "step": 19340 }, { "epoch": 0.89, "grad_norm": 0.5265732284067997, "learning_rate": 6.449760236491953e-07, "loss": 0.3752, "step": 19341 }, { "epoch": 0.89, "grad_norm": 0.23708505985340458, "learning_rate": 6.444504150770859e-07, "loss": 0.1559, "step": 19342 }, { "epoch": 0.89, "grad_norm": 0.3719736188749245, "learning_rate": 6.439250136278253e-07, "loss": 0.2355, "step": 19343 }, { "epoch": 0.89, "grad_norm": 0.8177555115930268, "learning_rate": 6.433998193130486e-07, "loss": 0.3757, "step": 19344 }, { "epoch": 0.89, "grad_norm": 0.3545133310917489, "learning_rate": 6.4287483214438e-07, "loss": 0.1831, "step": 19345 }, { "epoch": 0.89, "grad_norm": 0.3559362173148364, "learning_rate": 6.423500521334447e-07, "loss": 0.2911, "step": 19346 }, { "epoch": 0.89, "grad_norm": 1.483148668584176, "learning_rate": 6.418254792918598e-07, "loss": 0.5996, "step": 19347 }, { "epoch": 0.89, "grad_norm": 0.43766951455509023, "learning_rate": 6.413011136312419e-07, "loss": 0.1943, "step": 19348 }, { "epoch": 0.89, "grad_norm": 0.25030754781779846, "learning_rate": 6.40776955163196e-07, "loss": 0.1977, "step": 19349 }, { "epoch": 0.89, "grad_norm": 0.4393473843311356, "learning_rate": 6.402530038993249e-07, "loss": 0.3295, "step": 19350 }, { "epoch": 0.89, "grad_norm": 0.9106636249087902, "learning_rate": 6.39729259851235e-07, "loss": 0.4535, "step": 19351 }, { "epoch": 0.89, "grad_norm": 0.3662780068001187, "learning_rate": 6.39205723030516e-07, "loss": 0.2119, "step": 19352 }, { "epoch": 0.89, "grad_norm": 0.35018605809127135, "learning_rate": 6.386823934487619e-07, "loss": 0.2952, "step": 19353 }, { "epoch": 0.89, "grad_norm": 0.59779877839737, "learning_rate": 6.381592711175555e-07, "loss": 0.2667, "step": 19354 }, { "epoch": 0.89, "grad_norm": 0.2917772872243583, "learning_rate": 6.376363560484789e-07, "loss": 0.2056, "step": 19355 }, { "epoch": 0.89, "grad_norm": 0.5095208133445415, "learning_rate": 6.371136482531126e-07, "loss": 0.247, "step": 19356 }, { "epoch": 0.89, "grad_norm": 0.4573497922283493, "learning_rate": 6.365911477430242e-07, "loss": 0.2668, "step": 19357 }, { "epoch": 0.89, "grad_norm": 0.327349984635034, "learning_rate": 6.360688545297822e-07, "loss": 0.2675, "step": 19358 }, { "epoch": 0.89, "grad_norm": 1.7068460200662563, "learning_rate": 6.355467686249528e-07, "loss": 0.3789, "step": 19359 }, { "epoch": 0.89, "grad_norm": 0.5624429786579328, "learning_rate": 6.350248900400913e-07, "loss": 0.3135, "step": 19360 }, { "epoch": 0.89, "grad_norm": 0.40307942142253794, "learning_rate": 6.345032187867539e-07, "loss": 0.2946, "step": 19361 }, { "epoch": 0.89, "grad_norm": 0.2378402495747571, "learning_rate": 6.33981754876487e-07, "loss": 0.1818, "step": 19362 }, { "epoch": 0.89, "grad_norm": 0.6318358701936131, "learning_rate": 6.33460498320837e-07, "loss": 0.3239, "step": 19363 }, { "epoch": 0.89, "grad_norm": 0.450940398282647, "learning_rate": 6.329394491313445e-07, "loss": 0.3545, "step": 19364 }, { "epoch": 0.89, "grad_norm": 0.33735041889798073, "learning_rate": 6.32418607319546e-07, "loss": 0.2615, "step": 19365 }, { "epoch": 0.89, "grad_norm": 0.7524162396387354, "learning_rate": 6.318979728969687e-07, "loss": 0.3209, "step": 19366 }, { "epoch": 0.89, "grad_norm": 0.37244493696922476, "learning_rate": 6.313775458751415e-07, "loss": 0.2699, "step": 19367 }, { "epoch": 0.89, "grad_norm": 0.43691738446713946, "learning_rate": 6.30857326265587e-07, "loss": 0.1348, "step": 19368 }, { "epoch": 0.89, "grad_norm": 0.34571823744855684, "learning_rate": 6.303373140798197e-07, "loss": 0.2688, "step": 19369 }, { "epoch": 0.89, "grad_norm": 0.37849016289173865, "learning_rate": 6.298175093293557e-07, "loss": 0.3017, "step": 19370 }, { "epoch": 0.89, "grad_norm": 1.1541719297063717, "learning_rate": 6.292979120256992e-07, "loss": 0.4704, "step": 19371 }, { "epoch": 0.89, "grad_norm": 0.6122205296979082, "learning_rate": 6.287785221803555e-07, "loss": 0.2616, "step": 19372 }, { "epoch": 0.89, "grad_norm": 0.31170435388489137, "learning_rate": 6.282593398048254e-07, "loss": 0.2557, "step": 19373 }, { "epoch": 0.89, "grad_norm": 0.5372987932338423, "learning_rate": 6.277403649105985e-07, "loss": 0.3492, "step": 19374 }, { "epoch": 0.89, "grad_norm": 0.2861833641465051, "learning_rate": 6.272215975091678e-07, "loss": 0.0901, "step": 19375 }, { "epoch": 0.89, "grad_norm": 0.407529043158273, "learning_rate": 6.267030376120154e-07, "loss": 0.341, "step": 19376 }, { "epoch": 0.89, "grad_norm": 0.35819377738234265, "learning_rate": 6.261846852306264e-07, "loss": 0.2999, "step": 19377 }, { "epoch": 0.89, "grad_norm": 0.49980554869766025, "learning_rate": 6.256665403764739e-07, "loss": 0.2489, "step": 19378 }, { "epoch": 0.89, "grad_norm": 0.43380630071232396, "learning_rate": 6.251486030610266e-07, "loss": 0.2665, "step": 19379 }, { "epoch": 0.89, "grad_norm": 0.49537820329052495, "learning_rate": 6.246308732957551e-07, "loss": 0.1923, "step": 19380 }, { "epoch": 0.89, "grad_norm": 0.2496297213480803, "learning_rate": 6.241133510921193e-07, "loss": 0.1631, "step": 19381 }, { "epoch": 0.89, "grad_norm": 0.44377976046645234, "learning_rate": 6.235960364615779e-07, "loss": 0.3154, "step": 19382 }, { "epoch": 0.89, "grad_norm": 0.789491206584543, "learning_rate": 6.230789294155826e-07, "loss": 0.4228, "step": 19383 }, { "epoch": 0.89, "grad_norm": 0.780913989629484, "learning_rate": 6.225620299655821e-07, "loss": 0.3835, "step": 19384 }, { "epoch": 0.89, "grad_norm": 0.26451024679968543, "learning_rate": 6.220453381230219e-07, "loss": 0.1983, "step": 19385 }, { "epoch": 0.89, "grad_norm": 0.3824376216333412, "learning_rate": 6.21528853899338e-07, "loss": 0.2814, "step": 19386 }, { "epoch": 0.89, "grad_norm": 0.47654661551089184, "learning_rate": 6.210125773059672e-07, "loss": 0.262, "step": 19387 }, { "epoch": 0.89, "grad_norm": 0.31173545206745584, "learning_rate": 6.204965083543368e-07, "loss": 0.2012, "step": 19388 }, { "epoch": 0.89, "grad_norm": 0.3455479855856143, "learning_rate": 6.199806470558744e-07, "loss": 0.2842, "step": 19389 }, { "epoch": 0.89, "grad_norm": 0.7721047252262552, "learning_rate": 6.194649934220009e-07, "loss": 0.4057, "step": 19390 }, { "epoch": 0.89, "grad_norm": 0.3591928303074338, "learning_rate": 6.189495474641293e-07, "loss": 0.1845, "step": 19391 }, { "epoch": 0.89, "grad_norm": 1.3833431954435864, "learning_rate": 6.184343091936751e-07, "loss": 0.4509, "step": 19392 }, { "epoch": 0.89, "grad_norm": 0.25073552101392166, "learning_rate": 6.1791927862204e-07, "loss": 0.2202, "step": 19393 }, { "epoch": 0.89, "grad_norm": 0.3277288603599387, "learning_rate": 6.174044557606329e-07, "loss": 0.2025, "step": 19394 }, { "epoch": 0.89, "grad_norm": 0.717750845206327, "learning_rate": 6.168898406208479e-07, "loss": 0.3645, "step": 19395 }, { "epoch": 0.89, "grad_norm": 0.8804302489269706, "learning_rate": 6.16375433214077e-07, "loss": 0.5006, "step": 19396 }, { "epoch": 0.89, "grad_norm": 0.30523221088621844, "learning_rate": 6.1586123355171e-07, "loss": 0.2532, "step": 19397 }, { "epoch": 0.89, "grad_norm": 0.6972045812168728, "learning_rate": 6.153472416451301e-07, "loss": 0.2386, "step": 19398 }, { "epoch": 0.89, "grad_norm": 0.4821174894118999, "learning_rate": 6.148334575057191e-07, "loss": 0.2678, "step": 19399 }, { "epoch": 0.89, "grad_norm": 0.35569729390183025, "learning_rate": 6.14319881144848e-07, "loss": 0.2592, "step": 19400 }, { "epoch": 0.89, "grad_norm": 0.34377455265383444, "learning_rate": 6.138065125738901e-07, "loss": 0.2494, "step": 19401 }, { "epoch": 0.89, "grad_norm": 0.4446249125903841, "learning_rate": 6.132933518042094e-07, "loss": 0.2829, "step": 19402 }, { "epoch": 0.89, "grad_norm": 0.3735080765879937, "learning_rate": 6.127803988471659e-07, "loss": 0.2721, "step": 19403 }, { "epoch": 0.89, "grad_norm": 0.7032681274098433, "learning_rate": 6.122676537141182e-07, "loss": 0.248, "step": 19404 }, { "epoch": 0.89, "grad_norm": 0.36336825146632873, "learning_rate": 6.117551164164159e-07, "loss": 0.2751, "step": 19405 }, { "epoch": 0.89, "grad_norm": 0.3379826316242414, "learning_rate": 6.112427869654059e-07, "loss": 0.2467, "step": 19406 }, { "epoch": 0.89, "grad_norm": 0.7612297529412821, "learning_rate": 6.107306653724332e-07, "loss": 0.3003, "step": 19407 }, { "epoch": 0.89, "grad_norm": 0.43851991376851945, "learning_rate": 6.102187516488323e-07, "loss": 0.2488, "step": 19408 }, { "epoch": 0.89, "grad_norm": 0.2861592469040228, "learning_rate": 6.097070458059406e-07, "loss": 0.2417, "step": 19409 }, { "epoch": 0.89, "grad_norm": 1.4134174752803659, "learning_rate": 6.091955478550815e-07, "loss": 0.4973, "step": 19410 }, { "epoch": 0.89, "grad_norm": 0.6930038615247549, "learning_rate": 6.086842578075835e-07, "loss": 0.2633, "step": 19411 }, { "epoch": 0.89, "grad_norm": 0.35406910311529566, "learning_rate": 6.081731756747644e-07, "loss": 0.2691, "step": 19412 }, { "epoch": 0.89, "grad_norm": 0.36335280781815466, "learning_rate": 6.076623014679406e-07, "loss": 0.2958, "step": 19413 }, { "epoch": 0.89, "grad_norm": 0.3194835933180014, "learning_rate": 6.071516351984197e-07, "loss": 0.1162, "step": 19414 }, { "epoch": 0.89, "grad_norm": 0.4323786296577063, "learning_rate": 6.066411768775083e-07, "loss": 0.2579, "step": 19415 }, { "epoch": 0.89, "grad_norm": 0.6848942729070734, "learning_rate": 6.061309265165094e-07, "loss": 0.3316, "step": 19416 }, { "epoch": 0.89, "grad_norm": 0.35496582409054395, "learning_rate": 6.056208841267153e-07, "loss": 0.2356, "step": 19417 }, { "epoch": 0.89, "grad_norm": 0.3822526091803811, "learning_rate": 6.051110497194213e-07, "loss": 0.2684, "step": 19418 }, { "epoch": 0.89, "grad_norm": 0.7161795921314614, "learning_rate": 6.046014233059161e-07, "loss": 0.4655, "step": 19419 }, { "epoch": 0.89, "grad_norm": 0.21624760364641082, "learning_rate": 6.040920048974774e-07, "loss": 0.1651, "step": 19420 }, { "epoch": 0.89, "grad_norm": 0.352422385438307, "learning_rate": 6.035827945053874e-07, "loss": 0.2496, "step": 19421 }, { "epoch": 0.89, "grad_norm": 1.4680557023536365, "learning_rate": 6.030737921409169e-07, "loss": 0.4356, "step": 19422 }, { "epoch": 0.89, "grad_norm": 0.8506876538445198, "learning_rate": 6.025649978153358e-07, "loss": 0.3345, "step": 19423 }, { "epoch": 0.89, "grad_norm": 0.36522173005242015, "learning_rate": 6.020564115399085e-07, "loss": 0.2078, "step": 19424 }, { "epoch": 0.89, "grad_norm": 0.38610344761970394, "learning_rate": 6.015480333258949e-07, "loss": 0.3142, "step": 19425 }, { "epoch": 0.89, "grad_norm": 0.4557996304800295, "learning_rate": 6.010398631845493e-07, "loss": 0.2487, "step": 19426 }, { "epoch": 0.89, "grad_norm": 0.2901577459956041, "learning_rate": 6.005319011271205e-07, "loss": 0.161, "step": 19427 }, { "epoch": 0.89, "grad_norm": 1.039855760616618, "learning_rate": 6.000241471648582e-07, "loss": 0.4405, "step": 19428 }, { "epoch": 0.89, "grad_norm": 0.3615874166830275, "learning_rate": 5.995166013090004e-07, "loss": 0.3021, "step": 19429 }, { "epoch": 0.89, "grad_norm": 0.35270930655655214, "learning_rate": 5.990092635707856e-07, "loss": 0.1814, "step": 19430 }, { "epoch": 0.89, "grad_norm": 1.3321280171541616, "learning_rate": 5.985021339614449e-07, "loss": 0.7909, "step": 19431 }, { "epoch": 0.89, "grad_norm": 0.411896462502767, "learning_rate": 5.979952124922039e-07, "loss": 0.3373, "step": 19432 }, { "epoch": 0.89, "grad_norm": 0.25527619123574175, "learning_rate": 5.974884991742902e-07, "loss": 0.1162, "step": 19433 }, { "epoch": 0.89, "grad_norm": 0.38655337960344727, "learning_rate": 5.969819940189159e-07, "loss": 0.2677, "step": 19434 }, { "epoch": 0.89, "grad_norm": 0.7090671761199423, "learning_rate": 5.964756970372998e-07, "loss": 0.3575, "step": 19435 }, { "epoch": 0.89, "grad_norm": 0.5974598634155753, "learning_rate": 5.959696082406474e-07, "loss": 0.3002, "step": 19436 }, { "epoch": 0.89, "grad_norm": 0.2753184209205759, "learning_rate": 5.954637276401643e-07, "loss": 0.2489, "step": 19437 }, { "epoch": 0.89, "grad_norm": 0.32556935244222474, "learning_rate": 5.949580552470502e-07, "loss": 0.1862, "step": 19438 }, { "epoch": 0.89, "grad_norm": 0.40221940467920436, "learning_rate": 5.944525910724996e-07, "loss": 0.2335, "step": 19439 }, { "epoch": 0.89, "grad_norm": 0.38726074289808626, "learning_rate": 5.939473351277037e-07, "loss": 0.251, "step": 19440 }, { "epoch": 0.89, "grad_norm": 0.5446160385748545, "learning_rate": 5.934422874238466e-07, "loss": 0.3468, "step": 19441 }, { "epoch": 0.89, "grad_norm": 0.4948161619657878, "learning_rate": 5.92937447972114e-07, "loss": 0.259, "step": 19442 }, { "epoch": 0.89, "grad_norm": 0.5810466310252292, "learning_rate": 5.924328167836791e-07, "loss": 0.2771, "step": 19443 }, { "epoch": 0.89, "grad_norm": 0.3472336298062181, "learning_rate": 5.919283938697118e-07, "loss": 0.2766, "step": 19444 }, { "epoch": 0.89, "grad_norm": 0.288846263349043, "learning_rate": 5.914241792413855e-07, "loss": 0.1876, "step": 19445 }, { "epoch": 0.89, "grad_norm": 0.4784640650315341, "learning_rate": 5.90920172909858e-07, "loss": 0.2564, "step": 19446 }, { "epoch": 0.89, "grad_norm": 0.4818491873725768, "learning_rate": 5.904163748862902e-07, "loss": 0.2674, "step": 19447 }, { "epoch": 0.89, "grad_norm": 0.3330787446198168, "learning_rate": 5.899127851818342e-07, "loss": 0.244, "step": 19448 }, { "epoch": 0.89, "grad_norm": 0.4137655503127341, "learning_rate": 5.894094038076392e-07, "loss": 0.3159, "step": 19449 }, { "epoch": 0.89, "grad_norm": 0.27598339298049307, "learning_rate": 5.889062307748517e-07, "loss": 0.119, "step": 19450 }, { "epoch": 0.89, "grad_norm": 0.4113829426351915, "learning_rate": 5.884032660946071e-07, "loss": 0.2736, "step": 19451 }, { "epoch": 0.89, "grad_norm": 0.5328368846844337, "learning_rate": 5.879005097780455e-07, "loss": 0.3099, "step": 19452 }, { "epoch": 0.89, "grad_norm": 0.3331385255521013, "learning_rate": 5.873979618362935e-07, "loss": 0.1932, "step": 19453 }, { "epoch": 0.89, "grad_norm": 0.6094260404893885, "learning_rate": 5.868956222804789e-07, "loss": 0.3552, "step": 19454 }, { "epoch": 0.89, "grad_norm": 0.4504885068921421, "learning_rate": 5.863934911217239e-07, "loss": 0.353, "step": 19455 }, { "epoch": 0.89, "grad_norm": 0.3445020439013414, "learning_rate": 5.85891568371143e-07, "loss": 0.2327, "step": 19456 }, { "epoch": 0.89, "grad_norm": 0.6251606229293999, "learning_rate": 5.853898540398495e-07, "loss": 0.2943, "step": 19457 }, { "epoch": 0.89, "grad_norm": 0.3992083676782403, "learning_rate": 5.8488834813895e-07, "loss": 0.2911, "step": 19458 }, { "epoch": 0.89, "grad_norm": 0.2594681445495387, "learning_rate": 5.843870506795502e-07, "loss": 0.081, "step": 19459 }, { "epoch": 0.89, "grad_norm": 0.32547519914632644, "learning_rate": 5.838859616727455e-07, "loss": 0.2581, "step": 19460 }, { "epoch": 0.89, "grad_norm": 0.3793930579102918, "learning_rate": 5.833850811296282e-07, "loss": 0.3048, "step": 19461 }, { "epoch": 0.89, "grad_norm": 1.1247583537727894, "learning_rate": 5.828844090612918e-07, "loss": 0.4597, "step": 19462 }, { "epoch": 0.89, "grad_norm": 0.44324822110961787, "learning_rate": 5.823839454788161e-07, "loss": 0.2178, "step": 19463 }, { "epoch": 0.89, "grad_norm": 0.47470864976821686, "learning_rate": 5.818836903932857e-07, "loss": 0.2956, "step": 19464 }, { "epoch": 0.89, "grad_norm": 0.2501264945155644, "learning_rate": 5.813836438157716e-07, "loss": 0.183, "step": 19465 }, { "epoch": 0.89, "grad_norm": 0.3505468328392777, "learning_rate": 5.808838057573451e-07, "loss": 0.1881, "step": 19466 }, { "epoch": 0.89, "grad_norm": 0.6005927463443071, "learning_rate": 5.803841762290741e-07, "loss": 0.369, "step": 19467 }, { "epoch": 0.89, "grad_norm": 0.38753030703860153, "learning_rate": 5.798847552420184e-07, "loss": 0.2825, "step": 19468 }, { "epoch": 0.89, "grad_norm": 0.5075264765892931, "learning_rate": 5.793855428072348e-07, "loss": 0.2249, "step": 19469 }, { "epoch": 0.89, "grad_norm": 0.4262243846034928, "learning_rate": 5.788865389357745e-07, "loss": 0.2834, "step": 19470 }, { "epoch": 0.89, "grad_norm": 0.2703331153704299, "learning_rate": 5.783877436386876e-07, "loss": 0.1782, "step": 19471 }, { "epoch": 0.89, "grad_norm": 0.3571432286423395, "learning_rate": 5.778891569270162e-07, "loss": 0.2699, "step": 19472 }, { "epoch": 0.89, "grad_norm": 0.35315362334060674, "learning_rate": 5.77390778811796e-07, "loss": 0.2269, "step": 19473 }, { "epoch": 0.89, "grad_norm": 0.6018882501296571, "learning_rate": 5.768926093040617e-07, "loss": 0.3412, "step": 19474 }, { "epoch": 0.89, "grad_norm": 1.0671678500241466, "learning_rate": 5.763946484148442e-07, "loss": 0.4041, "step": 19475 }, { "epoch": 0.89, "grad_norm": 0.27204079004696546, "learning_rate": 5.758968961551669e-07, "loss": 0.2246, "step": 19476 }, { "epoch": 0.89, "grad_norm": 0.5493241201960452, "learning_rate": 5.75399352536048e-07, "loss": 0.1805, "step": 19477 }, { "epoch": 0.89, "grad_norm": 0.4054301864489693, "learning_rate": 5.749020175685038e-07, "loss": 0.2738, "step": 19478 }, { "epoch": 0.89, "grad_norm": 0.372262524237946, "learning_rate": 5.744048912635469e-07, "loss": 0.2336, "step": 19479 }, { "epoch": 0.89, "grad_norm": 0.37738706359355795, "learning_rate": 5.739079736321796e-07, "loss": 0.2907, "step": 19480 }, { "epoch": 0.89, "grad_norm": 0.6798143267531725, "learning_rate": 5.734112646854062e-07, "loss": 0.3343, "step": 19481 }, { "epoch": 0.89, "grad_norm": 0.42945108752446853, "learning_rate": 5.729147644342204e-07, "loss": 0.2226, "step": 19482 }, { "epoch": 0.9, "grad_norm": 0.25813971401887836, "learning_rate": 5.724184728896165e-07, "loss": 0.1431, "step": 19483 }, { "epoch": 0.9, "grad_norm": 0.3254489910391765, "learning_rate": 5.719223900625814e-07, "loss": 0.27, "step": 19484 }, { "epoch": 0.9, "grad_norm": 0.42677267634340726, "learning_rate": 5.714265159640974e-07, "loss": 0.2954, "step": 19485 }, { "epoch": 0.9, "grad_norm": 0.6273777039549431, "learning_rate": 5.709308506051436e-07, "loss": 0.2848, "step": 19486 }, { "epoch": 0.9, "grad_norm": 0.825461172667066, "learning_rate": 5.7043539399669e-07, "loss": 0.2632, "step": 19487 }, { "epoch": 0.9, "grad_norm": 0.34733526711764046, "learning_rate": 5.699401461497111e-07, "loss": 0.2847, "step": 19488 }, { "epoch": 0.9, "grad_norm": 0.33541865072786525, "learning_rate": 5.694451070751695e-07, "loss": 0.1647, "step": 19489 }, { "epoch": 0.9, "grad_norm": 0.9548438339795196, "learning_rate": 5.689502767840215e-07, "loss": 0.4663, "step": 19490 }, { "epoch": 0.9, "grad_norm": 0.3278747496853244, "learning_rate": 5.684556552872256e-07, "loss": 0.2401, "step": 19491 }, { "epoch": 0.9, "grad_norm": 0.27429348212126975, "learning_rate": 5.679612425957304e-07, "loss": 0.2131, "step": 19492 }, { "epoch": 0.9, "grad_norm": 1.0985252066559386, "learning_rate": 5.67467038720485e-07, "loss": 0.3806, "step": 19493 }, { "epoch": 0.9, "grad_norm": 0.391041876063372, "learning_rate": 5.669730436724263e-07, "loss": 0.273, "step": 19494 }, { "epoch": 0.9, "grad_norm": 1.0925805941316105, "learning_rate": 5.664792574624934e-07, "loss": 0.2394, "step": 19495 }, { "epoch": 0.9, "grad_norm": 0.45368272529833187, "learning_rate": 5.659856801016173e-07, "loss": 0.2725, "step": 19496 }, { "epoch": 0.9, "grad_norm": 0.34931069083216426, "learning_rate": 5.65492311600725e-07, "loss": 0.2764, "step": 19497 }, { "epoch": 0.9, "grad_norm": 0.872270013248097, "learning_rate": 5.649991519707409e-07, "loss": 0.4919, "step": 19498 }, { "epoch": 0.9, "grad_norm": 0.29982733126917793, "learning_rate": 5.64506201222581e-07, "loss": 0.1394, "step": 19499 }, { "epoch": 0.9, "grad_norm": 0.3057956591155022, "learning_rate": 5.640134593671598e-07, "loss": 0.2516, "step": 19500 }, { "epoch": 0.9, "grad_norm": 1.3232166164868833, "learning_rate": 5.635209264153874e-07, "loss": 0.3737, "step": 19501 }, { "epoch": 0.9, "grad_norm": 0.576559561644349, "learning_rate": 5.63028602378165e-07, "loss": 0.2319, "step": 19502 }, { "epoch": 0.9, "grad_norm": 0.4141251241639885, "learning_rate": 5.625364872663963e-07, "loss": 0.2795, "step": 19503 }, { "epoch": 0.9, "grad_norm": 0.366525372927563, "learning_rate": 5.620445810909703e-07, "loss": 0.2981, "step": 19504 }, { "epoch": 0.9, "grad_norm": 0.1374273000053429, "learning_rate": 5.615528838627838e-07, "loss": 0.0689, "step": 19505 }, { "epoch": 0.9, "grad_norm": 0.4315390063896131, "learning_rate": 5.61061395592718e-07, "loss": 0.303, "step": 19506 }, { "epoch": 0.9, "grad_norm": 1.0234380104797598, "learning_rate": 5.605701162916566e-07, "loss": 0.3933, "step": 19507 }, { "epoch": 0.9, "grad_norm": 0.3383641370599889, "learning_rate": 5.600790459704742e-07, "loss": 0.2442, "step": 19508 }, { "epoch": 0.9, "grad_norm": 0.3491526831085786, "learning_rate": 5.59588184640043e-07, "loss": 0.2414, "step": 19509 }, { "epoch": 0.9, "grad_norm": 0.5977105187509787, "learning_rate": 5.590975323112324e-07, "loss": 0.2541, "step": 19510 }, { "epoch": 0.9, "grad_norm": 0.613543446574011, "learning_rate": 5.586070889949013e-07, "loss": 0.2714, "step": 19511 }, { "epoch": 0.9, "grad_norm": 0.2568584535482536, "learning_rate": 5.581168547019112e-07, "loss": 0.2164, "step": 19512 }, { "epoch": 0.9, "grad_norm": 1.1696856517485377, "learning_rate": 5.576268294431131e-07, "loss": 0.4274, "step": 19513 }, { "epoch": 0.9, "grad_norm": 0.7603651793144063, "learning_rate": 5.571370132293552e-07, "loss": 0.3984, "step": 19514 }, { "epoch": 0.9, "grad_norm": 0.3066639817005141, "learning_rate": 5.566474060714844e-07, "loss": 0.1988, "step": 19515 }, { "epoch": 0.9, "grad_norm": 0.37118270829059813, "learning_rate": 5.561580079803375e-07, "loss": 0.3167, "step": 19516 }, { "epoch": 0.9, "grad_norm": 1.0306085896225783, "learning_rate": 5.556688189667492e-07, "loss": 0.1625, "step": 19517 }, { "epoch": 0.9, "grad_norm": 0.3107723342864482, "learning_rate": 5.55179839041553e-07, "loss": 0.1779, "step": 19518 }, { "epoch": 0.9, "grad_norm": 0.7372747698490006, "learning_rate": 5.546910682155704e-07, "loss": 0.4002, "step": 19519 }, { "epoch": 0.9, "grad_norm": 0.3648138041339951, "learning_rate": 5.542025064996248e-07, "loss": 0.299, "step": 19520 }, { "epoch": 0.9, "grad_norm": 0.29524586493765337, "learning_rate": 5.537141539045298e-07, "loss": 0.1882, "step": 19521 }, { "epoch": 0.9, "grad_norm": 1.2376353837688123, "learning_rate": 5.532260104411014e-07, "loss": 0.53, "step": 19522 }, { "epoch": 0.9, "grad_norm": 0.2669633203671443, "learning_rate": 5.527380761201428e-07, "loss": 0.1889, "step": 19523 }, { "epoch": 0.9, "grad_norm": 0.3785260016416561, "learning_rate": 5.522503509524591e-07, "loss": 0.2678, "step": 19524 }, { "epoch": 0.9, "grad_norm": 0.5454181293191016, "learning_rate": 5.517628349488458e-07, "loss": 0.2577, "step": 19525 }, { "epoch": 0.9, "grad_norm": 0.8810513994462302, "learning_rate": 5.512755281200965e-07, "loss": 0.4511, "step": 19526 }, { "epoch": 0.9, "grad_norm": 0.335831981770893, "learning_rate": 5.507884304770028e-07, "loss": 0.2592, "step": 19527 }, { "epoch": 0.9, "grad_norm": 0.32256497286262864, "learning_rate": 5.503015420303437e-07, "loss": 0.2457, "step": 19528 }, { "epoch": 0.9, "grad_norm": 0.26240729063673424, "learning_rate": 5.498148627909017e-07, "loss": 0.1583, "step": 19529 }, { "epoch": 0.9, "grad_norm": 0.38437328275928395, "learning_rate": 5.493283927694492e-07, "loss": 0.2343, "step": 19530 }, { "epoch": 0.9, "grad_norm": 0.4607063767153047, "learning_rate": 5.488421319767578e-07, "loss": 0.2561, "step": 19531 }, { "epoch": 0.9, "grad_norm": 0.6374528843923447, "learning_rate": 5.483560804235943e-07, "loss": 0.3631, "step": 19532 }, { "epoch": 0.9, "grad_norm": 0.3380508135557405, "learning_rate": 5.478702381207146e-07, "loss": 0.249, "step": 19533 }, { "epoch": 0.9, "grad_norm": 1.472709549865522, "learning_rate": 5.473846050788789e-07, "loss": 0.2867, "step": 19534 }, { "epoch": 0.9, "grad_norm": 0.24015571464238913, "learning_rate": 5.468991813088375e-07, "loss": 0.2188, "step": 19535 }, { "epoch": 0.9, "grad_norm": 0.349468326815021, "learning_rate": 5.464139668213386e-07, "loss": 0.235, "step": 19536 }, { "epoch": 0.9, "grad_norm": 0.6619302382591248, "learning_rate": 5.459289616271224e-07, "loss": 0.3563, "step": 19537 }, { "epoch": 0.9, "grad_norm": 1.0513130135012814, "learning_rate": 5.454441657369247e-07, "loss": 0.3341, "step": 19538 }, { "epoch": 0.9, "grad_norm": 0.42263858936616483, "learning_rate": 5.449595791614836e-07, "loss": 0.2698, "step": 19539 }, { "epoch": 0.9, "grad_norm": 0.3101466776684008, "learning_rate": 5.444752019115229e-07, "loss": 0.2802, "step": 19540 }, { "epoch": 0.9, "grad_norm": 0.5406091135683908, "learning_rate": 5.439910339977694e-07, "loss": 0.1888, "step": 19541 }, { "epoch": 0.9, "grad_norm": 0.4549053927322506, "learning_rate": 5.435070754309402e-07, "loss": 0.2608, "step": 19542 }, { "epoch": 0.9, "grad_norm": 0.2945722692526129, "learning_rate": 5.430233262217488e-07, "loss": 0.2622, "step": 19543 }, { "epoch": 0.9, "grad_norm": 0.5130242383216363, "learning_rate": 5.425397863809079e-07, "loss": 0.2462, "step": 19544 }, { "epoch": 0.9, "grad_norm": 0.3248577152922799, "learning_rate": 5.420564559191188e-07, "loss": 0.2335, "step": 19545 }, { "epoch": 0.9, "grad_norm": 1.4070545486253954, "learning_rate": 5.415733348470864e-07, "loss": 0.4504, "step": 19546 }, { "epoch": 0.9, "grad_norm": 0.3902777301198903, "learning_rate": 5.410904231755032e-07, "loss": 0.2412, "step": 19547 }, { "epoch": 0.9, "grad_norm": 0.35341047684089694, "learning_rate": 5.406077209150606e-07, "loss": 0.2275, "step": 19548 }, { "epoch": 0.9, "grad_norm": 0.3329012954175152, "learning_rate": 5.401252280764469e-07, "loss": 0.2141, "step": 19549 }, { "epoch": 0.9, "grad_norm": 1.435140010245774, "learning_rate": 5.396429446703433e-07, "loss": 0.7832, "step": 19550 }, { "epoch": 0.9, "grad_norm": 0.32700795106193686, "learning_rate": 5.391608707074258e-07, "loss": 0.2047, "step": 19551 }, { "epoch": 0.9, "grad_norm": 0.36045084168404257, "learning_rate": 5.386790061983682e-07, "loss": 0.2898, "step": 19552 }, { "epoch": 0.9, "grad_norm": 1.0226706262825154, "learning_rate": 5.381973511538396e-07, "loss": 0.3851, "step": 19553 }, { "epoch": 0.9, "grad_norm": 0.3451572256093321, "learning_rate": 5.377159055845028e-07, "loss": 0.214, "step": 19554 }, { "epoch": 0.9, "grad_norm": 0.42548728789439844, "learning_rate": 5.372346695010145e-07, "loss": 0.2532, "step": 19555 }, { "epoch": 0.9, "grad_norm": 0.4041027884326333, "learning_rate": 5.367536429140308e-07, "loss": 0.2431, "step": 19556 }, { "epoch": 0.9, "grad_norm": 0.35770669044320574, "learning_rate": 5.362728258341998e-07, "loss": 0.184, "step": 19557 }, { "epoch": 0.9, "grad_norm": 0.6485639923932551, "learning_rate": 5.357922182721687e-07, "loss": 0.3874, "step": 19558 }, { "epoch": 0.9, "grad_norm": 0.37034597076583015, "learning_rate": 5.353118202385743e-07, "loss": 0.2948, "step": 19559 }, { "epoch": 0.9, "grad_norm": 0.369206817896097, "learning_rate": 5.348316317440549e-07, "loss": 0.2013, "step": 19560 }, { "epoch": 0.9, "grad_norm": 0.3743217322839835, "learning_rate": 5.34351652799242e-07, "loss": 0.2441, "step": 19561 }, { "epoch": 0.9, "grad_norm": 0.6082038549322307, "learning_rate": 5.338718834147583e-07, "loss": 0.2271, "step": 19562 }, { "epoch": 0.9, "grad_norm": 0.38191081718532044, "learning_rate": 5.333923236012295e-07, "loss": 0.2748, "step": 19563 }, { "epoch": 0.9, "grad_norm": 0.33149731099439955, "learning_rate": 5.329129733692684e-07, "loss": 0.2337, "step": 19564 }, { "epoch": 0.9, "grad_norm": 0.7064794956872598, "learning_rate": 5.324338327294909e-07, "loss": 0.3646, "step": 19565 }, { "epoch": 0.9, "grad_norm": 0.7050058186233589, "learning_rate": 5.319549016925041e-07, "loss": 0.2827, "step": 19566 }, { "epoch": 0.9, "grad_norm": 0.3001333371978063, "learning_rate": 5.314761802689083e-07, "loss": 0.1732, "step": 19567 }, { "epoch": 0.9, "grad_norm": 0.3713104733442832, "learning_rate": 5.309976684693053e-07, "loss": 0.2664, "step": 19568 }, { "epoch": 0.9, "grad_norm": 0.3999708170510087, "learning_rate": 5.305193663042862e-07, "loss": 0.2731, "step": 19569 }, { "epoch": 0.9, "grad_norm": 0.58872414662589, "learning_rate": 5.30041273784444e-07, "loss": 0.2406, "step": 19570 }, { "epoch": 0.9, "grad_norm": 0.35200948140516203, "learning_rate": 5.295633909203591e-07, "loss": 0.2969, "step": 19571 }, { "epoch": 0.9, "grad_norm": 0.40278998675373134, "learning_rate": 5.290857177226139e-07, "loss": 0.2411, "step": 19572 }, { "epoch": 0.9, "grad_norm": 0.6211036948515942, "learning_rate": 5.286082542017812e-07, "loss": 0.2171, "step": 19573 }, { "epoch": 0.9, "grad_norm": 0.25164053817988535, "learning_rate": 5.281310003684337e-07, "loss": 0.1785, "step": 19574 }, { "epoch": 0.9, "grad_norm": 0.34537468029334767, "learning_rate": 5.276539562331384e-07, "loss": 0.2552, "step": 19575 }, { "epoch": 0.9, "grad_norm": 0.38454438588556517, "learning_rate": 5.271771218064526e-07, "loss": 0.2776, "step": 19576 }, { "epoch": 0.9, "grad_norm": 0.7080971729085463, "learning_rate": 5.267004970989365e-07, "loss": 0.2856, "step": 19577 }, { "epoch": 0.9, "grad_norm": 0.710040166600337, "learning_rate": 5.262240821211417e-07, "loss": 0.2564, "step": 19578 }, { "epoch": 0.9, "grad_norm": 0.39034852512430224, "learning_rate": 5.257478768836133e-07, "loss": 0.2655, "step": 19579 }, { "epoch": 0.9, "grad_norm": 0.2370447145042257, "learning_rate": 5.252718813968971e-07, "loss": 0.1579, "step": 19580 }, { "epoch": 0.9, "grad_norm": 0.6328402325884102, "learning_rate": 5.247960956715259e-07, "loss": 0.297, "step": 19581 }, { "epoch": 0.9, "grad_norm": 0.38275854652265867, "learning_rate": 5.243205197180412e-07, "loss": 0.2823, "step": 19582 }, { "epoch": 0.9, "grad_norm": 0.36522964623513204, "learning_rate": 5.238451535469658e-07, "loss": 0.2453, "step": 19583 }, { "epoch": 0.9, "grad_norm": 0.5912301414077614, "learning_rate": 5.233699971688256e-07, "loss": 0.2455, "step": 19584 }, { "epoch": 0.9, "grad_norm": 0.42143163426848074, "learning_rate": 5.228950505941399e-07, "loss": 0.2759, "step": 19585 }, { "epoch": 0.9, "grad_norm": 0.2901665660874478, "learning_rate": 5.224203138334216e-07, "loss": 0.1464, "step": 19586 }, { "epoch": 0.9, "grad_norm": 0.2885509936705279, "learning_rate": 5.219457868971856e-07, "loss": 0.2371, "step": 19587 }, { "epoch": 0.9, "grad_norm": 0.5448218149539864, "learning_rate": 5.214714697959333e-07, "loss": 0.3377, "step": 19588 }, { "epoch": 0.9, "grad_norm": 0.4960796515277802, "learning_rate": 5.209973625401687e-07, "loss": 0.3035, "step": 19589 }, { "epoch": 0.9, "grad_norm": 0.5111457771830094, "learning_rate": 5.205234651403857e-07, "loss": 0.1661, "step": 19590 }, { "epoch": 0.9, "grad_norm": 0.41976264360372134, "learning_rate": 5.200497776070756e-07, "loss": 0.3228, "step": 19591 }, { "epoch": 0.9, "grad_norm": 0.49621808618065694, "learning_rate": 5.195762999507292e-07, "loss": 0.3253, "step": 19592 }, { "epoch": 0.9, "grad_norm": 0.6315173936842243, "learning_rate": 5.191030321818236e-07, "loss": 0.2394, "step": 19593 }, { "epoch": 0.9, "grad_norm": 0.4153674162110678, "learning_rate": 5.186299743108392e-07, "loss": 0.2827, "step": 19594 }, { "epoch": 0.9, "grad_norm": 0.22381856968066582, "learning_rate": 5.18157126348251e-07, "loss": 0.2042, "step": 19595 }, { "epoch": 0.9, "grad_norm": 0.9226079575953229, "learning_rate": 5.17684488304524e-07, "loss": 0.1162, "step": 19596 }, { "epoch": 0.9, "grad_norm": 0.4939142537847988, "learning_rate": 5.172120601901243e-07, "loss": 0.3007, "step": 19597 }, { "epoch": 0.9, "grad_norm": 0.46736350609043203, "learning_rate": 5.167398420155068e-07, "loss": 0.3297, "step": 19598 }, { "epoch": 0.9, "grad_norm": 0.4424770792520786, "learning_rate": 5.16267833791132e-07, "loss": 0.3039, "step": 19599 }, { "epoch": 0.9, "grad_norm": 0.371209593083219, "learning_rate": 5.157960355274461e-07, "loss": 0.2233, "step": 19600 }, { "epoch": 0.9, "grad_norm": 0.2755786208095063, "learning_rate": 5.153244472348951e-07, "loss": 0.1517, "step": 19601 }, { "epoch": 0.9, "grad_norm": 1.033116538517288, "learning_rate": 5.148530689239206e-07, "loss": 0.3489, "step": 19602 }, { "epoch": 0.9, "grad_norm": 0.25746873352794136, "learning_rate": 5.143819006049532e-07, "loss": 0.2169, "step": 19603 }, { "epoch": 0.9, "grad_norm": 1.0034686947591478, "learning_rate": 5.139109422884326e-07, "loss": 0.4384, "step": 19604 }, { "epoch": 0.9, "grad_norm": 0.747423985984192, "learning_rate": 5.134401939847789e-07, "loss": 0.3747, "step": 19605 }, { "epoch": 0.9, "grad_norm": 0.3610567336007929, "learning_rate": 5.129696557044173e-07, "loss": 0.2135, "step": 19606 }, { "epoch": 0.9, "grad_norm": 0.27768318707035533, "learning_rate": 5.124993274577617e-07, "loss": 0.2255, "step": 19607 }, { "epoch": 0.9, "grad_norm": 0.4091754429497875, "learning_rate": 5.12029209255227e-07, "loss": 0.1977, "step": 19608 }, { "epoch": 0.9, "grad_norm": 0.4359770661904798, "learning_rate": 5.115593011072229e-07, "loss": 0.2174, "step": 19609 }, { "epoch": 0.9, "grad_norm": 0.4314319137275342, "learning_rate": 5.110896030241497e-07, "loss": 0.3063, "step": 19610 }, { "epoch": 0.9, "grad_norm": 0.4444521185235352, "learning_rate": 5.10620115016407e-07, "loss": 0.3027, "step": 19611 }, { "epoch": 0.9, "grad_norm": 0.37832480380725725, "learning_rate": 5.101508370943897e-07, "loss": 0.3076, "step": 19612 }, { "epoch": 0.9, "grad_norm": 0.2674426170772036, "learning_rate": 5.096817692684864e-07, "loss": 0.1038, "step": 19613 }, { "epoch": 0.9, "grad_norm": 0.5502366182170125, "learning_rate": 5.092129115490818e-07, "loss": 0.3429, "step": 19614 }, { "epoch": 0.9, "grad_norm": 0.26351502322059167, "learning_rate": 5.087442639465557e-07, "loss": 0.243, "step": 19615 }, { "epoch": 0.9, "grad_norm": 0.653914918491935, "learning_rate": 5.082758264712828e-07, "loss": 0.2668, "step": 19616 }, { "epoch": 0.9, "grad_norm": 0.8484333902960137, "learning_rate": 5.078075991336351e-07, "loss": 0.5113, "step": 19617 }, { "epoch": 0.9, "grad_norm": 0.38693701897836535, "learning_rate": 5.073395819439797e-07, "loss": 0.2994, "step": 19618 }, { "epoch": 0.9, "grad_norm": 0.4309157312124287, "learning_rate": 5.068717749126772e-07, "loss": 0.2315, "step": 19619 }, { "epoch": 0.9, "grad_norm": 0.27435493336142197, "learning_rate": 5.064041780500817e-07, "loss": 0.1669, "step": 19620 }, { "epoch": 0.9, "grad_norm": 0.3830696055662994, "learning_rate": 5.059367913665503e-07, "loss": 0.2719, "step": 19621 }, { "epoch": 0.9, "grad_norm": 0.7556990647460939, "learning_rate": 5.054696148724259e-07, "loss": 0.2822, "step": 19622 }, { "epoch": 0.9, "grad_norm": 0.34434066872051233, "learning_rate": 5.050026485780546e-07, "loss": 0.3016, "step": 19623 }, { "epoch": 0.9, "grad_norm": 0.3693384659842256, "learning_rate": 5.045358924937726e-07, "loss": 0.2705, "step": 19624 }, { "epoch": 0.9, "grad_norm": 1.5732034596227875, "learning_rate": 5.040693466299129e-07, "loss": 0.4483, "step": 19625 }, { "epoch": 0.9, "grad_norm": 0.20061946145955564, "learning_rate": 5.036030109968082e-07, "loss": 0.152, "step": 19626 }, { "epoch": 0.9, "grad_norm": 0.3655570234364517, "learning_rate": 5.03136885604778e-07, "loss": 0.2606, "step": 19627 }, { "epoch": 0.9, "grad_norm": 0.7057237169058612, "learning_rate": 5.026709704641441e-07, "loss": 0.3454, "step": 19628 }, { "epoch": 0.9, "grad_norm": 0.9295579075306751, "learning_rate": 5.022052655852228e-07, "loss": 0.2565, "step": 19629 }, { "epoch": 0.9, "grad_norm": 0.4108744787879554, "learning_rate": 5.017397709783212e-07, "loss": 0.2931, "step": 19630 }, { "epoch": 0.9, "grad_norm": 0.39780159591038405, "learning_rate": 5.012744866537478e-07, "loss": 0.2678, "step": 19631 }, { "epoch": 0.9, "grad_norm": 0.40484375787503546, "learning_rate": 5.00809412621801e-07, "loss": 0.1349, "step": 19632 }, { "epoch": 0.9, "grad_norm": 0.33348181540655064, "learning_rate": 5.003445488927794e-07, "loss": 0.2477, "step": 19633 }, { "epoch": 0.9, "grad_norm": 0.5613506797260215, "learning_rate": 4.998798954769724e-07, "loss": 0.3497, "step": 19634 }, { "epoch": 0.9, "grad_norm": 0.4712242288126376, "learning_rate": 4.994154523846695e-07, "loss": 0.2834, "step": 19635 }, { "epoch": 0.9, "grad_norm": 0.3641231548796775, "learning_rate": 4.989512196261503e-07, "loss": 0.2565, "step": 19636 }, { "epoch": 0.9, "grad_norm": 1.783803333416028, "learning_rate": 4.984871972116945e-07, "loss": 0.5295, "step": 19637 }, { "epoch": 0.9, "grad_norm": 0.30109901561395735, "learning_rate": 4.980233851515759e-07, "loss": 0.2298, "step": 19638 }, { "epoch": 0.9, "grad_norm": 0.342230552838601, "learning_rate": 4.975597834560597e-07, "loss": 0.217, "step": 19639 }, { "epoch": 0.9, "grad_norm": 0.40426406321144526, "learning_rate": 4.970963921354133e-07, "loss": 0.2151, "step": 19640 }, { "epoch": 0.9, "grad_norm": 1.2673502442551616, "learning_rate": 4.966332111998918e-07, "loss": 0.5978, "step": 19641 }, { "epoch": 0.9, "grad_norm": 0.3573757646755341, "learning_rate": 4.961702406597513e-07, "loss": 0.1814, "step": 19642 }, { "epoch": 0.9, "grad_norm": 0.4271281896969654, "learning_rate": 4.957074805252438e-07, "loss": 0.2692, "step": 19643 }, { "epoch": 0.9, "grad_norm": 0.6822296158400306, "learning_rate": 4.952449308066099e-07, "loss": 0.3746, "step": 19644 }, { "epoch": 0.9, "grad_norm": 0.3365251071173328, "learning_rate": 4.947825915140946e-07, "loss": 0.1904, "step": 19645 }, { "epoch": 0.9, "grad_norm": 0.2686791125298311, "learning_rate": 4.94320462657929e-07, "loss": 0.181, "step": 19646 }, { "epoch": 0.9, "grad_norm": 0.5117345591170361, "learning_rate": 4.93858544248349e-07, "loss": 0.3892, "step": 19647 }, { "epoch": 0.9, "grad_norm": 0.3317222949272828, "learning_rate": 4.933968362955788e-07, "loss": 0.1821, "step": 19648 }, { "epoch": 0.9, "grad_norm": 0.8268837516276252, "learning_rate": 4.929353388098379e-07, "loss": 0.3948, "step": 19649 }, { "epoch": 0.9, "grad_norm": 0.47167547509246294, "learning_rate": 4.924740518013471e-07, "loss": 0.3324, "step": 19650 }, { "epoch": 0.9, "grad_norm": 0.31109891637165965, "learning_rate": 4.92012975280316e-07, "loss": 0.267, "step": 19651 }, { "epoch": 0.9, "grad_norm": 0.3357105022891711, "learning_rate": 4.915521092569553e-07, "loss": 0.1001, "step": 19652 }, { "epoch": 0.9, "grad_norm": 1.428533001954699, "learning_rate": 4.910914537414657e-07, "loss": 0.7241, "step": 19653 }, { "epoch": 0.9, "grad_norm": 0.3328088537488865, "learning_rate": 4.906310087440469e-07, "loss": 0.2212, "step": 19654 }, { "epoch": 0.9, "grad_norm": 0.3509923634391299, "learning_rate": 4.90170774274893e-07, "loss": 0.2336, "step": 19655 }, { "epoch": 0.9, "grad_norm": 0.6132607002630902, "learning_rate": 4.897107503441912e-07, "loss": 0.3547, "step": 19656 }, { "epoch": 0.9, "grad_norm": 0.36333551498806105, "learning_rate": 4.892509369621279e-07, "loss": 0.2463, "step": 19657 }, { "epoch": 0.9, "grad_norm": 0.21649006606653148, "learning_rate": 4.887913341388817e-07, "loss": 0.1334, "step": 19658 }, { "epoch": 0.9, "grad_norm": 0.5126165050404505, "learning_rate": 4.883319418846277e-07, "loss": 0.3799, "step": 19659 }, { "epoch": 0.9, "grad_norm": 0.3438260140668514, "learning_rate": 4.87872760209539e-07, "loss": 0.2564, "step": 19660 }, { "epoch": 0.9, "grad_norm": 0.680784170345311, "learning_rate": 4.874137891237784e-07, "loss": 0.2659, "step": 19661 }, { "epoch": 0.9, "grad_norm": 0.3765494729902323, "learning_rate": 4.869550286375091e-07, "loss": 0.2781, "step": 19662 }, { "epoch": 0.9, "grad_norm": 0.3717314043152907, "learning_rate": 4.864964787608839e-07, "loss": 0.2225, "step": 19663 }, { "epoch": 0.9, "grad_norm": 0.2949407503610009, "learning_rate": 4.860381395040604e-07, "loss": 0.1845, "step": 19664 }, { "epoch": 0.9, "grad_norm": 0.5625720403647588, "learning_rate": 4.855800108771814e-07, "loss": 0.2902, "step": 19665 }, { "epoch": 0.9, "grad_norm": 0.41834165866616, "learning_rate": 4.851220928903922e-07, "loss": 0.2857, "step": 19666 }, { "epoch": 0.9, "grad_norm": 0.418505000138813, "learning_rate": 4.84664385553828e-07, "loss": 0.2908, "step": 19667 }, { "epoch": 0.9, "grad_norm": 1.2778845138338732, "learning_rate": 4.84206888877623e-07, "loss": 0.2531, "step": 19668 }, { "epoch": 0.9, "grad_norm": 0.43619572955125463, "learning_rate": 4.837496028719079e-07, "loss": 0.2762, "step": 19669 }, { "epoch": 0.9, "grad_norm": 0.25350154502389105, "learning_rate": 4.832925275468025e-07, "loss": 0.2272, "step": 19670 }, { "epoch": 0.9, "grad_norm": 1.0472404000705837, "learning_rate": 4.828356629124287e-07, "loss": 0.5238, "step": 19671 }, { "epoch": 0.9, "grad_norm": 0.27181130801103, "learning_rate": 4.823790089789026e-07, "loss": 0.221, "step": 19672 }, { "epoch": 0.9, "grad_norm": 0.6478343085224642, "learning_rate": 4.819225657563298e-07, "loss": 0.3844, "step": 19673 }, { "epoch": 0.9, "grad_norm": 0.4159980831174123, "learning_rate": 4.814663332548197e-07, "loss": 0.2363, "step": 19674 }, { "epoch": 0.9, "grad_norm": 0.3730436567155494, "learning_rate": 4.810103114844688e-07, "loss": 0.2535, "step": 19675 }, { "epoch": 0.9, "grad_norm": 0.5432841945783066, "learning_rate": 4.805545004553757e-07, "loss": 0.2153, "step": 19676 }, { "epoch": 0.9, "grad_norm": 0.9882795880700765, "learning_rate": 4.800989001776323e-07, "loss": 0.563, "step": 19677 }, { "epoch": 0.9, "grad_norm": 0.30364035888077595, "learning_rate": 4.796435106613217e-07, "loss": 0.1981, "step": 19678 }, { "epoch": 0.9, "grad_norm": 0.41098185686447475, "learning_rate": 4.791883319165302e-07, "loss": 0.2884, "step": 19679 }, { "epoch": 0.9, "grad_norm": 0.5238633499587887, "learning_rate": 4.787333639533298e-07, "loss": 0.2322, "step": 19680 }, { "epoch": 0.9, "grad_norm": 0.34938431977299, "learning_rate": 4.782786067817991e-07, "loss": 0.1717, "step": 19681 }, { "epoch": 0.9, "grad_norm": 0.34274778254202876, "learning_rate": 4.77824060412001e-07, "loss": 0.281, "step": 19682 }, { "epoch": 0.9, "grad_norm": 0.8241661078263844, "learning_rate": 4.773697248540022e-07, "loss": 0.4141, "step": 19683 }, { "epoch": 0.9, "grad_norm": 0.4056326951656576, "learning_rate": 4.769156001178576e-07, "loss": 0.1959, "step": 19684 }, { "epoch": 0.9, "grad_norm": 0.26316576888897975, "learning_rate": 4.76461686213624e-07, "loss": 0.1985, "step": 19685 }, { "epoch": 0.9, "grad_norm": 0.4323788151244527, "learning_rate": 4.760079831513509e-07, "loss": 0.299, "step": 19686 }, { "epoch": 0.9, "grad_norm": 0.4143349113728948, "learning_rate": 4.755544909410803e-07, "loss": 0.1577, "step": 19687 }, { "epoch": 0.9, "grad_norm": 0.5748322040193202, "learning_rate": 4.7510120959285313e-07, "loss": 0.3446, "step": 19688 }, { "epoch": 0.9, "grad_norm": 0.7227308648162886, "learning_rate": 4.746481391167068e-07, "loss": 0.3674, "step": 19689 }, { "epoch": 0.9, "grad_norm": 0.3292687923279854, "learning_rate": 4.7419527952266896e-07, "loss": 0.2742, "step": 19690 }, { "epoch": 0.9, "grad_norm": 0.4178898078263602, "learning_rate": 4.737426308207671e-07, "loss": 0.2352, "step": 19691 }, { "epoch": 0.9, "grad_norm": 0.32260181426177587, "learning_rate": 4.7329019302102096e-07, "loss": 0.1753, "step": 19692 }, { "epoch": 0.9, "grad_norm": 0.4129840386667242, "learning_rate": 4.728379661334481e-07, "loss": 0.2323, "step": 19693 }, { "epoch": 0.9, "grad_norm": 0.32454101903148397, "learning_rate": 4.723859501680594e-07, "loss": 0.2443, "step": 19694 }, { "epoch": 0.9, "grad_norm": 0.7199251552262581, "learning_rate": 4.7193414513486577e-07, "loss": 0.3886, "step": 19695 }, { "epoch": 0.9, "grad_norm": 0.5069539180343947, "learning_rate": 4.7148255104386585e-07, "loss": 0.2775, "step": 19696 }, { "epoch": 0.9, "grad_norm": 0.376878172241434, "learning_rate": 4.710311679050561e-07, "loss": 0.159, "step": 19697 }, { "epoch": 0.9, "grad_norm": 0.34960728413117415, "learning_rate": 4.7057999572843516e-07, "loss": 0.2368, "step": 19698 }, { "epoch": 0.9, "grad_norm": 1.0924672190479658, "learning_rate": 4.7012903452398615e-07, "loss": 0.3852, "step": 19699 }, { "epoch": 0.9, "grad_norm": 0.36059689624808944, "learning_rate": 4.696782843016978e-07, "loss": 0.2156, "step": 19700 }, { "epoch": 0.91, "grad_norm": 0.4933696973496774, "learning_rate": 4.6922774507154543e-07, "loss": 0.336, "step": 19701 }, { "epoch": 0.91, "grad_norm": 0.3760125699511816, "learning_rate": 4.687774168435044e-07, "loss": 0.2564, "step": 19702 }, { "epoch": 0.91, "grad_norm": 0.43444275631195906, "learning_rate": 4.6832729962754676e-07, "loss": 0.3085, "step": 19703 }, { "epoch": 0.91, "grad_norm": 0.18826943294063364, "learning_rate": 4.678773934336334e-07, "loss": 0.0715, "step": 19704 }, { "epoch": 0.91, "grad_norm": 1.0038717761266667, "learning_rate": 4.674276982717307e-07, "loss": 0.2989, "step": 19705 }, { "epoch": 0.91, "grad_norm": 0.2789744000259988, "learning_rate": 4.6697821415178867e-07, "loss": 0.2799, "step": 19706 }, { "epoch": 0.91, "grad_norm": 0.6135899969025143, "learning_rate": 4.6652894108376034e-07, "loss": 0.2823, "step": 19707 }, { "epoch": 0.91, "grad_norm": 0.5726634170603374, "learning_rate": 4.6607987907759556e-07, "loss": 0.342, "step": 19708 }, { "epoch": 0.91, "grad_norm": 0.3361356297417865, "learning_rate": 4.656310281432308e-07, "loss": 0.2179, "step": 19709 }, { "epoch": 0.91, "grad_norm": 0.2837792841623217, "learning_rate": 4.651823882906059e-07, "loss": 0.1965, "step": 19710 }, { "epoch": 0.91, "grad_norm": 0.803259917653567, "learning_rate": 4.6473395952965406e-07, "loss": 0.2846, "step": 19711 }, { "epoch": 0.91, "grad_norm": 0.40945308889029214, "learning_rate": 4.6428574187030284e-07, "loss": 0.2931, "step": 19712 }, { "epoch": 0.91, "grad_norm": 0.7242143497057505, "learning_rate": 4.6383773532247433e-07, "loss": 0.3799, "step": 19713 }, { "epoch": 0.91, "grad_norm": 0.2507826002586037, "learning_rate": 4.6338993989608506e-07, "loss": 0.2089, "step": 19714 }, { "epoch": 0.91, "grad_norm": 0.5764000414362074, "learning_rate": 4.6294235560105374e-07, "loss": 0.341, "step": 19715 }, { "epoch": 0.91, "grad_norm": 0.39813677132321484, "learning_rate": 4.6249498244728573e-07, "loss": 0.1805, "step": 19716 }, { "epoch": 0.91, "grad_norm": 0.6196067231669172, "learning_rate": 4.6204782044468654e-07, "loss": 0.2187, "step": 19717 }, { "epoch": 0.91, "grad_norm": 0.25662900185794185, "learning_rate": 4.61600869603156e-07, "loss": 0.2589, "step": 19718 }, { "epoch": 0.91, "grad_norm": 0.7283203335900867, "learning_rate": 4.611541299325883e-07, "loss": 0.3807, "step": 19719 }, { "epoch": 0.91, "grad_norm": 1.092738342565134, "learning_rate": 4.6070760144287576e-07, "loss": 0.2109, "step": 19720 }, { "epoch": 0.91, "grad_norm": 0.4111538269586557, "learning_rate": 4.602612841439014e-07, "loss": 0.2526, "step": 19721 }, { "epoch": 0.91, "grad_norm": 0.30565356392077386, "learning_rate": 4.5981517804554843e-07, "loss": 0.2823, "step": 19722 }, { "epoch": 0.91, "grad_norm": 0.2907672161228244, "learning_rate": 4.5936928315769235e-07, "loss": 0.1194, "step": 19723 }, { "epoch": 0.91, "grad_norm": 0.400908450759209, "learning_rate": 4.5892359949020413e-07, "loss": 0.2797, "step": 19724 }, { "epoch": 0.91, "grad_norm": 1.3536647085690066, "learning_rate": 4.584781270529537e-07, "loss": 0.7272, "step": 19725 }, { "epoch": 0.91, "grad_norm": 0.37145407325281, "learning_rate": 4.5803286585579973e-07, "loss": 0.3058, "step": 19726 }, { "epoch": 0.91, "grad_norm": 0.3402555374466048, "learning_rate": 4.5758781590860115e-07, "loss": 0.1883, "step": 19727 }, { "epoch": 0.91, "grad_norm": 0.37104664438654733, "learning_rate": 4.5714297722121105e-07, "loss": 0.1623, "step": 19728 }, { "epoch": 0.91, "grad_norm": 0.41852115508656573, "learning_rate": 4.5669834980347936e-07, "loss": 0.2822, "step": 19729 }, { "epoch": 0.91, "grad_norm": 0.2911871814365297, "learning_rate": 4.562539336652472e-07, "loss": 0.2115, "step": 19730 }, { "epoch": 0.91, "grad_norm": 0.875946276897322, "learning_rate": 4.5580972881635434e-07, "loss": 0.3933, "step": 19731 }, { "epoch": 0.91, "grad_norm": 0.9689322472608545, "learning_rate": 4.553657352666363e-07, "loss": 0.4092, "step": 19732 }, { "epoch": 0.91, "grad_norm": 0.3406543413540663, "learning_rate": 4.549219530259208e-07, "loss": 0.1958, "step": 19733 }, { "epoch": 0.91, "grad_norm": 0.3451287375943511, "learning_rate": 4.544783821040355e-07, "loss": 0.2645, "step": 19734 }, { "epoch": 0.91, "grad_norm": 0.7439674246675786, "learning_rate": 4.540350225107959e-07, "loss": 0.366, "step": 19735 }, { "epoch": 0.91, "grad_norm": 0.2082561503270713, "learning_rate": 4.535918742560219e-07, "loss": 0.1429, "step": 19736 }, { "epoch": 0.91, "grad_norm": 0.3788886713640703, "learning_rate": 4.531489373495235e-07, "loss": 0.3283, "step": 19737 }, { "epoch": 0.91, "grad_norm": 1.293010485749024, "learning_rate": 4.5270621180110497e-07, "loss": 0.6909, "step": 19738 }, { "epoch": 0.91, "grad_norm": 0.33674316864104575, "learning_rate": 4.5226369762056965e-07, "loss": 0.2447, "step": 19739 }, { "epoch": 0.91, "grad_norm": 0.89930482753975, "learning_rate": 4.518213948177119e-07, "loss": 0.266, "step": 19740 }, { "epoch": 0.91, "grad_norm": 0.37768048847922786, "learning_rate": 4.513793034023295e-07, "loss": 0.282, "step": 19741 }, { "epoch": 0.91, "grad_norm": 0.26326430884617374, "learning_rate": 4.509374233842045e-07, "loss": 0.2179, "step": 19742 }, { "epoch": 0.91, "grad_norm": 0.2936378141780968, "learning_rate": 4.5049575477312145e-07, "loss": 0.1209, "step": 19743 }, { "epoch": 0.91, "grad_norm": 1.0414377939499828, "learning_rate": 4.50054297578858e-07, "loss": 0.3817, "step": 19744 }, { "epoch": 0.91, "grad_norm": 0.39027183973080437, "learning_rate": 4.4961305181118743e-07, "loss": 0.2753, "step": 19745 }, { "epoch": 0.91, "grad_norm": 0.33699468590754833, "learning_rate": 4.491720174798808e-07, "loss": 0.2453, "step": 19746 }, { "epoch": 0.91, "grad_norm": 0.7103607597284152, "learning_rate": 4.4873119459469925e-07, "loss": 0.3486, "step": 19747 }, { "epoch": 0.91, "grad_norm": 0.3762038229527056, "learning_rate": 4.482905831654039e-07, "loss": 0.276, "step": 19748 }, { "epoch": 0.91, "grad_norm": 0.2219767020981018, "learning_rate": 4.478501832017501e-07, "loss": 0.1736, "step": 19749 }, { "epoch": 0.91, "grad_norm": 0.8060964864426851, "learning_rate": 4.474099947134847e-07, "loss": 0.4676, "step": 19750 }, { "epoch": 0.91, "grad_norm": 0.3561407094641517, "learning_rate": 4.469700177103575e-07, "loss": 0.2618, "step": 19751 }, { "epoch": 0.91, "grad_norm": 0.664969371254339, "learning_rate": 4.465302522021042e-07, "loss": 0.3429, "step": 19752 }, { "epoch": 0.91, "grad_norm": 0.3815591769767246, "learning_rate": 4.4609069819846473e-07, "loss": 0.2234, "step": 19753 }, { "epoch": 0.91, "grad_norm": 0.3514167519748393, "learning_rate": 4.456513557091691e-07, "loss": 0.2281, "step": 19754 }, { "epoch": 0.91, "grad_norm": 0.3723628282419125, "learning_rate": 4.4521222474394276e-07, "loss": 0.194, "step": 19755 }, { "epoch": 0.91, "grad_norm": 1.0613067796026163, "learning_rate": 4.447733053125114e-07, "loss": 0.3707, "step": 19756 }, { "epoch": 0.91, "grad_norm": 0.35486525465206176, "learning_rate": 4.4433459742458496e-07, "loss": 0.2693, "step": 19757 }, { "epoch": 0.91, "grad_norm": 0.34877866663426244, "learning_rate": 4.438961010898846e-07, "loss": 0.2973, "step": 19758 }, { "epoch": 0.91, "grad_norm": 2.0739547121001447, "learning_rate": 4.434578163181125e-07, "loss": 0.1492, "step": 19759 }, { "epoch": 0.91, "grad_norm": 0.2663948095723982, "learning_rate": 4.4301974311897533e-07, "loss": 0.2034, "step": 19760 }, { "epoch": 0.91, "grad_norm": 0.4255795553645608, "learning_rate": 4.4258188150216875e-07, "loss": 0.2592, "step": 19761 }, { "epoch": 0.91, "grad_norm": 0.3797626386090198, "learning_rate": 4.4214423147738714e-07, "loss": 0.2533, "step": 19762 }, { "epoch": 0.91, "grad_norm": 0.34444976853319015, "learning_rate": 4.417067930543217e-07, "loss": 0.2462, "step": 19763 }, { "epoch": 0.91, "grad_norm": 1.2201148908971131, "learning_rate": 4.412695662426547e-07, "loss": 0.3439, "step": 19764 }, { "epoch": 0.91, "grad_norm": 0.41309647063277094, "learning_rate": 4.408325510520661e-07, "loss": 0.2826, "step": 19765 }, { "epoch": 0.91, "grad_norm": 0.3582109704642266, "learning_rate": 4.403957474922327e-07, "loss": 0.1942, "step": 19766 }, { "epoch": 0.91, "grad_norm": 0.573767306999602, "learning_rate": 4.399591555728233e-07, "loss": 0.2585, "step": 19767 }, { "epoch": 0.91, "grad_norm": 0.6807180208184898, "learning_rate": 4.3952277530350473e-07, "loss": 0.379, "step": 19768 }, { "epoch": 0.91, "grad_norm": 0.3147821299585097, "learning_rate": 4.390866066939359e-07, "loss": 0.1857, "step": 19769 }, { "epoch": 0.91, "grad_norm": 0.2960851281082761, "learning_rate": 4.386506497537757e-07, "loss": 0.2518, "step": 19770 }, { "epoch": 0.91, "grad_norm": 1.7004741670427852, "learning_rate": 4.3821490449267534e-07, "loss": 0.5845, "step": 19771 }, { "epoch": 0.91, "grad_norm": 0.2950058679593831, "learning_rate": 4.377793709202804e-07, "loss": 0.1837, "step": 19772 }, { "epoch": 0.91, "grad_norm": 0.3021267494878759, "learning_rate": 4.373440490462344e-07, "loss": 0.2706, "step": 19773 }, { "epoch": 0.91, "grad_norm": 0.9180732422579636, "learning_rate": 4.369089388801728e-07, "loss": 0.5291, "step": 19774 }, { "epoch": 0.91, "grad_norm": 0.34708281508369293, "learning_rate": 4.3647404043173246e-07, "loss": 0.2086, "step": 19775 }, { "epoch": 0.91, "grad_norm": 0.4858649183863681, "learning_rate": 4.3603935371053784e-07, "loss": 0.2698, "step": 19776 }, { "epoch": 0.91, "grad_norm": 0.4000847379651149, "learning_rate": 4.356048787262157e-07, "loss": 0.2964, "step": 19777 }, { "epoch": 0.91, "grad_norm": 0.34787394064988847, "learning_rate": 4.351706154883828e-07, "loss": 0.2469, "step": 19778 }, { "epoch": 0.91, "grad_norm": 1.153077847298311, "learning_rate": 4.3473656400665256e-07, "loss": 0.3617, "step": 19779 }, { "epoch": 0.91, "grad_norm": 0.604102738560976, "learning_rate": 4.343027242906372e-07, "loss": 0.3501, "step": 19780 }, { "epoch": 0.91, "grad_norm": 0.2833022869709932, "learning_rate": 4.3386909634993923e-07, "loss": 0.2382, "step": 19781 }, { "epoch": 0.91, "grad_norm": 0.23452828881729748, "learning_rate": 4.334356801941597e-07, "loss": 0.1442, "step": 19782 }, { "epoch": 0.91, "grad_norm": 1.2747507230643942, "learning_rate": 4.330024758328932e-07, "loss": 0.4115, "step": 19783 }, { "epoch": 0.91, "grad_norm": 0.4149898560806994, "learning_rate": 4.3256948327573214e-07, "loss": 0.2455, "step": 19784 }, { "epoch": 0.91, "grad_norm": 0.2972455159974874, "learning_rate": 4.321367025322609e-07, "loss": 0.2335, "step": 19785 }, { "epoch": 0.91, "grad_norm": 0.6136455630499438, "learning_rate": 4.31704133612062e-07, "loss": 0.4006, "step": 19786 }, { "epoch": 0.91, "grad_norm": 0.4046642300211354, "learning_rate": 4.3127177652470987e-07, "loss": 0.2309, "step": 19787 }, { "epoch": 0.91, "grad_norm": 0.2937780580837999, "learning_rate": 4.308396312797802e-07, "loss": 0.1419, "step": 19788 }, { "epoch": 0.91, "grad_norm": 0.359238590438979, "learning_rate": 4.304076978868377e-07, "loss": 0.2902, "step": 19789 }, { "epoch": 0.91, "grad_norm": 0.4486169797273892, "learning_rate": 4.2997597635544563e-07, "loss": 0.2806, "step": 19790 }, { "epoch": 0.91, "grad_norm": 0.501753037111615, "learning_rate": 4.295444666951598e-07, "loss": 0.3068, "step": 19791 }, { "epoch": 0.91, "grad_norm": 0.861883459541339, "learning_rate": 4.29113168915537e-07, "loss": 0.2967, "step": 19792 }, { "epoch": 0.91, "grad_norm": 0.28982507873488367, "learning_rate": 4.28682083026124e-07, "loss": 0.2484, "step": 19793 }, { "epoch": 0.91, "grad_norm": 0.3036750843834868, "learning_rate": 4.2825120903646543e-07, "loss": 0.2132, "step": 19794 }, { "epoch": 0.91, "grad_norm": 2.304524203264433, "learning_rate": 4.2782054695609807e-07, "loss": 0.1853, "step": 19795 }, { "epoch": 0.91, "grad_norm": 0.4092160182506747, "learning_rate": 4.273900967945588e-07, "loss": 0.2708, "step": 19796 }, { "epoch": 0.91, "grad_norm": 0.3832262830059241, "learning_rate": 4.2695985856137767e-07, "loss": 0.2965, "step": 19797 }, { "epoch": 0.91, "grad_norm": 0.45138579902102494, "learning_rate": 4.2652983226607716e-07, "loss": 0.2676, "step": 19798 }, { "epoch": 0.91, "grad_norm": 0.3339018841304625, "learning_rate": 4.261000179181807e-07, "loss": 0.2492, "step": 19799 }, { "epoch": 0.91, "grad_norm": 0.5659456858931539, "learning_rate": 4.2567041552720066e-07, "loss": 0.2265, "step": 19800 }, { "epoch": 0.91, "grad_norm": 0.28436534322503054, "learning_rate": 4.252410251026495e-07, "loss": 0.1946, "step": 19801 }, { "epoch": 0.91, "grad_norm": 0.46856686153181293, "learning_rate": 4.248118466540352e-07, "loss": 0.2606, "step": 19802 }, { "epoch": 0.91, "grad_norm": 0.5102042285757433, "learning_rate": 4.243828801908578e-07, "loss": 0.3124, "step": 19803 }, { "epoch": 0.91, "grad_norm": 0.4817684645473332, "learning_rate": 4.239541257226132e-07, "loss": 0.3589, "step": 19804 }, { "epoch": 0.91, "grad_norm": 0.3377212346069086, "learning_rate": 4.2352558325879477e-07, "loss": 0.1892, "step": 19805 }, { "epoch": 0.91, "grad_norm": 0.302474484727652, "learning_rate": 4.230972528088917e-07, "loss": 0.226, "step": 19806 }, { "epoch": 0.91, "grad_norm": 0.5720702123811231, "learning_rate": 4.2266913438238524e-07, "loss": 0.2241, "step": 19807 }, { "epoch": 0.91, "grad_norm": 0.36213143987426427, "learning_rate": 4.2224122798875e-07, "loss": 0.1763, "step": 19808 }, { "epoch": 0.91, "grad_norm": 0.33069920549693566, "learning_rate": 4.2181353363746624e-07, "loss": 0.2846, "step": 19809 }, { "epoch": 0.91, "grad_norm": 0.9110626429122548, "learning_rate": 4.213860513379975e-07, "loss": 0.4987, "step": 19810 }, { "epoch": 0.91, "grad_norm": 0.339500681998412, "learning_rate": 4.209587810998117e-07, "loss": 0.1925, "step": 19811 }, { "epoch": 0.91, "grad_norm": 0.2544588755551442, "learning_rate": 4.2053172293236354e-07, "loss": 0.1958, "step": 19812 }, { "epoch": 0.91, "grad_norm": 0.4021873665829171, "learning_rate": 4.2010487684511105e-07, "loss": 0.2933, "step": 19813 }, { "epoch": 0.91, "grad_norm": 0.42210001958679977, "learning_rate": 4.1967824284750436e-07, "loss": 0.1948, "step": 19814 }, { "epoch": 0.91, "grad_norm": 0.48653784944104306, "learning_rate": 4.192518209489871e-07, "loss": 0.3606, "step": 19815 }, { "epoch": 0.91, "grad_norm": 1.2554167127531906, "learning_rate": 4.1882561115900054e-07, "loss": 0.5862, "step": 19816 }, { "epoch": 0.91, "grad_norm": 0.342675420844382, "learning_rate": 4.1839961348698054e-07, "loss": 0.2464, "step": 19817 }, { "epoch": 0.91, "grad_norm": 0.4176102567013411, "learning_rate": 4.179738279423573e-07, "loss": 0.2232, "step": 19818 }, { "epoch": 0.91, "grad_norm": 0.4769599159867558, "learning_rate": 4.1754825453455995e-07, "loss": 0.2129, "step": 19819 }, { "epoch": 0.91, "grad_norm": 0.5556131275289173, "learning_rate": 4.171228932730065e-07, "loss": 0.3087, "step": 19820 }, { "epoch": 0.91, "grad_norm": 0.2779744591972851, "learning_rate": 4.1669774416711715e-07, "loss": 0.2379, "step": 19821 }, { "epoch": 0.91, "grad_norm": 1.1230407746773776, "learning_rate": 4.1627280722630224e-07, "loss": 0.7306, "step": 19822 }, { "epoch": 0.91, "grad_norm": 1.0068110536288042, "learning_rate": 4.1584808245997313e-07, "loss": 0.323, "step": 19823 }, { "epoch": 0.91, "grad_norm": 0.28430074361406493, "learning_rate": 4.1542356987752775e-07, "loss": 0.1561, "step": 19824 }, { "epoch": 0.91, "grad_norm": 0.2809496820285546, "learning_rate": 4.149992694883664e-07, "loss": 0.2396, "step": 19825 }, { "epoch": 0.91, "grad_norm": 0.7192541618546603, "learning_rate": 4.1457518130188613e-07, "loss": 0.3517, "step": 19826 }, { "epoch": 0.91, "grad_norm": 0.40111991365850913, "learning_rate": 4.141513053274704e-07, "loss": 0.2193, "step": 19827 }, { "epoch": 0.91, "grad_norm": 1.407630646117947, "learning_rate": 4.137276415745062e-07, "loss": 0.7636, "step": 19828 }, { "epoch": 0.91, "grad_norm": 0.29834461721221045, "learning_rate": 4.1330419005237266e-07, "loss": 0.2311, "step": 19829 }, { "epoch": 0.91, "grad_norm": 0.44983202754844104, "learning_rate": 4.128809507704445e-07, "loss": 0.3446, "step": 19830 }, { "epoch": 0.91, "grad_norm": 0.8206746406892417, "learning_rate": 4.1245792373809213e-07, "loss": 0.2768, "step": 19831 }, { "epoch": 0.91, "grad_norm": 0.3059814248251906, "learning_rate": 4.1203510896468124e-07, "loss": 0.234, "step": 19832 }, { "epoch": 0.91, "grad_norm": 0.25980440023167445, "learning_rate": 4.116125064595722e-07, "loss": 0.2337, "step": 19833 }, { "epoch": 0.91, "grad_norm": 1.1630961426503912, "learning_rate": 4.111901162321175e-07, "loss": 0.5136, "step": 19834 }, { "epoch": 0.91, "grad_norm": 0.6564656270625742, "learning_rate": 4.107679382916763e-07, "loss": 0.3237, "step": 19835 }, { "epoch": 0.91, "grad_norm": 0.43015717468762144, "learning_rate": 4.103459726475889e-07, "loss": 0.2954, "step": 19836 }, { "epoch": 0.91, "grad_norm": 0.3782946871726423, "learning_rate": 4.09924219309199e-07, "loss": 0.265, "step": 19837 }, { "epoch": 0.91, "grad_norm": 0.34221109465433935, "learning_rate": 4.095026782858436e-07, "loss": 0.1657, "step": 19838 }, { "epoch": 0.91, "grad_norm": 0.4019943113102182, "learning_rate": 4.0908134958685506e-07, "loss": 0.2946, "step": 19839 }, { "epoch": 0.91, "grad_norm": 0.7192032351701517, "learning_rate": 4.086602332215628e-07, "loss": 0.3832, "step": 19840 }, { "epoch": 0.91, "grad_norm": 0.56774413506875, "learning_rate": 4.082393291992881e-07, "loss": 0.2147, "step": 19841 }, { "epoch": 0.91, "grad_norm": 0.3993334499086172, "learning_rate": 4.0781863752935026e-07, "loss": 0.2656, "step": 19842 }, { "epoch": 0.91, "grad_norm": 0.6146832923142538, "learning_rate": 4.073981582210629e-07, "loss": 0.3479, "step": 19843 }, { "epoch": 0.91, "grad_norm": 0.2644310243021898, "learning_rate": 4.06977891283733e-07, "loss": 0.1496, "step": 19844 }, { "epoch": 0.91, "grad_norm": 0.24713858658997417, "learning_rate": 4.065578367266698e-07, "loss": 0.2089, "step": 19845 }, { "epoch": 0.91, "grad_norm": 1.2982743337991483, "learning_rate": 4.0613799455916704e-07, "loss": 0.7555, "step": 19846 }, { "epoch": 0.91, "grad_norm": 0.7103384837908386, "learning_rate": 4.0571836479052384e-07, "loss": 0.1892, "step": 19847 }, { "epoch": 0.91, "grad_norm": 0.3428313305646021, "learning_rate": 4.052989474300295e-07, "loss": 0.2492, "step": 19848 }, { "epoch": 0.91, "grad_norm": 0.4855649379008054, "learning_rate": 4.048797424869677e-07, "loss": 0.3177, "step": 19849 }, { "epoch": 0.91, "grad_norm": 0.7677384446727405, "learning_rate": 4.04460749970621e-07, "loss": 0.1094, "step": 19850 }, { "epoch": 0.91, "grad_norm": 0.3160381517904302, "learning_rate": 4.0404196989026313e-07, "loss": 0.2319, "step": 19851 }, { "epoch": 0.91, "grad_norm": 0.49577837297947086, "learning_rate": 4.036234022551711e-07, "loss": 0.3736, "step": 19852 }, { "epoch": 0.91, "grad_norm": 0.3156342121526298, "learning_rate": 4.032050470746063e-07, "loss": 0.2291, "step": 19853 }, { "epoch": 0.91, "grad_norm": 0.35343553640327374, "learning_rate": 4.027869043578314e-07, "loss": 0.213, "step": 19854 }, { "epoch": 0.91, "grad_norm": 0.8590904377050814, "learning_rate": 4.023689741141046e-07, "loss": 0.3757, "step": 19855 }, { "epoch": 0.91, "grad_norm": 0.35040191760095213, "learning_rate": 4.019512563526784e-07, "loss": 0.2573, "step": 19856 }, { "epoch": 0.91, "grad_norm": 0.24742512969226757, "learning_rate": 4.0153375108280104e-07, "loss": 0.158, "step": 19857 }, { "epoch": 0.91, "grad_norm": 0.41920099643243186, "learning_rate": 4.0111645831371506e-07, "loss": 0.2934, "step": 19858 }, { "epoch": 0.91, "grad_norm": 0.7967682088857647, "learning_rate": 4.0069937805466084e-07, "loss": 0.3294, "step": 19859 }, { "epoch": 0.91, "grad_norm": 0.3020426522857551, "learning_rate": 4.0028251031486775e-07, "loss": 0.2152, "step": 19860 }, { "epoch": 0.91, "grad_norm": 0.5039197975907477, "learning_rate": 3.9986585510356835e-07, "loss": 0.2954, "step": 19861 }, { "epoch": 0.91, "grad_norm": 3.8899929926924717, "learning_rate": 3.9944941242998747e-07, "loss": 0.5789, "step": 19862 }, { "epoch": 0.91, "grad_norm": 0.3106495163642383, "learning_rate": 3.990331823033422e-07, "loss": 0.2034, "step": 19863 }, { "epoch": 0.91, "grad_norm": 0.48808622301736526, "learning_rate": 3.986171647328496e-07, "loss": 0.3692, "step": 19864 }, { "epoch": 0.91, "grad_norm": 0.3331986018138216, "learning_rate": 3.98201359727719e-07, "loss": 0.2435, "step": 19865 }, { "epoch": 0.91, "grad_norm": 0.3477163095835327, "learning_rate": 3.977857672971552e-07, "loss": 0.2698, "step": 19866 }, { "epoch": 0.91, "grad_norm": 0.4475541027831273, "learning_rate": 3.9737038745036095e-07, "loss": 0.0954, "step": 19867 }, { "epoch": 0.91, "grad_norm": 0.3961288873255856, "learning_rate": 3.9695522019652874e-07, "loss": 0.2919, "step": 19868 }, { "epoch": 0.91, "grad_norm": 0.31665232384926084, "learning_rate": 3.965402655448547e-07, "loss": 0.2511, "step": 19869 }, { "epoch": 0.91, "grad_norm": 0.9095326132532182, "learning_rate": 3.9612552350452247e-07, "loss": 0.337, "step": 19870 }, { "epoch": 0.91, "grad_norm": 0.42779134732612795, "learning_rate": 3.9571099408471594e-07, "loss": 0.2174, "step": 19871 }, { "epoch": 0.91, "grad_norm": 0.37312409874350105, "learning_rate": 3.9529667729461094e-07, "loss": 0.2621, "step": 19872 }, { "epoch": 0.91, "grad_norm": 0.31454070674186574, "learning_rate": 3.948825731433781e-07, "loss": 0.192, "step": 19873 }, { "epoch": 0.91, "grad_norm": 0.9516343102382538, "learning_rate": 3.9446868164018993e-07, "loss": 0.3685, "step": 19874 }, { "epoch": 0.91, "grad_norm": 0.350510324429391, "learning_rate": 3.9405500279420583e-07, "loss": 0.2711, "step": 19875 }, { "epoch": 0.91, "grad_norm": 0.5877969612030379, "learning_rate": 3.936415366145874e-07, "loss": 0.2538, "step": 19876 }, { "epoch": 0.91, "grad_norm": 0.7231314208777663, "learning_rate": 3.93228283110485e-07, "loss": 0.3876, "step": 19877 }, { "epoch": 0.91, "grad_norm": 0.34470765887975996, "learning_rate": 3.928152422910492e-07, "loss": 0.2449, "step": 19878 }, { "epoch": 0.91, "grad_norm": 0.28969117992946786, "learning_rate": 3.924024141654259e-07, "loss": 0.1546, "step": 19879 }, { "epoch": 0.91, "grad_norm": 0.41522483250863024, "learning_rate": 3.9198979874275235e-07, "loss": 0.2313, "step": 19880 }, { "epoch": 0.91, "grad_norm": 0.3476039626720442, "learning_rate": 3.9157739603216337e-07, "loss": 0.2621, "step": 19881 }, { "epoch": 0.91, "grad_norm": 0.7238059127524109, "learning_rate": 3.9116520604279285e-07, "loss": 0.4561, "step": 19882 }, { "epoch": 0.91, "grad_norm": 0.9597618642870126, "learning_rate": 3.907532287837612e-07, "loss": 0.2829, "step": 19883 }, { "epoch": 0.91, "grad_norm": 0.2825385672652789, "learning_rate": 3.903414642641923e-07, "loss": 0.2507, "step": 19884 }, { "epoch": 0.91, "grad_norm": 0.2630026951721663, "learning_rate": 3.899299124931999e-07, "loss": 0.1959, "step": 19885 }, { "epoch": 0.91, "grad_norm": 1.5364991096522524, "learning_rate": 3.89518573479899e-07, "loss": 0.3017, "step": 19886 }, { "epoch": 0.91, "grad_norm": 0.3425652702646359, "learning_rate": 3.8910744723339334e-07, "loss": 0.2404, "step": 19887 }, { "epoch": 0.91, "grad_norm": 0.43316565911924715, "learning_rate": 3.8869653376278684e-07, "loss": 0.282, "step": 19888 }, { "epoch": 0.91, "grad_norm": 0.7836103390204027, "learning_rate": 3.8828583307717435e-07, "loss": 0.3245, "step": 19889 }, { "epoch": 0.91, "grad_norm": 0.41183017602888133, "learning_rate": 3.878753451856487e-07, "loss": 0.2661, "step": 19890 }, { "epoch": 0.91, "grad_norm": 0.28630296931988164, "learning_rate": 3.874650700973015e-07, "loss": 0.218, "step": 19891 }, { "epoch": 0.91, "grad_norm": 0.41275176075058767, "learning_rate": 3.8705500782121096e-07, "loss": 0.2958, "step": 19892 }, { "epoch": 0.91, "grad_norm": 0.31717182080086875, "learning_rate": 3.866451583664588e-07, "loss": 0.1801, "step": 19893 }, { "epoch": 0.91, "grad_norm": 0.6167852749254489, "learning_rate": 3.862355217421154e-07, "loss": 0.3549, "step": 19894 }, { "epoch": 0.91, "grad_norm": 1.3392562587555885, "learning_rate": 3.8582609795725256e-07, "loss": 0.5543, "step": 19895 }, { "epoch": 0.91, "grad_norm": 0.27867943987355087, "learning_rate": 3.8541688702093405e-07, "loss": 0.199, "step": 19896 }, { "epoch": 0.91, "grad_norm": 0.2837445480732412, "learning_rate": 3.850078889422182e-07, "loss": 0.2144, "step": 19897 }, { "epoch": 0.91, "grad_norm": 1.6390383584427333, "learning_rate": 3.8459910373016105e-07, "loss": 0.7468, "step": 19898 }, { "epoch": 0.91, "grad_norm": 0.3334933277665799, "learning_rate": 3.841905313938121e-07, "loss": 0.2119, "step": 19899 }, { "epoch": 0.91, "grad_norm": 0.3799398315581481, "learning_rate": 3.8378217194221743e-07, "loss": 0.3103, "step": 19900 }, { "epoch": 0.91, "grad_norm": 1.3136629268529836, "learning_rate": 3.833740253844187e-07, "loss": 0.8052, "step": 19901 }, { "epoch": 0.91, "grad_norm": 0.3145367882430307, "learning_rate": 3.829660917294475e-07, "loss": 0.1829, "step": 19902 }, { "epoch": 0.91, "grad_norm": 0.3369501483409386, "learning_rate": 3.8255837098633897e-07, "loss": 0.159, "step": 19903 }, { "epoch": 0.91, "grad_norm": 0.399800259503826, "learning_rate": 3.821508631641191e-07, "loss": 0.2778, "step": 19904 }, { "epoch": 0.91, "grad_norm": 0.347008833254004, "learning_rate": 3.817435682718096e-07, "loss": 0.2636, "step": 19905 }, { "epoch": 0.91, "grad_norm": 0.8975740526911679, "learning_rate": 3.813364863184266e-07, "loss": 0.3296, "step": 19906 }, { "epoch": 0.91, "grad_norm": 0.5110464300875857, "learning_rate": 3.8092961731298285e-07, "loss": 0.3255, "step": 19907 }, { "epoch": 0.91, "grad_norm": 0.40564447347518345, "learning_rate": 3.8052296126448897e-07, "loss": 0.2604, "step": 19908 }, { "epoch": 0.91, "grad_norm": 0.2694780948973626, "learning_rate": 3.8011651818194216e-07, "loss": 0.151, "step": 19909 }, { "epoch": 0.91, "grad_norm": 0.7635059801695246, "learning_rate": 3.797102880743464e-07, "loss": 0.3983, "step": 19910 }, { "epoch": 0.91, "grad_norm": 0.7612143127809361, "learning_rate": 3.793042709506911e-07, "loss": 0.3141, "step": 19911 }, { "epoch": 0.91, "grad_norm": 0.27084523528884896, "learning_rate": 3.788984668199669e-07, "loss": 0.2426, "step": 19912 }, { "epoch": 0.91, "grad_norm": 1.3569523326147956, "learning_rate": 3.7849287569115876e-07, "loss": 0.5478, "step": 19913 }, { "epoch": 0.91, "grad_norm": 0.45821735212188525, "learning_rate": 3.7808749757324293e-07, "loss": 0.2701, "step": 19914 }, { "epoch": 0.91, "grad_norm": 0.22000263009414384, "learning_rate": 3.776823324751977e-07, "loss": 0.1565, "step": 19915 }, { "epoch": 0.91, "grad_norm": 0.5172344714932559, "learning_rate": 3.772773804059904e-07, "loss": 0.3137, "step": 19916 }, { "epoch": 0.91, "grad_norm": 0.4059399426169871, "learning_rate": 3.768726413745893e-07, "loss": 0.2852, "step": 19917 }, { "epoch": 0.92, "grad_norm": 0.6134693094137341, "learning_rate": 3.76468115389953e-07, "loss": 0.3633, "step": 19918 }, { "epoch": 0.92, "grad_norm": 0.5443067207022229, "learning_rate": 3.7606380246103524e-07, "loss": 0.2229, "step": 19919 }, { "epoch": 0.92, "grad_norm": 0.3358937803822818, "learning_rate": 3.756597025967901e-07, "loss": 0.2417, "step": 19920 }, { "epoch": 0.92, "grad_norm": 0.5816465794835072, "learning_rate": 3.7525581580616255e-07, "loss": 0.3236, "step": 19921 }, { "epoch": 0.92, "grad_norm": 0.39927751934554173, "learning_rate": 3.748521420980966e-07, "loss": 0.1665, "step": 19922 }, { "epoch": 0.92, "grad_norm": 0.4316333650601254, "learning_rate": 3.7444868148152627e-07, "loss": 0.2648, "step": 19923 }, { "epoch": 0.92, "grad_norm": 0.3178169485301669, "learning_rate": 3.740454339653843e-07, "loss": 0.2805, "step": 19924 }, { "epoch": 0.92, "grad_norm": 1.5372655801819484, "learning_rate": 3.7364239955860025e-07, "loss": 0.3753, "step": 19925 }, { "epoch": 0.92, "grad_norm": 0.7422869460697129, "learning_rate": 3.732395782700937e-07, "loss": 0.3136, "step": 19926 }, { "epoch": 0.92, "grad_norm": 0.4546342004984768, "learning_rate": 3.7283697010878636e-07, "loss": 0.2651, "step": 19927 }, { "epoch": 0.92, "grad_norm": 0.28318483006238593, "learning_rate": 3.7243457508358784e-07, "loss": 0.2162, "step": 19928 }, { "epoch": 0.92, "grad_norm": 0.3580493502161358, "learning_rate": 3.720323932034098e-07, "loss": 0.1958, "step": 19929 }, { "epoch": 0.92, "grad_norm": 0.4147679704134558, "learning_rate": 3.7163042447715627e-07, "loss": 0.3199, "step": 19930 }, { "epoch": 0.92, "grad_norm": 0.5587071754929154, "learning_rate": 3.7122866891372346e-07, "loss": 0.385, "step": 19931 }, { "epoch": 0.92, "grad_norm": 0.3072101987997162, "learning_rate": 3.708271265220087e-07, "loss": 0.1734, "step": 19932 }, { "epoch": 0.92, "grad_norm": 0.4185232243988091, "learning_rate": 3.704257973108982e-07, "loss": 0.2854, "step": 19933 }, { "epoch": 0.92, "grad_norm": 0.8722022609450718, "learning_rate": 3.700246812892816e-07, "loss": 0.4461, "step": 19934 }, { "epoch": 0.92, "grad_norm": 0.170759954143786, "learning_rate": 3.6962377846603614e-07, "loss": 0.1141, "step": 19935 }, { "epoch": 0.92, "grad_norm": 0.31802085764689164, "learning_rate": 3.692230888500392e-07, "loss": 0.2947, "step": 19936 }, { "epoch": 0.92, "grad_norm": 1.3710467997212215, "learning_rate": 3.6882261245016036e-07, "loss": 0.5155, "step": 19937 }, { "epoch": 0.92, "grad_norm": 0.4232609728695872, "learning_rate": 3.6842234927526587e-07, "loss": 0.1792, "step": 19938 }, { "epoch": 0.92, "grad_norm": 0.5122152006494809, "learning_rate": 3.680222993342186e-07, "loss": 0.3297, "step": 19939 }, { "epoch": 0.92, "grad_norm": 0.3856510584094294, "learning_rate": 3.6762246263587265e-07, "loss": 0.2865, "step": 19940 }, { "epoch": 0.92, "grad_norm": 0.3669151614645981, "learning_rate": 3.6722283918908195e-07, "loss": 0.1464, "step": 19941 }, { "epoch": 0.92, "grad_norm": 0.6175031000521726, "learning_rate": 3.668234290026951e-07, "loss": 0.4005, "step": 19942 }, { "epoch": 0.92, "grad_norm": 0.34574251370342707, "learning_rate": 3.6642423208555157e-07, "loss": 0.2821, "step": 19943 }, { "epoch": 0.92, "grad_norm": 0.7628507861902257, "learning_rate": 3.66025248446491e-07, "loss": 0.3895, "step": 19944 }, { "epoch": 0.92, "grad_norm": 0.36283444785113733, "learning_rate": 3.656264780943441e-07, "loss": 0.2262, "step": 19945 }, { "epoch": 0.92, "grad_norm": 1.010684942178908, "learning_rate": 3.652279210379439e-07, "loss": 0.4187, "step": 19946 }, { "epoch": 0.92, "grad_norm": 0.42642223186166195, "learning_rate": 3.6482957728611213e-07, "loss": 0.3192, "step": 19947 }, { "epoch": 0.92, "grad_norm": 0.30212769143575047, "learning_rate": 3.644314468476651e-07, "loss": 0.2273, "step": 19948 }, { "epoch": 0.92, "grad_norm": 0.29573742166870615, "learning_rate": 3.6403352973141904e-07, "loss": 0.1877, "step": 19949 }, { "epoch": 0.92, "grad_norm": 0.8086591971582502, "learning_rate": 3.6363582594618254e-07, "loss": 0.3001, "step": 19950 }, { "epoch": 0.92, "grad_norm": 0.3343840287563893, "learning_rate": 3.632383355007629e-07, "loss": 0.2168, "step": 19951 }, { "epoch": 0.92, "grad_norm": 0.5495309458733093, "learning_rate": 3.6284105840395765e-07, "loss": 0.3031, "step": 19952 }, { "epoch": 0.92, "grad_norm": 0.7845803857087339, "learning_rate": 3.6244399466456415e-07, "loss": 0.4598, "step": 19953 }, { "epoch": 0.92, "grad_norm": 0.25964317178650065, "learning_rate": 3.6204714429136976e-07, "loss": 0.1783, "step": 19954 }, { "epoch": 0.92, "grad_norm": 0.3148871484035735, "learning_rate": 3.616505072931631e-07, "loss": 0.2129, "step": 19955 }, { "epoch": 0.92, "grad_norm": 0.4485615431833554, "learning_rate": 3.6125408367872594e-07, "loss": 0.2765, "step": 19956 }, { "epoch": 0.92, "grad_norm": 0.4111445671733057, "learning_rate": 3.6085787345683243e-07, "loss": 0.2691, "step": 19957 }, { "epoch": 0.92, "grad_norm": 1.0002068188321827, "learning_rate": 3.604618766362544e-07, "loss": 0.2423, "step": 19958 }, { "epoch": 0.92, "grad_norm": 0.3517317134963925, "learning_rate": 3.600660932257616e-07, "loss": 0.2561, "step": 19959 }, { "epoch": 0.92, "grad_norm": 0.4126417800009638, "learning_rate": 3.5967052323411354e-07, "loss": 0.2857, "step": 19960 }, { "epoch": 0.92, "grad_norm": 0.4812825944824377, "learning_rate": 3.592751666700689e-07, "loss": 0.2111, "step": 19961 }, { "epoch": 0.92, "grad_norm": 0.40297684145969614, "learning_rate": 3.588800235423795e-07, "loss": 0.2186, "step": 19962 }, { "epoch": 0.92, "grad_norm": 0.4201351728328389, "learning_rate": 3.584850938597939e-07, "loss": 0.2848, "step": 19963 }, { "epoch": 0.92, "grad_norm": 0.35639618481861624, "learning_rate": 3.5809037763105626e-07, "loss": 0.223, "step": 19964 }, { "epoch": 0.92, "grad_norm": 0.8370608424754384, "learning_rate": 3.5769587486490507e-07, "loss": 0.429, "step": 19965 }, { "epoch": 0.92, "grad_norm": 0.3897325122925885, "learning_rate": 3.5730158557007454e-07, "loss": 0.2921, "step": 19966 }, { "epoch": 0.92, "grad_norm": 0.34398365335995246, "learning_rate": 3.5690750975528986e-07, "loss": 0.2717, "step": 19967 }, { "epoch": 0.92, "grad_norm": 0.41074221218611123, "learning_rate": 3.5651364742928186e-07, "loss": 0.1565, "step": 19968 }, { "epoch": 0.92, "grad_norm": 0.32279580429885923, "learning_rate": 3.5611999860076683e-07, "loss": 0.2269, "step": 19969 }, { "epoch": 0.92, "grad_norm": 1.7469096812840725, "learning_rate": 3.557265632784601e-07, "loss": 0.6329, "step": 19970 }, { "epoch": 0.92, "grad_norm": 0.35920937373869566, "learning_rate": 3.5533334147107133e-07, "loss": 0.2284, "step": 19971 }, { "epoch": 0.92, "grad_norm": 0.36816665907220764, "learning_rate": 3.549403331873058e-07, "loss": 0.2724, "step": 19972 }, { "epoch": 0.92, "grad_norm": 0.8298626092969419, "learning_rate": 3.5454753843586767e-07, "loss": 0.4264, "step": 19973 }, { "epoch": 0.92, "grad_norm": 0.3157006719959381, "learning_rate": 3.541549572254488e-07, "loss": 0.1435, "step": 19974 }, { "epoch": 0.92, "grad_norm": 0.36797205745718725, "learning_rate": 3.537625895647423e-07, "loss": 0.2852, "step": 19975 }, { "epoch": 0.92, "grad_norm": 0.3677344099615494, "learning_rate": 3.533704354624368e-07, "loss": 0.267, "step": 19976 }, { "epoch": 0.92, "grad_norm": 0.6598532283083989, "learning_rate": 3.5297849492721083e-07, "loss": 0.1672, "step": 19977 }, { "epoch": 0.92, "grad_norm": 0.41842781174059906, "learning_rate": 3.525867679677442e-07, "loss": 0.3241, "step": 19978 }, { "epoch": 0.92, "grad_norm": 0.36563236528870313, "learning_rate": 3.521952545927065e-07, "loss": 0.2908, "step": 19979 }, { "epoch": 0.92, "grad_norm": 0.9384831011623197, "learning_rate": 3.5180395481076767e-07, "loss": 0.4577, "step": 19980 }, { "epoch": 0.92, "grad_norm": 0.23270048304817367, "learning_rate": 3.5141286863059064e-07, "loss": 0.1523, "step": 19981 }, { "epoch": 0.92, "grad_norm": 0.5898310376648472, "learning_rate": 3.510219960608341e-07, "loss": 0.2481, "step": 19982 }, { "epoch": 0.92, "grad_norm": 0.37189018436359517, "learning_rate": 3.5063133711014884e-07, "loss": 0.3239, "step": 19983 }, { "epoch": 0.92, "grad_norm": 0.3358643306727903, "learning_rate": 3.502408917871869e-07, "loss": 0.2018, "step": 19984 }, { "epoch": 0.92, "grad_norm": 0.6938782594785033, "learning_rate": 3.4985066010059134e-07, "loss": 0.3891, "step": 19985 }, { "epoch": 0.92, "grad_norm": 0.5868070852709735, "learning_rate": 3.4946064205899966e-07, "loss": 0.2347, "step": 19986 }, { "epoch": 0.92, "grad_norm": 0.25124001526846423, "learning_rate": 3.4907083767105053e-07, "loss": 0.2182, "step": 19987 }, { "epoch": 0.92, "grad_norm": 0.41446750003183525, "learning_rate": 3.4868124694536933e-07, "loss": 0.2173, "step": 19988 }, { "epoch": 0.92, "grad_norm": 0.9879616607875817, "learning_rate": 3.482918698905835e-07, "loss": 0.4844, "step": 19989 }, { "epoch": 0.92, "grad_norm": 0.4015938346324124, "learning_rate": 3.47902706515314e-07, "loss": 0.2064, "step": 19990 }, { "epoch": 0.92, "grad_norm": 0.3694916549469895, "learning_rate": 3.47513756828175e-07, "loss": 0.2805, "step": 19991 }, { "epoch": 0.92, "grad_norm": 1.476889715975262, "learning_rate": 3.4712502083777964e-07, "loss": 0.5958, "step": 19992 }, { "epoch": 0.92, "grad_norm": 0.35226314732815156, "learning_rate": 3.4673649855272997e-07, "loss": 0.2409, "step": 19993 }, { "epoch": 0.92, "grad_norm": 0.25530260015267053, "learning_rate": 3.4634818998163233e-07, "loss": 0.0776, "step": 19994 }, { "epoch": 0.92, "grad_norm": 0.37727041974845216, "learning_rate": 3.4596009513308215e-07, "loss": 0.3275, "step": 19995 }, { "epoch": 0.92, "grad_norm": 0.462614583444022, "learning_rate": 3.455722140156692e-07, "loss": 0.2968, "step": 19996 }, { "epoch": 0.92, "grad_norm": 0.5230042679034724, "learning_rate": 3.451845466379833e-07, "loss": 0.273, "step": 19997 }, { "epoch": 0.92, "grad_norm": 0.5999673071788141, "learning_rate": 3.4479709300860533e-07, "loss": 0.3514, "step": 19998 }, { "epoch": 0.92, "grad_norm": 0.36288271706838937, "learning_rate": 3.4440985313611507e-07, "loss": 0.2698, "step": 19999 }, { "epoch": 0.92, "grad_norm": 0.2656433833317198, "learning_rate": 3.4402282702908353e-07, "loss": 0.1374, "step": 20000 }, { "epoch": 0.92, "grad_norm": 0.6620025047564988, "learning_rate": 3.436360146960793e-07, "loss": 0.3731, "step": 20001 }, { "epoch": 0.92, "grad_norm": 0.34183171104713556, "learning_rate": 3.4324941614566897e-07, "loss": 0.2587, "step": 20002 }, { "epoch": 0.92, "grad_norm": 0.3407070963775404, "learning_rate": 3.428630313864079e-07, "loss": 0.2635, "step": 20003 }, { "epoch": 0.92, "grad_norm": 1.583030481820721, "learning_rate": 3.424768604268525e-07, "loss": 0.7012, "step": 20004 }, { "epoch": 0.92, "grad_norm": 0.3496434753458176, "learning_rate": 3.420909032755504e-07, "loss": 0.2338, "step": 20005 }, { "epoch": 0.92, "grad_norm": 0.3458974792920106, "learning_rate": 3.41705159941047e-07, "loss": 0.1615, "step": 20006 }, { "epoch": 0.92, "grad_norm": 0.3559367010334245, "learning_rate": 3.4131963043188333e-07, "loss": 0.2468, "step": 20007 }, { "epoch": 0.92, "grad_norm": 0.3602301381721926, "learning_rate": 3.4093431475659355e-07, "loss": 0.252, "step": 20008 }, { "epoch": 0.92, "grad_norm": 1.1940564430583402, "learning_rate": 3.4054921292370977e-07, "loss": 0.6941, "step": 20009 }, { "epoch": 0.92, "grad_norm": 0.3566038208566618, "learning_rate": 3.40164324941753e-07, "loss": 0.2368, "step": 20010 }, { "epoch": 0.92, "grad_norm": 0.32749568002746926, "learning_rate": 3.397796508192519e-07, "loss": 0.2292, "step": 20011 }, { "epoch": 0.92, "grad_norm": 0.2995468963678539, "learning_rate": 3.393951905647175e-07, "loss": 0.147, "step": 20012 }, { "epoch": 0.92, "grad_norm": 0.6057201736308819, "learning_rate": 3.390109441866618e-07, "loss": 0.2911, "step": 20013 }, { "epoch": 0.92, "grad_norm": 0.4181620933261763, "learning_rate": 3.386269116935914e-07, "loss": 0.2674, "step": 20014 }, { "epoch": 0.92, "grad_norm": 0.386383044920115, "learning_rate": 3.382430930940106e-07, "loss": 0.2965, "step": 20015 }, { "epoch": 0.92, "grad_norm": 1.8742038713011822, "learning_rate": 3.37859488396417e-07, "loss": 0.2827, "step": 20016 }, { "epoch": 0.92, "grad_norm": 0.4473341454023465, "learning_rate": 3.3747609760929944e-07, "loss": 0.2601, "step": 20017 }, { "epoch": 0.92, "grad_norm": 0.389867358198897, "learning_rate": 3.3709292074114887e-07, "loss": 0.278, "step": 20018 }, { "epoch": 0.92, "grad_norm": 0.3608077281210852, "learning_rate": 3.367099578004496e-07, "loss": 0.291, "step": 20019 }, { "epoch": 0.92, "grad_norm": 0.26456942102633285, "learning_rate": 3.3632720879567594e-07, "loss": 0.1577, "step": 20020 }, { "epoch": 0.92, "grad_norm": 1.2381364092853324, "learning_rate": 3.359446737353056e-07, "loss": 0.7711, "step": 20021 }, { "epoch": 0.92, "grad_norm": 1.5681495020986826, "learning_rate": 3.3556235262780513e-07, "loss": 0.5365, "step": 20022 }, { "epoch": 0.92, "grad_norm": 0.23284479839944283, "learning_rate": 3.3518024548163887e-07, "loss": 0.2078, "step": 20023 }, { "epoch": 0.92, "grad_norm": 0.9606229493679549, "learning_rate": 3.3479835230526894e-07, "loss": 0.3788, "step": 20024 }, { "epoch": 0.92, "grad_norm": 0.3315819184633779, "learning_rate": 3.344166731071452e-07, "loss": 0.2296, "step": 20025 }, { "epoch": 0.92, "grad_norm": 0.3076619777026424, "learning_rate": 3.340352078957232e-07, "loss": 0.2067, "step": 20026 }, { "epoch": 0.92, "grad_norm": 0.371517835866063, "learning_rate": 3.336539566794428e-07, "loss": 0.3008, "step": 20027 }, { "epoch": 0.92, "grad_norm": 1.4125771820426318, "learning_rate": 3.332729194667494e-07, "loss": 0.5225, "step": 20028 }, { "epoch": 0.92, "grad_norm": 0.31079195207070665, "learning_rate": 3.3289209626607533e-07, "loss": 0.1852, "step": 20029 }, { "epoch": 0.92, "grad_norm": 0.592868706130989, "learning_rate": 3.325114870858548e-07, "loss": 0.329, "step": 20030 }, { "epoch": 0.92, "grad_norm": 0.22287237990105, "learning_rate": 3.3213109193451e-07, "loss": 0.2122, "step": 20031 }, { "epoch": 0.92, "grad_norm": 0.6407406001049274, "learning_rate": 3.317509108204664e-07, "loss": 0.2673, "step": 20032 }, { "epoch": 0.92, "grad_norm": 0.4227412089128269, "learning_rate": 3.3137094375213843e-07, "loss": 0.2401, "step": 20033 }, { "epoch": 0.92, "grad_norm": 0.38654054939632837, "learning_rate": 3.309911907379393e-07, "loss": 0.2991, "step": 20034 }, { "epoch": 0.92, "grad_norm": 0.6643718408789809, "learning_rate": 3.306116517862756e-07, "loss": 0.2982, "step": 20035 }, { "epoch": 0.92, "grad_norm": 0.4591282467617554, "learning_rate": 3.3023232690555184e-07, "loss": 0.2495, "step": 20036 }, { "epoch": 0.92, "grad_norm": 0.5484319092214814, "learning_rate": 3.298532161041634e-07, "loss": 0.2405, "step": 20037 }, { "epoch": 0.92, "grad_norm": 0.40665761788732585, "learning_rate": 3.294743193905059e-07, "loss": 0.2847, "step": 20038 }, { "epoch": 0.92, "grad_norm": 0.25786155293640856, "learning_rate": 3.2909563677296473e-07, "loss": 0.2078, "step": 20039 }, { "epoch": 0.92, "grad_norm": 1.7432892091035728, "learning_rate": 3.287171682599255e-07, "loss": 0.5314, "step": 20040 }, { "epoch": 0.92, "grad_norm": 0.4401901052827708, "learning_rate": 3.283389138597681e-07, "loss": 0.2674, "step": 20041 }, { "epoch": 0.92, "grad_norm": 0.4766102437945921, "learning_rate": 3.279608735808637e-07, "loss": 0.2498, "step": 20042 }, { "epoch": 0.92, "grad_norm": 0.40673897023428773, "learning_rate": 3.2758304743158554e-07, "loss": 0.2801, "step": 20043 }, { "epoch": 0.92, "grad_norm": 0.43782070327692807, "learning_rate": 3.272054354202936e-07, "loss": 0.2986, "step": 20044 }, { "epoch": 0.92, "grad_norm": 0.567137151346914, "learning_rate": 3.2682803755535233e-07, "loss": 0.384, "step": 20045 }, { "epoch": 0.92, "grad_norm": 0.24823178067374493, "learning_rate": 3.26450853845115e-07, "loss": 0.1771, "step": 20046 }, { "epoch": 0.92, "grad_norm": 0.5892734116705268, "learning_rate": 3.2607388429793274e-07, "loss": 0.3082, "step": 20047 }, { "epoch": 0.92, "grad_norm": 0.4274727084090879, "learning_rate": 3.2569712892215e-07, "loss": 0.2892, "step": 20048 }, { "epoch": 0.92, "grad_norm": 1.4988576645200309, "learning_rate": 3.2532058772610895e-07, "loss": 0.325, "step": 20049 }, { "epoch": 0.92, "grad_norm": 0.5794215876193486, "learning_rate": 3.2494426071814523e-07, "loss": 0.3087, "step": 20050 }, { "epoch": 0.92, "grad_norm": 0.29156370079823435, "learning_rate": 3.2456814790659096e-07, "loss": 0.2716, "step": 20051 }, { "epoch": 0.92, "grad_norm": 0.3212997203048011, "learning_rate": 3.241922492997729e-07, "loss": 0.1846, "step": 20052 }, { "epoch": 0.92, "grad_norm": 0.6155676286535304, "learning_rate": 3.238165649060121e-07, "loss": 0.2636, "step": 20053 }, { "epoch": 0.92, "grad_norm": 0.3167114780956871, "learning_rate": 3.234410947336264e-07, "loss": 0.273, "step": 20054 }, { "epoch": 0.92, "grad_norm": 0.5896699451238753, "learning_rate": 3.2306583879093023e-07, "loss": 0.2108, "step": 20055 }, { "epoch": 0.92, "grad_norm": 0.6688405500362632, "learning_rate": 3.226907970862281e-07, "loss": 0.4424, "step": 20056 }, { "epoch": 0.92, "grad_norm": 0.3046481110032741, "learning_rate": 3.223159696278244e-07, "loss": 0.2135, "step": 20057 }, { "epoch": 0.92, "grad_norm": 0.44683056504965746, "learning_rate": 3.2194135642401705e-07, "loss": 0.3062, "step": 20058 }, { "epoch": 0.92, "grad_norm": 0.2966384546749176, "learning_rate": 3.215669574831026e-07, "loss": 0.1804, "step": 20059 }, { "epoch": 0.92, "grad_norm": 0.42658652461904467, "learning_rate": 3.211927728133668e-07, "loss": 0.298, "step": 20060 }, { "epoch": 0.92, "grad_norm": 1.364608308356298, "learning_rate": 3.208188024230918e-07, "loss": 0.5944, "step": 20061 }, { "epoch": 0.92, "grad_norm": 0.30449796114658834, "learning_rate": 3.204450463205633e-07, "loss": 0.2113, "step": 20062 }, { "epoch": 0.92, "grad_norm": 0.4215930756244218, "learning_rate": 3.200715045140501e-07, "loss": 0.3093, "step": 20063 }, { "epoch": 0.92, "grad_norm": 0.597943612119744, "learning_rate": 3.196981770118246e-07, "loss": 0.3336, "step": 20064 }, { "epoch": 0.92, "grad_norm": 0.15710787491637743, "learning_rate": 3.193250638221512e-07, "loss": 0.0718, "step": 20065 }, { "epoch": 0.92, "grad_norm": 0.3299102389667, "learning_rate": 3.1895216495329116e-07, "loss": 0.2789, "step": 20066 }, { "epoch": 0.92, "grad_norm": 0.5785092486359147, "learning_rate": 3.1857948041349894e-07, "loss": 0.3252, "step": 20067 }, { "epoch": 0.92, "grad_norm": 0.5510607015579685, "learning_rate": 3.1820701021102576e-07, "loss": 0.2596, "step": 20068 }, { "epoch": 0.92, "grad_norm": 0.37921533474901487, "learning_rate": 3.1783475435411935e-07, "loss": 0.2765, "step": 20069 }, { "epoch": 0.92, "grad_norm": 0.36553048481916767, "learning_rate": 3.174627128510188e-07, "loss": 0.301, "step": 20070 }, { "epoch": 0.92, "grad_norm": 0.403351157126921, "learning_rate": 3.170908857099608e-07, "loss": 0.1883, "step": 20071 }, { "epoch": 0.92, "grad_norm": 0.25823888554696645, "learning_rate": 3.167192729391799e-07, "loss": 0.1668, "step": 20072 }, { "epoch": 0.92, "grad_norm": 1.3356014895391508, "learning_rate": 3.1634787454689954e-07, "loss": 0.7151, "step": 20073 }, { "epoch": 0.92, "grad_norm": 0.3369234828242105, "learning_rate": 3.1597669054134417e-07, "loss": 0.2886, "step": 20074 }, { "epoch": 0.92, "grad_norm": 0.3370695209270014, "learning_rate": 3.156057209307317e-07, "loss": 0.2257, "step": 20075 }, { "epoch": 0.92, "grad_norm": 0.7828519389273079, "learning_rate": 3.152349657232756e-07, "loss": 0.3961, "step": 20076 }, { "epoch": 0.92, "grad_norm": 0.3088735810118458, "learning_rate": 3.148644249271826e-07, "loss": 0.1776, "step": 20077 }, { "epoch": 0.92, "grad_norm": 0.29949245571140787, "learning_rate": 3.1449409855065506e-07, "loss": 0.209, "step": 20078 }, { "epoch": 0.92, "grad_norm": 0.5805740139720473, "learning_rate": 3.141239866018952e-07, "loss": 0.3308, "step": 20079 }, { "epoch": 0.92, "grad_norm": 0.6469044786353962, "learning_rate": 3.1375408908909333e-07, "loss": 0.4029, "step": 20080 }, { "epoch": 0.92, "grad_norm": 0.37057449673291254, "learning_rate": 3.133844060204416e-07, "loss": 0.25, "step": 20081 }, { "epoch": 0.92, "grad_norm": 0.3499481414055778, "learning_rate": 3.130149374041225e-07, "loss": 0.2549, "step": 20082 }, { "epoch": 0.92, "grad_norm": 0.39585581118871255, "learning_rate": 3.12645683248316e-07, "loss": 0.1561, "step": 20083 }, { "epoch": 0.92, "grad_norm": 0.29746354023328586, "learning_rate": 3.122766435611979e-07, "loss": 0.2345, "step": 20084 }, { "epoch": 0.92, "grad_norm": 1.0207877008422335, "learning_rate": 3.119078183509372e-07, "loss": 0.3247, "step": 20085 }, { "epoch": 0.92, "grad_norm": 0.35295820352837787, "learning_rate": 3.115392076257007e-07, "loss": 0.2717, "step": 20086 }, { "epoch": 0.92, "grad_norm": 0.33301019449562125, "learning_rate": 3.1117081139364626e-07, "loss": 0.2275, "step": 20087 }, { "epoch": 0.92, "grad_norm": 1.1992313954871763, "learning_rate": 3.1080262966293294e-07, "loss": 0.3053, "step": 20088 }, { "epoch": 0.92, "grad_norm": 0.28900644081914445, "learning_rate": 3.1043466244171204e-07, "loss": 0.1703, "step": 20089 }, { "epoch": 0.92, "grad_norm": 0.2900488723534963, "learning_rate": 3.1006690973812704e-07, "loss": 0.2485, "step": 20090 }, { "epoch": 0.92, "grad_norm": 0.4906743368051409, "learning_rate": 3.096993715603225e-07, "loss": 0.2334, "step": 20091 }, { "epoch": 0.92, "grad_norm": 0.5468764569104397, "learning_rate": 3.09332047916433e-07, "loss": 0.3645, "step": 20092 }, { "epoch": 0.92, "grad_norm": 0.37309010189270414, "learning_rate": 3.0896493881459323e-07, "loss": 0.2748, "step": 20093 }, { "epoch": 0.92, "grad_norm": 0.36642005857148335, "learning_rate": 3.085980442629288e-07, "loss": 0.2908, "step": 20094 }, { "epoch": 0.92, "grad_norm": 0.5883005099219749, "learning_rate": 3.0823136426956334e-07, "loss": 0.1184, "step": 20095 }, { "epoch": 0.92, "grad_norm": 0.2872717748195638, "learning_rate": 3.078648988426147e-07, "loss": 0.2148, "step": 20096 }, { "epoch": 0.92, "grad_norm": 0.9988833964207855, "learning_rate": 3.0749864799019426e-07, "loss": 0.4919, "step": 20097 }, { "epoch": 0.92, "grad_norm": 0.37152959692920323, "learning_rate": 3.071326117204143e-07, "loss": 0.2448, "step": 20098 }, { "epoch": 0.92, "grad_norm": 0.36289021047111236, "learning_rate": 3.067667900413751e-07, "loss": 0.2675, "step": 20099 }, { "epoch": 0.92, "grad_norm": 1.2371040243122153, "learning_rate": 3.064011829611757e-07, "loss": 0.6413, "step": 20100 }, { "epoch": 0.92, "grad_norm": 0.3280054118548916, "learning_rate": 3.06035790487913e-07, "loss": 0.1618, "step": 20101 }, { "epoch": 0.92, "grad_norm": 0.3793311838822268, "learning_rate": 3.0567061262967376e-07, "loss": 0.2664, "step": 20102 }, { "epoch": 0.92, "grad_norm": 0.3762066444416537, "learning_rate": 3.053056493945439e-07, "loss": 0.2352, "step": 20103 }, { "epoch": 0.92, "grad_norm": 0.8260370499677854, "learning_rate": 3.0494090079060235e-07, "loss": 0.3199, "step": 20104 }, { "epoch": 0.92, "grad_norm": 0.3414318808237356, "learning_rate": 3.0457636682592604e-07, "loss": 0.2578, "step": 20105 }, { "epoch": 0.92, "grad_norm": 0.3798873991042024, "learning_rate": 3.042120475085852e-07, "loss": 0.3121, "step": 20106 }, { "epoch": 0.92, "grad_norm": 1.4831672630835995, "learning_rate": 3.038479428466423e-07, "loss": 0.5124, "step": 20107 }, { "epoch": 0.92, "grad_norm": 0.2315393388058869, "learning_rate": 3.0348405284816193e-07, "loss": 0.1496, "step": 20108 }, { "epoch": 0.92, "grad_norm": 0.46951042864463327, "learning_rate": 3.031203775211988e-07, "loss": 0.2618, "step": 20109 }, { "epoch": 0.92, "grad_norm": 0.3643427511132734, "learning_rate": 3.0275691687380536e-07, "loss": 0.3299, "step": 20110 }, { "epoch": 0.92, "grad_norm": 0.3113673064075314, "learning_rate": 3.023936709140263e-07, "loss": 0.2078, "step": 20111 }, { "epoch": 0.92, "grad_norm": 1.2082536996703304, "learning_rate": 3.020306396499062e-07, "loss": 0.3945, "step": 20112 }, { "epoch": 0.92, "grad_norm": 0.6297023765287655, "learning_rate": 3.016678230894787e-07, "loss": 0.3316, "step": 20113 }, { "epoch": 0.92, "grad_norm": 0.21233945043759542, "learning_rate": 3.0130522124077967e-07, "loss": 0.1692, "step": 20114 }, { "epoch": 0.92, "grad_norm": 0.741102810374366, "learning_rate": 3.009428341118359e-07, "loss": 0.3744, "step": 20115 }, { "epoch": 0.92, "grad_norm": 0.4656100806538979, "learning_rate": 3.005806617106677e-07, "loss": 0.2786, "step": 20116 }, { "epoch": 0.92, "grad_norm": 0.31951203519703775, "learning_rate": 3.002187040452964e-07, "loss": 0.1799, "step": 20117 }, { "epoch": 0.92, "grad_norm": 0.44964500414831005, "learning_rate": 2.9985696112373455e-07, "loss": 0.2944, "step": 20118 }, { "epoch": 0.92, "grad_norm": 0.9393446868217752, "learning_rate": 2.9949543295398896e-07, "loss": 0.4099, "step": 20119 }, { "epoch": 0.92, "grad_norm": 0.42207292789399414, "learning_rate": 2.991341195440678e-07, "loss": 0.2628, "step": 20120 }, { "epoch": 0.92, "grad_norm": 0.4152277544593938, "learning_rate": 2.987730209019635e-07, "loss": 0.2573, "step": 20121 }, { "epoch": 0.92, "grad_norm": 0.3616595768622279, "learning_rate": 2.984121370356774e-07, "loss": 0.2691, "step": 20122 }, { "epoch": 0.92, "grad_norm": 0.4323388260288412, "learning_rate": 2.9805146795319537e-07, "loss": 0.2807, "step": 20123 }, { "epoch": 0.92, "grad_norm": 0.4065079422113088, "learning_rate": 2.976910136625033e-07, "loss": 0.1617, "step": 20124 }, { "epoch": 0.92, "grad_norm": 0.5771983329855844, "learning_rate": 2.973307741715803e-07, "loss": 0.334, "step": 20125 }, { "epoch": 0.92, "grad_norm": 0.29140134517512134, "learning_rate": 2.969707494884022e-07, "loss": 0.2543, "step": 20126 }, { "epoch": 0.92, "grad_norm": 0.6597708414289951, "learning_rate": 2.9661093962094045e-07, "loss": 0.2806, "step": 20127 }, { "epoch": 0.92, "grad_norm": 0.44319121406711504, "learning_rate": 2.9625134457715975e-07, "loss": 0.2802, "step": 20128 }, { "epoch": 0.92, "grad_norm": 0.35362216422282455, "learning_rate": 2.9589196436502267e-07, "loss": 0.2499, "step": 20129 }, { "epoch": 0.92, "grad_norm": 0.25176602956947075, "learning_rate": 2.955327989924839e-07, "loss": 0.1723, "step": 20130 }, { "epoch": 0.92, "grad_norm": 0.7211725899673292, "learning_rate": 2.9517384846749485e-07, "loss": 0.3827, "step": 20131 }, { "epoch": 0.92, "grad_norm": 0.33066685937795576, "learning_rate": 2.9481511279800477e-07, "loss": 0.2522, "step": 20132 }, { "epoch": 0.92, "grad_norm": 1.043378444324432, "learning_rate": 2.9445659199195285e-07, "loss": 0.4287, "step": 20133 }, { "epoch": 0.92, "grad_norm": 0.36333313517049026, "learning_rate": 2.940982860572772e-07, "loss": 0.256, "step": 20134 }, { "epoch": 0.92, "grad_norm": 0.4172372244432512, "learning_rate": 2.9374019500191255e-07, "loss": 0.2558, "step": 20135 }, { "epoch": 0.93, "grad_norm": 0.29884128577672325, "learning_rate": 2.9338231883378365e-07, "loss": 0.1826, "step": 20136 }, { "epoch": 0.93, "grad_norm": 0.37566362178976254, "learning_rate": 2.9302465756081646e-07, "loss": 0.2597, "step": 20137 }, { "epoch": 0.93, "grad_norm": 0.4286135849902286, "learning_rate": 2.9266721119092454e-07, "loss": 0.2546, "step": 20138 }, { "epoch": 0.93, "grad_norm": 0.5682725374234371, "learning_rate": 2.9230997973202724e-07, "loss": 0.3201, "step": 20139 }, { "epoch": 0.93, "grad_norm": 1.1895890465416745, "learning_rate": 2.9195296319202927e-07, "loss": 0.4468, "step": 20140 }, { "epoch": 0.93, "grad_norm": 0.4467348731807258, "learning_rate": 2.9159616157883763e-07, "loss": 0.2986, "step": 20141 }, { "epoch": 0.93, "grad_norm": 0.21482362384647072, "learning_rate": 2.912395749003494e-07, "loss": 0.2054, "step": 20142 }, { "epoch": 0.93, "grad_norm": 0.6986049620345507, "learning_rate": 2.9088320316445705e-07, "loss": 0.2676, "step": 20143 }, { "epoch": 0.93, "grad_norm": 0.4140355007499485, "learning_rate": 2.905270463790555e-07, "loss": 0.268, "step": 20144 }, { "epoch": 0.93, "grad_norm": 0.41713146953259794, "learning_rate": 2.9017110455202613e-07, "loss": 0.2667, "step": 20145 }, { "epoch": 0.93, "grad_norm": 0.5145376808125004, "learning_rate": 2.8981537769125046e-07, "loss": 0.3782, "step": 20146 }, { "epoch": 0.93, "grad_norm": 0.3236385114088312, "learning_rate": 2.894598658046033e-07, "loss": 0.2034, "step": 20147 }, { "epoch": 0.93, "grad_norm": 0.3428730147587895, "learning_rate": 2.89104568899955e-07, "loss": 0.1651, "step": 20148 }, { "epoch": 0.93, "grad_norm": 0.3867476671522409, "learning_rate": 2.887494869851737e-07, "loss": 0.3001, "step": 20149 }, { "epoch": 0.93, "grad_norm": 0.37520127391224767, "learning_rate": 2.883946200681176e-07, "loss": 0.2112, "step": 20150 }, { "epoch": 0.93, "grad_norm": 1.4413669975833727, "learning_rate": 2.880399681566437e-07, "loss": 0.5489, "step": 20151 }, { "epoch": 0.93, "grad_norm": 1.1805838556041948, "learning_rate": 2.8768553125860577e-07, "loss": 0.6865, "step": 20152 }, { "epoch": 0.93, "grad_norm": 0.35754755360634777, "learning_rate": 2.873313093818486e-07, "loss": 0.184, "step": 20153 }, { "epoch": 0.93, "grad_norm": 0.34890149420800903, "learning_rate": 2.8697730253421595e-07, "loss": 0.2686, "step": 20154 }, { "epoch": 0.93, "grad_norm": 0.36238774177898353, "learning_rate": 2.8662351072354267e-07, "loss": 0.2115, "step": 20155 }, { "epoch": 0.93, "grad_norm": 0.4159066836281099, "learning_rate": 2.8626993395766467e-07, "loss": 0.1725, "step": 20156 }, { "epoch": 0.93, "grad_norm": 0.41600941336656855, "learning_rate": 2.859165722444068e-07, "loss": 0.2851, "step": 20157 }, { "epoch": 0.93, "grad_norm": 0.5413607402466021, "learning_rate": 2.8556342559159513e-07, "loss": 0.3957, "step": 20158 }, { "epoch": 0.93, "grad_norm": 0.6078644735670684, "learning_rate": 2.852104940070455e-07, "loss": 0.3534, "step": 20159 }, { "epoch": 0.93, "grad_norm": 0.38913923473040934, "learning_rate": 2.848577774985717e-07, "loss": 0.2347, "step": 20160 }, { "epoch": 0.93, "grad_norm": 0.3664355380307951, "learning_rate": 2.8450527607398416e-07, "loss": 0.2675, "step": 20161 }, { "epoch": 0.93, "grad_norm": 0.33061116795768813, "learning_rate": 2.8415298974108443e-07, "loss": 0.2325, "step": 20162 }, { "epoch": 0.93, "grad_norm": 0.5090773637124499, "learning_rate": 2.838009185076751e-07, "loss": 0.2171, "step": 20163 }, { "epoch": 0.93, "grad_norm": 1.2690319142176463, "learning_rate": 2.834490623815478e-07, "loss": 0.7513, "step": 20164 }, { "epoch": 0.93, "grad_norm": 0.551514320981443, "learning_rate": 2.830974213704929e-07, "loss": 0.2514, "step": 20165 }, { "epoch": 0.93, "grad_norm": 0.3736412005902798, "learning_rate": 2.827459954822964e-07, "loss": 0.2258, "step": 20166 }, { "epoch": 0.93, "grad_norm": 0.4707505814274513, "learning_rate": 2.823947847247377e-07, "loss": 0.2555, "step": 20167 }, { "epoch": 0.93, "grad_norm": 0.3038351998527382, "learning_rate": 2.820437891055927e-07, "loss": 0.2047, "step": 20168 }, { "epoch": 0.93, "grad_norm": 0.3963720453849057, "learning_rate": 2.8169300863263084e-07, "loss": 0.2082, "step": 20169 }, { "epoch": 0.93, "grad_norm": 0.5413860526503246, "learning_rate": 2.813424433136214e-07, "loss": 0.3642, "step": 20170 }, { "epoch": 0.93, "grad_norm": 0.5995151851571557, "learning_rate": 2.809920931563226e-07, "loss": 0.3172, "step": 20171 }, { "epoch": 0.93, "grad_norm": 0.4619368588640355, "learning_rate": 2.806419581684905e-07, "loss": 0.2902, "step": 20172 }, { "epoch": 0.93, "grad_norm": 0.3488466308094244, "learning_rate": 2.802920383578778e-07, "loss": 0.2324, "step": 20173 }, { "epoch": 0.93, "grad_norm": 0.27232748118659855, "learning_rate": 2.7994233373223155e-07, "loss": 0.1684, "step": 20174 }, { "epoch": 0.93, "grad_norm": 0.40474158641460867, "learning_rate": 2.7959284429929456e-07, "loss": 0.2721, "step": 20175 }, { "epoch": 0.93, "grad_norm": 0.9626553578901039, "learning_rate": 2.792435700668028e-07, "loss": 0.3241, "step": 20176 }, { "epoch": 0.93, "grad_norm": 0.31722322062937275, "learning_rate": 2.78894511042489e-07, "loss": 0.2604, "step": 20177 }, { "epoch": 0.93, "grad_norm": 0.3895468554815851, "learning_rate": 2.785456672340825e-07, "loss": 0.294, "step": 20178 }, { "epoch": 0.93, "grad_norm": 1.4939854993319297, "learning_rate": 2.781970386493049e-07, "loss": 0.2432, "step": 20179 }, { "epoch": 0.93, "grad_norm": 0.2915987221868721, "learning_rate": 2.7784862529587565e-07, "loss": 0.1529, "step": 20180 }, { "epoch": 0.93, "grad_norm": 0.28184409448982767, "learning_rate": 2.7750042718150514e-07, "loss": 0.2585, "step": 20181 }, { "epoch": 0.93, "grad_norm": 0.9899062014939967, "learning_rate": 2.771524443139062e-07, "loss": 0.2794, "step": 20182 }, { "epoch": 0.93, "grad_norm": 0.5350820048618158, "learning_rate": 2.768046767007815e-07, "loss": 0.3114, "step": 20183 }, { "epoch": 0.93, "grad_norm": 0.45072258099149337, "learning_rate": 2.764571243498282e-07, "loss": 0.3151, "step": 20184 }, { "epoch": 0.93, "grad_norm": 0.38721633349725504, "learning_rate": 2.761097872687435e-07, "loss": 0.3144, "step": 20185 }, { "epoch": 0.93, "grad_norm": 0.15391859611414427, "learning_rate": 2.757626654652157e-07, "loss": 0.0697, "step": 20186 }, { "epoch": 0.93, "grad_norm": 0.4129131986024776, "learning_rate": 2.7541575894693194e-07, "loss": 0.2583, "step": 20187 }, { "epoch": 0.93, "grad_norm": 0.6860461859416019, "learning_rate": 2.750690677215684e-07, "loss": 0.3578, "step": 20188 }, { "epoch": 0.93, "grad_norm": 0.29277410857329794, "learning_rate": 2.7472259179680436e-07, "loss": 0.2134, "step": 20189 }, { "epoch": 0.93, "grad_norm": 0.41219622653327004, "learning_rate": 2.7437633118030714e-07, "loss": 0.2876, "step": 20190 }, { "epoch": 0.93, "grad_norm": 1.417225158143789, "learning_rate": 2.74030285879745e-07, "loss": 0.6476, "step": 20191 }, { "epoch": 0.93, "grad_norm": 0.2301404947418482, "learning_rate": 2.736844559027796e-07, "loss": 0.0757, "step": 20192 }, { "epoch": 0.93, "grad_norm": 0.261165745726926, "learning_rate": 2.7333884125706366e-07, "loss": 0.2584, "step": 20193 }, { "epoch": 0.93, "grad_norm": 0.7182975732933482, "learning_rate": 2.729934419502522e-07, "loss": 0.3662, "step": 20194 }, { "epoch": 0.93, "grad_norm": 0.7670753992433383, "learning_rate": 2.726482579899914e-07, "loss": 0.2481, "step": 20195 }, { "epoch": 0.93, "grad_norm": 0.3555808371749825, "learning_rate": 2.723032893839217e-07, "loss": 0.2588, "step": 20196 }, { "epoch": 0.93, "grad_norm": 0.36739059675262153, "learning_rate": 2.719585361396837e-07, "loss": 0.2841, "step": 20197 }, { "epoch": 0.93, "grad_norm": 0.5240409322559296, "learning_rate": 2.7161399826490466e-07, "loss": 0.2057, "step": 20198 }, { "epoch": 0.93, "grad_norm": 0.2661827074728033, "learning_rate": 2.712696757672173e-07, "loss": 0.1756, "step": 20199 }, { "epoch": 0.93, "grad_norm": 0.7452292715715435, "learning_rate": 2.7092556865424335e-07, "loss": 0.3628, "step": 20200 }, { "epoch": 0.93, "grad_norm": 0.3563883628963731, "learning_rate": 2.7058167693359894e-07, "loss": 0.2953, "step": 20201 }, { "epoch": 0.93, "grad_norm": 0.3570397838882156, "learning_rate": 2.702380006128991e-07, "loss": 0.2106, "step": 20202 }, { "epoch": 0.93, "grad_norm": 1.3362907943324822, "learning_rate": 2.69894539699751e-07, "loss": 0.5229, "step": 20203 }, { "epoch": 0.93, "grad_norm": 0.3556723230714628, "learning_rate": 2.6955129420176193e-07, "loss": 0.2353, "step": 20204 }, { "epoch": 0.93, "grad_norm": 0.3091573995403711, "learning_rate": 2.692082641265281e-07, "loss": 0.2184, "step": 20205 }, { "epoch": 0.93, "grad_norm": 0.42567940828249273, "learning_rate": 2.688654494816445e-07, "loss": 0.2699, "step": 20206 }, { "epoch": 0.93, "grad_norm": 0.8022598823523924, "learning_rate": 2.685228502747006e-07, "loss": 0.5746, "step": 20207 }, { "epoch": 0.93, "grad_norm": 0.36396721921117126, "learning_rate": 2.6818046651328143e-07, "loss": 0.2803, "step": 20208 }, { "epoch": 0.93, "grad_norm": 0.46629831862394444, "learning_rate": 2.6783829820496875e-07, "loss": 0.2347, "step": 20209 }, { "epoch": 0.93, "grad_norm": 0.6155483486563386, "learning_rate": 2.6749634535733425e-07, "loss": 0.2873, "step": 20210 }, { "epoch": 0.93, "grad_norm": 0.35869568701384413, "learning_rate": 2.6715460797795077e-07, "loss": 0.255, "step": 20211 }, { "epoch": 0.93, "grad_norm": 0.39641371785188345, "learning_rate": 2.668130860743845e-07, "loss": 0.2253, "step": 20212 }, { "epoch": 0.93, "grad_norm": 0.526819493104097, "learning_rate": 2.664717796541949e-07, "loss": 0.3509, "step": 20213 }, { "epoch": 0.93, "grad_norm": 0.3299100849529229, "learning_rate": 2.661306887249393e-07, "loss": 0.2572, "step": 20214 }, { "epoch": 0.93, "grad_norm": 1.738010369940992, "learning_rate": 2.657898132941661e-07, "loss": 0.1408, "step": 20215 }, { "epoch": 0.93, "grad_norm": 0.5461471972474474, "learning_rate": 2.65449153369427e-07, "loss": 0.2994, "step": 20216 }, { "epoch": 0.93, "grad_norm": 0.2888324069364487, "learning_rate": 2.6510870895826044e-07, "loss": 0.2387, "step": 20217 }, { "epoch": 0.93, "grad_norm": 0.41673286033025686, "learning_rate": 2.64768480068206e-07, "loss": 0.1825, "step": 20218 }, { "epoch": 0.93, "grad_norm": 0.7993532437251042, "learning_rate": 2.6442846670679424e-07, "loss": 0.4802, "step": 20219 }, { "epoch": 0.93, "grad_norm": 0.28531139039427766, "learning_rate": 2.6408866888155024e-07, "loss": 0.2131, "step": 20220 }, { "epoch": 0.93, "grad_norm": 0.4050006639587751, "learning_rate": 2.6374908660000255e-07, "loss": 0.2812, "step": 20221 }, { "epoch": 0.93, "grad_norm": 0.6525850454659222, "learning_rate": 2.634097198696639e-07, "loss": 0.2532, "step": 20222 }, { "epoch": 0.93, "grad_norm": 0.3810252928933606, "learning_rate": 2.630705686980517e-07, "loss": 0.2651, "step": 20223 }, { "epoch": 0.93, "grad_norm": 0.5456574501643089, "learning_rate": 2.6273163309267215e-07, "loss": 0.2559, "step": 20224 }, { "epoch": 0.93, "grad_norm": 0.33147757532260236, "learning_rate": 2.623929130610281e-07, "loss": 0.2553, "step": 20225 }, { "epoch": 0.93, "grad_norm": 0.3694990538687372, "learning_rate": 2.620544086106214e-07, "loss": 0.2574, "step": 20226 }, { "epoch": 0.93, "grad_norm": 0.5833577935788045, "learning_rate": 2.617161197489426e-07, "loss": 0.2971, "step": 20227 }, { "epoch": 0.93, "grad_norm": 0.4601823133389908, "learning_rate": 2.6137804648348475e-07, "loss": 0.2521, "step": 20228 }, { "epoch": 0.93, "grad_norm": 0.30097295006029495, "learning_rate": 2.6104018882173065e-07, "loss": 0.2559, "step": 20229 }, { "epoch": 0.93, "grad_norm": 0.9482623066789242, "learning_rate": 2.6070254677115883e-07, "loss": 0.4963, "step": 20230 }, { "epoch": 0.93, "grad_norm": 0.246937631188175, "learning_rate": 2.603651203392477e-07, "loss": 0.1434, "step": 20231 }, { "epoch": 0.93, "grad_norm": 0.2745141671934632, "learning_rate": 2.600279095334635e-07, "loss": 0.2006, "step": 20232 }, { "epoch": 0.93, "grad_norm": 0.3840054725868079, "learning_rate": 2.596909143612747e-07, "loss": 0.299, "step": 20233 }, { "epoch": 0.93, "grad_norm": 0.6084770027547077, "learning_rate": 2.5935413483014096e-07, "loss": 0.3199, "step": 20234 }, { "epoch": 0.93, "grad_norm": 0.436650169971636, "learning_rate": 2.5901757094751956e-07, "loss": 0.1849, "step": 20235 }, { "epoch": 0.93, "grad_norm": 1.5088172664479822, "learning_rate": 2.5868122272086127e-07, "loss": 0.457, "step": 20236 }, { "epoch": 0.93, "grad_norm": 0.35984480993276813, "learning_rate": 2.58345090157609e-07, "loss": 0.3084, "step": 20237 }, { "epoch": 0.93, "grad_norm": 0.32307510187584904, "learning_rate": 2.5800917326521013e-07, "loss": 0.1801, "step": 20238 }, { "epoch": 0.93, "grad_norm": 0.3737985025641793, "learning_rate": 2.5767347205109763e-07, "loss": 0.1828, "step": 20239 }, { "epoch": 0.93, "grad_norm": 0.400257088207057, "learning_rate": 2.5733798652270435e-07, "loss": 0.2886, "step": 20240 }, { "epoch": 0.93, "grad_norm": 0.31851108753004104, "learning_rate": 2.570027166874578e-07, "loss": 0.1835, "step": 20241 }, { "epoch": 0.93, "grad_norm": 1.3338231288364275, "learning_rate": 2.5666766255278087e-07, "loss": 0.4715, "step": 20242 }, { "epoch": 0.93, "grad_norm": 0.47592708100632153, "learning_rate": 2.5633282412609207e-07, "loss": 0.2862, "step": 20243 }, { "epoch": 0.93, "grad_norm": 0.3154445641298411, "learning_rate": 2.5599820141480326e-07, "loss": 0.1858, "step": 20244 }, { "epoch": 0.93, "grad_norm": 0.30290634283070234, "learning_rate": 2.5566379442632185e-07, "loss": 0.2411, "step": 20245 }, { "epoch": 0.93, "grad_norm": 0.8617087596788787, "learning_rate": 2.55329603168053e-07, "loss": 0.3666, "step": 20246 }, { "epoch": 0.93, "grad_norm": 0.42513575359285893, "learning_rate": 2.549956276473953e-07, "loss": 0.2486, "step": 20247 }, { "epoch": 0.93, "grad_norm": 0.32321571423805295, "learning_rate": 2.546618678717416e-07, "loss": 0.2313, "step": 20248 }, { "epoch": 0.93, "grad_norm": 1.277329839984253, "learning_rate": 2.543283238484806e-07, "loss": 0.6595, "step": 20249 }, { "epoch": 0.93, "grad_norm": 0.4980861580041878, "learning_rate": 2.539949955849985e-07, "loss": 0.2724, "step": 20250 }, { "epoch": 0.93, "grad_norm": 0.4478850520930908, "learning_rate": 2.5366188308867277e-07, "loss": 0.2443, "step": 20251 }, { "epoch": 0.93, "grad_norm": 0.24753836591677975, "learning_rate": 2.5332898636688087e-07, "loss": 0.2269, "step": 20252 }, { "epoch": 0.93, "grad_norm": 0.35801743365078215, "learning_rate": 2.5299630542699015e-07, "loss": 0.2607, "step": 20253 }, { "epoch": 0.93, "grad_norm": 2.044771878249905, "learning_rate": 2.5266384027636705e-07, "loss": 0.2272, "step": 20254 }, { "epoch": 0.93, "grad_norm": 0.8769861360240941, "learning_rate": 2.523315909223723e-07, "loss": 0.5199, "step": 20255 }, { "epoch": 0.93, "grad_norm": 0.3867640874302784, "learning_rate": 2.5199955737236104e-07, "loss": 0.252, "step": 20256 }, { "epoch": 0.93, "grad_norm": 0.3430958669740715, "learning_rate": 2.516677396336842e-07, "loss": 0.2535, "step": 20257 }, { "epoch": 0.93, "grad_norm": 0.39511471586655034, "learning_rate": 2.5133613771368803e-07, "loss": 0.1536, "step": 20258 }, { "epoch": 0.93, "grad_norm": 0.739350940546067, "learning_rate": 2.510047516197134e-07, "loss": 0.3264, "step": 20259 }, { "epoch": 0.93, "grad_norm": 0.32475921453100937, "learning_rate": 2.506735813590988e-07, "loss": 0.2792, "step": 20260 }, { "epoch": 0.93, "grad_norm": 0.4252590542063217, "learning_rate": 2.50342626939174e-07, "loss": 0.2705, "step": 20261 }, { "epoch": 0.93, "grad_norm": 0.4370952043631025, "learning_rate": 2.500118883672653e-07, "loss": 0.2475, "step": 20262 }, { "epoch": 0.93, "grad_norm": 0.5073994312791925, "learning_rate": 2.496813656506969e-07, "loss": 0.3274, "step": 20263 }, { "epoch": 0.93, "grad_norm": 0.2713759143800125, "learning_rate": 2.4935105879678734e-07, "loss": 0.1917, "step": 20264 }, { "epoch": 0.93, "grad_norm": 0.2963030583515216, "learning_rate": 2.4902096781284633e-07, "loss": 0.1758, "step": 20265 }, { "epoch": 0.93, "grad_norm": 0.6415973249817809, "learning_rate": 2.486910927061825e-07, "loss": 0.3741, "step": 20266 }, { "epoch": 0.93, "grad_norm": 0.8155998931265895, "learning_rate": 2.483614334841e-07, "loss": 0.2965, "step": 20267 }, { "epoch": 0.93, "grad_norm": 0.3350300342590162, "learning_rate": 2.4803199015389524e-07, "loss": 0.2624, "step": 20268 }, { "epoch": 0.93, "grad_norm": 0.4055295121337285, "learning_rate": 2.4770276272286563e-07, "loss": 0.3003, "step": 20269 }, { "epoch": 0.93, "grad_norm": 0.30571723614964197, "learning_rate": 2.473737511982954e-07, "loss": 0.0832, "step": 20270 }, { "epoch": 0.93, "grad_norm": 0.43670735212686623, "learning_rate": 2.4704495558747097e-07, "loss": 0.2584, "step": 20271 }, { "epoch": 0.93, "grad_norm": 0.6380377944041562, "learning_rate": 2.467163758976721e-07, "loss": 0.369, "step": 20272 }, { "epoch": 0.93, "grad_norm": 0.35140796119154283, "learning_rate": 2.463880121361717e-07, "loss": 0.2948, "step": 20273 }, { "epoch": 0.93, "grad_norm": 0.3804209734815149, "learning_rate": 2.4605986431024075e-07, "loss": 0.2192, "step": 20274 }, { "epoch": 0.93, "grad_norm": 1.4749005472439265, "learning_rate": 2.4573193242714234e-07, "loss": 0.5619, "step": 20275 }, { "epoch": 0.93, "grad_norm": 0.36173073449424503, "learning_rate": 2.454042164941384e-07, "loss": 0.2341, "step": 20276 }, { "epoch": 0.93, "grad_norm": 0.3229009453146505, "learning_rate": 2.450767165184831e-07, "loss": 0.0794, "step": 20277 }, { "epoch": 0.93, "grad_norm": 0.42162576358308523, "learning_rate": 2.4474943250742734e-07, "loss": 0.3738, "step": 20278 }, { "epoch": 0.93, "grad_norm": 0.6112125037337589, "learning_rate": 2.4442236446821754e-07, "loss": 0.3663, "step": 20279 }, { "epoch": 0.93, "grad_norm": 0.29490886378306325, "learning_rate": 2.4409551240809237e-07, "loss": 0.2105, "step": 20280 }, { "epoch": 0.93, "grad_norm": 0.452786444533277, "learning_rate": 2.437688763342916e-07, "loss": 0.3006, "step": 20281 }, { "epoch": 0.93, "grad_norm": 0.45123505309602124, "learning_rate": 2.4344245625404385e-07, "loss": 0.204, "step": 20282 }, { "epoch": 0.93, "grad_norm": 0.250896575916827, "learning_rate": 2.431162521745778e-07, "loss": 0.1253, "step": 20283 }, { "epoch": 0.93, "grad_norm": 0.31551979535233116, "learning_rate": 2.4279026410311326e-07, "loss": 0.2764, "step": 20284 }, { "epoch": 0.93, "grad_norm": 0.8200551498706821, "learning_rate": 2.424644920468677e-07, "loss": 0.3629, "step": 20285 }, { "epoch": 0.93, "grad_norm": 0.5641014690955516, "learning_rate": 2.421389360130544e-07, "loss": 0.338, "step": 20286 }, { "epoch": 0.93, "grad_norm": 0.4785305909289923, "learning_rate": 2.4181359600887965e-07, "loss": 0.2125, "step": 20287 }, { "epoch": 0.93, "grad_norm": 0.4061423224059129, "learning_rate": 2.414884720415467e-07, "loss": 0.274, "step": 20288 }, { "epoch": 0.93, "grad_norm": 0.30088749255608993, "learning_rate": 2.4116356411825526e-07, "loss": 0.1783, "step": 20289 }, { "epoch": 0.93, "grad_norm": 0.4105270021940145, "learning_rate": 2.4083887224619517e-07, "loss": 0.2309, "step": 20290 }, { "epoch": 0.93, "grad_norm": 0.842164330084923, "learning_rate": 2.4051439643255737e-07, "loss": 0.4385, "step": 20291 }, { "epoch": 0.93, "grad_norm": 0.302655314777836, "learning_rate": 2.4019013668452385e-07, "loss": 0.2484, "step": 20292 }, { "epoch": 0.93, "grad_norm": 0.5277050958918428, "learning_rate": 2.3986609300927443e-07, "loss": 0.2296, "step": 20293 }, { "epoch": 0.93, "grad_norm": 0.3968767535723013, "learning_rate": 2.395422654139834e-07, "loss": 0.1911, "step": 20294 }, { "epoch": 0.93, "grad_norm": 0.39800450306383267, "learning_rate": 2.3921865390581834e-07, "loss": 0.2442, "step": 20295 }, { "epoch": 0.93, "grad_norm": 0.2530313238900536, "learning_rate": 2.3889525849194573e-07, "loss": 0.2302, "step": 20296 }, { "epoch": 0.93, "grad_norm": 0.6657040919509456, "learning_rate": 2.385720791795221e-07, "loss": 0.3626, "step": 20297 }, { "epoch": 0.93, "grad_norm": 0.9596038743778246, "learning_rate": 2.382491159757072e-07, "loss": 0.5399, "step": 20298 }, { "epoch": 0.93, "grad_norm": 0.41118919885413835, "learning_rate": 2.3792636888764653e-07, "loss": 0.2798, "step": 20299 }, { "epoch": 0.93, "grad_norm": 0.46458227413717607, "learning_rate": 2.3760383792248877e-07, "loss": 0.2362, "step": 20300 }, { "epoch": 0.93, "grad_norm": 0.4206024100970382, "learning_rate": 2.372815230873715e-07, "loss": 0.2384, "step": 20301 }, { "epoch": 0.93, "grad_norm": 0.39675777516914296, "learning_rate": 2.3695942438943242e-07, "loss": 0.2776, "step": 20302 }, { "epoch": 0.93, "grad_norm": 0.9036010448243601, "learning_rate": 2.3663754183580246e-07, "loss": 0.3673, "step": 20303 }, { "epoch": 0.93, "grad_norm": 0.3099949562224228, "learning_rate": 2.363158754336059e-07, "loss": 0.2501, "step": 20304 }, { "epoch": 0.93, "grad_norm": 0.3270929693647476, "learning_rate": 2.3599442518996595e-07, "loss": 0.2273, "step": 20305 }, { "epoch": 0.93, "grad_norm": 1.8138154553927484, "learning_rate": 2.3567319111200026e-07, "loss": 0.1846, "step": 20306 }, { "epoch": 0.93, "grad_norm": 0.5444685251749236, "learning_rate": 2.3535217320681757e-07, "loss": 0.2963, "step": 20307 }, { "epoch": 0.93, "grad_norm": 0.32499922480771637, "learning_rate": 2.3503137148152667e-07, "loss": 0.2809, "step": 20308 }, { "epoch": 0.93, "grad_norm": 0.3244533675020009, "learning_rate": 2.347107859432296e-07, "loss": 0.216, "step": 20309 }, { "epoch": 0.93, "grad_norm": 0.38251829464169473, "learning_rate": 2.3439041659902405e-07, "loss": 0.1854, "step": 20310 }, { "epoch": 0.93, "grad_norm": 0.6514739418433115, "learning_rate": 2.3407026345600326e-07, "loss": 0.3373, "step": 20311 }, { "epoch": 0.93, "grad_norm": 0.46085583374294253, "learning_rate": 2.3375032652125262e-07, "loss": 0.3232, "step": 20312 }, { "epoch": 0.93, "grad_norm": 0.7056120353029429, "learning_rate": 2.334306058018587e-07, "loss": 0.2697, "step": 20313 }, { "epoch": 0.93, "grad_norm": 0.3627885529221782, "learning_rate": 2.3311110130489589e-07, "loss": 0.2501, "step": 20314 }, { "epoch": 0.93, "grad_norm": 0.5040363119911151, "learning_rate": 2.3279181303744182e-07, "loss": 0.3626, "step": 20315 }, { "epoch": 0.93, "grad_norm": 0.2784540163617053, "learning_rate": 2.3247274100656192e-07, "loss": 0.1625, "step": 20316 }, { "epoch": 0.93, "grad_norm": 0.28793214363624575, "learning_rate": 2.321538852193228e-07, "loss": 0.2236, "step": 20317 }, { "epoch": 0.93, "grad_norm": 1.0457888910222868, "learning_rate": 2.3183524568278103e-07, "loss": 0.4959, "step": 20318 }, { "epoch": 0.93, "grad_norm": 0.4212363905585375, "learning_rate": 2.315168224039932e-07, "loss": 0.2373, "step": 20319 }, { "epoch": 0.93, "grad_norm": 0.32649739895160507, "learning_rate": 2.311986153900081e-07, "loss": 0.2568, "step": 20320 }, { "epoch": 0.93, "grad_norm": 1.334089284694028, "learning_rate": 2.3088062464786898e-07, "loss": 0.6777, "step": 20321 }, { "epoch": 0.93, "grad_norm": 0.366063592687289, "learning_rate": 2.30562850184618e-07, "loss": 0.1818, "step": 20322 }, { "epoch": 0.93, "grad_norm": 0.3955331787358139, "learning_rate": 2.3024529200728952e-07, "loss": 0.187, "step": 20323 }, { "epoch": 0.93, "grad_norm": 0.3861155678140227, "learning_rate": 2.299279501229146e-07, "loss": 0.2874, "step": 20324 }, { "epoch": 0.93, "grad_norm": 0.6414790965607733, "learning_rate": 2.296108245385187e-07, "loss": 0.3351, "step": 20325 }, { "epoch": 0.93, "grad_norm": 0.3242079721436717, "learning_rate": 2.2929391526112067e-07, "loss": 0.1969, "step": 20326 }, { "epoch": 0.93, "grad_norm": 1.270508341889147, "learning_rate": 2.2897722229773934e-07, "loss": 0.8165, "step": 20327 }, { "epoch": 0.93, "grad_norm": 0.27067775847068815, "learning_rate": 2.2866074565538355e-07, "loss": 0.2275, "step": 20328 }, { "epoch": 0.93, "grad_norm": 0.2784601732216756, "learning_rate": 2.2834448534106322e-07, "loss": 0.1578, "step": 20329 }, { "epoch": 0.93, "grad_norm": 0.6654540134533653, "learning_rate": 2.2802844136177727e-07, "loss": 0.4029, "step": 20330 }, { "epoch": 0.93, "grad_norm": 0.36523997297511285, "learning_rate": 2.2771261372452225e-07, "loss": 0.2815, "step": 20331 }, { "epoch": 0.93, "grad_norm": 0.3874379345787041, "learning_rate": 2.2739700243629258e-07, "loss": 0.1865, "step": 20332 }, { "epoch": 0.93, "grad_norm": 1.2624394491082165, "learning_rate": 2.2708160750407272e-07, "loss": 0.7589, "step": 20333 }, { "epoch": 0.93, "grad_norm": 0.4753703274883254, "learning_rate": 2.2676642893484924e-07, "loss": 0.2531, "step": 20334 }, { "epoch": 0.93, "grad_norm": 0.29742470537833593, "learning_rate": 2.2645146673559548e-07, "loss": 0.222, "step": 20335 }, { "epoch": 0.93, "grad_norm": 0.37956837268024224, "learning_rate": 2.26136720913287e-07, "loss": 0.2525, "step": 20336 }, { "epoch": 0.93, "grad_norm": 1.0212594133599044, "learning_rate": 2.2582219147489148e-07, "loss": 0.3834, "step": 20337 }, { "epoch": 0.93, "grad_norm": 0.3218352834113083, "learning_rate": 2.255078784273712e-07, "loss": 0.2429, "step": 20338 }, { "epoch": 0.93, "grad_norm": 1.2726911456318988, "learning_rate": 2.2519378177768726e-07, "loss": 0.5171, "step": 20339 }, { "epoch": 0.93, "grad_norm": 0.3019517703327587, "learning_rate": 2.248799015327907e-07, "loss": 0.2344, "step": 20340 }, { "epoch": 0.93, "grad_norm": 0.3631963923240562, "learning_rate": 2.245662376996316e-07, "loss": 0.2641, "step": 20341 }, { "epoch": 0.93, "grad_norm": 0.47189501742338735, "learning_rate": 2.2425279028515658e-07, "loss": 0.2008, "step": 20342 }, { "epoch": 0.93, "grad_norm": 0.39569661663070327, "learning_rate": 2.2393955929630006e-07, "loss": 0.2883, "step": 20343 }, { "epoch": 0.93, "grad_norm": 0.38570537432736174, "learning_rate": 2.236265447399999e-07, "loss": 0.2255, "step": 20344 }, { "epoch": 0.93, "grad_norm": 0.5477151484180477, "learning_rate": 2.2331374662318606e-07, "loss": 0.2596, "step": 20345 }, { "epoch": 0.93, "grad_norm": 0.81591999427598, "learning_rate": 2.23001164952783e-07, "loss": 0.3123, "step": 20346 }, { "epoch": 0.93, "grad_norm": 0.38507966022435786, "learning_rate": 2.2268879973571077e-07, "loss": 0.2834, "step": 20347 }, { "epoch": 0.93, "grad_norm": 0.407515416413074, "learning_rate": 2.2237665097888494e-07, "loss": 0.3007, "step": 20348 }, { "epoch": 0.93, "grad_norm": 0.1556462289393866, "learning_rate": 2.2206471868921775e-07, "loss": 0.069, "step": 20349 }, { "epoch": 0.93, "grad_norm": 0.3638398600921348, "learning_rate": 2.2175300287361146e-07, "loss": 0.2723, "step": 20350 }, { "epoch": 0.93, "grad_norm": 0.5305481133851238, "learning_rate": 2.2144150353897053e-07, "loss": 0.4006, "step": 20351 }, { "epoch": 0.93, "grad_norm": 0.5128084600900923, "learning_rate": 2.2113022069218947e-07, "loss": 0.2433, "step": 20352 }, { "epoch": 0.93, "grad_norm": 0.35666864695916084, "learning_rate": 2.2081915434016053e-07, "loss": 0.252, "step": 20353 }, { "epoch": 0.94, "grad_norm": 0.3513070545024936, "learning_rate": 2.2050830448977046e-07, "loss": 0.2015, "step": 20354 }, { "epoch": 0.94, "grad_norm": 0.4797732697225549, "learning_rate": 2.2019767114790037e-07, "loss": 0.2237, "step": 20355 }, { "epoch": 0.94, "grad_norm": 0.4320824685646187, "learning_rate": 2.1988725432142921e-07, "loss": 0.2603, "step": 20356 }, { "epoch": 0.94, "grad_norm": 1.219671450713135, "learning_rate": 2.1957705401722486e-07, "loss": 0.7308, "step": 20357 }, { "epoch": 0.94, "grad_norm": 0.5907908938222902, "learning_rate": 2.1926707024216065e-07, "loss": 0.29, "step": 20358 }, { "epoch": 0.94, "grad_norm": 0.2980162293910632, "learning_rate": 2.1895730300309782e-07, "loss": 0.2547, "step": 20359 }, { "epoch": 0.94, "grad_norm": 0.5376113299389748, "learning_rate": 2.1864775230689082e-07, "loss": 0.3275, "step": 20360 }, { "epoch": 0.94, "grad_norm": 0.3447872746986093, "learning_rate": 2.1833841816039536e-07, "loss": 0.1724, "step": 20361 }, { "epoch": 0.94, "grad_norm": 0.5777005569542517, "learning_rate": 2.1802930057046033e-07, "loss": 0.1709, "step": 20362 }, { "epoch": 0.94, "grad_norm": 0.36678873634119313, "learning_rate": 2.177203995439292e-07, "loss": 0.2942, "step": 20363 }, { "epoch": 0.94, "grad_norm": 0.453671282522417, "learning_rate": 2.174117150876398e-07, "loss": 0.3196, "step": 20364 }, { "epoch": 0.94, "grad_norm": 0.4022097635019398, "learning_rate": 2.1710324720842556e-07, "loss": 0.2013, "step": 20365 }, { "epoch": 0.94, "grad_norm": 0.610924593817953, "learning_rate": 2.167949959131177e-07, "loss": 0.3092, "step": 20366 }, { "epoch": 0.94, "grad_norm": 0.24023013007139188, "learning_rate": 2.1648696120853852e-07, "loss": 0.1977, "step": 20367 }, { "epoch": 0.94, "grad_norm": 0.30836719366336796, "learning_rate": 2.1617914310150923e-07, "loss": 0.1762, "step": 20368 }, { "epoch": 0.94, "grad_norm": 0.8934089653922884, "learning_rate": 2.1587154159884326e-07, "loss": 0.5387, "step": 20369 }, { "epoch": 0.94, "grad_norm": 0.7326153035333582, "learning_rate": 2.1556415670735186e-07, "loss": 0.3405, "step": 20370 }, { "epoch": 0.94, "grad_norm": 0.2687671004486713, "learning_rate": 2.1525698843383957e-07, "loss": 0.2047, "step": 20371 }, { "epoch": 0.94, "grad_norm": 0.6407793777763497, "learning_rate": 2.149500367851065e-07, "loss": 0.3433, "step": 20372 }, { "epoch": 0.94, "grad_norm": 0.24486951353909198, "learning_rate": 2.1464330176795057e-07, "loss": 0.1356, "step": 20373 }, { "epoch": 0.94, "grad_norm": 0.43065552540764385, "learning_rate": 2.1433678338915743e-07, "loss": 0.2612, "step": 20374 }, { "epoch": 0.94, "grad_norm": 0.3642075185994928, "learning_rate": 2.140304816555183e-07, "loss": 0.2396, "step": 20375 }, { "epoch": 0.94, "grad_norm": 0.6808316565385003, "learning_rate": 2.1372439657381339e-07, "loss": 0.3066, "step": 20376 }, { "epoch": 0.94, "grad_norm": 0.3551770542473789, "learning_rate": 2.134185281508161e-07, "loss": 0.2614, "step": 20377 }, { "epoch": 0.94, "grad_norm": 1.4415529381269057, "learning_rate": 2.1311287639330102e-07, "loss": 0.1986, "step": 20378 }, { "epoch": 0.94, "grad_norm": 0.23632459020531968, "learning_rate": 2.1280744130803387e-07, "loss": 0.2085, "step": 20379 }, { "epoch": 0.94, "grad_norm": 0.39953285597734156, "learning_rate": 2.1250222290177813e-07, "loss": 0.241, "step": 20380 }, { "epoch": 0.94, "grad_norm": 0.48470030142612236, "learning_rate": 2.1219722118128838e-07, "loss": 0.2571, "step": 20381 }, { "epoch": 0.94, "grad_norm": 0.706063137222035, "learning_rate": 2.1189243615331923e-07, "loss": 0.3918, "step": 20382 }, { "epoch": 0.94, "grad_norm": 0.3872402879199893, "learning_rate": 2.1158786782461749e-07, "loss": 0.2777, "step": 20383 }, { "epoch": 0.94, "grad_norm": 0.3272993457786425, "learning_rate": 2.1128351620192666e-07, "loss": 0.2237, "step": 20384 }, { "epoch": 0.94, "grad_norm": 0.3221901484105169, "learning_rate": 2.109793812919847e-07, "loss": 0.1791, "step": 20385 }, { "epoch": 0.94, "grad_norm": 0.6111052974105129, "learning_rate": 2.1067546310152287e-07, "loss": 0.3089, "step": 20386 }, { "epoch": 0.94, "grad_norm": 0.27871068665779475, "learning_rate": 2.1037176163727136e-07, "loss": 0.2717, "step": 20387 }, { "epoch": 0.94, "grad_norm": 0.7988840093341497, "learning_rate": 2.1006827690595478e-07, "loss": 0.3091, "step": 20388 }, { "epoch": 0.94, "grad_norm": 0.4735241670265819, "learning_rate": 2.0976500891429107e-07, "loss": 0.2695, "step": 20389 }, { "epoch": 0.94, "grad_norm": 0.6613151062401929, "learning_rate": 2.094619576689938e-07, "loss": 0.3487, "step": 20390 }, { "epoch": 0.94, "grad_norm": 0.2598688411503686, "learning_rate": 2.091591231767709e-07, "loss": 0.1915, "step": 20391 }, { "epoch": 0.94, "grad_norm": 0.3280152449456758, "learning_rate": 2.0885650544433033e-07, "loss": 0.2131, "step": 20392 }, { "epoch": 0.94, "grad_norm": 0.48272306235511014, "learning_rate": 2.0855410447836899e-07, "loss": 0.3169, "step": 20393 }, { "epoch": 0.94, "grad_norm": 1.0209784982127665, "learning_rate": 2.0825192028558373e-07, "loss": 0.4927, "step": 20394 }, { "epoch": 0.94, "grad_norm": 0.28851719200288073, "learning_rate": 2.0794995287266251e-07, "loss": 0.2671, "step": 20395 }, { "epoch": 0.94, "grad_norm": 0.933235717363819, "learning_rate": 2.0764820224629222e-07, "loss": 0.3166, "step": 20396 }, { "epoch": 0.94, "grad_norm": 0.5017730251616959, "learning_rate": 2.073466684131531e-07, "loss": 0.1522, "step": 20397 }, { "epoch": 0.94, "grad_norm": 0.43156852936186113, "learning_rate": 2.0704535137991867e-07, "loss": 0.2671, "step": 20398 }, { "epoch": 0.94, "grad_norm": 0.32873267894150576, "learning_rate": 2.0674425115326357e-07, "loss": 0.2725, "step": 20399 }, { "epoch": 0.94, "grad_norm": 0.9888763073706089, "learning_rate": 2.064433677398514e-07, "loss": 0.5853, "step": 20400 }, { "epoch": 0.94, "grad_norm": 0.4128605356417361, "learning_rate": 2.0614270114634238e-07, "loss": 0.0844, "step": 20401 }, { "epoch": 0.94, "grad_norm": 0.4461166208872364, "learning_rate": 2.0584225137939673e-07, "loss": 0.2971, "step": 20402 }, { "epoch": 0.94, "grad_norm": 0.36416643002486687, "learning_rate": 2.0554201844566246e-07, "loss": 0.2931, "step": 20403 }, { "epoch": 0.94, "grad_norm": 0.6022783532858119, "learning_rate": 2.052420023517887e-07, "loss": 0.1899, "step": 20404 }, { "epoch": 0.94, "grad_norm": 0.3200812607866496, "learning_rate": 2.0494220310441683e-07, "loss": 0.2569, "step": 20405 }, { "epoch": 0.94, "grad_norm": 1.1696846086953894, "learning_rate": 2.0464262071018258e-07, "loss": 0.7248, "step": 20406 }, { "epoch": 0.94, "grad_norm": 0.2392843467032607, "learning_rate": 2.0434325517572185e-07, "loss": 0.1645, "step": 20407 }, { "epoch": 0.94, "grad_norm": 0.5100837631449398, "learning_rate": 2.0404410650765817e-07, "loss": 0.2815, "step": 20408 }, { "epoch": 0.94, "grad_norm": 0.8881724471217609, "learning_rate": 2.0374517471261734e-07, "loss": 0.3767, "step": 20409 }, { "epoch": 0.94, "grad_norm": 0.47059438844974977, "learning_rate": 2.0344645979721632e-07, "loss": 0.2169, "step": 20410 }, { "epoch": 0.94, "grad_norm": 0.3945350936540093, "learning_rate": 2.0314796176806984e-07, "loss": 0.2953, "step": 20411 }, { "epoch": 0.94, "grad_norm": 0.5164100091668331, "learning_rate": 2.0284968063178477e-07, "loss": 0.3876, "step": 20412 }, { "epoch": 0.94, "grad_norm": 0.23973671206336145, "learning_rate": 2.025516163949637e-07, "loss": 0.1405, "step": 20413 }, { "epoch": 0.94, "grad_norm": 0.39926256369913904, "learning_rate": 2.02253769064209e-07, "loss": 0.2164, "step": 20414 }, { "epoch": 0.94, "grad_norm": 0.4069668460569309, "learning_rate": 2.0195613864611108e-07, "loss": 0.2847, "step": 20415 }, { "epoch": 0.94, "grad_norm": 0.6034305812371357, "learning_rate": 2.0165872514726237e-07, "loss": 0.3068, "step": 20416 }, { "epoch": 0.94, "grad_norm": 0.4245702671823467, "learning_rate": 2.013615285742443e-07, "loss": 0.2192, "step": 20417 }, { "epoch": 0.94, "grad_norm": 0.33197027042437155, "learning_rate": 2.0106454893363824e-07, "loss": 0.2693, "step": 20418 }, { "epoch": 0.94, "grad_norm": 0.3099281917365217, "learning_rate": 2.0076778623201898e-07, "loss": 0.2169, "step": 20419 }, { "epoch": 0.94, "grad_norm": 0.36704963176160615, "learning_rate": 2.0047124047595567e-07, "loss": 0.1908, "step": 20420 }, { "epoch": 0.94, "grad_norm": 0.7703200634484844, "learning_rate": 2.001749116720153e-07, "loss": 0.3597, "step": 20421 }, { "epoch": 0.94, "grad_norm": 1.2707164665385928, "learning_rate": 1.9987879982675596e-07, "loss": 0.329, "step": 20422 }, { "epoch": 0.94, "grad_norm": 0.25195888092046703, "learning_rate": 1.995829049467357e-07, "loss": 0.2317, "step": 20423 }, { "epoch": 0.94, "grad_norm": 1.5811561566242762, "learning_rate": 1.992872270385038e-07, "loss": 0.724, "step": 20424 }, { "epoch": 0.94, "grad_norm": 0.3043885414567743, "learning_rate": 1.9899176610860605e-07, "loss": 0.1614, "step": 20425 }, { "epoch": 0.94, "grad_norm": 0.39372796653292413, "learning_rate": 1.9869652216358505e-07, "loss": 0.2715, "step": 20426 }, { "epoch": 0.94, "grad_norm": 0.35851093330294664, "learning_rate": 1.9840149520997552e-07, "loss": 0.2358, "step": 20427 }, { "epoch": 0.94, "grad_norm": 0.957601474558348, "learning_rate": 1.981066852543112e-07, "loss": 0.3854, "step": 20428 }, { "epoch": 0.94, "grad_norm": 0.3932757507110718, "learning_rate": 1.9781209230311682e-07, "loss": 0.2677, "step": 20429 }, { "epoch": 0.94, "grad_norm": 0.5287798956466142, "learning_rate": 1.9751771636291496e-07, "loss": 0.2612, "step": 20430 }, { "epoch": 0.94, "grad_norm": 0.3418548491039063, "learning_rate": 1.972235574402237e-07, "loss": 0.2239, "step": 20431 }, { "epoch": 0.94, "grad_norm": 0.2845129583643551, "learning_rate": 1.9692961554155455e-07, "loss": 0.2113, "step": 20432 }, { "epoch": 0.94, "grad_norm": 0.6648581863837347, "learning_rate": 1.966358906734167e-07, "loss": 0.284, "step": 20433 }, { "epoch": 0.94, "grad_norm": 0.43754425350539133, "learning_rate": 1.9634238284230945e-07, "loss": 0.2885, "step": 20434 }, { "epoch": 0.94, "grad_norm": 0.3433553621456016, "learning_rate": 1.960490920547342e-07, "loss": 0.2884, "step": 20435 }, { "epoch": 0.94, "grad_norm": 1.10506298031863, "learning_rate": 1.957560183171825e-07, "loss": 0.5028, "step": 20436 }, { "epoch": 0.94, "grad_norm": 0.5508027036185996, "learning_rate": 1.9546316163614354e-07, "loss": 0.2761, "step": 20437 }, { "epoch": 0.94, "grad_norm": 0.35337702173589947, "learning_rate": 1.9517052201809994e-07, "loss": 0.2557, "step": 20438 }, { "epoch": 0.94, "grad_norm": 0.31641671055389003, "learning_rate": 1.9487809946953095e-07, "loss": 0.2466, "step": 20439 }, { "epoch": 0.94, "grad_norm": 1.0594179808228281, "learning_rate": 1.945858939969114e-07, "loss": 0.1026, "step": 20440 }, { "epoch": 0.94, "grad_norm": 0.3712443940130986, "learning_rate": 1.9429390560670946e-07, "loss": 0.2781, "step": 20441 }, { "epoch": 0.94, "grad_norm": 0.5788349920310496, "learning_rate": 1.9400213430538773e-07, "loss": 0.3295, "step": 20442 }, { "epoch": 0.94, "grad_norm": 0.44125149317889895, "learning_rate": 1.937105800994099e-07, "loss": 0.2368, "step": 20443 }, { "epoch": 0.94, "grad_norm": 0.38456601402433777, "learning_rate": 1.9341924299522641e-07, "loss": 0.2561, "step": 20444 }, { "epoch": 0.94, "grad_norm": 0.2952617166111256, "learning_rate": 1.9312812299929096e-07, "loss": 0.1808, "step": 20445 }, { "epoch": 0.94, "grad_norm": 0.3828287251667097, "learning_rate": 1.9283722011804616e-07, "loss": 0.2246, "step": 20446 }, { "epoch": 0.94, "grad_norm": 0.3572317357069083, "learning_rate": 1.9254653435793247e-07, "loss": 0.2674, "step": 20447 }, { "epoch": 0.94, "grad_norm": 1.2261124422778444, "learning_rate": 1.9225606572538691e-07, "loss": 0.6444, "step": 20448 }, { "epoch": 0.94, "grad_norm": 0.641795778479192, "learning_rate": 1.9196581422683879e-07, "loss": 0.3487, "step": 20449 }, { "epoch": 0.94, "grad_norm": 0.3160819442364377, "learning_rate": 1.916757798687152e-07, "loss": 0.184, "step": 20450 }, { "epoch": 0.94, "grad_norm": 0.2666931696691146, "learning_rate": 1.9138596265743437e-07, "loss": 0.2108, "step": 20451 }, { "epoch": 0.94, "grad_norm": 0.9001285574396501, "learning_rate": 1.9109636259941665e-07, "loss": 0.4365, "step": 20452 }, { "epoch": 0.94, "grad_norm": 0.3354497451641189, "learning_rate": 1.9080697970107143e-07, "loss": 0.1997, "step": 20453 }, { "epoch": 0.94, "grad_norm": 0.3562307992058924, "learning_rate": 1.9051781396880465e-07, "loss": 0.2761, "step": 20454 }, { "epoch": 0.94, "grad_norm": 0.7190937300166942, "learning_rate": 1.9022886540901896e-07, "loss": 0.3587, "step": 20455 }, { "epoch": 0.94, "grad_norm": 0.33736248086153364, "learning_rate": 1.8994013402811152e-07, "loss": 0.1967, "step": 20456 }, { "epoch": 0.94, "grad_norm": 0.38291055506058985, "learning_rate": 1.8965161983247494e-07, "loss": 0.1826, "step": 20457 }, { "epoch": 0.94, "grad_norm": 0.3634390794393071, "learning_rate": 1.8936332282849524e-07, "loss": 0.2942, "step": 20458 }, { "epoch": 0.94, "grad_norm": 0.3134220166612323, "learning_rate": 1.890752430225573e-07, "loss": 0.1992, "step": 20459 }, { "epoch": 0.94, "grad_norm": 0.7358527077854286, "learning_rate": 1.8878738042103717e-07, "loss": 0.4177, "step": 20460 }, { "epoch": 0.94, "grad_norm": 0.8232860449028477, "learning_rate": 1.884997350303075e-07, "loss": 0.4002, "step": 20461 }, { "epoch": 0.94, "grad_norm": 0.34073203080387404, "learning_rate": 1.8821230685673763e-07, "loss": 0.2539, "step": 20462 }, { "epoch": 0.94, "grad_norm": 0.3773305505852029, "learning_rate": 1.8792509590669028e-07, "loss": 0.2166, "step": 20463 }, { "epoch": 0.94, "grad_norm": 0.3947361355671965, "learning_rate": 1.8763810218652478e-07, "loss": 0.2126, "step": 20464 }, { "epoch": 0.94, "grad_norm": 0.32581076371949746, "learning_rate": 1.8735132570259497e-07, "loss": 0.259, "step": 20465 }, { "epoch": 0.94, "grad_norm": 0.45798812597979743, "learning_rate": 1.8706476646124794e-07, "loss": 0.2512, "step": 20466 }, { "epoch": 0.94, "grad_norm": 0.5369868188462893, "learning_rate": 1.8677842446883087e-07, "loss": 0.3506, "step": 20467 }, { "epoch": 0.94, "grad_norm": 0.4337113445391564, "learning_rate": 1.864922997316787e-07, "loss": 0.255, "step": 20468 }, { "epoch": 0.94, "grad_norm": 0.3409451569285432, "learning_rate": 1.8620639225613078e-07, "loss": 0.1286, "step": 20469 }, { "epoch": 0.94, "grad_norm": 0.3700127269553743, "learning_rate": 1.8592070204851542e-07, "loss": 0.2946, "step": 20470 }, { "epoch": 0.94, "grad_norm": 0.4262934698041841, "learning_rate": 1.856352291151553e-07, "loss": 0.2502, "step": 20471 }, { "epoch": 0.94, "grad_norm": 0.5591357314816566, "learning_rate": 1.8534997346237094e-07, "loss": 0.2649, "step": 20472 }, { "epoch": 0.94, "grad_norm": 1.3081355861364123, "learning_rate": 1.850649350964806e-07, "loss": 0.6253, "step": 20473 }, { "epoch": 0.94, "grad_norm": 0.34105478965163877, "learning_rate": 1.8478011402379258e-07, "loss": 0.2518, "step": 20474 }, { "epoch": 0.94, "grad_norm": 0.4159186954680078, "learning_rate": 1.8449551025061186e-07, "loss": 0.2761, "step": 20475 }, { "epoch": 0.94, "grad_norm": 0.1739731628125064, "learning_rate": 1.8421112378324113e-07, "loss": 0.0918, "step": 20476 }, { "epoch": 0.94, "grad_norm": 0.3551346296474776, "learning_rate": 1.8392695462797537e-07, "loss": 0.2786, "step": 20477 }, { "epoch": 0.94, "grad_norm": 0.4849899641672736, "learning_rate": 1.8364300279110514e-07, "loss": 0.3308, "step": 20478 }, { "epoch": 0.94, "grad_norm": 0.5578742994492052, "learning_rate": 1.833592682789187e-07, "loss": 0.2628, "step": 20479 }, { "epoch": 0.94, "grad_norm": 0.41016988027942997, "learning_rate": 1.830757510976966e-07, "loss": 0.2546, "step": 20480 }, { "epoch": 0.94, "grad_norm": 1.9021470652297021, "learning_rate": 1.827924512537149e-07, "loss": 0.5034, "step": 20481 }, { "epoch": 0.94, "grad_norm": 0.2110853031075331, "learning_rate": 1.8250936875324755e-07, "loss": 0.1657, "step": 20482 }, { "epoch": 0.94, "grad_norm": 0.3452904691240392, "learning_rate": 1.8222650360255944e-07, "loss": 0.2515, "step": 20483 }, { "epoch": 0.94, "grad_norm": 0.6430167247263239, "learning_rate": 1.8194385580791562e-07, "loss": 0.3735, "step": 20484 }, { "epoch": 0.94, "grad_norm": 0.5487244406749696, "learning_rate": 1.8166142537556997e-07, "loss": 0.2702, "step": 20485 }, { "epoch": 0.94, "grad_norm": 0.43554094012383743, "learning_rate": 1.8137921231177856e-07, "loss": 0.2786, "step": 20486 }, { "epoch": 0.94, "grad_norm": 0.44008215794976246, "learning_rate": 1.8109721662278755e-07, "loss": 0.2807, "step": 20487 }, { "epoch": 0.94, "grad_norm": 0.4872874386164632, "learning_rate": 1.8081543831484082e-07, "loss": 0.2695, "step": 20488 }, { "epoch": 0.94, "grad_norm": 0.48281113191000186, "learning_rate": 1.8053387739417782e-07, "loss": 0.1935, "step": 20489 }, { "epoch": 0.94, "grad_norm": 0.3061508092708022, "learning_rate": 1.80252533867028e-07, "loss": 0.2823, "step": 20490 }, { "epoch": 0.94, "grad_norm": 0.4860581173143456, "learning_rate": 1.7997140773962418e-07, "loss": 0.3248, "step": 20491 }, { "epoch": 0.94, "grad_norm": 0.37810209978247084, "learning_rate": 1.7969049901818913e-07, "loss": 0.1651, "step": 20492 }, { "epoch": 0.94, "grad_norm": 0.648351473682077, "learning_rate": 1.7940980770894122e-07, "loss": 0.3368, "step": 20493 }, { "epoch": 0.94, "grad_norm": 0.38140417822583, "learning_rate": 1.791293338180944e-07, "loss": 0.2965, "step": 20494 }, { "epoch": 0.94, "grad_norm": 0.3228661667115769, "learning_rate": 1.7884907735185807e-07, "loss": 0.2171, "step": 20495 }, { "epoch": 0.94, "grad_norm": 0.8766083083615784, "learning_rate": 1.7856903831643957e-07, "loss": 0.5052, "step": 20496 }, { "epoch": 0.94, "grad_norm": 0.3164946816302706, "learning_rate": 1.78289216718035e-07, "loss": 0.1667, "step": 20497 }, { "epoch": 0.94, "grad_norm": 0.26628472415444915, "learning_rate": 1.7800961256284054e-07, "loss": 0.2126, "step": 20498 }, { "epoch": 0.94, "grad_norm": 1.6943038042317498, "learning_rate": 1.777302258570479e-07, "loss": 0.4154, "step": 20499 }, { "epoch": 0.94, "grad_norm": 0.6635018843264386, "learning_rate": 1.7745105660683993e-07, "loss": 0.3697, "step": 20500 }, { "epoch": 0.94, "grad_norm": 0.4245086142165088, "learning_rate": 1.7717210481839942e-07, "loss": 0.2907, "step": 20501 }, { "epoch": 0.94, "grad_norm": 0.30697746514597013, "learning_rate": 1.7689337049790035e-07, "loss": 0.2381, "step": 20502 }, { "epoch": 0.94, "grad_norm": 0.26114647542628233, "learning_rate": 1.7661485365151553e-07, "loss": 0.1643, "step": 20503 }, { "epoch": 0.94, "grad_norm": 0.6409470330149446, "learning_rate": 1.7633655428540897e-07, "loss": 0.3168, "step": 20504 }, { "epoch": 0.94, "grad_norm": 0.4231173436237854, "learning_rate": 1.7605847240574346e-07, "loss": 0.2184, "step": 20505 }, { "epoch": 0.94, "grad_norm": 0.38606656324697736, "learning_rate": 1.7578060801867524e-07, "loss": 0.307, "step": 20506 }, { "epoch": 0.94, "grad_norm": 0.6954259545403917, "learning_rate": 1.7550296113035493e-07, "loss": 0.3573, "step": 20507 }, { "epoch": 0.94, "grad_norm": 0.39662291682772427, "learning_rate": 1.7522553174693091e-07, "loss": 0.2485, "step": 20508 }, { "epoch": 0.94, "grad_norm": 0.24883322856511075, "learning_rate": 1.7494831987454276e-07, "loss": 0.1795, "step": 20509 }, { "epoch": 0.94, "grad_norm": 0.4238716187523525, "learning_rate": 1.7467132551933107e-07, "loss": 0.2928, "step": 20510 }, { "epoch": 0.94, "grad_norm": 0.4501244271365935, "learning_rate": 1.7439454868742544e-07, "loss": 0.222, "step": 20511 }, { "epoch": 0.94, "grad_norm": 0.9020058530950563, "learning_rate": 1.741179893849554e-07, "loss": 0.4087, "step": 20512 }, { "epoch": 0.94, "grad_norm": 0.6367162663365894, "learning_rate": 1.7384164761804266e-07, "loss": 0.368, "step": 20513 }, { "epoch": 0.94, "grad_norm": 0.2709768499484475, "learning_rate": 1.7356552339280353e-07, "loss": 0.266, "step": 20514 }, { "epoch": 0.94, "grad_norm": 0.3725812762441721, "learning_rate": 1.7328961671535415e-07, "loss": 0.1006, "step": 20515 }, { "epoch": 0.94, "grad_norm": 0.40369228025531934, "learning_rate": 1.730139275918019e-07, "loss": 0.2427, "step": 20516 }, { "epoch": 0.94, "grad_norm": 0.6043181322799305, "learning_rate": 1.7273845602824967e-07, "loss": 0.3097, "step": 20517 }, { "epoch": 0.94, "grad_norm": 0.3852209302611917, "learning_rate": 1.7246320203079702e-07, "loss": 0.2527, "step": 20518 }, { "epoch": 0.94, "grad_norm": 0.4912208421506404, "learning_rate": 1.7218816560553575e-07, "loss": 0.2826, "step": 20519 }, { "epoch": 0.94, "grad_norm": 0.5695267983866613, "learning_rate": 1.7191334675855654e-07, "loss": 0.3805, "step": 20520 }, { "epoch": 0.94, "grad_norm": 0.2362732118700907, "learning_rate": 1.716387454959434e-07, "loss": 0.1397, "step": 20521 }, { "epoch": 0.94, "grad_norm": 0.49685712246660824, "learning_rate": 1.7136436182377703e-07, "loss": 0.2723, "step": 20522 }, { "epoch": 0.94, "grad_norm": 0.371983856161379, "learning_rate": 1.7109019574812925e-07, "loss": 0.2828, "step": 20523 }, { "epoch": 0.94, "grad_norm": 0.8721997845298104, "learning_rate": 1.7081624727507184e-07, "loss": 0.2965, "step": 20524 }, { "epoch": 0.94, "grad_norm": 0.6810482582357328, "learning_rate": 1.7054251641066999e-07, "loss": 0.4024, "step": 20525 }, { "epoch": 0.94, "grad_norm": 0.2666340310350867, "learning_rate": 1.7026900316098217e-07, "loss": 0.2765, "step": 20526 }, { "epoch": 0.94, "grad_norm": 0.6172578138634932, "learning_rate": 1.6999570753206574e-07, "loss": 0.2732, "step": 20527 }, { "epoch": 0.94, "grad_norm": 0.5385625859111121, "learning_rate": 1.6972262952996921e-07, "loss": 0.1998, "step": 20528 }, { "epoch": 0.94, "grad_norm": 0.4552642022317783, "learning_rate": 1.6944976916073774e-07, "loss": 0.2271, "step": 20529 }, { "epoch": 0.94, "grad_norm": 0.380535445678306, "learning_rate": 1.6917712643041539e-07, "loss": 0.2745, "step": 20530 }, { "epoch": 0.94, "grad_norm": 0.5232590610561363, "learning_rate": 1.6890470134503621e-07, "loss": 0.1596, "step": 20531 }, { "epoch": 0.94, "grad_norm": 0.4115750124789743, "learning_rate": 1.6863249391063097e-07, "loss": 0.3071, "step": 20532 }, { "epoch": 0.94, "grad_norm": 0.698334630770339, "learning_rate": 1.6836050413322702e-07, "loss": 0.227, "step": 20533 }, { "epoch": 0.94, "grad_norm": 0.30837970071372506, "learning_rate": 1.6808873201884624e-07, "loss": 0.217, "step": 20534 }, { "epoch": 0.94, "grad_norm": 0.4275719880438468, "learning_rate": 1.6781717757350492e-07, "loss": 0.2927, "step": 20535 }, { "epoch": 0.94, "grad_norm": 0.5217407989372366, "learning_rate": 1.675458408032138e-07, "loss": 0.2472, "step": 20536 }, { "epoch": 0.94, "grad_norm": 0.30058818967363676, "learning_rate": 1.672747217139814e-07, "loss": 0.2146, "step": 20537 }, { "epoch": 0.94, "grad_norm": 0.42278227502314064, "learning_rate": 1.6700382031180962e-07, "loss": 0.348, "step": 20538 }, { "epoch": 0.94, "grad_norm": 1.0815819135430929, "learning_rate": 1.6673313660269695e-07, "loss": 0.4834, "step": 20539 }, { "epoch": 0.94, "grad_norm": 0.6030929184570383, "learning_rate": 1.6646267059263422e-07, "loss": 0.3081, "step": 20540 }, { "epoch": 0.94, "grad_norm": 0.2591730134648022, "learning_rate": 1.661924222876099e-07, "loss": 0.1626, "step": 20541 }, { "epoch": 0.94, "grad_norm": 0.2859069549167505, "learning_rate": 1.6592239169360924e-07, "loss": 0.2332, "step": 20542 }, { "epoch": 0.94, "grad_norm": 1.299930481311688, "learning_rate": 1.6565257881660746e-07, "loss": 0.7919, "step": 20543 }, { "epoch": 0.94, "grad_norm": 0.35311503431033286, "learning_rate": 1.6538298366257975e-07, "loss": 0.1959, "step": 20544 }, { "epoch": 0.94, "grad_norm": 0.48688598421538504, "learning_rate": 1.6511360623749362e-07, "loss": 0.3158, "step": 20545 }, { "epoch": 0.94, "grad_norm": 0.43124338825691466, "learning_rate": 1.6484444654731203e-07, "loss": 0.2964, "step": 20546 }, { "epoch": 0.94, "grad_norm": 0.2523231195951359, "learning_rate": 1.6457550459799687e-07, "loss": 0.1541, "step": 20547 }, { "epoch": 0.94, "grad_norm": 0.45320137850318426, "learning_rate": 1.6430678039550008e-07, "loss": 0.2094, "step": 20548 }, { "epoch": 0.94, "grad_norm": 0.4281758057601902, "learning_rate": 1.6403827394577244e-07, "loss": 0.2951, "step": 20549 }, { "epoch": 0.94, "grad_norm": 0.309488784806853, "learning_rate": 1.6376998525475473e-07, "loss": 0.2224, "step": 20550 }, { "epoch": 0.94, "grad_norm": 0.7905387940721017, "learning_rate": 1.6350191432839114e-07, "loss": 0.3674, "step": 20551 }, { "epoch": 0.94, "grad_norm": 0.663994398977876, "learning_rate": 1.6323406117261465e-07, "loss": 0.3309, "step": 20552 }, { "epoch": 0.94, "grad_norm": 0.37766137762842583, "learning_rate": 1.6296642579335497e-07, "loss": 0.2775, "step": 20553 }, { "epoch": 0.94, "grad_norm": 0.2269788111243829, "learning_rate": 1.626990081965374e-07, "loss": 0.1611, "step": 20554 }, { "epoch": 0.94, "grad_norm": 0.852158449880713, "learning_rate": 1.624318083880827e-07, "loss": 0.5495, "step": 20555 }, { "epoch": 0.94, "grad_norm": 0.41235650281134567, "learning_rate": 1.6216482637390618e-07, "loss": 0.291, "step": 20556 }, { "epoch": 0.94, "grad_norm": 0.3851375127426412, "learning_rate": 1.6189806215991865e-07, "loss": 0.2466, "step": 20557 }, { "epoch": 0.94, "grad_norm": 0.7487880501146439, "learning_rate": 1.616315157520254e-07, "loss": 0.4068, "step": 20558 }, { "epoch": 0.94, "grad_norm": 0.3755536542120205, "learning_rate": 1.6136518715612837e-07, "loss": 0.2644, "step": 20559 }, { "epoch": 0.94, "grad_norm": 0.28798944690432743, "learning_rate": 1.6109907637812283e-07, "loss": 0.0902, "step": 20560 }, { "epoch": 0.94, "grad_norm": 0.38107512472268873, "learning_rate": 1.6083318342390298e-07, "loss": 0.2967, "step": 20561 }, { "epoch": 0.94, "grad_norm": 0.3529802466242184, "learning_rate": 1.6056750829935076e-07, "loss": 0.2581, "step": 20562 }, { "epoch": 0.94, "grad_norm": 0.6626733160826997, "learning_rate": 1.6030205101035146e-07, "loss": 0.3622, "step": 20563 }, { "epoch": 0.94, "grad_norm": 2.0577084282109124, "learning_rate": 1.600368115627826e-07, "loss": 0.1926, "step": 20564 }, { "epoch": 0.94, "grad_norm": 0.3023344382331097, "learning_rate": 1.5977178996251285e-07, "loss": 0.2512, "step": 20565 }, { "epoch": 0.94, "grad_norm": 0.3615232549958181, "learning_rate": 1.5950698621541305e-07, "loss": 0.2347, "step": 20566 }, { "epoch": 0.94, "grad_norm": 0.49178067681717913, "learning_rate": 1.5924240032734296e-07, "loss": 0.1325, "step": 20567 }, { "epoch": 0.94, "grad_norm": 0.4025581359812394, "learning_rate": 1.589780323041623e-07, "loss": 0.2734, "step": 20568 }, { "epoch": 0.94, "grad_norm": 0.4933478701809438, "learning_rate": 1.5871388215172202e-07, "loss": 0.3347, "step": 20569 }, { "epoch": 0.94, "grad_norm": 0.4630282346162931, "learning_rate": 1.5844994987587293e-07, "loss": 0.2065, "step": 20570 }, { "epoch": 0.95, "grad_norm": 0.40562905503907637, "learning_rate": 1.5818623548245482e-07, "loss": 0.2678, "step": 20571 }, { "epoch": 0.95, "grad_norm": 0.2776803886630753, "learning_rate": 1.5792273897730858e-07, "loss": 0.1698, "step": 20572 }, { "epoch": 0.95, "grad_norm": 0.36350197279380075, "learning_rate": 1.5765946036626734e-07, "loss": 0.2484, "step": 20573 }, { "epoch": 0.95, "grad_norm": 0.38383679749533844, "learning_rate": 1.5739639965515863e-07, "loss": 0.2664, "step": 20574 }, { "epoch": 0.95, "grad_norm": 0.7057024799790953, "learning_rate": 1.571335568498078e-07, "loss": 0.3588, "step": 20575 }, { "epoch": 0.95, "grad_norm": 1.753413046148868, "learning_rate": 1.5687093195603353e-07, "loss": 0.4749, "step": 20576 }, { "epoch": 0.95, "grad_norm": 0.3064426030160529, "learning_rate": 1.5660852497965008e-07, "loss": 0.1812, "step": 20577 }, { "epoch": 0.95, "grad_norm": 0.23913216714039906, "learning_rate": 1.563463359264661e-07, "loss": 0.205, "step": 20578 }, { "epoch": 0.95, "grad_norm": 0.8204179766454661, "learning_rate": 1.5608436480228696e-07, "loss": 0.3715, "step": 20579 }, { "epoch": 0.95, "grad_norm": 0.3577960960543862, "learning_rate": 1.5582261161291246e-07, "loss": 0.2183, "step": 20580 }, { "epoch": 0.95, "grad_norm": 0.3716783122079666, "learning_rate": 1.5556107636413685e-07, "loss": 0.3227, "step": 20581 }, { "epoch": 0.95, "grad_norm": 1.5209756670900243, "learning_rate": 1.552997590617511e-07, "loss": 0.6691, "step": 20582 }, { "epoch": 0.95, "grad_norm": 0.3473830868594975, "learning_rate": 1.5503865971154052e-07, "loss": 0.1888, "step": 20583 }, { "epoch": 0.95, "grad_norm": 0.6784745063417846, "learning_rate": 1.5477777831928497e-07, "loss": 0.2514, "step": 20584 }, { "epoch": 0.95, "grad_norm": 0.3620669425077828, "learning_rate": 1.5451711489076094e-07, "loss": 0.2963, "step": 20585 }, { "epoch": 0.95, "grad_norm": 0.33849070350144034, "learning_rate": 1.5425666943173822e-07, "loss": 0.2119, "step": 20586 }, { "epoch": 0.95, "grad_norm": 1.397768748868032, "learning_rate": 1.5399644194798335e-07, "loss": 0.7126, "step": 20587 }, { "epoch": 0.95, "grad_norm": 0.47104127197102663, "learning_rate": 1.5373643244525838e-07, "loss": 0.2108, "step": 20588 }, { "epoch": 0.95, "grad_norm": 0.44504718512374813, "learning_rate": 1.5347664092931758e-07, "loss": 0.2428, "step": 20589 }, { "epoch": 0.95, "grad_norm": 0.3614696211523254, "learning_rate": 1.5321706740591525e-07, "loss": 0.2223, "step": 20590 }, { "epoch": 0.95, "grad_norm": 0.6075341928077955, "learning_rate": 1.5295771188079568e-07, "loss": 0.3516, "step": 20591 }, { "epoch": 0.95, "grad_norm": 0.4267960389882433, "learning_rate": 1.5269857435970093e-07, "loss": 0.2854, "step": 20592 }, { "epoch": 0.95, "grad_norm": 0.2935933744480745, "learning_rate": 1.5243965484837086e-07, "loss": 0.2481, "step": 20593 }, { "epoch": 0.95, "grad_norm": 0.31129415850020214, "learning_rate": 1.5218095335253423e-07, "loss": 0.1579, "step": 20594 }, { "epoch": 0.95, "grad_norm": 0.4256009005006297, "learning_rate": 1.519224698779198e-07, "loss": 0.2764, "step": 20595 }, { "epoch": 0.95, "grad_norm": 0.5257367849838214, "learning_rate": 1.5166420443025076e-07, "loss": 0.2458, "step": 20596 }, { "epoch": 0.95, "grad_norm": 0.3465059048756922, "learning_rate": 1.5140615701524364e-07, "loss": 0.2761, "step": 20597 }, { "epoch": 0.95, "grad_norm": 0.537112871215346, "learning_rate": 1.5114832763861164e-07, "loss": 0.2656, "step": 20598 }, { "epoch": 0.95, "grad_norm": 0.22901007055540692, "learning_rate": 1.5089071630606466e-07, "loss": 0.1469, "step": 20599 }, { "epoch": 0.95, "grad_norm": 1.6622188424882447, "learning_rate": 1.5063332302330368e-07, "loss": 0.5666, "step": 20600 }, { "epoch": 0.95, "grad_norm": 0.2876046519019182, "learning_rate": 1.5037614779602748e-07, "loss": 0.2321, "step": 20601 }, { "epoch": 0.95, "grad_norm": 0.5065872005160751, "learning_rate": 1.5011919062993152e-07, "loss": 0.3089, "step": 20602 }, { "epoch": 0.95, "grad_norm": 0.6682465626191572, "learning_rate": 1.4986245153070234e-07, "loss": 0.275, "step": 20603 }, { "epoch": 0.95, "grad_norm": 0.32859676915869407, "learning_rate": 1.4960593050402537e-07, "loss": 0.2864, "step": 20604 }, { "epoch": 0.95, "grad_norm": 0.5342556839934085, "learning_rate": 1.4934962755557835e-07, "loss": 0.363, "step": 20605 }, { "epoch": 0.95, "grad_norm": 0.3278569763690953, "learning_rate": 1.4909354269103672e-07, "loss": 0.1507, "step": 20606 }, { "epoch": 0.95, "grad_norm": 0.446644126715135, "learning_rate": 1.4883767591606924e-07, "loss": 0.244, "step": 20607 }, { "epoch": 0.95, "grad_norm": 0.544534397646364, "learning_rate": 1.485820272363414e-07, "loss": 0.2862, "step": 20608 }, { "epoch": 0.95, "grad_norm": 0.37654454237145846, "learning_rate": 1.4832659665751316e-07, "loss": 0.258, "step": 20609 }, { "epoch": 0.95, "grad_norm": 0.45323773736872014, "learning_rate": 1.480713841852377e-07, "loss": 0.2686, "step": 20610 }, { "epoch": 0.95, "grad_norm": 0.5941862624360262, "learning_rate": 1.4781638982516723e-07, "loss": 0.3929, "step": 20611 }, { "epoch": 0.95, "grad_norm": 0.2155394146196827, "learning_rate": 1.4756161358294608e-07, "loss": 0.1583, "step": 20612 }, { "epoch": 0.95, "grad_norm": 0.4154579073962875, "learning_rate": 1.4730705546421532e-07, "loss": 0.2533, "step": 20613 }, { "epoch": 0.95, "grad_norm": 0.5089990303570201, "learning_rate": 1.470527154746093e-07, "loss": 0.3154, "step": 20614 }, { "epoch": 0.95, "grad_norm": 1.49968892730712, "learning_rate": 1.4679859361975913e-07, "loss": 0.4487, "step": 20615 }, { "epoch": 0.95, "grad_norm": 0.37013722902406837, "learning_rate": 1.4654468990529357e-07, "loss": 0.21, "step": 20616 }, { "epoch": 0.95, "grad_norm": 0.3397656695057043, "learning_rate": 1.4629100433683042e-07, "loss": 0.2776, "step": 20617 }, { "epoch": 0.95, "grad_norm": 0.3819614158423416, "learning_rate": 1.4603753691998735e-07, "loss": 0.1724, "step": 20618 }, { "epoch": 0.95, "grad_norm": 0.3867415984829305, "learning_rate": 1.4578428766037654e-07, "loss": 0.1878, "step": 20619 }, { "epoch": 0.95, "grad_norm": 0.5483608516327109, "learning_rate": 1.4553125656360245e-07, "loss": 0.3424, "step": 20620 }, { "epoch": 0.95, "grad_norm": 0.4296049875352283, "learning_rate": 1.4527844363527056e-07, "loss": 0.3065, "step": 20621 }, { "epoch": 0.95, "grad_norm": 0.3240333291506084, "learning_rate": 1.4502584888097416e-07, "loss": 0.208, "step": 20622 }, { "epoch": 0.95, "grad_norm": 1.4150929266549446, "learning_rate": 1.4477347230630767e-07, "loss": 0.7787, "step": 20623 }, { "epoch": 0.95, "grad_norm": 0.3838270226282027, "learning_rate": 1.4452131391685776e-07, "loss": 0.2724, "step": 20624 }, { "epoch": 0.95, "grad_norm": 0.3372114949609897, "learning_rate": 1.4426937371820772e-07, "loss": 0.2132, "step": 20625 }, { "epoch": 0.95, "grad_norm": 0.3686484191851059, "learning_rate": 1.4401765171593418e-07, "loss": 0.2695, "step": 20626 }, { "epoch": 0.95, "grad_norm": 1.3768605775936185, "learning_rate": 1.437661479156094e-07, "loss": 0.5532, "step": 20627 }, { "epoch": 0.95, "grad_norm": 0.8457937336742294, "learning_rate": 1.4351486232280442e-07, "loss": 0.4219, "step": 20628 }, { "epoch": 0.95, "grad_norm": 0.2697674485887778, "learning_rate": 1.432637949430804e-07, "loss": 0.2113, "step": 20629 }, { "epoch": 0.95, "grad_norm": 0.6791989645071949, "learning_rate": 1.4301294578199508e-07, "loss": 0.3447, "step": 20630 }, { "epoch": 0.95, "grad_norm": 0.5945166450743877, "learning_rate": 1.4276231484510295e-07, "loss": 0.3097, "step": 20631 }, { "epoch": 0.95, "grad_norm": 0.23192793431062594, "learning_rate": 1.4251190213795286e-07, "loss": 0.1518, "step": 20632 }, { "epoch": 0.95, "grad_norm": 0.3688595092365043, "learning_rate": 1.4226170766608927e-07, "loss": 0.2828, "step": 20633 }, { "epoch": 0.95, "grad_norm": 0.606395385121721, "learning_rate": 1.4201173143504887e-07, "loss": 0.3729, "step": 20634 }, { "epoch": 0.95, "grad_norm": 0.42974434574059384, "learning_rate": 1.4176197345036835e-07, "loss": 0.2249, "step": 20635 }, { "epoch": 0.95, "grad_norm": 0.509120063420832, "learning_rate": 1.4151243371757663e-07, "loss": 0.3111, "step": 20636 }, { "epoch": 0.95, "grad_norm": 0.42286792034774806, "learning_rate": 1.4126311224219702e-07, "loss": 0.2814, "step": 20637 }, { "epoch": 0.95, "grad_norm": 0.30655206484798125, "learning_rate": 1.410140090297507e-07, "loss": 0.172, "step": 20638 }, { "epoch": 0.95, "grad_norm": 0.5224652183597427, "learning_rate": 1.4076512408575216e-07, "loss": 0.2152, "step": 20639 }, { "epoch": 0.95, "grad_norm": 0.4291925973995371, "learning_rate": 1.405164574157103e-07, "loss": 0.302, "step": 20640 }, { "epoch": 0.95, "grad_norm": 0.35032888082878083, "learning_rate": 1.4026800902513293e-07, "loss": 0.2827, "step": 20641 }, { "epoch": 0.95, "grad_norm": 0.6445050792114264, "learning_rate": 1.400197789195179e-07, "loss": 0.2656, "step": 20642 }, { "epoch": 0.95, "grad_norm": 0.4564393594952866, "learning_rate": 1.3977176710436191e-07, "loss": 0.3001, "step": 20643 }, { "epoch": 0.95, "grad_norm": 0.27278780212702575, "learning_rate": 1.39523973585155e-07, "loss": 0.1968, "step": 20644 }, { "epoch": 0.95, "grad_norm": 0.3955634121940631, "learning_rate": 1.3927639836738505e-07, "loss": 0.2632, "step": 20645 }, { "epoch": 0.95, "grad_norm": 0.9021953705898903, "learning_rate": 1.3902904145653094e-07, "loss": 0.417, "step": 20646 }, { "epoch": 0.95, "grad_norm": 0.35996958783814886, "learning_rate": 1.3878190285807057e-07, "loss": 0.2474, "step": 20647 }, { "epoch": 0.95, "grad_norm": 0.338763963002527, "learning_rate": 1.385349825774729e-07, "loss": 0.2542, "step": 20648 }, { "epoch": 0.95, "grad_norm": 0.9139617686952466, "learning_rate": 1.3828828062020683e-07, "loss": 0.418, "step": 20649 }, { "epoch": 0.95, "grad_norm": 0.2717189498012119, "learning_rate": 1.3804179699173358e-07, "loss": 0.203, "step": 20650 }, { "epoch": 0.95, "grad_norm": 0.2615893599229896, "learning_rate": 1.3779553169750992e-07, "loss": 0.1196, "step": 20651 }, { "epoch": 0.95, "grad_norm": 0.38169336698347794, "learning_rate": 1.37549484742987e-07, "loss": 0.2764, "step": 20652 }, { "epoch": 0.95, "grad_norm": 0.4473010113881805, "learning_rate": 1.3730365613361497e-07, "loss": 0.2667, "step": 20653 }, { "epoch": 0.95, "grad_norm": 0.7559415895602231, "learning_rate": 1.3705804587483274e-07, "loss": 0.3831, "step": 20654 }, { "epoch": 0.95, "grad_norm": 0.760355300796119, "learning_rate": 1.3681265397207932e-07, "loss": 0.1242, "step": 20655 }, { "epoch": 0.95, "grad_norm": 0.33682353287116307, "learning_rate": 1.3656748043078815e-07, "loss": 0.2314, "step": 20656 }, { "epoch": 0.95, "grad_norm": 0.2804567454971787, "learning_rate": 1.363225252563849e-07, "loss": 0.2373, "step": 20657 }, { "epoch": 0.95, "grad_norm": 0.47407169529877385, "learning_rate": 1.360777884542963e-07, "loss": 0.2181, "step": 20658 }, { "epoch": 0.95, "grad_norm": 0.3998187148872344, "learning_rate": 1.3583327002993695e-07, "loss": 0.3059, "step": 20659 }, { "epoch": 0.95, "grad_norm": 0.6215313915293608, "learning_rate": 1.355889699887225e-07, "loss": 0.3201, "step": 20660 }, { "epoch": 0.95, "grad_norm": 0.4748953965664612, "learning_rate": 1.3534488833605976e-07, "loss": 0.1897, "step": 20661 }, { "epoch": 0.95, "grad_norm": 0.42257722539914655, "learning_rate": 1.3510102507735544e-07, "loss": 0.2805, "step": 20662 }, { "epoch": 0.95, "grad_norm": 0.3065216146144009, "learning_rate": 1.3485738021800532e-07, "loss": 0.1956, "step": 20663 }, { "epoch": 0.95, "grad_norm": 0.31780619837123913, "learning_rate": 1.3461395376340502e-07, "loss": 0.2208, "step": 20664 }, { "epoch": 0.95, "grad_norm": 0.3641136963474888, "learning_rate": 1.343707457189425e-07, "loss": 0.2654, "step": 20665 }, { "epoch": 0.95, "grad_norm": 1.0031694703250642, "learning_rate": 1.341277560900034e-07, "loss": 0.4591, "step": 20666 }, { "epoch": 0.95, "grad_norm": 1.71204442386652, "learning_rate": 1.3388498488196787e-07, "loss": 0.7008, "step": 20667 }, { "epoch": 0.95, "grad_norm": 0.2532703919088304, "learning_rate": 1.3364243210020943e-07, "loss": 0.2021, "step": 20668 }, { "epoch": 0.95, "grad_norm": 0.33679116476752935, "learning_rate": 1.334000977500982e-07, "loss": 0.2414, "step": 20669 }, { "epoch": 0.95, "grad_norm": 0.6846364611720372, "learning_rate": 1.331579818369988e-07, "loss": 0.3841, "step": 20670 }, { "epoch": 0.95, "grad_norm": 0.3552760556877613, "learning_rate": 1.3291608436627135e-07, "loss": 0.2167, "step": 20671 }, { "epoch": 0.95, "grad_norm": 0.3897932103621604, "learning_rate": 1.3267440534327381e-07, "loss": 0.2763, "step": 20672 }, { "epoch": 0.95, "grad_norm": 0.5019402997556139, "learning_rate": 1.32432944773353e-07, "loss": 0.288, "step": 20673 }, { "epoch": 0.95, "grad_norm": 0.3753268856216738, "learning_rate": 1.3219170266185577e-07, "loss": 0.1982, "step": 20674 }, { "epoch": 0.95, "grad_norm": 0.460201394051659, "learning_rate": 1.319506790141245e-07, "loss": 0.2653, "step": 20675 }, { "epoch": 0.95, "grad_norm": 0.32518520618545643, "learning_rate": 1.3170987383549495e-07, "loss": 0.269, "step": 20676 }, { "epoch": 0.95, "grad_norm": 0.34316726746303916, "learning_rate": 1.3146928713129726e-07, "loss": 0.2182, "step": 20677 }, { "epoch": 0.95, "grad_norm": 0.9130698899227945, "learning_rate": 1.3122891890685606e-07, "loss": 0.2913, "step": 20678 }, { "epoch": 0.95, "grad_norm": 1.0363730179381838, "learning_rate": 1.3098876916749713e-07, "loss": 0.324, "step": 20679 }, { "epoch": 0.95, "grad_norm": 0.3781339361250915, "learning_rate": 1.3074883791853398e-07, "loss": 0.2611, "step": 20680 }, { "epoch": 0.95, "grad_norm": 0.3518694956616934, "learning_rate": 1.3050912516528125e-07, "loss": 0.2612, "step": 20681 }, { "epoch": 0.95, "grad_norm": 0.7204022986248192, "learning_rate": 1.3026963091304246e-07, "loss": 0.4051, "step": 20682 }, { "epoch": 0.95, "grad_norm": 0.38637551150969396, "learning_rate": 1.3003035516712116e-07, "loss": 0.2781, "step": 20683 }, { "epoch": 0.95, "grad_norm": 0.2462810600346777, "learning_rate": 1.2979129793281641e-07, "loss": 0.1754, "step": 20684 }, { "epoch": 0.95, "grad_norm": 1.2602664983780982, "learning_rate": 1.2955245921541847e-07, "loss": 0.4663, "step": 20685 }, { "epoch": 0.95, "grad_norm": 0.5554679570428661, "learning_rate": 1.2931383902021533e-07, "loss": 0.2438, "step": 20686 }, { "epoch": 0.95, "grad_norm": 0.6487358845247734, "learning_rate": 1.2907543735249163e-07, "loss": 0.2884, "step": 20687 }, { "epoch": 0.95, "grad_norm": 0.3920177312651138, "learning_rate": 1.2883725421752203e-07, "loss": 0.3114, "step": 20688 }, { "epoch": 0.95, "grad_norm": 0.3368389466333891, "learning_rate": 1.2859928962058344e-07, "loss": 0.256, "step": 20689 }, { "epoch": 0.95, "grad_norm": 0.41373404903617544, "learning_rate": 1.2836154356694163e-07, "loss": 0.2117, "step": 20690 }, { "epoch": 0.95, "grad_norm": 0.38659062207455097, "learning_rate": 1.281240160618613e-07, "loss": 0.1759, "step": 20691 }, { "epoch": 0.95, "grad_norm": 0.27482368293027104, "learning_rate": 1.2788670711060046e-07, "loss": 0.2337, "step": 20692 }, { "epoch": 0.95, "grad_norm": 0.9042109860427079, "learning_rate": 1.276496167184127e-07, "loss": 0.3832, "step": 20693 }, { "epoch": 0.95, "grad_norm": 0.7685272778489982, "learning_rate": 1.2741274489054823e-07, "loss": 0.294, "step": 20694 }, { "epoch": 0.95, "grad_norm": 0.3663933729094589, "learning_rate": 1.2717609163224843e-07, "loss": 0.2583, "step": 20695 }, { "epoch": 0.95, "grad_norm": 0.30759146560388095, "learning_rate": 1.2693965694875689e-07, "loss": 0.2567, "step": 20696 }, { "epoch": 0.95, "grad_norm": 0.381156015514647, "learning_rate": 1.2670344084530384e-07, "loss": 0.0775, "step": 20697 }, { "epoch": 0.95, "grad_norm": 0.37905845851795905, "learning_rate": 1.264674433271218e-07, "loss": 0.2822, "step": 20698 }, { "epoch": 0.95, "grad_norm": 0.6889517287067817, "learning_rate": 1.2623166439943325e-07, "loss": 0.3555, "step": 20699 }, { "epoch": 0.95, "grad_norm": 0.3563619249963613, "learning_rate": 1.2599610406745844e-07, "loss": 0.2458, "step": 20700 }, { "epoch": 0.95, "grad_norm": 0.35179627987335754, "learning_rate": 1.2576076233641543e-07, "loss": 0.2707, "step": 20701 }, { "epoch": 0.95, "grad_norm": 0.3352054568594747, "learning_rate": 1.2552563921151116e-07, "loss": 0.1673, "step": 20702 }, { "epoch": 0.95, "grad_norm": 1.8334751571614538, "learning_rate": 1.252907346979515e-07, "loss": 0.4468, "step": 20703 }, { "epoch": 0.95, "grad_norm": 0.24162683751234798, "learning_rate": 1.2505604880093892e-07, "loss": 0.218, "step": 20704 }, { "epoch": 0.95, "grad_norm": 0.6351750612395176, "learning_rate": 1.2482158152566591e-07, "loss": 0.3568, "step": 20705 }, { "epoch": 0.95, "grad_norm": 1.3546354890731713, "learning_rate": 1.2458733287732728e-07, "loss": 0.6961, "step": 20706 }, { "epoch": 0.95, "grad_norm": 0.3052501612605196, "learning_rate": 1.2435330286110549e-07, "loss": 0.1915, "step": 20707 }, { "epoch": 0.95, "grad_norm": 0.368190222128102, "learning_rate": 1.2411949148218415e-07, "loss": 0.278, "step": 20708 }, { "epoch": 0.95, "grad_norm": 0.3489554686883393, "learning_rate": 1.2388589874573808e-07, "loss": 0.1947, "step": 20709 }, { "epoch": 0.95, "grad_norm": 0.37506395325684777, "learning_rate": 1.2365252465694088e-07, "loss": 0.2263, "step": 20710 }, { "epoch": 0.95, "grad_norm": 1.384312316971383, "learning_rate": 1.234193692209562e-07, "loss": 0.4955, "step": 20711 }, { "epoch": 0.95, "grad_norm": 0.35621244535515667, "learning_rate": 1.2318643244294882e-07, "loss": 0.311, "step": 20712 }, { "epoch": 0.95, "grad_norm": 0.3651315739647565, "learning_rate": 1.2295371432807346e-07, "loss": 0.1753, "step": 20713 }, { "epoch": 0.95, "grad_norm": 0.39302018827370017, "learning_rate": 1.227212148814838e-07, "loss": 0.212, "step": 20714 }, { "epoch": 0.95, "grad_norm": 0.3522928584184849, "learning_rate": 1.2248893410832686e-07, "loss": 0.2516, "step": 20715 }, { "epoch": 0.95, "grad_norm": 0.4660371806244114, "learning_rate": 1.2225687201374403e-07, "loss": 0.2265, "step": 20716 }, { "epoch": 0.95, "grad_norm": 0.39455263510759686, "learning_rate": 1.2202502860287457e-07, "loss": 0.2294, "step": 20717 }, { "epoch": 0.95, "grad_norm": 1.2189134934369297, "learning_rate": 1.2179340388084993e-07, "loss": 0.7798, "step": 20718 }, { "epoch": 0.95, "grad_norm": 0.5190433423006692, "learning_rate": 1.215619978527993e-07, "loss": 0.3028, "step": 20719 }, { "epoch": 0.95, "grad_norm": 0.3132570596805649, "learning_rate": 1.213308105238442e-07, "loss": 0.2226, "step": 20720 }, { "epoch": 0.95, "grad_norm": 0.5332478859318329, "learning_rate": 1.2109984189910385e-07, "loss": 0.2989, "step": 20721 }, { "epoch": 0.95, "grad_norm": 0.4423235642743931, "learning_rate": 1.208690919836919e-07, "loss": 0.2554, "step": 20722 }, { "epoch": 0.95, "grad_norm": 0.3155619412854641, "learning_rate": 1.2063856078271762e-07, "loss": 0.1457, "step": 20723 }, { "epoch": 0.95, "grad_norm": 0.34451037738490725, "learning_rate": 1.204082483012825e-07, "loss": 0.2968, "step": 20724 }, { "epoch": 0.95, "grad_norm": 0.3284787047881008, "learning_rate": 1.2017815454448578e-07, "loss": 0.2401, "step": 20725 }, { "epoch": 0.95, "grad_norm": 1.5195555770389837, "learning_rate": 1.1994827951742338e-07, "loss": 0.1955, "step": 20726 }, { "epoch": 0.95, "grad_norm": 0.4820150201165967, "learning_rate": 1.1971862322518458e-07, "loss": 0.3291, "step": 20727 }, { "epoch": 0.95, "grad_norm": 0.27260686122269695, "learning_rate": 1.1948918567285195e-07, "loss": 0.2434, "step": 20728 }, { "epoch": 0.95, "grad_norm": 0.27323742305774895, "learning_rate": 1.192599668655059e-07, "loss": 0.1387, "step": 20729 }, { "epoch": 0.95, "grad_norm": 0.9526749972963651, "learning_rate": 1.1903096680822012e-07, "loss": 0.5031, "step": 20730 }, { "epoch": 0.95, "grad_norm": 0.7747997667842748, "learning_rate": 1.1880218550606615e-07, "loss": 0.3078, "step": 20731 }, { "epoch": 0.95, "grad_norm": 0.258047288689007, "learning_rate": 1.1857362296410879e-07, "loss": 0.2493, "step": 20732 }, { "epoch": 0.95, "grad_norm": 0.6623880705877354, "learning_rate": 1.1834527918740624e-07, "loss": 0.2828, "step": 20733 }, { "epoch": 0.95, "grad_norm": 0.34876351700589153, "learning_rate": 1.1811715418101555e-07, "loss": 0.1744, "step": 20734 }, { "epoch": 0.95, "grad_norm": 0.3213230210993715, "learning_rate": 1.1788924794998713e-07, "loss": 0.2311, "step": 20735 }, { "epoch": 0.95, "grad_norm": 0.3290569298079769, "learning_rate": 1.1766156049936583e-07, "loss": 0.2474, "step": 20736 }, { "epoch": 0.95, "grad_norm": 0.5332233490166133, "learning_rate": 1.1743409183419319e-07, "loss": 0.2679, "step": 20737 }, { "epoch": 0.95, "grad_norm": 0.5146897110260323, "learning_rate": 1.1720684195950405e-07, "loss": 0.3003, "step": 20738 }, { "epoch": 0.95, "grad_norm": 0.4627086697692388, "learning_rate": 1.1697981088033106e-07, "loss": 0.2535, "step": 20739 }, { "epoch": 0.95, "grad_norm": 0.3682057158955699, "learning_rate": 1.167529986017002e-07, "loss": 0.2568, "step": 20740 }, { "epoch": 0.95, "grad_norm": 0.30254631513094815, "learning_rate": 1.1652640512863189e-07, "loss": 0.1902, "step": 20741 }, { "epoch": 0.95, "grad_norm": 1.3528080011684007, "learning_rate": 1.1630003046614324e-07, "loss": 0.7938, "step": 20742 }, { "epoch": 0.95, "grad_norm": 0.4497056766571873, "learning_rate": 1.1607387461924468e-07, "loss": 0.1895, "step": 20743 }, { "epoch": 0.95, "grad_norm": 0.30125257439753106, "learning_rate": 1.1584793759294555e-07, "loss": 0.2652, "step": 20744 }, { "epoch": 0.95, "grad_norm": 0.7438882343291316, "learning_rate": 1.1562221939224627e-07, "loss": 0.3452, "step": 20745 }, { "epoch": 0.95, "grad_norm": 0.33698653173336357, "learning_rate": 1.1539672002214508e-07, "loss": 0.191, "step": 20746 }, { "epoch": 0.95, "grad_norm": 0.2946056486933315, "learning_rate": 1.1517143948763243e-07, "loss": 0.1996, "step": 20747 }, { "epoch": 0.95, "grad_norm": 0.39341762678260567, "learning_rate": 1.1494637779369766e-07, "loss": 0.329, "step": 20748 }, { "epoch": 0.95, "grad_norm": 0.6238143961986106, "learning_rate": 1.1472153494532235e-07, "loss": 0.1598, "step": 20749 }, { "epoch": 0.95, "grad_norm": 0.40356620534238624, "learning_rate": 1.1449691094748472e-07, "loss": 0.3182, "step": 20750 }, { "epoch": 0.95, "grad_norm": 0.391425105791511, "learning_rate": 1.1427250580515859e-07, "loss": 0.2922, "step": 20751 }, { "epoch": 0.95, "grad_norm": 0.8669023789594598, "learning_rate": 1.1404831952330997e-07, "loss": 0.096, "step": 20752 }, { "epoch": 0.95, "grad_norm": 0.2996275123671956, "learning_rate": 1.138243521069038e-07, "loss": 0.2367, "step": 20753 }, { "epoch": 0.95, "grad_norm": 0.4650952847327551, "learning_rate": 1.1360060356089941e-07, "loss": 0.3078, "step": 20754 }, { "epoch": 0.95, "grad_norm": 0.44300937727253387, "learning_rate": 1.1337707389024621e-07, "loss": 0.2636, "step": 20755 }, { "epoch": 0.95, "grad_norm": 0.29016510058034867, "learning_rate": 1.13153763099898e-07, "loss": 0.2406, "step": 20756 }, { "epoch": 0.95, "grad_norm": 1.663982122903921, "learning_rate": 1.1293067119479528e-07, "loss": 0.4673, "step": 20757 }, { "epoch": 0.95, "grad_norm": 0.8334899444616646, "learning_rate": 1.1270779817987965e-07, "loss": 0.306, "step": 20758 }, { "epoch": 0.95, "grad_norm": 0.22990419405453819, "learning_rate": 1.124851440600827e-07, "loss": 0.2129, "step": 20759 }, { "epoch": 0.95, "grad_norm": 1.2773472435999411, "learning_rate": 1.1226270884033386e-07, "loss": 0.7469, "step": 20760 }, { "epoch": 0.95, "grad_norm": 0.4839664252259881, "learning_rate": 1.1204049252556138e-07, "loss": 0.2829, "step": 20761 }, { "epoch": 0.95, "grad_norm": 0.2987469381954329, "learning_rate": 1.1181849512068021e-07, "loss": 0.1826, "step": 20762 }, { "epoch": 0.95, "grad_norm": 0.41178376215444074, "learning_rate": 1.1159671663060868e-07, "loss": 0.3073, "step": 20763 }, { "epoch": 0.95, "grad_norm": 0.7568904686394, "learning_rate": 1.1137515706025393e-07, "loss": 0.2657, "step": 20764 }, { "epoch": 0.95, "grad_norm": 0.2816664680916136, "learning_rate": 1.1115381641452205e-07, "loss": 0.1679, "step": 20765 }, { "epoch": 0.95, "grad_norm": 0.6387373290028822, "learning_rate": 1.109326946983158e-07, "loss": 0.3826, "step": 20766 }, { "epoch": 0.95, "grad_norm": 0.4362002869971888, "learning_rate": 1.1071179191652681e-07, "loss": 0.2812, "step": 20767 }, { "epoch": 0.95, "grad_norm": 0.3125475131392368, "learning_rate": 1.1049110807404783e-07, "loss": 0.2836, "step": 20768 }, { "epoch": 0.95, "grad_norm": 0.48275375279537397, "learning_rate": 1.1027064317576386e-07, "loss": 0.1138, "step": 20769 }, { "epoch": 0.95, "grad_norm": 1.0128665941014303, "learning_rate": 1.1005039722655653e-07, "loss": 0.3423, "step": 20770 }, { "epoch": 0.95, "grad_norm": 0.29100319660013674, "learning_rate": 1.0983037023130083e-07, "loss": 0.2213, "step": 20771 }, { "epoch": 0.95, "grad_norm": 0.35666283230782536, "learning_rate": 1.0961056219486843e-07, "loss": 0.2592, "step": 20772 }, { "epoch": 0.95, "grad_norm": 0.8541162473652517, "learning_rate": 1.0939097312212543e-07, "loss": 0.3468, "step": 20773 }, { "epoch": 0.95, "grad_norm": 0.278406707094813, "learning_rate": 1.0917160301793461e-07, "loss": 0.2203, "step": 20774 }, { "epoch": 0.95, "grad_norm": 0.39257826060095213, "learning_rate": 1.0895245188715097e-07, "loss": 0.2324, "step": 20775 }, { "epoch": 0.95, "grad_norm": 0.9375911849386959, "learning_rate": 1.0873351973462731e-07, "loss": 0.4127, "step": 20776 }, { "epoch": 0.95, "grad_norm": 0.38924173456233513, "learning_rate": 1.0851480656520975e-07, "loss": 0.2777, "step": 20777 }, { "epoch": 0.95, "grad_norm": 0.6350143675389002, "learning_rate": 1.0829631238374105e-07, "loss": 0.2744, "step": 20778 }, { "epoch": 0.95, "grad_norm": 0.31948055623560906, "learning_rate": 1.0807803719505849e-07, "loss": 0.2694, "step": 20779 }, { "epoch": 0.95, "grad_norm": 0.4011095697978131, "learning_rate": 1.0785998100399376e-07, "loss": 0.2975, "step": 20780 }, { "epoch": 0.95, "grad_norm": 0.3844968039755211, "learning_rate": 1.0764214381537519e-07, "loss": 0.1367, "step": 20781 }, { "epoch": 0.95, "grad_norm": 0.7991233497783274, "learning_rate": 1.074245256340245e-07, "loss": 0.0955, "step": 20782 }, { "epoch": 0.95, "grad_norm": 0.38456615385273496, "learning_rate": 1.0720712646476116e-07, "loss": 0.2783, "step": 20783 }, { "epoch": 0.95, "grad_norm": 0.3715078813488768, "learning_rate": 1.0698994631239689e-07, "loss": 0.281, "step": 20784 }, { "epoch": 0.95, "grad_norm": 0.5839737382227904, "learning_rate": 1.0677298518174006e-07, "loss": 0.287, "step": 20785 }, { "epoch": 0.95, "grad_norm": 0.3911856436521603, "learning_rate": 1.0655624307759348e-07, "loss": 0.2949, "step": 20786 }, { "epoch": 0.95, "grad_norm": 0.2487312606204923, "learning_rate": 1.0633972000475779e-07, "loss": 0.2065, "step": 20787 }, { "epoch": 0.95, "grad_norm": 0.8784199628099109, "learning_rate": 1.0612341596802467e-07, "loss": 0.1081, "step": 20788 }, { "epoch": 0.96, "grad_norm": 0.38829483837473877, "learning_rate": 1.0590733097218142e-07, "loss": 0.2664, "step": 20789 }, { "epoch": 0.96, "grad_norm": 0.6531115786074333, "learning_rate": 1.0569146502201643e-07, "loss": 0.3687, "step": 20790 }, { "epoch": 0.96, "grad_norm": 0.3283151652790656, "learning_rate": 1.0547581812230478e-07, "loss": 0.2367, "step": 20791 }, { "epoch": 0.96, "grad_norm": 0.40992427640878837, "learning_rate": 1.0526039027782264e-07, "loss": 0.2769, "step": 20792 }, { "epoch": 0.96, "grad_norm": 0.3026814798780175, "learning_rate": 1.0504518149333731e-07, "loss": 0.1992, "step": 20793 }, { "epoch": 0.96, "grad_norm": 0.8052698058055426, "learning_rate": 1.0483019177361609e-07, "loss": 0.3831, "step": 20794 }, { "epoch": 0.96, "grad_norm": 0.23937265382546366, "learning_rate": 1.0461542112341738e-07, "loss": 0.2034, "step": 20795 }, { "epoch": 0.96, "grad_norm": 0.8101610964808613, "learning_rate": 1.0440086954749517e-07, "loss": 0.3845, "step": 20796 }, { "epoch": 0.96, "grad_norm": 1.3046895822101234, "learning_rate": 1.0418653705060123e-07, "loss": 0.8179, "step": 20797 }, { "epoch": 0.96, "grad_norm": 0.3128319050566063, "learning_rate": 1.039724236374795e-07, "loss": 0.1795, "step": 20798 }, { "epoch": 0.96, "grad_norm": 0.2403984993030371, "learning_rate": 1.0375852931286956e-07, "loss": 0.2099, "step": 20799 }, { "epoch": 0.96, "grad_norm": 0.660756532109065, "learning_rate": 1.0354485408150871e-07, "loss": 0.3272, "step": 20800 }, { "epoch": 0.96, "grad_norm": 0.35127447118304683, "learning_rate": 1.0333139794812541e-07, "loss": 0.2106, "step": 20801 }, { "epoch": 0.96, "grad_norm": 1.0386561962292427, "learning_rate": 1.0311816091744698e-07, "loss": 0.472, "step": 20802 }, { "epoch": 0.96, "grad_norm": 0.3419692615268063, "learning_rate": 1.0290514299419296e-07, "loss": 0.2951, "step": 20803 }, { "epoch": 0.96, "grad_norm": 0.38702540430725063, "learning_rate": 1.0269234418308183e-07, "loss": 0.2813, "step": 20804 }, { "epoch": 0.96, "grad_norm": 0.4417887629067861, "learning_rate": 1.0247976448882202e-07, "loss": 0.0976, "step": 20805 }, { "epoch": 0.96, "grad_norm": 0.7157111569903305, "learning_rate": 1.0226740391612089e-07, "loss": 0.3648, "step": 20806 }, { "epoch": 0.96, "grad_norm": 0.2839922185727559, "learning_rate": 1.0205526246968023e-07, "loss": 0.2547, "step": 20807 }, { "epoch": 0.96, "grad_norm": 0.5059263714278324, "learning_rate": 1.0184334015419517e-07, "loss": 0.2713, "step": 20808 }, { "epoch": 0.96, "grad_norm": 1.2462544463711431, "learning_rate": 1.0163163697435974e-07, "loss": 0.7168, "step": 20809 }, { "epoch": 0.96, "grad_norm": 0.3382274794775425, "learning_rate": 1.0142015293485907e-07, "loss": 0.2555, "step": 20810 }, { "epoch": 0.96, "grad_norm": 0.27431032947997974, "learning_rate": 1.012088880403761e-07, "loss": 0.1942, "step": 20811 }, { "epoch": 0.96, "grad_norm": 0.44125605991899713, "learning_rate": 1.0099784229558817e-07, "loss": 0.2572, "step": 20812 }, { "epoch": 0.96, "grad_norm": 0.3561812911181639, "learning_rate": 1.0078701570516602e-07, "loss": 0.2589, "step": 20813 }, { "epoch": 0.96, "grad_norm": 1.892282693443322, "learning_rate": 1.0057640827378034e-07, "loss": 0.327, "step": 20814 }, { "epoch": 0.96, "grad_norm": 0.3736953952079247, "learning_rate": 1.0036602000608963e-07, "loss": 0.3028, "step": 20815 }, { "epoch": 0.96, "grad_norm": 0.3324474242000121, "learning_rate": 1.0015585090675573e-07, "loss": 0.2596, "step": 20816 }, { "epoch": 0.96, "grad_norm": 1.1745604159450236, "learning_rate": 9.994590098042822e-08, "loss": 0.4094, "step": 20817 }, { "epoch": 0.96, "grad_norm": 0.2889475477218363, "learning_rate": 9.973617023175786e-08, "loss": 0.2239, "step": 20818 }, { "epoch": 0.96, "grad_norm": 0.4002042585059943, "learning_rate": 9.952665866538536e-08, "loss": 0.2482, "step": 20819 }, { "epoch": 0.96, "grad_norm": 0.3491096101982642, "learning_rate": 9.931736628595146e-08, "loss": 0.2508, "step": 20820 }, { "epoch": 0.96, "grad_norm": 0.5914407775014046, "learning_rate": 9.910829309808801e-08, "loss": 0.2446, "step": 20821 }, { "epoch": 0.96, "grad_norm": 0.3465350598076389, "learning_rate": 9.889943910642463e-08, "loss": 0.2569, "step": 20822 }, { "epoch": 0.96, "grad_norm": 0.38730402185230806, "learning_rate": 9.869080431558542e-08, "loss": 0.2963, "step": 20823 }, { "epoch": 0.96, "grad_norm": 0.4967044547694842, "learning_rate": 9.84823887301889e-08, "loss": 0.2124, "step": 20824 }, { "epoch": 0.96, "grad_norm": 0.2610611634859002, "learning_rate": 9.827419235484803e-08, "loss": 0.2047, "step": 20825 }, { "epoch": 0.96, "grad_norm": 1.2164033249489292, "learning_rate": 9.806621519417358e-08, "loss": 0.6175, "step": 20826 }, { "epoch": 0.96, "grad_norm": 0.4213754891471513, "learning_rate": 9.785845725276966e-08, "loss": 0.2495, "step": 20827 }, { "epoch": 0.96, "grad_norm": 0.3426560797761467, "learning_rate": 9.765091853523478e-08, "loss": 0.2342, "step": 20828 }, { "epoch": 0.96, "grad_norm": 0.6673574349711081, "learning_rate": 9.74435990461664e-08, "loss": 0.3752, "step": 20829 }, { "epoch": 0.96, "grad_norm": 0.5110573737562784, "learning_rate": 9.723649879015085e-08, "loss": 0.3179, "step": 20830 }, { "epoch": 0.96, "grad_norm": 0.22375368787094405, "learning_rate": 9.702961777177556e-08, "loss": 0.1764, "step": 20831 }, { "epoch": 0.96, "grad_norm": 0.40435991570706886, "learning_rate": 9.682295599561908e-08, "loss": 0.2027, "step": 20832 }, { "epoch": 0.96, "grad_norm": 1.3809643848379378, "learning_rate": 9.661651346625889e-08, "loss": 0.6955, "step": 20833 }, { "epoch": 0.96, "grad_norm": 0.3317144817301387, "learning_rate": 9.641029018826465e-08, "loss": 0.1959, "step": 20834 }, { "epoch": 0.96, "grad_norm": 0.36537049709495073, "learning_rate": 9.620428616619936e-08, "loss": 0.288, "step": 20835 }, { "epoch": 0.96, "grad_norm": 0.4829106079737012, "learning_rate": 9.599850140462719e-08, "loss": 0.206, "step": 20836 }, { "epoch": 0.96, "grad_norm": 0.2148894402796256, "learning_rate": 9.579293590810113e-08, "loss": 0.1141, "step": 20837 }, { "epoch": 0.96, "grad_norm": 0.5748870926814279, "learning_rate": 9.558758968117532e-08, "loss": 0.3588, "step": 20838 }, { "epoch": 0.96, "grad_norm": 0.40419198079435953, "learning_rate": 9.53824627283928e-08, "loss": 0.3119, "step": 20839 }, { "epoch": 0.96, "grad_norm": 0.4924648615857512, "learning_rate": 9.51775550542977e-08, "loss": 0.1824, "step": 20840 }, { "epoch": 0.96, "grad_norm": 0.4732904819517592, "learning_rate": 9.49728666634242e-08, "loss": 0.3231, "step": 20841 }, { "epoch": 0.96, "grad_norm": 0.5271751715870728, "learning_rate": 9.476839756030531e-08, "loss": 0.3093, "step": 20842 }, { "epoch": 0.96, "grad_norm": 0.3076982898455123, "learning_rate": 9.456414774946743e-08, "loss": 0.2363, "step": 20843 }, { "epoch": 0.96, "grad_norm": 0.22349633058371374, "learning_rate": 9.436011723543137e-08, "loss": 0.1431, "step": 20844 }, { "epoch": 0.96, "grad_norm": 0.9140091510322358, "learning_rate": 9.415630602271575e-08, "loss": 0.4798, "step": 20845 }, { "epoch": 0.96, "grad_norm": 0.4201375788544067, "learning_rate": 9.395271411583251e-08, "loss": 0.2567, "step": 20846 }, { "epoch": 0.96, "grad_norm": 0.34397009759387076, "learning_rate": 9.374934151928916e-08, "loss": 0.2571, "step": 20847 }, { "epoch": 0.96, "grad_norm": 1.4089349732548058, "learning_rate": 9.354618823758654e-08, "loss": 0.4506, "step": 20848 }, { "epoch": 0.96, "grad_norm": 0.3242831348415367, "learning_rate": 9.334325427522328e-08, "loss": 0.2095, "step": 20849 }, { "epoch": 0.96, "grad_norm": 0.5165063125170287, "learning_rate": 9.314053963669245e-08, "loss": 0.2734, "step": 20850 }, { "epoch": 0.96, "grad_norm": 0.3510599195698513, "learning_rate": 9.293804432648157e-08, "loss": 0.2783, "step": 20851 }, { "epoch": 0.96, "grad_norm": 0.33263484052898906, "learning_rate": 9.273576834907483e-08, "loss": 0.2441, "step": 20852 }, { "epoch": 0.96, "grad_norm": 0.5669293073206008, "learning_rate": 9.253371170894865e-08, "loss": 0.1489, "step": 20853 }, { "epoch": 0.96, "grad_norm": 0.36630369353747055, "learning_rate": 9.233187441057612e-08, "loss": 0.2916, "step": 20854 }, { "epoch": 0.96, "grad_norm": 0.6741567523970805, "learning_rate": 9.213025645842921e-08, "loss": 0.3701, "step": 20855 }, { "epoch": 0.96, "grad_norm": 0.3204710034985468, "learning_rate": 9.192885785696659e-08, "loss": 0.262, "step": 20856 }, { "epoch": 0.96, "grad_norm": 0.5836075680518281, "learning_rate": 9.172767861065135e-08, "loss": 0.2983, "step": 20857 }, { "epoch": 0.96, "grad_norm": 0.37710069569883, "learning_rate": 9.152671872393437e-08, "loss": 0.2376, "step": 20858 }, { "epoch": 0.96, "grad_norm": 0.25585110812094386, "learning_rate": 9.132597820126654e-08, "loss": 0.2245, "step": 20859 }, { "epoch": 0.96, "grad_norm": 1.4007490278216879, "learning_rate": 9.112545704709207e-08, "loss": 0.1976, "step": 20860 }, { "epoch": 0.96, "grad_norm": 0.7442109115197545, "learning_rate": 9.092515526584855e-08, "loss": 0.3505, "step": 20861 }, { "epoch": 0.96, "grad_norm": 0.32618250766267587, "learning_rate": 9.07250728619713e-08, "loss": 0.2814, "step": 20862 }, { "epoch": 0.96, "grad_norm": 0.44855672147709247, "learning_rate": 9.052520983989233e-08, "loss": 0.2624, "step": 20863 }, { "epoch": 0.96, "grad_norm": 0.4629268515198908, "learning_rate": 9.032556620403254e-08, "loss": 0.2764, "step": 20864 }, { "epoch": 0.96, "grad_norm": 0.27158603257987185, "learning_rate": 9.012614195881397e-08, "loss": 0.2022, "step": 20865 }, { "epoch": 0.96, "grad_norm": 0.42642081157990547, "learning_rate": 8.992693710865197e-08, "loss": 0.2493, "step": 20866 }, { "epoch": 0.96, "grad_norm": 0.40966032182455425, "learning_rate": 8.972795165795522e-08, "loss": 0.2198, "step": 20867 }, { "epoch": 0.96, "grad_norm": 0.5555061616885009, "learning_rate": 8.952918561113022e-08, "loss": 0.3601, "step": 20868 }, { "epoch": 0.96, "grad_norm": 0.5813969184202027, "learning_rate": 8.933063897257787e-08, "loss": 0.3631, "step": 20869 }, { "epoch": 0.96, "grad_norm": 0.3395446133581907, "learning_rate": 8.913231174669246e-08, "loss": 0.2117, "step": 20870 }, { "epoch": 0.96, "grad_norm": 0.2633476817248766, "learning_rate": 8.89342039378649e-08, "loss": 0.1805, "step": 20871 }, { "epoch": 0.96, "grad_norm": 1.3745869516244957, "learning_rate": 8.87363155504828e-08, "loss": 0.4074, "step": 20872 }, { "epoch": 0.96, "grad_norm": 0.3423359018775441, "learning_rate": 8.853864658892596e-08, "loss": 0.1136, "step": 20873 }, { "epoch": 0.96, "grad_norm": 0.3623496574207589, "learning_rate": 8.834119705757093e-08, "loss": 0.2881, "step": 20874 }, { "epoch": 0.96, "grad_norm": 0.4420455861019985, "learning_rate": 8.814396696078753e-08, "loss": 0.332, "step": 20875 }, { "epoch": 0.96, "grad_norm": 0.2906953844396678, "learning_rate": 8.794695630294447e-08, "loss": 0.1133, "step": 20876 }, { "epoch": 0.96, "grad_norm": 0.26415644697070917, "learning_rate": 8.775016508840273e-08, "loss": 0.1818, "step": 20877 }, { "epoch": 0.96, "grad_norm": 0.38169615738706814, "learning_rate": 8.755359332151769e-08, "loss": 0.2701, "step": 20878 }, { "epoch": 0.96, "grad_norm": 0.3019191263952281, "learning_rate": 8.735724100664256e-08, "loss": 0.1098, "step": 20879 }, { "epoch": 0.96, "grad_norm": 0.3837706603241768, "learning_rate": 8.716110814812496e-08, "loss": 0.2912, "step": 20880 }, { "epoch": 0.96, "grad_norm": 1.0753798470204377, "learning_rate": 8.696519475030585e-08, "loss": 0.4949, "step": 20881 }, { "epoch": 0.96, "grad_norm": 0.3706112023524697, "learning_rate": 8.6769500817524e-08, "loss": 0.2763, "step": 20882 }, { "epoch": 0.96, "grad_norm": 0.27039971150375186, "learning_rate": 8.657402635410928e-08, "loss": 0.1572, "step": 20883 }, { "epoch": 0.96, "grad_norm": 0.6440284424813976, "learning_rate": 8.637877136439155e-08, "loss": 0.2392, "step": 20884 }, { "epoch": 0.96, "grad_norm": 0.5770686648673842, "learning_rate": 8.61837358526929e-08, "loss": 0.2822, "step": 20885 }, { "epoch": 0.96, "grad_norm": 0.33295461153943073, "learning_rate": 8.598891982333213e-08, "loss": 0.2292, "step": 20886 }, { "epoch": 0.96, "grad_norm": 0.495381146477583, "learning_rate": 8.57943232806202e-08, "loss": 0.3657, "step": 20887 }, { "epoch": 0.96, "grad_norm": 0.653089012636174, "learning_rate": 8.559994622886702e-08, "loss": 0.3456, "step": 20888 }, { "epoch": 0.96, "grad_norm": 0.2269785772390465, "learning_rate": 8.540578867237581e-08, "loss": 0.1496, "step": 20889 }, { "epoch": 0.96, "grad_norm": 0.39192652770169045, "learning_rate": 8.521185061544423e-08, "loss": 0.2598, "step": 20890 }, { "epoch": 0.96, "grad_norm": 0.8021736812475867, "learning_rate": 8.501813206236664e-08, "loss": 0.4142, "step": 20891 }, { "epoch": 0.96, "grad_norm": 0.3219627113294337, "learning_rate": 8.482463301743182e-08, "loss": 0.2113, "step": 20892 }, { "epoch": 0.96, "grad_norm": 1.1239801730082162, "learning_rate": 8.463135348492191e-08, "loss": 0.5786, "step": 20893 }, { "epoch": 0.96, "grad_norm": 0.37202503324251085, "learning_rate": 8.443829346911792e-08, "loss": 0.2836, "step": 20894 }, { "epoch": 0.96, "grad_norm": 0.41039813633639977, "learning_rate": 8.424545297429309e-08, "loss": 0.2654, "step": 20895 }, { "epoch": 0.96, "grad_norm": 0.25241410114457336, "learning_rate": 8.405283200471848e-08, "loss": 0.1054, "step": 20896 }, { "epoch": 0.96, "grad_norm": 0.5797586963597887, "learning_rate": 8.38604305646551e-08, "loss": 0.3377, "step": 20897 }, { "epoch": 0.96, "grad_norm": 0.26450182210355294, "learning_rate": 8.36682486583651e-08, "loss": 0.2482, "step": 20898 }, { "epoch": 0.96, "grad_norm": 1.16640056548404, "learning_rate": 8.347628629010285e-08, "loss": 0.462, "step": 20899 }, { "epoch": 0.96, "grad_norm": 1.1183143143031313, "learning_rate": 8.32845434641183e-08, "loss": 0.3473, "step": 20900 }, { "epoch": 0.96, "grad_norm": 0.21857714365090183, "learning_rate": 8.309302018465581e-08, "loss": 0.1625, "step": 20901 }, { "epoch": 0.96, "grad_norm": 0.35085293253043426, "learning_rate": 8.290171645595535e-08, "loss": 0.2342, "step": 20902 }, { "epoch": 0.96, "grad_norm": 0.6299361488360684, "learning_rate": 8.271063228225351e-08, "loss": 0.3315, "step": 20903 }, { "epoch": 0.96, "grad_norm": 0.34729952627141764, "learning_rate": 8.251976766777914e-08, "loss": 0.2494, "step": 20904 }, { "epoch": 0.96, "grad_norm": 1.3255736442213075, "learning_rate": 8.232912261675774e-08, "loss": 0.3862, "step": 20905 }, { "epoch": 0.96, "grad_norm": 0.3732522797955375, "learning_rate": 8.21386971334126e-08, "loss": 0.2767, "step": 20906 }, { "epoch": 0.96, "grad_norm": 0.4627466230901579, "learning_rate": 8.194849122195702e-08, "loss": 0.2982, "step": 20907 }, { "epoch": 0.96, "grad_norm": 0.34163293298774766, "learning_rate": 8.175850488660209e-08, "loss": 0.1892, "step": 20908 }, { "epoch": 0.96, "grad_norm": 0.7493727448231007, "learning_rate": 8.156873813155442e-08, "loss": 0.2529, "step": 20909 }, { "epoch": 0.96, "grad_norm": 0.2811481335486801, "learning_rate": 8.13791909610162e-08, "loss": 0.2457, "step": 20910 }, { "epoch": 0.96, "grad_norm": 0.543120884948702, "learning_rate": 8.118986337918411e-08, "loss": 0.3767, "step": 20911 }, { "epoch": 0.96, "grad_norm": 3.4170232680063313, "learning_rate": 8.100075539024699e-08, "loss": 0.1796, "step": 20912 }, { "epoch": 0.96, "grad_norm": 0.3444819797008657, "learning_rate": 8.081186699839371e-08, "loss": 0.2479, "step": 20913 }, { "epoch": 0.96, "grad_norm": 0.3698564434755987, "learning_rate": 8.062319820780428e-08, "loss": 0.2945, "step": 20914 }, { "epoch": 0.96, "grad_norm": 0.2489373354363003, "learning_rate": 8.043474902265757e-08, "loss": 0.1121, "step": 20915 }, { "epoch": 0.96, "grad_norm": 0.3856458572983423, "learning_rate": 8.024651944712469e-08, "loss": 0.2704, "step": 20916 }, { "epoch": 0.96, "grad_norm": 1.336237141679605, "learning_rate": 8.005850948537453e-08, "loss": 0.7046, "step": 20917 }, { "epoch": 0.96, "grad_norm": 0.3311376014040576, "learning_rate": 7.987071914156597e-08, "loss": 0.2184, "step": 20918 }, { "epoch": 0.96, "grad_norm": 0.3848520750916892, "learning_rate": 7.968314841985902e-08, "loss": 0.2871, "step": 20919 }, { "epoch": 0.96, "grad_norm": 0.6290212964259561, "learning_rate": 7.949579732440705e-08, "loss": 0.3579, "step": 20920 }, { "epoch": 0.96, "grad_norm": 0.2941406420185858, "learning_rate": 7.93086658593556e-08, "loss": 0.1795, "step": 20921 }, { "epoch": 0.96, "grad_norm": 0.32304831792081323, "learning_rate": 7.912175402884914e-08, "loss": 0.1806, "step": 20922 }, { "epoch": 0.96, "grad_norm": 0.49024716811534175, "learning_rate": 7.893506183702437e-08, "loss": 0.3805, "step": 20923 }, { "epoch": 0.96, "grad_norm": 0.9384423046934137, "learning_rate": 7.874858928801577e-08, "loss": 0.4119, "step": 20924 }, { "epoch": 0.96, "grad_norm": 0.29944458278838615, "learning_rate": 7.856233638595223e-08, "loss": 0.1807, "step": 20925 }, { "epoch": 0.96, "grad_norm": 0.37669615605650464, "learning_rate": 7.837630313495493e-08, "loss": 0.3007, "step": 20926 }, { "epoch": 0.96, "grad_norm": 0.2827886269204321, "learning_rate": 7.819048953914387e-08, "loss": 0.1674, "step": 20927 }, { "epoch": 0.96, "grad_norm": 0.3272916859500001, "learning_rate": 7.800489560263247e-08, "loss": 0.2014, "step": 20928 }, { "epoch": 0.96, "grad_norm": 0.5426117183216848, "learning_rate": 7.781952132952963e-08, "loss": 0.3921, "step": 20929 }, { "epoch": 0.96, "grad_norm": 0.5478441375290847, "learning_rate": 7.763436672393987e-08, "loss": 0.3465, "step": 20930 }, { "epoch": 0.96, "grad_norm": 0.328964867641083, "learning_rate": 7.7449431789961e-08, "loss": 0.2485, "step": 20931 }, { "epoch": 0.96, "grad_norm": 0.6791546981515926, "learning_rate": 7.726471653168977e-08, "loss": 0.2773, "step": 20932 }, { "epoch": 0.96, "grad_norm": 0.23668818074695283, "learning_rate": 7.708022095321288e-08, "loss": 0.1772, "step": 20933 }, { "epoch": 0.96, "grad_norm": 0.27782492865400477, "learning_rate": 7.689594505861708e-08, "loss": 0.2328, "step": 20934 }, { "epoch": 0.96, "grad_norm": 1.0227714225666613, "learning_rate": 7.671188885198022e-08, "loss": 0.5186, "step": 20935 }, { "epoch": 0.96, "grad_norm": 0.6833071725458699, "learning_rate": 7.652805233737792e-08, "loss": 0.362, "step": 20936 }, { "epoch": 0.96, "grad_norm": 0.3487903309815111, "learning_rate": 7.634443551888137e-08, "loss": 0.25, "step": 20937 }, { "epoch": 0.96, "grad_norm": 0.3568585370008684, "learning_rate": 7.616103840055289e-08, "loss": 0.2357, "step": 20938 }, { "epoch": 0.96, "grad_norm": 0.5692708035998152, "learning_rate": 7.597786098645477e-08, "loss": 0.2935, "step": 20939 }, { "epoch": 0.96, "grad_norm": 0.352655623743412, "learning_rate": 7.579490328064265e-08, "loss": 0.2406, "step": 20940 }, { "epoch": 0.96, "grad_norm": 0.32831862225873626, "learning_rate": 7.561216528716552e-08, "loss": 0.1976, "step": 20941 }, { "epoch": 0.96, "grad_norm": 0.45159428126693574, "learning_rate": 7.542964701007016e-08, "loss": 0.3274, "step": 20942 }, { "epoch": 0.96, "grad_norm": 0.43178168512532805, "learning_rate": 7.524734845339665e-08, "loss": 0.2711, "step": 20943 }, { "epoch": 0.96, "grad_norm": 0.5852231991827269, "learning_rate": 7.506526962118176e-08, "loss": 0.3279, "step": 20944 }, { "epoch": 0.96, "grad_norm": 0.4507357410168006, "learning_rate": 7.488341051745562e-08, "loss": 0.2432, "step": 20945 }, { "epoch": 0.96, "grad_norm": 0.4184224805417246, "learning_rate": 7.4701771146245e-08, "loss": 0.2667, "step": 20946 }, { "epoch": 0.96, "grad_norm": 0.3766177038063524, "learning_rate": 7.45203515115711e-08, "loss": 0.2642, "step": 20947 }, { "epoch": 0.96, "grad_norm": 0.6011207543980479, "learning_rate": 7.433915161744965e-08, "loss": 0.2736, "step": 20948 }, { "epoch": 0.96, "grad_norm": 0.2577723024016978, "learning_rate": 7.415817146789406e-08, "loss": 0.1898, "step": 20949 }, { "epoch": 0.96, "grad_norm": 0.42736346259781277, "learning_rate": 7.397741106690892e-08, "loss": 0.2865, "step": 20950 }, { "epoch": 0.96, "grad_norm": 1.247096702231234, "learning_rate": 7.37968704184977e-08, "loss": 0.2411, "step": 20951 }, { "epoch": 0.96, "grad_norm": 0.337915982871162, "learning_rate": 7.361654952665608e-08, "loss": 0.2453, "step": 20952 }, { "epoch": 0.96, "grad_norm": 0.8992801223892667, "learning_rate": 7.343644839537756e-08, "loss": 0.5287, "step": 20953 }, { "epoch": 0.96, "grad_norm": 0.2781801729042686, "learning_rate": 7.325656702864891e-08, "loss": 0.2157, "step": 20954 }, { "epoch": 0.96, "grad_norm": 0.2656787111445052, "learning_rate": 7.307690543045142e-08, "loss": 0.2038, "step": 20955 }, { "epoch": 0.96, "grad_norm": 1.5594629924756103, "learning_rate": 7.289746360476524e-08, "loss": 0.5222, "step": 20956 }, { "epoch": 0.96, "grad_norm": 0.40075593671668047, "learning_rate": 7.27182415555594e-08, "loss": 0.2695, "step": 20957 }, { "epoch": 0.96, "grad_norm": 0.31043194877996777, "learning_rate": 7.253923928680406e-08, "loss": 0.1916, "step": 20958 }, { "epoch": 0.96, "grad_norm": 0.9304813055901388, "learning_rate": 7.236045680246273e-08, "loss": 0.4644, "step": 20959 }, { "epoch": 0.96, "grad_norm": 0.4475450213943127, "learning_rate": 7.218189410649113e-08, "loss": 0.2743, "step": 20960 }, { "epoch": 0.96, "grad_norm": 0.248892337187839, "learning_rate": 7.200355120284496e-08, "loss": 0.1286, "step": 20961 }, { "epoch": 0.96, "grad_norm": 0.3290205961407414, "learning_rate": 7.182542809547111e-08, "loss": 0.2765, "step": 20962 }, { "epoch": 0.96, "grad_norm": 1.4834556045454674, "learning_rate": 7.164752478831305e-08, "loss": 0.4459, "step": 20963 }, { "epoch": 0.96, "grad_norm": 0.36448259271028577, "learning_rate": 7.146984128530988e-08, "loss": 0.1746, "step": 20964 }, { "epoch": 0.96, "grad_norm": 0.3546452947096683, "learning_rate": 7.129237759039509e-08, "loss": 0.2771, "step": 20965 }, { "epoch": 0.96, "grad_norm": 0.4902707925376102, "learning_rate": 7.11151337074989e-08, "loss": 0.3311, "step": 20966 }, { "epoch": 0.96, "grad_norm": 0.18624699967589212, "learning_rate": 7.09381096405426e-08, "loss": 0.1202, "step": 20967 }, { "epoch": 0.96, "grad_norm": 0.7822412910398101, "learning_rate": 7.07613053934475e-08, "loss": 0.3361, "step": 20968 }, { "epoch": 0.96, "grad_norm": 0.38006838628332645, "learning_rate": 7.058472097012715e-08, "loss": 0.295, "step": 20969 }, { "epoch": 0.96, "grad_norm": 0.44913289921966887, "learning_rate": 7.040835637449062e-08, "loss": 0.2884, "step": 20970 }, { "epoch": 0.96, "grad_norm": 0.4890102212725251, "learning_rate": 7.023221161044258e-08, "loss": 0.2486, "step": 20971 }, { "epoch": 0.96, "grad_norm": 0.8703076335245795, "learning_rate": 7.005628668188325e-08, "loss": 0.4948, "step": 20972 }, { "epoch": 0.96, "grad_norm": 0.2659242523458978, "learning_rate": 6.988058159270727e-08, "loss": 0.2025, "step": 20973 }, { "epoch": 0.96, "grad_norm": 0.280910965525807, "learning_rate": 6.970509634680378e-08, "loss": 0.1779, "step": 20974 }, { "epoch": 0.96, "grad_norm": 1.2799547202824724, "learning_rate": 6.952983094805965e-08, "loss": 0.4506, "step": 20975 }, { "epoch": 0.96, "grad_norm": 0.5272729290816845, "learning_rate": 6.9354785400354e-08, "loss": 0.2912, "step": 20976 }, { "epoch": 0.96, "grad_norm": 0.32819716796095183, "learning_rate": 6.917995970756153e-08, "loss": 0.2413, "step": 20977 }, { "epoch": 0.96, "grad_norm": 0.4979588026306972, "learning_rate": 6.900535387355245e-08, "loss": 0.3412, "step": 20978 }, { "epoch": 0.96, "grad_norm": 0.7149091934708959, "learning_rate": 6.883096790219479e-08, "loss": 0.297, "step": 20979 }, { "epoch": 0.96, "grad_norm": 0.24804894744224407, "learning_rate": 6.865680179734657e-08, "loss": 0.1513, "step": 20980 }, { "epoch": 0.96, "grad_norm": 0.3804917484073137, "learning_rate": 6.848285556286583e-08, "loss": 0.2787, "step": 20981 }, { "epoch": 0.96, "grad_norm": 0.6138913461661227, "learning_rate": 6.830912920260169e-08, "loss": 0.3419, "step": 20982 }, { "epoch": 0.96, "grad_norm": 0.4368497983663204, "learning_rate": 6.813562272040109e-08, "loss": 0.3011, "step": 20983 }, { "epoch": 0.96, "grad_norm": 1.6674525531911986, "learning_rate": 6.796233612010539e-08, "loss": 0.3966, "step": 20984 }, { "epoch": 0.96, "grad_norm": 0.29825239211327864, "learning_rate": 6.778926940555152e-08, "loss": 0.2255, "step": 20985 }, { "epoch": 0.96, "grad_norm": 0.25089060112955863, "learning_rate": 6.761642258056977e-08, "loss": 0.1946, "step": 20986 }, { "epoch": 0.96, "grad_norm": 0.7977865478637034, "learning_rate": 6.744379564898818e-08, "loss": 0.241, "step": 20987 }, { "epoch": 0.96, "grad_norm": 0.5537487225069052, "learning_rate": 6.727138861462812e-08, "loss": 0.2903, "step": 20988 }, { "epoch": 0.96, "grad_norm": 0.33226320495919426, "learning_rate": 6.709920148130544e-08, "loss": 0.2975, "step": 20989 }, { "epoch": 0.96, "grad_norm": 0.4723067324242132, "learning_rate": 6.692723425283265e-08, "loss": 0.272, "step": 20990 }, { "epoch": 0.96, "grad_norm": 0.4785276005933155, "learning_rate": 6.675548693301781e-08, "loss": 0.2775, "step": 20991 }, { "epoch": 0.96, "grad_norm": 0.24439470465992322, "learning_rate": 6.658395952566233e-08, "loss": 0.1536, "step": 20992 }, { "epoch": 0.96, "grad_norm": 0.35119293766639925, "learning_rate": 6.641265203456537e-08, "loss": 0.2416, "step": 20993 }, { "epoch": 0.96, "grad_norm": 0.5679376007196011, "learning_rate": 6.624156446351615e-08, "loss": 0.2873, "step": 20994 }, { "epoch": 0.96, "grad_norm": 0.4345223423672163, "learning_rate": 6.607069681630606e-08, "loss": 0.316, "step": 20995 }, { "epoch": 0.96, "grad_norm": 1.4845227169624138, "learning_rate": 6.590004909671543e-08, "loss": 0.5733, "step": 20996 }, { "epoch": 0.96, "grad_norm": 0.3168726207894635, "learning_rate": 6.572962130852345e-08, "loss": 0.2189, "step": 20997 }, { "epoch": 0.96, "grad_norm": 0.24651593406613984, "learning_rate": 6.555941345550265e-08, "loss": 0.1982, "step": 20998 }, { "epoch": 0.96, "grad_norm": 0.6599167224065907, "learning_rate": 6.538942554142114e-08, "loss": 0.3512, "step": 20999 }, { "epoch": 0.96, "grad_norm": 0.6367149066849355, "learning_rate": 6.521965757004367e-08, "loss": 0.1027, "step": 21000 }, { "epoch": 0.96, "grad_norm": 0.2635131481922569, "learning_rate": 6.505010954512725e-08, "loss": 0.2701, "step": 21001 }, { "epoch": 0.96, "grad_norm": 1.3268981156118795, "learning_rate": 6.488078147042554e-08, "loss": 0.7576, "step": 21002 }, { "epoch": 0.96, "grad_norm": 0.7807833963395009, "learning_rate": 6.471167334968887e-08, "loss": 0.1341, "step": 21003 }, { "epoch": 0.96, "grad_norm": 0.3098628571783303, "learning_rate": 6.454278518665869e-08, "loss": 0.2544, "step": 21004 }, { "epoch": 0.96, "grad_norm": 0.35385541120747066, "learning_rate": 6.437411698507645e-08, "loss": 0.2901, "step": 21005 }, { "epoch": 0.96, "grad_norm": 0.33579937195910586, "learning_rate": 6.420566874867363e-08, "loss": 0.1197, "step": 21006 }, { "epoch": 0.97, "grad_norm": 0.4216155896744583, "learning_rate": 6.403744048118277e-08, "loss": 0.3068, "step": 21007 }, { "epoch": 0.97, "grad_norm": 1.374009464580873, "learning_rate": 6.386943218632535e-08, "loss": 0.717, "step": 21008 }, { "epoch": 0.97, "grad_norm": 0.341526128496463, "learning_rate": 6.370164386782285e-08, "loss": 0.2525, "step": 21009 }, { "epoch": 0.97, "grad_norm": 0.3493405122156894, "learning_rate": 6.353407552938895e-08, "loss": 0.1892, "step": 21010 }, { "epoch": 0.97, "grad_norm": 0.44740823313160427, "learning_rate": 6.336672717473402e-08, "loss": 0.2717, "step": 21011 }, { "epoch": 0.97, "grad_norm": 0.3375390911257085, "learning_rate": 6.319959880756176e-08, "loss": 0.1778, "step": 21012 }, { "epoch": 0.97, "grad_norm": 0.25874487195947016, "learning_rate": 6.303269043157367e-08, "loss": 0.2233, "step": 21013 }, { "epoch": 0.97, "grad_norm": 1.3159685635081946, "learning_rate": 6.286600205046566e-08, "loss": 0.7655, "step": 21014 }, { "epoch": 0.97, "grad_norm": 0.9693206785714636, "learning_rate": 6.269953366792481e-08, "loss": 0.4191, "step": 21015 }, { "epoch": 0.97, "grad_norm": 0.40390567134190547, "learning_rate": 6.253328528764035e-08, "loss": 0.2137, "step": 21016 }, { "epoch": 0.97, "grad_norm": 0.2918628400038654, "learning_rate": 6.236725691329049e-08, "loss": 0.2478, "step": 21017 }, { "epoch": 0.97, "grad_norm": 0.5166313739900902, "learning_rate": 6.220144854855115e-08, "loss": 0.2892, "step": 21018 }, { "epoch": 0.97, "grad_norm": 0.3111552586826631, "learning_rate": 6.203586019709384e-08, "loss": 0.2033, "step": 21019 }, { "epoch": 0.97, "grad_norm": 1.5071738096138139, "learning_rate": 6.187049186258453e-08, "loss": 0.5232, "step": 21020 }, { "epoch": 0.97, "grad_norm": 0.3414389570798249, "learning_rate": 6.170534354868251e-08, "loss": 0.2651, "step": 21021 }, { "epoch": 0.97, "grad_norm": 0.34882043652653827, "learning_rate": 6.154041525904708e-08, "loss": 0.2677, "step": 21022 }, { "epoch": 0.97, "grad_norm": 1.0199661157709752, "learning_rate": 6.137570699732753e-08, "loss": 0.2477, "step": 21023 }, { "epoch": 0.97, "grad_norm": 0.24240787157595187, "learning_rate": 6.121121876717206e-08, "loss": 0.1939, "step": 21024 }, { "epoch": 0.97, "grad_norm": 0.2972542099972891, "learning_rate": 6.104695057221887e-08, "loss": 0.2688, "step": 21025 }, { "epoch": 0.97, "grad_norm": 2.0054562355267063, "learning_rate": 6.08829024161084e-08, "loss": 0.3024, "step": 21026 }, { "epoch": 0.97, "grad_norm": 0.5329130865929097, "learning_rate": 6.071907430247104e-08, "loss": 0.3458, "step": 21027 }, { "epoch": 0.97, "grad_norm": 0.3941837523435308, "learning_rate": 6.055546623493392e-08, "loss": 0.2537, "step": 21028 }, { "epoch": 0.97, "grad_norm": 0.39080321529263906, "learning_rate": 6.039207821711856e-08, "loss": 0.2499, "step": 21029 }, { "epoch": 0.97, "grad_norm": 0.2668253796144861, "learning_rate": 6.02289102526421e-08, "loss": 0.1661, "step": 21030 }, { "epoch": 0.97, "grad_norm": 0.40433685724306456, "learning_rate": 6.00659623451183e-08, "loss": 0.2613, "step": 21031 }, { "epoch": 0.97, "grad_norm": 0.5072479338361667, "learning_rate": 5.990323449815316e-08, "loss": 0.2696, "step": 21032 }, { "epoch": 0.97, "grad_norm": 0.4795865715266076, "learning_rate": 5.974072671535047e-08, "loss": 0.2961, "step": 21033 }, { "epoch": 0.97, "grad_norm": 0.34712187506857195, "learning_rate": 5.957843900030735e-08, "loss": 0.2529, "step": 21034 }, { "epoch": 0.97, "grad_norm": 1.1667171278496133, "learning_rate": 5.9416371356617596e-08, "loss": 0.3876, "step": 21035 }, { "epoch": 0.97, "grad_norm": 0.28197657818009786, "learning_rate": 5.925452378786833e-08, "loss": 0.1996, "step": 21036 }, { "epoch": 0.97, "grad_norm": 0.3411686794312948, "learning_rate": 5.9092896297642254e-08, "loss": 0.2629, "step": 21037 }, { "epoch": 0.97, "grad_norm": 0.4868318614874262, "learning_rate": 5.893148888951872e-08, "loss": 0.2403, "step": 21038 }, { "epoch": 0.97, "grad_norm": 0.7566832572607907, "learning_rate": 5.877030156707042e-08, "loss": 0.2779, "step": 21039 }, { "epoch": 0.97, "grad_norm": 0.3273158831965462, "learning_rate": 5.860933433386673e-08, "loss": 0.237, "step": 21040 }, { "epoch": 0.97, "grad_norm": 0.3665577847433682, "learning_rate": 5.844858719347035e-08, "loss": 0.2869, "step": 21041 }, { "epoch": 0.97, "grad_norm": 0.9720960958138958, "learning_rate": 5.828806014943955e-08, "loss": 0.2239, "step": 21042 }, { "epoch": 0.97, "grad_norm": 0.37827517985385767, "learning_rate": 5.8127753205330375e-08, "loss": 0.2607, "step": 21043 }, { "epoch": 0.97, "grad_norm": 0.48589045401312925, "learning_rate": 5.7967666364689975e-08, "loss": 0.306, "step": 21044 }, { "epoch": 0.97, "grad_norm": 0.2989415029962626, "learning_rate": 5.7807799631064423e-08, "loss": 0.2698, "step": 21045 }, { "epoch": 0.97, "grad_norm": 0.3265293889674919, "learning_rate": 5.764815300798976e-08, "loss": 0.1909, "step": 21046 }, { "epoch": 0.97, "grad_norm": 1.400948657107216, "learning_rate": 5.748872649900428e-08, "loss": 0.4168, "step": 21047 }, { "epoch": 0.97, "grad_norm": 0.3708611335060279, "learning_rate": 5.732952010763515e-08, "loss": 0.3267, "step": 21048 }, { "epoch": 0.97, "grad_norm": 0.3068513815828977, "learning_rate": 5.717053383740734e-08, "loss": 0.1803, "step": 21049 }, { "epoch": 0.97, "grad_norm": 0.7669352683266277, "learning_rate": 5.701176769184025e-08, "loss": 0.4029, "step": 21050 }, { "epoch": 0.97, "grad_norm": 0.33158510907614885, "learning_rate": 5.685322167444995e-08, "loss": 0.2158, "step": 21051 }, { "epoch": 0.97, "grad_norm": 0.28984673113048603, "learning_rate": 5.6694895788746984e-08, "loss": 0.1793, "step": 21052 }, { "epoch": 0.97, "grad_norm": 0.3820019687630575, "learning_rate": 5.653679003823409e-08, "loss": 0.2729, "step": 21053 }, { "epoch": 0.97, "grad_norm": 1.313660678012032, "learning_rate": 5.637890442641403e-08, "loss": 0.5951, "step": 21054 }, { "epoch": 0.97, "grad_norm": 0.34318564712964045, "learning_rate": 5.6221238956780664e-08, "loss": 0.2093, "step": 21055 }, { "epoch": 0.97, "grad_norm": 0.4321675398302973, "learning_rate": 5.6063793632825655e-08, "loss": 0.3034, "step": 21056 }, { "epoch": 0.97, "grad_norm": 0.3628331983910679, "learning_rate": 5.590656845803399e-08, "loss": 0.2551, "step": 21057 }, { "epoch": 0.97, "grad_norm": 0.28415210544096003, "learning_rate": 5.574956343588622e-08, "loss": 0.2061, "step": 21058 }, { "epoch": 0.97, "grad_norm": 1.6048733845177607, "learning_rate": 5.559277856985845e-08, "loss": 0.1641, "step": 21059 }, { "epoch": 0.97, "grad_norm": 0.37301445085082735, "learning_rate": 5.543621386342346e-08, "loss": 0.327, "step": 21060 }, { "epoch": 0.97, "grad_norm": 0.3589962709287775, "learning_rate": 5.527986932004403e-08, "loss": 0.2463, "step": 21061 }, { "epoch": 0.97, "grad_norm": 0.6080953459712184, "learning_rate": 5.512374494318518e-08, "loss": 0.2731, "step": 21062 }, { "epoch": 0.97, "grad_norm": 0.27833664189840807, "learning_rate": 5.4967840736300795e-08, "loss": 0.1709, "step": 21063 }, { "epoch": 0.97, "grad_norm": 0.42373087746989385, "learning_rate": 5.481215670284368e-08, "loss": 0.2677, "step": 21064 }, { "epoch": 0.97, "grad_norm": 0.3402161081578707, "learning_rate": 5.465669284625996e-08, "loss": 0.2266, "step": 21065 }, { "epoch": 0.97, "grad_norm": 0.8005976894015006, "learning_rate": 5.450144916999134e-08, "loss": 0.4147, "step": 21066 }, { "epoch": 0.97, "grad_norm": 0.4197180773698471, "learning_rate": 5.434642567747506e-08, "loss": 0.2587, "step": 21067 }, { "epoch": 0.97, "grad_norm": 0.2897076669049023, "learning_rate": 5.4191622372143924e-08, "loss": 0.2419, "step": 21068 }, { "epoch": 0.97, "grad_norm": 1.464334737225091, "learning_rate": 5.4037039257422986e-08, "loss": 0.5636, "step": 21069 }, { "epoch": 0.97, "grad_norm": 0.22004676736744955, "learning_rate": 5.388267633673727e-08, "loss": 0.1504, "step": 21070 }, { "epoch": 0.97, "grad_norm": 0.6864883978978314, "learning_rate": 5.3728533613502944e-08, "loss": 0.3482, "step": 21071 }, { "epoch": 0.97, "grad_norm": 0.3435431529233863, "learning_rate": 5.357461109113171e-08, "loss": 0.2479, "step": 21072 }, { "epoch": 0.97, "grad_norm": 0.3503505072199705, "learning_rate": 5.342090877303196e-08, "loss": 0.2685, "step": 21073 }, { "epoch": 0.97, "grad_norm": 0.7985746599691683, "learning_rate": 5.326742666260765e-08, "loss": 0.4346, "step": 21074 }, { "epoch": 0.97, "grad_norm": 0.6667805766036662, "learning_rate": 5.3114164763254925e-08, "loss": 0.1023, "step": 21075 }, { "epoch": 0.97, "grad_norm": 0.2719418761550178, "learning_rate": 5.296112307836776e-08, "loss": 0.2028, "step": 21076 }, { "epoch": 0.97, "grad_norm": 0.36642683951161004, "learning_rate": 5.280830161133455e-08, "loss": 0.2793, "step": 21077 }, { "epoch": 0.97, "grad_norm": 0.7236212952436392, "learning_rate": 5.265570036553813e-08, "loss": 0.271, "step": 21078 }, { "epoch": 0.97, "grad_norm": 0.3584295670886505, "learning_rate": 5.2503319344356926e-08, "loss": 0.2666, "step": 21079 }, { "epoch": 0.97, "grad_norm": 0.5130197882624794, "learning_rate": 5.23511585511649e-08, "loss": 0.3477, "step": 21080 }, { "epoch": 0.97, "grad_norm": 0.46114344980404376, "learning_rate": 5.2199217989330475e-08, "loss": 0.1741, "step": 21081 }, { "epoch": 0.97, "grad_norm": 0.37840433132689727, "learning_rate": 5.2047497662217617e-08, "loss": 0.1985, "step": 21082 }, { "epoch": 0.97, "grad_norm": 0.43870601246326696, "learning_rate": 5.1895997573185865e-08, "loss": 0.3148, "step": 21083 }, { "epoch": 0.97, "grad_norm": 0.35122471261549876, "learning_rate": 5.1744717725588087e-08, "loss": 0.2862, "step": 21084 }, { "epoch": 0.97, "grad_norm": 0.4105564272989338, "learning_rate": 5.1593658122773835e-08, "loss": 0.1968, "step": 21085 }, { "epoch": 0.97, "grad_norm": 0.5400570320109478, "learning_rate": 5.14428187680871e-08, "loss": 0.3592, "step": 21086 }, { "epoch": 0.97, "grad_norm": 1.5205512720960765, "learning_rate": 5.1292199664868534e-08, "loss": 0.4631, "step": 21087 }, { "epoch": 0.97, "grad_norm": 0.2523982014319497, "learning_rate": 5.1141800816452144e-08, "loss": 0.1766, "step": 21088 }, { "epoch": 0.97, "grad_norm": 0.3003613142168817, "learning_rate": 5.0991622226167494e-08, "loss": 0.2387, "step": 21089 }, { "epoch": 0.97, "grad_norm": 0.637288497901785, "learning_rate": 5.084166389733858e-08, "loss": 0.3764, "step": 21090 }, { "epoch": 0.97, "grad_norm": 0.5670730657497081, "learning_rate": 5.069192583328719e-08, "loss": 0.1959, "step": 21091 }, { "epoch": 0.97, "grad_norm": 0.310162056162934, "learning_rate": 5.054240803732624e-08, "loss": 0.2871, "step": 21092 }, { "epoch": 0.97, "grad_norm": 1.6757636151313895, "learning_rate": 5.039311051276752e-08, "loss": 0.5598, "step": 21093 }, { "epoch": 0.97, "grad_norm": 0.24018036370079948, "learning_rate": 5.024403326291505e-08, "loss": 0.1567, "step": 21094 }, { "epoch": 0.97, "grad_norm": 0.4163847925276713, "learning_rate": 5.009517629107063e-08, "loss": 0.2125, "step": 21095 }, { "epoch": 0.97, "grad_norm": 0.33646559433904505, "learning_rate": 4.994653960052942e-08, "loss": 0.2993, "step": 21096 }, { "epoch": 0.97, "grad_norm": 0.4266638964812246, "learning_rate": 4.9798123194580994e-08, "loss": 0.232, "step": 21097 }, { "epoch": 0.97, "grad_norm": 0.49100924805239876, "learning_rate": 4.964992707651273e-08, "loss": 0.2565, "step": 21098 }, { "epoch": 0.97, "grad_norm": 1.5217569695239344, "learning_rate": 4.9501951249605326e-08, "loss": 0.5678, "step": 21099 }, { "epoch": 0.97, "grad_norm": 0.3495297658278421, "learning_rate": 4.935419571713285e-08, "loss": 0.2794, "step": 21100 }, { "epoch": 0.97, "grad_norm": 0.35212144113344696, "learning_rate": 4.920666048236933e-08, "loss": 0.1941, "step": 21101 }, { "epoch": 0.97, "grad_norm": 0.5149780279089359, "learning_rate": 4.905934554857772e-08, "loss": 0.2497, "step": 21102 }, { "epoch": 0.97, "grad_norm": 0.8744784321572094, "learning_rate": 4.8912250919023186e-08, "loss": 0.4955, "step": 21103 }, { "epoch": 0.97, "grad_norm": 0.2528673517505503, "learning_rate": 4.876537659695979e-08, "loss": 0.223, "step": 21104 }, { "epoch": 0.97, "grad_norm": 1.3046576319012568, "learning_rate": 4.861872258564049e-08, "loss": 0.4883, "step": 21105 }, { "epoch": 0.97, "grad_norm": 0.6646035749145004, "learning_rate": 4.847228888831046e-08, "loss": 0.3031, "step": 21106 }, { "epoch": 0.97, "grad_norm": 0.41056298233446153, "learning_rate": 4.832607550821267e-08, "loss": 0.2456, "step": 21107 }, { "epoch": 0.97, "grad_norm": 0.29751743505254297, "learning_rate": 4.818008244858452e-08, "loss": 0.2363, "step": 21108 }, { "epoch": 0.97, "grad_norm": 0.3575051636913753, "learning_rate": 4.803430971265677e-08, "loss": 0.2404, "step": 21109 }, { "epoch": 0.97, "grad_norm": 0.39003777610185875, "learning_rate": 4.788875730365905e-08, "loss": 0.3075, "step": 21110 }, { "epoch": 0.97, "grad_norm": 1.6747443999028828, "learning_rate": 4.7743425224811014e-08, "loss": 0.2265, "step": 21111 }, { "epoch": 0.97, "grad_norm": 0.33630030558678903, "learning_rate": 4.759831347933119e-08, "loss": 0.2692, "step": 21112 }, { "epoch": 0.97, "grad_norm": 0.42058105460767614, "learning_rate": 4.7453422070433685e-08, "loss": 0.3031, "step": 21113 }, { "epoch": 0.97, "grad_norm": 0.3516580337872266, "learning_rate": 4.730875100132481e-08, "loss": 0.1551, "step": 21114 }, { "epoch": 0.97, "grad_norm": 0.39433957713335116, "learning_rate": 4.7164300275206465e-08, "loss": 0.2714, "step": 21115 }, { "epoch": 0.97, "grad_norm": 0.372107570831673, "learning_rate": 4.702006989527941e-08, "loss": 0.2982, "step": 21116 }, { "epoch": 0.97, "grad_norm": 0.4974362338379551, "learning_rate": 4.6876059864734425e-08, "loss": 0.2352, "step": 21117 }, { "epoch": 0.97, "grad_norm": 0.5936583397743813, "learning_rate": 4.673227018676119e-08, "loss": 0.3264, "step": 21118 }, { "epoch": 0.97, "grad_norm": 0.44054090911972454, "learning_rate": 4.658870086454048e-08, "loss": 0.2843, "step": 21119 }, { "epoch": 0.97, "grad_norm": 0.300082661884542, "learning_rate": 4.644535190125421e-08, "loss": 0.2005, "step": 21120 }, { "epoch": 0.97, "grad_norm": 0.8772850497603905, "learning_rate": 4.630222330007428e-08, "loss": 0.5063, "step": 21121 }, { "epoch": 0.97, "grad_norm": 0.2724254784174644, "learning_rate": 4.615931506417038e-08, "loss": 0.211, "step": 21122 }, { "epoch": 0.97, "grad_norm": 0.5464751653214102, "learning_rate": 4.601662719670441e-08, "loss": 0.2861, "step": 21123 }, { "epoch": 0.97, "grad_norm": 0.4067677371090619, "learning_rate": 4.58741597008372e-08, "loss": 0.2482, "step": 21124 }, { "epoch": 0.97, "grad_norm": 0.37787775723618844, "learning_rate": 4.573191257972176e-08, "loss": 0.2652, "step": 21125 }, { "epoch": 0.97, "grad_norm": 0.5389939986745881, "learning_rate": 4.558988583650781e-08, "loss": 0.2313, "step": 21126 }, { "epoch": 0.97, "grad_norm": 0.4176353016003204, "learning_rate": 4.54480794743406e-08, "loss": 0.2142, "step": 21127 }, { "epoch": 0.97, "grad_norm": 0.2341460087765664, "learning_rate": 4.530649349635763e-08, "loss": 0.2295, "step": 21128 }, { "epoch": 0.97, "grad_norm": 0.9263619864837682, "learning_rate": 4.516512790569416e-08, "loss": 0.4546, "step": 21129 }, { "epoch": 0.97, "grad_norm": 0.5625177551348991, "learning_rate": 4.502398270548103e-08, "loss": 0.2255, "step": 21130 }, { "epoch": 0.97, "grad_norm": 0.4962413215534032, "learning_rate": 4.488305789884129e-08, "loss": 0.2871, "step": 21131 }, { "epoch": 0.97, "grad_norm": 0.36205985313862665, "learning_rate": 4.474235348889577e-08, "loss": 0.2843, "step": 21132 }, { "epoch": 0.97, "grad_norm": 0.36900435914783886, "learning_rate": 4.460186947876088e-08, "loss": 0.2187, "step": 21133 }, { "epoch": 0.97, "grad_norm": 0.3125631010406034, "learning_rate": 4.4461605871544136e-08, "loss": 0.2238, "step": 21134 }, { "epoch": 0.97, "grad_norm": 0.33763838686029524, "learning_rate": 4.4321562670353036e-08, "loss": 0.2512, "step": 21135 }, { "epoch": 0.97, "grad_norm": 0.44480054999027174, "learning_rate": 4.4181739878286224e-08, "loss": 0.278, "step": 21136 }, { "epoch": 0.97, "grad_norm": 0.4207981674636647, "learning_rate": 4.404213749844011e-08, "loss": 0.1966, "step": 21137 }, { "epoch": 0.97, "grad_norm": 1.390059346075542, "learning_rate": 4.390275553390555e-08, "loss": 0.5865, "step": 21138 }, { "epoch": 0.97, "grad_norm": 0.5447445076689306, "learning_rate": 4.3763593987768974e-08, "loss": 0.4026, "step": 21139 }, { "epoch": 0.97, "grad_norm": 0.2568867802394545, "learning_rate": 4.3624652863110126e-08, "loss": 0.2052, "step": 21140 }, { "epoch": 0.97, "grad_norm": 0.4258187132900209, "learning_rate": 4.348593216300545e-08, "loss": 0.2674, "step": 21141 }, { "epoch": 0.97, "grad_norm": 0.6210853711131512, "learning_rate": 4.334743189052581e-08, "loss": 0.2759, "step": 21142 }, { "epoch": 0.97, "grad_norm": 0.48781223201105295, "learning_rate": 4.3209152048737656e-08, "loss": 0.1972, "step": 21143 }, { "epoch": 0.97, "grad_norm": 0.37373762865719584, "learning_rate": 4.307109264070297e-08, "loss": 0.2859, "step": 21144 }, { "epoch": 0.97, "grad_norm": 1.2006803936275638, "learning_rate": 4.2933253669477096e-08, "loss": 0.7454, "step": 21145 }, { "epoch": 0.97, "grad_norm": 0.3392050791420737, "learning_rate": 4.2795635138112025e-08, "loss": 0.2111, "step": 21146 }, { "epoch": 0.97, "grad_norm": 0.45404522575058237, "learning_rate": 4.2658237049655325e-08, "loss": 0.326, "step": 21147 }, { "epoch": 0.97, "grad_norm": 0.25112403325781696, "learning_rate": 4.25210594071479e-08, "loss": 0.1764, "step": 21148 }, { "epoch": 0.97, "grad_norm": 0.3820738855619782, "learning_rate": 4.238410221362621e-08, "loss": 0.293, "step": 21149 }, { "epoch": 0.97, "grad_norm": 1.2899239708395975, "learning_rate": 4.224736547212449e-08, "loss": 0.3577, "step": 21150 }, { "epoch": 0.97, "grad_norm": 0.33520807674430275, "learning_rate": 4.21108491856681e-08, "loss": 0.2906, "step": 21151 }, { "epoch": 0.97, "grad_norm": 0.7828227675197496, "learning_rate": 4.1974553357281287e-08, "loss": 0.2488, "step": 21152 }, { "epoch": 0.97, "grad_norm": 0.6763536946890087, "learning_rate": 4.183847798997831e-08, "loss": 0.2712, "step": 21153 }, { "epoch": 0.97, "grad_norm": 0.3755502869864424, "learning_rate": 4.170262308677453e-08, "loss": 0.1857, "step": 21154 }, { "epoch": 0.97, "grad_norm": 0.36775123688393413, "learning_rate": 4.156698865067643e-08, "loss": 0.2712, "step": 21155 }, { "epoch": 0.97, "grad_norm": 0.35837980960294924, "learning_rate": 4.143157468468717e-08, "loss": 0.2368, "step": 21156 }, { "epoch": 0.97, "grad_norm": 0.7352457177494476, "learning_rate": 4.1296381191805456e-08, "loss": 0.3569, "step": 21157 }, { "epoch": 0.97, "grad_norm": 0.3447528485542165, "learning_rate": 4.116140817502223e-08, "loss": 0.2508, "step": 21158 }, { "epoch": 0.97, "grad_norm": 0.4069240183607295, "learning_rate": 4.102665563732844e-08, "loss": 0.2374, "step": 21159 }, { "epoch": 0.97, "grad_norm": 0.28272824598592494, "learning_rate": 4.089212358170502e-08, "loss": 0.1264, "step": 21160 }, { "epoch": 0.97, "grad_norm": 0.3336567824170479, "learning_rate": 4.0757812011131826e-08, "loss": 0.2527, "step": 21161 }, { "epoch": 0.97, "grad_norm": 1.3354131508591367, "learning_rate": 4.062372092858091e-08, "loss": 0.5973, "step": 21162 }, { "epoch": 0.97, "grad_norm": 0.3500555340706462, "learning_rate": 4.048985033702213e-08, "loss": 0.2553, "step": 21163 }, { "epoch": 0.97, "grad_norm": 0.3649962646381245, "learning_rate": 4.035620023941978e-08, "loss": 0.2658, "step": 21164 }, { "epoch": 0.97, "grad_norm": 1.0019637059957476, "learning_rate": 4.022277063873037e-08, "loss": 0.5109, "step": 21165 }, { "epoch": 0.97, "grad_norm": 0.2827467731650862, "learning_rate": 4.0089561537910436e-08, "loss": 0.0766, "step": 21166 }, { "epoch": 0.97, "grad_norm": 0.33517833406336506, "learning_rate": 3.995657293990762e-08, "loss": 0.2441, "step": 21167 }, { "epoch": 0.97, "grad_norm": 0.43954007001851886, "learning_rate": 3.9823804847667345e-08, "loss": 0.3241, "step": 21168 }, { "epoch": 0.97, "grad_norm": 0.6139278212217922, "learning_rate": 3.969125726412837e-08, "loss": 0.2553, "step": 21169 }, { "epoch": 0.97, "grad_norm": 0.3895024509949226, "learning_rate": 3.955893019222501e-08, "loss": 0.267, "step": 21170 }, { "epoch": 0.97, "grad_norm": 0.5150541960225514, "learning_rate": 3.9426823634887146e-08, "loss": 0.3739, "step": 21171 }, { "epoch": 0.97, "grad_norm": 0.2733817085484941, "learning_rate": 3.9294937595038e-08, "loss": 0.1971, "step": 21172 }, { "epoch": 0.97, "grad_norm": 0.3278815056491724, "learning_rate": 3.916327207559967e-08, "loss": 0.1849, "step": 21173 }, { "epoch": 0.97, "grad_norm": 0.7441019052134975, "learning_rate": 3.9031827079486494e-08, "loss": 0.3174, "step": 21174 }, { "epoch": 0.97, "grad_norm": 0.38421909331348925, "learning_rate": 3.890060260960726e-08, "loss": 0.2779, "step": 21175 }, { "epoch": 0.97, "grad_norm": 0.3377984463415079, "learning_rate": 3.8769598668868533e-08, "loss": 0.2178, "step": 21176 }, { "epoch": 0.97, "grad_norm": 1.3234114712323035, "learning_rate": 3.8638815260170216e-08, "loss": 0.5345, "step": 21177 }, { "epoch": 0.97, "grad_norm": 0.3171097486719009, "learning_rate": 3.8508252386407766e-08, "loss": 0.2007, "step": 21178 }, { "epoch": 0.97, "grad_norm": 0.2586488596845841, "learning_rate": 3.837791005047109e-08, "loss": 0.1942, "step": 21179 }, { "epoch": 0.97, "grad_norm": 0.5537220405789994, "learning_rate": 3.824778825524678e-08, "loss": 0.3234, "step": 21180 }, { "epoch": 0.97, "grad_norm": 0.6891975327188988, "learning_rate": 3.811788700361474e-08, "loss": 0.376, "step": 21181 }, { "epoch": 0.97, "grad_norm": 0.3167959515814778, "learning_rate": 3.798820629845157e-08, "loss": 0.2016, "step": 21182 }, { "epoch": 0.97, "grad_norm": 0.49363761044188875, "learning_rate": 3.78587461426283e-08, "loss": 0.3204, "step": 21183 }, { "epoch": 0.97, "grad_norm": 0.4076611109048864, "learning_rate": 3.7729506539009306e-08, "loss": 0.2507, "step": 21184 }, { "epoch": 0.97, "grad_norm": 0.300317878697749, "learning_rate": 3.760048749045897e-08, "loss": 0.2218, "step": 21185 }, { "epoch": 0.97, "grad_norm": 0.6442524984432371, "learning_rate": 3.747168899983167e-08, "loss": 0.2419, "step": 21186 }, { "epoch": 0.97, "grad_norm": 0.36976103359497225, "learning_rate": 3.734311106997845e-08, "loss": 0.3158, "step": 21187 }, { "epoch": 0.97, "grad_norm": 0.7121765990205521, "learning_rate": 3.721475370374705e-08, "loss": 0.3614, "step": 21188 }, { "epoch": 0.97, "grad_norm": 0.41234097706890954, "learning_rate": 3.7086616903978525e-08, "loss": 0.2316, "step": 21189 }, { "epoch": 0.97, "grad_norm": 0.3098329020265625, "learning_rate": 3.6958700673510596e-08, "loss": 0.2373, "step": 21190 }, { "epoch": 0.97, "grad_norm": 0.4378150333579222, "learning_rate": 3.6831005015173224e-08, "loss": 0.2596, "step": 21191 }, { "epoch": 0.97, "grad_norm": 0.36293096165001487, "learning_rate": 3.6703529931796376e-08, "loss": 0.2564, "step": 21192 }, { "epoch": 0.97, "grad_norm": 1.8038479033501196, "learning_rate": 3.6576275426200014e-08, "loss": 0.6379, "step": 21193 }, { "epoch": 0.97, "grad_norm": 0.4256762242256026, "learning_rate": 3.644924150120188e-08, "loss": 0.2625, "step": 21194 }, { "epoch": 0.97, "grad_norm": 0.32622133401488496, "learning_rate": 3.632242815961418e-08, "loss": 0.2301, "step": 21195 }, { "epoch": 0.97, "grad_norm": 2.1332287721141627, "learning_rate": 3.619583540424465e-08, "loss": 0.4592, "step": 21196 }, { "epoch": 0.97, "grad_norm": 0.29067740770146955, "learning_rate": 3.606946323789662e-08, "loss": 0.2099, "step": 21197 }, { "epoch": 0.97, "grad_norm": 0.5377624650099606, "learning_rate": 3.594331166336784e-08, "loss": 0.3049, "step": 21198 }, { "epoch": 0.97, "grad_norm": 0.3454137926577501, "learning_rate": 3.5817380683450534e-08, "loss": 0.2017, "step": 21199 }, { "epoch": 0.97, "grad_norm": 0.35488055391906154, "learning_rate": 3.5691670300932456e-08, "loss": 0.282, "step": 21200 }, { "epoch": 0.97, "grad_norm": 1.3948825295376965, "learning_rate": 3.5566180518595836e-08, "loss": 0.6282, "step": 21201 }, { "epoch": 0.97, "grad_norm": 0.46387491837264816, "learning_rate": 3.544091133922179e-08, "loss": 0.2331, "step": 21202 }, { "epoch": 0.97, "grad_norm": 0.3284127838592469, "learning_rate": 3.531586276558141e-08, "loss": 0.2464, "step": 21203 }, { "epoch": 0.97, "grad_norm": 0.47830274508794735, "learning_rate": 3.5191034800444724e-08, "loss": 0.3185, "step": 21204 }, { "epoch": 0.97, "grad_norm": 0.1926771375287304, "learning_rate": 3.506642744657285e-08, "loss": 0.1135, "step": 21205 }, { "epoch": 0.97, "grad_norm": 0.6683990935179004, "learning_rate": 3.494204070672691e-08, "loss": 0.3456, "step": 21206 }, { "epoch": 0.97, "grad_norm": 0.27475559463820753, "learning_rate": 3.481787458365915e-08, "loss": 0.2658, "step": 21207 }, { "epoch": 0.97, "grad_norm": 0.7368775766741292, "learning_rate": 3.4693929080119596e-08, "loss": 0.2639, "step": 21208 }, { "epoch": 0.97, "grad_norm": 0.6157885787519518, "learning_rate": 3.457020419885049e-08, "loss": 0.2942, "step": 21209 }, { "epoch": 0.97, "grad_norm": 0.3236562004900343, "learning_rate": 3.4446699942594083e-08, "loss": 0.208, "step": 21210 }, { "epoch": 0.97, "grad_norm": 0.286559086850683, "learning_rate": 3.432341631408154e-08, "loss": 0.2536, "step": 21211 }, { "epoch": 0.97, "grad_norm": 0.3407040510631902, "learning_rate": 3.4200353316043986e-08, "loss": 0.1861, "step": 21212 }, { "epoch": 0.97, "grad_norm": 0.6476043268069748, "learning_rate": 3.407751095120593e-08, "loss": 0.3231, "step": 21213 }, { "epoch": 0.97, "grad_norm": 1.2731813404021057, "learning_rate": 3.395488922228518e-08, "loss": 0.3844, "step": 21214 }, { "epoch": 0.97, "grad_norm": 0.2839452090941616, "learning_rate": 3.383248813199846e-08, "loss": 0.2135, "step": 21215 }, { "epoch": 0.97, "grad_norm": 0.5440037389550196, "learning_rate": 3.371030768305583e-08, "loss": 0.3183, "step": 21216 }, { "epoch": 0.97, "grad_norm": 0.4545933964056159, "learning_rate": 3.358834787816068e-08, "loss": 0.2573, "step": 21217 }, { "epoch": 0.97, "grad_norm": 0.24017430964761644, "learning_rate": 3.346660872001306e-08, "loss": 0.1449, "step": 21218 }, { "epoch": 0.97, "grad_norm": 0.31539219481897435, "learning_rate": 3.3345090211309714e-08, "loss": 0.275, "step": 21219 }, { "epoch": 0.97, "grad_norm": 0.8003366768286112, "learning_rate": 3.32237923547396e-08, "loss": 0.3542, "step": 21220 }, { "epoch": 0.97, "grad_norm": 0.4013846819438035, "learning_rate": 3.3102715152989464e-08, "loss": 0.1926, "step": 21221 }, { "epoch": 0.97, "grad_norm": 0.7209293263188188, "learning_rate": 3.298185860873826e-08, "loss": 0.3139, "step": 21222 }, { "epoch": 0.97, "grad_norm": 0.2787475070774912, "learning_rate": 3.286122272466164e-08, "loss": 0.2492, "step": 21223 }, { "epoch": 0.98, "grad_norm": 0.9342032863628137, "learning_rate": 3.2740807503433e-08, "loss": 0.5369, "step": 21224 }, { "epoch": 0.98, "grad_norm": 0.29178973945427933, "learning_rate": 3.262061294771468e-08, "loss": 0.1788, "step": 21225 }, { "epoch": 0.98, "grad_norm": 0.4512308396879576, "learning_rate": 3.250063906017009e-08, "loss": 0.3065, "step": 21226 }, { "epoch": 0.98, "grad_norm": 0.43534664975249715, "learning_rate": 3.238088584345489e-08, "loss": 0.2576, "step": 21227 }, { "epoch": 0.98, "grad_norm": 0.4004113421969863, "learning_rate": 3.226135330021918e-08, "loss": 0.2008, "step": 21228 }, { "epoch": 0.98, "grad_norm": 1.4152930772629513, "learning_rate": 3.2142041433109725e-08, "loss": 0.6204, "step": 21229 }, { "epoch": 0.98, "grad_norm": 0.32284866501451115, "learning_rate": 3.202295024476887e-08, "loss": 0.2474, "step": 21230 }, { "epoch": 0.98, "grad_norm": 0.25706249011883076, "learning_rate": 3.190407973783338e-08, "loss": 0.195, "step": 21231 }, { "epoch": 0.98, "grad_norm": 0.6353603765712579, "learning_rate": 3.178542991493339e-08, "loss": 0.3619, "step": 21232 }, { "epoch": 0.98, "grad_norm": 0.8455389317000258, "learning_rate": 3.166700077869678e-08, "loss": 0.2639, "step": 21233 }, { "epoch": 0.98, "grad_norm": 0.37674966122253767, "learning_rate": 3.1548792331744795e-08, "loss": 0.2167, "step": 21234 }, { "epoch": 0.98, "grad_norm": 0.35140975981692507, "learning_rate": 3.143080457669423e-08, "loss": 0.2987, "step": 21235 }, { "epoch": 0.98, "grad_norm": 0.5871168381309729, "learning_rate": 3.131303751615855e-08, "loss": 0.3149, "step": 21236 }, { "epoch": 0.98, "grad_norm": 0.4197770304680542, "learning_rate": 3.119549115274456e-08, "loss": 0.2847, "step": 21237 }, { "epoch": 0.98, "grad_norm": 0.29249004434999737, "learning_rate": 3.107816548905462e-08, "loss": 0.1914, "step": 21238 }, { "epoch": 0.98, "grad_norm": 0.3872772296648083, "learning_rate": 3.0961060527685546e-08, "loss": 0.2602, "step": 21239 }, { "epoch": 0.98, "grad_norm": 0.5092018362602075, "learning_rate": 3.084417627122971e-08, "loss": 0.2948, "step": 21240 }, { "epoch": 0.98, "grad_norm": 1.433082363358442, "learning_rate": 3.0727512722276143e-08, "loss": 0.3518, "step": 21241 }, { "epoch": 0.98, "grad_norm": 0.6410559524884702, "learning_rate": 3.061106988340612e-08, "loss": 0.3204, "step": 21242 }, { "epoch": 0.98, "grad_norm": 0.24684011441846793, "learning_rate": 3.04948477571998e-08, "loss": 0.258, "step": 21243 }, { "epoch": 0.98, "grad_norm": 0.33928708251979944, "learning_rate": 3.0378846346227345e-08, "loss": 0.142, "step": 21244 }, { "epoch": 0.98, "grad_norm": 1.0384358074695823, "learning_rate": 3.0263065653058923e-08, "loss": 0.329, "step": 21245 }, { "epoch": 0.98, "grad_norm": 0.42327810529431803, "learning_rate": 3.014750568025804e-08, "loss": 0.2726, "step": 21246 }, { "epoch": 0.98, "grad_norm": 0.3616440827123659, "learning_rate": 3.003216643038154e-08, "loss": 0.2597, "step": 21247 }, { "epoch": 0.98, "grad_norm": 0.6133712102811347, "learning_rate": 2.9917047905982934e-08, "loss": 0.3668, "step": 21248 }, { "epoch": 0.98, "grad_norm": 0.3686513266249708, "learning_rate": 2.9802150109612405e-08, "loss": 0.2865, "step": 21249 }, { "epoch": 0.98, "grad_norm": 0.5102102563509107, "learning_rate": 2.9687473043813476e-08, "loss": 0.2812, "step": 21250 }, { "epoch": 0.98, "grad_norm": 0.24414369689729915, "learning_rate": 2.9573016711124113e-08, "loss": 0.1685, "step": 21251 }, { "epoch": 0.98, "grad_norm": 0.39799343110575863, "learning_rate": 2.945878111407785e-08, "loss": 0.292, "step": 21252 }, { "epoch": 0.98, "grad_norm": 1.368877773107355, "learning_rate": 2.9344766255204883e-08, "loss": 0.8098, "step": 21253 }, { "epoch": 0.98, "grad_norm": 0.37144858207335923, "learning_rate": 2.9230972137028745e-08, "loss": 0.232, "step": 21254 }, { "epoch": 0.98, "grad_norm": 0.33987127005080076, "learning_rate": 2.911739876206965e-08, "loss": 0.2904, "step": 21255 }, { "epoch": 0.98, "grad_norm": 0.5599640184414972, "learning_rate": 2.9004046132840026e-08, "loss": 0.2653, "step": 21256 }, { "epoch": 0.98, "grad_norm": 0.345666156640323, "learning_rate": 2.8890914251851198e-08, "loss": 0.0796, "step": 21257 }, { "epoch": 0.98, "grad_norm": 0.4331338850598414, "learning_rate": 2.8778003121607834e-08, "loss": 0.2831, "step": 21258 }, { "epoch": 0.98, "grad_norm": 0.352709555514618, "learning_rate": 2.866531274460904e-08, "loss": 0.2728, "step": 21259 }, { "epoch": 0.98, "grad_norm": 0.7054333240007781, "learning_rate": 2.8552843123349494e-08, "loss": 0.2726, "step": 21260 }, { "epoch": 0.98, "grad_norm": 0.3412544342856643, "learning_rate": 2.844059426031831e-08, "loss": 0.2668, "step": 21261 }, { "epoch": 0.98, "grad_norm": 0.369696372883487, "learning_rate": 2.8328566158002392e-08, "loss": 0.2716, "step": 21262 }, { "epoch": 0.98, "grad_norm": 0.3521884000407015, "learning_rate": 2.8216758818881972e-08, "loss": 0.1607, "step": 21263 }, { "epoch": 0.98, "grad_norm": 0.3961871718288436, "learning_rate": 2.8105172245430633e-08, "loss": 0.1935, "step": 21264 }, { "epoch": 0.98, "grad_norm": 0.7101404435381233, "learning_rate": 2.799380644012084e-08, "loss": 0.4301, "step": 21265 }, { "epoch": 0.98, "grad_norm": 0.36668904113978323, "learning_rate": 2.7882661405416177e-08, "loss": 0.3166, "step": 21266 }, { "epoch": 0.98, "grad_norm": 0.31766907604596556, "learning_rate": 2.777173714377801e-08, "loss": 0.2089, "step": 21267 }, { "epoch": 0.98, "grad_norm": 1.6691790317194544, "learning_rate": 2.766103365766215e-08, "loss": 0.5932, "step": 21268 }, { "epoch": 0.98, "grad_norm": 0.34161704165660167, "learning_rate": 2.7550550949519972e-08, "loss": 0.1624, "step": 21269 }, { "epoch": 0.98, "grad_norm": 0.26600573706564357, "learning_rate": 2.7440289021797293e-08, "loss": 0.2051, "step": 21270 }, { "epoch": 0.98, "grad_norm": 0.5092789110746891, "learning_rate": 2.733024787693439e-08, "loss": 0.3228, "step": 21271 }, { "epoch": 0.98, "grad_norm": 0.8702057125583568, "learning_rate": 2.7220427517368196e-08, "loss": 0.4356, "step": 21272 }, { "epoch": 0.98, "grad_norm": 0.30987222007073606, "learning_rate": 2.7110827945530106e-08, "loss": 0.1815, "step": 21273 }, { "epoch": 0.98, "grad_norm": 0.4007343467849004, "learning_rate": 2.700144916384595e-08, "loss": 0.3157, "step": 21274 }, { "epoch": 0.98, "grad_norm": 0.3431377247540255, "learning_rate": 2.6892291174737127e-08, "loss": 0.2036, "step": 21275 }, { "epoch": 0.98, "grad_norm": 0.3866486044637885, "learning_rate": 2.6783353980621705e-08, "loss": 0.3035, "step": 21276 }, { "epoch": 0.98, "grad_norm": 0.660129926345083, "learning_rate": 2.667463758390998e-08, "loss": 0.2863, "step": 21277 }, { "epoch": 0.98, "grad_norm": 0.386075677318688, "learning_rate": 2.656614198701002e-08, "loss": 0.3326, "step": 21278 }, { "epoch": 0.98, "grad_norm": 0.36646764704006635, "learning_rate": 2.6457867192322128e-08, "loss": 0.2509, "step": 21279 }, { "epoch": 0.98, "grad_norm": 1.576757556302514, "learning_rate": 2.63498132022455e-08, "loss": 0.2143, "step": 21280 }, { "epoch": 0.98, "grad_norm": 0.2645309400044281, "learning_rate": 2.6241980019170445e-08, "loss": 0.1436, "step": 21281 }, { "epoch": 0.98, "grad_norm": 0.33933324455144787, "learning_rate": 2.613436764548505e-08, "loss": 0.2398, "step": 21282 }, { "epoch": 0.98, "grad_norm": 0.3655010655720267, "learning_rate": 2.6026976083572963e-08, "loss": 0.2619, "step": 21283 }, { "epoch": 0.98, "grad_norm": 1.2599414169956256, "learning_rate": 2.5919805335810067e-08, "loss": 0.7693, "step": 21284 }, { "epoch": 0.98, "grad_norm": 0.34157161480711806, "learning_rate": 2.5812855404568903e-08, "loss": 0.2623, "step": 21285 }, { "epoch": 0.98, "grad_norm": 0.5565150788690916, "learning_rate": 2.570612629221758e-08, "loss": 0.3704, "step": 21286 }, { "epoch": 0.98, "grad_norm": 0.30421787885274804, "learning_rate": 2.5599618001120874e-08, "loss": 0.1587, "step": 21287 }, { "epoch": 0.98, "grad_norm": 0.35501860426510085, "learning_rate": 2.5493330533633564e-08, "loss": 0.2582, "step": 21288 }, { "epoch": 0.98, "grad_norm": 0.8412439799572042, "learning_rate": 2.5387263892111546e-08, "loss": 0.3905, "step": 21289 }, { "epoch": 0.98, "grad_norm": 0.33771670088479894, "learning_rate": 2.5281418078900723e-08, "loss": 0.2689, "step": 21290 }, { "epoch": 0.98, "grad_norm": 0.4398475008834995, "learning_rate": 2.517579309634588e-08, "loss": 0.2757, "step": 21291 }, { "epoch": 0.98, "grad_norm": 0.6290943742878116, "learning_rate": 2.507038894678626e-08, "loss": 0.3374, "step": 21292 }, { "epoch": 0.98, "grad_norm": 0.4795959606786148, "learning_rate": 2.4965205632553334e-08, "loss": 0.2541, "step": 21293 }, { "epoch": 0.98, "grad_norm": 0.42918830067401353, "learning_rate": 2.486024315597635e-08, "loss": 0.2524, "step": 21294 }, { "epoch": 0.98, "grad_norm": 0.25327089427839544, "learning_rate": 2.475550151938011e-08, "loss": 0.2173, "step": 21295 }, { "epoch": 0.98, "grad_norm": 1.080946765224079, "learning_rate": 2.4650980725082762e-08, "loss": 0.5248, "step": 21296 }, { "epoch": 0.98, "grad_norm": 0.35778627677436703, "learning_rate": 2.4546680775398013e-08, "loss": 0.2706, "step": 21297 }, { "epoch": 0.98, "grad_norm": 0.41466058702632164, "learning_rate": 2.4442601672635125e-08, "loss": 0.2884, "step": 21298 }, { "epoch": 0.98, "grad_norm": 0.9727301430201135, "learning_rate": 2.433874341909892e-08, "loss": 0.4358, "step": 21299 }, { "epoch": 0.98, "grad_norm": 0.33762389645408836, "learning_rate": 2.4235106017087562e-08, "loss": 0.2125, "step": 21300 }, { "epoch": 0.98, "grad_norm": 0.5579403047485948, "learning_rate": 2.413168946889699e-08, "loss": 0.3124, "step": 21301 }, { "epoch": 0.98, "grad_norm": 0.3902427560940118, "learning_rate": 2.4028493776815375e-08, "loss": 0.3282, "step": 21302 }, { "epoch": 0.98, "grad_norm": 0.23493729017604723, "learning_rate": 2.3925518943128667e-08, "loss": 0.1577, "step": 21303 }, { "epoch": 0.98, "grad_norm": 1.6799045193712885, "learning_rate": 2.3822764970115042e-08, "loss": 0.5659, "step": 21304 }, { "epoch": 0.98, "grad_norm": 0.6026877435219661, "learning_rate": 2.3720231860051567e-08, "loss": 0.3542, "step": 21305 }, { "epoch": 0.98, "grad_norm": 0.25011324802411417, "learning_rate": 2.361791961520532e-08, "loss": 0.2201, "step": 21306 }, { "epoch": 0.98, "grad_norm": 0.39371531913698155, "learning_rate": 2.3515828237843376e-08, "loss": 0.1782, "step": 21307 }, { "epoch": 0.98, "grad_norm": 0.4849964366831416, "learning_rate": 2.3413957730226144e-08, "loss": 0.3395, "step": 21308 }, { "epoch": 0.98, "grad_norm": 0.35801688402775805, "learning_rate": 2.3312308094607382e-08, "loss": 0.1732, "step": 21309 }, { "epoch": 0.98, "grad_norm": 0.34919518761617685, "learning_rate": 2.321087933323973e-08, "loss": 0.2932, "step": 21310 }, { "epoch": 0.98, "grad_norm": 0.5854497949209628, "learning_rate": 2.3109671448366955e-08, "loss": 0.3206, "step": 21311 }, { "epoch": 0.98, "grad_norm": 0.5375385914320893, "learning_rate": 2.300868444222948e-08, "loss": 0.2343, "step": 21312 }, { "epoch": 0.98, "grad_norm": 0.38273048188306785, "learning_rate": 2.2907918317064403e-08, "loss": 0.2294, "step": 21313 }, { "epoch": 0.98, "grad_norm": 0.3647377467137911, "learning_rate": 2.280737307510217e-08, "loss": 0.2905, "step": 21314 }, { "epoch": 0.98, "grad_norm": 0.23397169590163214, "learning_rate": 2.270704871856877e-08, "loss": 0.1551, "step": 21315 }, { "epoch": 0.98, "grad_norm": 0.4052270864039506, "learning_rate": 2.2606945249684654e-08, "loss": 0.2434, "step": 21316 }, { "epoch": 0.98, "grad_norm": 0.7502293236598188, "learning_rate": 2.2507062670665826e-08, "loss": 0.4108, "step": 21317 }, { "epoch": 0.98, "grad_norm": 0.29471859520888355, "learning_rate": 2.240740098372496e-08, "loss": 0.2658, "step": 21318 }, { "epoch": 0.98, "grad_norm": 0.6622238365736859, "learning_rate": 2.2307960191066956e-08, "loss": 0.262, "step": 21319 }, { "epoch": 0.98, "grad_norm": 0.45151838436898933, "learning_rate": 2.2208740294895613e-08, "loss": 0.2679, "step": 21320 }, { "epoch": 0.98, "grad_norm": 0.3652121867557897, "learning_rate": 2.210974129740473e-08, "loss": 0.226, "step": 21321 }, { "epoch": 0.98, "grad_norm": 0.2983816854341923, "learning_rate": 2.2010963200786995e-08, "loss": 0.2409, "step": 21322 }, { "epoch": 0.98, "grad_norm": 0.6172571065155857, "learning_rate": 2.191240600723066e-08, "loss": 0.3492, "step": 21323 }, { "epoch": 0.98, "grad_norm": 0.5041019598930978, "learning_rate": 2.1814069718916198e-08, "loss": 0.2589, "step": 21324 }, { "epoch": 0.98, "grad_norm": 0.5411510835184253, "learning_rate": 2.171595433802187e-08, "loss": 0.3529, "step": 21325 }, { "epoch": 0.98, "grad_norm": 0.3723726555736365, "learning_rate": 2.1618059866718166e-08, "loss": 0.2617, "step": 21326 }, { "epoch": 0.98, "grad_norm": 0.29151194509133554, "learning_rate": 2.1520386307173346e-08, "loss": 0.2004, "step": 21327 }, { "epoch": 0.98, "grad_norm": 0.47669243734525635, "learning_rate": 2.1422933661550127e-08, "loss": 0.2957, "step": 21328 }, { "epoch": 0.98, "grad_norm": 0.2995718036469097, "learning_rate": 2.132570193200567e-08, "loss": 0.201, "step": 21329 }, { "epoch": 0.98, "grad_norm": 0.693968674488601, "learning_rate": 2.12286911206927e-08, "loss": 0.3785, "step": 21330 }, { "epoch": 0.98, "grad_norm": 0.4376728374311516, "learning_rate": 2.113190122975839e-08, "loss": 0.2915, "step": 21331 }, { "epoch": 0.98, "grad_norm": 1.5141101124695313, "learning_rate": 2.1035332261346576e-08, "loss": 0.3555, "step": 21332 }, { "epoch": 0.98, "grad_norm": 0.6060788249286279, "learning_rate": 2.0938984217594437e-08, "loss": 0.2501, "step": 21333 }, { "epoch": 0.98, "grad_norm": 0.2679824033350691, "learning_rate": 2.0842857100635826e-08, "loss": 0.2615, "step": 21334 }, { "epoch": 0.98, "grad_norm": 0.25513368066484793, "learning_rate": 2.074695091259793e-08, "loss": 0.1117, "step": 21335 }, { "epoch": 0.98, "grad_norm": 0.7425642097530736, "learning_rate": 2.065126565560349e-08, "loss": 0.3308, "step": 21336 }, { "epoch": 0.98, "grad_norm": 0.4401652797310039, "learning_rate": 2.055580133177304e-08, "loss": 0.3431, "step": 21337 }, { "epoch": 0.98, "grad_norm": 0.37529100286738093, "learning_rate": 2.046055794321822e-08, "loss": 0.2973, "step": 21338 }, { "epoch": 0.98, "grad_norm": 0.4132268392744198, "learning_rate": 2.036553549204845e-08, "loss": 0.1987, "step": 21339 }, { "epoch": 0.98, "grad_norm": 0.48773075506266494, "learning_rate": 2.0270733980366496e-08, "loss": 0.3223, "step": 21340 }, { "epoch": 0.98, "grad_norm": 0.31417343644405044, "learning_rate": 2.0176153410272902e-08, "loss": 0.2068, "step": 21341 }, { "epoch": 0.98, "grad_norm": 0.3143277721599033, "learning_rate": 2.0081793783860437e-08, "loss": 0.1904, "step": 21342 }, { "epoch": 0.98, "grad_norm": 0.6360589443059183, "learning_rate": 1.998765510321743e-08, "loss": 0.3334, "step": 21343 }, { "epoch": 0.98, "grad_norm": 0.7081486553529889, "learning_rate": 1.989373737042999e-08, "loss": 0.4229, "step": 21344 }, { "epoch": 0.98, "grad_norm": 0.4160069480029801, "learning_rate": 1.9800040587575342e-08, "loss": 0.2287, "step": 21345 }, { "epoch": 0.98, "grad_norm": 0.3482785764092505, "learning_rate": 1.9706564756729606e-08, "loss": 0.2749, "step": 21346 }, { "epoch": 0.98, "grad_norm": 0.2824763869286202, "learning_rate": 1.961330987996113e-08, "loss": 0.1727, "step": 21347 }, { "epoch": 0.98, "grad_norm": 0.6970283086935132, "learning_rate": 1.9520275959334922e-08, "loss": 0.0961, "step": 21348 }, { "epoch": 0.98, "grad_norm": 0.3944109513640567, "learning_rate": 1.9427462996910452e-08, "loss": 0.3157, "step": 21349 }, { "epoch": 0.98, "grad_norm": 0.38949912529158587, "learning_rate": 1.933487099474163e-08, "loss": 0.299, "step": 21350 }, { "epoch": 0.98, "grad_norm": 0.6082916501084299, "learning_rate": 1.9242499954880145e-08, "loss": 0.3167, "step": 21351 }, { "epoch": 0.98, "grad_norm": 0.48454505816411697, "learning_rate": 1.9150349879369924e-08, "loss": 0.2059, "step": 21352 }, { "epoch": 0.98, "grad_norm": 0.29144940894457944, "learning_rate": 1.905842077025155e-08, "loss": 0.1739, "step": 21353 }, { "epoch": 0.98, "grad_norm": 0.34396326961043594, "learning_rate": 1.896671262955896e-08, "loss": 0.2637, "step": 21354 }, { "epoch": 0.98, "grad_norm": 0.34922781290675475, "learning_rate": 1.8875225459323853e-08, "loss": 0.2214, "step": 21355 }, { "epoch": 0.98, "grad_norm": 0.6407810511251222, "learning_rate": 1.878395926157239e-08, "loss": 0.3867, "step": 21356 }, { "epoch": 0.98, "grad_norm": 0.33454042850774107, "learning_rate": 1.869291403832407e-08, "loss": 0.283, "step": 21357 }, { "epoch": 0.98, "grad_norm": 0.3473279800208587, "learning_rate": 1.8602089791592836e-08, "loss": 0.2078, "step": 21358 }, { "epoch": 0.98, "grad_norm": 0.38243798884197344, "learning_rate": 1.851148652339263e-08, "loss": 0.1612, "step": 21359 }, { "epoch": 0.98, "grad_norm": 0.7445926408566705, "learning_rate": 1.8421104235727406e-08, "loss": 0.3188, "step": 21360 }, { "epoch": 0.98, "grad_norm": 0.39312976958267143, "learning_rate": 1.8330942930598894e-08, "loss": 0.236, "step": 21361 }, { "epoch": 0.98, "grad_norm": 0.3290475215844878, "learning_rate": 1.8241002610002167e-08, "loss": 0.2869, "step": 21362 }, { "epoch": 0.98, "grad_norm": 0.5714816575137822, "learning_rate": 1.8151283275928966e-08, "loss": 0.3136, "step": 21363 }, { "epoch": 0.98, "grad_norm": 0.43796923289986767, "learning_rate": 1.8061784930366587e-08, "loss": 0.3078, "step": 21364 }, { "epoch": 0.98, "grad_norm": 0.3846413864364781, "learning_rate": 1.7972507575294564e-08, "loss": 0.2088, "step": 21365 }, { "epoch": 0.98, "grad_norm": 0.47570284698234705, "learning_rate": 1.7883451212691307e-08, "loss": 0.2092, "step": 21366 }, { "epoch": 0.98, "grad_norm": 0.34079647160835025, "learning_rate": 1.779461584452746e-08, "loss": 0.2509, "step": 21367 }, { "epoch": 0.98, "grad_norm": 0.5943221061808651, "learning_rate": 1.770600147276924e-08, "loss": 0.2921, "step": 21368 }, { "epoch": 0.98, "grad_norm": 0.3490005787653116, "learning_rate": 1.7617608099379514e-08, "loss": 0.2619, "step": 21369 }, { "epoch": 0.98, "grad_norm": 0.37053821908122403, "learning_rate": 1.7529435726315602e-08, "loss": 0.2889, "step": 21370 }, { "epoch": 0.98, "grad_norm": 0.5093449288495477, "learning_rate": 1.744148435552706e-08, "loss": 0.0847, "step": 21371 }, { "epoch": 0.98, "grad_norm": 0.486502360872377, "learning_rate": 1.735375398896344e-08, "loss": 0.231, "step": 21372 }, { "epoch": 0.98, "grad_norm": 0.3036772053791677, "learning_rate": 1.726624462856652e-08, "loss": 0.2491, "step": 21373 }, { "epoch": 0.98, "grad_norm": 0.4851969622693287, "learning_rate": 1.717895627627364e-08, "loss": 0.2716, "step": 21374 }, { "epoch": 0.98, "grad_norm": 0.7859446706697011, "learning_rate": 1.7091888934016586e-08, "loss": 0.4836, "step": 21375 }, { "epoch": 0.98, "grad_norm": 0.37740454108999655, "learning_rate": 1.700504260372382e-08, "loss": 0.2426, "step": 21376 }, { "epoch": 0.98, "grad_norm": 0.5894058889389044, "learning_rate": 1.6918417287318245e-08, "loss": 0.3395, "step": 21377 }, { "epoch": 0.98, "grad_norm": 0.2459647796658079, "learning_rate": 1.6832012986716107e-08, "loss": 0.1589, "step": 21378 }, { "epoch": 0.98, "grad_norm": 0.371035775795198, "learning_rate": 1.6745829703831428e-08, "loss": 0.252, "step": 21379 }, { "epoch": 0.98, "grad_norm": 0.793475639671936, "learning_rate": 1.6659867440572685e-08, "loss": 0.4412, "step": 21380 }, { "epoch": 0.98, "grad_norm": 0.3010231957959712, "learning_rate": 1.657412619884169e-08, "loss": 0.2326, "step": 21381 }, { "epoch": 0.98, "grad_norm": 0.38469722305541704, "learning_rate": 1.648860598053803e-08, "loss": 0.2659, "step": 21382 }, { "epoch": 0.98, "grad_norm": 0.6916044277506731, "learning_rate": 1.640330678755464e-08, "loss": 0.2333, "step": 21383 }, { "epoch": 0.98, "grad_norm": 0.46728579785011565, "learning_rate": 1.63182286217789e-08, "loss": 0.2, "step": 21384 }, { "epoch": 0.98, "grad_norm": 0.27332715095222404, "learning_rate": 1.6233371485094852e-08, "loss": 0.2584, "step": 21385 }, { "epoch": 0.98, "grad_norm": 0.4842132021336142, "learning_rate": 1.614873537938211e-08, "loss": 0.3485, "step": 21386 }, { "epoch": 0.98, "grad_norm": 1.7473790827464588, "learning_rate": 1.6064320306513616e-08, "loss": 0.3474, "step": 21387 }, { "epoch": 0.98, "grad_norm": 0.3254348512201076, "learning_rate": 1.5980126268358985e-08, "loss": 0.2546, "step": 21388 }, { "epoch": 0.98, "grad_norm": 0.5258563295234254, "learning_rate": 1.5896153266781177e-08, "loss": 0.3133, "step": 21389 }, { "epoch": 0.98, "grad_norm": 0.4458996195858037, "learning_rate": 1.5812401303639813e-08, "loss": 0.3132, "step": 21390 }, { "epoch": 0.98, "grad_norm": 0.26577259944799275, "learning_rate": 1.5728870380788962e-08, "loss": 0.1733, "step": 21391 }, { "epoch": 0.98, "grad_norm": 0.43053350481897545, "learning_rate": 1.5645560500078262e-08, "loss": 0.2519, "step": 21392 }, { "epoch": 0.98, "grad_norm": 0.3925129341697558, "learning_rate": 1.5562471663351787e-08, "loss": 0.3136, "step": 21393 }, { "epoch": 0.98, "grad_norm": 0.3201601637457968, "learning_rate": 1.5479603872448068e-08, "loss": 0.178, "step": 21394 }, { "epoch": 0.98, "grad_norm": 0.9346199355606215, "learning_rate": 1.5396957129203416e-08, "loss": 0.4417, "step": 21395 }, { "epoch": 0.98, "grad_norm": 0.4884474059913416, "learning_rate": 1.5314531435447477e-08, "loss": 0.3097, "step": 21396 }, { "epoch": 0.98, "grad_norm": 0.3044605236767771, "learning_rate": 1.523232679300324e-08, "loss": 0.1816, "step": 21397 }, { "epoch": 0.98, "grad_norm": 0.336287815575612, "learning_rate": 1.5150343203692574e-08, "loss": 0.2229, "step": 21398 }, { "epoch": 0.98, "grad_norm": 0.5316219945471311, "learning_rate": 1.506858066932959e-08, "loss": 0.2703, "step": 21399 }, { "epoch": 0.98, "grad_norm": 0.3183849877125431, "learning_rate": 1.498703919172506e-08, "loss": 0.1817, "step": 21400 }, { "epoch": 0.98, "grad_norm": 0.3691607112251648, "learning_rate": 1.49057187726831e-08, "loss": 0.2758, "step": 21401 }, { "epoch": 0.98, "grad_norm": 0.7430593630501673, "learning_rate": 1.4824619414005592e-08, "loss": 0.3655, "step": 21402 }, { "epoch": 0.98, "grad_norm": 0.3206136959706364, "learning_rate": 1.4743741117486666e-08, "loss": 0.2218, "step": 21403 }, { "epoch": 0.98, "grad_norm": 0.2972440402852377, "learning_rate": 1.4663083884917107e-08, "loss": 0.1671, "step": 21404 }, { "epoch": 0.98, "grad_norm": 0.2659607812568832, "learning_rate": 1.4582647718083264e-08, "loss": 0.2268, "step": 21405 }, { "epoch": 0.98, "grad_norm": 0.3621797524273526, "learning_rate": 1.4502432618765938e-08, "loss": 0.2392, "step": 21406 }, { "epoch": 0.98, "grad_norm": 0.9116437916921559, "learning_rate": 1.4422438588740372e-08, "loss": 0.2356, "step": 21407 }, { "epoch": 0.98, "grad_norm": 0.8701648671885002, "learning_rate": 1.4342665629777375e-08, "loss": 0.3714, "step": 21408 }, { "epoch": 0.98, "grad_norm": 0.2885970029428302, "learning_rate": 1.4263113743643308e-08, "loss": 0.2598, "step": 21409 }, { "epoch": 0.98, "grad_norm": 0.47969700176231017, "learning_rate": 1.4183782932100099e-08, "loss": 0.2451, "step": 21410 }, { "epoch": 0.98, "grad_norm": 0.29292654065527207, "learning_rate": 1.4104673196903007e-08, "loss": 0.183, "step": 21411 }, { "epoch": 0.98, "grad_norm": 0.3320630433037302, "learning_rate": 1.4025784539803966e-08, "loss": 0.2691, "step": 21412 }, { "epoch": 0.98, "grad_norm": 0.36344725918382764, "learning_rate": 1.3947116962549356e-08, "loss": 0.2915, "step": 21413 }, { "epoch": 0.98, "grad_norm": 0.8431029062533919, "learning_rate": 1.3868670466882229e-08, "loss": 0.2808, "step": 21414 }, { "epoch": 0.98, "grad_norm": 0.43288094741148736, "learning_rate": 1.379044505453675e-08, "loss": 0.2651, "step": 21415 }, { "epoch": 0.98, "grad_norm": 0.6125725358015277, "learning_rate": 1.371244072724598e-08, "loss": 0.407, "step": 21416 }, { "epoch": 0.98, "grad_norm": 0.2243136041331308, "learning_rate": 1.3634657486737424e-08, "loss": 0.1797, "step": 21417 }, { "epoch": 0.98, "grad_norm": 0.435768294323353, "learning_rate": 1.3557095334731928e-08, "loss": 0.2798, "step": 21418 }, { "epoch": 0.98, "grad_norm": 0.44151959667755075, "learning_rate": 1.347975427294701e-08, "loss": 0.323, "step": 21419 }, { "epoch": 0.98, "grad_norm": 1.4135068496302063, "learning_rate": 1.340263430309574e-08, "loss": 0.1857, "step": 21420 }, { "epoch": 0.98, "grad_norm": 0.28211412407628406, "learning_rate": 1.3325735426885644e-08, "loss": 0.2308, "step": 21421 }, { "epoch": 0.98, "grad_norm": 0.5166156718762321, "learning_rate": 1.3249057646017583e-08, "loss": 0.3569, "step": 21422 }, { "epoch": 0.98, "grad_norm": 0.41855496783663165, "learning_rate": 1.3172600962190196e-08, "loss": 0.1448, "step": 21423 }, { "epoch": 0.98, "grad_norm": 0.34279074320989944, "learning_rate": 1.3096365377095466e-08, "loss": 0.2573, "step": 21424 }, { "epoch": 0.98, "grad_norm": 0.3612364547001016, "learning_rate": 1.302035089242204e-08, "loss": 0.3015, "step": 21425 }, { "epoch": 0.98, "grad_norm": 0.41494882316468046, "learning_rate": 1.2944557509853017e-08, "loss": 0.2168, "step": 21426 }, { "epoch": 0.98, "grad_norm": 0.36323578903913956, "learning_rate": 1.2868985231065945e-08, "loss": 0.1842, "step": 21427 }, { "epoch": 0.98, "grad_norm": 1.1827290624131916, "learning_rate": 1.2793634057732817e-08, "loss": 0.6482, "step": 21428 }, { "epoch": 0.98, "grad_norm": 0.39474622838025364, "learning_rate": 1.2718503991523412e-08, "loss": 0.2954, "step": 21429 }, { "epoch": 0.98, "grad_norm": 0.3509030599713413, "learning_rate": 1.2643595034100842e-08, "loss": 0.2057, "step": 21430 }, { "epoch": 0.98, "grad_norm": 0.5418254391523893, "learning_rate": 1.2568907187122669e-08, "loss": 0.271, "step": 21431 }, { "epoch": 0.98, "grad_norm": 0.41381462209647335, "learning_rate": 1.2494440452243128e-08, "loss": 0.206, "step": 21432 }, { "epoch": 0.98, "grad_norm": 0.30476195298695946, "learning_rate": 1.2420194831110899e-08, "loss": 0.1999, "step": 21433 }, { "epoch": 0.98, "grad_norm": 0.5972368383314923, "learning_rate": 1.2346170325368001e-08, "loss": 0.4318, "step": 21434 }, { "epoch": 0.98, "grad_norm": 0.694370966541876, "learning_rate": 1.2272366936656454e-08, "loss": 0.3913, "step": 21435 }, { "epoch": 0.98, "grad_norm": 0.3865850516238881, "learning_rate": 1.2198784666607177e-08, "loss": 0.1931, "step": 21436 }, { "epoch": 0.98, "grad_norm": 0.26412259132432797, "learning_rate": 1.2125423516851087e-08, "loss": 0.2315, "step": 21437 }, { "epoch": 0.98, "grad_norm": 0.640891446762223, "learning_rate": 1.205228348901133e-08, "loss": 0.2151, "step": 21438 }, { "epoch": 0.98, "grad_norm": 0.6677313329424343, "learning_rate": 1.197936458470772e-08, "loss": 0.3154, "step": 21439 }, { "epoch": 0.98, "grad_norm": 0.3170431112606332, "learning_rate": 1.1906666805554523e-08, "loss": 0.2562, "step": 21440 }, { "epoch": 0.98, "grad_norm": 0.466802882961251, "learning_rate": 1.1834190153160452e-08, "loss": 0.3261, "step": 21441 }, { "epoch": 0.99, "grad_norm": 0.4338549633435439, "learning_rate": 1.176193462913089e-08, "loss": 0.2742, "step": 21442 }, { "epoch": 0.99, "grad_norm": 0.39209135191580274, "learning_rate": 1.1689900235065666e-08, "loss": 0.1453, "step": 21443 }, { "epoch": 0.99, "grad_norm": 0.36593671518857845, "learning_rate": 1.1618086972559062e-08, "loss": 0.24, "step": 21444 }, { "epoch": 0.99, "grad_norm": 0.34187874634592147, "learning_rate": 1.1546494843200917e-08, "loss": 0.2653, "step": 21445 }, { "epoch": 0.99, "grad_norm": 0.5127058783860863, "learning_rate": 1.147512384857663e-08, "loss": 0.2753, "step": 21446 }, { "epoch": 0.99, "grad_norm": 0.583544291950978, "learning_rate": 1.1403973990266049e-08, "loss": 0.3324, "step": 21447 }, { "epoch": 0.99, "grad_norm": 0.46383763193807964, "learning_rate": 1.1333045269843467e-08, "loss": 0.2646, "step": 21448 }, { "epoch": 0.99, "grad_norm": 0.31313801674014163, "learning_rate": 1.1262337688880964e-08, "loss": 0.2334, "step": 21449 }, { "epoch": 0.99, "grad_norm": 0.35724312795616076, "learning_rate": 1.1191851248942842e-08, "loss": 0.1528, "step": 21450 }, { "epoch": 0.99, "grad_norm": 0.8234689721889634, "learning_rate": 1.1121585951590074e-08, "loss": 0.4019, "step": 21451 }, { "epoch": 0.99, "grad_norm": 0.36334023472000265, "learning_rate": 1.1051541798378086e-08, "loss": 0.2677, "step": 21452 }, { "epoch": 0.99, "grad_norm": 0.35157013984139307, "learning_rate": 1.0981718790856744e-08, "loss": 0.2595, "step": 21453 }, { "epoch": 0.99, "grad_norm": 0.7744710429800236, "learning_rate": 1.0912116930572592e-08, "loss": 0.2674, "step": 21454 }, { "epoch": 0.99, "grad_norm": 0.4121275045507686, "learning_rate": 1.084273621906773e-08, "loss": 0.2726, "step": 21455 }, { "epoch": 0.99, "grad_norm": 0.22254162498513125, "learning_rate": 1.0773576657875372e-08, "loss": 0.1614, "step": 21456 }, { "epoch": 0.99, "grad_norm": 0.6856359784415711, "learning_rate": 1.0704638248529852e-08, "loss": 0.3856, "step": 21457 }, { "epoch": 0.99, "grad_norm": 0.38923153033621244, "learning_rate": 1.0635920992554393e-08, "loss": 0.302, "step": 21458 }, { "epoch": 0.99, "grad_norm": 0.7749417265166105, "learning_rate": 1.0567424891473332e-08, "loss": 0.3157, "step": 21459 }, { "epoch": 0.99, "grad_norm": 0.40492033841371855, "learning_rate": 1.0499149946801012e-08, "loss": 0.2578, "step": 21460 }, { "epoch": 0.99, "grad_norm": 0.36771311176778, "learning_rate": 1.0431096160048449e-08, "loss": 0.2836, "step": 21461 }, { "epoch": 0.99, "grad_norm": 0.35313126036104814, "learning_rate": 1.0363263532724433e-08, "loss": 0.0852, "step": 21462 }, { "epoch": 0.99, "grad_norm": 0.44407847151800756, "learning_rate": 1.0295652066328877e-08, "loss": 0.2766, "step": 21463 }, { "epoch": 0.99, "grad_norm": 0.5294988892810603, "learning_rate": 1.022826176236058e-08, "loss": 0.2988, "step": 21464 }, { "epoch": 0.99, "grad_norm": 0.35221151794253763, "learning_rate": 1.0161092622309465e-08, "loss": 0.2929, "step": 21465 }, { "epoch": 0.99, "grad_norm": 0.3582539767322185, "learning_rate": 1.009414464766323e-08, "loss": 0.1653, "step": 21466 }, { "epoch": 0.99, "grad_norm": 0.6277326791983803, "learning_rate": 1.0027417839904019e-08, "loss": 0.3302, "step": 21467 }, { "epoch": 0.99, "grad_norm": 0.23630472445010045, "learning_rate": 9.960912200510652e-09, "loss": 0.2118, "step": 21468 }, { "epoch": 0.99, "grad_norm": 0.7891375730060838, "learning_rate": 9.894627730953066e-09, "loss": 0.2411, "step": 21469 }, { "epoch": 0.99, "grad_norm": 0.3667227319722737, "learning_rate": 9.828564432700082e-09, "loss": 0.2638, "step": 21470 }, { "epoch": 0.99, "grad_norm": 0.9885794648831816, "learning_rate": 9.762722307213867e-09, "loss": 0.5114, "step": 21471 }, { "epoch": 0.99, "grad_norm": 0.3194600554178285, "learning_rate": 9.697101355952143e-09, "loss": 0.2245, "step": 21472 }, { "epoch": 0.99, "grad_norm": 0.3994135132179661, "learning_rate": 9.63170158036819e-09, "loss": 0.2796, "step": 21473 }, { "epoch": 0.99, "grad_norm": 0.3496199808465557, "learning_rate": 9.566522981909743e-09, "loss": 0.1525, "step": 21474 }, { "epoch": 0.99, "grad_norm": 0.5022273922304606, "learning_rate": 9.501565562018977e-09, "loss": 0.2201, "step": 21475 }, { "epoch": 0.99, "grad_norm": 0.31959411721682784, "learning_rate": 9.436829322134743e-09, "loss": 0.2661, "step": 21476 }, { "epoch": 0.99, "grad_norm": 0.5379090744882348, "learning_rate": 9.372314263690342e-09, "loss": 0.3833, "step": 21477 }, { "epoch": 0.99, "grad_norm": 0.8755134587203777, "learning_rate": 9.308020388113515e-09, "loss": 0.3975, "step": 21478 }, { "epoch": 0.99, "grad_norm": 0.365790249320034, "learning_rate": 9.243947696828682e-09, "loss": 0.1951, "step": 21479 }, { "epoch": 0.99, "grad_norm": 0.28762831275755524, "learning_rate": 9.180096191253595e-09, "loss": 0.2371, "step": 21480 }, { "epoch": 0.99, "grad_norm": 0.5390754470619237, "learning_rate": 9.116465872800462e-09, "loss": 0.2958, "step": 21481 }, { "epoch": 0.99, "grad_norm": 0.28065442167481003, "learning_rate": 9.053056742880373e-09, "loss": 0.1627, "step": 21482 }, { "epoch": 0.99, "grad_norm": 1.2738478937073574, "learning_rate": 8.98986880289665e-09, "loss": 0.7054, "step": 21483 }, { "epoch": 0.99, "grad_norm": 0.33266014770109253, "learning_rate": 8.926902054247067e-09, "loss": 0.2774, "step": 21484 }, { "epoch": 0.99, "grad_norm": 0.5164999263111189, "learning_rate": 8.86415649832717e-09, "loss": 0.2116, "step": 21485 }, { "epoch": 0.99, "grad_norm": 0.8015601157427507, "learning_rate": 8.80163213652474e-09, "loss": 0.3166, "step": 21486 }, { "epoch": 0.99, "grad_norm": 0.38568232967970706, "learning_rate": 8.739328970224226e-09, "loss": 0.2058, "step": 21487 }, { "epoch": 0.99, "grad_norm": 0.2799198887920922, "learning_rate": 8.677247000805632e-09, "loss": 0.2126, "step": 21488 }, { "epoch": 0.99, "grad_norm": 0.34370398996394924, "learning_rate": 8.615386229642309e-09, "loss": 0.2579, "step": 21489 }, { "epoch": 0.99, "grad_norm": 1.650960231161021, "learning_rate": 8.553746658105377e-09, "loss": 0.6912, "step": 21490 }, { "epoch": 0.99, "grad_norm": 0.35387691082339146, "learning_rate": 8.492328287558194e-09, "loss": 0.2459, "step": 21491 }, { "epoch": 0.99, "grad_norm": 0.3886156113408288, "learning_rate": 8.431131119361891e-09, "loss": 0.2543, "step": 21492 }, { "epoch": 0.99, "grad_norm": 0.8117505151600072, "learning_rate": 8.37015515486872e-09, "loss": 0.3802, "step": 21493 }, { "epoch": 0.99, "grad_norm": 0.34974377887697355, "learning_rate": 8.309400395432043e-09, "loss": 0.2714, "step": 21494 }, { "epoch": 0.99, "grad_norm": 0.2812790479749563, "learning_rate": 8.24886684239412e-09, "loss": 0.1373, "step": 21495 }, { "epoch": 0.99, "grad_norm": 0.25470779800657534, "learning_rate": 8.188554497096101e-09, "loss": 0.205, "step": 21496 }, { "epoch": 0.99, "grad_norm": 0.4186588818496496, "learning_rate": 8.128463360872473e-09, "loss": 0.2792, "step": 21497 }, { "epoch": 0.99, "grad_norm": 0.5996969658273908, "learning_rate": 8.068593435055504e-09, "loss": 0.2718, "step": 21498 }, { "epoch": 0.99, "grad_norm": 0.8765596812821451, "learning_rate": 8.008944720969692e-09, "loss": 0.4364, "step": 21499 }, { "epoch": 0.99, "grad_norm": 0.3781751287092715, "learning_rate": 7.949517219935088e-09, "loss": 0.2664, "step": 21500 }, { "epoch": 0.99, "grad_norm": 0.25806078207905836, "learning_rate": 7.89031093326731e-09, "loss": 0.2079, "step": 21501 }, { "epoch": 0.99, "grad_norm": 0.5353428668317922, "learning_rate": 7.831325862277527e-09, "loss": 0.1914, "step": 21502 }, { "epoch": 0.99, "grad_norm": 0.37773229060676133, "learning_rate": 7.772562008272477e-09, "loss": 0.2397, "step": 21503 }, { "epoch": 0.99, "grad_norm": 0.34732635752874064, "learning_rate": 7.714019372551118e-09, "loss": 0.2758, "step": 21504 }, { "epoch": 0.99, "grad_norm": 1.0754734057994448, "learning_rate": 7.655697956411301e-09, "loss": 0.23, "step": 21505 }, { "epoch": 0.99, "grad_norm": 0.3580077234594924, "learning_rate": 7.597597761144215e-09, "loss": 0.2878, "step": 21506 }, { "epoch": 0.99, "grad_norm": 0.5444363463080844, "learning_rate": 7.539718788034389e-09, "loss": 0.2639, "step": 21507 }, { "epoch": 0.99, "grad_norm": 0.30472176311330224, "learning_rate": 7.48206103836524e-09, "loss": 0.1884, "step": 21508 }, { "epoch": 0.99, "grad_norm": 0.33138166734152413, "learning_rate": 7.424624513411305e-09, "loss": 0.2473, "step": 21509 }, { "epoch": 0.99, "grad_norm": 0.7757974803418083, "learning_rate": 7.3674092144460084e-09, "loss": 0.3701, "step": 21510 }, { "epoch": 0.99, "grad_norm": 0.4527654640556501, "learning_rate": 7.310415142735006e-09, "loss": 0.2301, "step": 21511 }, { "epoch": 0.99, "grad_norm": 0.275330227219164, "learning_rate": 7.2536422995406196e-09, "loss": 0.2419, "step": 21512 }, { "epoch": 0.99, "grad_norm": 1.2544213035942349, "learning_rate": 7.197090686119623e-09, "loss": 0.7327, "step": 21513 }, { "epoch": 0.99, "grad_norm": 0.3134565393270457, "learning_rate": 7.140760303723237e-09, "loss": 0.1596, "step": 21514 }, { "epoch": 0.99, "grad_norm": 0.34427015539532196, "learning_rate": 7.084651153599353e-09, "loss": 0.2382, "step": 21515 }, { "epoch": 0.99, "grad_norm": 0.4130954961021895, "learning_rate": 7.02876323699031e-09, "loss": 0.2623, "step": 21516 }, { "epoch": 0.99, "grad_norm": 1.3692640506794043, "learning_rate": 6.973096555132896e-09, "loss": 0.4834, "step": 21517 }, { "epoch": 0.99, "grad_norm": 0.310703230365906, "learning_rate": 6.91765110925946e-09, "loss": 0.1786, "step": 21518 }, { "epoch": 0.99, "grad_norm": 1.5202894869163943, "learning_rate": 6.862426900597907e-09, "loss": 0.7263, "step": 21519 }, { "epoch": 0.99, "grad_norm": 0.28627431757707894, "learning_rate": 6.8074239303705934e-09, "loss": 0.2317, "step": 21520 }, { "epoch": 0.99, "grad_norm": 0.31074701785982906, "learning_rate": 6.7526421997954335e-09, "loss": 0.182, "step": 21521 }, { "epoch": 0.99, "grad_norm": 0.4553187842930531, "learning_rate": 6.698081710084792e-09, "loss": 0.2523, "step": 21522 }, { "epoch": 0.99, "grad_norm": 0.36486479948261447, "learning_rate": 6.6437424624477e-09, "loss": 0.2848, "step": 21523 }, { "epoch": 0.99, "grad_norm": 0.3093268905736733, "learning_rate": 6.58962445808653e-09, "loss": 0.1836, "step": 21524 }, { "epoch": 0.99, "grad_norm": 1.1686933530312147, "learning_rate": 6.535727698199213e-09, "loss": 0.5545, "step": 21525 }, { "epoch": 0.99, "grad_norm": 0.838983469809667, "learning_rate": 6.482052183978127e-09, "loss": 0.3639, "step": 21526 }, { "epoch": 0.99, "grad_norm": 0.34065939524257677, "learning_rate": 6.428597916613433e-09, "loss": 0.2356, "step": 21527 }, { "epoch": 0.99, "grad_norm": 0.2330062967001011, "learning_rate": 6.375364897287517e-09, "loss": 0.1666, "step": 21528 }, { "epoch": 0.99, "grad_norm": 1.3909180525133036, "learning_rate": 6.322353127178326e-09, "loss": 0.6702, "step": 21529 }, { "epoch": 0.99, "grad_norm": 0.36392344916243163, "learning_rate": 6.269562607461588e-09, "loss": 0.2784, "step": 21530 }, { "epoch": 0.99, "grad_norm": 1.2204739891367613, "learning_rate": 6.216993339303034e-09, "loss": 0.2814, "step": 21531 }, { "epoch": 0.99, "grad_norm": 0.3580824049823755, "learning_rate": 6.164645323869511e-09, "loss": 0.3015, "step": 21532 }, { "epoch": 0.99, "grad_norm": 0.38677085565020597, "learning_rate": 6.112518562317871e-09, "loss": 0.2418, "step": 21533 }, { "epoch": 0.99, "grad_norm": 0.16722957125355134, "learning_rate": 6.060613055802744e-09, "loss": 0.0704, "step": 21534 }, { "epoch": 0.99, "grad_norm": 0.3704672482723745, "learning_rate": 6.008928805473213e-09, "loss": 0.2902, "step": 21535 }, { "epoch": 0.99, "grad_norm": 0.44685106206105607, "learning_rate": 5.957465812473917e-09, "loss": 0.2899, "step": 21536 }, { "epoch": 0.99, "grad_norm": 0.5071838821214395, "learning_rate": 5.906224077943945e-09, "loss": 0.2621, "step": 21537 }, { "epoch": 0.99, "grad_norm": 0.7673621789069253, "learning_rate": 5.855203603017945e-09, "loss": 0.3381, "step": 21538 }, { "epoch": 0.99, "grad_norm": 0.43794477105658364, "learning_rate": 5.804404388825013e-09, "loss": 0.2671, "step": 21539 }, { "epoch": 0.99, "grad_norm": 0.2696375840193878, "learning_rate": 5.753826436490917e-09, "loss": 0.2196, "step": 21540 }, { "epoch": 0.99, "grad_norm": 0.3569859371725361, "learning_rate": 5.7034697471336495e-09, "loss": 0.1197, "step": 21541 }, { "epoch": 0.99, "grad_norm": 0.44894690529766557, "learning_rate": 5.653334321868986e-09, "loss": 0.28, "step": 21542 }, { "epoch": 0.99, "grad_norm": 0.49035358332072015, "learning_rate": 5.603420161807149e-09, "loss": 0.324, "step": 21543 }, { "epoch": 0.99, "grad_norm": 0.3508351822123145, "learning_rate": 5.553727268051701e-09, "loss": 0.2386, "step": 21544 }, { "epoch": 0.99, "grad_norm": 0.4361404344993259, "learning_rate": 5.504255641705092e-09, "loss": 0.2635, "step": 21545 }, { "epoch": 0.99, "grad_norm": 0.274315518625277, "learning_rate": 5.455005283862002e-09, "loss": 0.1641, "step": 21546 }, { "epoch": 0.99, "grad_norm": 0.5219978532385382, "learning_rate": 5.405976195611562e-09, "loss": 0.271, "step": 21547 }, { "epoch": 0.99, "grad_norm": 0.2777436173040839, "learning_rate": 5.357168378039568e-09, "loss": 0.2315, "step": 21548 }, { "epoch": 0.99, "grad_norm": 0.7901886025911812, "learning_rate": 5.308581832226267e-09, "loss": 0.507, "step": 21549 }, { "epoch": 0.99, "grad_norm": 0.743255209664263, "learning_rate": 5.260216559249687e-09, "loss": 0.2775, "step": 21550 }, { "epoch": 0.99, "grad_norm": 0.3219815475448342, "learning_rate": 5.212072560177861e-09, "loss": 0.2252, "step": 21551 }, { "epoch": 0.99, "grad_norm": 0.37816095996664706, "learning_rate": 5.1641498360777146e-09, "loss": 0.2865, "step": 21552 }, { "epoch": 0.99, "grad_norm": 0.3594253891255938, "learning_rate": 5.116448388009509e-09, "loss": 0.2036, "step": 21553 }, { "epoch": 0.99, "grad_norm": 0.39636592024092515, "learning_rate": 5.068968217031289e-09, "loss": 0.1958, "step": 21554 }, { "epoch": 0.99, "grad_norm": 0.5575924883322314, "learning_rate": 5.021709324192214e-09, "loss": 0.3227, "step": 21555 }, { "epoch": 0.99, "grad_norm": 0.3723546818495945, "learning_rate": 4.974671710539225e-09, "loss": 0.311, "step": 21556 }, { "epoch": 0.99, "grad_norm": 0.8473967610236064, "learning_rate": 4.9278553771137105e-09, "loss": 0.1106, "step": 21557 }, { "epoch": 0.99, "grad_norm": 0.3478989295637733, "learning_rate": 4.881260324951509e-09, "loss": 0.2208, "step": 21558 }, { "epoch": 0.99, "grad_norm": 0.29964458311911185, "learning_rate": 4.834886555085128e-09, "loss": 0.2615, "step": 21559 }, { "epoch": 0.99, "grad_norm": 0.42164599629970967, "learning_rate": 4.788734068541523e-09, "loss": 0.1961, "step": 21560 }, { "epoch": 0.99, "grad_norm": 0.4923722461072486, "learning_rate": 4.74280286634099e-09, "loss": 0.3216, "step": 21561 }, { "epoch": 0.99, "grad_norm": 1.6467087831318428, "learning_rate": 4.697092949501603e-09, "loss": 0.5366, "step": 21562 }, { "epoch": 0.99, "grad_norm": 0.36283506164023727, "learning_rate": 4.651604319035885e-09, "loss": 0.2164, "step": 21563 }, { "epoch": 0.99, "grad_norm": 0.29628124220668456, "learning_rate": 4.606336975948589e-09, "loss": 0.229, "step": 21564 }, { "epoch": 0.99, "grad_norm": 0.509862357281291, "learning_rate": 4.561290921243355e-09, "loss": 0.2557, "step": 21565 }, { "epoch": 0.99, "grad_norm": 0.5249777831643306, "learning_rate": 4.516466155918276e-09, "loss": 0.301, "step": 21566 }, { "epoch": 0.99, "grad_norm": 0.3660895893222812, "learning_rate": 4.471862680964778e-09, "loss": 0.2303, "step": 21567 }, { "epoch": 0.99, "grad_norm": 0.36306604770389955, "learning_rate": 4.427480497369852e-09, "loss": 0.2895, "step": 21568 }, { "epoch": 0.99, "grad_norm": 0.45055986115795227, "learning_rate": 4.383319606117153e-09, "loss": 0.2642, "step": 21569 }, { "epoch": 0.99, "grad_norm": 0.7234005571377594, "learning_rate": 4.33938000818368e-09, "loss": 0.2131, "step": 21570 }, { "epoch": 0.99, "grad_norm": 0.2567419376302108, "learning_rate": 4.2956617045419865e-09, "loss": 0.2408, "step": 21571 }, { "epoch": 0.99, "grad_norm": 0.42850693458520744, "learning_rate": 4.252164696161298e-09, "loss": 0.2665, "step": 21572 }, { "epoch": 0.99, "grad_norm": 0.4938350227972083, "learning_rate": 4.208888984003068e-09, "loss": 0.2695, "step": 21573 }, { "epoch": 0.99, "grad_norm": 1.6190327224947545, "learning_rate": 4.165834569026528e-09, "loss": 0.5385, "step": 21574 }, { "epoch": 0.99, "grad_norm": 0.4928285194951692, "learning_rate": 4.123001452183139e-09, "loss": 0.3384, "step": 21575 }, { "epoch": 0.99, "grad_norm": 0.2874082242426235, "learning_rate": 4.0803896344232545e-09, "loss": 0.2138, "step": 21576 }, { "epoch": 0.99, "grad_norm": 0.4060456345182681, "learning_rate": 4.037999116689451e-09, "loss": 0.254, "step": 21577 }, { "epoch": 0.99, "grad_norm": 0.371309652995185, "learning_rate": 3.995829899918757e-09, "loss": 0.2061, "step": 21578 }, { "epoch": 0.99, "grad_norm": 0.32730304052825715, "learning_rate": 3.953881985047092e-09, "loss": 0.2616, "step": 21579 }, { "epoch": 0.99, "grad_norm": 0.5294288356817469, "learning_rate": 3.912155373002602e-09, "loss": 0.2636, "step": 21580 }, { "epoch": 0.99, "grad_norm": 0.8019539014481959, "learning_rate": 3.8706500647078814e-09, "loss": 0.2732, "step": 21581 }, { "epoch": 0.99, "grad_norm": 0.40061723361616414, "learning_rate": 3.829366061083306e-09, "loss": 0.2636, "step": 21582 }, { "epoch": 0.99, "grad_norm": 0.35489475647924795, "learning_rate": 3.788303363041479e-09, "loss": 0.241, "step": 21583 }, { "epoch": 0.99, "grad_norm": 0.4261941466040029, "learning_rate": 3.747461971492783e-09, "loss": 0.2365, "step": 21584 }, { "epoch": 0.99, "grad_norm": 0.4259577061041173, "learning_rate": 3.7068418873398293e-09, "loss": 0.3338, "step": 21585 }, { "epoch": 0.99, "grad_norm": 0.5368930353343816, "learning_rate": 3.666443111484119e-09, "loss": 0.1535, "step": 21586 }, { "epoch": 0.99, "grad_norm": 0.30414809369924783, "learning_rate": 3.6262656448182722e-09, "loss": 0.2404, "step": 21587 }, { "epoch": 0.99, "grad_norm": 0.4065528642939639, "learning_rate": 3.586309488231576e-09, "loss": 0.2684, "step": 21588 }, { "epoch": 0.99, "grad_norm": 0.5523473273685721, "learning_rate": 3.5465746426099902e-09, "loss": 0.267, "step": 21589 }, { "epoch": 0.99, "grad_norm": 0.4143058538191836, "learning_rate": 3.5070611088317e-09, "loss": 0.2534, "step": 21590 }, { "epoch": 0.99, "grad_norm": 0.4093892735673874, "learning_rate": 3.4677688877737812e-09, "loss": 0.3019, "step": 21591 }, { "epoch": 0.99, "grad_norm": 0.24394009469562805, "learning_rate": 3.4286979803033193e-09, "loss": 0.1983, "step": 21592 }, { "epoch": 0.99, "grad_norm": 1.0644738864111938, "learning_rate": 3.3898483872873976e-09, "loss": 0.1322, "step": 21593 }, { "epoch": 0.99, "grad_norm": 0.3557023150661273, "learning_rate": 3.351220109585329e-09, "loss": 0.2762, "step": 21594 }, { "epoch": 0.99, "grad_norm": 0.40290461880223555, "learning_rate": 3.3128131480519856e-09, "loss": 0.2969, "step": 21595 }, { "epoch": 0.99, "grad_norm": 0.5063733834187837, "learning_rate": 3.2746275035377972e-09, "loss": 0.1325, "step": 21596 }, { "epoch": 0.99, "grad_norm": 0.37482864600818017, "learning_rate": 3.236663176889865e-09, "loss": 0.294, "step": 21597 }, { "epoch": 0.99, "grad_norm": 0.2842164375955541, "learning_rate": 3.1989201689452964e-09, "loss": 0.1542, "step": 21598 }, { "epoch": 0.99, "grad_norm": 0.35699383052726724, "learning_rate": 3.1613984805423105e-09, "loss": 0.2346, "step": 21599 }, { "epoch": 0.99, "grad_norm": 0.3581259809965335, "learning_rate": 3.1240981125113535e-09, "loss": 0.2795, "step": 21600 }, { "epoch": 0.99, "grad_norm": 0.9199713566271871, "learning_rate": 3.0870190656773214e-09, "loss": 0.4416, "step": 21601 }, { "epoch": 0.99, "grad_norm": 0.7487161676811744, "learning_rate": 3.050161340861779e-09, "loss": 0.1641, "step": 21602 }, { "epoch": 0.99, "grad_norm": 0.3206002966688541, "learning_rate": 3.013524938880741e-09, "loss": 0.2771, "step": 21603 }, { "epoch": 0.99, "grad_norm": 0.3326284789686114, "learning_rate": 2.977109860544669e-09, "loss": 0.2299, "step": 21604 }, { "epoch": 0.99, "grad_norm": 0.5048964944722827, "learning_rate": 2.940916106659586e-09, "loss": 0.2576, "step": 21605 }, { "epoch": 0.99, "grad_norm": 0.36812986538371545, "learning_rate": 2.9049436780281825e-09, "loss": 0.214, "step": 21606 }, { "epoch": 0.99, "grad_norm": 0.3816476173484007, "learning_rate": 2.8691925754453785e-09, "loss": 0.2951, "step": 21607 }, { "epoch": 0.99, "grad_norm": 0.9341432930965753, "learning_rate": 2.8336627997038735e-09, "loss": 0.3964, "step": 21608 }, { "epoch": 0.99, "grad_norm": 0.33102507118690233, "learning_rate": 2.7983543515897048e-09, "loss": 0.2021, "step": 21609 }, { "epoch": 0.99, "grad_norm": 0.3180966371696221, "learning_rate": 2.76326723188447e-09, "loss": 0.2016, "step": 21610 }, { "epoch": 0.99, "grad_norm": 0.37614340133159213, "learning_rate": 2.7284014413642144e-09, "loss": 0.2891, "step": 21611 }, { "epoch": 0.99, "grad_norm": 0.31408486227283594, "learning_rate": 2.693756980802764e-09, "loss": 0.2105, "step": 21612 }, { "epoch": 0.99, "grad_norm": 1.5666803004603664, "learning_rate": 2.6593338509650623e-09, "loss": 0.4198, "step": 21613 }, { "epoch": 0.99, "grad_norm": 1.30172625271836, "learning_rate": 2.6251320526149427e-09, "loss": 0.8052, "step": 21614 }, { "epoch": 0.99, "grad_norm": 0.25023752532296084, "learning_rate": 2.591151586508467e-09, "loss": 0.2135, "step": 21615 }, { "epoch": 0.99, "grad_norm": 0.982252797759143, "learning_rate": 2.5573924533983664e-09, "loss": 0.4262, "step": 21616 }, { "epoch": 0.99, "grad_norm": 0.4040939107815881, "learning_rate": 2.523854654031821e-09, "loss": 0.246, "step": 21617 }, { "epoch": 0.99, "grad_norm": 0.24627529001706758, "learning_rate": 2.49053818915157e-09, "loss": 0.2031, "step": 21618 }, { "epoch": 0.99, "grad_norm": 0.3905765089336634, "learning_rate": 2.4574430594948016e-09, "loss": 0.2399, "step": 21619 }, { "epoch": 0.99, "grad_norm": 1.1977269485675561, "learning_rate": 2.4245692657942633e-09, "loss": 0.6318, "step": 21620 }, { "epoch": 0.99, "grad_norm": 0.33922133935384097, "learning_rate": 2.3919168087782607e-09, "loss": 0.2594, "step": 21621 }, { "epoch": 0.99, "grad_norm": 0.8376659865420893, "learning_rate": 2.3594856891695493e-09, "loss": 0.2451, "step": 21622 }, { "epoch": 0.99, "grad_norm": 0.33234190090878546, "learning_rate": 2.3272759076864437e-09, "loss": 0.2819, "step": 21623 }, { "epoch": 0.99, "grad_norm": 0.38347436208434454, "learning_rate": 2.2952874650405964e-09, "loss": 0.2674, "step": 21624 }, { "epoch": 0.99, "grad_norm": 0.39807186990993204, "learning_rate": 2.2635203619414405e-09, "loss": 0.0992, "step": 21625 }, { "epoch": 0.99, "grad_norm": 0.4417410141708921, "learning_rate": 2.2319745990928566e-09, "loss": 0.3349, "step": 21626 }, { "epoch": 0.99, "grad_norm": 0.35693221833620314, "learning_rate": 2.200650177190955e-09, "loss": 0.2532, "step": 21627 }, { "epoch": 0.99, "grad_norm": 0.4528213277687044, "learning_rate": 2.1695470969318454e-09, "loss": 0.2608, "step": 21628 }, { "epoch": 0.99, "grad_norm": 0.9343348367527564, "learning_rate": 2.138665359002756e-09, "loss": 0.4589, "step": 21629 }, { "epoch": 0.99, "grad_norm": 0.25642444842145096, "learning_rate": 2.108004964086474e-09, "loss": 0.1975, "step": 21630 }, { "epoch": 0.99, "grad_norm": 0.2790452139464055, "learning_rate": 2.077565912863566e-09, "loss": 0.2272, "step": 21631 }, { "epoch": 0.99, "grad_norm": 1.0958564138246014, "learning_rate": 2.0473482060079375e-09, "loss": 0.4654, "step": 21632 }, { "epoch": 0.99, "grad_norm": 0.3397165909891794, "learning_rate": 2.0173518441868324e-09, "loss": 0.2546, "step": 21633 }, { "epoch": 0.99, "grad_norm": 0.6078773316892453, "learning_rate": 1.9875768280663843e-09, "loss": 0.3436, "step": 21634 }, { "epoch": 0.99, "grad_norm": 0.3808509143965247, "learning_rate": 1.9580231583038457e-09, "loss": 0.24, "step": 21635 }, { "epoch": 0.99, "grad_norm": 0.35517559597608905, "learning_rate": 1.928690835555358e-09, "loss": 0.2388, "step": 21636 }, { "epoch": 0.99, "grad_norm": 0.3595316387492313, "learning_rate": 1.8995798604681813e-09, "loss": 0.1653, "step": 21637 }, { "epoch": 0.99, "grad_norm": 0.349162835841552, "learning_rate": 1.8706902336884656e-09, "loss": 0.2648, "step": 21638 }, { "epoch": 0.99, "grad_norm": 0.33474948582430675, "learning_rate": 1.8420219558556995e-09, "loss": 0.2512, "step": 21639 }, { "epoch": 0.99, "grad_norm": 0.6328446175820863, "learning_rate": 1.81357502760382e-09, "loss": 0.3557, "step": 21640 }, { "epoch": 0.99, "grad_norm": 0.5537088921641299, "learning_rate": 1.7853494495634338e-09, "loss": 0.1492, "step": 21641 }, { "epoch": 0.99, "grad_norm": 0.47202978151328984, "learning_rate": 1.7573452223584865e-09, "loss": 0.285, "step": 21642 }, { "epoch": 0.99, "grad_norm": 0.2605883340992969, "learning_rate": 1.7295623466107026e-09, "loss": 0.2321, "step": 21643 }, { "epoch": 0.99, "grad_norm": 0.9993597437921435, "learning_rate": 1.7020008229329255e-09, "loss": 0.4501, "step": 21644 }, { "epoch": 0.99, "grad_norm": 0.306825167228871, "learning_rate": 1.6746606519357777e-09, "loss": 0.1865, "step": 21645 }, { "epoch": 0.99, "grad_norm": 0.4456645673173325, "learning_rate": 1.6475418342265515e-09, "loss": 0.328, "step": 21646 }, { "epoch": 0.99, "grad_norm": 0.5685285193969339, "learning_rate": 1.6206443704036568e-09, "loss": 0.3143, "step": 21647 }, { "epoch": 0.99, "grad_norm": 0.3350194777285541, "learning_rate": 1.5939682610621732e-09, "loss": 0.1778, "step": 21648 }, { "epoch": 0.99, "grad_norm": 0.38565327715846165, "learning_rate": 1.56751350679496e-09, "loss": 0.2076, "step": 21649 }, { "epoch": 0.99, "grad_norm": 0.49259567506199814, "learning_rate": 1.5412801081859941e-09, "loss": 0.3374, "step": 21650 }, { "epoch": 0.99, "grad_norm": 0.2822211063300327, "learning_rate": 1.5152680658159225e-09, "loss": 0.2118, "step": 21651 }, { "epoch": 0.99, "grad_norm": 0.7308099899489343, "learning_rate": 1.489477380262061e-09, "loss": 0.4148, "step": 21652 }, { "epoch": 0.99, "grad_norm": 1.615600908726532, "learning_rate": 1.4639080520939541e-09, "loss": 0.6796, "step": 21653 }, { "epoch": 0.99, "grad_norm": 0.3510468935890721, "learning_rate": 1.4385600818778156e-09, "loss": 0.2857, "step": 21654 }, { "epoch": 0.99, "grad_norm": 0.2883813521152375, "learning_rate": 1.4134334701754182e-09, "loss": 0.2042, "step": 21655 }, { "epoch": 0.99, "grad_norm": 0.480955536926555, "learning_rate": 1.388528217544094e-09, "loss": 0.2676, "step": 21656 }, { "epoch": 0.99, "grad_norm": 0.330343129049868, "learning_rate": 1.363844324532293e-09, "loss": 0.2492, "step": 21657 }, { "epoch": 0.99, "grad_norm": 0.9936849173522314, "learning_rate": 1.3393817916895756e-09, "loss": 0.3374, "step": 21658 }, { "epoch": 0.99, "grad_norm": 0.4541444252575229, "learning_rate": 1.3151406195544002e-09, "loss": 0.3139, "step": 21659 }, { "epoch": 1.0, "grad_norm": 0.7146052675798962, "learning_rate": 1.2911208086663351e-09, "loss": 0.3306, "step": 21660 }, { "epoch": 1.0, "grad_norm": 0.22350510364993992, "learning_rate": 1.267322359556067e-09, "loss": 0.1526, "step": 21661 }, { "epoch": 1.0, "grad_norm": 0.3449199083898967, "learning_rate": 1.2437452727498412e-09, "loss": 0.3104, "step": 21662 }, { "epoch": 1.0, "grad_norm": 0.6527753256633516, "learning_rate": 1.220389548770573e-09, "loss": 0.3164, "step": 21663 }, { "epoch": 1.0, "grad_norm": 0.40491874997345795, "learning_rate": 1.1972551881345162e-09, "loss": 0.2483, "step": 21664 }, { "epoch": 1.0, "grad_norm": 1.886718547954293, "learning_rate": 1.1743421913545938e-09, "loss": 0.5242, "step": 21665 }, { "epoch": 1.0, "grad_norm": 0.42985751505636405, "learning_rate": 1.1516505589381777e-09, "loss": 0.2723, "step": 21666 }, { "epoch": 1.0, "grad_norm": 0.23350890288194356, "learning_rate": 1.1291802913859784e-09, "loss": 0.2226, "step": 21667 }, { "epoch": 1.0, "grad_norm": 0.7039442467593681, "learning_rate": 1.1069313891975964e-09, "loss": 0.2848, "step": 21668 }, { "epoch": 1.0, "grad_norm": 0.43966574829754296, "learning_rate": 1.0849038528648603e-09, "loss": 0.2676, "step": 21669 }, { "epoch": 1.0, "grad_norm": 0.2870216514598382, "learning_rate": 1.0630976828740481e-09, "loss": 0.2509, "step": 21670 }, { "epoch": 1.0, "grad_norm": 0.5169265317743014, "learning_rate": 1.0415128797103268e-09, "loss": 0.1967, "step": 21671 }, { "epoch": 1.0, "grad_norm": 0.4754034371688888, "learning_rate": 1.0201494438499825e-09, "loss": 0.2529, "step": 21672 }, { "epoch": 1.0, "grad_norm": 0.3493277251201268, "learning_rate": 9.990073757670804e-10, "loss": 0.2248, "step": 21673 }, { "epoch": 1.0, "grad_norm": 0.33316429168516826, "learning_rate": 9.78086675927914e-10, "loss": 0.2603, "step": 21674 }, { "epoch": 1.0, "grad_norm": 0.3820858136520356, "learning_rate": 9.573873447976667e-10, "loss": 0.2625, "step": 21675 }, { "epoch": 1.0, "grad_norm": 1.360922936304401, "learning_rate": 9.369093828326403e-10, "loss": 0.7765, "step": 21676 }, { "epoch": 1.0, "grad_norm": 0.5293869206914182, "learning_rate": 9.166527904880263e-10, "loss": 0.096, "step": 21677 }, { "epoch": 1.0, "grad_norm": 0.3845172999466061, "learning_rate": 8.966175682112443e-10, "loss": 0.2632, "step": 21678 }, { "epoch": 1.0, "grad_norm": 0.353691667508306, "learning_rate": 8.768037164463838e-10, "loss": 0.2678, "step": 21679 }, { "epoch": 1.0, "grad_norm": 0.6699491706264941, "learning_rate": 8.572112356308726e-10, "loss": 0.3688, "step": 21680 }, { "epoch": 1.0, "grad_norm": 0.35179555512076083, "learning_rate": 8.37840126199918e-10, "loss": 0.1502, "step": 21681 }, { "epoch": 1.0, "grad_norm": 0.2659674413129796, "learning_rate": 8.186903885820663e-10, "loss": 0.2309, "step": 21682 }, { "epoch": 1.0, "grad_norm": 0.4283630816635962, "learning_rate": 7.997620232014225e-10, "loss": 0.2375, "step": 21683 }, { "epoch": 1.0, "grad_norm": 0.6819758872490643, "learning_rate": 7.810550304754305e-10, "loss": 0.15, "step": 21684 }, { "epoch": 1.0, "grad_norm": 0.3794621562298256, "learning_rate": 7.62569410820424e-10, "loss": 0.298, "step": 21685 }, { "epoch": 1.0, "grad_norm": 0.5583109562539721, "learning_rate": 7.44305164644965e-10, "loss": 0.3448, "step": 21686 }, { "epoch": 1.0, "grad_norm": 0.3184933885882249, "learning_rate": 7.262622923531748e-10, "loss": 0.2265, "step": 21687 }, { "epoch": 1.0, "grad_norm": 0.34309846354109635, "learning_rate": 7.084407943436233e-10, "loss": 0.26, "step": 21688 }, { "epoch": 1.0, "grad_norm": 0.4512031508504746, "learning_rate": 6.908406710126603e-10, "loss": 0.1894, "step": 21689 }, { "epoch": 1.0, "grad_norm": 0.2727539675440686, "learning_rate": 6.734619227488637e-10, "loss": 0.1983, "step": 21690 }, { "epoch": 1.0, "grad_norm": 0.5111901520870467, "learning_rate": 6.563045499363707e-10, "loss": 0.3125, "step": 21691 }, { "epoch": 1.0, "grad_norm": 0.7090908417222125, "learning_rate": 6.393685529570981e-10, "loss": 0.3828, "step": 21692 }, { "epoch": 1.0, "grad_norm": 0.5656748785435641, "learning_rate": 6.226539321840808e-10, "loss": 0.2984, "step": 21693 }, { "epoch": 1.0, "grad_norm": 0.31245024392137855, "learning_rate": 6.061606879881333e-10, "loss": 0.2294, "step": 21694 }, { "epoch": 1.0, "grad_norm": 0.2848229747875986, "learning_rate": 5.898888207334086e-10, "loss": 0.2099, "step": 21695 }, { "epoch": 1.0, "grad_norm": 0.5948077828725327, "learning_rate": 5.738383307818396e-10, "loss": 0.3165, "step": 21696 }, { "epoch": 1.0, "grad_norm": 0.36176047065746203, "learning_rate": 5.580092184875874e-10, "loss": 0.2427, "step": 21697 }, { "epoch": 1.0, "grad_norm": 0.3716996756971005, "learning_rate": 5.424014842014824e-10, "loss": 0.3039, "step": 21698 }, { "epoch": 1.0, "grad_norm": 0.8645350010365502, "learning_rate": 5.270151282688041e-10, "loss": 0.4891, "step": 21699 }, { "epoch": 1.0, "grad_norm": 0.3654841280977617, "learning_rate": 5.118501510303909e-10, "loss": 0.203, "step": 21700 }, { "epoch": 1.0, "grad_norm": 0.30528617381241235, "learning_rate": 4.969065528226402e-10, "loss": 0.159, "step": 21701 }, { "epoch": 1.0, "grad_norm": 0.34838076773590904, "learning_rate": 4.821843339752885e-10, "loss": 0.2594, "step": 21702 }, { "epoch": 1.0, "grad_norm": 0.3750308099579273, "learning_rate": 4.676834948147413e-10, "loss": 0.2256, "step": 21703 }, { "epoch": 1.0, "grad_norm": 1.5477096133337906, "learning_rate": 4.53404035661853e-10, "loss": 0.7436, "step": 21704 }, { "epoch": 1.0, "grad_norm": 0.869875546062393, "learning_rate": 4.3934595683303716e-10, "loss": 0.4606, "step": 21705 }, { "epoch": 1.0, "grad_norm": 0.2545067793607434, "learning_rate": 4.2550925863915624e-10, "loss": 0.2543, "step": 21706 }, { "epoch": 1.0, "grad_norm": 0.3452805033008394, "learning_rate": 4.118939413877421e-10, "loss": 0.1098, "step": 21707 }, { "epoch": 1.0, "grad_norm": 0.5360738819964151, "learning_rate": 3.985000053785548e-10, "loss": 0.3373, "step": 21708 }, { "epoch": 1.0, "grad_norm": 0.425466283467058, "learning_rate": 3.8532745090913427e-10, "loss": 0.3243, "step": 21709 }, { "epoch": 1.0, "grad_norm": 0.3559831514711698, "learning_rate": 3.723762782703588e-10, "loss": 0.2533, "step": 21710 }, { "epoch": 1.0, "grad_norm": 0.6124996975356838, "learning_rate": 3.596464877497763e-10, "loss": 0.385, "step": 21711 }, { "epoch": 1.0, "grad_norm": 0.44268259548629474, "learning_rate": 3.471380796282731e-10, "loss": 0.2783, "step": 21712 }, { "epoch": 1.0, "grad_norm": 0.7561702378340494, "learning_rate": 3.348510541834049e-10, "loss": 0.2787, "step": 21713 }, { "epoch": 1.0, "grad_norm": 0.2915018395642266, "learning_rate": 3.2278541168717647e-10, "loss": 0.2503, "step": 21714 }, { "epoch": 1.0, "grad_norm": 0.2772427119246304, "learning_rate": 3.109411524071515e-10, "loss": 0.1941, "step": 21715 }, { "epoch": 1.0, "grad_norm": 1.4683710504748486, "learning_rate": 2.9931827660534263e-10, "loss": 0.3138, "step": 21716 }, { "epoch": 1.0, "grad_norm": 0.8713967156654168, "learning_rate": 2.8791678453821135e-10, "loss": 0.4454, "step": 21717 }, { "epoch": 1.0, "grad_norm": 0.253944387044597, "learning_rate": 2.7673667645888856e-10, "loss": 0.2463, "step": 21718 }, { "epoch": 1.0, "grad_norm": 0.6042439103711952, "learning_rate": 2.65777952614954e-10, "loss": 0.3501, "step": 21719 }, { "epoch": 1.0, "grad_norm": 0.20040722879285555, "learning_rate": 2.550406132484362e-10, "loss": 0.0674, "step": 21720 }, { "epoch": 1.0, "grad_norm": 0.29838045121578627, "learning_rate": 2.44524658596923e-10, "loss": 0.1909, "step": 21721 }, { "epoch": 1.0, "grad_norm": 0.39325338079862293, "learning_rate": 2.3423008889467134e-10, "loss": 0.3057, "step": 21722 }, { "epoch": 1.0, "grad_norm": 0.49580547990507134, "learning_rate": 2.2415690436816685e-10, "loss": 0.2815, "step": 21723 }, { "epoch": 1.0, "grad_norm": 0.3694042826272208, "learning_rate": 2.1430510524167448e-10, "loss": 0.2979, "step": 21724 }, { "epoch": 1.0, "grad_norm": 0.6444201871641582, "learning_rate": 2.0467469173168776e-10, "loss": 0.3699, "step": 21725 }, { "epoch": 1.0, "grad_norm": 0.24061020415216497, "learning_rate": 1.9526566405247972e-10, "loss": 0.1728, "step": 21726 }, { "epoch": 1.0, "grad_norm": 0.38192158712139984, "learning_rate": 1.8607802241277228e-10, "loss": 0.2643, "step": 21727 }, { "epoch": 1.0, "grad_norm": 0.6006917995005504, "learning_rate": 1.7711176701462607e-10, "loss": 0.2377, "step": 21728 }, { "epoch": 1.0, "grad_norm": 0.37807502779184093, "learning_rate": 1.683668980578812e-10, "loss": 0.2036, "step": 21729 }, { "epoch": 1.0, "grad_norm": 0.3356607290175542, "learning_rate": 1.5984341573460626e-10, "loss": 0.28, "step": 21730 }, { "epoch": 1.0, "grad_norm": 0.6545291338281968, "learning_rate": 1.5154132023575962e-10, "loss": 0.3493, "step": 21731 }, { "epoch": 1.0, "grad_norm": 0.4970172752431013, "learning_rate": 1.4346061174230763e-10, "loss": 0.2129, "step": 21732 }, { "epoch": 1.0, "grad_norm": 0.25582173781224604, "learning_rate": 1.3560129043632685e-10, "loss": 0.1557, "step": 21733 }, { "epoch": 1.0, "grad_norm": 0.3790901229948752, "learning_rate": 1.2796335648879166e-10, "loss": 0.3095, "step": 21734 }, { "epoch": 1.0, "grad_norm": 0.9558574864114417, "learning_rate": 1.2054681007067637e-10, "loss": 0.5092, "step": 21735 }, { "epoch": 1.0, "grad_norm": 0.32310943257825164, "learning_rate": 1.1335165134518378e-10, "loss": 0.2164, "step": 21736 }, { "epoch": 1.0, "grad_norm": 0.5413391920236553, "learning_rate": 1.06377880472186e-10, "loss": 0.3188, "step": 21737 }, { "epoch": 1.0, "grad_norm": 0.5614760330936077, "learning_rate": 9.962549760711427e-11, "loss": 0.2876, "step": 21738 }, { "epoch": 1.0, "grad_norm": 0.18302935147811014, "learning_rate": 9.3094502896518e-11, "loss": 0.1326, "step": 21739 }, { "epoch": 1.0, "grad_norm": 1.4654245355543103, "learning_rate": 8.678489648805688e-11, "loss": 0.6957, "step": 21740 }, { "epoch": 1.0, "grad_norm": 0.49381659135447514, "learning_rate": 8.069667851939856e-11, "loss": 0.3231, "step": 21741 }, { "epoch": 1.0, "grad_norm": 0.27289146691544053, "learning_rate": 7.482984912710045e-11, "loss": 0.2141, "step": 21742 }, { "epoch": 1.0, "grad_norm": 1.1015648932902677, "learning_rate": 6.918440843883822e-11, "loss": 0.4007, "step": 21743 }, { "epoch": 1.0, "grad_norm": 0.593323061455853, "learning_rate": 6.37603565811773e-11, "loss": 0.1885, "step": 21744 }, { "epoch": 1.0, "grad_norm": 0.3330743867635877, "learning_rate": 5.855769367402176e-11, "loss": 0.2572, "step": 21745 }, { "epoch": 1.0, "grad_norm": 0.2589686848628386, "learning_rate": 5.3576419831724566e-11, "loss": 0.2062, "step": 21746 }, { "epoch": 1.0, "grad_norm": 0.6611910289929411, "learning_rate": 4.8816535166418266e-11, "loss": 0.3423, "step": 21747 }, { "epoch": 1.0, "grad_norm": 0.42254838139413986, "learning_rate": 4.4278039781353585e-11, "loss": 0.2764, "step": 21748 }, { "epoch": 1.0, "grad_norm": 0.41744818058299255, "learning_rate": 3.9960933777560826e-11, "loss": 0.2352, "step": 21749 }, { "epoch": 1.0, "grad_norm": 0.4781134275376225, "learning_rate": 3.586521725162939e-11, "loss": 0.2867, "step": 21750 }, { "epoch": 1.0, "grad_norm": 0.3936997308993916, "learning_rate": 3.199089029348734e-11, "loss": 0.2829, "step": 21751 }, { "epoch": 1.0, "grad_norm": 0.45433621665121454, "learning_rate": 2.8337952988621853e-11, "loss": 0.1875, "step": 21752 }, { "epoch": 1.0, "grad_norm": 0.4558330327323344, "learning_rate": 2.4906405418079206e-11, "loss": 0.3096, "step": 21753 }, { "epoch": 1.0, "grad_norm": 0.27946005568873983, "learning_rate": 2.1696247658464785e-11, "loss": 0.2418, "step": 21754 }, { "epoch": 1.0, "grad_norm": 0.6073615387903163, "learning_rate": 1.8707479779722648e-11, "loss": 0.1463, "step": 21755 }, { "epoch": 1.0, "grad_norm": 1.9979528859376219, "learning_rate": 1.5940101849576394e-11, "loss": 0.6006, "step": 21756 }, { "epoch": 1.0, "grad_norm": 0.2787063265436414, "learning_rate": 1.3394113927978069e-11, "loss": 0.2166, "step": 21757 }, { "epoch": 1.0, "grad_norm": 0.37708467033115073, "learning_rate": 1.1069516071549047e-11, "loss": 0.3004, "step": 21758 }, { "epoch": 1.0, "grad_norm": 0.6805469461554808, "learning_rate": 8.966308332469809e-12, "loss": 0.2847, "step": 21759 }, { "epoch": 1.0, "grad_norm": 0.33815053488109426, "learning_rate": 7.0844907562594985e-12, "loss": 0.2659, "step": 21760 }, { "epoch": 1.0, "grad_norm": 0.5455686230231608, "learning_rate": 5.424063385106593e-12, "loss": 0.2454, "step": 21761 }, { "epoch": 1.0, "grad_norm": 0.40450404973701837, "learning_rate": 3.9850262567586726e-12, "loss": 0.2388, "step": 21762 }, { "epoch": 1.0, "grad_norm": 0.3417118882130641, "learning_rate": 2.7673794011917608e-12, "loss": 0.2397, "step": 21763 }, { "epoch": 1.0, "grad_norm": 0.7317025291557016, "learning_rate": 1.7711228461614327e-12, "loss": 0.4163, "step": 21764 }, { "epoch": 1.0, "grad_norm": 0.6761798824984974, "learning_rate": 9.962566138721485e-13, "loss": 0.2715, "step": 21765 }, { "epoch": 1.0, "grad_norm": 0.41496727159002544, "learning_rate": 4.427807209772539e-13, "loss": 0.2676, "step": 21766 }, { "epoch": 1.0, "grad_norm": 0.25374487131977586, "learning_rate": 1.1069518079942498e-13, "loss": 0.1654, "step": 21767 }, { "epoch": 1.0, "grad_norm": 2.187948659713511, "learning_rate": 0.0, "loss": 0.1929, "step": 21768 }, { "epoch": 1.0, "step": 21768, "total_flos": 0.0, "train_loss": 0.3187218575373166, "train_runtime": 134905.7372, "train_samples_per_second": 62.001, "train_steps_per_second": 0.161 } ], "logging_steps": 1.0, "max_steps": 21768, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }