{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 7895, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000253324889170361, "grad_norm": 12.183216094970703, "learning_rate": 1.0131712259371835e-08, "loss": 1.4559, "step": 1 }, { "epoch": 0.000506649778340722, "grad_norm": 10.059121131896973, "learning_rate": 2.026342451874367e-08, "loss": 1.5302, "step": 2 }, { "epoch": 0.0007599746675110829, "grad_norm": 10.393256187438965, "learning_rate": 3.0395136778115507e-08, "loss": 1.421, "step": 3 }, { "epoch": 0.001013299556681444, "grad_norm": 10.55001449584961, "learning_rate": 4.052684903748734e-08, "loss": 1.3825, "step": 4 }, { "epoch": 0.001266624445851805, "grad_norm": 9.996969223022461, "learning_rate": 5.0658561296859173e-08, "loss": 1.3259, "step": 5 }, { "epoch": 0.0015199493350221659, "grad_norm": 14.385526657104492, "learning_rate": 6.079027355623101e-08, "loss": 1.36, "step": 6 }, { "epoch": 0.001773274224192527, "grad_norm": 10.932132720947266, "learning_rate": 7.092198581560284e-08, "loss": 1.5607, "step": 7 }, { "epoch": 0.002026599113362888, "grad_norm": 11.283689498901367, "learning_rate": 8.105369807497468e-08, "loss": 1.5634, "step": 8 }, { "epoch": 0.002279924002533249, "grad_norm": 12.34595775604248, "learning_rate": 9.118541033434651e-08, "loss": 1.5305, "step": 9 }, { "epoch": 0.00253324889170361, "grad_norm": 12.212746620178223, "learning_rate": 1.0131712259371835e-07, "loss": 1.5121, "step": 10 }, { "epoch": 0.002786573780873971, "grad_norm": 14.221822738647461, "learning_rate": 1.1144883485309017e-07, "loss": 1.6522, "step": 11 }, { "epoch": 0.0030398986700443317, "grad_norm": 10.095308303833008, "learning_rate": 1.2158054711246203e-07, "loss": 1.3202, "step": 12 }, { "epoch": 0.003293223559214693, "grad_norm": 12.034480094909668, "learning_rate": 1.3171225937183385e-07, "loss": 1.5278, "step": 13 }, { "epoch": 0.003546548448385054, "grad_norm": 10.577567100524902, "learning_rate": 1.4184397163120568e-07, "loss": 1.4612, "step": 14 }, { "epoch": 0.0037998733375554147, "grad_norm": 12.96672248840332, "learning_rate": 1.5197568389057753e-07, "loss": 1.4318, "step": 15 }, { "epoch": 0.004053198226725776, "grad_norm": 10.878087997436523, "learning_rate": 1.6210739614994936e-07, "loss": 1.4786, "step": 16 }, { "epoch": 0.004306523115896137, "grad_norm": 15.642341613769531, "learning_rate": 1.722391084093212e-07, "loss": 1.5879, "step": 17 }, { "epoch": 0.004559848005066498, "grad_norm": 12.503395080566406, "learning_rate": 1.8237082066869301e-07, "loss": 1.3623, "step": 18 }, { "epoch": 0.004813172894236859, "grad_norm": 10.388249397277832, "learning_rate": 1.9250253292806487e-07, "loss": 1.4227, "step": 19 }, { "epoch": 0.00506649778340722, "grad_norm": 14.328523635864258, "learning_rate": 2.026342451874367e-07, "loss": 1.6519, "step": 20 }, { "epoch": 0.0053198226725775805, "grad_norm": 14.44335651397705, "learning_rate": 2.1276595744680852e-07, "loss": 1.5192, "step": 21 }, { "epoch": 0.005573147561747942, "grad_norm": 12.124639511108398, "learning_rate": 2.2289766970618035e-07, "loss": 1.3475, "step": 22 }, { "epoch": 0.005826472450918303, "grad_norm": 10.395844459533691, "learning_rate": 2.330293819655522e-07, "loss": 1.3104, "step": 23 }, { "epoch": 0.0060797973400886635, "grad_norm": 11.978984832763672, "learning_rate": 2.4316109422492405e-07, "loss": 1.444, "step": 24 }, { "epoch": 0.006333122229259025, "grad_norm": 11.570704460144043, 
"learning_rate": 2.532928064842959e-07, "loss": 1.5421, "step": 25 }, { "epoch": 0.006586447118429386, "grad_norm": 10.934496879577637, "learning_rate": 2.634245187436677e-07, "loss": 1.3876, "step": 26 }, { "epoch": 0.006839772007599746, "grad_norm": 9.044105529785156, "learning_rate": 2.7355623100303953e-07, "loss": 1.4284, "step": 27 }, { "epoch": 0.007093096896770108, "grad_norm": 8.541162490844727, "learning_rate": 2.8368794326241136e-07, "loss": 1.4057, "step": 28 }, { "epoch": 0.007346421785940469, "grad_norm": 12.932791709899902, "learning_rate": 2.938196555217832e-07, "loss": 1.5905, "step": 29 }, { "epoch": 0.007599746675110829, "grad_norm": 9.997332572937012, "learning_rate": 3.0395136778115507e-07, "loss": 1.468, "step": 30 }, { "epoch": 0.007853071564281191, "grad_norm": 8.133085250854492, "learning_rate": 3.140830800405269e-07, "loss": 1.4008, "step": 31 }, { "epoch": 0.008106396453451552, "grad_norm": 8.5535249710083, "learning_rate": 3.242147922998987e-07, "loss": 1.3891, "step": 32 }, { "epoch": 0.008359721342621912, "grad_norm": 8.260271072387695, "learning_rate": 3.3434650455927055e-07, "loss": 1.4133, "step": 33 }, { "epoch": 0.008613046231792274, "grad_norm": 10.480013847351074, "learning_rate": 3.444782168186424e-07, "loss": 1.4304, "step": 34 }, { "epoch": 0.008866371120962635, "grad_norm": 7.807279109954834, "learning_rate": 3.5460992907801425e-07, "loss": 1.5742, "step": 35 }, { "epoch": 0.009119696010132995, "grad_norm": 8.243898391723633, "learning_rate": 3.6474164133738603e-07, "loss": 1.3216, "step": 36 }, { "epoch": 0.009373020899303357, "grad_norm": 8.004374504089355, "learning_rate": 3.748733535967579e-07, "loss": 1.4215, "step": 37 }, { "epoch": 0.009626345788473718, "grad_norm": 7.767487049102783, "learning_rate": 3.8500506585612973e-07, "loss": 1.4052, "step": 38 }, { "epoch": 0.009879670677644078, "grad_norm": 7.188638210296631, "learning_rate": 3.9513677811550156e-07, "loss": 1.3566, "step": 39 }, { "epoch": 0.01013299556681444, "grad_norm": 10.821622848510742, "learning_rate": 4.052684903748734e-07, "loss": 1.3463, "step": 40 }, { "epoch": 0.010386320455984801, "grad_norm": 8.293659210205078, "learning_rate": 4.1540020263424527e-07, "loss": 1.4509, "step": 41 }, { "epoch": 0.010639645345155161, "grad_norm": 6.990182876586914, "learning_rate": 4.2553191489361704e-07, "loss": 1.4913, "step": 42 }, { "epoch": 0.010892970234325523, "grad_norm": 7.317448139190674, "learning_rate": 4.356636271529889e-07, "loss": 1.3197, "step": 43 }, { "epoch": 0.011146295123495884, "grad_norm": 7.312219619750977, "learning_rate": 4.457953394123607e-07, "loss": 1.3949, "step": 44 }, { "epoch": 0.011399620012666244, "grad_norm": 7.905741214752197, "learning_rate": 4.5592705167173257e-07, "loss": 1.4606, "step": 45 }, { "epoch": 0.011652944901836606, "grad_norm": 7.263676166534424, "learning_rate": 4.660587639311044e-07, "loss": 1.4277, "step": 46 }, { "epoch": 0.011906269791006967, "grad_norm": 7.631997585296631, "learning_rate": 4.7619047619047623e-07, "loss": 1.3829, "step": 47 }, { "epoch": 0.012159594680177327, "grad_norm": 7.328865051269531, "learning_rate": 4.863221884498481e-07, "loss": 1.4042, "step": 48 }, { "epoch": 0.012412919569347688, "grad_norm": 6.723066806793213, "learning_rate": 4.964539007092199e-07, "loss": 1.3893, "step": 49 }, { "epoch": 0.01266624445851805, "grad_norm": 7.137428283691406, "learning_rate": 5.065856129685918e-07, "loss": 1.3351, "step": 50 }, { "epoch": 0.01291956934768841, "grad_norm": 5.965123176574707, "learning_rate": 
5.167173252279636e-07, "loss": 1.3512, "step": 51 }, { "epoch": 0.013172894236858771, "grad_norm": 6.249057769775391, "learning_rate": 5.268490374873354e-07, "loss": 1.2029, "step": 52 }, { "epoch": 0.013426219126029133, "grad_norm": 7.059906005859375, "learning_rate": 5.369807497467072e-07, "loss": 1.412, "step": 53 }, { "epoch": 0.013679544015199493, "grad_norm": 6.9676947593688965, "learning_rate": 5.471124620060791e-07, "loss": 1.2606, "step": 54 }, { "epoch": 0.013932868904369854, "grad_norm": 6.916459083557129, "learning_rate": 5.572441742654509e-07, "loss": 1.3164, "step": 55 }, { "epoch": 0.014186193793540216, "grad_norm": 6.74832820892334, "learning_rate": 5.673758865248227e-07, "loss": 1.4729, "step": 56 }, { "epoch": 0.014439518682710576, "grad_norm": 6.46651029586792, "learning_rate": 5.775075987841945e-07, "loss": 1.4083, "step": 57 }, { "epoch": 0.014692843571880937, "grad_norm": 6.30950927734375, "learning_rate": 5.876393110435664e-07, "loss": 1.2164, "step": 58 }, { "epoch": 0.014946168461051299, "grad_norm": 6.412931442260742, "learning_rate": 5.977710233029382e-07, "loss": 1.2824, "step": 59 }, { "epoch": 0.015199493350221659, "grad_norm": 5.739402770996094, "learning_rate": 6.079027355623101e-07, "loss": 1.3378, "step": 60 }, { "epoch": 0.01545281823939202, "grad_norm": 5.976473808288574, "learning_rate": 6.180344478216819e-07, "loss": 1.2651, "step": 61 }, { "epoch": 0.015706143128562382, "grad_norm": 6.36185359954834, "learning_rate": 6.281661600810538e-07, "loss": 1.3197, "step": 62 }, { "epoch": 0.015959468017732743, "grad_norm": 5.828892707824707, "learning_rate": 6.382978723404255e-07, "loss": 1.2424, "step": 63 }, { "epoch": 0.016212792906903105, "grad_norm": 6.295270919799805, "learning_rate": 6.484295845997974e-07, "loss": 1.223, "step": 64 }, { "epoch": 0.016466117796073463, "grad_norm": 6.278206825256348, "learning_rate": 6.585612968591693e-07, "loss": 1.2775, "step": 65 }, { "epoch": 0.016719442685243825, "grad_norm": 5.356352806091309, "learning_rate": 6.686930091185411e-07, "loss": 1.1728, "step": 66 }, { "epoch": 0.016972767574414186, "grad_norm": 6.592249393463135, "learning_rate": 6.788247213779129e-07, "loss": 1.3805, "step": 67 }, { "epoch": 0.017226092463584548, "grad_norm": 6.495695114135742, "learning_rate": 6.889564336372847e-07, "loss": 1.4029, "step": 68 }, { "epoch": 0.01747941735275491, "grad_norm": 5.936342239379883, "learning_rate": 6.990881458966566e-07, "loss": 1.2631, "step": 69 }, { "epoch": 0.01773274224192527, "grad_norm": 6.378044128417969, "learning_rate": 7.092198581560285e-07, "loss": 1.4561, "step": 70 }, { "epoch": 0.01798606713109563, "grad_norm": 6.01347541809082, "learning_rate": 7.193515704154002e-07, "loss": 1.3346, "step": 71 }, { "epoch": 0.01823939202026599, "grad_norm": 6.118315696716309, "learning_rate": 7.294832826747721e-07, "loss": 1.2509, "step": 72 }, { "epoch": 0.018492716909436352, "grad_norm": 5.997556686401367, "learning_rate": 7.39614994934144e-07, "loss": 1.3574, "step": 73 }, { "epoch": 0.018746041798606713, "grad_norm": 5.349987506866455, "learning_rate": 7.497467071935158e-07, "loss": 1.2392, "step": 74 }, { "epoch": 0.018999366687777075, "grad_norm": 5.7259907722473145, "learning_rate": 7.598784194528875e-07, "loss": 1.2385, "step": 75 }, { "epoch": 0.019252691576947437, "grad_norm": 6.2682600021362305, "learning_rate": 7.700101317122595e-07, "loss": 1.2794, "step": 76 }, { "epoch": 0.019506016466117795, "grad_norm": 5.695931911468506, "learning_rate": 7.801418439716313e-07, "loss": 1.3075, "step": 77 }, { 
"epoch": 0.019759341355288156, "grad_norm": 5.9824700355529785, "learning_rate": 7.902735562310031e-07, "loss": 1.4274, "step": 78 }, { "epoch": 0.020012666244458518, "grad_norm": 6.070525646209717, "learning_rate": 8.00405268490375e-07, "loss": 1.2768, "step": 79 }, { "epoch": 0.02026599113362888, "grad_norm": 6.06186580657959, "learning_rate": 8.105369807497468e-07, "loss": 1.2878, "step": 80 }, { "epoch": 0.02051931602279924, "grad_norm": 5.997804641723633, "learning_rate": 8.206686930091186e-07, "loss": 1.2837, "step": 81 }, { "epoch": 0.020772640911969602, "grad_norm": 5.837419509887695, "learning_rate": 8.308004052684905e-07, "loss": 1.2875, "step": 82 }, { "epoch": 0.02102596580113996, "grad_norm": 6.083469390869141, "learning_rate": 8.409321175278623e-07, "loss": 1.3317, "step": 83 }, { "epoch": 0.021279290690310322, "grad_norm": 5.9370198249816895, "learning_rate": 8.510638297872341e-07, "loss": 1.228, "step": 84 }, { "epoch": 0.021532615579480684, "grad_norm": 5.628711700439453, "learning_rate": 8.611955420466059e-07, "loss": 1.259, "step": 85 }, { "epoch": 0.021785940468651045, "grad_norm": 5.700177192687988, "learning_rate": 8.713272543059778e-07, "loss": 1.3854, "step": 86 }, { "epoch": 0.022039265357821407, "grad_norm": 5.34316873550415, "learning_rate": 8.814589665653496e-07, "loss": 1.1796, "step": 87 }, { "epoch": 0.02229259024699177, "grad_norm": 5.681664943695068, "learning_rate": 8.915906788247214e-07, "loss": 1.3161, "step": 88 }, { "epoch": 0.022545915136162126, "grad_norm": 5.612365245819092, "learning_rate": 9.017223910840933e-07, "loss": 1.2537, "step": 89 }, { "epoch": 0.022799240025332488, "grad_norm": 6.493178844451904, "learning_rate": 9.118541033434651e-07, "loss": 1.3713, "step": 90 }, { "epoch": 0.02305256491450285, "grad_norm": 5.3083295822143555, "learning_rate": 9.219858156028369e-07, "loss": 1.1649, "step": 91 }, { "epoch": 0.02330588980367321, "grad_norm": 6.291053295135498, "learning_rate": 9.321175278622088e-07, "loss": 1.2495, "step": 92 }, { "epoch": 0.023559214692843573, "grad_norm": 6.047577381134033, "learning_rate": 9.422492401215806e-07, "loss": 1.3651, "step": 93 }, { "epoch": 0.023812539582013934, "grad_norm": 5.668388843536377, "learning_rate": 9.523809523809525e-07, "loss": 1.2696, "step": 94 }, { "epoch": 0.024065864471184292, "grad_norm": 5.498602867126465, "learning_rate": 9.625126646403244e-07, "loss": 1.1631, "step": 95 }, { "epoch": 0.024319189360354654, "grad_norm": 6.081385612487793, "learning_rate": 9.726443768996962e-07, "loss": 1.3137, "step": 96 }, { "epoch": 0.024572514249525015, "grad_norm": 6.417263507843018, "learning_rate": 9.827760891590678e-07, "loss": 1.4252, "step": 97 }, { "epoch": 0.024825839138695377, "grad_norm": 5.6804656982421875, "learning_rate": 9.929078014184399e-07, "loss": 1.291, "step": 98 }, { "epoch": 0.02507916402786574, "grad_norm": 6.759496688842773, "learning_rate": 1.0030395136778117e-06, "loss": 1.3338, "step": 99 }, { "epoch": 0.0253324889170361, "grad_norm": 5.483371257781982, "learning_rate": 1.0131712259371835e-06, "loss": 1.1854, "step": 100 }, { "epoch": 0.025585813806206458, "grad_norm": 5.6341681480407715, "learning_rate": 1.0233029381965553e-06, "loss": 1.23, "step": 101 }, { "epoch": 0.02583913869537682, "grad_norm": 5.76145076751709, "learning_rate": 1.0334346504559272e-06, "loss": 1.3177, "step": 102 }, { "epoch": 0.02609246358454718, "grad_norm": 5.4306745529174805, "learning_rate": 1.043566362715299e-06, "loss": 1.2265, "step": 103 }, { "epoch": 0.026345788473717543, "grad_norm": 
5.675881862640381, "learning_rate": 1.0536980749746708e-06, "loss": 1.1397, "step": 104 }, { "epoch": 0.026599113362887904, "grad_norm": 5.60020112991333, "learning_rate": 1.0638297872340427e-06, "loss": 1.3454, "step": 105 }, { "epoch": 0.026852438252058266, "grad_norm": 5.341634273529053, "learning_rate": 1.0739614994934145e-06, "loss": 1.1553, "step": 106 }, { "epoch": 0.027105763141228624, "grad_norm": 5.30195426940918, "learning_rate": 1.0840932117527863e-06, "loss": 1.2349, "step": 107 }, { "epoch": 0.027359088030398986, "grad_norm": 5.831993103027344, "learning_rate": 1.0942249240121581e-06, "loss": 1.332, "step": 108 }, { "epoch": 0.027612412919569347, "grad_norm": 5.65993595123291, "learning_rate": 1.10435663627153e-06, "loss": 1.4381, "step": 109 }, { "epoch": 0.02786573780873971, "grad_norm": 5.581968307495117, "learning_rate": 1.1144883485309018e-06, "loss": 1.1786, "step": 110 }, { "epoch": 0.02811906269791007, "grad_norm": 5.961047649383545, "learning_rate": 1.1246200607902736e-06, "loss": 1.2945, "step": 111 }, { "epoch": 0.028372387587080432, "grad_norm": 6.274909496307373, "learning_rate": 1.1347517730496454e-06, "loss": 1.4267, "step": 112 }, { "epoch": 0.028625712476250793, "grad_norm": 5.144623279571533, "learning_rate": 1.1448834853090175e-06, "loss": 1.168, "step": 113 }, { "epoch": 0.02887903736542115, "grad_norm": 5.619993209838867, "learning_rate": 1.155015197568389e-06, "loss": 1.2755, "step": 114 }, { "epoch": 0.029132362254591513, "grad_norm": 5.38019323348999, "learning_rate": 1.165146909827761e-06, "loss": 1.2547, "step": 115 }, { "epoch": 0.029385687143761875, "grad_norm": 5.788126468658447, "learning_rate": 1.1752786220871327e-06, "loss": 1.233, "step": 116 }, { "epoch": 0.029639012032932236, "grad_norm": 5.342813014984131, "learning_rate": 1.1854103343465048e-06, "loss": 1.1753, "step": 117 }, { "epoch": 0.029892336922102598, "grad_norm": 5.679368019104004, "learning_rate": 1.1955420466058764e-06, "loss": 1.3854, "step": 118 }, { "epoch": 0.03014566181127296, "grad_norm": 5.998026371002197, "learning_rate": 1.2056737588652482e-06, "loss": 1.3227, "step": 119 }, { "epoch": 0.030398986700443317, "grad_norm": 5.865193843841553, "learning_rate": 1.2158054711246203e-06, "loss": 1.3186, "step": 120 }, { "epoch": 0.03065231158961368, "grad_norm": 5.349485874176025, "learning_rate": 1.2259371833839919e-06, "loss": 1.116, "step": 121 }, { "epoch": 0.03090563647878404, "grad_norm": 5.940770626068115, "learning_rate": 1.2360688956433637e-06, "loss": 1.3233, "step": 122 }, { "epoch": 0.031158961367954402, "grad_norm": 5.499122142791748, "learning_rate": 1.2462006079027357e-06, "loss": 1.269, "step": 123 }, { "epoch": 0.031412286257124764, "grad_norm": 5.793130874633789, "learning_rate": 1.2563323201621076e-06, "loss": 1.3234, "step": 124 }, { "epoch": 0.031665611146295125, "grad_norm": 5.939444541931152, "learning_rate": 1.2664640324214794e-06, "loss": 1.3782, "step": 125 }, { "epoch": 0.03191893603546549, "grad_norm": 5.3546013832092285, "learning_rate": 1.276595744680851e-06, "loss": 1.2291, "step": 126 }, { "epoch": 0.03217226092463585, "grad_norm": 5.786067962646484, "learning_rate": 1.286727456940223e-06, "loss": 1.2777, "step": 127 }, { "epoch": 0.03242558581380621, "grad_norm": 5.49869441986084, "learning_rate": 1.2968591691995949e-06, "loss": 1.2273, "step": 128 }, { "epoch": 0.032678910702976564, "grad_norm": 5.4831929206848145, "learning_rate": 1.3069908814589665e-06, "loss": 1.2496, "step": 129 }, { "epoch": 0.032932235592146926, "grad_norm": 
5.68988561630249, "learning_rate": 1.3171225937183385e-06, "loss": 1.2683, "step": 130 }, { "epoch": 0.03318556048131729, "grad_norm": 4.994076728820801, "learning_rate": 1.3272543059777104e-06, "loss": 1.3347, "step": 131 }, { "epoch": 0.03343888537048765, "grad_norm": 5.527144432067871, "learning_rate": 1.3373860182370822e-06, "loss": 1.3032, "step": 132 }, { "epoch": 0.03369221025965801, "grad_norm": 5.677175045013428, "learning_rate": 1.347517730496454e-06, "loss": 1.2438, "step": 133 }, { "epoch": 0.03394553514882837, "grad_norm": 5.882457256317139, "learning_rate": 1.3576494427558258e-06, "loss": 1.3001, "step": 134 }, { "epoch": 0.034198860037998734, "grad_norm": 5.756645202636719, "learning_rate": 1.3677811550151977e-06, "loss": 1.2909, "step": 135 }, { "epoch": 0.034452184927169095, "grad_norm": 5.526420593261719, "learning_rate": 1.3779128672745695e-06, "loss": 1.2927, "step": 136 }, { "epoch": 0.03470550981633946, "grad_norm": 5.440059185028076, "learning_rate": 1.3880445795339415e-06, "loss": 1.3548, "step": 137 }, { "epoch": 0.03495883470550982, "grad_norm": 5.624795436859131, "learning_rate": 1.3981762917933131e-06, "loss": 1.3193, "step": 138 }, { "epoch": 0.03521215959468018, "grad_norm": 6.083158493041992, "learning_rate": 1.408308004052685e-06, "loss": 1.3284, "step": 139 }, { "epoch": 0.03546548448385054, "grad_norm": 6.338374137878418, "learning_rate": 1.418439716312057e-06, "loss": 1.2175, "step": 140 }, { "epoch": 0.035718809373020896, "grad_norm": 5.234193801879883, "learning_rate": 1.4285714285714286e-06, "loss": 1.1537, "step": 141 }, { "epoch": 0.03597213426219126, "grad_norm": 5.26567268371582, "learning_rate": 1.4387031408308005e-06, "loss": 1.1837, "step": 142 }, { "epoch": 0.03622545915136162, "grad_norm": 5.7893147468566895, "learning_rate": 1.4488348530901725e-06, "loss": 1.1652, "step": 143 }, { "epoch": 0.03647878404053198, "grad_norm": 5.951411247253418, "learning_rate": 1.4589665653495441e-06, "loss": 1.3915, "step": 144 }, { "epoch": 0.03673210892970234, "grad_norm": 5.907873630523682, "learning_rate": 1.4690982776089161e-06, "loss": 1.3022, "step": 145 }, { "epoch": 0.036985433818872704, "grad_norm": 5.551837921142578, "learning_rate": 1.479229989868288e-06, "loss": 1.2273, "step": 146 }, { "epoch": 0.037238758708043065, "grad_norm": 5.3975443840026855, "learning_rate": 1.4893617021276596e-06, "loss": 1.1165, "step": 147 }, { "epoch": 0.03749208359721343, "grad_norm": 5.251124382019043, "learning_rate": 1.4994934143870316e-06, "loss": 1.2381, "step": 148 }, { "epoch": 0.03774540848638379, "grad_norm": 5.477112293243408, "learning_rate": 1.5096251266464035e-06, "loss": 1.1766, "step": 149 }, { "epoch": 0.03799873337555415, "grad_norm": 5.669548511505127, "learning_rate": 1.519756838905775e-06, "loss": 1.3699, "step": 150 }, { "epoch": 0.03825205826472451, "grad_norm": 5.682290077209473, "learning_rate": 1.5298885511651471e-06, "loss": 1.2886, "step": 151 }, { "epoch": 0.03850538315389487, "grad_norm": 5.136720657348633, "learning_rate": 1.540020263424519e-06, "loss": 1.1589, "step": 152 }, { "epoch": 0.03875870804306523, "grad_norm": 5.364446640014648, "learning_rate": 1.5501519756838905e-06, "loss": 1.168, "step": 153 }, { "epoch": 0.03901203293223559, "grad_norm": 5.042862415313721, "learning_rate": 1.5602836879432626e-06, "loss": 1.1465, "step": 154 }, { "epoch": 0.03926535782140595, "grad_norm": 5.456974983215332, "learning_rate": 1.5704154002026344e-06, "loss": 1.233, "step": 155 }, { "epoch": 0.03951868271057631, "grad_norm": 5.630804538726807, 
"learning_rate": 1.5805471124620062e-06, "loss": 1.0355, "step": 156 }, { "epoch": 0.039772007599746674, "grad_norm": 5.544961452484131, "learning_rate": 1.590678824721378e-06, "loss": 1.336, "step": 157 }, { "epoch": 0.040025332488917036, "grad_norm": 5.7770676612854, "learning_rate": 1.60081053698075e-06, "loss": 1.2418, "step": 158 }, { "epoch": 0.0402786573780874, "grad_norm": 5.462604999542236, "learning_rate": 1.6109422492401217e-06, "loss": 1.1947, "step": 159 }, { "epoch": 0.04053198226725776, "grad_norm": 5.253891944885254, "learning_rate": 1.6210739614994935e-06, "loss": 1.178, "step": 160 }, { "epoch": 0.04078530715642812, "grad_norm": 5.2835001945495605, "learning_rate": 1.6312056737588656e-06, "loss": 1.1609, "step": 161 }, { "epoch": 0.04103863204559848, "grad_norm": 5.4543914794921875, "learning_rate": 1.6413373860182372e-06, "loss": 1.1959, "step": 162 }, { "epoch": 0.04129195693476884, "grad_norm": 6.159005641937256, "learning_rate": 1.651469098277609e-06, "loss": 1.2731, "step": 163 }, { "epoch": 0.041545281823939205, "grad_norm": 5.177642822265625, "learning_rate": 1.661600810536981e-06, "loss": 1.1241, "step": 164 }, { "epoch": 0.04179860671310956, "grad_norm": 5.764869213104248, "learning_rate": 1.6717325227963527e-06, "loss": 1.2032, "step": 165 }, { "epoch": 0.04205193160227992, "grad_norm": 5.476707935333252, "learning_rate": 1.6818642350557245e-06, "loss": 1.291, "step": 166 }, { "epoch": 0.04230525649145028, "grad_norm": 5.529805660247803, "learning_rate": 1.6919959473150963e-06, "loss": 1.2466, "step": 167 }, { "epoch": 0.042558581380620644, "grad_norm": 5.377219200134277, "learning_rate": 1.7021276595744682e-06, "loss": 1.1435, "step": 168 }, { "epoch": 0.042811906269791006, "grad_norm": 5.9831013679504395, "learning_rate": 1.7122593718338402e-06, "loss": 1.2628, "step": 169 }, { "epoch": 0.04306523115896137, "grad_norm": 5.454530715942383, "learning_rate": 1.7223910840932118e-06, "loss": 1.2458, "step": 170 }, { "epoch": 0.04331855604813173, "grad_norm": 5.665981769561768, "learning_rate": 1.7325227963525836e-06, "loss": 1.239, "step": 171 }, { "epoch": 0.04357188093730209, "grad_norm": 5.54949426651001, "learning_rate": 1.7426545086119557e-06, "loss": 1.144, "step": 172 }, { "epoch": 0.04382520582647245, "grad_norm": 5.391991138458252, "learning_rate": 1.7527862208713273e-06, "loss": 1.2089, "step": 173 }, { "epoch": 0.044078530715642814, "grad_norm": 5.288974285125732, "learning_rate": 1.7629179331306991e-06, "loss": 1.2922, "step": 174 }, { "epoch": 0.044331855604813175, "grad_norm": 5.138978004455566, "learning_rate": 1.7730496453900712e-06, "loss": 1.1125, "step": 175 }, { "epoch": 0.04458518049398354, "grad_norm": 5.508606433868408, "learning_rate": 1.7831813576494428e-06, "loss": 1.3176, "step": 176 }, { "epoch": 0.0448385053831539, "grad_norm": 5.380614280700684, "learning_rate": 1.7933130699088146e-06, "loss": 1.1962, "step": 177 }, { "epoch": 0.04509183027232425, "grad_norm": 5.325069904327393, "learning_rate": 1.8034447821681866e-06, "loss": 1.2922, "step": 178 }, { "epoch": 0.045345155161494614, "grad_norm": 5.326142311096191, "learning_rate": 1.8135764944275583e-06, "loss": 1.1661, "step": 179 }, { "epoch": 0.045598480050664976, "grad_norm": 5.590676784515381, "learning_rate": 1.8237082066869303e-06, "loss": 1.2746, "step": 180 }, { "epoch": 0.04585180493983534, "grad_norm": 5.422430515289307, "learning_rate": 1.8338399189463021e-06, "loss": 1.2305, "step": 181 }, { "epoch": 0.0461051298290057, "grad_norm": 5.1848955154418945, "learning_rate": 
1.8439716312056737e-06, "loss": 1.1117, "step": 182 }, { "epoch": 0.04635845471817606, "grad_norm": 5.1731462478637695, "learning_rate": 1.8541033434650458e-06, "loss": 1.1643, "step": 183 }, { "epoch": 0.04661177960734642, "grad_norm": 5.318070888519287, "learning_rate": 1.8642350557244176e-06, "loss": 1.2171, "step": 184 }, { "epoch": 0.046865104496516784, "grad_norm": 5.908895015716553, "learning_rate": 1.8743667679837892e-06, "loss": 1.422, "step": 185 }, { "epoch": 0.047118429385687145, "grad_norm": 5.597854137420654, "learning_rate": 1.8844984802431613e-06, "loss": 1.2011, "step": 186 }, { "epoch": 0.04737175427485751, "grad_norm": 5.565207004547119, "learning_rate": 1.894630192502533e-06, "loss": 1.1722, "step": 187 }, { "epoch": 0.04762507916402787, "grad_norm": 5.958907604217529, "learning_rate": 1.904761904761905e-06, "loss": 1.3827, "step": 188 }, { "epoch": 0.04787840405319823, "grad_norm": 5.93968391418457, "learning_rate": 1.9148936170212767e-06, "loss": 1.1789, "step": 189 }, { "epoch": 0.048131728942368585, "grad_norm": 5.312741279602051, "learning_rate": 1.9250253292806488e-06, "loss": 1.2322, "step": 190 }, { "epoch": 0.048385053831538946, "grad_norm": 5.421358108520508, "learning_rate": 1.9351570415400204e-06, "loss": 1.1542, "step": 191 }, { "epoch": 0.04863837872070931, "grad_norm": 5.5818095207214355, "learning_rate": 1.9452887537993924e-06, "loss": 1.2192, "step": 192 }, { "epoch": 0.04889170360987967, "grad_norm": 5.480461597442627, "learning_rate": 1.955420466058764e-06, "loss": 1.2174, "step": 193 }, { "epoch": 0.04914502849905003, "grad_norm": 5.37147855758667, "learning_rate": 1.9655521783181357e-06, "loss": 1.2698, "step": 194 }, { "epoch": 0.04939835338822039, "grad_norm": 5.655091285705566, "learning_rate": 1.9756838905775077e-06, "loss": 1.0975, "step": 195 }, { "epoch": 0.049651678277390754, "grad_norm": 5.748220443725586, "learning_rate": 1.9858156028368797e-06, "loss": 1.3046, "step": 196 }, { "epoch": 0.049905003166561115, "grad_norm": 6.164122104644775, "learning_rate": 1.9959473150962513e-06, "loss": 1.2854, "step": 197 }, { "epoch": 0.05015832805573148, "grad_norm": 5.593215465545654, "learning_rate": 2.0060790273556234e-06, "loss": 1.2397, "step": 198 }, { "epoch": 0.05041165294490184, "grad_norm": 5.580338001251221, "learning_rate": 2.016210739614995e-06, "loss": 1.1977, "step": 199 }, { "epoch": 0.0506649778340722, "grad_norm": 6.3558878898620605, "learning_rate": 2.026342451874367e-06, "loss": 1.2491, "step": 200 }, { "epoch": 0.05091830272324256, "grad_norm": 5.370153427124023, "learning_rate": 2.0364741641337387e-06, "loss": 1.1747, "step": 201 }, { "epoch": 0.051171627612412916, "grad_norm": 5.352872371673584, "learning_rate": 2.0466058763931107e-06, "loss": 1.3069, "step": 202 }, { "epoch": 0.05142495250158328, "grad_norm": 5.849454402923584, "learning_rate": 2.0567375886524823e-06, "loss": 1.3362, "step": 203 }, { "epoch": 0.05167827739075364, "grad_norm": 5.278831481933594, "learning_rate": 2.0668693009118543e-06, "loss": 1.1309, "step": 204 }, { "epoch": 0.051931602279924, "grad_norm": 5.236042022705078, "learning_rate": 2.0770010131712264e-06, "loss": 1.1772, "step": 205 }, { "epoch": 0.05218492716909436, "grad_norm": 5.872128963470459, "learning_rate": 2.087132725430598e-06, "loss": 1.2255, "step": 206 }, { "epoch": 0.052438252058264724, "grad_norm": 5.233270645141602, "learning_rate": 2.0972644376899696e-06, "loss": 1.2479, "step": 207 }, { "epoch": 0.052691576947435086, "grad_norm": 5.511275768280029, "learning_rate": 
2.1073961499493417e-06, "loss": 1.1882, "step": 208 }, { "epoch": 0.05294490183660545, "grad_norm": 5.640159606933594, "learning_rate": 2.1175278622087133e-06, "loss": 1.4266, "step": 209 }, { "epoch": 0.05319822672577581, "grad_norm": 5.4359612464904785, "learning_rate": 2.1276595744680853e-06, "loss": 1.227, "step": 210 }, { "epoch": 0.05345155161494617, "grad_norm": 5.258604049682617, "learning_rate": 2.1377912867274573e-06, "loss": 1.1736, "step": 211 }, { "epoch": 0.05370487650411653, "grad_norm": 5.675787448883057, "learning_rate": 2.147922998986829e-06, "loss": 1.2261, "step": 212 }, { "epoch": 0.05395820139328689, "grad_norm": 5.490091800689697, "learning_rate": 2.158054711246201e-06, "loss": 1.1692, "step": 213 }, { "epoch": 0.05421152628245725, "grad_norm": 5.569422721862793, "learning_rate": 2.1681864235055726e-06, "loss": 1.2708, "step": 214 }, { "epoch": 0.05446485117162761, "grad_norm": 6.177036762237549, "learning_rate": 2.1783181357649442e-06, "loss": 1.2441, "step": 215 }, { "epoch": 0.05471817606079797, "grad_norm": 4.792664051055908, "learning_rate": 2.1884498480243163e-06, "loss": 1.1072, "step": 216 }, { "epoch": 0.05497150094996833, "grad_norm": 5.102123260498047, "learning_rate": 2.1985815602836883e-06, "loss": 1.1383, "step": 217 }, { "epoch": 0.055224825839138694, "grad_norm": 5.349294662475586, "learning_rate": 2.20871327254306e-06, "loss": 1.0587, "step": 218 }, { "epoch": 0.055478150728309056, "grad_norm": 5.529597759246826, "learning_rate": 2.218844984802432e-06, "loss": 1.2363, "step": 219 }, { "epoch": 0.05573147561747942, "grad_norm": 5.755967140197754, "learning_rate": 2.2289766970618036e-06, "loss": 1.3302, "step": 220 }, { "epoch": 0.05598480050664978, "grad_norm": 5.315429210662842, "learning_rate": 2.2391084093211756e-06, "loss": 1.2632, "step": 221 }, { "epoch": 0.05623812539582014, "grad_norm": 5.7810139656066895, "learning_rate": 2.2492401215805472e-06, "loss": 1.1679, "step": 222 }, { "epoch": 0.0564914502849905, "grad_norm": 5.621176719665527, "learning_rate": 2.2593718338399193e-06, "loss": 1.2638, "step": 223 }, { "epoch": 0.056744775174160864, "grad_norm": 5.352179527282715, "learning_rate": 2.269503546099291e-06, "loss": 1.2045, "step": 224 }, { "epoch": 0.056998100063331225, "grad_norm": 5.482887268066406, "learning_rate": 2.279635258358663e-06, "loss": 1.0832, "step": 225 }, { "epoch": 0.05725142495250159, "grad_norm": 5.484334468841553, "learning_rate": 2.289766970618035e-06, "loss": 1.1747, "step": 226 }, { "epoch": 0.05750474984167194, "grad_norm": 5.4562554359436035, "learning_rate": 2.2998986828774066e-06, "loss": 1.2621, "step": 227 }, { "epoch": 0.0577580747308423, "grad_norm": 5.370246410369873, "learning_rate": 2.310030395136778e-06, "loss": 1.055, "step": 228 }, { "epoch": 0.058011399620012664, "grad_norm": 5.205504417419434, "learning_rate": 2.3201621073961502e-06, "loss": 1.1595, "step": 229 }, { "epoch": 0.058264724509183026, "grad_norm": 5.665552616119385, "learning_rate": 2.330293819655522e-06, "loss": 1.2979, "step": 230 }, { "epoch": 0.05851804939835339, "grad_norm": 5.3679070472717285, "learning_rate": 2.340425531914894e-06, "loss": 1.2985, "step": 231 }, { "epoch": 0.05877137428752375, "grad_norm": 5.475890636444092, "learning_rate": 2.3505572441742655e-06, "loss": 1.1784, "step": 232 }, { "epoch": 0.05902469917669411, "grad_norm": 5.7248992919921875, "learning_rate": 2.3606889564336375e-06, "loss": 1.2104, "step": 233 }, { "epoch": 0.05927802406586447, "grad_norm": 5.338497638702393, "learning_rate": 
2.3708206686930096e-06, "loss": 1.1068, "step": 234 }, { "epoch": 0.059531348955034834, "grad_norm": 5.562758445739746, "learning_rate": 2.380952380952381e-06, "loss": 1.2252, "step": 235 }, { "epoch": 0.059784673844205195, "grad_norm": 5.872779846191406, "learning_rate": 2.391084093211753e-06, "loss": 1.1181, "step": 236 }, { "epoch": 0.06003799873337556, "grad_norm": 5.6264214515686035, "learning_rate": 2.401215805471125e-06, "loss": 1.3164, "step": 237 }, { "epoch": 0.06029132362254592, "grad_norm": 5.5919647216796875, "learning_rate": 2.4113475177304965e-06, "loss": 1.1989, "step": 238 }, { "epoch": 0.06054464851171627, "grad_norm": 5.198634147644043, "learning_rate": 2.4214792299898685e-06, "loss": 1.2422, "step": 239 }, { "epoch": 0.060797973400886635, "grad_norm": 5.159615993499756, "learning_rate": 2.4316109422492405e-06, "loss": 1.1807, "step": 240 }, { "epoch": 0.061051298290056996, "grad_norm": 5.649820804595947, "learning_rate": 2.441742654508612e-06, "loss": 1.1369, "step": 241 }, { "epoch": 0.06130462317922736, "grad_norm": 5.47420597076416, "learning_rate": 2.4518743667679838e-06, "loss": 1.304, "step": 242 }, { "epoch": 0.06155794806839772, "grad_norm": 4.905952453613281, "learning_rate": 2.462006079027356e-06, "loss": 1.1568, "step": 243 }, { "epoch": 0.06181127295756808, "grad_norm": 5.942564964294434, "learning_rate": 2.4721377912867274e-06, "loss": 1.272, "step": 244 }, { "epoch": 0.06206459784673844, "grad_norm": 6.035305976867676, "learning_rate": 2.4822695035460995e-06, "loss": 1.2711, "step": 245 }, { "epoch": 0.062317922735908804, "grad_norm": 5.5649800300598145, "learning_rate": 2.4924012158054715e-06, "loss": 1.2617, "step": 246 }, { "epoch": 0.06257124762507917, "grad_norm": 5.223843097686768, "learning_rate": 2.502532928064843e-06, "loss": 1.2616, "step": 247 }, { "epoch": 0.06282457251424953, "grad_norm": 5.219488620758057, "learning_rate": 2.512664640324215e-06, "loss": 1.162, "step": 248 }, { "epoch": 0.06307789740341989, "grad_norm": 5.6271867752075195, "learning_rate": 2.5227963525835868e-06, "loss": 1.2603, "step": 249 }, { "epoch": 0.06333122229259025, "grad_norm": 5.215453147888184, "learning_rate": 2.532928064842959e-06, "loss": 1.1556, "step": 250 }, { "epoch": 0.06358454718176061, "grad_norm": 5.587162494659424, "learning_rate": 2.543059777102331e-06, "loss": 1.2228, "step": 251 }, { "epoch": 0.06383787207093097, "grad_norm": 5.448244571685791, "learning_rate": 2.553191489361702e-06, "loss": 1.1525, "step": 252 }, { "epoch": 0.06409119696010133, "grad_norm": 6.127096176147461, "learning_rate": 2.563323201621074e-06, "loss": 1.1575, "step": 253 }, { "epoch": 0.0643445218492717, "grad_norm": 4.879481315612793, "learning_rate": 2.573454913880446e-06, "loss": 1.1375, "step": 254 }, { "epoch": 0.06459784673844206, "grad_norm": 5.284853458404541, "learning_rate": 2.5835866261398177e-06, "loss": 1.1636, "step": 255 }, { "epoch": 0.06485117162761242, "grad_norm": 4.960007190704346, "learning_rate": 2.5937183383991898e-06, "loss": 1.1259, "step": 256 }, { "epoch": 0.06510449651678277, "grad_norm": 5.490287780761719, "learning_rate": 2.603850050658562e-06, "loss": 1.2555, "step": 257 }, { "epoch": 0.06535782140595313, "grad_norm": 5.298451900482178, "learning_rate": 2.613981762917933e-06, "loss": 1.3183, "step": 258 }, { "epoch": 0.06561114629512349, "grad_norm": 4.963212966918945, "learning_rate": 2.624113475177305e-06, "loss": 1.1287, "step": 259 }, { "epoch": 0.06586447118429385, "grad_norm": 5.298976421356201, "learning_rate": 2.634245187436677e-06, 
"loss": 1.1672, "step": 260 }, { "epoch": 0.06611779607346421, "grad_norm": 5.273107528686523, "learning_rate": 2.644376899696049e-06, "loss": 1.1491, "step": 261 }, { "epoch": 0.06637112096263457, "grad_norm": 5.450850486755371, "learning_rate": 2.6545086119554207e-06, "loss": 1.2121, "step": 262 }, { "epoch": 0.06662444585180494, "grad_norm": 5.025203704833984, "learning_rate": 2.6646403242147928e-06, "loss": 1.1708, "step": 263 }, { "epoch": 0.0668777707409753, "grad_norm": 5.214092254638672, "learning_rate": 2.6747720364741644e-06, "loss": 1.2275, "step": 264 }, { "epoch": 0.06713109563014566, "grad_norm": 5.152961254119873, "learning_rate": 2.684903748733536e-06, "loss": 1.2722, "step": 265 }, { "epoch": 0.06738442051931602, "grad_norm": 5.896986961364746, "learning_rate": 2.695035460992908e-06, "loss": 1.3384, "step": 266 }, { "epoch": 0.06763774540848638, "grad_norm": 5.6102614402771, "learning_rate": 2.70516717325228e-06, "loss": 1.2057, "step": 267 }, { "epoch": 0.06789107029765674, "grad_norm": 5.330872535705566, "learning_rate": 2.7152988855116517e-06, "loss": 1.1099, "step": 268 }, { "epoch": 0.0681443951868271, "grad_norm": 5.673343181610107, "learning_rate": 2.7254305977710233e-06, "loss": 1.2957, "step": 269 }, { "epoch": 0.06839772007599747, "grad_norm": 5.127655506134033, "learning_rate": 2.7355623100303953e-06, "loss": 1.1429, "step": 270 }, { "epoch": 0.06865104496516783, "grad_norm": 5.601057052612305, "learning_rate": 2.745694022289767e-06, "loss": 1.1527, "step": 271 }, { "epoch": 0.06890436985433819, "grad_norm": 5.111176013946533, "learning_rate": 2.755825734549139e-06, "loss": 1.1284, "step": 272 }, { "epoch": 0.06915769474350855, "grad_norm": 5.472817897796631, "learning_rate": 2.765957446808511e-06, "loss": 1.3043, "step": 273 }, { "epoch": 0.06941101963267891, "grad_norm": 5.623200416564941, "learning_rate": 2.776089159067883e-06, "loss": 1.2232, "step": 274 }, { "epoch": 0.06966434452184928, "grad_norm": 5.1391825675964355, "learning_rate": 2.7862208713272543e-06, "loss": 1.1516, "step": 275 }, { "epoch": 0.06991766941101964, "grad_norm": 4.69110631942749, "learning_rate": 2.7963525835866263e-06, "loss": 1.1348, "step": 276 }, { "epoch": 0.07017099430019, "grad_norm": 4.934825420379639, "learning_rate": 2.8064842958459983e-06, "loss": 1.1664, "step": 277 }, { "epoch": 0.07042431918936036, "grad_norm": 5.43660306930542, "learning_rate": 2.81661600810537e-06, "loss": 1.2981, "step": 278 }, { "epoch": 0.07067764407853072, "grad_norm": 5.765442848205566, "learning_rate": 2.826747720364742e-06, "loss": 1.2113, "step": 279 }, { "epoch": 0.07093096896770108, "grad_norm": 5.242639064788818, "learning_rate": 2.836879432624114e-06, "loss": 1.2121, "step": 280 }, { "epoch": 0.07118429385687144, "grad_norm": 5.404294967651367, "learning_rate": 2.8470111448834852e-06, "loss": 1.287, "step": 281 }, { "epoch": 0.07143761874604179, "grad_norm": 5.30703592300415, "learning_rate": 2.8571428571428573e-06, "loss": 1.1924, "step": 282 }, { "epoch": 0.07169094363521215, "grad_norm": 5.358706474304199, "learning_rate": 2.8672745694022293e-06, "loss": 1.2447, "step": 283 }, { "epoch": 0.07194426852438252, "grad_norm": 5.951683521270752, "learning_rate": 2.877406281661601e-06, "loss": 1.2574, "step": 284 }, { "epoch": 0.07219759341355288, "grad_norm": 4.923393726348877, "learning_rate": 2.887537993920973e-06, "loss": 1.0977, "step": 285 }, { "epoch": 0.07245091830272324, "grad_norm": 5.036745071411133, "learning_rate": 2.897669706180345e-06, "loss": 1.2233, "step": 286 }, { "epoch": 
0.0727042431918936, "grad_norm": 5.286535739898682, "learning_rate": 2.907801418439716e-06, "loss": 1.194, "step": 287 }, { "epoch": 0.07295756808106396, "grad_norm": 5.941610336303711, "learning_rate": 2.9179331306990882e-06, "loss": 1.3264, "step": 288 }, { "epoch": 0.07321089297023432, "grad_norm": 5.5159687995910645, "learning_rate": 2.9280648429584603e-06, "loss": 1.2171, "step": 289 }, { "epoch": 0.07346421785940468, "grad_norm": 5.988364219665527, "learning_rate": 2.9381965552178323e-06, "loss": 1.2531, "step": 290 }, { "epoch": 0.07371754274857505, "grad_norm": 5.247437477111816, "learning_rate": 2.948328267477204e-06, "loss": 1.189, "step": 291 }, { "epoch": 0.07397086763774541, "grad_norm": 5.554168224334717, "learning_rate": 2.958459979736576e-06, "loss": 1.2524, "step": 292 }, { "epoch": 0.07422419252691577, "grad_norm": 5.522302627563477, "learning_rate": 2.968591691995947e-06, "loss": 1.3294, "step": 293 }, { "epoch": 0.07447751741608613, "grad_norm": 4.99888277053833, "learning_rate": 2.978723404255319e-06, "loss": 1.1604, "step": 294 }, { "epoch": 0.07473084230525649, "grad_norm": 6.059742450714111, "learning_rate": 2.9888551165146912e-06, "loss": 1.2363, "step": 295 }, { "epoch": 0.07498416719442685, "grad_norm": 5.118231296539307, "learning_rate": 2.9989868287740633e-06, "loss": 1.0948, "step": 296 }, { "epoch": 0.07523749208359722, "grad_norm": 5.535048007965088, "learning_rate": 3.009118541033435e-06, "loss": 1.1973, "step": 297 }, { "epoch": 0.07549081697276758, "grad_norm": 5.824736595153809, "learning_rate": 3.019250253292807e-06, "loss": 1.1542, "step": 298 }, { "epoch": 0.07574414186193794, "grad_norm": 5.241152763366699, "learning_rate": 3.0293819655521785e-06, "loss": 1.2698, "step": 299 }, { "epoch": 0.0759974667511083, "grad_norm": 5.363473415374756, "learning_rate": 3.03951367781155e-06, "loss": 1.1991, "step": 300 }, { "epoch": 0.07625079164027866, "grad_norm": 5.177186965942383, "learning_rate": 3.049645390070922e-06, "loss": 1.1214, "step": 301 }, { "epoch": 0.07650411652944902, "grad_norm": 5.408219814300537, "learning_rate": 3.0597771023302942e-06, "loss": 1.364, "step": 302 }, { "epoch": 0.07675744141861938, "grad_norm": 5.7646636962890625, "learning_rate": 3.0699088145896663e-06, "loss": 1.2886, "step": 303 }, { "epoch": 0.07701076630778975, "grad_norm": 5.605578422546387, "learning_rate": 3.080040526849038e-06, "loss": 1.2563, "step": 304 }, { "epoch": 0.07726409119696011, "grad_norm": 5.21433162689209, "learning_rate": 3.0901722391084095e-06, "loss": 1.3145, "step": 305 }, { "epoch": 0.07751741608613046, "grad_norm": 5.218530654907227, "learning_rate": 3.100303951367781e-06, "loss": 1.2366, "step": 306 }, { "epoch": 0.07777074097530082, "grad_norm": 5.189089775085449, "learning_rate": 3.110435663627153e-06, "loss": 1.2856, "step": 307 }, { "epoch": 0.07802406586447118, "grad_norm": 5.267763614654541, "learning_rate": 3.120567375886525e-06, "loss": 1.2607, "step": 308 }, { "epoch": 0.07827739075364154, "grad_norm": 5.631710052490234, "learning_rate": 3.1306990881458972e-06, "loss": 1.2722, "step": 309 }, { "epoch": 0.0785307156428119, "grad_norm": 5.194464683532715, "learning_rate": 3.140830800405269e-06, "loss": 1.3005, "step": 310 }, { "epoch": 0.07878404053198226, "grad_norm": 4.978764057159424, "learning_rate": 3.1509625126646404e-06, "loss": 1.1632, "step": 311 }, { "epoch": 0.07903736542115262, "grad_norm": 5.242401123046875, "learning_rate": 3.1610942249240125e-06, "loss": 1.1535, "step": 312 }, { "epoch": 0.07929069031032299, "grad_norm": 
5.89813232421875, "learning_rate": 3.171225937183384e-06, "loss": 1.2232, "step": 313 }, { "epoch": 0.07954401519949335, "grad_norm": 5.654772758483887, "learning_rate": 3.181357649442756e-06, "loss": 1.2795, "step": 314 }, { "epoch": 0.07979734008866371, "grad_norm": 5.303825855255127, "learning_rate": 3.191489361702128e-06, "loss": 1.1862, "step": 315 }, { "epoch": 0.08005066497783407, "grad_norm": 5.2518439292907715, "learning_rate": 3.2016210739615e-06, "loss": 1.2725, "step": 316 }, { "epoch": 0.08030398986700443, "grad_norm": 5.576358795166016, "learning_rate": 3.2117527862208714e-06, "loss": 1.2813, "step": 317 }, { "epoch": 0.0805573147561748, "grad_norm": 5.416296482086182, "learning_rate": 3.2218844984802434e-06, "loss": 1.1842, "step": 318 }, { "epoch": 0.08081063964534516, "grad_norm": 5.248586654663086, "learning_rate": 3.232016210739615e-06, "loss": 1.1256, "step": 319 }, { "epoch": 0.08106396453451552, "grad_norm": 5.1331658363342285, "learning_rate": 3.242147922998987e-06, "loss": 1.2788, "step": 320 }, { "epoch": 0.08131728942368588, "grad_norm": 5.369128704071045, "learning_rate": 3.252279635258359e-06, "loss": 1.1909, "step": 321 }, { "epoch": 0.08157061431285624, "grad_norm": 5.515130996704102, "learning_rate": 3.262411347517731e-06, "loss": 1.2855, "step": 322 }, { "epoch": 0.0818239392020266, "grad_norm": 4.981941223144531, "learning_rate": 3.2725430597771024e-06, "loss": 1.13, "step": 323 }, { "epoch": 0.08207726409119696, "grad_norm": 6.185157775878906, "learning_rate": 3.2826747720364744e-06, "loss": 1.3237, "step": 324 }, { "epoch": 0.08233058898036733, "grad_norm": 5.413832187652588, "learning_rate": 3.2928064842958464e-06, "loss": 1.335, "step": 325 }, { "epoch": 0.08258391386953769, "grad_norm": 5.234766960144043, "learning_rate": 3.302938196555218e-06, "loss": 1.0887, "step": 326 }, { "epoch": 0.08283723875870805, "grad_norm": 5.378023624420166, "learning_rate": 3.31306990881459e-06, "loss": 1.2683, "step": 327 }, { "epoch": 0.08309056364787841, "grad_norm": 5.207670211791992, "learning_rate": 3.323201621073962e-06, "loss": 1.2492, "step": 328 }, { "epoch": 0.08334388853704877, "grad_norm": 4.918520450592041, "learning_rate": 3.3333333333333333e-06, "loss": 1.1408, "step": 329 }, { "epoch": 0.08359721342621912, "grad_norm": 5.375702857971191, "learning_rate": 3.3434650455927054e-06, "loss": 1.2153, "step": 330 }, { "epoch": 0.08385053831538948, "grad_norm": 5.3048996925354, "learning_rate": 3.3535967578520774e-06, "loss": 1.3736, "step": 331 }, { "epoch": 0.08410386320455984, "grad_norm": 5.028907775878906, "learning_rate": 3.363728470111449e-06, "loss": 1.1762, "step": 332 }, { "epoch": 0.0843571880937302, "grad_norm": 4.838732719421387, "learning_rate": 3.373860182370821e-06, "loss": 1.1368, "step": 333 }, { "epoch": 0.08461051298290057, "grad_norm": 5.1180853843688965, "learning_rate": 3.3839918946301927e-06, "loss": 1.1149, "step": 334 }, { "epoch": 0.08486383787207093, "grad_norm": 5.510826587677002, "learning_rate": 3.3941236068895643e-06, "loss": 1.3176, "step": 335 }, { "epoch": 0.08511716276124129, "grad_norm": 4.998798370361328, "learning_rate": 3.4042553191489363e-06, "loss": 1.1184, "step": 336 }, { "epoch": 0.08537048765041165, "grad_norm": 5.48846960067749, "learning_rate": 3.4143870314083084e-06, "loss": 1.2034, "step": 337 }, { "epoch": 0.08562381253958201, "grad_norm": 5.412283897399902, "learning_rate": 3.4245187436676804e-06, "loss": 1.2323, "step": 338 }, { "epoch": 0.08587713742875237, "grad_norm": 5.218188285827637, "learning_rate": 
3.434650455927052e-06, "loss": 1.2015, "step": 339 }, { "epoch": 0.08613046231792273, "grad_norm": 5.164620876312256, "learning_rate": 3.4447821681864236e-06, "loss": 1.2658, "step": 340 }, { "epoch": 0.0863837872070931, "grad_norm": 5.300448894500732, "learning_rate": 3.4549138804457952e-06, "loss": 1.1716, "step": 341 }, { "epoch": 0.08663711209626346, "grad_norm": 5.105301380157471, "learning_rate": 3.4650455927051673e-06, "loss": 1.1445, "step": 342 }, { "epoch": 0.08689043698543382, "grad_norm": 5.227696895599365, "learning_rate": 3.4751773049645393e-06, "loss": 1.1638, "step": 343 }, { "epoch": 0.08714376187460418, "grad_norm": 5.406198024749756, "learning_rate": 3.4853090172239114e-06, "loss": 1.2622, "step": 344 }, { "epoch": 0.08739708676377454, "grad_norm": 5.080615043640137, "learning_rate": 3.495440729483283e-06, "loss": 1.1559, "step": 345 }, { "epoch": 0.0876504116529449, "grad_norm": 5.109445571899414, "learning_rate": 3.5055724417426546e-06, "loss": 1.2238, "step": 346 }, { "epoch": 0.08790373654211527, "grad_norm": 5.225170612335205, "learning_rate": 3.5157041540020266e-06, "loss": 1.2987, "step": 347 }, { "epoch": 0.08815706143128563, "grad_norm": 4.769128322601318, "learning_rate": 3.5258358662613982e-06, "loss": 1.2457, "step": 348 }, { "epoch": 0.08841038632045599, "grad_norm": 5.18264102935791, "learning_rate": 3.5359675785207703e-06, "loss": 1.1111, "step": 349 }, { "epoch": 0.08866371120962635, "grad_norm": 5.246507167816162, "learning_rate": 3.5460992907801423e-06, "loss": 1.1872, "step": 350 }, { "epoch": 0.08891703609879671, "grad_norm": 4.926772117614746, "learning_rate": 3.5562310030395144e-06, "loss": 1.1972, "step": 351 }, { "epoch": 0.08917036098796707, "grad_norm": 5.447096347808838, "learning_rate": 3.5663627152988856e-06, "loss": 1.2149, "step": 352 }, { "epoch": 0.08942368587713743, "grad_norm": 4.652470588684082, "learning_rate": 3.5764944275582576e-06, "loss": 1.1225, "step": 353 }, { "epoch": 0.0896770107663078, "grad_norm": 5.64431619644165, "learning_rate": 3.586626139817629e-06, "loss": 1.2261, "step": 354 }, { "epoch": 0.08993033565547814, "grad_norm": 5.396925449371338, "learning_rate": 3.5967578520770012e-06, "loss": 1.1252, "step": 355 }, { "epoch": 0.0901836605446485, "grad_norm": 5.296283721923828, "learning_rate": 3.6068895643363733e-06, "loss": 1.2206, "step": 356 }, { "epoch": 0.09043698543381887, "grad_norm": 5.024763584136963, "learning_rate": 3.6170212765957453e-06, "loss": 1.203, "step": 357 }, { "epoch": 0.09069031032298923, "grad_norm": 5.703789710998535, "learning_rate": 3.6271529888551165e-06, "loss": 1.2007, "step": 358 }, { "epoch": 0.09094363521215959, "grad_norm": 5.510667324066162, "learning_rate": 3.6372847011144885e-06, "loss": 1.1985, "step": 359 }, { "epoch": 0.09119696010132995, "grad_norm": 5.609925746917725, "learning_rate": 3.6474164133738606e-06, "loss": 1.2416, "step": 360 }, { "epoch": 0.09145028499050031, "grad_norm": 4.885375022888184, "learning_rate": 3.657548125633232e-06, "loss": 1.3185, "step": 361 }, { "epoch": 0.09170360987967068, "grad_norm": 4.889640808105469, "learning_rate": 3.6676798378926042e-06, "loss": 1.099, "step": 362 }, { "epoch": 0.09195693476884104, "grad_norm": 5.009072303771973, "learning_rate": 3.6778115501519763e-06, "loss": 1.1495, "step": 363 }, { "epoch": 0.0922102596580114, "grad_norm": 4.96414852142334, "learning_rate": 3.6879432624113475e-06, "loss": 1.2632, "step": 364 }, { "epoch": 0.09246358454718176, "grad_norm": 5.6016316413879395, "learning_rate": 3.6980749746707195e-06, "loss": 
1.2534, "step": 365 }, { "epoch": 0.09271690943635212, "grad_norm": 4.959722995758057, "learning_rate": 3.7082066869300915e-06, "loss": 1.186, "step": 366 }, { "epoch": 0.09297023432552248, "grad_norm": 5.125227928161621, "learning_rate": 3.7183383991894636e-06, "loss": 1.0711, "step": 367 }, { "epoch": 0.09322355921469284, "grad_norm": 5.24466609954834, "learning_rate": 3.728470111448835e-06, "loss": 1.1699, "step": 368 }, { "epoch": 0.0934768841038632, "grad_norm": 5.310903549194336, "learning_rate": 3.7386018237082072e-06, "loss": 1.2407, "step": 369 }, { "epoch": 0.09373020899303357, "grad_norm": 5.320084095001221, "learning_rate": 3.7487335359675784e-06, "loss": 1.3521, "step": 370 }, { "epoch": 0.09398353388220393, "grad_norm": 5.608123302459717, "learning_rate": 3.7588652482269505e-06, "loss": 1.195, "step": 371 }, { "epoch": 0.09423685877137429, "grad_norm": 5.201747417449951, "learning_rate": 3.7689969604863225e-06, "loss": 1.0786, "step": 372 }, { "epoch": 0.09449018366054465, "grad_norm": 5.231518268585205, "learning_rate": 3.7791286727456945e-06, "loss": 1.2605, "step": 373 }, { "epoch": 0.09474350854971501, "grad_norm": 5.920846462249756, "learning_rate": 3.789260385005066e-06, "loss": 1.3388, "step": 374 }, { "epoch": 0.09499683343888538, "grad_norm": 5.163887977600098, "learning_rate": 3.799392097264438e-06, "loss": 1.1228, "step": 375 }, { "epoch": 0.09525015832805574, "grad_norm": 5.717247486114502, "learning_rate": 3.80952380952381e-06, "loss": 1.2795, "step": 376 }, { "epoch": 0.0955034832172261, "grad_norm": 4.618260383605957, "learning_rate": 3.819655521783182e-06, "loss": 1.1681, "step": 377 }, { "epoch": 0.09575680810639646, "grad_norm": 5.07727575302124, "learning_rate": 3.8297872340425535e-06, "loss": 1.1068, "step": 378 }, { "epoch": 0.09601013299556681, "grad_norm": 5.215041637420654, "learning_rate": 3.839918946301925e-06, "loss": 1.2376, "step": 379 }, { "epoch": 0.09626345788473717, "grad_norm": 5.052102088928223, "learning_rate": 3.8500506585612975e-06, "loss": 1.2343, "step": 380 }, { "epoch": 0.09651678277390753, "grad_norm": 4.981046199798584, "learning_rate": 3.860182370820669e-06, "loss": 1.318, "step": 381 }, { "epoch": 0.09677010766307789, "grad_norm": 5.665030002593994, "learning_rate": 3.870314083080041e-06, "loss": 1.2604, "step": 382 }, { "epoch": 0.09702343255224825, "grad_norm": 5.199818134307861, "learning_rate": 3.880445795339412e-06, "loss": 1.2613, "step": 383 }, { "epoch": 0.09727675744141862, "grad_norm": 5.118903636932373, "learning_rate": 3.890577507598785e-06, "loss": 1.2008, "step": 384 }, { "epoch": 0.09753008233058898, "grad_norm": 4.857881546020508, "learning_rate": 3.9007092198581565e-06, "loss": 1.2139, "step": 385 }, { "epoch": 0.09778340721975934, "grad_norm": 4.874960899353027, "learning_rate": 3.910840932117528e-06, "loss": 1.1296, "step": 386 }, { "epoch": 0.0980367321089297, "grad_norm": 5.047555446624756, "learning_rate": 3.9209726443769005e-06, "loss": 1.1817, "step": 387 }, { "epoch": 0.09829005699810006, "grad_norm": 4.445738315582275, "learning_rate": 3.931104356636271e-06, "loss": 1.1105, "step": 388 }, { "epoch": 0.09854338188727042, "grad_norm": 5.266610145568848, "learning_rate": 3.941236068895644e-06, "loss": 1.2458, "step": 389 }, { "epoch": 0.09879670677644078, "grad_norm": 5.520458221435547, "learning_rate": 3.951367781155015e-06, "loss": 1.2126, "step": 390 }, { "epoch": 0.09905003166561115, "grad_norm": 5.264195919036865, "learning_rate": 3.961499493414388e-06, "loss": 1.2394, "step": 391 }, { "epoch": 
0.09930335655478151, "grad_norm": 5.44072961807251, "learning_rate": 3.9716312056737595e-06, "loss": 1.3318, "step": 392 }, { "epoch": 0.09955668144395187, "grad_norm": 4.864696502685547, "learning_rate": 3.981762917933131e-06, "loss": 1.1867, "step": 393 }, { "epoch": 0.09981000633312223, "grad_norm": 4.9540114402771, "learning_rate": 3.991894630192503e-06, "loss": 1.1448, "step": 394 }, { "epoch": 0.10006333122229259, "grad_norm": 5.423478603363037, "learning_rate": 4.002026342451874e-06, "loss": 1.2762, "step": 395 }, { "epoch": 0.10031665611146295, "grad_norm": 5.948858261108398, "learning_rate": 4.012158054711247e-06, "loss": 1.3089, "step": 396 }, { "epoch": 0.10056998100063332, "grad_norm": 4.995451927185059, "learning_rate": 4.022289766970618e-06, "loss": 1.2065, "step": 397 }, { "epoch": 0.10082330588980368, "grad_norm": 5.340811729431152, "learning_rate": 4.03242147922999e-06, "loss": 1.1683, "step": 398 }, { "epoch": 0.10107663077897404, "grad_norm": 5.5295491218566895, "learning_rate": 4.042553191489362e-06, "loss": 1.267, "step": 399 }, { "epoch": 0.1013299556681444, "grad_norm": 5.184334754943848, "learning_rate": 4.052684903748734e-06, "loss": 1.1513, "step": 400 }, { "epoch": 0.10158328055731476, "grad_norm": 5.154574394226074, "learning_rate": 4.062816616008106e-06, "loss": 1.1991, "step": 401 }, { "epoch": 0.10183660544648512, "grad_norm": 5.186733722686768, "learning_rate": 4.072948328267477e-06, "loss": 1.2177, "step": 402 }, { "epoch": 0.10208993033565548, "grad_norm": 5.594045639038086, "learning_rate": 4.08308004052685e-06, "loss": 1.2582, "step": 403 }, { "epoch": 0.10234325522482583, "grad_norm": 4.924523830413818, "learning_rate": 4.093211752786221e-06, "loss": 1.237, "step": 404 }, { "epoch": 0.1025965801139962, "grad_norm": 5.1376543045043945, "learning_rate": 4.103343465045593e-06, "loss": 1.155, "step": 405 }, { "epoch": 0.10284990500316656, "grad_norm": 4.898098468780518, "learning_rate": 4.113475177304965e-06, "loss": 1.1889, "step": 406 }, { "epoch": 0.10310322989233692, "grad_norm": 5.024801731109619, "learning_rate": 4.123606889564336e-06, "loss": 1.1944, "step": 407 }, { "epoch": 0.10335655478150728, "grad_norm": 5.272379398345947, "learning_rate": 4.133738601823709e-06, "loss": 1.2796, "step": 408 }, { "epoch": 0.10360987967067764, "grad_norm": 5.209236145019531, "learning_rate": 4.14387031408308e-06, "loss": 1.1897, "step": 409 }, { "epoch": 0.103863204559848, "grad_norm": 4.9806976318359375, "learning_rate": 4.154002026342453e-06, "loss": 1.2955, "step": 410 }, { "epoch": 0.10411652944901836, "grad_norm": 5.147138595581055, "learning_rate": 4.1641337386018235e-06, "loss": 1.1794, "step": 411 }, { "epoch": 0.10436985433818873, "grad_norm": 5.406728744506836, "learning_rate": 4.174265450861196e-06, "loss": 1.3, "step": 412 }, { "epoch": 0.10462317922735909, "grad_norm": 5.133518218994141, "learning_rate": 4.184397163120568e-06, "loss": 1.3578, "step": 413 }, { "epoch": 0.10487650411652945, "grad_norm": 5.1981520652771, "learning_rate": 4.194528875379939e-06, "loss": 1.4429, "step": 414 }, { "epoch": 0.10512982900569981, "grad_norm": 4.696353435516357, "learning_rate": 4.204660587639312e-06, "loss": 1.1034, "step": 415 }, { "epoch": 0.10538315389487017, "grad_norm": 5.248478889465332, "learning_rate": 4.214792299898683e-06, "loss": 1.3058, "step": 416 }, { "epoch": 0.10563647878404053, "grad_norm": 4.99880313873291, "learning_rate": 4.224924012158055e-06, "loss": 1.1162, "step": 417 }, { "epoch": 0.1058898036732109, "grad_norm": 5.2948455810546875, 
"learning_rate": 4.2350557244174265e-06, "loss": 1.323, "step": 418 }, { "epoch": 0.10614312856238126, "grad_norm": 5.046769142150879, "learning_rate": 4.245187436676799e-06, "loss": 1.2691, "step": 419 }, { "epoch": 0.10639645345155162, "grad_norm": 5.314305305480957, "learning_rate": 4.255319148936171e-06, "loss": 1.1455, "step": 420 }, { "epoch": 0.10664977834072198, "grad_norm": 5.6942853927612305, "learning_rate": 4.265450861195542e-06, "loss": 1.3361, "step": 421 }, { "epoch": 0.10690310322989234, "grad_norm": 5.832979202270508, "learning_rate": 4.275582573454915e-06, "loss": 1.4814, "step": 422 }, { "epoch": 0.1071564281190627, "grad_norm": 5.128951549530029, "learning_rate": 4.2857142857142855e-06, "loss": 1.2869, "step": 423 }, { "epoch": 0.10740975300823306, "grad_norm": 5.437411785125732, "learning_rate": 4.295845997973658e-06, "loss": 1.2294, "step": 424 }, { "epoch": 0.10766307789740343, "grad_norm": 5.274136543273926, "learning_rate": 4.3059777102330295e-06, "loss": 1.1951, "step": 425 }, { "epoch": 0.10791640278657379, "grad_norm": 5.109431266784668, "learning_rate": 4.316109422492402e-06, "loss": 1.2014, "step": 426 }, { "epoch": 0.10816972767574415, "grad_norm": 5.105831146240234, "learning_rate": 4.326241134751774e-06, "loss": 1.2486, "step": 427 }, { "epoch": 0.1084230525649145, "grad_norm": 5.709077835083008, "learning_rate": 4.336372847011145e-06, "loss": 1.2318, "step": 428 }, { "epoch": 0.10867637745408486, "grad_norm": 4.6664934158325195, "learning_rate": 4.346504559270517e-06, "loss": 1.1364, "step": 429 }, { "epoch": 0.10892970234325522, "grad_norm": 5.61269998550415, "learning_rate": 4.3566362715298885e-06, "loss": 1.2759, "step": 430 }, { "epoch": 0.10918302723242558, "grad_norm": 5.311194896697998, "learning_rate": 4.366767983789261e-06, "loss": 1.2994, "step": 431 }, { "epoch": 0.10943635212159594, "grad_norm": 5.4426679611206055, "learning_rate": 4.3768996960486325e-06, "loss": 1.2654, "step": 432 }, { "epoch": 0.1096896770107663, "grad_norm": 5.604968070983887, "learning_rate": 4.387031408308004e-06, "loss": 1.283, "step": 433 }, { "epoch": 0.10994300189993667, "grad_norm": 4.886499404907227, "learning_rate": 4.397163120567377e-06, "loss": 1.2903, "step": 434 }, { "epoch": 0.11019632678910703, "grad_norm": 5.554519176483154, "learning_rate": 4.407294832826748e-06, "loss": 1.2279, "step": 435 }, { "epoch": 0.11044965167827739, "grad_norm": 4.756959915161133, "learning_rate": 4.41742654508612e-06, "loss": 1.158, "step": 436 }, { "epoch": 0.11070297656744775, "grad_norm": 5.010580539703369, "learning_rate": 4.4275582573454915e-06, "loss": 1.3596, "step": 437 }, { "epoch": 0.11095630145661811, "grad_norm": 5.439083099365234, "learning_rate": 4.437689969604864e-06, "loss": 1.2645, "step": 438 }, { "epoch": 0.11120962634578847, "grad_norm": 5.501040935516357, "learning_rate": 4.4478216818642355e-06, "loss": 1.2239, "step": 439 }, { "epoch": 0.11146295123495883, "grad_norm": 5.120853900909424, "learning_rate": 4.457953394123607e-06, "loss": 1.2607, "step": 440 }, { "epoch": 0.1117162761241292, "grad_norm": 4.933994293212891, "learning_rate": 4.468085106382979e-06, "loss": 1.187, "step": 441 }, { "epoch": 0.11196960101329956, "grad_norm": 4.760913372039795, "learning_rate": 4.478216818642351e-06, "loss": 1.0609, "step": 442 }, { "epoch": 0.11222292590246992, "grad_norm": 4.908841133117676, "learning_rate": 4.488348530901723e-06, "loss": 1.0957, "step": 443 }, { "epoch": 0.11247625079164028, "grad_norm": 5.275254726409912, "learning_rate": 4.4984802431610945e-06, 
"loss": 1.0603, "step": 444 }, { "epoch": 0.11272957568081064, "grad_norm": 5.005535125732422, "learning_rate": 4.508611955420467e-06, "loss": 1.1966, "step": 445 }, { "epoch": 0.112982900569981, "grad_norm": 5.133699893951416, "learning_rate": 4.5187436676798385e-06, "loss": 1.2276, "step": 446 }, { "epoch": 0.11323622545915137, "grad_norm": 5.156227111816406, "learning_rate": 4.52887537993921e-06, "loss": 1.0842, "step": 447 }, { "epoch": 0.11348955034832173, "grad_norm": 5.140113830566406, "learning_rate": 4.539007092198582e-06, "loss": 1.212, "step": 448 }, { "epoch": 0.11374287523749209, "grad_norm": 5.005239009857178, "learning_rate": 4.549138804457953e-06, "loss": 1.1829, "step": 449 }, { "epoch": 0.11399620012666245, "grad_norm": 4.534379959106445, "learning_rate": 4.559270516717326e-06, "loss": 1.015, "step": 450 }, { "epoch": 0.11424952501583281, "grad_norm": 4.960865497589111, "learning_rate": 4.5694022289766975e-06, "loss": 1.0726, "step": 451 }, { "epoch": 0.11450284990500317, "grad_norm": 5.407089710235596, "learning_rate": 4.57953394123607e-06, "loss": 1.2036, "step": 452 }, { "epoch": 0.11475617479417352, "grad_norm": 4.970057487487793, "learning_rate": 4.589665653495441e-06, "loss": 1.2275, "step": 453 }, { "epoch": 0.11500949968334388, "grad_norm": 5.319904327392578, "learning_rate": 4.599797365754813e-06, "loss": 1.2653, "step": 454 }, { "epoch": 0.11526282457251424, "grad_norm": 5.534214496612549, "learning_rate": 4.609929078014185e-06, "loss": 1.2502, "step": 455 }, { "epoch": 0.1155161494616846, "grad_norm": 5.3505425453186035, "learning_rate": 4.620060790273556e-06, "loss": 1.3581, "step": 456 }, { "epoch": 0.11576947435085497, "grad_norm": 5.140420436859131, "learning_rate": 4.630192502532929e-06, "loss": 1.2164, "step": 457 }, { "epoch": 0.11602279924002533, "grad_norm": 5.232957363128662, "learning_rate": 4.6403242147923005e-06, "loss": 1.2348, "step": 458 }, { "epoch": 0.11627612412919569, "grad_norm": 5.0088324546813965, "learning_rate": 4.650455927051672e-06, "loss": 1.202, "step": 459 }, { "epoch": 0.11652944901836605, "grad_norm": 5.049102306365967, "learning_rate": 4.660587639311044e-06, "loss": 1.3339, "step": 460 }, { "epoch": 0.11678277390753641, "grad_norm": 5.3588995933532715, "learning_rate": 4.670719351570416e-06, "loss": 1.2878, "step": 461 }, { "epoch": 0.11703609879670678, "grad_norm": 4.652127265930176, "learning_rate": 4.680851063829788e-06, "loss": 1.1328, "step": 462 }, { "epoch": 0.11728942368587714, "grad_norm": 5.1378278732299805, "learning_rate": 4.690982776089159e-06, "loss": 1.2494, "step": 463 }, { "epoch": 0.1175427485750475, "grad_norm": 5.077396392822266, "learning_rate": 4.701114488348531e-06, "loss": 1.244, "step": 464 }, { "epoch": 0.11779607346421786, "grad_norm": 4.885287761688232, "learning_rate": 4.711246200607903e-06, "loss": 1.045, "step": 465 }, { "epoch": 0.11804939835338822, "grad_norm": 4.663662433624268, "learning_rate": 4.721377912867275e-06, "loss": 1.1788, "step": 466 }, { "epoch": 0.11830272324255858, "grad_norm": 5.223139762878418, "learning_rate": 4.731509625126647e-06, "loss": 1.2393, "step": 467 }, { "epoch": 0.11855604813172894, "grad_norm": 4.848125457763672, "learning_rate": 4.741641337386019e-06, "loss": 1.1446, "step": 468 }, { "epoch": 0.1188093730208993, "grad_norm": 5.275095462799072, "learning_rate": 4.751773049645391e-06, "loss": 1.2942, "step": 469 }, { "epoch": 0.11906269791006967, "grad_norm": 4.845182418823242, "learning_rate": 4.761904761904762e-06, "loss": 1.2158, "step": 470 }, { "epoch": 
0.11931602279924003, "grad_norm": 5.734543800354004, "learning_rate": 4.772036474164134e-06, "loss": 1.3147, "step": 471 }, { "epoch": 0.11956934768841039, "grad_norm": 5.24599027633667, "learning_rate": 4.782168186423506e-06, "loss": 1.2151, "step": 472 }, { "epoch": 0.11982267257758075, "grad_norm": 4.8684163093566895, "learning_rate": 4.792299898682878e-06, "loss": 1.1434, "step": 473 }, { "epoch": 0.12007599746675111, "grad_norm": 4.637145042419434, "learning_rate": 4.80243161094225e-06, "loss": 1.1748, "step": 474 }, { "epoch": 0.12032932235592148, "grad_norm": 4.772747993469238, "learning_rate": 4.812563323201621e-06, "loss": 1.1442, "step": 475 }, { "epoch": 0.12058264724509184, "grad_norm": 4.968971252441406, "learning_rate": 4.822695035460993e-06, "loss": 1.2552, "step": 476 }, { "epoch": 0.12083597213426218, "grad_norm": 5.498548984527588, "learning_rate": 4.832826747720365e-06, "loss": 1.1324, "step": 477 }, { "epoch": 0.12108929702343255, "grad_norm": 5.052052974700928, "learning_rate": 4.842958459979737e-06, "loss": 1.2966, "step": 478 }, { "epoch": 0.12134262191260291, "grad_norm": 5.156652450561523, "learning_rate": 4.853090172239109e-06, "loss": 1.1934, "step": 479 }, { "epoch": 0.12159594680177327, "grad_norm": 5.284748554229736, "learning_rate": 4.863221884498481e-06, "loss": 1.2785, "step": 480 }, { "epoch": 0.12184927169094363, "grad_norm": 4.996010780334473, "learning_rate": 4.873353596757853e-06, "loss": 1.1932, "step": 481 }, { "epoch": 0.12210259658011399, "grad_norm": 5.440031051635742, "learning_rate": 4.883485309017224e-06, "loss": 1.2987, "step": 482 }, { "epoch": 0.12235592146928435, "grad_norm": 5.136556148529053, "learning_rate": 4.893617021276596e-06, "loss": 1.0797, "step": 483 }, { "epoch": 0.12260924635845472, "grad_norm": 5.024312496185303, "learning_rate": 4.9037487335359675e-06, "loss": 1.1816, "step": 484 }, { "epoch": 0.12286257124762508, "grad_norm": 5.054494380950928, "learning_rate": 4.91388044579534e-06, "loss": 1.3098, "step": 485 }, { "epoch": 0.12311589613679544, "grad_norm": 5.291783332824707, "learning_rate": 4.924012158054712e-06, "loss": 1.2519, "step": 486 }, { "epoch": 0.1233692210259658, "grad_norm": 5.203293800354004, "learning_rate": 4.934143870314084e-06, "loss": 1.2508, "step": 487 }, { "epoch": 0.12362254591513616, "grad_norm": 4.995335578918457, "learning_rate": 4.944275582573455e-06, "loss": 1.1623, "step": 488 }, { "epoch": 0.12387587080430652, "grad_norm": 4.85042667388916, "learning_rate": 4.954407294832827e-06, "loss": 1.2747, "step": 489 }, { "epoch": 0.12412919569347688, "grad_norm": 5.630333423614502, "learning_rate": 4.964539007092199e-06, "loss": 1.3355, "step": 490 }, { "epoch": 0.12438252058264725, "grad_norm": 5.215515613555908, "learning_rate": 4.9746707193515705e-06, "loss": 1.1913, "step": 491 }, { "epoch": 0.12463584547181761, "grad_norm": 4.697793006896973, "learning_rate": 4.984802431610943e-06, "loss": 1.1055, "step": 492 }, { "epoch": 0.12488917036098797, "grad_norm": 5.270546913146973, "learning_rate": 4.994934143870315e-06, "loss": 1.1722, "step": 493 }, { "epoch": 0.12514249525015833, "grad_norm": 5.559477806091309, "learning_rate": 5.005065856129686e-06, "loss": 1.2904, "step": 494 }, { "epoch": 0.12539582013932868, "grad_norm": 4.968261241912842, "learning_rate": 5.015197568389059e-06, "loss": 1.1808, "step": 495 }, { "epoch": 0.12564914502849905, "grad_norm": 5.030310153961182, "learning_rate": 5.02532928064843e-06, "loss": 1.2484, "step": 496 }, { "epoch": 0.1259024699176694, "grad_norm": 
4.8509840965271, "learning_rate": 5.035460992907801e-06, "loss": 1.3076, "step": 497 }, { "epoch": 0.12615579480683978, "grad_norm": 4.936345100402832, "learning_rate": 5.0455927051671735e-06, "loss": 1.1693, "step": 498 }, { "epoch": 0.12640911969601012, "grad_norm": 5.1634392738342285, "learning_rate": 5.055724417426545e-06, "loss": 1.2937, "step": 499 }, { "epoch": 0.1266624445851805, "grad_norm": 4.734792709350586, "learning_rate": 5.065856129685918e-06, "loss": 1.1153, "step": 500 }, { "epoch": 0.1266624445851805, "eval_loss": 1.2462952136993408, "eval_runtime": 12.2985, "eval_samples_per_second": 32.524, "eval_steps_per_second": 4.066, "step": 500 }, { "epoch": 0.12691576947435085, "grad_norm": 5.271373271942139, "learning_rate": 5.075987841945289e-06, "loss": 1.1751, "step": 501 }, { "epoch": 0.12716909436352122, "grad_norm": 4.605556488037109, "learning_rate": 5.086119554204662e-06, "loss": 1.1828, "step": 502 }, { "epoch": 0.12742241925269157, "grad_norm": 4.6827311515808105, "learning_rate": 5.096251266464033e-06, "loss": 1.1332, "step": 503 }, { "epoch": 0.12767574414186195, "grad_norm": 6.322502613067627, "learning_rate": 5.106382978723404e-06, "loss": 1.1816, "step": 504 }, { "epoch": 0.1279290690310323, "grad_norm": 4.943698883056641, "learning_rate": 5.1165146909827765e-06, "loss": 1.1607, "step": 505 }, { "epoch": 0.12818239392020267, "grad_norm": 4.914384841918945, "learning_rate": 5.126646403242148e-06, "loss": 1.1669, "step": 506 }, { "epoch": 0.12843571880937302, "grad_norm": 5.10693359375, "learning_rate": 5.136778115501521e-06, "loss": 1.2683, "step": 507 }, { "epoch": 0.1286890436985434, "grad_norm": 5.151780128479004, "learning_rate": 5.146909827760892e-06, "loss": 1.2711, "step": 508 }, { "epoch": 0.12894236858771374, "grad_norm": 4.811371803283691, "learning_rate": 5.157041540020263e-06, "loss": 1.2546, "step": 509 }, { "epoch": 0.12919569347688412, "grad_norm": 4.895148277282715, "learning_rate": 5.1671732522796354e-06, "loss": 1.2519, "step": 510 }, { "epoch": 0.12944901836605446, "grad_norm": 4.593327522277832, "learning_rate": 5.177304964539007e-06, "loss": 1.2683, "step": 511 }, { "epoch": 0.12970234325522484, "grad_norm": 5.491243839263916, "learning_rate": 5.1874366767983795e-06, "loss": 1.3801, "step": 512 }, { "epoch": 0.1299556681443952, "grad_norm": 4.669014930725098, "learning_rate": 5.197568389057751e-06, "loss": 1.076, "step": 513 }, { "epoch": 0.13020899303356553, "grad_norm": 4.788550853729248, "learning_rate": 5.207700101317124e-06, "loss": 1.0858, "step": 514 }, { "epoch": 0.1304623179227359, "grad_norm": 5.297513008117676, "learning_rate": 5.217831813576495e-06, "loss": 1.263, "step": 515 }, { "epoch": 0.13071564281190626, "grad_norm": 4.7116923332214355, "learning_rate": 5.227963525835866e-06, "loss": 1.1378, "step": 516 }, { "epoch": 0.13096896770107663, "grad_norm": 4.830559730529785, "learning_rate": 5.2380952380952384e-06, "loss": 1.1793, "step": 517 }, { "epoch": 0.13122229259024698, "grad_norm": 4.668907165527344, "learning_rate": 5.24822695035461e-06, "loss": 1.1554, "step": 518 }, { "epoch": 0.13147561747941736, "grad_norm": 5.034953594207764, "learning_rate": 5.2583586626139825e-06, "loss": 1.2435, "step": 519 }, { "epoch": 0.1317289423685877, "grad_norm": 4.942272186279297, "learning_rate": 5.268490374873354e-06, "loss": 1.1792, "step": 520 }, { "epoch": 0.13198226725775808, "grad_norm": 4.644348621368408, "learning_rate": 5.278622087132726e-06, "loss": 1.2066, "step": 521 }, { "epoch": 0.13223559214692843, "grad_norm": 
4.6449785232543945, "learning_rate": 5.288753799392098e-06, "loss": 1.1041, "step": 522 }, { "epoch": 0.1324889170360988, "grad_norm": 4.8624186515808105, "learning_rate": 5.298885511651469e-06, "loss": 1.2315, "step": 523 }, { "epoch": 0.13274224192526915, "grad_norm": 4.965691566467285, "learning_rate": 5.3090172239108414e-06, "loss": 1.3935, "step": 524 }, { "epoch": 0.13299556681443953, "grad_norm": 4.97011137008667, "learning_rate": 5.319148936170213e-06, "loss": 1.414, "step": 525 }, { "epoch": 0.13324889170360987, "grad_norm": 4.666852951049805, "learning_rate": 5.3292806484295855e-06, "loss": 1.1865, "step": 526 }, { "epoch": 0.13350221659278025, "grad_norm": 5.110204219818115, "learning_rate": 5.339412360688957e-06, "loss": 1.0862, "step": 527 }, { "epoch": 0.1337555414819506, "grad_norm": 5.102944850921631, "learning_rate": 5.349544072948329e-06, "loss": 1.2819, "step": 528 }, { "epoch": 0.13400886637112097, "grad_norm": 4.5785980224609375, "learning_rate": 5.359675785207701e-06, "loss": 1.2022, "step": 529 }, { "epoch": 0.13426219126029132, "grad_norm": 4.983090400695801, "learning_rate": 5.369807497467072e-06, "loss": 1.2425, "step": 530 }, { "epoch": 0.1345155161494617, "grad_norm": 5.571552753448486, "learning_rate": 5.3799392097264444e-06, "loss": 1.334, "step": 531 }, { "epoch": 0.13476884103863204, "grad_norm": 4.813221454620361, "learning_rate": 5.390070921985816e-06, "loss": 1.2187, "step": 532 }, { "epoch": 0.13502216592780242, "grad_norm": 4.895610809326172, "learning_rate": 5.400202634245188e-06, "loss": 1.1801, "step": 533 }, { "epoch": 0.13527549081697277, "grad_norm": 5.426584243774414, "learning_rate": 5.41033434650456e-06, "loss": 1.392, "step": 534 }, { "epoch": 0.13552881570614314, "grad_norm": 4.887606143951416, "learning_rate": 5.420466058763931e-06, "loss": 1.2473, "step": 535 }, { "epoch": 0.1357821405953135, "grad_norm": 5.3059401512146, "learning_rate": 5.430597771023303e-06, "loss": 1.2066, "step": 536 }, { "epoch": 0.13603546548448386, "grad_norm": 5.063421726226807, "learning_rate": 5.440729483282675e-06, "loss": 1.2607, "step": 537 }, { "epoch": 0.1362887903736542, "grad_norm": 5.2057294845581055, "learning_rate": 5.450861195542047e-06, "loss": 1.3135, "step": 538 }, { "epoch": 0.13654211526282456, "grad_norm": 5.15851354598999, "learning_rate": 5.460992907801419e-06, "loss": 1.3632, "step": 539 }, { "epoch": 0.13679544015199493, "grad_norm": 5.122459411621094, "learning_rate": 5.471124620060791e-06, "loss": 1.1665, "step": 540 }, { "epoch": 0.13704876504116528, "grad_norm": 5.20040225982666, "learning_rate": 5.481256332320163e-06, "loss": 1.2699, "step": 541 }, { "epoch": 0.13730208993033566, "grad_norm": 4.900663375854492, "learning_rate": 5.491388044579534e-06, "loss": 1.1101, "step": 542 }, { "epoch": 0.137555414819506, "grad_norm": 4.9732561111450195, "learning_rate": 5.501519756838906e-06, "loss": 1.2512, "step": 543 }, { "epoch": 0.13780873970867638, "grad_norm": 4.934910774230957, "learning_rate": 5.511651469098278e-06, "loss": 1.1755, "step": 544 }, { "epoch": 0.13806206459784673, "grad_norm": 4.943143367767334, "learning_rate": 5.52178318135765e-06, "loss": 1.2349, "step": 545 }, { "epoch": 0.1383153894870171, "grad_norm": 4.923962116241455, "learning_rate": 5.531914893617022e-06, "loss": 1.2429, "step": 546 }, { "epoch": 0.13856871437618745, "grad_norm": 5.372138500213623, "learning_rate": 5.542046605876394e-06, "loss": 1.2674, "step": 547 }, { "epoch": 0.13882203926535783, "grad_norm": 4.940601825714111, "learning_rate": 
5.552178318135766e-06, "loss": 1.2771, "step": 548 }, { "epoch": 0.13907536415452817, "grad_norm": 4.944694995880127, "learning_rate": 5.562310030395137e-06, "loss": 1.2695, "step": 549 }, { "epoch": 0.13932868904369855, "grad_norm": 5.337344169616699, "learning_rate": 5.5724417426545085e-06, "loss": 1.1693, "step": 550 }, { "epoch": 0.1395820139328689, "grad_norm": 4.865754127502441, "learning_rate": 5.582573454913881e-06, "loss": 1.2209, "step": 551 }, { "epoch": 0.13983533882203927, "grad_norm": 4.5765228271484375, "learning_rate": 5.592705167173253e-06, "loss": 1.2566, "step": 552 }, { "epoch": 0.14008866371120962, "grad_norm": 5.803656101226807, "learning_rate": 5.602836879432625e-06, "loss": 1.4674, "step": 553 }, { "epoch": 0.14034198860038, "grad_norm": 4.550984859466553, "learning_rate": 5.612968591691997e-06, "loss": 1.1272, "step": 554 }, { "epoch": 0.14059531348955034, "grad_norm": 4.758320331573486, "learning_rate": 5.623100303951369e-06, "loss": 1.0811, "step": 555 }, { "epoch": 0.14084863837872072, "grad_norm": 4.948498725891113, "learning_rate": 5.63323201621074e-06, "loss": 1.2059, "step": 556 }, { "epoch": 0.14110196326789107, "grad_norm": 4.6559858322143555, "learning_rate": 5.6433637284701115e-06, "loss": 1.0973, "step": 557 }, { "epoch": 0.14135528815706144, "grad_norm": 4.768313407897949, "learning_rate": 5.653495440729484e-06, "loss": 1.2299, "step": 558 }, { "epoch": 0.1416086130462318, "grad_norm": 5.055200099945068, "learning_rate": 5.663627152988856e-06, "loss": 1.2204, "step": 559 }, { "epoch": 0.14186193793540217, "grad_norm": 5.64510440826416, "learning_rate": 5.673758865248228e-06, "loss": 1.3287, "step": 560 }, { "epoch": 0.1421152628245725, "grad_norm": 5.069836616516113, "learning_rate": 5.683890577507599e-06, "loss": 1.1542, "step": 561 }, { "epoch": 0.1423685877137429, "grad_norm": 4.7821478843688965, "learning_rate": 5.6940222897669704e-06, "loss": 1.3209, "step": 562 }, { "epoch": 0.14262191260291324, "grad_norm": 5.0057148933410645, "learning_rate": 5.704154002026343e-06, "loss": 1.1426, "step": 563 }, { "epoch": 0.14287523749208358, "grad_norm": 4.5058913230896, "learning_rate": 5.7142857142857145e-06, "loss": 1.1116, "step": 564 }, { "epoch": 0.14312856238125396, "grad_norm": 5.375939846038818, "learning_rate": 5.724417426545087e-06, "loss": 1.2233, "step": 565 }, { "epoch": 0.1433818872704243, "grad_norm": 5.549449443817139, "learning_rate": 5.734549138804459e-06, "loss": 1.2536, "step": 566 }, { "epoch": 0.14363521215959468, "grad_norm": 4.731637001037598, "learning_rate": 5.744680851063831e-06, "loss": 1.145, "step": 567 }, { "epoch": 0.14388853704876503, "grad_norm": 4.841800212860107, "learning_rate": 5.754812563323202e-06, "loss": 1.1289, "step": 568 }, { "epoch": 0.1441418619379354, "grad_norm": 4.759994029998779, "learning_rate": 5.7649442755825734e-06, "loss": 1.0788, "step": 569 }, { "epoch": 0.14439518682710575, "grad_norm": 5.397213459014893, "learning_rate": 5.775075987841946e-06, "loss": 1.4156, "step": 570 }, { "epoch": 0.14464851171627613, "grad_norm": 5.066045761108398, "learning_rate": 5.7852077001013175e-06, "loss": 1.2685, "step": 571 }, { "epoch": 0.14490183660544648, "grad_norm": 4.570275783538818, "learning_rate": 5.79533941236069e-06, "loss": 1.174, "step": 572 }, { "epoch": 0.14515516149461685, "grad_norm": 4.592137336730957, "learning_rate": 5.805471124620062e-06, "loss": 1.1032, "step": 573 }, { "epoch": 0.1454084863837872, "grad_norm": 4.847631454467773, "learning_rate": 5.815602836879432e-06, "loss": 1.2636, "step": 574 
}, { "epoch": 0.14566181127295758, "grad_norm": 5.709230899810791, "learning_rate": 5.825734549138805e-06, "loss": 1.282, "step": 575 }, { "epoch": 0.14591513616212792, "grad_norm": 5.450544357299805, "learning_rate": 5.8358662613981764e-06, "loss": 1.1158, "step": 576 }, { "epoch": 0.1461684610512983, "grad_norm": 4.97231912612915, "learning_rate": 5.845997973657549e-06, "loss": 1.2709, "step": 577 }, { "epoch": 0.14642178594046865, "grad_norm": 4.959066867828369, "learning_rate": 5.8561296859169205e-06, "loss": 1.1452, "step": 578 }, { "epoch": 0.14667511082963902, "grad_norm": 5.879068374633789, "learning_rate": 5.866261398176293e-06, "loss": 1.386, "step": 579 }, { "epoch": 0.14692843571880937, "grad_norm": 5.1025710105896, "learning_rate": 5.876393110435665e-06, "loss": 1.269, "step": 580 }, { "epoch": 0.14718176060797974, "grad_norm": 5.202458381652832, "learning_rate": 5.886524822695035e-06, "loss": 1.2261, "step": 581 }, { "epoch": 0.1474350854971501, "grad_norm": 5.0866475105285645, "learning_rate": 5.896656534954408e-06, "loss": 1.2695, "step": 582 }, { "epoch": 0.14768841038632047, "grad_norm": 5.054940223693848, "learning_rate": 5.9067882472137794e-06, "loss": 1.2914, "step": 583 }, { "epoch": 0.14794173527549082, "grad_norm": 4.855292797088623, "learning_rate": 5.916919959473152e-06, "loss": 1.2319, "step": 584 }, { "epoch": 0.1481950601646612, "grad_norm": 4.923979759216309, "learning_rate": 5.9270516717325235e-06, "loss": 1.228, "step": 585 }, { "epoch": 0.14844838505383154, "grad_norm": 4.366090297698975, "learning_rate": 5.937183383991894e-06, "loss": 1.145, "step": 586 }, { "epoch": 0.1487017099430019, "grad_norm": 4.820800304412842, "learning_rate": 5.947315096251267e-06, "loss": 1.1171, "step": 587 }, { "epoch": 0.14895503483217226, "grad_norm": 5.232685089111328, "learning_rate": 5.957446808510638e-06, "loss": 1.1695, "step": 588 }, { "epoch": 0.1492083597213426, "grad_norm": 5.733992576599121, "learning_rate": 5.967578520770011e-06, "loss": 1.2805, "step": 589 }, { "epoch": 0.14946168461051298, "grad_norm": 5.02573299407959, "learning_rate": 5.9777102330293824e-06, "loss": 1.1743, "step": 590 }, { "epoch": 0.14971500949968333, "grad_norm": 4.9159626960754395, "learning_rate": 5.987841945288755e-06, "loss": 1.2444, "step": 591 }, { "epoch": 0.1499683343888537, "grad_norm": 4.857386112213135, "learning_rate": 5.9979736575481265e-06, "loss": 1.278, "step": 592 }, { "epoch": 0.15022165927802406, "grad_norm": 5.224244594573975, "learning_rate": 6.008105369807497e-06, "loss": 1.436, "step": 593 }, { "epoch": 0.15047498416719443, "grad_norm": 4.591535568237305, "learning_rate": 6.01823708206687e-06, "loss": 1.2383, "step": 594 }, { "epoch": 0.15072830905636478, "grad_norm": 5.028872489929199, "learning_rate": 6.028368794326241e-06, "loss": 1.3694, "step": 595 }, { "epoch": 0.15098163394553515, "grad_norm": 5.284378528594971, "learning_rate": 6.038500506585614e-06, "loss": 1.3386, "step": 596 }, { "epoch": 0.1512349588347055, "grad_norm": 4.551464557647705, "learning_rate": 6.0486322188449854e-06, "loss": 1.2189, "step": 597 }, { "epoch": 0.15148828372387588, "grad_norm": 5.2143778800964355, "learning_rate": 6.058763931104357e-06, "loss": 1.1644, "step": 598 }, { "epoch": 0.15174160861304622, "grad_norm": 4.9922099113464355, "learning_rate": 6.0688956433637295e-06, "loss": 1.298, "step": 599 }, { "epoch": 0.1519949335022166, "grad_norm": 5.037347316741943, "learning_rate": 6.0790273556231e-06, "loss": 1.348, "step": 600 }, { "epoch": 0.15224825839138695, "grad_norm": 
4.79486608505249, "learning_rate": 6.089159067882473e-06, "loss": 1.242, "step": 601 }, { "epoch": 0.15250158328055732, "grad_norm": 4.395257949829102, "learning_rate": 6.099290780141844e-06, "loss": 1.1533, "step": 602 }, { "epoch": 0.15275490816972767, "grad_norm": 4.24887228012085, "learning_rate": 6.109422492401216e-06, "loss": 1.1706, "step": 603 }, { "epoch": 0.15300823305889805, "grad_norm": 4.749911308288574, "learning_rate": 6.1195542046605884e-06, "loss": 1.1852, "step": 604 }, { "epoch": 0.1532615579480684, "grad_norm": 5.326765537261963, "learning_rate": 6.12968591691996e-06, "loss": 1.2012, "step": 605 }, { "epoch": 0.15351488283723877, "grad_norm": 4.694608211517334, "learning_rate": 6.1398176291793325e-06, "loss": 1.1974, "step": 606 }, { "epoch": 0.15376820772640912, "grad_norm": 4.689748764038086, "learning_rate": 6.149949341438703e-06, "loss": 1.2694, "step": 607 }, { "epoch": 0.1540215326155795, "grad_norm": 4.834078788757324, "learning_rate": 6.160081053698076e-06, "loss": 1.2839, "step": 608 }, { "epoch": 0.15427485750474984, "grad_norm": 4.931235313415527, "learning_rate": 6.170212765957447e-06, "loss": 1.2335, "step": 609 }, { "epoch": 0.15452818239392022, "grad_norm": 4.4806060791015625, "learning_rate": 6.180344478216819e-06, "loss": 1.1619, "step": 610 }, { "epoch": 0.15478150728309056, "grad_norm": 4.908557891845703, "learning_rate": 6.1904761904761914e-06, "loss": 1.2526, "step": 611 }, { "epoch": 0.1550348321722609, "grad_norm": 4.620062828063965, "learning_rate": 6.200607902735562e-06, "loss": 1.2694, "step": 612 }, { "epoch": 0.1552881570614313, "grad_norm": 4.872439384460449, "learning_rate": 6.210739614994935e-06, "loss": 1.1727, "step": 613 }, { "epoch": 0.15554148195060163, "grad_norm": 4.710540294647217, "learning_rate": 6.220871327254306e-06, "loss": 1.1959, "step": 614 }, { "epoch": 0.155794806839772, "grad_norm": 4.609006881713867, "learning_rate": 6.231003039513678e-06, "loss": 1.2663, "step": 615 }, { "epoch": 0.15604813172894236, "grad_norm": 4.969557285308838, "learning_rate": 6.24113475177305e-06, "loss": 1.1245, "step": 616 }, { "epoch": 0.15630145661811273, "grad_norm": 4.4855055809021, "learning_rate": 6.251266464032422e-06, "loss": 1.2075, "step": 617 }, { "epoch": 0.15655478150728308, "grad_norm": 4.479387283325195, "learning_rate": 6.2613981762917944e-06, "loss": 1.254, "step": 618 }, { "epoch": 0.15680810639645346, "grad_norm": 4.780701637268066, "learning_rate": 6.271529888551165e-06, "loss": 1.2976, "step": 619 }, { "epoch": 0.1570614312856238, "grad_norm": 4.669686317443848, "learning_rate": 6.281661600810538e-06, "loss": 1.1554, "step": 620 }, { "epoch": 0.15731475617479418, "grad_norm": 4.6630330085754395, "learning_rate": 6.291793313069909e-06, "loss": 1.1868, "step": 621 }, { "epoch": 0.15756808106396453, "grad_norm": 4.786992073059082, "learning_rate": 6.301925025329281e-06, "loss": 1.1817, "step": 622 }, { "epoch": 0.1578214059531349, "grad_norm": 4.614963054656982, "learning_rate": 6.312056737588653e-06, "loss": 1.0693, "step": 623 }, { "epoch": 0.15807473084230525, "grad_norm": 4.794111728668213, "learning_rate": 6.322188449848025e-06, "loss": 1.2119, "step": 624 }, { "epoch": 0.15832805573147563, "grad_norm": 4.7772440910339355, "learning_rate": 6.3323201621073974e-06, "loss": 1.1036, "step": 625 }, { "epoch": 0.15858138062064597, "grad_norm": 4.855818748474121, "learning_rate": 6.342451874366768e-06, "loss": 1.2127, "step": 626 }, { "epoch": 0.15883470550981635, "grad_norm": 4.64150333404541, "learning_rate": 
6.35258358662614e-06, "loss": 1.1563, "step": 627 }, { "epoch": 0.1590880303989867, "grad_norm": 5.098568439483643, "learning_rate": 6.362715298885512e-06, "loss": 1.2082, "step": 628 }, { "epoch": 0.15934135528815707, "grad_norm": 4.789144515991211, "learning_rate": 6.372847011144884e-06, "loss": 1.2472, "step": 629 }, { "epoch": 0.15959468017732742, "grad_norm": 4.904264450073242, "learning_rate": 6.382978723404256e-06, "loss": 1.3159, "step": 630 }, { "epoch": 0.1598480050664978, "grad_norm": 5.040409088134766, "learning_rate": 6.393110435663628e-06, "loss": 1.293, "step": 631 }, { "epoch": 0.16010132995566814, "grad_norm": 5.19047737121582, "learning_rate": 6.403242147923e-06, "loss": 1.258, "step": 632 }, { "epoch": 0.16035465484483852, "grad_norm": 4.936978816986084, "learning_rate": 6.413373860182371e-06, "loss": 1.3897, "step": 633 }, { "epoch": 0.16060797973400887, "grad_norm": 4.566588401794434, "learning_rate": 6.423505572441743e-06, "loss": 1.1514, "step": 634 }, { "epoch": 0.16086130462317924, "grad_norm": 4.738023281097412, "learning_rate": 6.433637284701115e-06, "loss": 1.1792, "step": 635 }, { "epoch": 0.1611146295123496, "grad_norm": 5.172008037567139, "learning_rate": 6.443768996960487e-06, "loss": 1.2696, "step": 636 }, { "epoch": 0.16136795440151994, "grad_norm": 4.78951358795166, "learning_rate": 6.453900709219859e-06, "loss": 1.487, "step": 637 }, { "epoch": 0.1616212792906903, "grad_norm": 4.4658989906311035, "learning_rate": 6.46403242147923e-06, "loss": 1.1498, "step": 638 }, { "epoch": 0.16187460417986066, "grad_norm": 4.382638454437256, "learning_rate": 6.474164133738602e-06, "loss": 1.1615, "step": 639 }, { "epoch": 0.16212792906903103, "grad_norm": 4.52036190032959, "learning_rate": 6.484295845997974e-06, "loss": 1.191, "step": 640 }, { "epoch": 0.16238125395820138, "grad_norm": 4.304661750793457, "learning_rate": 6.494427558257346e-06, "loss": 1.1075, "step": 641 }, { "epoch": 0.16263457884737176, "grad_norm": 4.753248691558838, "learning_rate": 6.504559270516718e-06, "loss": 1.2028, "step": 642 }, { "epoch": 0.1628879037365421, "grad_norm": 4.911106586456299, "learning_rate": 6.51469098277609e-06, "loss": 1.3859, "step": 643 }, { "epoch": 0.16314122862571248, "grad_norm": 5.116793155670166, "learning_rate": 6.524822695035462e-06, "loss": 1.1626, "step": 644 }, { "epoch": 0.16339455351488283, "grad_norm": 5.212765693664551, "learning_rate": 6.534954407294833e-06, "loss": 1.2837, "step": 645 }, { "epoch": 0.1636478784040532, "grad_norm": 4.747133731842041, "learning_rate": 6.545086119554205e-06, "loss": 1.2263, "step": 646 }, { "epoch": 0.16390120329322355, "grad_norm": 4.83426570892334, "learning_rate": 6.555217831813577e-06, "loss": 1.2202, "step": 647 }, { "epoch": 0.16415452818239393, "grad_norm": 4.844605922698975, "learning_rate": 6.565349544072949e-06, "loss": 1.1607, "step": 648 }, { "epoch": 0.16440785307156427, "grad_norm": 4.556807518005371, "learning_rate": 6.575481256332321e-06, "loss": 1.2303, "step": 649 }, { "epoch": 0.16466117796073465, "grad_norm": 4.369943618774414, "learning_rate": 6.585612968591693e-06, "loss": 1.1333, "step": 650 }, { "epoch": 0.164914502849905, "grad_norm": 4.572859287261963, "learning_rate": 6.595744680851064e-06, "loss": 1.1399, "step": 651 }, { "epoch": 0.16516782773907537, "grad_norm": 4.5565505027771, "learning_rate": 6.605876393110436e-06, "loss": 1.2068, "step": 652 }, { "epoch": 0.16542115262824572, "grad_norm": 4.9332170486450195, "learning_rate": 6.616008105369808e-06, "loss": 1.4343, "step": 653 }, { "epoch": 
0.1656744775174161, "grad_norm": 5.112438201904297, "learning_rate": 6.62613981762918e-06, "loss": 1.3254, "step": 654 }, { "epoch": 0.16592780240658644, "grad_norm": 5.191864967346191, "learning_rate": 6.636271529888552e-06, "loss": 1.3029, "step": 655 }, { "epoch": 0.16618112729575682, "grad_norm": 4.797086715698242, "learning_rate": 6.646403242147924e-06, "loss": 1.2374, "step": 656 }, { "epoch": 0.16643445218492717, "grad_norm": 5.420797348022461, "learning_rate": 6.656534954407296e-06, "loss": 1.2674, "step": 657 }, { "epoch": 0.16668777707409754, "grad_norm": 5.221513271331787, "learning_rate": 6.666666666666667e-06, "loss": 1.1778, "step": 658 }, { "epoch": 0.1669411019632679, "grad_norm": 4.723294258117676, "learning_rate": 6.676798378926039e-06, "loss": 1.2055, "step": 659 }, { "epoch": 0.16719442685243824, "grad_norm": 5.161945819854736, "learning_rate": 6.686930091185411e-06, "loss": 1.3098, "step": 660 }, { "epoch": 0.1674477517416086, "grad_norm": 4.428056716918945, "learning_rate": 6.697061803444783e-06, "loss": 1.1297, "step": 661 }, { "epoch": 0.16770107663077896, "grad_norm": 4.702414035797119, "learning_rate": 6.707193515704155e-06, "loss": 1.1794, "step": 662 }, { "epoch": 0.16795440151994934, "grad_norm": 4.583940505981445, "learning_rate": 6.7173252279635256e-06, "loss": 1.1729, "step": 663 }, { "epoch": 0.16820772640911968, "grad_norm": 5.682275295257568, "learning_rate": 6.727456940222898e-06, "loss": 1.2655, "step": 664 }, { "epoch": 0.16846105129829006, "grad_norm": 4.875946521759033, "learning_rate": 6.73758865248227e-06, "loss": 1.1625, "step": 665 }, { "epoch": 0.1687143761874604, "grad_norm": 4.4593281745910645, "learning_rate": 6.747720364741642e-06, "loss": 1.1104, "step": 666 }, { "epoch": 0.16896770107663078, "grad_norm": 5.123931407928467, "learning_rate": 6.757852077001014e-06, "loss": 1.3114, "step": 667 }, { "epoch": 0.16922102596580113, "grad_norm": 4.7207255363464355, "learning_rate": 6.767983789260385e-06, "loss": 1.184, "step": 668 }, { "epoch": 0.1694743508549715, "grad_norm": 4.69482946395874, "learning_rate": 6.778115501519758e-06, "loss": 1.1978, "step": 669 }, { "epoch": 0.16972767574414185, "grad_norm": 4.450202941894531, "learning_rate": 6.7882472137791286e-06, "loss": 1.1371, "step": 670 }, { "epoch": 0.16998100063331223, "grad_norm": 5.538049221038818, "learning_rate": 6.798378926038501e-06, "loss": 1.2752, "step": 671 }, { "epoch": 0.17023432552248258, "grad_norm": 4.678607940673828, "learning_rate": 6.808510638297873e-06, "loss": 1.305, "step": 672 }, { "epoch": 0.17048765041165295, "grad_norm": 4.505133152008057, "learning_rate": 6.818642350557245e-06, "loss": 1.1406, "step": 673 }, { "epoch": 0.1707409753008233, "grad_norm": 5.682215213775635, "learning_rate": 6.828774062816617e-06, "loss": 1.3239, "step": 674 }, { "epoch": 0.17099430018999368, "grad_norm": 4.628654479980469, "learning_rate": 6.838905775075988e-06, "loss": 1.0611, "step": 675 }, { "epoch": 0.17124762507916402, "grad_norm": 4.638597011566162, "learning_rate": 6.849037487335361e-06, "loss": 1.3282, "step": 676 }, { "epoch": 0.1715009499683344, "grad_norm": 4.467402458190918, "learning_rate": 6.8591691995947316e-06, "loss": 1.2017, "step": 677 }, { "epoch": 0.17175427485750475, "grad_norm": 4.895129203796387, "learning_rate": 6.869300911854104e-06, "loss": 1.2265, "step": 678 }, { "epoch": 0.17200759974667512, "grad_norm": 4.73948860168457, "learning_rate": 6.879432624113476e-06, "loss": 1.1968, "step": 679 }, { "epoch": 0.17226092463584547, "grad_norm": 5.32872200012207, 
"learning_rate": 6.889564336372847e-06, "loss": 1.2781, "step": 680 }, { "epoch": 0.17251424952501584, "grad_norm": 4.710712909698486, "learning_rate": 6.89969604863222e-06, "loss": 1.2644, "step": 681 }, { "epoch": 0.1727675744141862, "grad_norm": 5.227484226226807, "learning_rate": 6.9098277608915905e-06, "loss": 1.3654, "step": 682 }, { "epoch": 0.17302089930335657, "grad_norm": 4.663438320159912, "learning_rate": 6.919959473150964e-06, "loss": 1.1535, "step": 683 }, { "epoch": 0.17327422419252692, "grad_norm": 4.927851676940918, "learning_rate": 6.9300911854103346e-06, "loss": 1.2334, "step": 684 }, { "epoch": 0.17352754908169726, "grad_norm": 4.664185047149658, "learning_rate": 6.940222897669707e-06, "loss": 1.3305, "step": 685 }, { "epoch": 0.17378087397086764, "grad_norm": 4.821979999542236, "learning_rate": 6.950354609929079e-06, "loss": 1.0947, "step": 686 }, { "epoch": 0.174034198860038, "grad_norm": 4.539811611175537, "learning_rate": 6.96048632218845e-06, "loss": 1.216, "step": 687 }, { "epoch": 0.17428752374920836, "grad_norm": 5.37496280670166, "learning_rate": 6.970618034447823e-06, "loss": 1.3172, "step": 688 }, { "epoch": 0.1745408486383787, "grad_norm": 4.168117523193359, "learning_rate": 6.9807497467071935e-06, "loss": 1.0461, "step": 689 }, { "epoch": 0.17479417352754908, "grad_norm": 4.736009120941162, "learning_rate": 6.990881458966566e-06, "loss": 1.2247, "step": 690 }, { "epoch": 0.17504749841671943, "grad_norm": 4.574060440063477, "learning_rate": 7.0010131712259376e-06, "loss": 1.1369, "step": 691 }, { "epoch": 0.1753008233058898, "grad_norm": 4.718704700469971, "learning_rate": 7.011144883485309e-06, "loss": 1.255, "step": 692 }, { "epoch": 0.17555414819506016, "grad_norm": 4.60386848449707, "learning_rate": 7.021276595744682e-06, "loss": 1.2849, "step": 693 }, { "epoch": 0.17580747308423053, "grad_norm": 4.837037086486816, "learning_rate": 7.031408308004053e-06, "loss": 1.2205, "step": 694 }, { "epoch": 0.17606079797340088, "grad_norm": 4.803045272827148, "learning_rate": 7.041540020263426e-06, "loss": 1.1764, "step": 695 }, { "epoch": 0.17631412286257125, "grad_norm": 4.914492130279541, "learning_rate": 7.0516717325227965e-06, "loss": 1.2196, "step": 696 }, { "epoch": 0.1765674477517416, "grad_norm": 5.577901363372803, "learning_rate": 7.061803444782169e-06, "loss": 1.2251, "step": 697 }, { "epoch": 0.17682077264091198, "grad_norm": 4.781679630279541, "learning_rate": 7.0719351570415406e-06, "loss": 1.1626, "step": 698 }, { "epoch": 0.17707409753008233, "grad_norm": 4.7842535972595215, "learning_rate": 7.082066869300912e-06, "loss": 1.1781, "step": 699 }, { "epoch": 0.1773274224192527, "grad_norm": 4.552501201629639, "learning_rate": 7.092198581560285e-06, "loss": 1.1998, "step": 700 }, { "epoch": 0.17758074730842305, "grad_norm": 4.845871925354004, "learning_rate": 7.102330293819656e-06, "loss": 1.2929, "step": 701 }, { "epoch": 0.17783407219759342, "grad_norm": 4.504978179931641, "learning_rate": 7.112462006079029e-06, "loss": 1.0928, "step": 702 }, { "epoch": 0.17808739708676377, "grad_norm": 4.638769626617432, "learning_rate": 7.1225937183383995e-06, "loss": 1.207, "step": 703 }, { "epoch": 0.17834072197593415, "grad_norm": 4.376689434051514, "learning_rate": 7.132725430597771e-06, "loss": 1.2776, "step": 704 }, { "epoch": 0.1785940468651045, "grad_norm": 4.428815841674805, "learning_rate": 7.1428571428571436e-06, "loss": 1.2085, "step": 705 }, { "epoch": 0.17884737175427487, "grad_norm": 4.936357021331787, "learning_rate": 7.152988855116515e-06, "loss": 
1.2298, "step": 706 }, { "epoch": 0.17910069664344522, "grad_norm": 4.868915557861328, "learning_rate": 7.163120567375888e-06, "loss": 1.2821, "step": 707 }, { "epoch": 0.1793540215326156, "grad_norm": 4.827498435974121, "learning_rate": 7.173252279635258e-06, "loss": 1.1198, "step": 708 }, { "epoch": 0.17960734642178594, "grad_norm": 4.9391984939575195, "learning_rate": 7.183383991894632e-06, "loss": 1.333, "step": 709 }, { "epoch": 0.1798606713109563, "grad_norm": 4.812231063842773, "learning_rate": 7.1935157041540025e-06, "loss": 1.3199, "step": 710 }, { "epoch": 0.18011399620012666, "grad_norm": 4.894856929779053, "learning_rate": 7.203647416413374e-06, "loss": 1.4395, "step": 711 }, { "epoch": 0.180367321089297, "grad_norm": 4.840929985046387, "learning_rate": 7.2137791286727466e-06, "loss": 1.1412, "step": 712 }, { "epoch": 0.1806206459784674, "grad_norm": 4.748245716094971, "learning_rate": 7.223910840932118e-06, "loss": 1.2242, "step": 713 }, { "epoch": 0.18087397086763773, "grad_norm": 4.636096000671387, "learning_rate": 7.234042553191491e-06, "loss": 1.3063, "step": 714 }, { "epoch": 0.1811272957568081, "grad_norm": 4.838342189788818, "learning_rate": 7.244174265450861e-06, "loss": 1.2305, "step": 715 }, { "epoch": 0.18138062064597846, "grad_norm": 4.387026786804199, "learning_rate": 7.254305977710233e-06, "loss": 1.2357, "step": 716 }, { "epoch": 0.18163394553514883, "grad_norm": 4.892704963684082, "learning_rate": 7.2644376899696055e-06, "loss": 1.2262, "step": 717 }, { "epoch": 0.18188727042431918, "grad_norm": 4.6830058097839355, "learning_rate": 7.274569402228977e-06, "loss": 1.1495, "step": 718 }, { "epoch": 0.18214059531348956, "grad_norm": 4.722542762756348, "learning_rate": 7.2847011144883496e-06, "loss": 1.2518, "step": 719 }, { "epoch": 0.1823939202026599, "grad_norm": 4.444228649139404, "learning_rate": 7.294832826747721e-06, "loss": 1.3131, "step": 720 }, { "epoch": 0.18264724509183028, "grad_norm": 4.4738335609436035, "learning_rate": 7.304964539007094e-06, "loss": 1.256, "step": 721 }, { "epoch": 0.18290056998100063, "grad_norm": 4.794192314147949, "learning_rate": 7.315096251266464e-06, "loss": 1.1494, "step": 722 }, { "epoch": 0.183153894870171, "grad_norm": 5.2765679359436035, "learning_rate": 7.325227963525836e-06, "loss": 1.1946, "step": 723 }, { "epoch": 0.18340721975934135, "grad_norm": 5.034933567047119, "learning_rate": 7.3353596757852085e-06, "loss": 1.2822, "step": 724 }, { "epoch": 0.18366054464851173, "grad_norm": 4.959056854248047, "learning_rate": 7.34549138804458e-06, "loss": 1.2693, "step": 725 }, { "epoch": 0.18391386953768207, "grad_norm": 5.423939228057861, "learning_rate": 7.3556231003039526e-06, "loss": 1.3931, "step": 726 }, { "epoch": 0.18416719442685245, "grad_norm": 4.752450466156006, "learning_rate": 7.365754812563324e-06, "loss": 1.3915, "step": 727 }, { "epoch": 0.1844205193160228, "grad_norm": 4.22570276260376, "learning_rate": 7.375886524822695e-06, "loss": 1.1261, "step": 728 }, { "epoch": 0.18467384420519317, "grad_norm": 4.663388729095459, "learning_rate": 7.386018237082067e-06, "loss": 1.1313, "step": 729 }, { "epoch": 0.18492716909436352, "grad_norm": 4.750311374664307, "learning_rate": 7.396149949341439e-06, "loss": 1.218, "step": 730 }, { "epoch": 0.1851804939835339, "grad_norm": 5.208932876586914, "learning_rate": 7.4062816616008115e-06, "loss": 1.5564, "step": 731 }, { "epoch": 0.18543381887270424, "grad_norm": 4.712618350982666, "learning_rate": 7.416413373860183e-06, "loss": 1.2767, "step": 732 }, { "epoch": 
0.18568714376187462, "grad_norm": 5.186469078063965, "learning_rate": 7.426545086119554e-06, "loss": 1.1933, "step": 733 }, { "epoch": 0.18594046865104497, "grad_norm": 4.9151930809021, "learning_rate": 7.436676798378927e-06, "loss": 1.2096, "step": 734 }, { "epoch": 0.1861937935402153, "grad_norm": 4.630429744720459, "learning_rate": 7.446808510638298e-06, "loss": 1.1771, "step": 735 }, { "epoch": 0.1864471184293857, "grad_norm": 4.926432132720947, "learning_rate": 7.45694022289767e-06, "loss": 1.2877, "step": 736 }, { "epoch": 0.18670044331855604, "grad_norm": 4.804089546203613, "learning_rate": 7.467071935157042e-06, "loss": 1.2231, "step": 737 }, { "epoch": 0.1869537682077264, "grad_norm": 4.711433410644531, "learning_rate": 7.4772036474164145e-06, "loss": 1.3406, "step": 738 }, { "epoch": 0.18720709309689676, "grad_norm": 5.337911128997803, "learning_rate": 7.487335359675786e-06, "loss": 1.2785, "step": 739 }, { "epoch": 0.18746041798606713, "grad_norm": 4.779917240142822, "learning_rate": 7.497467071935157e-06, "loss": 1.2526, "step": 740 }, { "epoch": 0.18771374287523748, "grad_norm": 4.563136100769043, "learning_rate": 7.507598784194529e-06, "loss": 1.2333, "step": 741 }, { "epoch": 0.18796706776440786, "grad_norm": 4.497562408447266, "learning_rate": 7.517730496453901e-06, "loss": 1.1728, "step": 742 }, { "epoch": 0.1882203926535782, "grad_norm": 4.735845565795898, "learning_rate": 7.527862208713273e-06, "loss": 1.2258, "step": 743 }, { "epoch": 0.18847371754274858, "grad_norm": 4.666139125823975, "learning_rate": 7.537993920972645e-06, "loss": 1.2191, "step": 744 }, { "epoch": 0.18872704243191893, "grad_norm": 4.478661060333252, "learning_rate": 7.548125633232017e-06, "loss": 1.2205, "step": 745 }, { "epoch": 0.1889803673210893, "grad_norm": 4.495870113372803, "learning_rate": 7.558257345491389e-06, "loss": 1.0589, "step": 746 }, { "epoch": 0.18923369221025965, "grad_norm": 5.100306034088135, "learning_rate": 7.56838905775076e-06, "loss": 1.3409, "step": 747 }, { "epoch": 0.18948701709943003, "grad_norm": 4.780535697937012, "learning_rate": 7.578520770010132e-06, "loss": 1.275, "step": 748 }, { "epoch": 0.18974034198860038, "grad_norm": 4.6737189292907715, "learning_rate": 7.588652482269504e-06, "loss": 1.3216, "step": 749 }, { "epoch": 0.18999366687777075, "grad_norm": 4.600954055786133, "learning_rate": 7.598784194528876e-06, "loss": 1.1785, "step": 750 }, { "epoch": 0.1902469917669411, "grad_norm": 4.667325973510742, "learning_rate": 7.608915906788248e-06, "loss": 1.1652, "step": 751 }, { "epoch": 0.19050031665611147, "grad_norm": 4.5473856925964355, "learning_rate": 7.61904761904762e-06, "loss": 1.3336, "step": 752 }, { "epoch": 0.19075364154528182, "grad_norm": 5.475617408752441, "learning_rate": 7.629179331306992e-06, "loss": 1.3631, "step": 753 }, { "epoch": 0.1910069664344522, "grad_norm": 4.32639217376709, "learning_rate": 7.639311043566364e-06, "loss": 1.1886, "step": 754 }, { "epoch": 0.19126029132362254, "grad_norm": 4.698390007019043, "learning_rate": 7.649442755825735e-06, "loss": 1.2001, "step": 755 }, { "epoch": 0.19151361621279292, "grad_norm": 4.61825704574585, "learning_rate": 7.659574468085107e-06, "loss": 1.0986, "step": 756 }, { "epoch": 0.19176694110196327, "grad_norm": 4.3026628494262695, "learning_rate": 7.669706180344479e-06, "loss": 1.2281, "step": 757 }, { "epoch": 0.19202026599113362, "grad_norm": 4.374382972717285, "learning_rate": 7.67983789260385e-06, "loss": 1.1682, "step": 758 }, { "epoch": 0.192273590880304, "grad_norm": 4.806503772735596, 
"learning_rate": 7.689969604863222e-06, "loss": 1.2286, "step": 759 }, { "epoch": 0.19252691576947434, "grad_norm": 4.930648326873779, "learning_rate": 7.700101317122595e-06, "loss": 1.3067, "step": 760 }, { "epoch": 0.1927802406586447, "grad_norm": 6.1269307136535645, "learning_rate": 7.710233029381967e-06, "loss": 1.354, "step": 761 }, { "epoch": 0.19303356554781506, "grad_norm": 4.767146587371826, "learning_rate": 7.720364741641338e-06, "loss": 1.2168, "step": 762 }, { "epoch": 0.19328689043698544, "grad_norm": 4.531832695007324, "learning_rate": 7.73049645390071e-06, "loss": 1.2485, "step": 763 }, { "epoch": 0.19354021532615578, "grad_norm": 4.542140007019043, "learning_rate": 7.740628166160082e-06, "loss": 1.2629, "step": 764 }, { "epoch": 0.19379354021532616, "grad_norm": 4.538090229034424, "learning_rate": 7.750759878419453e-06, "loss": 1.2346, "step": 765 }, { "epoch": 0.1940468651044965, "grad_norm": 4.264867782592773, "learning_rate": 7.760891590678825e-06, "loss": 1.1366, "step": 766 }, { "epoch": 0.19430018999366688, "grad_norm": 4.4305739402771, "learning_rate": 7.771023302938198e-06, "loss": 1.1654, "step": 767 }, { "epoch": 0.19455351488283723, "grad_norm": 4.640016078948975, "learning_rate": 7.78115501519757e-06, "loss": 1.1595, "step": 768 }, { "epoch": 0.1948068397720076, "grad_norm": 4.859258651733398, "learning_rate": 7.79128672745694e-06, "loss": 1.2217, "step": 769 }, { "epoch": 0.19506016466117795, "grad_norm": 4.299515247344971, "learning_rate": 7.801418439716313e-06, "loss": 1.1171, "step": 770 }, { "epoch": 0.19531348955034833, "grad_norm": 4.902132511138916, "learning_rate": 7.811550151975685e-06, "loss": 1.2726, "step": 771 }, { "epoch": 0.19556681443951868, "grad_norm": 5.054393291473389, "learning_rate": 7.821681864235056e-06, "loss": 1.1047, "step": 772 }, { "epoch": 0.19582013932868905, "grad_norm": 4.509024143218994, "learning_rate": 7.831813576494428e-06, "loss": 1.1481, "step": 773 }, { "epoch": 0.1960734642178594, "grad_norm": 4.73517370223999, "learning_rate": 7.841945288753801e-06, "loss": 1.3705, "step": 774 }, { "epoch": 0.19632678910702978, "grad_norm": 4.514815330505371, "learning_rate": 7.852077001013173e-06, "loss": 1.3325, "step": 775 }, { "epoch": 0.19658011399620012, "grad_norm": 4.597283363342285, "learning_rate": 7.862208713272543e-06, "loss": 1.2341, "step": 776 }, { "epoch": 0.1968334388853705, "grad_norm": 4.144809246063232, "learning_rate": 7.872340425531916e-06, "loss": 1.1692, "step": 777 }, { "epoch": 0.19708676377454085, "grad_norm": 4.425267696380615, "learning_rate": 7.882472137791288e-06, "loss": 1.1706, "step": 778 }, { "epoch": 0.19734008866371122, "grad_norm": 4.391229629516602, "learning_rate": 7.89260385005066e-06, "loss": 1.2168, "step": 779 }, { "epoch": 0.19759341355288157, "grad_norm": 4.830233573913574, "learning_rate": 7.90273556231003e-06, "loss": 1.2752, "step": 780 }, { "epoch": 0.19784673844205194, "grad_norm": 4.596672534942627, "learning_rate": 7.912867274569402e-06, "loss": 1.2317, "step": 781 }, { "epoch": 0.1981000633312223, "grad_norm": 4.901708126068115, "learning_rate": 7.922998986828776e-06, "loss": 1.2566, "step": 782 }, { "epoch": 0.19835338822039264, "grad_norm": 4.467739582061768, "learning_rate": 7.933130699088146e-06, "loss": 1.1349, "step": 783 }, { "epoch": 0.19860671310956302, "grad_norm": 4.667692184448242, "learning_rate": 7.943262411347519e-06, "loss": 1.1607, "step": 784 }, { "epoch": 0.19886003799873336, "grad_norm": 4.813882350921631, "learning_rate": 7.95339412360689e-06, "loss": 1.3432, 
"step": 785 }, { "epoch": 0.19911336288790374, "grad_norm": 4.331610202789307, "learning_rate": 7.963525835866262e-06, "loss": 1.1571, "step": 786 }, { "epoch": 0.1993666877770741, "grad_norm": 4.513205528259277, "learning_rate": 7.973657548125634e-06, "loss": 1.3929, "step": 787 }, { "epoch": 0.19962001266624446, "grad_norm": 4.2903265953063965, "learning_rate": 7.983789260385005e-06, "loss": 1.3334, "step": 788 }, { "epoch": 0.1998733375554148, "grad_norm": 3.9981842041015625, "learning_rate": 7.993920972644377e-06, "loss": 1.0683, "step": 789 }, { "epoch": 0.20012666244458518, "grad_norm": 4.327489376068115, "learning_rate": 8.004052684903749e-06, "loss": 1.0201, "step": 790 }, { "epoch": 0.20037998733375553, "grad_norm": 4.737874984741211, "learning_rate": 8.014184397163122e-06, "loss": 1.4405, "step": 791 }, { "epoch": 0.2006333122229259, "grad_norm": 4.559230804443359, "learning_rate": 8.024316109422494e-06, "loss": 1.2087, "step": 792 }, { "epoch": 0.20088663711209626, "grad_norm": 4.605414390563965, "learning_rate": 8.034447821681865e-06, "loss": 1.2694, "step": 793 }, { "epoch": 0.20113996200126663, "grad_norm": 5.313296794891357, "learning_rate": 8.044579533941237e-06, "loss": 1.3344, "step": 794 }, { "epoch": 0.20139328689043698, "grad_norm": 5.075579643249512, "learning_rate": 8.054711246200608e-06, "loss": 1.361, "step": 795 }, { "epoch": 0.20164661177960735, "grad_norm": 4.876029968261719, "learning_rate": 8.06484295845998e-06, "loss": 1.2475, "step": 796 }, { "epoch": 0.2018999366687777, "grad_norm": 4.533029556274414, "learning_rate": 8.074974670719352e-06, "loss": 1.3967, "step": 797 }, { "epoch": 0.20215326155794808, "grad_norm": 4.693490028381348, "learning_rate": 8.085106382978723e-06, "loss": 1.2355, "step": 798 }, { "epoch": 0.20240658644711843, "grad_norm": 4.543484687805176, "learning_rate": 8.095238095238097e-06, "loss": 1.1849, "step": 799 }, { "epoch": 0.2026599113362888, "grad_norm": 4.704221248626709, "learning_rate": 8.105369807497468e-06, "loss": 1.2655, "step": 800 }, { "epoch": 0.20291323622545915, "grad_norm": 4.049461841583252, "learning_rate": 8.11550151975684e-06, "loss": 1.0146, "step": 801 }, { "epoch": 0.20316656111462952, "grad_norm": 4.67555046081543, "learning_rate": 8.125633232016211e-06, "loss": 1.1409, "step": 802 }, { "epoch": 0.20341988600379987, "grad_norm": 4.73737907409668, "learning_rate": 8.135764944275583e-06, "loss": 1.3214, "step": 803 }, { "epoch": 0.20367321089297025, "grad_norm": 4.849461078643799, "learning_rate": 8.145896656534955e-06, "loss": 1.3731, "step": 804 }, { "epoch": 0.2039265357821406, "grad_norm": 4.94274377822876, "learning_rate": 8.156028368794326e-06, "loss": 1.2043, "step": 805 }, { "epoch": 0.20417986067131097, "grad_norm": 4.574157238006592, "learning_rate": 8.1661600810537e-06, "loss": 1.2201, "step": 806 }, { "epoch": 0.20443318556048132, "grad_norm": 4.684918403625488, "learning_rate": 8.176291793313071e-06, "loss": 1.1782, "step": 807 }, { "epoch": 0.20468651044965167, "grad_norm": 5.050751209259033, "learning_rate": 8.186423505572443e-06, "loss": 1.1306, "step": 808 }, { "epoch": 0.20493983533882204, "grad_norm": 4.734405517578125, "learning_rate": 8.196555217831814e-06, "loss": 1.1486, "step": 809 }, { "epoch": 0.2051931602279924, "grad_norm": 4.575673580169678, "learning_rate": 8.206686930091186e-06, "loss": 1.3207, "step": 810 }, { "epoch": 0.20544648511716276, "grad_norm": 4.46920108795166, "learning_rate": 8.216818642350558e-06, "loss": 1.1536, "step": 811 }, { "epoch": 0.2056998100063331, "grad_norm": 
4.92442512512207, "learning_rate": 8.22695035460993e-06, "loss": 1.2989, "step": 812 }, { "epoch": 0.2059531348955035, "grad_norm": 4.4767069816589355, "learning_rate": 8.237082066869303e-06, "loss": 1.182, "step": 813 }, { "epoch": 0.20620645978467383, "grad_norm": 4.701501369476318, "learning_rate": 8.247213779128672e-06, "loss": 1.2302, "step": 814 }, { "epoch": 0.2064597846738442, "grad_norm": 4.852680206298828, "learning_rate": 8.257345491388046e-06, "loss": 1.3965, "step": 815 }, { "epoch": 0.20671310956301456, "grad_norm": 4.312740802764893, "learning_rate": 8.267477203647417e-06, "loss": 1.1641, "step": 816 }, { "epoch": 0.20696643445218493, "grad_norm": 4.566477298736572, "learning_rate": 8.277608915906789e-06, "loss": 1.2108, "step": 817 }, { "epoch": 0.20721975934135528, "grad_norm": 4.805391311645508, "learning_rate": 8.28774062816616e-06, "loss": 1.2754, "step": 818 }, { "epoch": 0.20747308423052566, "grad_norm": 4.131628513336182, "learning_rate": 8.297872340425532e-06, "loss": 1.0912, "step": 819 }, { "epoch": 0.207726409119696, "grad_norm": 4.7957892417907715, "learning_rate": 8.308004052684906e-06, "loss": 1.1674, "step": 820 }, { "epoch": 0.20797973400886638, "grad_norm": 4.751543998718262, "learning_rate": 8.318135764944275e-06, "loss": 1.3456, "step": 821 }, { "epoch": 0.20823305889803673, "grad_norm": 4.739302635192871, "learning_rate": 8.328267477203647e-06, "loss": 1.1692, "step": 822 }, { "epoch": 0.2084863837872071, "grad_norm": 5.0696916580200195, "learning_rate": 8.33839918946302e-06, "loss": 1.2978, "step": 823 }, { "epoch": 0.20873970867637745, "grad_norm": 4.685317516326904, "learning_rate": 8.348530901722392e-06, "loss": 1.226, "step": 824 }, { "epoch": 0.20899303356554783, "grad_norm": 4.865052700042725, "learning_rate": 8.358662613981764e-06, "loss": 1.338, "step": 825 }, { "epoch": 0.20924635845471817, "grad_norm": 4.250523090362549, "learning_rate": 8.368794326241135e-06, "loss": 1.1768, "step": 826 }, { "epoch": 0.20949968334388855, "grad_norm": 4.162044525146484, "learning_rate": 8.378926038500509e-06, "loss": 1.2061, "step": 827 }, { "epoch": 0.2097530082330589, "grad_norm": 4.625030517578125, "learning_rate": 8.389057750759878e-06, "loss": 1.2754, "step": 828 }, { "epoch": 0.21000633312222927, "grad_norm": 4.660970687866211, "learning_rate": 8.39918946301925e-06, "loss": 1.2398, "step": 829 }, { "epoch": 0.21025965801139962, "grad_norm": 4.666497230529785, "learning_rate": 8.409321175278623e-06, "loss": 1.1091, "step": 830 }, { "epoch": 0.21051298290057, "grad_norm": 4.57522439956665, "learning_rate": 8.419452887537995e-06, "loss": 1.3134, "step": 831 }, { "epoch": 0.21076630778974034, "grad_norm": 5.008798122406006, "learning_rate": 8.429584599797367e-06, "loss": 1.2074, "step": 832 }, { "epoch": 0.2110196326789107, "grad_norm": 4.1695780754089355, "learning_rate": 8.439716312056738e-06, "loss": 1.148, "step": 833 }, { "epoch": 0.21127295756808107, "grad_norm": 4.676490306854248, "learning_rate": 8.44984802431611e-06, "loss": 1.0401, "step": 834 }, { "epoch": 0.2115262824572514, "grad_norm": 4.756280422210693, "learning_rate": 8.459979736575481e-06, "loss": 1.238, "step": 835 }, { "epoch": 0.2117796073464218, "grad_norm": 4.5334601402282715, "learning_rate": 8.470111448834853e-06, "loss": 1.1742, "step": 836 }, { "epoch": 0.21203293223559214, "grad_norm": 4.883600234985352, "learning_rate": 8.480243161094226e-06, "loss": 1.3176, "step": 837 }, { "epoch": 0.2122862571247625, "grad_norm": 4.711753845214844, "learning_rate": 8.490374873353598e-06, 
"loss": 1.2518, "step": 838 }, { "epoch": 0.21253958201393286, "grad_norm": 4.728085041046143, "learning_rate": 8.50050658561297e-06, "loss": 1.28, "step": 839 }, { "epoch": 0.21279290690310323, "grad_norm": 4.967433929443359, "learning_rate": 8.510638297872341e-06, "loss": 1.3654, "step": 840 }, { "epoch": 0.21304623179227358, "grad_norm": 4.490946292877197, "learning_rate": 8.520770010131713e-06, "loss": 1.2963, "step": 841 }, { "epoch": 0.21329955668144396, "grad_norm": 4.252407073974609, "learning_rate": 8.530901722391084e-06, "loss": 1.2775, "step": 842 }, { "epoch": 0.2135528815706143, "grad_norm": 4.625506401062012, "learning_rate": 8.541033434650456e-06, "loss": 1.2156, "step": 843 }, { "epoch": 0.21380620645978468, "grad_norm": 4.151983737945557, "learning_rate": 8.55116514690983e-06, "loss": 1.3086, "step": 844 }, { "epoch": 0.21405953134895503, "grad_norm": 4.83617639541626, "learning_rate": 8.561296859169201e-06, "loss": 1.2696, "step": 845 }, { "epoch": 0.2143128562381254, "grad_norm": 4.213065147399902, "learning_rate": 8.571428571428571e-06, "loss": 1.0948, "step": 846 }, { "epoch": 0.21456618112729575, "grad_norm": 4.133890151977539, "learning_rate": 8.581560283687944e-06, "loss": 1.3205, "step": 847 }, { "epoch": 0.21481950601646613, "grad_norm": 4.231499195098877, "learning_rate": 8.591691995947316e-06, "loss": 1.1487, "step": 848 }, { "epoch": 0.21507283090563648, "grad_norm": 4.207747459411621, "learning_rate": 8.601823708206687e-06, "loss": 1.2424, "step": 849 }, { "epoch": 0.21532615579480685, "grad_norm": 4.441788673400879, "learning_rate": 8.611955420466059e-06, "loss": 1.2109, "step": 850 }, { "epoch": 0.2155794806839772, "grad_norm": 4.671090602874756, "learning_rate": 8.622087132725432e-06, "loss": 1.3559, "step": 851 }, { "epoch": 0.21583280557314757, "grad_norm": 4.83182430267334, "learning_rate": 8.632218844984804e-06, "loss": 1.3977, "step": 852 }, { "epoch": 0.21608613046231792, "grad_norm": 4.461249351501465, "learning_rate": 8.642350557244174e-06, "loss": 1.2408, "step": 853 }, { "epoch": 0.2163394553514883, "grad_norm": 4.672199726104736, "learning_rate": 8.652482269503547e-06, "loss": 1.3422, "step": 854 }, { "epoch": 0.21659278024065864, "grad_norm": 4.4290900230407715, "learning_rate": 8.662613981762919e-06, "loss": 1.2632, "step": 855 }, { "epoch": 0.216846105129829, "grad_norm": 4.46585750579834, "learning_rate": 8.67274569402229e-06, "loss": 1.1929, "step": 856 }, { "epoch": 0.21709943001899937, "grad_norm": 4.754853248596191, "learning_rate": 8.682877406281662e-06, "loss": 1.2999, "step": 857 }, { "epoch": 0.21735275490816972, "grad_norm": 4.45685338973999, "learning_rate": 8.693009118541034e-06, "loss": 1.2949, "step": 858 }, { "epoch": 0.2176060797973401, "grad_norm": 4.251350402832031, "learning_rate": 8.703140830800407e-06, "loss": 1.3525, "step": 859 }, { "epoch": 0.21785940468651044, "grad_norm": 4.639081954956055, "learning_rate": 8.713272543059777e-06, "loss": 1.2987, "step": 860 }, { "epoch": 0.2181127295756808, "grad_norm": 4.152873992919922, "learning_rate": 8.72340425531915e-06, "loss": 1.3355, "step": 861 }, { "epoch": 0.21836605446485116, "grad_norm": 4.566308975219727, "learning_rate": 8.733535967578522e-06, "loss": 1.2251, "step": 862 }, { "epoch": 0.21861937935402154, "grad_norm": 5.728938579559326, "learning_rate": 8.743667679837893e-06, "loss": 1.3422, "step": 863 }, { "epoch": 0.21887270424319188, "grad_norm": 5.035421848297119, "learning_rate": 8.753799392097265e-06, "loss": 1.3664, "step": 864 }, { "epoch": 
0.21912602913236226, "grad_norm": 5.070148468017578, "learning_rate": 8.763931104356637e-06, "loss": 1.3327, "step": 865 }, { "epoch": 0.2193793540215326, "grad_norm": 4.458358287811279, "learning_rate": 8.774062816616008e-06, "loss": 1.3214, "step": 866 }, { "epoch": 0.21963267891070298, "grad_norm": 4.389994144439697, "learning_rate": 8.78419452887538e-06, "loss": 1.1337, "step": 867 }, { "epoch": 0.21988600379987333, "grad_norm": 4.408799171447754, "learning_rate": 8.794326241134753e-06, "loss": 1.2597, "step": 868 }, { "epoch": 0.2201393286890437, "grad_norm": 4.331786632537842, "learning_rate": 8.804457953394125e-06, "loss": 1.3029, "step": 869 }, { "epoch": 0.22039265357821405, "grad_norm": 4.499011039733887, "learning_rate": 8.814589665653496e-06, "loss": 1.217, "step": 870 }, { "epoch": 0.22064597846738443, "grad_norm": 4.391335964202881, "learning_rate": 8.824721377912868e-06, "loss": 1.2315, "step": 871 }, { "epoch": 0.22089930335655478, "grad_norm": 4.000341892242432, "learning_rate": 8.83485309017224e-06, "loss": 1.1479, "step": 872 }, { "epoch": 0.22115262824572515, "grad_norm": 4.703832626342773, "learning_rate": 8.844984802431611e-06, "loss": 1.3473, "step": 873 }, { "epoch": 0.2214059531348955, "grad_norm": 4.657235145568848, "learning_rate": 8.855116514690983e-06, "loss": 1.2664, "step": 874 }, { "epoch": 0.22165927802406588, "grad_norm": 4.632787227630615, "learning_rate": 8.865248226950355e-06, "loss": 1.3598, "step": 875 }, { "epoch": 0.22191260291323622, "grad_norm": 5.230173587799072, "learning_rate": 8.875379939209728e-06, "loss": 1.3767, "step": 876 }, { "epoch": 0.2221659278024066, "grad_norm": 4.797090530395508, "learning_rate": 8.8855116514691e-06, "loss": 1.3005, "step": 877 }, { "epoch": 0.22241925269157695, "grad_norm": 4.6531596183776855, "learning_rate": 8.895643363728471e-06, "loss": 1.2636, "step": 878 }, { "epoch": 0.22267257758074732, "grad_norm": 4.6279473304748535, "learning_rate": 8.905775075987843e-06, "loss": 1.2479, "step": 879 }, { "epoch": 0.22292590246991767, "grad_norm": 4.576147556304932, "learning_rate": 8.915906788247214e-06, "loss": 1.2952, "step": 880 }, { "epoch": 0.22317922735908802, "grad_norm": 4.335544586181641, "learning_rate": 8.926038500506586e-06, "loss": 1.1002, "step": 881 }, { "epoch": 0.2234325522482584, "grad_norm": 4.895113945007324, "learning_rate": 8.936170212765958e-06, "loss": 1.2593, "step": 882 }, { "epoch": 0.22368587713742874, "grad_norm": 4.440051555633545, "learning_rate": 8.94630192502533e-06, "loss": 1.1467, "step": 883 }, { "epoch": 0.22393920202659912, "grad_norm": 4.605652809143066, "learning_rate": 8.956433637284702e-06, "loss": 1.2606, "step": 884 }, { "epoch": 0.22419252691576946, "grad_norm": 4.574369430541992, "learning_rate": 8.966565349544074e-06, "loss": 1.2385, "step": 885 }, { "epoch": 0.22444585180493984, "grad_norm": 4.45790433883667, "learning_rate": 8.976697061803446e-06, "loss": 1.1846, "step": 886 }, { "epoch": 0.2246991766941102, "grad_norm": 4.781072616577148, "learning_rate": 8.986828774062817e-06, "loss": 1.2787, "step": 887 }, { "epoch": 0.22495250158328056, "grad_norm": 4.68171501159668, "learning_rate": 8.996960486322189e-06, "loss": 1.1573, "step": 888 }, { "epoch": 0.2252058264724509, "grad_norm": 4.497115612030029, "learning_rate": 9.00709219858156e-06, "loss": 1.3108, "step": 889 }, { "epoch": 0.22545915136162129, "grad_norm": 4.450427055358887, "learning_rate": 9.017223910840934e-06, "loss": 1.2311, "step": 890 }, { "epoch": 0.22571247625079163, "grad_norm": 4.6777424812316895, 
"learning_rate": 9.027355623100304e-06, "loss": 1.3122, "step": 891 }, { "epoch": 0.225965801139962, "grad_norm": 4.58901309967041, "learning_rate": 9.037487335359677e-06, "loss": 1.329, "step": 892 }, { "epoch": 0.22621912602913236, "grad_norm": 4.763391494750977, "learning_rate": 9.047619047619049e-06, "loss": 1.2884, "step": 893 }, { "epoch": 0.22647245091830273, "grad_norm": 4.61236047744751, "learning_rate": 9.05775075987842e-06, "loss": 1.3443, "step": 894 }, { "epoch": 0.22672577580747308, "grad_norm": 4.786077499389648, "learning_rate": 9.067882472137792e-06, "loss": 1.2916, "step": 895 }, { "epoch": 0.22697910069664345, "grad_norm": 4.385064601898193, "learning_rate": 9.078014184397164e-06, "loss": 1.2207, "step": 896 }, { "epoch": 0.2272324255858138, "grad_norm": 4.293412208557129, "learning_rate": 9.088145896656537e-06, "loss": 1.1634, "step": 897 }, { "epoch": 0.22748575047498418, "grad_norm": 4.510735511779785, "learning_rate": 9.098277608915907e-06, "loss": 1.2406, "step": 898 }, { "epoch": 0.22773907536415453, "grad_norm": 5.970086097717285, "learning_rate": 9.108409321175278e-06, "loss": 1.2865, "step": 899 }, { "epoch": 0.2279924002533249, "grad_norm": 4.670897960662842, "learning_rate": 9.118541033434652e-06, "loss": 1.2052, "step": 900 }, { "epoch": 0.22824572514249525, "grad_norm": 4.067436695098877, "learning_rate": 9.128672745694023e-06, "loss": 1.1642, "step": 901 }, { "epoch": 0.22849905003166562, "grad_norm": 4.683802127838135, "learning_rate": 9.138804457953395e-06, "loss": 1.4001, "step": 902 }, { "epoch": 0.22875237492083597, "grad_norm": 4.468769073486328, "learning_rate": 9.148936170212767e-06, "loss": 1.2653, "step": 903 }, { "epoch": 0.22900569981000635, "grad_norm": 4.894930839538574, "learning_rate": 9.15906788247214e-06, "loss": 1.2637, "step": 904 }, { "epoch": 0.2292590246991767, "grad_norm": 4.898458003997803, "learning_rate": 9.16919959473151e-06, "loss": 1.2617, "step": 905 }, { "epoch": 0.22951234958834704, "grad_norm": 4.784833908081055, "learning_rate": 9.179331306990881e-06, "loss": 1.3382, "step": 906 }, { "epoch": 0.22976567447751742, "grad_norm": 4.338533878326416, "learning_rate": 9.189463019250255e-06, "loss": 1.2544, "step": 907 }, { "epoch": 0.23001899936668777, "grad_norm": 4.30348539352417, "learning_rate": 9.199594731509626e-06, "loss": 1.2249, "step": 908 }, { "epoch": 0.23027232425585814, "grad_norm": 4.037816524505615, "learning_rate": 9.209726443768998e-06, "loss": 1.2093, "step": 909 }, { "epoch": 0.2305256491450285, "grad_norm": 4.498706340789795, "learning_rate": 9.21985815602837e-06, "loss": 1.2782, "step": 910 }, { "epoch": 0.23077897403419886, "grad_norm": 3.9231648445129395, "learning_rate": 9.229989868287741e-06, "loss": 1.1986, "step": 911 }, { "epoch": 0.2310322989233692, "grad_norm": 4.32187032699585, "learning_rate": 9.240121580547113e-06, "loss": 1.3408, "step": 912 }, { "epoch": 0.2312856238125396, "grad_norm": 4.445515155792236, "learning_rate": 9.250253292806484e-06, "loss": 1.2548, "step": 913 }, { "epoch": 0.23153894870170993, "grad_norm": 4.231760501861572, "learning_rate": 9.260385005065858e-06, "loss": 1.1247, "step": 914 }, { "epoch": 0.2317922735908803, "grad_norm": 4.526363372802734, "learning_rate": 9.27051671732523e-06, "loss": 1.1513, "step": 915 }, { "epoch": 0.23204559848005066, "grad_norm": 4.280642986297607, "learning_rate": 9.280648429584601e-06, "loss": 1.2449, "step": 916 }, { "epoch": 0.23229892336922103, "grad_norm": 4.070103645324707, "learning_rate": 9.290780141843973e-06, "loss": 1.1165, 
"step": 917 }, { "epoch": 0.23255224825839138, "grad_norm": 4.806093692779541, "learning_rate": 9.300911854103344e-06, "loss": 1.2911, "step": 918 }, { "epoch": 0.23280557314756176, "grad_norm": 4.156966686248779, "learning_rate": 9.311043566362716e-06, "loss": 1.1085, "step": 919 }, { "epoch": 0.2330588980367321, "grad_norm": 4.51497220993042, "learning_rate": 9.321175278622087e-06, "loss": 1.2273, "step": 920 }, { "epoch": 0.23331222292590248, "grad_norm": 4.607082843780518, "learning_rate": 9.33130699088146e-06, "loss": 1.3149, "step": 921 }, { "epoch": 0.23356554781507283, "grad_norm": 4.478603363037109, "learning_rate": 9.341438703140832e-06, "loss": 1.2928, "step": 922 }, { "epoch": 0.2338188727042432, "grad_norm": 4.361640930175781, "learning_rate": 9.351570415400202e-06, "loss": 1.1646, "step": 923 }, { "epoch": 0.23407219759341355, "grad_norm": 4.692184925079346, "learning_rate": 9.361702127659576e-06, "loss": 1.2986, "step": 924 }, { "epoch": 0.23432552248258393, "grad_norm": 4.741754531860352, "learning_rate": 9.371833839918947e-06, "loss": 1.2221, "step": 925 }, { "epoch": 0.23457884737175427, "grad_norm": 4.560026168823242, "learning_rate": 9.381965552178319e-06, "loss": 1.2979, "step": 926 }, { "epoch": 0.23483217226092465, "grad_norm": 4.630263328552246, "learning_rate": 9.39209726443769e-06, "loss": 1.2944, "step": 927 }, { "epoch": 0.235085497150095, "grad_norm": 4.621401309967041, "learning_rate": 9.402228976697062e-06, "loss": 1.304, "step": 928 }, { "epoch": 0.23533882203926534, "grad_norm": 4.51711368560791, "learning_rate": 9.412360688956435e-06, "loss": 1.3174, "step": 929 }, { "epoch": 0.23559214692843572, "grad_norm": 4.709510326385498, "learning_rate": 9.422492401215805e-06, "loss": 1.2371, "step": 930 }, { "epoch": 0.23584547181760607, "grad_norm": 4.616505146026611, "learning_rate": 9.432624113475179e-06, "loss": 1.4215, "step": 931 }, { "epoch": 0.23609879670677644, "grad_norm": 4.576818943023682, "learning_rate": 9.44275582573455e-06, "loss": 1.3146, "step": 932 }, { "epoch": 0.2363521215959468, "grad_norm": 4.430837154388428, "learning_rate": 9.452887537993922e-06, "loss": 1.2977, "step": 933 }, { "epoch": 0.23660544648511717, "grad_norm": 4.0142388343811035, "learning_rate": 9.463019250253293e-06, "loss": 1.1209, "step": 934 }, { "epoch": 0.2368587713742875, "grad_norm": 4.440330505371094, "learning_rate": 9.473150962512665e-06, "loss": 1.2769, "step": 935 }, { "epoch": 0.2371120962634579, "grad_norm": 4.563925743103027, "learning_rate": 9.483282674772038e-06, "loss": 1.2471, "step": 936 }, { "epoch": 0.23736542115262824, "grad_norm": 4.916733741760254, "learning_rate": 9.493414387031408e-06, "loss": 1.4746, "step": 937 }, { "epoch": 0.2376187460417986, "grad_norm": 4.41516637802124, "learning_rate": 9.503546099290782e-06, "loss": 1.3259, "step": 938 }, { "epoch": 0.23787207093096896, "grad_norm": 4.409024238586426, "learning_rate": 9.513677811550153e-06, "loss": 1.2328, "step": 939 }, { "epoch": 0.23812539582013934, "grad_norm": 4.46191930770874, "learning_rate": 9.523809523809525e-06, "loss": 1.3066, "step": 940 }, { "epoch": 0.23837872070930968, "grad_norm": 4.529879093170166, "learning_rate": 9.533941236068896e-06, "loss": 1.1784, "step": 941 }, { "epoch": 0.23863204559848006, "grad_norm": 4.373073101043701, "learning_rate": 9.544072948328268e-06, "loss": 1.3092, "step": 942 }, { "epoch": 0.2388853704876504, "grad_norm": 4.161291599273682, "learning_rate": 9.55420466058764e-06, "loss": 1.1198, "step": 943 }, { "epoch": 0.23913869537682078, "grad_norm": 
4.716823577880859, "learning_rate": 9.564336372847011e-06, "loss": 1.2307, "step": 944 }, { "epoch": 0.23939202026599113, "grad_norm": 4.811398983001709, "learning_rate": 9.574468085106385e-06, "loss": 1.3575, "step": 945 }, { "epoch": 0.2396453451551615, "grad_norm": 4.302731037139893, "learning_rate": 9.584599797365756e-06, "loss": 1.1594, "step": 946 }, { "epoch": 0.23989867004433185, "grad_norm": 4.507314205169678, "learning_rate": 9.594731509625128e-06, "loss": 1.2235, "step": 947 }, { "epoch": 0.24015199493350223, "grad_norm": 4.548384666442871, "learning_rate": 9.6048632218845e-06, "loss": 1.346, "step": 948 }, { "epoch": 0.24040531982267258, "grad_norm": 4.291707992553711, "learning_rate": 9.614994934143871e-06, "loss": 1.1325, "step": 949 }, { "epoch": 0.24065864471184295, "grad_norm": 4.426799297332764, "learning_rate": 9.625126646403243e-06, "loss": 1.1833, "step": 950 }, { "epoch": 0.2409119696010133, "grad_norm": 4.290721416473389, "learning_rate": 9.635258358662614e-06, "loss": 1.2496, "step": 951 }, { "epoch": 0.24116529449018367, "grad_norm": 4.555160045623779, "learning_rate": 9.645390070921986e-06, "loss": 1.2221, "step": 952 }, { "epoch": 0.24141861937935402, "grad_norm": 3.9502954483032227, "learning_rate": 9.655521783181359e-06, "loss": 1.175, "step": 953 }, { "epoch": 0.24167194426852437, "grad_norm": 4.670769691467285, "learning_rate": 9.66565349544073e-06, "loss": 1.3957, "step": 954 }, { "epoch": 0.24192526915769474, "grad_norm": 4.343841075897217, "learning_rate": 9.675785207700102e-06, "loss": 1.2519, "step": 955 }, { "epoch": 0.2421785940468651, "grad_norm": 4.143503189086914, "learning_rate": 9.685916919959474e-06, "loss": 1.2741, "step": 956 }, { "epoch": 0.24243191893603547, "grad_norm": 4.043879985809326, "learning_rate": 9.696048632218846e-06, "loss": 1.1817, "step": 957 }, { "epoch": 0.24268524382520582, "grad_norm": 3.883371114730835, "learning_rate": 9.706180344478217e-06, "loss": 1.1797, "step": 958 }, { "epoch": 0.2429385687143762, "grad_norm": 3.936861515045166, "learning_rate": 9.716312056737589e-06, "loss": 1.1455, "step": 959 }, { "epoch": 0.24319189360354654, "grad_norm": 4.490845680236816, "learning_rate": 9.726443768996962e-06, "loss": 1.2841, "step": 960 }, { "epoch": 0.2434452184927169, "grad_norm": 4.519428253173828, "learning_rate": 9.736575481256332e-06, "loss": 1.2878, "step": 961 }, { "epoch": 0.24369854338188726, "grad_norm": 4.357264518737793, "learning_rate": 9.746707193515705e-06, "loss": 1.1434, "step": 962 }, { "epoch": 0.24395186827105764, "grad_norm": 4.587429523468018, "learning_rate": 9.756838905775077e-06, "loss": 1.2539, "step": 963 }, { "epoch": 0.24420519316022798, "grad_norm": 4.37636661529541, "learning_rate": 9.766970618034449e-06, "loss": 1.1944, "step": 964 }, { "epoch": 0.24445851804939836, "grad_norm": 4.168228626251221, "learning_rate": 9.77710233029382e-06, "loss": 1.178, "step": 965 }, { "epoch": 0.2447118429385687, "grad_norm": 4.678414344787598, "learning_rate": 9.787234042553192e-06, "loss": 1.2634, "step": 966 }, { "epoch": 0.24496516782773908, "grad_norm": 4.3709611892700195, "learning_rate": 9.797365754812565e-06, "loss": 1.2131, "step": 967 }, { "epoch": 0.24521849271690943, "grad_norm": 4.429797172546387, "learning_rate": 9.807497467071935e-06, "loss": 1.2301, "step": 968 }, { "epoch": 0.2454718176060798, "grad_norm": 4.7387003898620605, "learning_rate": 9.817629179331308e-06, "loss": 1.1721, "step": 969 }, { "epoch": 0.24572514249525015, "grad_norm": 4.454428672790527, "learning_rate": 
9.82776089159068e-06, "loss": 1.3677, "step": 970 }, { "epoch": 0.24597846738442053, "grad_norm": 4.637801647186279, "learning_rate": 9.837892603850052e-06, "loss": 1.2987, "step": 971 }, { "epoch": 0.24623179227359088, "grad_norm": 4.245021820068359, "learning_rate": 9.848024316109423e-06, "loss": 1.3343, "step": 972 }, { "epoch": 0.24648511716276125, "grad_norm": 4.278740882873535, "learning_rate": 9.858156028368795e-06, "loss": 1.2818, "step": 973 }, { "epoch": 0.2467384420519316, "grad_norm": 4.510958671569824, "learning_rate": 9.868287740628168e-06, "loss": 1.3503, "step": 974 }, { "epoch": 0.24699176694110198, "grad_norm": 4.130159854888916, "learning_rate": 9.878419452887538e-06, "loss": 1.2979, "step": 975 }, { "epoch": 0.24724509183027232, "grad_norm": 4.403942108154297, "learning_rate": 9.88855116514691e-06, "loss": 1.2455, "step": 976 }, { "epoch": 0.2474984167194427, "grad_norm": 4.474011421203613, "learning_rate": 9.898682877406283e-06, "loss": 1.2902, "step": 977 }, { "epoch": 0.24775174160861305, "grad_norm": 4.206770896911621, "learning_rate": 9.908814589665655e-06, "loss": 1.2073, "step": 978 }, { "epoch": 0.2480050664977834, "grad_norm": 4.503035068511963, "learning_rate": 9.918946301925026e-06, "loss": 1.2272, "step": 979 }, { "epoch": 0.24825839138695377, "grad_norm": 4.825187683105469, "learning_rate": 9.929078014184398e-06, "loss": 1.3413, "step": 980 }, { "epoch": 0.24851171627612412, "grad_norm": 4.212365627288818, "learning_rate": 9.939209726443771e-06, "loss": 1.2217, "step": 981 }, { "epoch": 0.2487650411652945, "grad_norm": 4.430416107177734, "learning_rate": 9.949341438703141e-06, "loss": 1.1744, "step": 982 }, { "epoch": 0.24901836605446484, "grad_norm": 5.085466384887695, "learning_rate": 9.959473150962513e-06, "loss": 1.3391, "step": 983 }, { "epoch": 0.24927169094363522, "grad_norm": 4.587413787841797, "learning_rate": 9.969604863221886e-06, "loss": 1.2309, "step": 984 }, { "epoch": 0.24952501583280556, "grad_norm": 4.599008083343506, "learning_rate": 9.979736575481258e-06, "loss": 1.204, "step": 985 }, { "epoch": 0.24977834072197594, "grad_norm": 4.6535162925720215, "learning_rate": 9.98986828774063e-06, "loss": 1.2601, "step": 986 }, { "epoch": 0.2500316656111463, "grad_norm": 3.9977779388427734, "learning_rate": 1e-05, "loss": 1.2025, "step": 987 }, { "epoch": 0.25028499050031666, "grad_norm": 4.556582450866699, "learning_rate": 9.999999929801171e-06, "loss": 1.3582, "step": 988 }, { "epoch": 0.250538315389487, "grad_norm": 4.729936599731445, "learning_rate": 9.99999971920469e-06, "loss": 1.322, "step": 989 }, { "epoch": 0.25079164027865736, "grad_norm": 4.16456413269043, "learning_rate": 9.999999368210558e-06, "loss": 1.1521, "step": 990 }, { "epoch": 0.25104496516782776, "grad_norm": 4.371050834655762, "learning_rate": 9.999998876818787e-06, "loss": 1.3173, "step": 991 }, { "epoch": 0.2512982900569981, "grad_norm": 4.363968372344971, "learning_rate": 9.999998245029391e-06, "loss": 1.3401, "step": 992 }, { "epoch": 0.25155161494616846, "grad_norm": 4.4262847900390625, "learning_rate": 9.999997472842388e-06, "loss": 1.1315, "step": 993 }, { "epoch": 0.2518049398353388, "grad_norm": 4.121704578399658, "learning_rate": 9.999996560257801e-06, "loss": 1.0382, "step": 994 }, { "epoch": 0.2520582647245092, "grad_norm": 4.098482131958008, "learning_rate": 9.999995507275652e-06, "loss": 1.1252, "step": 995 }, { "epoch": 0.25231158961367955, "grad_norm": 4.604094982147217, "learning_rate": 9.999994313895973e-06, "loss": 1.4268, "step": 996 }, { "epoch": 
0.2525649145028499, "grad_norm": 4.778846740722656, "learning_rate": 9.999992980118795e-06, "loss": 1.3845, "step": 997 }, { "epoch": 0.25281823939202025, "grad_norm": 4.112329959869385, "learning_rate": 9.999991505944161e-06, "loss": 1.2382, "step": 998 }, { "epoch": 0.25307156428119065, "grad_norm": 4.718144416809082, "learning_rate": 9.999989891372107e-06, "loss": 1.1994, "step": 999 }, { "epoch": 0.253324889170361, "grad_norm": 4.772528648376465, "learning_rate": 9.99998813640268e-06, "loss": 1.3379, "step": 1000 }, { "epoch": 0.253324889170361, "eval_loss": 1.277957558631897, "eval_runtime": 11.8882, "eval_samples_per_second": 33.647, "eval_steps_per_second": 4.206, "step": 1000 }, { "epoch": 0.25357821405953135, "grad_norm": 4.5006232261657715, "learning_rate": 9.99998624103593e-06, "loss": 1.2804, "step": 1001 }, { "epoch": 0.2538315389487017, "grad_norm": 4.349647521972656, "learning_rate": 9.999984205271911e-06, "loss": 1.2432, "step": 1002 }, { "epoch": 0.25408486383787204, "grad_norm": 4.238874435424805, "learning_rate": 9.999982029110677e-06, "loss": 1.2355, "step": 1003 }, { "epoch": 0.25433818872704245, "grad_norm": 4.476635456085205, "learning_rate": 9.999979712552293e-06, "loss": 1.3165, "step": 1004 }, { "epoch": 0.2545915136162128, "grad_norm": 4.258846282958984, "learning_rate": 9.99997725559682e-06, "loss": 1.3219, "step": 1005 }, { "epoch": 0.25484483850538314, "grad_norm": 4.392333984375, "learning_rate": 9.99997465824433e-06, "loss": 1.2872, "step": 1006 }, { "epoch": 0.2550981633945535, "grad_norm": 4.222839832305908, "learning_rate": 9.999971920494895e-06, "loss": 1.3018, "step": 1007 }, { "epoch": 0.2553514882837239, "grad_norm": 4.751513481140137, "learning_rate": 9.999969042348592e-06, "loss": 1.2831, "step": 1008 }, { "epoch": 0.25560481317289424, "grad_norm": 5.133706092834473, "learning_rate": 9.999966023805501e-06, "loss": 1.3577, "step": 1009 }, { "epoch": 0.2558581380620646, "grad_norm": 4.333564281463623, "learning_rate": 9.999962864865708e-06, "loss": 1.1974, "step": 1010 }, { "epoch": 0.25611146295123494, "grad_norm": 4.265956401824951, "learning_rate": 9.9999595655293e-06, "loss": 1.2562, "step": 1011 }, { "epoch": 0.25636478784040534, "grad_norm": 4.358114242553711, "learning_rate": 9.999956125796372e-06, "loss": 1.2141, "step": 1012 }, { "epoch": 0.2566181127295757, "grad_norm": 4.338444232940674, "learning_rate": 9.999952545667018e-06, "loss": 1.1853, "step": 1013 }, { "epoch": 0.25687143761874603, "grad_norm": 4.273314476013184, "learning_rate": 9.999948825141342e-06, "loss": 1.1726, "step": 1014 }, { "epoch": 0.2571247625079164, "grad_norm": 4.453716278076172, "learning_rate": 9.999944964219447e-06, "loss": 1.2582, "step": 1015 }, { "epoch": 0.2573780873970868, "grad_norm": 4.264922142028809, "learning_rate": 9.999940962901437e-06, "loss": 1.2207, "step": 1016 }, { "epoch": 0.25763141228625713, "grad_norm": 4.277278423309326, "learning_rate": 9.99993682118743e-06, "loss": 1.3064, "step": 1017 }, { "epoch": 0.2578847371754275, "grad_norm": 4.1954731941223145, "learning_rate": 9.999932539077541e-06, "loss": 1.1567, "step": 1018 }, { "epoch": 0.25813806206459783, "grad_norm": 4.1409149169921875, "learning_rate": 9.999928116571888e-06, "loss": 1.223, "step": 1019 }, { "epoch": 0.25839138695376823, "grad_norm": 3.997588634490967, "learning_rate": 9.9999235536706e-06, "loss": 1.2553, "step": 1020 }, { "epoch": 0.2586447118429386, "grad_norm": 4.307750701904297, "learning_rate": 9.9999188503738e-06, "loss": 1.2374, "step": 1021 }, { "epoch": 
0.2588980367321089, "grad_norm": 4.207553863525391, "learning_rate": 9.999914006681622e-06, "loss": 1.2823, "step": 1022 }, { "epoch": 0.2591513616212793, "grad_norm": 4.0428690910339355, "learning_rate": 9.999909022594201e-06, "loss": 1.215, "step": 1023 }, { "epoch": 0.2594046865104497, "grad_norm": 4.571065902709961, "learning_rate": 9.999903898111679e-06, "loss": 1.398, "step": 1024 }, { "epoch": 0.25965801139962, "grad_norm": 5.071618556976318, "learning_rate": 9.9998986332342e-06, "loss": 1.2586, "step": 1025 }, { "epoch": 0.2599113362887904, "grad_norm": 4.284810543060303, "learning_rate": 9.999893227961909e-06, "loss": 1.2099, "step": 1026 }, { "epoch": 0.2601646611779607, "grad_norm": 4.17232608795166, "learning_rate": 9.99988768229496e-06, "loss": 1.2971, "step": 1027 }, { "epoch": 0.26041798606713107, "grad_norm": 4.449314117431641, "learning_rate": 9.999881996233508e-06, "loss": 1.2091, "step": 1028 }, { "epoch": 0.26067131095630147, "grad_norm": 4.640637397766113, "learning_rate": 9.999876169777713e-06, "loss": 1.2349, "step": 1029 }, { "epoch": 0.2609246358454718, "grad_norm": 4.333415985107422, "learning_rate": 9.999870202927739e-06, "loss": 1.3162, "step": 1030 }, { "epoch": 0.26117796073464217, "grad_norm": 4.521099090576172, "learning_rate": 9.999864095683753e-06, "loss": 1.3238, "step": 1031 }, { "epoch": 0.2614312856238125, "grad_norm": 4.749149322509766, "learning_rate": 9.999857848045927e-06, "loss": 1.1897, "step": 1032 }, { "epoch": 0.2616846105129829, "grad_norm": 4.04673433303833, "learning_rate": 9.999851460014435e-06, "loss": 1.2942, "step": 1033 }, { "epoch": 0.26193793540215327, "grad_norm": 4.424718379974365, "learning_rate": 9.999844931589457e-06, "loss": 1.2762, "step": 1034 }, { "epoch": 0.2621912602913236, "grad_norm": 4.327094554901123, "learning_rate": 9.999838262771178e-06, "loss": 1.2108, "step": 1035 }, { "epoch": 0.26244458518049396, "grad_norm": 4.391043186187744, "learning_rate": 9.999831453559782e-06, "loss": 1.2186, "step": 1036 }, { "epoch": 0.26269791006966436, "grad_norm": 4.974733829498291, "learning_rate": 9.999824503955465e-06, "loss": 1.3035, "step": 1037 }, { "epoch": 0.2629512349588347, "grad_norm": 4.520740509033203, "learning_rate": 9.999817413958415e-06, "loss": 1.2613, "step": 1038 }, { "epoch": 0.26320455984800506, "grad_norm": 3.9201395511627197, "learning_rate": 9.999810183568839e-06, "loss": 1.2798, "step": 1039 }, { "epoch": 0.2634578847371754, "grad_norm": 4.590415954589844, "learning_rate": 9.999802812786933e-06, "loss": 1.3429, "step": 1040 }, { "epoch": 0.2637112096263458, "grad_norm": 4.340219497680664, "learning_rate": 9.999795301612912e-06, "loss": 1.2433, "step": 1041 }, { "epoch": 0.26396453451551616, "grad_norm": 4.225920677185059, "learning_rate": 9.99978765004698e-06, "loss": 1.2547, "step": 1042 }, { "epoch": 0.2642178594046865, "grad_norm": 4.53737735748291, "learning_rate": 9.999779858089353e-06, "loss": 1.2362, "step": 1043 }, { "epoch": 0.26447118429385685, "grad_norm": 4.659938812255859, "learning_rate": 9.999771925740251e-06, "loss": 1.3427, "step": 1044 }, { "epoch": 0.26472450918302726, "grad_norm": 4.459654331207275, "learning_rate": 9.999763852999897e-06, "loss": 1.309, "step": 1045 }, { "epoch": 0.2649778340721976, "grad_norm": 4.582836151123047, "learning_rate": 9.999755639868518e-06, "loss": 1.2851, "step": 1046 }, { "epoch": 0.26523115896136795, "grad_norm": 4.712745666503906, "learning_rate": 9.999747286346342e-06, "loss": 1.3503, "step": 1047 }, { "epoch": 0.2654844838505383, "grad_norm": 
4.448166847229004, "learning_rate": 9.999738792433609e-06, "loss": 1.398, "step": 1048 }, { "epoch": 0.2657378087397087, "grad_norm": 4.9156975746154785, "learning_rate": 9.99973015813055e-06, "loss": 1.4148, "step": 1049 }, { "epoch": 0.26599113362887905, "grad_norm": 4.917559623718262, "learning_rate": 9.999721383437413e-06, "loss": 1.3996, "step": 1050 }, { "epoch": 0.2662444585180494, "grad_norm": 4.078757286071777, "learning_rate": 9.999712468354444e-06, "loss": 1.2654, "step": 1051 }, { "epoch": 0.26649778340721975, "grad_norm": 4.17608642578125, "learning_rate": 9.999703412881892e-06, "loss": 1.2419, "step": 1052 }, { "epoch": 0.2667511082963901, "grad_norm": 4.432343482971191, "learning_rate": 9.99969421702001e-06, "loss": 1.1326, "step": 1053 }, { "epoch": 0.2670044331855605, "grad_norm": 4.077740669250488, "learning_rate": 9.999684880769058e-06, "loss": 1.0532, "step": 1054 }, { "epoch": 0.26725775807473084, "grad_norm": 4.219796180725098, "learning_rate": 9.999675404129299e-06, "loss": 1.2136, "step": 1055 }, { "epoch": 0.2675110829639012, "grad_norm": 4.376031875610352, "learning_rate": 9.999665787100997e-06, "loss": 1.2242, "step": 1056 }, { "epoch": 0.26776440785307154, "grad_norm": 4.411856651306152, "learning_rate": 9.999656029684422e-06, "loss": 1.2607, "step": 1057 }, { "epoch": 0.26801773274224194, "grad_norm": 4.215365886688232, "learning_rate": 9.99964613187985e-06, "loss": 1.2405, "step": 1058 }, { "epoch": 0.2682710576314123, "grad_norm": 4.299322605133057, "learning_rate": 9.999636093687556e-06, "loss": 1.2972, "step": 1059 }, { "epoch": 0.26852438252058264, "grad_norm": 4.521589756011963, "learning_rate": 9.999625915107826e-06, "loss": 1.3306, "step": 1060 }, { "epoch": 0.268777707409753, "grad_norm": 4.32185697555542, "learning_rate": 9.999615596140944e-06, "loss": 1.2621, "step": 1061 }, { "epoch": 0.2690310322989234, "grad_norm": 4.3838701248168945, "learning_rate": 9.999605136787197e-06, "loss": 1.2831, "step": 1062 }, { "epoch": 0.26928435718809374, "grad_norm": 4.577671051025391, "learning_rate": 9.999594537046882e-06, "loss": 1.2296, "step": 1063 }, { "epoch": 0.2695376820772641, "grad_norm": 4.250572681427002, "learning_rate": 9.999583796920296e-06, "loss": 1.2728, "step": 1064 }, { "epoch": 0.26979100696643443, "grad_norm": 4.592348575592041, "learning_rate": 9.999572916407741e-06, "loss": 1.2855, "step": 1065 }, { "epoch": 0.27004433185560484, "grad_norm": 4.446188449859619, "learning_rate": 9.99956189550952e-06, "loss": 1.2105, "step": 1066 }, { "epoch": 0.2702976567447752, "grad_norm": 4.710268497467041, "learning_rate": 9.999550734225945e-06, "loss": 1.4268, "step": 1067 }, { "epoch": 0.27055098163394553, "grad_norm": 4.610569953918457, "learning_rate": 9.999539432557327e-06, "loss": 1.2433, "step": 1068 }, { "epoch": 0.2708043065231159, "grad_norm": 4.46692419052124, "learning_rate": 9.999527990503986e-06, "loss": 1.2776, "step": 1069 }, { "epoch": 0.2710576314122863, "grad_norm": 4.537080764770508, "learning_rate": 9.999516408066244e-06, "loss": 1.3355, "step": 1070 }, { "epoch": 0.27131095630145663, "grad_norm": 3.7139177322387695, "learning_rate": 9.99950468524442e-06, "loss": 1.1919, "step": 1071 }, { "epoch": 0.271564281190627, "grad_norm": 4.5693583488464355, "learning_rate": 9.999492822038853e-06, "loss": 1.316, "step": 1072 }, { "epoch": 0.2718176060797973, "grad_norm": 4.537054538726807, "learning_rate": 9.999480818449868e-06, "loss": 1.183, "step": 1073 }, { "epoch": 0.27207093096896773, "grad_norm": 4.270650863647461, "learning_rate": 
9.999468674477803e-06, "loss": 1.3077, "step": 1074 }, { "epoch": 0.2723242558581381, "grad_norm": 3.8746337890625, "learning_rate": 9.999456390123004e-06, "loss": 1.1064, "step": 1075 }, { "epoch": 0.2725775807473084, "grad_norm": 4.1109466552734375, "learning_rate": 9.99944396538581e-06, "loss": 1.1139, "step": 1076 }, { "epoch": 0.27283090563647877, "grad_norm": 4.652540683746338, "learning_rate": 9.999431400266574e-06, "loss": 1.3425, "step": 1077 }, { "epoch": 0.2730842305256491, "grad_norm": 4.178191184997559, "learning_rate": 9.999418694765648e-06, "loss": 1.3607, "step": 1078 }, { "epoch": 0.2733375554148195, "grad_norm": 4.317570686340332, "learning_rate": 9.999405848883387e-06, "loss": 1.176, "step": 1079 }, { "epoch": 0.27359088030398987, "grad_norm": 4.367639064788818, "learning_rate": 9.999392862620154e-06, "loss": 1.2317, "step": 1080 }, { "epoch": 0.2738442051931602, "grad_norm": 4.147261142730713, "learning_rate": 9.999379735976312e-06, "loss": 1.2692, "step": 1081 }, { "epoch": 0.27409753008233056, "grad_norm": 4.390732765197754, "learning_rate": 9.999366468952229e-06, "loss": 1.2009, "step": 1082 }, { "epoch": 0.27435085497150097, "grad_norm": 4.224822521209717, "learning_rate": 9.99935306154828e-06, "loss": 1.2473, "step": 1083 }, { "epoch": 0.2746041798606713, "grad_norm": 4.581904411315918, "learning_rate": 9.999339513764838e-06, "loss": 1.2304, "step": 1084 }, { "epoch": 0.27485750474984166, "grad_norm": 4.850911617279053, "learning_rate": 9.999325825602288e-06, "loss": 1.4055, "step": 1085 }, { "epoch": 0.275110829639012, "grad_norm": 4.441746234893799, "learning_rate": 9.999311997061011e-06, "loss": 1.2762, "step": 1086 }, { "epoch": 0.2753641545281824, "grad_norm": 4.414993762969971, "learning_rate": 9.999298028141396e-06, "loss": 1.3163, "step": 1087 }, { "epoch": 0.27561747941735276, "grad_norm": 4.703424453735352, "learning_rate": 9.999283918843836e-06, "loss": 1.2733, "step": 1088 }, { "epoch": 0.2758708043065231, "grad_norm": 4.2780537605285645, "learning_rate": 9.999269669168727e-06, "loss": 1.2719, "step": 1089 }, { "epoch": 0.27612412919569346, "grad_norm": 4.284636974334717, "learning_rate": 9.999255279116468e-06, "loss": 1.2018, "step": 1090 }, { "epoch": 0.27637745408486386, "grad_norm": 3.985605478286743, "learning_rate": 9.999240748687464e-06, "loss": 1.2875, "step": 1091 }, { "epoch": 0.2766307789740342, "grad_norm": 4.404812812805176, "learning_rate": 9.999226077882123e-06, "loss": 1.133, "step": 1092 }, { "epoch": 0.27688410386320456, "grad_norm": 4.2787251472473145, "learning_rate": 9.999211266700855e-06, "loss": 1.2876, "step": 1093 }, { "epoch": 0.2771374287523749, "grad_norm": 4.34266996383667, "learning_rate": 9.99919631514408e-06, "loss": 1.206, "step": 1094 }, { "epoch": 0.2773907536415453, "grad_norm": 4.1548542976379395, "learning_rate": 9.999181223212215e-06, "loss": 1.2429, "step": 1095 }, { "epoch": 0.27764407853071565, "grad_norm": 4.232419490814209, "learning_rate": 9.999165990905684e-06, "loss": 1.2596, "step": 1096 }, { "epoch": 0.277897403419886, "grad_norm": 4.342401504516602, "learning_rate": 9.999150618224914e-06, "loss": 1.1759, "step": 1097 }, { "epoch": 0.27815072830905635, "grad_norm": 4.437196254730225, "learning_rate": 9.99913510517034e-06, "loss": 1.2726, "step": 1098 }, { "epoch": 0.27840405319822675, "grad_norm": 4.315725326538086, "learning_rate": 9.999119451742392e-06, "loss": 1.3566, "step": 1099 }, { "epoch": 0.2786573780873971, "grad_norm": 4.410442352294922, "learning_rate": 9.999103657941514e-06, "loss": 1.3163, 
"step": 1100 }, { "epoch": 0.27891070297656745, "grad_norm": 4.655491352081299, "learning_rate": 9.999087723768149e-06, "loss": 1.4307, "step": 1101 }, { "epoch": 0.2791640278657378, "grad_norm": 3.777026891708374, "learning_rate": 9.999071649222744e-06, "loss": 1.1103, "step": 1102 }, { "epoch": 0.27941735275490814, "grad_norm": 4.128303527832031, "learning_rate": 9.99905543430575e-06, "loss": 1.2228, "step": 1103 }, { "epoch": 0.27967067764407855, "grad_norm": 4.326513767242432, "learning_rate": 9.99903907901762e-06, "loss": 1.2697, "step": 1104 }, { "epoch": 0.2799240025332489, "grad_norm": 4.27681303024292, "learning_rate": 9.999022583358816e-06, "loss": 1.2722, "step": 1105 }, { "epoch": 0.28017732742241924, "grad_norm": 4.046457290649414, "learning_rate": 9.999005947329804e-06, "loss": 1.1574, "step": 1106 }, { "epoch": 0.2804306523115896, "grad_norm": 4.085561275482178, "learning_rate": 9.998989170931046e-06, "loss": 1.1658, "step": 1107 }, { "epoch": 0.28068397720076, "grad_norm": 4.437790393829346, "learning_rate": 9.998972254163016e-06, "loss": 1.2576, "step": 1108 }, { "epoch": 0.28093730208993034, "grad_norm": 4.35031795501709, "learning_rate": 9.998955197026186e-06, "loss": 1.1881, "step": 1109 }, { "epoch": 0.2811906269791007, "grad_norm": 4.078453063964844, "learning_rate": 9.99893799952104e-06, "loss": 1.2298, "step": 1110 }, { "epoch": 0.28144395186827104, "grad_norm": 4.690131664276123, "learning_rate": 9.998920661648053e-06, "loss": 1.3197, "step": 1111 }, { "epoch": 0.28169727675744144, "grad_norm": 4.241751670837402, "learning_rate": 9.99890318340772e-06, "loss": 1.3322, "step": 1112 }, { "epoch": 0.2819506016466118, "grad_norm": 4.170272350311279, "learning_rate": 9.998885564800528e-06, "loss": 1.2634, "step": 1113 }, { "epoch": 0.28220392653578213, "grad_norm": 4.748134613037109, "learning_rate": 9.998867805826973e-06, "loss": 1.2743, "step": 1114 }, { "epoch": 0.2824572514249525, "grad_norm": 4.64783239364624, "learning_rate": 9.998849906487552e-06, "loss": 1.3666, "step": 1115 }, { "epoch": 0.2827105763141229, "grad_norm": 3.911585807800293, "learning_rate": 9.998831866782769e-06, "loss": 1.0517, "step": 1116 }, { "epoch": 0.28296390120329323, "grad_norm": 4.002292633056641, "learning_rate": 9.99881368671313e-06, "loss": 1.2947, "step": 1117 }, { "epoch": 0.2832172260924636, "grad_norm": 4.09478759765625, "learning_rate": 9.998795366279145e-06, "loss": 1.2217, "step": 1118 }, { "epoch": 0.28347055098163393, "grad_norm": 4.193706035614014, "learning_rate": 9.998776905481328e-06, "loss": 1.2352, "step": 1119 }, { "epoch": 0.28372387587080433, "grad_norm": 4.266157627105713, "learning_rate": 9.9987583043202e-06, "loss": 1.2766, "step": 1120 }, { "epoch": 0.2839772007599747, "grad_norm": 4.135251522064209, "learning_rate": 9.998739562796281e-06, "loss": 1.3218, "step": 1121 }, { "epoch": 0.284230525649145, "grad_norm": 3.903446674346924, "learning_rate": 9.998720680910097e-06, "loss": 1.2192, "step": 1122 }, { "epoch": 0.2844838505383154, "grad_norm": 4.4483256340026855, "learning_rate": 9.99870165866218e-06, "loss": 1.1485, "step": 1123 }, { "epoch": 0.2847371754274858, "grad_norm": 4.186819076538086, "learning_rate": 9.998682496053065e-06, "loss": 1.1963, "step": 1124 }, { "epoch": 0.2849905003166561, "grad_norm": 4.26090145111084, "learning_rate": 9.998663193083285e-06, "loss": 1.2054, "step": 1125 }, { "epoch": 0.2852438252058265, "grad_norm": 4.314788341522217, "learning_rate": 9.998643749753387e-06, "loss": 1.2775, "step": 1126 }, { "epoch": 0.2854971500949968, 
"grad_norm": 4.19307804107666, "learning_rate": 9.998624166063915e-06, "loss": 1.2285, "step": 1127 }, { "epoch": 0.28575047498416717, "grad_norm": 4.191956520080566, "learning_rate": 9.99860444201542e-06, "loss": 1.189, "step": 1128 }, { "epoch": 0.28600379987333757, "grad_norm": 4.338077068328857, "learning_rate": 9.998584577608454e-06, "loss": 1.313, "step": 1129 }, { "epoch": 0.2862571247625079, "grad_norm": 4.931329727172852, "learning_rate": 9.998564572843577e-06, "loss": 1.3063, "step": 1130 }, { "epoch": 0.28651044965167827, "grad_norm": 4.653294086456299, "learning_rate": 9.998544427721348e-06, "loss": 1.358, "step": 1131 }, { "epoch": 0.2867637745408486, "grad_norm": 4.46495246887207, "learning_rate": 9.998524142242334e-06, "loss": 1.2361, "step": 1132 }, { "epoch": 0.287017099430019, "grad_norm": 4.529207229614258, "learning_rate": 9.998503716407105e-06, "loss": 1.2673, "step": 1133 }, { "epoch": 0.28727042431918937, "grad_norm": 3.916558265686035, "learning_rate": 9.998483150216234e-06, "loss": 1.2153, "step": 1134 }, { "epoch": 0.2875237492083597, "grad_norm": 4.207367897033691, "learning_rate": 9.9984624436703e-06, "loss": 1.1631, "step": 1135 }, { "epoch": 0.28777707409753006, "grad_norm": 4.070796489715576, "learning_rate": 9.998441596769883e-06, "loss": 1.2328, "step": 1136 }, { "epoch": 0.28803039898670046, "grad_norm": 4.211414337158203, "learning_rate": 9.998420609515568e-06, "loss": 1.1903, "step": 1137 }, { "epoch": 0.2882837238758708, "grad_norm": 4.0857086181640625, "learning_rate": 9.998399481907945e-06, "loss": 1.1356, "step": 1138 }, { "epoch": 0.28853704876504116, "grad_norm": 4.152622222900391, "learning_rate": 9.998378213947607e-06, "loss": 1.2288, "step": 1139 }, { "epoch": 0.2887903736542115, "grad_norm": 4.304581642150879, "learning_rate": 9.998356805635154e-06, "loss": 1.2543, "step": 1140 }, { "epoch": 0.2890436985433819, "grad_norm": 4.069146156311035, "learning_rate": 9.998335256971183e-06, "loss": 1.1565, "step": 1141 }, { "epoch": 0.28929702343255226, "grad_norm": 4.224587440490723, "learning_rate": 9.998313567956299e-06, "loss": 1.2069, "step": 1142 }, { "epoch": 0.2895503483217226, "grad_norm": 3.9599013328552246, "learning_rate": 9.998291738591115e-06, "loss": 1.1918, "step": 1143 }, { "epoch": 0.28980367321089295, "grad_norm": 4.346947193145752, "learning_rate": 9.998269768876239e-06, "loss": 1.2541, "step": 1144 }, { "epoch": 0.29005699810006336, "grad_norm": 4.332737445831299, "learning_rate": 9.998247658812293e-06, "loss": 1.2649, "step": 1145 }, { "epoch": 0.2903103229892337, "grad_norm": 4.032033443450928, "learning_rate": 9.998225408399894e-06, "loss": 1.2889, "step": 1146 }, { "epoch": 0.29056364787840405, "grad_norm": 4.593135356903076, "learning_rate": 9.998203017639668e-06, "loss": 1.3193, "step": 1147 }, { "epoch": 0.2908169727675744, "grad_norm": 4.185672760009766, "learning_rate": 9.998180486532242e-06, "loss": 1.399, "step": 1148 }, { "epoch": 0.29107029765674475, "grad_norm": 4.378150939941406, "learning_rate": 9.998157815078253e-06, "loss": 1.2216, "step": 1149 }, { "epoch": 0.29132362254591515, "grad_norm": 3.8727684020996094, "learning_rate": 9.998135003278335e-06, "loss": 1.3562, "step": 1150 }, { "epoch": 0.2915769474350855, "grad_norm": 4.286794662475586, "learning_rate": 9.998112051133127e-06, "loss": 1.2944, "step": 1151 }, { "epoch": 0.29183027232425585, "grad_norm": 4.495805263519287, "learning_rate": 9.998088958643277e-06, "loss": 1.2803, "step": 1152 }, { "epoch": 0.2920835972134262, "grad_norm": 4.211381435394287, 
"learning_rate": 9.99806572580943e-06, "loss": 1.1863, "step": 1153 }, { "epoch": 0.2923369221025966, "grad_norm": 4.094125747680664, "learning_rate": 9.998042352632242e-06, "loss": 1.3885, "step": 1154 }, { "epoch": 0.29259024699176694, "grad_norm": 4.094359874725342, "learning_rate": 9.998018839112365e-06, "loss": 1.2796, "step": 1155 }, { "epoch": 0.2928435718809373, "grad_norm": 3.947622060775757, "learning_rate": 9.997995185250462e-06, "loss": 1.2438, "step": 1156 }, { "epoch": 0.29309689677010764, "grad_norm": 4.144353866577148, "learning_rate": 9.997971391047197e-06, "loss": 1.1298, "step": 1157 }, { "epoch": 0.29335022165927804, "grad_norm": 3.9895102977752686, "learning_rate": 9.997947456503238e-06, "loss": 1.1884, "step": 1158 }, { "epoch": 0.2936035465484484, "grad_norm": 4.259244918823242, "learning_rate": 9.997923381619257e-06, "loss": 1.2304, "step": 1159 }, { "epoch": 0.29385687143761874, "grad_norm": 4.923684120178223, "learning_rate": 9.99789916639593e-06, "loss": 1.5598, "step": 1160 }, { "epoch": 0.2941101963267891, "grad_norm": 4.479145526885986, "learning_rate": 9.997874810833936e-06, "loss": 1.2207, "step": 1161 }, { "epoch": 0.2943635212159595, "grad_norm": 5.040842533111572, "learning_rate": 9.99785031493396e-06, "loss": 1.2571, "step": 1162 }, { "epoch": 0.29461684610512984, "grad_norm": 3.860297441482544, "learning_rate": 9.997825678696688e-06, "loss": 1.2313, "step": 1163 }, { "epoch": 0.2948701709943002, "grad_norm": 4.079608917236328, "learning_rate": 9.997800902122816e-06, "loss": 1.1536, "step": 1164 }, { "epoch": 0.29512349588347053, "grad_norm": 3.9218783378601074, "learning_rate": 9.997775985213035e-06, "loss": 1.1827, "step": 1165 }, { "epoch": 0.29537682077264094, "grad_norm": 3.88308048248291, "learning_rate": 9.99775092796805e-06, "loss": 1.2224, "step": 1166 }, { "epoch": 0.2956301456618113, "grad_norm": 3.903902053833008, "learning_rate": 9.997725730388556e-06, "loss": 1.133, "step": 1167 }, { "epoch": 0.29588347055098163, "grad_norm": 4.177271366119385, "learning_rate": 9.99770039247527e-06, "loss": 1.1429, "step": 1168 }, { "epoch": 0.296136795440152, "grad_norm": 3.8579490184783936, "learning_rate": 9.997674914228896e-06, "loss": 1.1676, "step": 1169 }, { "epoch": 0.2963901203293224, "grad_norm": 4.455913543701172, "learning_rate": 9.997649295650157e-06, "loss": 1.2915, "step": 1170 }, { "epoch": 0.29664344521849273, "grad_norm": 4.432326793670654, "learning_rate": 9.997623536739764e-06, "loss": 1.2021, "step": 1171 }, { "epoch": 0.2968967701076631, "grad_norm": 4.448549747467041, "learning_rate": 9.997597637498445e-06, "loss": 1.2106, "step": 1172 }, { "epoch": 0.2971500949968334, "grad_norm": 4.243499755859375, "learning_rate": 9.99757159792693e-06, "loss": 1.2227, "step": 1173 }, { "epoch": 0.2974034198860038, "grad_norm": 4.29484748840332, "learning_rate": 9.997545418025942e-06, "loss": 1.3158, "step": 1174 }, { "epoch": 0.2976567447751742, "grad_norm": 4.385229110717773, "learning_rate": 9.997519097796224e-06, "loss": 1.1911, "step": 1175 }, { "epoch": 0.2979100696643445, "grad_norm": 4.231202125549316, "learning_rate": 9.997492637238512e-06, "loss": 1.114, "step": 1176 }, { "epoch": 0.29816339455351487, "grad_norm": 4.25446081161499, "learning_rate": 9.997466036353549e-06, "loss": 1.2236, "step": 1177 }, { "epoch": 0.2984167194426852, "grad_norm": 4.097778797149658, "learning_rate": 9.99743929514208e-06, "loss": 1.2686, "step": 1178 }, { "epoch": 0.2986700443318556, "grad_norm": 4.999679088592529, "learning_rate": 9.99741241360486e-06, 
"loss": 1.189, "step": 1179 }, { "epoch": 0.29892336922102597, "grad_norm": 3.9533159732818604, "learning_rate": 9.99738539174264e-06, "loss": 1.2186, "step": 1180 }, { "epoch": 0.2991766941101963, "grad_norm": 4.128479957580566, "learning_rate": 9.99735822955618e-06, "loss": 1.2896, "step": 1181 }, { "epoch": 0.29943001899936666, "grad_norm": 4.410684585571289, "learning_rate": 9.997330927046243e-06, "loss": 1.3052, "step": 1182 }, { "epoch": 0.29968334388853707, "grad_norm": 4.5088372230529785, "learning_rate": 9.997303484213597e-06, "loss": 1.2223, "step": 1183 }, { "epoch": 0.2999366687777074, "grad_norm": 4.644050121307373, "learning_rate": 9.99727590105901e-06, "loss": 1.3765, "step": 1184 }, { "epoch": 0.30018999366687776, "grad_norm": 4.378511905670166, "learning_rate": 9.99724817758326e-06, "loss": 1.33, "step": 1185 }, { "epoch": 0.3004433185560481, "grad_norm": 4.221255779266357, "learning_rate": 9.997220313787122e-06, "loss": 1.1759, "step": 1186 }, { "epoch": 0.3006966434452185, "grad_norm": 4.051570415496826, "learning_rate": 9.99719230967138e-06, "loss": 1.2226, "step": 1187 }, { "epoch": 0.30094996833438886, "grad_norm": 4.175117492675781, "learning_rate": 9.997164165236819e-06, "loss": 1.1844, "step": 1188 }, { "epoch": 0.3012032932235592, "grad_norm": 4.066131591796875, "learning_rate": 9.997135880484232e-06, "loss": 1.2764, "step": 1189 }, { "epoch": 0.30145661811272956, "grad_norm": 4.347494602203369, "learning_rate": 9.99710745541441e-06, "loss": 1.2386, "step": 1190 }, { "epoch": 0.30170994300189996, "grad_norm": 4.305504322052002, "learning_rate": 9.997078890028153e-06, "loss": 1.4568, "step": 1191 }, { "epoch": 0.3019632678910703, "grad_norm": 4.016643524169922, "learning_rate": 9.997050184326263e-06, "loss": 1.2184, "step": 1192 }, { "epoch": 0.30221659278024066, "grad_norm": 4.105624198913574, "learning_rate": 9.997021338309547e-06, "loss": 1.3006, "step": 1193 }, { "epoch": 0.302469917669411, "grad_norm": 4.551614284515381, "learning_rate": 9.996992351978811e-06, "loss": 1.4236, "step": 1194 }, { "epoch": 0.3027232425585814, "grad_norm": 4.448981761932373, "learning_rate": 9.996963225334874e-06, "loss": 1.3243, "step": 1195 }, { "epoch": 0.30297656744775175, "grad_norm": 4.29794454574585, "learning_rate": 9.996933958378551e-06, "loss": 1.3387, "step": 1196 }, { "epoch": 0.3032298923369221, "grad_norm": 4.252987861633301, "learning_rate": 9.996904551110664e-06, "loss": 1.2944, "step": 1197 }, { "epoch": 0.30348321722609245, "grad_norm": 4.383278846740723, "learning_rate": 9.99687500353204e-06, "loss": 1.296, "step": 1198 }, { "epoch": 0.3037365421152628, "grad_norm": 4.380770683288574, "learning_rate": 9.996845315643506e-06, "loss": 1.2481, "step": 1199 }, { "epoch": 0.3039898670044332, "grad_norm": 4.155014514923096, "learning_rate": 9.996815487445899e-06, "loss": 1.2227, "step": 1200 }, { "epoch": 0.30424319189360355, "grad_norm": 4.016407489776611, "learning_rate": 9.996785518940056e-06, "loss": 1.4424, "step": 1201 }, { "epoch": 0.3044965167827739, "grad_norm": 4.063757419586182, "learning_rate": 9.996755410126815e-06, "loss": 1.2652, "step": 1202 }, { "epoch": 0.30474984167194424, "grad_norm": 4.2165117263793945, "learning_rate": 9.996725161007027e-06, "loss": 1.2779, "step": 1203 }, { "epoch": 0.30500316656111465, "grad_norm": 3.8022258281707764, "learning_rate": 9.996694771581535e-06, "loss": 1.2567, "step": 1204 }, { "epoch": 0.305256491450285, "grad_norm": 4.526707649230957, "learning_rate": 9.996664241851197e-06, "loss": 1.4895, "step": 1205 }, { 
"epoch": 0.30550981633945534, "grad_norm": 3.676969289779663, "learning_rate": 9.99663357181687e-06, "loss": 1.1388, "step": 1206 }, { "epoch": 0.3057631412286257, "grad_norm": 4.362067699432373, "learning_rate": 9.996602761479413e-06, "loss": 1.2873, "step": 1207 }, { "epoch": 0.3060164661177961, "grad_norm": 4.138330936431885, "learning_rate": 9.996571810839693e-06, "loss": 1.1894, "step": 1208 }, { "epoch": 0.30626979100696644, "grad_norm": 4.2472615242004395, "learning_rate": 9.996540719898578e-06, "loss": 1.2704, "step": 1209 }, { "epoch": 0.3065231158961368, "grad_norm": 3.9246773719787598, "learning_rate": 9.99650948865694e-06, "loss": 1.2234, "step": 1210 }, { "epoch": 0.30677644078530714, "grad_norm": 4.2852044105529785, "learning_rate": 9.996478117115659e-06, "loss": 1.2984, "step": 1211 }, { "epoch": 0.30702976567447754, "grad_norm": 4.638847827911377, "learning_rate": 9.996446605275614e-06, "loss": 1.2492, "step": 1212 }, { "epoch": 0.3072830905636479, "grad_norm": 4.264072418212891, "learning_rate": 9.99641495313769e-06, "loss": 1.3236, "step": 1213 }, { "epoch": 0.30753641545281823, "grad_norm": 4.2414350509643555, "learning_rate": 9.996383160702775e-06, "loss": 1.328, "step": 1214 }, { "epoch": 0.3077897403419886, "grad_norm": 3.8178317546844482, "learning_rate": 9.996351227971763e-06, "loss": 1.1453, "step": 1215 }, { "epoch": 0.308043065231159, "grad_norm": 4.003973484039307, "learning_rate": 9.996319154945551e-06, "loss": 1.1736, "step": 1216 }, { "epoch": 0.30829639012032933, "grad_norm": 4.237383842468262, "learning_rate": 9.996286941625038e-06, "loss": 1.377, "step": 1217 }, { "epoch": 0.3085497150094997, "grad_norm": 4.385888576507568, "learning_rate": 9.99625458801113e-06, "loss": 1.3226, "step": 1218 }, { "epoch": 0.30880303989867003, "grad_norm": 4.4331278800964355, "learning_rate": 9.996222094104733e-06, "loss": 1.3285, "step": 1219 }, { "epoch": 0.30905636478784043, "grad_norm": 4.470774173736572, "learning_rate": 9.996189459906762e-06, "loss": 1.2614, "step": 1220 }, { "epoch": 0.3093096896770108, "grad_norm": 4.2090229988098145, "learning_rate": 9.996156685418133e-06, "loss": 1.2239, "step": 1221 }, { "epoch": 0.3095630145661811, "grad_norm": 3.836949348449707, "learning_rate": 9.996123770639766e-06, "loss": 1.2611, "step": 1222 }, { "epoch": 0.3098163394553515, "grad_norm": 4.553760528564453, "learning_rate": 9.996090715572587e-06, "loss": 1.3546, "step": 1223 }, { "epoch": 0.3100696643445218, "grad_norm": 4.262728691101074, "learning_rate": 9.996057520217519e-06, "loss": 1.2489, "step": 1224 }, { "epoch": 0.3103229892336922, "grad_norm": 4.288120269775391, "learning_rate": 9.996024184575497e-06, "loss": 1.3574, "step": 1225 }, { "epoch": 0.3105763141228626, "grad_norm": 4.323267936706543, "learning_rate": 9.99599070864746e-06, "loss": 1.388, "step": 1226 }, { "epoch": 0.3108296390120329, "grad_norm": 3.9528911113739014, "learning_rate": 9.995957092434345e-06, "loss": 1.1117, "step": 1227 }, { "epoch": 0.31108296390120327, "grad_norm": 4.321549415588379, "learning_rate": 9.995923335937095e-06, "loss": 1.2249, "step": 1228 }, { "epoch": 0.31133628879037367, "grad_norm": 3.863180637359619, "learning_rate": 9.995889439156661e-06, "loss": 1.1733, "step": 1229 }, { "epoch": 0.311589613679544, "grad_norm": 4.257288932800293, "learning_rate": 9.995855402093991e-06, "loss": 1.1108, "step": 1230 }, { "epoch": 0.31184293856871437, "grad_norm": 4.414398193359375, "learning_rate": 9.995821224750044e-06, "loss": 1.2711, "step": 1231 }, { "epoch": 0.3120962634578847, 
"grad_norm": 4.367593288421631, "learning_rate": 9.995786907125778e-06, "loss": 1.2059, "step": 1232 }, { "epoch": 0.3123495883470551, "grad_norm": 4.501057147979736, "learning_rate": 9.995752449222159e-06, "loss": 1.2884, "step": 1233 }, { "epoch": 0.31260291323622547, "grad_norm": 4.291118621826172, "learning_rate": 9.99571785104015e-06, "loss": 1.1887, "step": 1234 }, { "epoch": 0.3128562381253958, "grad_norm": 4.475590705871582, "learning_rate": 9.995683112580725e-06, "loss": 1.428, "step": 1235 }, { "epoch": 0.31310956301456616, "grad_norm": 4.3426923751831055, "learning_rate": 9.99564823384486e-06, "loss": 1.2912, "step": 1236 }, { "epoch": 0.31336288790373656, "grad_norm": 4.068551540374756, "learning_rate": 9.995613214833534e-06, "loss": 1.2501, "step": 1237 }, { "epoch": 0.3136162127929069, "grad_norm": 4.004886627197266, "learning_rate": 9.995578055547732e-06, "loss": 1.2266, "step": 1238 }, { "epoch": 0.31386953768207726, "grad_norm": 4.359253883361816, "learning_rate": 9.99554275598844e-06, "loss": 1.2092, "step": 1239 }, { "epoch": 0.3141228625712476, "grad_norm": 3.9630236625671387, "learning_rate": 9.995507316156645e-06, "loss": 1.0663, "step": 1240 }, { "epoch": 0.314376187460418, "grad_norm": 4.438020706176758, "learning_rate": 9.995471736053349e-06, "loss": 1.2586, "step": 1241 }, { "epoch": 0.31462951234958836, "grad_norm": 4.1128973960876465, "learning_rate": 9.995436015679545e-06, "loss": 1.2448, "step": 1242 }, { "epoch": 0.3148828372387587, "grad_norm": 4.267119884490967, "learning_rate": 9.995400155036241e-06, "loss": 1.3054, "step": 1243 }, { "epoch": 0.31513616212792905, "grad_norm": 4.559778213500977, "learning_rate": 9.995364154124442e-06, "loss": 1.3046, "step": 1244 }, { "epoch": 0.31538948701709946, "grad_norm": 4.251293182373047, "learning_rate": 9.995328012945158e-06, "loss": 1.1169, "step": 1245 }, { "epoch": 0.3156428119062698, "grad_norm": 4.158294200897217, "learning_rate": 9.995291731499406e-06, "loss": 1.2024, "step": 1246 }, { "epoch": 0.31589613679544015, "grad_norm": 4.296648025512695, "learning_rate": 9.995255309788202e-06, "loss": 1.231, "step": 1247 }, { "epoch": 0.3161494616846105, "grad_norm": 4.170580863952637, "learning_rate": 9.99521874781257e-06, "loss": 1.2121, "step": 1248 }, { "epoch": 0.31640278657378085, "grad_norm": 4.245189666748047, "learning_rate": 9.995182045573537e-06, "loss": 1.2395, "step": 1249 }, { "epoch": 0.31665611146295125, "grad_norm": 3.9477896690368652, "learning_rate": 9.995145203072132e-06, "loss": 1.2729, "step": 1250 }, { "epoch": 0.3169094363521216, "grad_norm": 4.069958686828613, "learning_rate": 9.995108220309392e-06, "loss": 1.3298, "step": 1251 }, { "epoch": 0.31716276124129195, "grad_norm": 3.9865245819091797, "learning_rate": 9.995071097286355e-06, "loss": 1.2307, "step": 1252 }, { "epoch": 0.3174160861304623, "grad_norm": 4.537790298461914, "learning_rate": 9.995033834004061e-06, "loss": 1.1516, "step": 1253 }, { "epoch": 0.3176694110196327, "grad_norm": 4.2048492431640625, "learning_rate": 9.99499643046356e-06, "loss": 1.4099, "step": 1254 }, { "epoch": 0.31792273590880304, "grad_norm": 3.8523194789886475, "learning_rate": 9.9949588866659e-06, "loss": 1.0571, "step": 1255 }, { "epoch": 0.3181760607979734, "grad_norm": 3.7813720703125, "learning_rate": 9.994921202612135e-06, "loss": 1.1963, "step": 1256 }, { "epoch": 0.31842938568714374, "grad_norm": 3.999424934387207, "learning_rate": 9.994883378303324e-06, "loss": 1.0709, "step": 1257 }, { "epoch": 0.31868271057631414, "grad_norm": 4.427967071533203, 
"learning_rate": 9.994845413740529e-06, "loss": 1.3181, "step": 1258 }, { "epoch": 0.3189360354654845, "grad_norm": 4.221949577331543, "learning_rate": 9.994807308924814e-06, "loss": 1.1552, "step": 1259 }, { "epoch": 0.31918936035465484, "grad_norm": 5.069612979888916, "learning_rate": 9.99476906385725e-06, "loss": 1.2541, "step": 1260 }, { "epoch": 0.3194426852438252, "grad_norm": 4.107848644256592, "learning_rate": 9.994730678538914e-06, "loss": 1.2334, "step": 1261 }, { "epoch": 0.3196960101329956, "grad_norm": 5.001278400421143, "learning_rate": 9.994692152970882e-06, "loss": 1.2908, "step": 1262 }, { "epoch": 0.31994933502216594, "grad_norm": 4.346644401550293, "learning_rate": 9.994653487154233e-06, "loss": 1.3057, "step": 1263 }, { "epoch": 0.3202026599113363, "grad_norm": 3.7297120094299316, "learning_rate": 9.994614681090056e-06, "loss": 1.0748, "step": 1264 }, { "epoch": 0.32045598480050663, "grad_norm": 4.22995138168335, "learning_rate": 9.994575734779439e-06, "loss": 1.2675, "step": 1265 }, { "epoch": 0.32070930968967704, "grad_norm": 3.8312008380889893, "learning_rate": 9.994536648223477e-06, "loss": 1.171, "step": 1266 }, { "epoch": 0.3209626345788474, "grad_norm": 3.967376232147217, "learning_rate": 9.994497421423266e-06, "loss": 1.2821, "step": 1267 }, { "epoch": 0.32121595946801773, "grad_norm": 4.221203327178955, "learning_rate": 9.994458054379909e-06, "loss": 1.2664, "step": 1268 }, { "epoch": 0.3214692843571881, "grad_norm": 4.06121301651001, "learning_rate": 9.994418547094511e-06, "loss": 1.391, "step": 1269 }, { "epoch": 0.3217226092463585, "grad_norm": 4.372332572937012, "learning_rate": 9.99437889956818e-06, "loss": 1.3267, "step": 1270 }, { "epoch": 0.32197593413552883, "grad_norm": 4.301946640014648, "learning_rate": 9.994339111802032e-06, "loss": 1.3362, "step": 1271 }, { "epoch": 0.3222292590246992, "grad_norm": 4.1606903076171875, "learning_rate": 9.99429918379718e-06, "loss": 1.3005, "step": 1272 }, { "epoch": 0.3224825839138695, "grad_norm": 3.8666462898254395, "learning_rate": 9.99425911555475e-06, "loss": 1.0772, "step": 1273 }, { "epoch": 0.3227359088030399, "grad_norm": 4.130020618438721, "learning_rate": 9.994218907075863e-06, "loss": 1.2813, "step": 1274 }, { "epoch": 0.3229892336922103, "grad_norm": 3.832627296447754, "learning_rate": 9.994178558361649e-06, "loss": 1.2409, "step": 1275 }, { "epoch": 0.3232425585813806, "grad_norm": 4.253661155700684, "learning_rate": 9.994138069413244e-06, "loss": 1.3443, "step": 1276 }, { "epoch": 0.32349588347055097, "grad_norm": 4.230594158172607, "learning_rate": 9.994097440231781e-06, "loss": 1.3346, "step": 1277 }, { "epoch": 0.3237492083597213, "grad_norm": 4.01607608795166, "learning_rate": 9.994056670818404e-06, "loss": 1.1863, "step": 1278 }, { "epoch": 0.3240025332488917, "grad_norm": 4.280426979064941, "learning_rate": 9.994015761174254e-06, "loss": 1.3087, "step": 1279 }, { "epoch": 0.32425585813806207, "grad_norm": 3.954005479812622, "learning_rate": 9.993974711300485e-06, "loss": 1.1972, "step": 1280 }, { "epoch": 0.3245091830272324, "grad_norm": 4.103531837463379, "learning_rate": 9.993933521198244e-06, "loss": 1.3494, "step": 1281 }, { "epoch": 0.32476250791640277, "grad_norm": 3.806126832962036, "learning_rate": 9.99389219086869e-06, "loss": 1.1307, "step": 1282 }, { "epoch": 0.32501583280557317, "grad_norm": 4.244111061096191, "learning_rate": 9.993850720312987e-06, "loss": 1.326, "step": 1283 }, { "epoch": 0.3252691576947435, "grad_norm": 4.36318826675415, "learning_rate": 9.993809109532294e-06, 
"loss": 1.2854, "step": 1284 }, { "epoch": 0.32552248258391386, "grad_norm": 4.563369274139404, "learning_rate": 9.993767358527781e-06, "loss": 1.3687, "step": 1285 }, { "epoch": 0.3257758074730842, "grad_norm": 4.065633296966553, "learning_rate": 9.993725467300624e-06, "loss": 1.2267, "step": 1286 }, { "epoch": 0.3260291323622546, "grad_norm": 3.907613754272461, "learning_rate": 9.993683435851995e-06, "loss": 1.2013, "step": 1287 }, { "epoch": 0.32628245725142496, "grad_norm": 4.399827480316162, "learning_rate": 9.993641264183074e-06, "loss": 1.3248, "step": 1288 }, { "epoch": 0.3265357821405953, "grad_norm": 4.3525776863098145, "learning_rate": 9.993598952295048e-06, "loss": 1.2563, "step": 1289 }, { "epoch": 0.32678910702976566, "grad_norm": 3.8334646224975586, "learning_rate": 9.993556500189103e-06, "loss": 1.1712, "step": 1290 }, { "epoch": 0.32704243191893606, "grad_norm": 4.270079612731934, "learning_rate": 9.993513907866432e-06, "loss": 1.473, "step": 1291 }, { "epoch": 0.3272957568081064, "grad_norm": 4.307861328125, "learning_rate": 9.993471175328231e-06, "loss": 1.2472, "step": 1292 }, { "epoch": 0.32754908169727676, "grad_norm": 4.307832717895508, "learning_rate": 9.9934283025757e-06, "loss": 1.3726, "step": 1293 }, { "epoch": 0.3278024065864471, "grad_norm": 3.930716037750244, "learning_rate": 9.993385289610044e-06, "loss": 1.1815, "step": 1294 }, { "epoch": 0.3280557314756175, "grad_norm": 4.554092884063721, "learning_rate": 9.993342136432467e-06, "loss": 1.3118, "step": 1295 }, { "epoch": 0.32830905636478785, "grad_norm": 3.947636365890503, "learning_rate": 9.993298843044184e-06, "loss": 0.9973, "step": 1296 }, { "epoch": 0.3285623812539582, "grad_norm": 4.019169807434082, "learning_rate": 9.99325540944641e-06, "loss": 1.2246, "step": 1297 }, { "epoch": 0.32881570614312855, "grad_norm": 4.576243877410889, "learning_rate": 9.993211835640364e-06, "loss": 1.2787, "step": 1298 }, { "epoch": 0.3290690310322989, "grad_norm": 4.043989181518555, "learning_rate": 9.99316812162727e-06, "loss": 1.2234, "step": 1299 }, { "epoch": 0.3293223559214693, "grad_norm": 4.106696605682373, "learning_rate": 9.993124267408356e-06, "loss": 1.1678, "step": 1300 }, { "epoch": 0.32957568081063965, "grad_norm": 4.1464080810546875, "learning_rate": 9.99308027298485e-06, "loss": 1.297, "step": 1301 }, { "epoch": 0.32982900569981, "grad_norm": 4.302769660949707, "learning_rate": 9.993036138357993e-06, "loss": 1.2412, "step": 1302 }, { "epoch": 0.33008233058898034, "grad_norm": 3.8838820457458496, "learning_rate": 9.99299186352902e-06, "loss": 1.149, "step": 1303 }, { "epoch": 0.33033565547815075, "grad_norm": 4.113027095794678, "learning_rate": 9.992947448499176e-06, "loss": 1.2752, "step": 1304 }, { "epoch": 0.3305889803673211, "grad_norm": 4.165403366088867, "learning_rate": 9.992902893269709e-06, "loss": 1.2183, "step": 1305 }, { "epoch": 0.33084230525649144, "grad_norm": 4.083942413330078, "learning_rate": 9.992858197841866e-06, "loss": 1.2577, "step": 1306 }, { "epoch": 0.3310956301456618, "grad_norm": 4.231232166290283, "learning_rate": 9.992813362216907e-06, "loss": 1.2931, "step": 1307 }, { "epoch": 0.3313489550348322, "grad_norm": 3.911257266998291, "learning_rate": 9.992768386396088e-06, "loss": 1.2065, "step": 1308 }, { "epoch": 0.33160227992400254, "grad_norm": 4.078742027282715, "learning_rate": 9.992723270380674e-06, "loss": 1.2378, "step": 1309 }, { "epoch": 0.3318556048131729, "grad_norm": 4.388514041900635, "learning_rate": 9.992678014171928e-06, "loss": 1.2061, "step": 1310 }, { "epoch": 
0.33210892970234324, "grad_norm": 4.175978183746338, "learning_rate": 9.992632617771126e-06, "loss": 1.261, "step": 1311 }, { "epoch": 0.33236225459151364, "grad_norm": 4.308657646179199, "learning_rate": 9.992587081179537e-06, "loss": 1.334, "step": 1312 }, { "epoch": 0.332615579480684, "grad_norm": 3.970914363861084, "learning_rate": 9.992541404398445e-06, "loss": 1.1876, "step": 1313 }, { "epoch": 0.33286890436985433, "grad_norm": 3.9518797397613525, "learning_rate": 9.99249558742913e-06, "loss": 1.3064, "step": 1314 }, { "epoch": 0.3331222292590247, "grad_norm": 4.303136348724365, "learning_rate": 9.992449630272878e-06, "loss": 1.3708, "step": 1315 }, { "epoch": 0.3333755541481951, "grad_norm": 4.3481526374816895, "learning_rate": 9.992403532930981e-06, "loss": 1.2291, "step": 1316 }, { "epoch": 0.33362887903736543, "grad_norm": 4.0205278396606445, "learning_rate": 9.992357295404733e-06, "loss": 1.2624, "step": 1317 }, { "epoch": 0.3338822039265358, "grad_norm": 4.278614521026611, "learning_rate": 9.99231091769543e-06, "loss": 1.3077, "step": 1318 }, { "epoch": 0.33413552881570613, "grad_norm": 4.188653469085693, "learning_rate": 9.992264399804378e-06, "loss": 1.2738, "step": 1319 }, { "epoch": 0.3343888537048765, "grad_norm": 3.7268896102905273, "learning_rate": 9.992217741732881e-06, "loss": 1.1054, "step": 1320 }, { "epoch": 0.3346421785940469, "grad_norm": 4.0296711921691895, "learning_rate": 9.99217094348225e-06, "loss": 1.1936, "step": 1321 }, { "epoch": 0.3348955034832172, "grad_norm": 3.9426963329315186, "learning_rate": 9.992124005053798e-06, "loss": 1.2568, "step": 1322 }, { "epoch": 0.3351488283723876, "grad_norm": 4.505002021789551, "learning_rate": 9.992076926448844e-06, "loss": 1.4986, "step": 1323 }, { "epoch": 0.3354021532615579, "grad_norm": 4.16630220413208, "learning_rate": 9.992029707668708e-06, "loss": 1.3681, "step": 1324 }, { "epoch": 0.3356554781507283, "grad_norm": 3.81494140625, "learning_rate": 9.991982348714719e-06, "loss": 1.2015, "step": 1325 }, { "epoch": 0.3359088030398987, "grad_norm": 4.078320026397705, "learning_rate": 9.991934849588205e-06, "loss": 1.2564, "step": 1326 }, { "epoch": 0.336162127929069, "grad_norm": 4.1204304695129395, "learning_rate": 9.991887210290501e-06, "loss": 1.1618, "step": 1327 }, { "epoch": 0.33641545281823937, "grad_norm": 3.9501395225524902, "learning_rate": 9.991839430822942e-06, "loss": 1.2996, "step": 1328 }, { "epoch": 0.33666877770740977, "grad_norm": 4.23710823059082, "learning_rate": 9.991791511186872e-06, "loss": 1.4021, "step": 1329 }, { "epoch": 0.3369221025965801, "grad_norm": 4.005718231201172, "learning_rate": 9.991743451383636e-06, "loss": 1.1643, "step": 1330 }, { "epoch": 0.33717542748575047, "grad_norm": 4.026569843292236, "learning_rate": 9.991695251414584e-06, "loss": 1.2508, "step": 1331 }, { "epoch": 0.3374287523749208, "grad_norm": 4.557069778442383, "learning_rate": 9.991646911281067e-06, "loss": 1.3715, "step": 1332 }, { "epoch": 0.3376820772640912, "grad_norm": 3.9800522327423096, "learning_rate": 9.991598430984445e-06, "loss": 1.213, "step": 1333 }, { "epoch": 0.33793540215326157, "grad_norm": 4.316899299621582, "learning_rate": 9.99154981052608e-06, "loss": 1.2742, "step": 1334 }, { "epoch": 0.3381887270424319, "grad_norm": 3.8521671295166016, "learning_rate": 9.991501049907336e-06, "loss": 1.263, "step": 1335 }, { "epoch": 0.33844205193160226, "grad_norm": 4.271399974822998, "learning_rate": 9.991452149129579e-06, "loss": 1.2038, "step": 1336 }, { "epoch": 0.33869537682077266, "grad_norm": 
4.1595377922058105, "learning_rate": 9.991403108194187e-06, "loss": 1.2474, "step": 1337 }, { "epoch": 0.338948701709943, "grad_norm": 4.392655372619629, "learning_rate": 9.991353927102537e-06, "loss": 1.3154, "step": 1338 }, { "epoch": 0.33920202659911336, "grad_norm": 4.084737300872803, "learning_rate": 9.991304605856006e-06, "loss": 1.1643, "step": 1339 }, { "epoch": 0.3394553514882837, "grad_norm": 3.8093276023864746, "learning_rate": 9.99125514445598e-06, "loss": 1.0724, "step": 1340 }, { "epoch": 0.3397086763774541, "grad_norm": 4.508452892303467, "learning_rate": 9.991205542903849e-06, "loss": 1.2907, "step": 1341 }, { "epoch": 0.33996200126662446, "grad_norm": 3.834325075149536, "learning_rate": 9.991155801201006e-06, "loss": 1.1093, "step": 1342 }, { "epoch": 0.3402153261557948, "grad_norm": 4.575895309448242, "learning_rate": 9.991105919348846e-06, "loss": 1.2663, "step": 1343 }, { "epoch": 0.34046865104496515, "grad_norm": 3.84704852104187, "learning_rate": 9.991055897348773e-06, "loss": 1.1895, "step": 1344 }, { "epoch": 0.3407219759341355, "grad_norm": 4.40834379196167, "learning_rate": 9.991005735202191e-06, "loss": 1.3147, "step": 1345 }, { "epoch": 0.3409753008233059, "grad_norm": 4.067554950714111, "learning_rate": 9.990955432910504e-06, "loss": 1.2896, "step": 1346 }, { "epoch": 0.34122862571247625, "grad_norm": 3.7188527584075928, "learning_rate": 9.99090499047513e-06, "loss": 1.1266, "step": 1347 }, { "epoch": 0.3414819506016466, "grad_norm": 4.089304447174072, "learning_rate": 9.990854407897481e-06, "loss": 1.2385, "step": 1348 }, { "epoch": 0.34173527549081695, "grad_norm": 4.055856227874756, "learning_rate": 9.99080368517898e-06, "loss": 1.2671, "step": 1349 }, { "epoch": 0.34198860037998735, "grad_norm": 4.142635822296143, "learning_rate": 9.990752822321052e-06, "loss": 1.2366, "step": 1350 }, { "epoch": 0.3422419252691577, "grad_norm": 3.9640398025512695, "learning_rate": 9.990701819325122e-06, "loss": 1.2174, "step": 1351 }, { "epoch": 0.34249525015832805, "grad_norm": 4.102181434631348, "learning_rate": 9.990650676192626e-06, "loss": 1.1915, "step": 1352 }, { "epoch": 0.3427485750474984, "grad_norm": 3.8989973068237305, "learning_rate": 9.990599392924996e-06, "loss": 1.206, "step": 1353 }, { "epoch": 0.3430018999366688, "grad_norm": 4.988248825073242, "learning_rate": 9.990547969523673e-06, "loss": 1.3651, "step": 1354 }, { "epoch": 0.34325522482583914, "grad_norm": 3.986790418624878, "learning_rate": 9.990496405990104e-06, "loss": 1.0947, "step": 1355 }, { "epoch": 0.3435085497150095, "grad_norm": 4.1601948738098145, "learning_rate": 9.990444702325736e-06, "loss": 1.2768, "step": 1356 }, { "epoch": 0.34376187460417984, "grad_norm": 4.031864166259766, "learning_rate": 9.990392858532017e-06, "loss": 1.1873, "step": 1357 }, { "epoch": 0.34401519949335024, "grad_norm": 4.27942419052124, "learning_rate": 9.990340874610406e-06, "loss": 1.338, "step": 1358 }, { "epoch": 0.3442685243825206, "grad_norm": 3.944505214691162, "learning_rate": 9.990288750562365e-06, "loss": 1.2352, "step": 1359 }, { "epoch": 0.34452184927169094, "grad_norm": 4.307021141052246, "learning_rate": 9.99023648638935e-06, "loss": 1.3033, "step": 1360 }, { "epoch": 0.3447751741608613, "grad_norm": 4.350025177001953, "learning_rate": 9.990184082092838e-06, "loss": 1.3114, "step": 1361 }, { "epoch": 0.3450284990500317, "grad_norm": 4.570362091064453, "learning_rate": 9.990131537674293e-06, "loss": 1.2122, "step": 1362 }, { "epoch": 0.34528182393920204, "grad_norm": 4.033261299133301, "learning_rate": 
9.990078853135192e-06, "loss": 1.2177, "step": 1363 }, { "epoch": 0.3455351488283724, "grad_norm": 4.397818088531494, "learning_rate": 9.990026028477018e-06, "loss": 1.3178, "step": 1364 }, { "epoch": 0.34578847371754273, "grad_norm": 3.95310115814209, "learning_rate": 9.98997306370125e-06, "loss": 1.1768, "step": 1365 }, { "epoch": 0.34604179860671314, "grad_norm": 4.639983654022217, "learning_rate": 9.98991995880938e-06, "loss": 1.3302, "step": 1366 }, { "epoch": 0.3462951234958835, "grad_norm": 4.064812660217285, "learning_rate": 9.989866713802894e-06, "loss": 1.3295, "step": 1367 }, { "epoch": 0.34654844838505383, "grad_norm": 4.017191410064697, "learning_rate": 9.98981332868329e-06, "loss": 1.2532, "step": 1368 }, { "epoch": 0.3468017732742242, "grad_norm": 4.332028865814209, "learning_rate": 9.989759803452065e-06, "loss": 1.256, "step": 1369 }, { "epoch": 0.3470550981633945, "grad_norm": 4.250354290008545, "learning_rate": 9.989706138110724e-06, "loss": 1.3158, "step": 1370 }, { "epoch": 0.34730842305256493, "grad_norm": 3.973611831665039, "learning_rate": 9.989652332660773e-06, "loss": 1.1776, "step": 1371 }, { "epoch": 0.3475617479417353, "grad_norm": 4.323413848876953, "learning_rate": 9.989598387103724e-06, "loss": 1.2984, "step": 1372 }, { "epoch": 0.3478150728309056, "grad_norm": 4.1420159339904785, "learning_rate": 9.98954430144109e-06, "loss": 1.3327, "step": 1373 }, { "epoch": 0.348068397720076, "grad_norm": 4.249251842498779, "learning_rate": 9.98949007567439e-06, "loss": 1.3095, "step": 1374 }, { "epoch": 0.3483217226092464, "grad_norm": 4.179461479187012, "learning_rate": 9.989435709805148e-06, "loss": 1.1735, "step": 1375 }, { "epoch": 0.3485750474984167, "grad_norm": 4.478822231292725, "learning_rate": 9.989381203834892e-06, "loss": 1.3818, "step": 1376 }, { "epoch": 0.34882837238758707, "grad_norm": 4.347557067871094, "learning_rate": 9.989326557765147e-06, "loss": 1.4084, "step": 1377 }, { "epoch": 0.3490816972767574, "grad_norm": 4.458835124969482, "learning_rate": 9.98927177159745e-06, "loss": 1.2902, "step": 1378 }, { "epoch": 0.3493350221659278, "grad_norm": 4.278656482696533, "learning_rate": 9.989216845333343e-06, "loss": 1.2257, "step": 1379 }, { "epoch": 0.34958834705509817, "grad_norm": 4.385313034057617, "learning_rate": 9.989161778974364e-06, "loss": 1.2569, "step": 1380 }, { "epoch": 0.3498416719442685, "grad_norm": 4.05797815322876, "learning_rate": 9.98910657252206e-06, "loss": 1.2239, "step": 1381 }, { "epoch": 0.35009499683343887, "grad_norm": 3.951141834259033, "learning_rate": 9.989051225977982e-06, "loss": 1.3011, "step": 1382 }, { "epoch": 0.35034832172260927, "grad_norm": 4.144949436187744, "learning_rate": 9.988995739343684e-06, "loss": 1.3402, "step": 1383 }, { "epoch": 0.3506016466117796, "grad_norm": 4.196046829223633, "learning_rate": 9.988940112620724e-06, "loss": 1.2248, "step": 1384 }, { "epoch": 0.35085497150094996, "grad_norm": 3.6633708477020264, "learning_rate": 9.988884345810664e-06, "loss": 1.1418, "step": 1385 }, { "epoch": 0.3511082963901203, "grad_norm": 4.088935375213623, "learning_rate": 9.988828438915068e-06, "loss": 1.3156, "step": 1386 }, { "epoch": 0.3513616212792907, "grad_norm": 3.9691689014434814, "learning_rate": 9.98877239193551e-06, "loss": 1.2846, "step": 1387 }, { "epoch": 0.35161494616846106, "grad_norm": 4.316605091094971, "learning_rate": 9.98871620487356e-06, "loss": 1.3143, "step": 1388 }, { "epoch": 0.3518682710576314, "grad_norm": 4.068321228027344, "learning_rate": 9.988659877730798e-06, "loss": 1.2556, 
"step": 1389 }, { "epoch": 0.35212159594680176, "grad_norm": 4.39426326751709, "learning_rate": 9.988603410508803e-06, "loss": 1.2159, "step": 1390 }, { "epoch": 0.35237492083597216, "grad_norm": 4.016197204589844, "learning_rate": 9.988546803209164e-06, "loss": 1.2425, "step": 1391 }, { "epoch": 0.3526282457251425, "grad_norm": 4.415781497955322, "learning_rate": 9.988490055833468e-06, "loss": 1.2436, "step": 1392 }, { "epoch": 0.35288157061431286, "grad_norm": 4.049761772155762, "learning_rate": 9.988433168383309e-06, "loss": 1.1914, "step": 1393 }, { "epoch": 0.3531348955034832, "grad_norm": 4.043946743011475, "learning_rate": 9.988376140860285e-06, "loss": 1.3694, "step": 1394 }, { "epoch": 0.35338822039265355, "grad_norm": 4.123354434967041, "learning_rate": 9.988318973265998e-06, "loss": 1.146, "step": 1395 }, { "epoch": 0.35364154528182395, "grad_norm": 4.2967529296875, "learning_rate": 9.98826166560205e-06, "loss": 1.1554, "step": 1396 }, { "epoch": 0.3538948701709943, "grad_norm": 3.7744598388671875, "learning_rate": 9.988204217870055e-06, "loss": 1.1237, "step": 1397 }, { "epoch": 0.35414819506016465, "grad_norm": 4.072709560394287, "learning_rate": 9.98814663007162e-06, "loss": 1.2496, "step": 1398 }, { "epoch": 0.354401519949335, "grad_norm": 3.9878129959106445, "learning_rate": 9.988088902208367e-06, "loss": 1.1834, "step": 1399 }, { "epoch": 0.3546548448385054, "grad_norm": 3.452812671661377, "learning_rate": 9.988031034281917e-06, "loss": 1.0854, "step": 1400 }, { "epoch": 0.35490816972767575, "grad_norm": 4.219860553741455, "learning_rate": 9.987973026293891e-06, "loss": 1.2147, "step": 1401 }, { "epoch": 0.3551614946168461, "grad_norm": 4.2590012550354, "learning_rate": 9.987914878245921e-06, "loss": 1.3714, "step": 1402 }, { "epoch": 0.35541481950601644, "grad_norm": 3.835472822189331, "learning_rate": 9.98785659013964e-06, "loss": 1.3531, "step": 1403 }, { "epoch": 0.35566814439518685, "grad_norm": 3.726508617401123, "learning_rate": 9.987798161976683e-06, "loss": 1.2905, "step": 1404 }, { "epoch": 0.3559214692843572, "grad_norm": 3.9989113807678223, "learning_rate": 9.987739593758691e-06, "loss": 1.2383, "step": 1405 }, { "epoch": 0.35617479417352754, "grad_norm": 4.159228801727295, "learning_rate": 9.98768088548731e-06, "loss": 1.2434, "step": 1406 }, { "epoch": 0.3564281190626979, "grad_norm": 3.9649009704589844, "learning_rate": 9.987622037164185e-06, "loss": 1.2097, "step": 1407 }, { "epoch": 0.3566814439518683, "grad_norm": 3.781782865524292, "learning_rate": 9.987563048790971e-06, "loss": 1.2509, "step": 1408 }, { "epoch": 0.35693476884103864, "grad_norm": 4.055093288421631, "learning_rate": 9.987503920369326e-06, "loss": 1.143, "step": 1409 }, { "epoch": 0.357188093730209, "grad_norm": 4.081745624542236, "learning_rate": 9.987444651900909e-06, "loss": 1.1594, "step": 1410 }, { "epoch": 0.35744141861937934, "grad_norm": 4.309524059295654, "learning_rate": 9.987385243387381e-06, "loss": 1.1907, "step": 1411 }, { "epoch": 0.35769474350854974, "grad_norm": 3.8126816749572754, "learning_rate": 9.987325694830414e-06, "loss": 1.1785, "step": 1412 }, { "epoch": 0.3579480683977201, "grad_norm": 4.495013236999512, "learning_rate": 9.98726600623168e-06, "loss": 1.3577, "step": 1413 }, { "epoch": 0.35820139328689043, "grad_norm": 4.093703269958496, "learning_rate": 9.987206177592852e-06, "loss": 1.2145, "step": 1414 }, { "epoch": 0.3584547181760608, "grad_norm": 4.1885905265808105, "learning_rate": 9.987146208915612e-06, "loss": 1.3387, "step": 1415 }, { "epoch": 
0.3587080430652312, "grad_norm": 4.143773078918457, "learning_rate": 9.987086100201646e-06, "loss": 1.1747, "step": 1416 }, { "epoch": 0.35896136795440153, "grad_norm": 3.8252334594726562, "learning_rate": 9.98702585145264e-06, "loss": 1.1336, "step": 1417 }, { "epoch": 0.3592146928435719, "grad_norm": 3.914947986602783, "learning_rate": 9.986965462670282e-06, "loss": 1.1176, "step": 1418 }, { "epoch": 0.35946801773274223, "grad_norm": 3.8457250595092773, "learning_rate": 9.986904933856274e-06, "loss": 1.2418, "step": 1419 }, { "epoch": 0.3597213426219126, "grad_norm": 4.563148021697998, "learning_rate": 9.98684426501231e-06, "loss": 1.3218, "step": 1420 }, { "epoch": 0.359974667511083, "grad_norm": 4.02828311920166, "learning_rate": 9.9867834561401e-06, "loss": 1.166, "step": 1421 }, { "epoch": 0.3602279924002533, "grad_norm": 3.606048822402954, "learning_rate": 9.986722507241344e-06, "loss": 1.1854, "step": 1422 }, { "epoch": 0.3604813172894237, "grad_norm": 4.164137840270996, "learning_rate": 9.986661418317759e-06, "loss": 1.3286, "step": 1423 }, { "epoch": 0.360734642178594, "grad_norm": 4.220963001251221, "learning_rate": 9.986600189371058e-06, "loss": 1.309, "step": 1424 }, { "epoch": 0.3609879670677644, "grad_norm": 3.883132219314575, "learning_rate": 9.986538820402962e-06, "loss": 1.3193, "step": 1425 }, { "epoch": 0.3612412919569348, "grad_norm": 4.095963478088379, "learning_rate": 9.986477311415192e-06, "loss": 1.3036, "step": 1426 }, { "epoch": 0.3614946168461051, "grad_norm": 3.9755353927612305, "learning_rate": 9.986415662409476e-06, "loss": 1.3306, "step": 1427 }, { "epoch": 0.36174794173527547, "grad_norm": 3.626171588897705, "learning_rate": 9.986353873387545e-06, "loss": 1.1595, "step": 1428 }, { "epoch": 0.36200126662444587, "grad_norm": 3.70266056060791, "learning_rate": 9.986291944351136e-06, "loss": 1.2649, "step": 1429 }, { "epoch": 0.3622545915136162, "grad_norm": 3.8242764472961426, "learning_rate": 9.986229875301984e-06, "loss": 1.3271, "step": 1430 }, { "epoch": 0.36250791640278657, "grad_norm": 4.0928955078125, "learning_rate": 9.986167666241834e-06, "loss": 1.1441, "step": 1431 }, { "epoch": 0.3627612412919569, "grad_norm": 4.2461957931518555, "learning_rate": 9.986105317172434e-06, "loss": 1.24, "step": 1432 }, { "epoch": 0.3630145661811273, "grad_norm": 3.7550787925720215, "learning_rate": 9.986042828095534e-06, "loss": 1.2437, "step": 1433 }, { "epoch": 0.36326789107029767, "grad_norm": 3.7784383296966553, "learning_rate": 9.985980199012887e-06, "loss": 1.1827, "step": 1434 }, { "epoch": 0.363521215959468, "grad_norm": 3.8189046382904053, "learning_rate": 9.985917429926253e-06, "loss": 1.0869, "step": 1435 }, { "epoch": 0.36377454084863836, "grad_norm": 4.422670841217041, "learning_rate": 9.985854520837396e-06, "loss": 1.2184, "step": 1436 }, { "epoch": 0.36402786573780876, "grad_norm": 4.154664993286133, "learning_rate": 9.985791471748079e-06, "loss": 1.1978, "step": 1437 }, { "epoch": 0.3642811906269791, "grad_norm": 3.967803955078125, "learning_rate": 9.985728282660075e-06, "loss": 1.2848, "step": 1438 }, { "epoch": 0.36453451551614946, "grad_norm": 4.257202625274658, "learning_rate": 9.985664953575157e-06, "loss": 1.2945, "step": 1439 }, { "epoch": 0.3647878404053198, "grad_norm": 4.032511234283447, "learning_rate": 9.985601484495105e-06, "loss": 1.1835, "step": 1440 }, { "epoch": 0.3650411652944902, "grad_norm": 4.19437313079834, "learning_rate": 9.985537875421698e-06, "loss": 1.3775, "step": 1441 }, { "epoch": 0.36529449018366056, "grad_norm": 
4.085351943969727, "learning_rate": 9.985474126356724e-06, "loss": 1.2746, "step": 1442 }, { "epoch": 0.3655478150728309, "grad_norm": 3.9030771255493164, "learning_rate": 9.985410237301976e-06, "loss": 1.2155, "step": 1443 }, { "epoch": 0.36580113996200125, "grad_norm": 3.9144396781921387, "learning_rate": 9.985346208259244e-06, "loss": 1.1466, "step": 1444 }, { "epoch": 0.3660544648511716, "grad_norm": 4.423661708831787, "learning_rate": 9.985282039230326e-06, "loss": 1.2705, "step": 1445 }, { "epoch": 0.366307789740342, "grad_norm": 3.9243950843811035, "learning_rate": 9.985217730217025e-06, "loss": 1.2001, "step": 1446 }, { "epoch": 0.36656111462951235, "grad_norm": 4.033255577087402, "learning_rate": 9.985153281221144e-06, "loss": 1.3083, "step": 1447 }, { "epoch": 0.3668144395186827, "grad_norm": 4.167736053466797, "learning_rate": 9.9850886922445e-06, "loss": 1.1748, "step": 1448 }, { "epoch": 0.36706776440785305, "grad_norm": 3.9712142944335938, "learning_rate": 9.9850239632889e-06, "loss": 1.1794, "step": 1449 }, { "epoch": 0.36732108929702345, "grad_norm": 4.448064804077148, "learning_rate": 9.984959094356163e-06, "loss": 1.2823, "step": 1450 }, { "epoch": 0.3675744141861938, "grad_norm": 4.087486743927002, "learning_rate": 9.984894085448111e-06, "loss": 1.4028, "step": 1451 }, { "epoch": 0.36782773907536415, "grad_norm": 3.659010410308838, "learning_rate": 9.98482893656657e-06, "loss": 1.1813, "step": 1452 }, { "epoch": 0.3680810639645345, "grad_norm": 3.814033031463623, "learning_rate": 9.984763647713369e-06, "loss": 1.1237, "step": 1453 }, { "epoch": 0.3683343888537049, "grad_norm": 4.058841228485107, "learning_rate": 9.984698218890341e-06, "loss": 1.4169, "step": 1454 }, { "epoch": 0.36858771374287524, "grad_norm": 3.9934372901916504, "learning_rate": 9.984632650099322e-06, "loss": 1.2431, "step": 1455 }, { "epoch": 0.3688410386320456, "grad_norm": 3.9111716747283936, "learning_rate": 9.984566941342156e-06, "loss": 1.2912, "step": 1456 }, { "epoch": 0.36909436352121594, "grad_norm": 4.044859409332275, "learning_rate": 9.984501092620685e-06, "loss": 1.2513, "step": 1457 }, { "epoch": 0.36934768841038634, "grad_norm": 3.935584306716919, "learning_rate": 9.98443510393676e-06, "loss": 1.2562, "step": 1458 }, { "epoch": 0.3696010132995567, "grad_norm": 3.825549840927124, "learning_rate": 9.984368975292233e-06, "loss": 1.2572, "step": 1459 }, { "epoch": 0.36985433818872704, "grad_norm": 4.032233238220215, "learning_rate": 9.984302706688962e-06, "loss": 1.2745, "step": 1460 }, { "epoch": 0.3701076630778974, "grad_norm": 4.025396823883057, "learning_rate": 9.984236298128806e-06, "loss": 1.224, "step": 1461 }, { "epoch": 0.3703609879670678, "grad_norm": 4.060763359069824, "learning_rate": 9.984169749613632e-06, "loss": 1.1462, "step": 1462 }, { "epoch": 0.37061431285623814, "grad_norm": 3.721524238586426, "learning_rate": 9.984103061145306e-06, "loss": 1.2868, "step": 1463 }, { "epoch": 0.3708676377454085, "grad_norm": 3.8655741214752197, "learning_rate": 9.984036232725702e-06, "loss": 1.1302, "step": 1464 }, { "epoch": 0.37112096263457883, "grad_norm": 3.8349742889404297, "learning_rate": 9.983969264356697e-06, "loss": 1.1663, "step": 1465 }, { "epoch": 0.37137428752374924, "grad_norm": 4.160083770751953, "learning_rate": 9.983902156040172e-06, "loss": 1.2775, "step": 1466 }, { "epoch": 0.3716276124129196, "grad_norm": 3.9831254482269287, "learning_rate": 9.983834907778009e-06, "loss": 1.3009, "step": 1467 }, { "epoch": 0.37188093730208993, "grad_norm": 3.8674516677856445, 
"learning_rate": 9.9837675195721e-06, "loss": 1.1354, "step": 1468 }, { "epoch": 0.3721342621912603, "grad_norm": 3.812502145767212, "learning_rate": 9.98369999142433e-06, "loss": 1.0914, "step": 1469 }, { "epoch": 0.3723875870804306, "grad_norm": 3.684924840927124, "learning_rate": 9.983632323336606e-06, "loss": 1.1655, "step": 1470 }, { "epoch": 0.37264091196960103, "grad_norm": 4.2497968673706055, "learning_rate": 9.983564515310817e-06, "loss": 1.2061, "step": 1471 }, { "epoch": 0.3728942368587714, "grad_norm": 4.240072250366211, "learning_rate": 9.983496567348874e-06, "loss": 1.2387, "step": 1472 }, { "epoch": 0.3731475617479417, "grad_norm": 4.081851482391357, "learning_rate": 9.983428479452683e-06, "loss": 1.2998, "step": 1473 }, { "epoch": 0.3734008866371121, "grad_norm": 3.9202663898468018, "learning_rate": 9.983360251624156e-06, "loss": 1.1734, "step": 1474 }, { "epoch": 0.3736542115262825, "grad_norm": 4.135853290557861, "learning_rate": 9.98329188386521e-06, "loss": 1.25, "step": 1475 }, { "epoch": 0.3739075364154528, "grad_norm": 4.302660942077637, "learning_rate": 9.983223376177761e-06, "loss": 1.1406, "step": 1476 }, { "epoch": 0.37416086130462317, "grad_norm": 4.354469299316406, "learning_rate": 9.983154728563738e-06, "loss": 1.2616, "step": 1477 }, { "epoch": 0.3744141861937935, "grad_norm": 3.839205503463745, "learning_rate": 9.983085941025063e-06, "loss": 1.1259, "step": 1478 }, { "epoch": 0.3746675110829639, "grad_norm": 3.928718090057373, "learning_rate": 9.983017013563671e-06, "loss": 1.1985, "step": 1479 }, { "epoch": 0.37492083597213427, "grad_norm": 3.98683762550354, "learning_rate": 9.982947946181497e-06, "loss": 1.2515, "step": 1480 }, { "epoch": 0.3751741608613046, "grad_norm": 4.049800872802734, "learning_rate": 9.98287873888048e-06, "loss": 1.2324, "step": 1481 }, { "epoch": 0.37542748575047497, "grad_norm": 4.102850914001465, "learning_rate": 9.982809391662563e-06, "loss": 1.244, "step": 1482 }, { "epoch": 0.37568081063964537, "grad_norm": 4.339122772216797, "learning_rate": 9.982739904529695e-06, "loss": 1.3376, "step": 1483 }, { "epoch": 0.3759341355288157, "grad_norm": 3.813641309738159, "learning_rate": 9.982670277483824e-06, "loss": 1.2096, "step": 1484 }, { "epoch": 0.37618746041798606, "grad_norm": 4.246204853057861, "learning_rate": 9.982600510526908e-06, "loss": 1.3492, "step": 1485 }, { "epoch": 0.3764407853071564, "grad_norm": 3.831717014312744, "learning_rate": 9.982530603660905e-06, "loss": 1.2275, "step": 1486 }, { "epoch": 0.3766941101963268, "grad_norm": 4.106499195098877, "learning_rate": 9.982460556887776e-06, "loss": 1.2366, "step": 1487 }, { "epoch": 0.37694743508549716, "grad_norm": 4.112460136413574, "learning_rate": 9.982390370209492e-06, "loss": 1.3153, "step": 1488 }, { "epoch": 0.3772007599746675, "grad_norm": 4.27755880355835, "learning_rate": 9.982320043628022e-06, "loss": 1.2236, "step": 1489 }, { "epoch": 0.37745408486383786, "grad_norm": 3.8407411575317383, "learning_rate": 9.982249577145338e-06, "loss": 1.1825, "step": 1490 }, { "epoch": 0.37770740975300826, "grad_norm": 4.075498580932617, "learning_rate": 9.982178970763421e-06, "loss": 1.2772, "step": 1491 }, { "epoch": 0.3779607346421786, "grad_norm": 4.027531147003174, "learning_rate": 9.982108224484255e-06, "loss": 1.3049, "step": 1492 }, { "epoch": 0.37821405953134896, "grad_norm": 4.01485013961792, "learning_rate": 9.982037338309824e-06, "loss": 1.3352, "step": 1493 }, { "epoch": 0.3784673844205193, "grad_norm": 4.572307586669922, "learning_rate": 9.98196631224212e-06, 
"loss": 1.1925, "step": 1494 }, { "epoch": 0.37872070930968965, "grad_norm": 3.935351848602295, "learning_rate": 9.981895146283139e-06, "loss": 1.2807, "step": 1495 }, { "epoch": 0.37897403419886005, "grad_norm": 4.141636371612549, "learning_rate": 9.981823840434875e-06, "loss": 1.3289, "step": 1496 }, { "epoch": 0.3792273590880304, "grad_norm": 3.8772614002227783, "learning_rate": 9.98175239469933e-06, "loss": 1.2172, "step": 1497 }, { "epoch": 0.37948068397720075, "grad_norm": 3.867401599884033, "learning_rate": 9.981680809078516e-06, "loss": 1.2663, "step": 1498 }, { "epoch": 0.3797340088663711, "grad_norm": 4.0219831466674805, "learning_rate": 9.98160908357444e-06, "loss": 1.3242, "step": 1499 }, { "epoch": 0.3799873337555415, "grad_norm": 4.100152969360352, "learning_rate": 9.981537218189113e-06, "loss": 1.3128, "step": 1500 }, { "epoch": 0.3799873337555415, "eval_loss": 1.2583286762237549, "eval_runtime": 12.7009, "eval_samples_per_second": 31.494, "eval_steps_per_second": 3.937, "step": 1500 }, { "epoch": 0.38024065864471185, "grad_norm": 3.9439661502838135, "learning_rate": 9.981465212924557e-06, "loss": 1.1865, "step": 1501 }, { "epoch": 0.3804939835338822, "grad_norm": 3.7056007385253906, "learning_rate": 9.981393067782793e-06, "loss": 1.2072, "step": 1502 }, { "epoch": 0.38074730842305254, "grad_norm": 4.4519171714782715, "learning_rate": 9.981320782765847e-06, "loss": 1.3596, "step": 1503 }, { "epoch": 0.38100063331222295, "grad_norm": 3.824197769165039, "learning_rate": 9.981248357875745e-06, "loss": 1.24, "step": 1504 }, { "epoch": 0.3812539582013933, "grad_norm": 4.500977039337158, "learning_rate": 9.981175793114526e-06, "loss": 1.2211, "step": 1505 }, { "epoch": 0.38150728309056364, "grad_norm": 3.888195514678955, "learning_rate": 9.981103088484226e-06, "loss": 1.1836, "step": 1506 }, { "epoch": 0.381760607979734, "grad_norm": 4.110970497131348, "learning_rate": 9.981030243986885e-06, "loss": 1.1025, "step": 1507 }, { "epoch": 0.3820139328689044, "grad_norm": 4.05878210067749, "learning_rate": 9.980957259624549e-06, "loss": 1.2961, "step": 1508 }, { "epoch": 0.38226725775807474, "grad_norm": 4.440320014953613, "learning_rate": 9.980884135399268e-06, "loss": 1.2095, "step": 1509 }, { "epoch": 0.3825205826472451, "grad_norm": 3.711378574371338, "learning_rate": 9.980810871313094e-06, "loss": 1.1757, "step": 1510 }, { "epoch": 0.38277390753641544, "grad_norm": 4.575596809387207, "learning_rate": 9.980737467368086e-06, "loss": 1.3483, "step": 1511 }, { "epoch": 0.38302723242558584, "grad_norm": 3.8007843494415283, "learning_rate": 9.980663923566306e-06, "loss": 1.19, "step": 1512 }, { "epoch": 0.3832805573147562, "grad_norm": 4.5330119132995605, "learning_rate": 9.980590239909814e-06, "loss": 1.1921, "step": 1513 }, { "epoch": 0.38353388220392653, "grad_norm": 3.7246146202087402, "learning_rate": 9.980516416400683e-06, "loss": 1.1175, "step": 1514 }, { "epoch": 0.3837872070930969, "grad_norm": 3.7267162799835205, "learning_rate": 9.980442453040986e-06, "loss": 1.1074, "step": 1515 }, { "epoch": 0.38404053198226723, "grad_norm": 3.743905782699585, "learning_rate": 9.980368349832799e-06, "loss": 1.2421, "step": 1516 }, { "epoch": 0.38429385687143763, "grad_norm": 4.078317642211914, "learning_rate": 9.980294106778203e-06, "loss": 1.271, "step": 1517 }, { "epoch": 0.384547181760608, "grad_norm": 4.014880180358887, "learning_rate": 9.980219723879283e-06, "loss": 1.258, "step": 1518 }, { "epoch": 0.38480050664977833, "grad_norm": 4.1032538414001465, "learning_rate": 
9.980145201138127e-06, "loss": 1.3407, "step": 1519 }, { "epoch": 0.3850538315389487, "grad_norm": 3.856544017791748, "learning_rate": 9.980070538556828e-06, "loss": 1.1389, "step": 1520 }, { "epoch": 0.3853071564281191, "grad_norm": 4.109692573547363, "learning_rate": 9.979995736137482e-06, "loss": 1.3166, "step": 1521 }, { "epoch": 0.3855604813172894, "grad_norm": 4.3691840171813965, "learning_rate": 9.979920793882191e-06, "loss": 1.3517, "step": 1522 }, { "epoch": 0.3858138062064598, "grad_norm": 4.090217113494873, "learning_rate": 9.979845711793057e-06, "loss": 1.3177, "step": 1523 }, { "epoch": 0.3860671310956301, "grad_norm": 4.154439449310303, "learning_rate": 9.97977048987219e-06, "loss": 1.3189, "step": 1524 }, { "epoch": 0.3863204559848005, "grad_norm": 3.9512176513671875, "learning_rate": 9.9796951281217e-06, "loss": 1.2516, "step": 1525 }, { "epoch": 0.3865737808739709, "grad_norm": 4.008617401123047, "learning_rate": 9.979619626543705e-06, "loss": 1.2444, "step": 1526 }, { "epoch": 0.3868271057631412, "grad_norm": 4.366746425628662, "learning_rate": 9.979543985140325e-06, "loss": 1.2738, "step": 1527 }, { "epoch": 0.38708043065231157, "grad_norm": 4.0556745529174805, "learning_rate": 9.979468203913684e-06, "loss": 1.2275, "step": 1528 }, { "epoch": 0.38733375554148197, "grad_norm": 3.4091527462005615, "learning_rate": 9.97939228286591e-06, "loss": 1.1072, "step": 1529 }, { "epoch": 0.3875870804306523, "grad_norm": 3.441343069076538, "learning_rate": 9.979316221999133e-06, "loss": 1.1414, "step": 1530 }, { "epoch": 0.38784040531982267, "grad_norm": 4.0893659591674805, "learning_rate": 9.979240021315493e-06, "loss": 1.2421, "step": 1531 }, { "epoch": 0.388093730208993, "grad_norm": 3.9928534030914307, "learning_rate": 9.979163680817124e-06, "loss": 1.2132, "step": 1532 }, { "epoch": 0.3883470550981634, "grad_norm": 3.8956265449523926, "learning_rate": 9.979087200506175e-06, "loss": 1.14, "step": 1533 }, { "epoch": 0.38860037998733377, "grad_norm": 3.679107666015625, "learning_rate": 9.97901058038479e-06, "loss": 1.1185, "step": 1534 }, { "epoch": 0.3888537048765041, "grad_norm": 3.8407785892486572, "learning_rate": 9.978933820455119e-06, "loss": 1.1715, "step": 1535 }, { "epoch": 0.38910702976567446, "grad_norm": 3.9353787899017334, "learning_rate": 9.978856920719321e-06, "loss": 1.1604, "step": 1536 }, { "epoch": 0.38936035465484486, "grad_norm": 3.892644166946411, "learning_rate": 9.978779881179557e-06, "loss": 1.3091, "step": 1537 }, { "epoch": 0.3896136795440152, "grad_norm": 3.780111789703369, "learning_rate": 9.978702701837985e-06, "loss": 1.2745, "step": 1538 }, { "epoch": 0.38986700443318556, "grad_norm": 4.206931114196777, "learning_rate": 9.978625382696773e-06, "loss": 1.3056, "step": 1539 }, { "epoch": 0.3901203293223559, "grad_norm": 4.062953948974609, "learning_rate": 9.978547923758098e-06, "loss": 1.2184, "step": 1540 }, { "epoch": 0.39037365421152626, "grad_norm": 4.187219142913818, "learning_rate": 9.978470325024127e-06, "loss": 1.3464, "step": 1541 }, { "epoch": 0.39062697910069666, "grad_norm": 4.182732105255127, "learning_rate": 9.978392586497043e-06, "loss": 1.2979, "step": 1542 }, { "epoch": 0.390880303989867, "grad_norm": 4.154636383056641, "learning_rate": 9.97831470817903e-06, "loss": 1.2577, "step": 1543 }, { "epoch": 0.39113362887903735, "grad_norm": 3.8434035778045654, "learning_rate": 9.978236690072271e-06, "loss": 1.2204, "step": 1544 }, { "epoch": 0.3913869537682077, "grad_norm": 4.057715892791748, "learning_rate": 9.978158532178961e-06, "loss": 
1.2932, "step": 1545 }, { "epoch": 0.3916402786573781, "grad_norm": 4.071051120758057, "learning_rate": 9.978080234501292e-06, "loss": 1.2954, "step": 1546 }, { "epoch": 0.39189360354654845, "grad_norm": 3.698793649673462, "learning_rate": 9.978001797041464e-06, "loss": 1.1183, "step": 1547 }, { "epoch": 0.3921469284357188, "grad_norm": 3.4650938510894775, "learning_rate": 9.977923219801678e-06, "loss": 1.2295, "step": 1548 }, { "epoch": 0.39240025332488915, "grad_norm": 4.090244770050049, "learning_rate": 9.97784450278414e-06, "loss": 1.4205, "step": 1549 }, { "epoch": 0.39265357821405955, "grad_norm": 4.126779079437256, "learning_rate": 9.977765645991062e-06, "loss": 1.3266, "step": 1550 }, { "epoch": 0.3929069031032299, "grad_norm": 3.527606964111328, "learning_rate": 9.977686649424658e-06, "loss": 1.0561, "step": 1551 }, { "epoch": 0.39316022799240025, "grad_norm": 3.956460475921631, "learning_rate": 9.977607513087145e-06, "loss": 1.1791, "step": 1552 }, { "epoch": 0.3934135528815706, "grad_norm": 3.9396400451660156, "learning_rate": 9.977528236980746e-06, "loss": 1.2844, "step": 1553 }, { "epoch": 0.393666877770741, "grad_norm": 4.004495620727539, "learning_rate": 9.977448821107686e-06, "loss": 1.3033, "step": 1554 }, { "epoch": 0.39392020265991134, "grad_norm": 3.8449161052703857, "learning_rate": 9.977369265470197e-06, "loss": 1.117, "step": 1555 }, { "epoch": 0.3941735275490817, "grad_norm": 4.153273105621338, "learning_rate": 9.977289570070514e-06, "loss": 1.199, "step": 1556 }, { "epoch": 0.39442685243825204, "grad_norm": 3.6627635955810547, "learning_rate": 9.97720973491087e-06, "loss": 1.2055, "step": 1557 }, { "epoch": 0.39468017732742244, "grad_norm": 4.08154821395874, "learning_rate": 9.977129759993511e-06, "loss": 1.2293, "step": 1558 }, { "epoch": 0.3949335022165928, "grad_norm": 3.6264965534210205, "learning_rate": 9.97704964532068e-06, "loss": 1.1469, "step": 1559 }, { "epoch": 0.39518682710576314, "grad_norm": 4.057589054107666, "learning_rate": 9.976969390894626e-06, "loss": 1.2882, "step": 1560 }, { "epoch": 0.3954401519949335, "grad_norm": 4.077418327331543, "learning_rate": 9.976888996717606e-06, "loss": 1.1201, "step": 1561 }, { "epoch": 0.3956934768841039, "grad_norm": 4.358857154846191, "learning_rate": 9.976808462791876e-06, "loss": 1.3677, "step": 1562 }, { "epoch": 0.39594680177327424, "grad_norm": 3.968888759613037, "learning_rate": 9.976727789119696e-06, "loss": 1.2792, "step": 1563 }, { "epoch": 0.3962001266624446, "grad_norm": 4.224818706512451, "learning_rate": 9.976646975703333e-06, "loss": 1.2444, "step": 1564 }, { "epoch": 0.39645345155161493, "grad_norm": 4.409234046936035, "learning_rate": 9.976566022545053e-06, "loss": 1.3107, "step": 1565 }, { "epoch": 0.3967067764407853, "grad_norm": 3.9601402282714844, "learning_rate": 9.976484929647135e-06, "loss": 1.4012, "step": 1566 }, { "epoch": 0.3969601013299557, "grad_norm": 3.763319253921509, "learning_rate": 9.97640369701185e-06, "loss": 1.156, "step": 1567 }, { "epoch": 0.39721342621912603, "grad_norm": 3.6474928855895996, "learning_rate": 9.976322324641482e-06, "loss": 1.1047, "step": 1568 }, { "epoch": 0.3974667511082964, "grad_norm": 3.506779432296753, "learning_rate": 9.976240812538315e-06, "loss": 1.2464, "step": 1569 }, { "epoch": 0.3977200759974667, "grad_norm": 3.992598533630371, "learning_rate": 9.97615916070464e-06, "loss": 1.2044, "step": 1570 }, { "epoch": 0.39797340088663713, "grad_norm": 4.019123077392578, "learning_rate": 9.976077369142747e-06, "loss": 1.3095, "step": 1571 }, { "epoch": 
0.3982267257758075, "grad_norm": 4.286922931671143, "learning_rate": 9.975995437854933e-06, "loss": 1.2642, "step": 1572 }, { "epoch": 0.3984800506649778, "grad_norm": 4.605002403259277, "learning_rate": 9.9759133668435e-06, "loss": 1.3369, "step": 1573 }, { "epoch": 0.3987333755541482, "grad_norm": 3.912172555923462, "learning_rate": 9.97583115611075e-06, "loss": 1.147, "step": 1574 }, { "epoch": 0.3989867004433186, "grad_norm": 4.260953903198242, "learning_rate": 9.975748805658996e-06, "loss": 1.1082, "step": 1575 }, { "epoch": 0.3992400253324889, "grad_norm": 3.7952542304992676, "learning_rate": 9.975666315490547e-06, "loss": 1.084, "step": 1576 }, { "epoch": 0.39949335022165927, "grad_norm": 3.7086267471313477, "learning_rate": 9.975583685607717e-06, "loss": 1.1192, "step": 1577 }, { "epoch": 0.3997466751108296, "grad_norm": 3.9403204917907715, "learning_rate": 9.975500916012832e-06, "loss": 1.1835, "step": 1578 }, { "epoch": 0.4, "grad_norm": 4.280765533447266, "learning_rate": 9.975418006708213e-06, "loss": 1.3106, "step": 1579 }, { "epoch": 0.40025332488917037, "grad_norm": 3.7051100730895996, "learning_rate": 9.975334957696186e-06, "loss": 1.1271, "step": 1580 }, { "epoch": 0.4005066497783407, "grad_norm": 4.107967376708984, "learning_rate": 9.975251768979088e-06, "loss": 1.2167, "step": 1581 }, { "epoch": 0.40075997466751107, "grad_norm": 3.9122486114501953, "learning_rate": 9.97516844055925e-06, "loss": 1.2156, "step": 1582 }, { "epoch": 0.40101329955668147, "grad_norm": 4.168511867523193, "learning_rate": 9.975084972439016e-06, "loss": 1.1672, "step": 1583 }, { "epoch": 0.4012666244458518, "grad_norm": 4.123467922210693, "learning_rate": 9.975001364620727e-06, "loss": 1.3482, "step": 1584 }, { "epoch": 0.40151994933502216, "grad_norm": 3.775412082672119, "learning_rate": 9.97491761710673e-06, "loss": 1.1986, "step": 1585 }, { "epoch": 0.4017732742241925, "grad_norm": 3.749708414077759, "learning_rate": 9.974833729899378e-06, "loss": 1.1553, "step": 1586 }, { "epoch": 0.4020265991133629, "grad_norm": 4.15584659576416, "learning_rate": 9.974749703001027e-06, "loss": 1.268, "step": 1587 }, { "epoch": 0.40227992400253326, "grad_norm": 3.7087783813476562, "learning_rate": 9.974665536414036e-06, "loss": 1.1035, "step": 1588 }, { "epoch": 0.4025332488917036, "grad_norm": 4.503772258758545, "learning_rate": 9.97458123014077e-06, "loss": 1.4037, "step": 1589 }, { "epoch": 0.40278657378087396, "grad_norm": 3.9420435428619385, "learning_rate": 9.974496784183592e-06, "loss": 1.2059, "step": 1590 }, { "epoch": 0.4030398986700443, "grad_norm": 4.350255966186523, "learning_rate": 9.974412198544877e-06, "loss": 1.329, "step": 1591 }, { "epoch": 0.4032932235592147, "grad_norm": 4.118219375610352, "learning_rate": 9.974327473226998e-06, "loss": 1.1194, "step": 1592 }, { "epoch": 0.40354654844838506, "grad_norm": 3.929291248321533, "learning_rate": 9.974242608232337e-06, "loss": 1.2417, "step": 1593 }, { "epoch": 0.4037998733375554, "grad_norm": 3.713636875152588, "learning_rate": 9.974157603563273e-06, "loss": 1.2178, "step": 1594 }, { "epoch": 0.40405319822672575, "grad_norm": 4.204817295074463, "learning_rate": 9.974072459222195e-06, "loss": 1.145, "step": 1595 }, { "epoch": 0.40430652311589615, "grad_norm": 4.219133377075195, "learning_rate": 9.973987175211492e-06, "loss": 1.2754, "step": 1596 }, { "epoch": 0.4045598480050665, "grad_norm": 3.8872694969177246, "learning_rate": 9.973901751533563e-06, "loss": 1.1998, "step": 1597 }, { "epoch": 0.40481317289423685, "grad_norm": 4.318929672241211, 
"learning_rate": 9.973816188190803e-06, "loss": 1.3327, "step": 1598 }, { "epoch": 0.4050664977834072, "grad_norm": 3.7346320152282715, "learning_rate": 9.973730485185615e-06, "loss": 1.2627, "step": 1599 }, { "epoch": 0.4053198226725776, "grad_norm": 4.234984874725342, "learning_rate": 9.973644642520407e-06, "loss": 1.385, "step": 1600 }, { "epoch": 0.40557314756174795, "grad_norm": 3.7861108779907227, "learning_rate": 9.973558660197588e-06, "loss": 1.226, "step": 1601 }, { "epoch": 0.4058264724509183, "grad_norm": 4.297764778137207, "learning_rate": 9.973472538219573e-06, "loss": 1.3382, "step": 1602 }, { "epoch": 0.40607979734008864, "grad_norm": 3.9423766136169434, "learning_rate": 9.97338627658878e-06, "loss": 1.2161, "step": 1603 }, { "epoch": 0.40633312222925905, "grad_norm": 3.855275869369507, "learning_rate": 9.973299875307631e-06, "loss": 1.3608, "step": 1604 }, { "epoch": 0.4065864471184294, "grad_norm": 3.6971325874328613, "learning_rate": 9.973213334378553e-06, "loss": 1.1117, "step": 1605 }, { "epoch": 0.40683977200759974, "grad_norm": 4.086726188659668, "learning_rate": 9.973126653803975e-06, "loss": 1.3681, "step": 1606 }, { "epoch": 0.4070930968967701, "grad_norm": 3.759913682937622, "learning_rate": 9.97303983358633e-06, "loss": 1.1092, "step": 1607 }, { "epoch": 0.4073464217859405, "grad_norm": 3.9194438457489014, "learning_rate": 9.972952873728061e-06, "loss": 1.2265, "step": 1608 }, { "epoch": 0.40759974667511084, "grad_norm": 4.006295204162598, "learning_rate": 9.972865774231602e-06, "loss": 1.2321, "step": 1609 }, { "epoch": 0.4078530715642812, "grad_norm": 3.828908920288086, "learning_rate": 9.972778535099405e-06, "loss": 1.1447, "step": 1610 }, { "epoch": 0.40810639645345154, "grad_norm": 4.304439544677734, "learning_rate": 9.972691156333917e-06, "loss": 1.2447, "step": 1611 }, { "epoch": 0.40835972134262194, "grad_norm": 4.169918537139893, "learning_rate": 9.97260363793759e-06, "loss": 1.2263, "step": 1612 }, { "epoch": 0.4086130462317923, "grad_norm": 3.9893386363983154, "learning_rate": 9.972515979912887e-06, "loss": 1.2821, "step": 1613 }, { "epoch": 0.40886637112096264, "grad_norm": 3.9322779178619385, "learning_rate": 9.972428182262264e-06, "loss": 1.1436, "step": 1614 }, { "epoch": 0.409119696010133, "grad_norm": 3.978416919708252, "learning_rate": 9.972340244988187e-06, "loss": 1.1924, "step": 1615 }, { "epoch": 0.40937302089930333, "grad_norm": 4.024904727935791, "learning_rate": 9.972252168093127e-06, "loss": 1.3191, "step": 1616 }, { "epoch": 0.40962634578847373, "grad_norm": 3.776224374771118, "learning_rate": 9.972163951579557e-06, "loss": 1.2135, "step": 1617 }, { "epoch": 0.4098796706776441, "grad_norm": 3.8125698566436768, "learning_rate": 9.972075595449953e-06, "loss": 1.1311, "step": 1618 }, { "epoch": 0.41013299556681443, "grad_norm": 3.916381597518921, "learning_rate": 9.971987099706798e-06, "loss": 1.1718, "step": 1619 }, { "epoch": 0.4103863204559848, "grad_norm": 3.6483993530273438, "learning_rate": 9.971898464352574e-06, "loss": 1.2224, "step": 1620 }, { "epoch": 0.4106396453451552, "grad_norm": 4.058041095733643, "learning_rate": 9.971809689389771e-06, "loss": 1.3286, "step": 1621 }, { "epoch": 0.4108929702343255, "grad_norm": 4.105546474456787, "learning_rate": 9.971720774820884e-06, "loss": 1.2571, "step": 1622 }, { "epoch": 0.4111462951234959, "grad_norm": 3.9141669273376465, "learning_rate": 9.971631720648406e-06, "loss": 1.3122, "step": 1623 }, { "epoch": 0.4113996200126662, "grad_norm": 4.1042938232421875, "learning_rate": 
9.97154252687484e-06, "loss": 1.2634, "step": 1624 }, { "epoch": 0.4116529449018366, "grad_norm": 3.5131354331970215, "learning_rate": 9.971453193502689e-06, "loss": 1.0811, "step": 1625 }, { "epoch": 0.411906269791007, "grad_norm": 4.2376627922058105, "learning_rate": 9.971363720534463e-06, "loss": 1.2599, "step": 1626 }, { "epoch": 0.4121595946801773, "grad_norm": 4.638526916503906, "learning_rate": 9.971274107972675e-06, "loss": 1.3102, "step": 1627 }, { "epoch": 0.41241291956934767, "grad_norm": 3.7138400077819824, "learning_rate": 9.971184355819839e-06, "loss": 1.2356, "step": 1628 }, { "epoch": 0.41266624445851807, "grad_norm": 4.112374305725098, "learning_rate": 9.971094464078476e-06, "loss": 1.2447, "step": 1629 }, { "epoch": 0.4129195693476884, "grad_norm": 3.9615652561187744, "learning_rate": 9.97100443275111e-06, "loss": 1.1926, "step": 1630 }, { "epoch": 0.41317289423685877, "grad_norm": 4.023500442504883, "learning_rate": 9.97091426184027e-06, "loss": 1.3549, "step": 1631 }, { "epoch": 0.4134262191260291, "grad_norm": 4.214376926422119, "learning_rate": 9.970823951348488e-06, "loss": 1.311, "step": 1632 }, { "epoch": 0.4136795440151995, "grad_norm": 3.8443446159362793, "learning_rate": 9.970733501278297e-06, "loss": 1.3935, "step": 1633 }, { "epoch": 0.41393286890436987, "grad_norm": 3.7742702960968018, "learning_rate": 9.970642911632241e-06, "loss": 1.1621, "step": 1634 }, { "epoch": 0.4141861937935402, "grad_norm": 4.038102149963379, "learning_rate": 9.970552182412861e-06, "loss": 1.3304, "step": 1635 }, { "epoch": 0.41443951868271056, "grad_norm": 3.636784791946411, "learning_rate": 9.970461313622704e-06, "loss": 1.1665, "step": 1636 }, { "epoch": 0.41469284357188096, "grad_norm": 3.843376398086548, "learning_rate": 9.970370305264325e-06, "loss": 1.2383, "step": 1637 }, { "epoch": 0.4149461684610513, "grad_norm": 37.40401077270508, "learning_rate": 9.970279157340274e-06, "loss": 1.4453, "step": 1638 }, { "epoch": 0.41519949335022166, "grad_norm": 3.8575456142425537, "learning_rate": 9.970187869853117e-06, "loss": 1.2007, "step": 1639 }, { "epoch": 0.415452818239392, "grad_norm": 3.9534714221954346, "learning_rate": 9.970096442805413e-06, "loss": 1.3262, "step": 1640 }, { "epoch": 0.41570614312856236, "grad_norm": 3.9382474422454834, "learning_rate": 9.970004876199731e-06, "loss": 1.2947, "step": 1641 }, { "epoch": 0.41595946801773276, "grad_norm": 4.135881423950195, "learning_rate": 9.96991317003864e-06, "loss": 1.2837, "step": 1642 }, { "epoch": 0.4162127929069031, "grad_norm": 3.807326078414917, "learning_rate": 9.969821324324717e-06, "loss": 1.1733, "step": 1643 }, { "epoch": 0.41646611779607345, "grad_norm": 4.549855709075928, "learning_rate": 9.969729339060541e-06, "loss": 1.4417, "step": 1644 }, { "epoch": 0.4167194426852438, "grad_norm": 4.065769672393799, "learning_rate": 9.969637214248693e-06, "loss": 1.262, "step": 1645 }, { "epoch": 0.4169727675744142, "grad_norm": 3.8566086292266846, "learning_rate": 9.969544949891763e-06, "loss": 1.2035, "step": 1646 }, { "epoch": 0.41722609246358455, "grad_norm": 4.1258697509765625, "learning_rate": 9.969452545992339e-06, "loss": 1.2796, "step": 1647 }, { "epoch": 0.4174794173527549, "grad_norm": 3.7903356552124023, "learning_rate": 9.969360002553017e-06, "loss": 1.1092, "step": 1648 }, { "epoch": 0.41773274224192525, "grad_norm": 3.8634912967681885, "learning_rate": 9.969267319576394e-06, "loss": 1.3094, "step": 1649 }, { "epoch": 0.41798606713109565, "grad_norm": 3.896378755569458, "learning_rate": 9.969174497065074e-06, 
"loss": 1.2376, "step": 1650 }, { "epoch": 0.418239392020266, "grad_norm": 3.868745803833008, "learning_rate": 9.969081535021665e-06, "loss": 1.2363, "step": 1651 }, { "epoch": 0.41849271690943635, "grad_norm": 3.708183526992798, "learning_rate": 9.968988433448775e-06, "loss": 1.2199, "step": 1652 }, { "epoch": 0.4187460417986067, "grad_norm": 4.169568061828613, "learning_rate": 9.968895192349016e-06, "loss": 1.263, "step": 1653 }, { "epoch": 0.4189993666877771, "grad_norm": 3.771214008331299, "learning_rate": 9.96880181172501e-06, "loss": 1.2186, "step": 1654 }, { "epoch": 0.41925269157694744, "grad_norm": 3.9816854000091553, "learning_rate": 9.96870829157938e-06, "loss": 1.2561, "step": 1655 }, { "epoch": 0.4195060164661178, "grad_norm": 4.4165778160095215, "learning_rate": 9.968614631914746e-06, "loss": 1.2839, "step": 1656 }, { "epoch": 0.41975934135528814, "grad_norm": 3.9381518363952637, "learning_rate": 9.968520832733745e-06, "loss": 1.2773, "step": 1657 }, { "epoch": 0.42001266624445854, "grad_norm": 4.232367038726807, "learning_rate": 9.968426894039006e-06, "loss": 1.3495, "step": 1658 }, { "epoch": 0.4202659911336289, "grad_norm": 3.5772526264190674, "learning_rate": 9.96833281583317e-06, "loss": 1.0775, "step": 1659 }, { "epoch": 0.42051931602279924, "grad_norm": 3.7976326942443848, "learning_rate": 9.968238598118876e-06, "loss": 1.2518, "step": 1660 }, { "epoch": 0.4207726409119696, "grad_norm": 4.092529773712158, "learning_rate": 9.96814424089877e-06, "loss": 1.2172, "step": 1661 }, { "epoch": 0.42102596580114, "grad_norm": 4.227485656738281, "learning_rate": 9.968049744175503e-06, "loss": 1.2755, "step": 1662 }, { "epoch": 0.42127929069031034, "grad_norm": 3.9120378494262695, "learning_rate": 9.967955107951727e-06, "loss": 1.2017, "step": 1663 }, { "epoch": 0.4215326155794807, "grad_norm": 4.034577369689941, "learning_rate": 9.967860332230102e-06, "loss": 1.2581, "step": 1664 }, { "epoch": 0.42178594046865103, "grad_norm": 4.344268321990967, "learning_rate": 9.967765417013284e-06, "loss": 1.1905, "step": 1665 }, { "epoch": 0.4220392653578214, "grad_norm": 4.0689544677734375, "learning_rate": 9.967670362303944e-06, "loss": 1.1421, "step": 1666 }, { "epoch": 0.4222925902469918, "grad_norm": 4.221645355224609, "learning_rate": 9.967575168104746e-06, "loss": 1.1829, "step": 1667 }, { "epoch": 0.42254591513616213, "grad_norm": 3.80424165725708, "learning_rate": 9.967479834418368e-06, "loss": 1.3086, "step": 1668 }, { "epoch": 0.4227992400253325, "grad_norm": 4.1412200927734375, "learning_rate": 9.967384361247484e-06, "loss": 1.1899, "step": 1669 }, { "epoch": 0.4230525649145028, "grad_norm": 4.207159996032715, "learning_rate": 9.967288748594775e-06, "loss": 1.219, "step": 1670 }, { "epoch": 0.42330588980367323, "grad_norm": 4.115466594696045, "learning_rate": 9.967192996462925e-06, "loss": 1.1965, "step": 1671 }, { "epoch": 0.4235592146928436, "grad_norm": 3.930893659591675, "learning_rate": 9.967097104854624e-06, "loss": 1.2194, "step": 1672 }, { "epoch": 0.4238125395820139, "grad_norm": 3.5521059036254883, "learning_rate": 9.967001073772564e-06, "loss": 1.0621, "step": 1673 }, { "epoch": 0.4240658644711843, "grad_norm": 3.9072189331054688, "learning_rate": 9.966904903219443e-06, "loss": 1.1969, "step": 1674 }, { "epoch": 0.4243191893603547, "grad_norm": 3.701289176940918, "learning_rate": 9.966808593197959e-06, "loss": 1.1165, "step": 1675 }, { "epoch": 0.424572514249525, "grad_norm": 4.0461320877075195, "learning_rate": 9.966712143710819e-06, "loss": 1.3238, "step": 1676 }, { 
"epoch": 0.42482583913869537, "grad_norm": 4.037704944610596, "learning_rate": 9.966615554760729e-06, "loss": 1.3101, "step": 1677 }, { "epoch": 0.4250791640278657, "grad_norm": 4.149792671203613, "learning_rate": 9.966518826350401e-06, "loss": 1.2117, "step": 1678 }, { "epoch": 0.4253324889170361, "grad_norm": 3.9528732299804688, "learning_rate": 9.966421958482553e-06, "loss": 1.2514, "step": 1679 }, { "epoch": 0.42558581380620647, "grad_norm": 3.8577218055725098, "learning_rate": 9.966324951159904e-06, "loss": 1.1941, "step": 1680 }, { "epoch": 0.4258391386953768, "grad_norm": 3.78157114982605, "learning_rate": 9.966227804385177e-06, "loss": 1.1766, "step": 1681 }, { "epoch": 0.42609246358454717, "grad_norm": 3.9237875938415527, "learning_rate": 9.966130518161102e-06, "loss": 1.2366, "step": 1682 }, { "epoch": 0.42634578847371757, "grad_norm": 3.930771589279175, "learning_rate": 9.96603309249041e-06, "loss": 1.2634, "step": 1683 }, { "epoch": 0.4265991133628879, "grad_norm": 4.1034135818481445, "learning_rate": 9.965935527375835e-06, "loss": 1.3415, "step": 1684 }, { "epoch": 0.42685243825205826, "grad_norm": 4.213851451873779, "learning_rate": 9.965837822820117e-06, "loss": 1.3057, "step": 1685 }, { "epoch": 0.4271057631412286, "grad_norm": 4.129579544067383, "learning_rate": 9.965739978826004e-06, "loss": 1.1247, "step": 1686 }, { "epoch": 0.42735908803039896, "grad_norm": 4.075704574584961, "learning_rate": 9.965641995396235e-06, "loss": 1.1876, "step": 1687 }, { "epoch": 0.42761241291956936, "grad_norm": 4.015869617462158, "learning_rate": 9.96554387253357e-06, "loss": 1.2813, "step": 1688 }, { "epoch": 0.4278657378087397, "grad_norm": 3.5135021209716797, "learning_rate": 9.965445610240758e-06, "loss": 1.165, "step": 1689 }, { "epoch": 0.42811906269791006, "grad_norm": 3.59696102142334, "learning_rate": 9.965347208520561e-06, "loss": 1.2176, "step": 1690 }, { "epoch": 0.4283723875870804, "grad_norm": 3.8628885746002197, "learning_rate": 9.965248667375742e-06, "loss": 1.1508, "step": 1691 }, { "epoch": 0.4286257124762508, "grad_norm": 3.7970235347747803, "learning_rate": 9.965149986809067e-06, "loss": 1.2897, "step": 1692 }, { "epoch": 0.42887903736542116, "grad_norm": 4.112331867218018, "learning_rate": 9.965051166823308e-06, "loss": 1.4396, "step": 1693 }, { "epoch": 0.4291323622545915, "grad_norm": 3.932244062423706, "learning_rate": 9.964952207421239e-06, "loss": 1.2638, "step": 1694 }, { "epoch": 0.42938568714376185, "grad_norm": 3.9631271362304688, "learning_rate": 9.96485310860564e-06, "loss": 1.209, "step": 1695 }, { "epoch": 0.42963901203293225, "grad_norm": 4.168961048126221, "learning_rate": 9.96475387037929e-06, "loss": 1.3285, "step": 1696 }, { "epoch": 0.4298923369221026, "grad_norm": 3.889246940612793, "learning_rate": 9.96465449274498e-06, "loss": 1.2151, "step": 1697 }, { "epoch": 0.43014566181127295, "grad_norm": 4.106306552886963, "learning_rate": 9.964554975705499e-06, "loss": 1.3925, "step": 1698 }, { "epoch": 0.4303989867004433, "grad_norm": 3.897536516189575, "learning_rate": 9.96445531926364e-06, "loss": 1.3023, "step": 1699 }, { "epoch": 0.4306523115896137, "grad_norm": 3.791674852371216, "learning_rate": 9.964355523422201e-06, "loss": 1.1373, "step": 1700 }, { "epoch": 0.43090563647878405, "grad_norm": 3.9845049381256104, "learning_rate": 9.964255588183989e-06, "loss": 1.2183, "step": 1701 }, { "epoch": 0.4311589613679544, "grad_norm": 4.000594615936279, "learning_rate": 9.964155513551806e-06, "loss": 1.2421, "step": 1702 }, { "epoch": 0.43141228625712474, 
"grad_norm": 4.074901580810547, "learning_rate": 9.964055299528462e-06, "loss": 1.2713, "step": 1703 }, { "epoch": 0.43166561114629515, "grad_norm": 3.7564749717712402, "learning_rate": 9.96395494611677e-06, "loss": 1.0999, "step": 1704 }, { "epoch": 0.4319189360354655, "grad_norm": 3.847492218017578, "learning_rate": 9.963854453319552e-06, "loss": 1.1781, "step": 1705 }, { "epoch": 0.43217226092463584, "grad_norm": 4.278839111328125, "learning_rate": 9.963753821139625e-06, "loss": 1.2701, "step": 1706 }, { "epoch": 0.4324255858138062, "grad_norm": 3.66853666305542, "learning_rate": 9.96365304957982e-06, "loss": 1.0739, "step": 1707 }, { "epoch": 0.4326789107029766, "grad_norm": 3.588291883468628, "learning_rate": 9.963552138642962e-06, "loss": 1.1273, "step": 1708 }, { "epoch": 0.43293223559214694, "grad_norm": 3.9035909175872803, "learning_rate": 9.963451088331885e-06, "loss": 1.1392, "step": 1709 }, { "epoch": 0.4331855604813173, "grad_norm": 4.046663761138916, "learning_rate": 9.96334989864943e-06, "loss": 1.242, "step": 1710 }, { "epoch": 0.43343888537048764, "grad_norm": 3.7607357501983643, "learning_rate": 9.963248569598436e-06, "loss": 1.1249, "step": 1711 }, { "epoch": 0.433692210259658, "grad_norm": 3.775604724884033, "learning_rate": 9.963147101181748e-06, "loss": 1.131, "step": 1712 }, { "epoch": 0.4339455351488284, "grad_norm": 3.9623661041259766, "learning_rate": 9.963045493402215e-06, "loss": 1.1045, "step": 1713 }, { "epoch": 0.43419886003799874, "grad_norm": 4.357337951660156, "learning_rate": 9.962943746262691e-06, "loss": 1.3916, "step": 1714 }, { "epoch": 0.4344521849271691, "grad_norm": 3.967344045639038, "learning_rate": 9.962841859766032e-06, "loss": 1.1091, "step": 1715 }, { "epoch": 0.43470550981633943, "grad_norm": 3.9389865398406982, "learning_rate": 9.9627398339151e-06, "loss": 1.0805, "step": 1716 }, { "epoch": 0.43495883470550983, "grad_norm": 4.415093421936035, "learning_rate": 9.96263766871276e-06, "loss": 1.3108, "step": 1717 }, { "epoch": 0.4352121595946802, "grad_norm": 3.8508241176605225, "learning_rate": 9.962535364161879e-06, "loss": 1.1796, "step": 1718 }, { "epoch": 0.43546548448385053, "grad_norm": 3.9241507053375244, "learning_rate": 9.962432920265333e-06, "loss": 1.2547, "step": 1719 }, { "epoch": 0.4357188093730209, "grad_norm": 3.8462283611297607, "learning_rate": 9.962330337025993e-06, "loss": 1.1839, "step": 1720 }, { "epoch": 0.4359721342621913, "grad_norm": 3.643967628479004, "learning_rate": 9.962227614446744e-06, "loss": 1.217, "step": 1721 }, { "epoch": 0.4362254591513616, "grad_norm": 3.898104190826416, "learning_rate": 9.96212475253047e-06, "loss": 1.1689, "step": 1722 }, { "epoch": 0.436478784040532, "grad_norm": 3.9070205688476562, "learning_rate": 9.96202175128006e-06, "loss": 1.2024, "step": 1723 }, { "epoch": 0.4367321089297023, "grad_norm": 4.046167373657227, "learning_rate": 9.961918610698403e-06, "loss": 1.2221, "step": 1724 }, { "epoch": 0.4369854338188727, "grad_norm": 3.691685676574707, "learning_rate": 9.961815330788397e-06, "loss": 1.2499, "step": 1725 }, { "epoch": 0.4372387587080431, "grad_norm": 3.5208113193511963, "learning_rate": 9.961711911552943e-06, "loss": 1.1007, "step": 1726 }, { "epoch": 0.4374920835972134, "grad_norm": 3.906390905380249, "learning_rate": 9.961608352994943e-06, "loss": 1.2704, "step": 1727 }, { "epoch": 0.43774540848638377, "grad_norm": 3.5950465202331543, "learning_rate": 9.961504655117306e-06, "loss": 1.1376, "step": 1728 }, { "epoch": 0.4379987333755542, "grad_norm": 3.922039031982422, 
"learning_rate": 9.961400817922943e-06, "loss": 1.2138, "step": 1729 }, { "epoch": 0.4382520582647245, "grad_norm": 3.3800699710845947, "learning_rate": 9.961296841414772e-06, "loss": 1.1219, "step": 1730 }, { "epoch": 0.43850538315389487, "grad_norm": 4.212485313415527, "learning_rate": 9.96119272559571e-06, "loss": 1.1339, "step": 1731 }, { "epoch": 0.4387587080430652, "grad_norm": 3.8920695781707764, "learning_rate": 9.961088470468681e-06, "loss": 1.1936, "step": 1732 }, { "epoch": 0.4390120329322356, "grad_norm": 4.168819904327393, "learning_rate": 9.960984076036612e-06, "loss": 1.3314, "step": 1733 }, { "epoch": 0.43926535782140597, "grad_norm": 4.400965690612793, "learning_rate": 9.960879542302437e-06, "loss": 1.3004, "step": 1734 }, { "epoch": 0.4395186827105763, "grad_norm": 3.778353452682495, "learning_rate": 9.96077486926909e-06, "loss": 1.2599, "step": 1735 }, { "epoch": 0.43977200759974666, "grad_norm": 3.7821664810180664, "learning_rate": 9.960670056939507e-06, "loss": 1.1449, "step": 1736 }, { "epoch": 0.440025332488917, "grad_norm": 3.8294286727905273, "learning_rate": 9.960565105316636e-06, "loss": 1.1239, "step": 1737 }, { "epoch": 0.4402786573780874, "grad_norm": 3.890516996383667, "learning_rate": 9.960460014403422e-06, "loss": 1.2695, "step": 1738 }, { "epoch": 0.44053198226725776, "grad_norm": 3.5491371154785156, "learning_rate": 9.960354784202814e-06, "loss": 1.2282, "step": 1739 }, { "epoch": 0.4407853071564281, "grad_norm": 4.057201385498047, "learning_rate": 9.96024941471777e-06, "loss": 1.2313, "step": 1740 }, { "epoch": 0.44103863204559846, "grad_norm": 3.9677681922912598, "learning_rate": 9.960143905951247e-06, "loss": 1.1955, "step": 1741 }, { "epoch": 0.44129195693476886, "grad_norm": 4.036614418029785, "learning_rate": 9.960038257906206e-06, "loss": 1.3526, "step": 1742 }, { "epoch": 0.4415452818239392, "grad_norm": 4.330037593841553, "learning_rate": 9.959932470585619e-06, "loss": 1.2349, "step": 1743 }, { "epoch": 0.44179860671310955, "grad_norm": 3.7230772972106934, "learning_rate": 9.959826543992448e-06, "loss": 1.219, "step": 1744 }, { "epoch": 0.4420519316022799, "grad_norm": 3.892216682434082, "learning_rate": 9.959720478129677e-06, "loss": 1.2287, "step": 1745 }, { "epoch": 0.4423052564914503, "grad_norm": 3.636098861694336, "learning_rate": 9.959614273000276e-06, "loss": 1.2059, "step": 1746 }, { "epoch": 0.44255858138062065, "grad_norm": 3.4097251892089844, "learning_rate": 9.959507928607232e-06, "loss": 1.2674, "step": 1747 }, { "epoch": 0.442811906269791, "grad_norm": 3.809654951095581, "learning_rate": 9.95940144495353e-06, "loss": 1.2083, "step": 1748 }, { "epoch": 0.44306523115896135, "grad_norm": 3.955592393875122, "learning_rate": 9.95929482204216e-06, "loss": 1.2913, "step": 1749 }, { "epoch": 0.44331855604813175, "grad_norm": 3.4874720573425293, "learning_rate": 9.959188059876115e-06, "loss": 1.1605, "step": 1750 }, { "epoch": 0.4435718809373021, "grad_norm": 3.6861040592193604, "learning_rate": 9.959081158458393e-06, "loss": 1.196, "step": 1751 }, { "epoch": 0.44382520582647245, "grad_norm": 4.005224704742432, "learning_rate": 9.958974117791998e-06, "loss": 1.1422, "step": 1752 }, { "epoch": 0.4440785307156428, "grad_norm": 4.103886604309082, "learning_rate": 9.958866937879932e-06, "loss": 1.2124, "step": 1753 }, { "epoch": 0.4443318556048132, "grad_norm": 3.5270473957061768, "learning_rate": 9.958759618725208e-06, "loss": 1.1824, "step": 1754 }, { "epoch": 0.44458518049398354, "grad_norm": 4.049485206604004, "learning_rate": 
9.958652160330837e-06, "loss": 1.22, "step": 1755 }, { "epoch": 0.4448385053831539, "grad_norm": 3.594125986099243, "learning_rate": 9.958544562699838e-06, "loss": 1.1801, "step": 1756 }, { "epoch": 0.44509183027232424, "grad_norm": 4.060774803161621, "learning_rate": 9.95843682583523e-06, "loss": 1.1721, "step": 1757 }, { "epoch": 0.44534515516149464, "grad_norm": 4.582459449768066, "learning_rate": 9.958328949740043e-06, "loss": 1.2746, "step": 1758 }, { "epoch": 0.445598480050665, "grad_norm": 3.7934377193450928, "learning_rate": 9.958220934417302e-06, "loss": 1.1259, "step": 1759 }, { "epoch": 0.44585180493983534, "grad_norm": 4.88258695602417, "learning_rate": 9.95811277987004e-06, "loss": 1.2888, "step": 1760 }, { "epoch": 0.4461051298290057, "grad_norm": 4.136811256408691, "learning_rate": 9.958004486101293e-06, "loss": 1.3195, "step": 1761 }, { "epoch": 0.44635845471817603, "grad_norm": 4.080023765563965, "learning_rate": 9.957896053114106e-06, "loss": 1.2883, "step": 1762 }, { "epoch": 0.44661177960734644, "grad_norm": 3.545778274536133, "learning_rate": 9.957787480911522e-06, "loss": 1.113, "step": 1763 }, { "epoch": 0.4468651044965168, "grad_norm": 4.2189178466796875, "learning_rate": 9.957678769496587e-06, "loss": 1.286, "step": 1764 }, { "epoch": 0.44711842938568713, "grad_norm": 3.997009754180908, "learning_rate": 9.957569918872359e-06, "loss": 1.2288, "step": 1765 }, { "epoch": 0.4473717542748575, "grad_norm": 4.109933853149414, "learning_rate": 9.95746092904189e-06, "loss": 1.4011, "step": 1766 }, { "epoch": 0.4476250791640279, "grad_norm": 3.9960086345672607, "learning_rate": 9.957351800008241e-06, "loss": 1.319, "step": 1767 }, { "epoch": 0.44787840405319823, "grad_norm": 4.276450157165527, "learning_rate": 9.957242531774476e-06, "loss": 1.1925, "step": 1768 }, { "epoch": 0.4481317289423686, "grad_norm": 4.669538497924805, "learning_rate": 9.957133124343666e-06, "loss": 1.1981, "step": 1769 }, { "epoch": 0.4483850538315389, "grad_norm": 4.093993663787842, "learning_rate": 9.957023577718879e-06, "loss": 1.2145, "step": 1770 }, { "epoch": 0.44863837872070933, "grad_norm": 3.737457036972046, "learning_rate": 9.956913891903195e-06, "loss": 1.293, "step": 1771 }, { "epoch": 0.4488917036098797, "grad_norm": 3.5621869564056396, "learning_rate": 9.95680406689969e-06, "loss": 1.0754, "step": 1772 }, { "epoch": 0.44914502849905, "grad_norm": 3.8836936950683594, "learning_rate": 9.956694102711452e-06, "loss": 1.2296, "step": 1773 }, { "epoch": 0.4493983533882204, "grad_norm": 4.1828742027282715, "learning_rate": 9.956583999341564e-06, "loss": 1.5065, "step": 1774 }, { "epoch": 0.4496516782773908, "grad_norm": 4.305746555328369, "learning_rate": 9.956473756793123e-06, "loss": 1.3093, "step": 1775 }, { "epoch": 0.4499050031665611, "grad_norm": 3.833472728729248, "learning_rate": 9.956363375069222e-06, "loss": 1.2496, "step": 1776 }, { "epoch": 0.45015832805573147, "grad_norm": 3.845844268798828, "learning_rate": 9.95625285417296e-06, "loss": 1.182, "step": 1777 }, { "epoch": 0.4504116529449018, "grad_norm": 3.8236515522003174, "learning_rate": 9.956142194107438e-06, "loss": 1.1313, "step": 1778 }, { "epoch": 0.4506649778340722, "grad_norm": 3.7965993881225586, "learning_rate": 9.95603139487577e-06, "loss": 1.1112, "step": 1779 }, { "epoch": 0.45091830272324257, "grad_norm": 4.976373195648193, "learning_rate": 9.95592045648106e-06, "loss": 1.1899, "step": 1780 }, { "epoch": 0.4511716276124129, "grad_norm": 3.762986183166504, "learning_rate": 9.955809378926428e-06, "loss": 1.1653, 
"step": 1781 }, { "epoch": 0.45142495250158327, "grad_norm": 3.95588755607605, "learning_rate": 9.955698162214992e-06, "loss": 1.2598, "step": 1782 }, { "epoch": 0.45167827739075367, "grad_norm": 3.8318119049072266, "learning_rate": 9.955586806349874e-06, "loss": 1.2612, "step": 1783 }, { "epoch": 0.451931602279924, "grad_norm": 3.6611649990081787, "learning_rate": 9.9554753113342e-06, "loss": 1.2329, "step": 1784 }, { "epoch": 0.45218492716909436, "grad_norm": 4.284769058227539, "learning_rate": 9.955363677171101e-06, "loss": 1.2697, "step": 1785 }, { "epoch": 0.4524382520582647, "grad_norm": 3.549837112426758, "learning_rate": 9.955251903863713e-06, "loss": 1.1174, "step": 1786 }, { "epoch": 0.45269157694743506, "grad_norm": 4.23306941986084, "learning_rate": 9.955139991415175e-06, "loss": 1.2886, "step": 1787 }, { "epoch": 0.45294490183660546, "grad_norm": 3.990553617477417, "learning_rate": 9.955027939828628e-06, "loss": 1.3615, "step": 1788 }, { "epoch": 0.4531982267257758, "grad_norm": 4.17873477935791, "learning_rate": 9.954915749107219e-06, "loss": 1.3555, "step": 1789 }, { "epoch": 0.45345155161494616, "grad_norm": 3.8110413551330566, "learning_rate": 9.954803419254097e-06, "loss": 1.2577, "step": 1790 }, { "epoch": 0.4537048765041165, "grad_norm": 3.6629533767700195, "learning_rate": 9.954690950272419e-06, "loss": 1.0698, "step": 1791 }, { "epoch": 0.4539582013932869, "grad_norm": 3.7844316959381104, "learning_rate": 9.95457834216534e-06, "loss": 1.2555, "step": 1792 }, { "epoch": 0.45421152628245726, "grad_norm": 3.962120771408081, "learning_rate": 9.954465594936024e-06, "loss": 1.2444, "step": 1793 }, { "epoch": 0.4544648511716276, "grad_norm": 3.9265973567962646, "learning_rate": 9.954352708587636e-06, "loss": 1.283, "step": 1794 }, { "epoch": 0.45471817606079795, "grad_norm": 4.013422966003418, "learning_rate": 9.954239683123344e-06, "loss": 1.2045, "step": 1795 }, { "epoch": 0.45497150094996835, "grad_norm": 4.067855358123779, "learning_rate": 9.954126518546326e-06, "loss": 1.2415, "step": 1796 }, { "epoch": 0.4552248258391387, "grad_norm": 3.4011635780334473, "learning_rate": 9.954013214859757e-06, "loss": 1.0973, "step": 1797 }, { "epoch": 0.45547815072830905, "grad_norm": 3.786022186279297, "learning_rate": 9.953899772066817e-06, "loss": 1.132, "step": 1798 }, { "epoch": 0.4557314756174794, "grad_norm": 3.6727492809295654, "learning_rate": 9.953786190170694e-06, "loss": 1.1464, "step": 1799 }, { "epoch": 0.4559848005066498, "grad_norm": 3.9442310333251953, "learning_rate": 9.953672469174578e-06, "loss": 1.1045, "step": 1800 }, { "epoch": 0.45623812539582015, "grad_norm": 3.8506267070770264, "learning_rate": 9.953558609081659e-06, "loss": 1.2371, "step": 1801 }, { "epoch": 0.4564914502849905, "grad_norm": 3.5730085372924805, "learning_rate": 9.953444609895136e-06, "loss": 1.1059, "step": 1802 }, { "epoch": 0.45674477517416084, "grad_norm": 4.165727138519287, "learning_rate": 9.953330471618211e-06, "loss": 1.1077, "step": 1803 }, { "epoch": 0.45699810006333125, "grad_norm": 3.7444920539855957, "learning_rate": 9.953216194254088e-06, "loss": 1.1051, "step": 1804 }, { "epoch": 0.4572514249525016, "grad_norm": 4.292081832885742, "learning_rate": 9.953101777805973e-06, "loss": 1.3843, "step": 1805 }, { "epoch": 0.45750474984167194, "grad_norm": 3.955099582672119, "learning_rate": 9.952987222277084e-06, "loss": 1.2544, "step": 1806 }, { "epoch": 0.4577580747308423, "grad_norm": 3.6804616451263428, "learning_rate": 9.952872527670636e-06, "loss": 1.0769, "step": 1807 }, { "epoch": 
0.4580113996200127, "grad_norm": 4.215207576751709, "learning_rate": 9.952757693989848e-06, "loss": 1.3601, "step": 1808 }, { "epoch": 0.45826472450918304, "grad_norm": 3.824826240539551, "learning_rate": 9.952642721237945e-06, "loss": 1.2812, "step": 1809 }, { "epoch": 0.4585180493983534, "grad_norm": 3.581301689147949, "learning_rate": 9.952527609418153e-06, "loss": 1.0627, "step": 1810 }, { "epoch": 0.45877137428752374, "grad_norm": 3.8855178356170654, "learning_rate": 9.95241235853371e-06, "loss": 1.3018, "step": 1811 }, { "epoch": 0.4590246991766941, "grad_norm": 3.7887930870056152, "learning_rate": 9.952296968587847e-06, "loss": 1.2669, "step": 1812 }, { "epoch": 0.4592780240658645, "grad_norm": 3.6120541095733643, "learning_rate": 9.952181439583808e-06, "loss": 1.0911, "step": 1813 }, { "epoch": 0.45953134895503484, "grad_norm": 5.2798357009887695, "learning_rate": 9.952065771524834e-06, "loss": 1.1291, "step": 1814 }, { "epoch": 0.4597846738442052, "grad_norm": 4.032440185546875, "learning_rate": 9.951949964414174e-06, "loss": 1.2002, "step": 1815 }, { "epoch": 0.46003799873337553, "grad_norm": 3.861185073852539, "learning_rate": 9.95183401825508e-06, "loss": 1.2061, "step": 1816 }, { "epoch": 0.46029132362254593, "grad_norm": 4.094289302825928, "learning_rate": 9.951717933050808e-06, "loss": 1.3375, "step": 1817 }, { "epoch": 0.4605446485117163, "grad_norm": 4.029137134552002, "learning_rate": 9.951601708804616e-06, "loss": 1.1493, "step": 1818 }, { "epoch": 0.46079797340088663, "grad_norm": 4.202754497528076, "learning_rate": 9.95148534551977e-06, "loss": 1.3218, "step": 1819 }, { "epoch": 0.461051298290057, "grad_norm": 4.536633014678955, "learning_rate": 9.951368843199537e-06, "loss": 1.3656, "step": 1820 }, { "epoch": 0.4613046231792274, "grad_norm": 3.5142319202423096, "learning_rate": 9.951252201847185e-06, "loss": 1.2584, "step": 1821 }, { "epoch": 0.4615579480683977, "grad_norm": 4.182380199432373, "learning_rate": 9.951135421465994e-06, "loss": 1.1919, "step": 1822 }, { "epoch": 0.4618112729575681, "grad_norm": 4.094515323638916, "learning_rate": 9.951018502059241e-06, "loss": 1.248, "step": 1823 }, { "epoch": 0.4620645978467384, "grad_norm": 3.897925615310669, "learning_rate": 9.950901443630207e-06, "loss": 1.1969, "step": 1824 }, { "epoch": 0.4623179227359088, "grad_norm": 3.899789571762085, "learning_rate": 9.950784246182182e-06, "loss": 1.3231, "step": 1825 }, { "epoch": 0.4625712476250792, "grad_norm": 3.80564284324646, "learning_rate": 9.950666909718455e-06, "loss": 1.2565, "step": 1826 }, { "epoch": 0.4628245725142495, "grad_norm": 3.5356082916259766, "learning_rate": 9.950549434242323e-06, "loss": 1.0738, "step": 1827 }, { "epoch": 0.46307789740341987, "grad_norm": 3.7041282653808594, "learning_rate": 9.950431819757082e-06, "loss": 1.2571, "step": 1828 }, { "epoch": 0.4633312222925903, "grad_norm": 3.9085562229156494, "learning_rate": 9.950314066266036e-06, "loss": 1.1865, "step": 1829 }, { "epoch": 0.4635845471817606, "grad_norm": 4.401854038238525, "learning_rate": 9.95019617377249e-06, "loss": 1.303, "step": 1830 }, { "epoch": 0.46383787207093097, "grad_norm": 3.7897353172302246, "learning_rate": 9.950078142279756e-06, "loss": 1.1402, "step": 1831 }, { "epoch": 0.4640911969601013, "grad_norm": 4.0070390701293945, "learning_rate": 9.949959971791148e-06, "loss": 1.4373, "step": 1832 }, { "epoch": 0.4643445218492717, "grad_norm": 3.7028911113739014, "learning_rate": 9.949841662309984e-06, "loss": 1.2008, "step": 1833 }, { "epoch": 0.46459784673844207, "grad_norm": 
3.7358293533325195, "learning_rate": 9.949723213839587e-06, "loss": 1.1822, "step": 1834 }, { "epoch": 0.4648511716276124, "grad_norm": 3.5930306911468506, "learning_rate": 9.94960462638328e-06, "loss": 1.1352, "step": 1835 }, { "epoch": 0.46510449651678276, "grad_norm": 3.7119147777557373, "learning_rate": 9.949485899944396e-06, "loss": 1.1535, "step": 1836 }, { "epoch": 0.4653578214059531, "grad_norm": 3.891018867492676, "learning_rate": 9.949367034526267e-06, "loss": 1.2585, "step": 1837 }, { "epoch": 0.4656111462951235, "grad_norm": 3.875239133834839, "learning_rate": 9.94924803013223e-06, "loss": 1.2086, "step": 1838 }, { "epoch": 0.46586447118429386, "grad_norm": 3.798842668533325, "learning_rate": 9.94912888676563e-06, "loss": 1.222, "step": 1839 }, { "epoch": 0.4661177960734642, "grad_norm": 3.962372064590454, "learning_rate": 9.949009604429811e-06, "loss": 1.2586, "step": 1840 }, { "epoch": 0.46637112096263456, "grad_norm": 3.7630298137664795, "learning_rate": 9.948890183128122e-06, "loss": 1.0473, "step": 1841 }, { "epoch": 0.46662444585180496, "grad_norm": 3.6790220737457275, "learning_rate": 9.948770622863914e-06, "loss": 1.2097, "step": 1842 }, { "epoch": 0.4668777707409753, "grad_norm": 3.9057321548461914, "learning_rate": 9.948650923640547e-06, "loss": 1.2565, "step": 1843 }, { "epoch": 0.46713109563014565, "grad_norm": 4.201375961303711, "learning_rate": 9.948531085461382e-06, "loss": 1.1996, "step": 1844 }, { "epoch": 0.467384420519316, "grad_norm": 3.963596820831299, "learning_rate": 9.948411108329783e-06, "loss": 1.2559, "step": 1845 }, { "epoch": 0.4676377454084864, "grad_norm": 3.891873598098755, "learning_rate": 9.94829099224912e-06, "loss": 1.3843, "step": 1846 }, { "epoch": 0.46789107029765675, "grad_norm": 3.9661974906921387, "learning_rate": 9.948170737222763e-06, "loss": 1.3839, "step": 1847 }, { "epoch": 0.4681443951868271, "grad_norm": 3.680875062942505, "learning_rate": 9.948050343254092e-06, "loss": 1.2136, "step": 1848 }, { "epoch": 0.46839772007599745, "grad_norm": 3.872366189956665, "learning_rate": 9.947929810346486e-06, "loss": 1.228, "step": 1849 }, { "epoch": 0.46865104496516785, "grad_norm": 4.095637798309326, "learning_rate": 9.94780913850333e-06, "loss": 1.136, "step": 1850 }, { "epoch": 0.4689043698543382, "grad_norm": 3.4010581970214844, "learning_rate": 9.947688327728013e-06, "loss": 1.1325, "step": 1851 }, { "epoch": 0.46915769474350855, "grad_norm": 3.8270957469940186, "learning_rate": 9.947567378023927e-06, "loss": 1.1534, "step": 1852 }, { "epoch": 0.4694110196326789, "grad_norm": 4.096489429473877, "learning_rate": 9.947446289394466e-06, "loss": 1.2138, "step": 1853 }, { "epoch": 0.4696643445218493, "grad_norm": 3.8658835887908936, "learning_rate": 9.947325061843035e-06, "loss": 1.1772, "step": 1854 }, { "epoch": 0.46991766941101965, "grad_norm": 4.072702407836914, "learning_rate": 9.947203695373033e-06, "loss": 1.2552, "step": 1855 }, { "epoch": 0.47017099430019, "grad_norm": 3.8411388397216797, "learning_rate": 9.94708218998787e-06, "loss": 1.3319, "step": 1856 }, { "epoch": 0.47042431918936034, "grad_norm": 3.4740614891052246, "learning_rate": 9.946960545690958e-06, "loss": 1.0957, "step": 1857 }, { "epoch": 0.4706776440785307, "grad_norm": 3.8394386768341064, "learning_rate": 9.946838762485712e-06, "loss": 1.2165, "step": 1858 }, { "epoch": 0.4709309689677011, "grad_norm": 3.7392282485961914, "learning_rate": 9.946716840375552e-06, "loss": 1.245, "step": 1859 }, { "epoch": 0.47118429385687144, "grad_norm": 4.12973690032959, 
"learning_rate": 9.946594779363901e-06, "loss": 1.1534, "step": 1860 }, { "epoch": 0.4714376187460418, "grad_norm": 3.3439900875091553, "learning_rate": 9.946472579454188e-06, "loss": 1.0638, "step": 1861 }, { "epoch": 0.47169094363521213, "grad_norm": 3.2665603160858154, "learning_rate": 9.946350240649843e-06, "loss": 1.0374, "step": 1862 }, { "epoch": 0.47194426852438254, "grad_norm": 3.712949752807617, "learning_rate": 9.9462277629543e-06, "loss": 1.2055, "step": 1863 }, { "epoch": 0.4721975934135529, "grad_norm": 3.8193211555480957, "learning_rate": 9.946105146371003e-06, "loss": 1.1696, "step": 1864 }, { "epoch": 0.47245091830272323, "grad_norm": 4.11019229888916, "learning_rate": 9.94598239090339e-06, "loss": 1.2141, "step": 1865 }, { "epoch": 0.4727042431918936, "grad_norm": 4.131199359893799, "learning_rate": 9.945859496554909e-06, "loss": 1.2633, "step": 1866 }, { "epoch": 0.472957568081064, "grad_norm": 4.110605716705322, "learning_rate": 9.94573646332901e-06, "loss": 1.3537, "step": 1867 }, { "epoch": 0.47321089297023433, "grad_norm": 4.174281120300293, "learning_rate": 9.945613291229152e-06, "loss": 1.1177, "step": 1868 }, { "epoch": 0.4734642178594047, "grad_norm": 4.247077465057373, "learning_rate": 9.945489980258788e-06, "loss": 1.2448, "step": 1869 }, { "epoch": 0.473717542748575, "grad_norm": 3.9473955631256104, "learning_rate": 9.945366530421385e-06, "loss": 1.1886, "step": 1870 }, { "epoch": 0.47397086763774543, "grad_norm": 3.6181344985961914, "learning_rate": 9.945242941720408e-06, "loss": 1.1013, "step": 1871 }, { "epoch": 0.4742241925269158, "grad_norm": 3.9763457775115967, "learning_rate": 9.945119214159324e-06, "loss": 1.2935, "step": 1872 }, { "epoch": 0.4744775174160861, "grad_norm": 3.6653854846954346, "learning_rate": 9.944995347741613e-06, "loss": 1.0569, "step": 1873 }, { "epoch": 0.4747308423052565, "grad_norm": 3.494320869445801, "learning_rate": 9.94487134247075e-06, "loss": 1.0765, "step": 1874 }, { "epoch": 0.4749841671944269, "grad_norm": 3.7668800354003906, "learning_rate": 9.944747198350215e-06, "loss": 1.1446, "step": 1875 }, { "epoch": 0.4752374920835972, "grad_norm": 3.7044589519500732, "learning_rate": 9.9446229153835e-06, "loss": 1.1367, "step": 1876 }, { "epoch": 0.47549081697276757, "grad_norm": 3.969409227371216, "learning_rate": 9.944498493574088e-06, "loss": 1.2635, "step": 1877 }, { "epoch": 0.4757441418619379, "grad_norm": 3.864562511444092, "learning_rate": 9.944373932925475e-06, "loss": 1.318, "step": 1878 }, { "epoch": 0.4759974667511083, "grad_norm": 3.6207752227783203, "learning_rate": 9.944249233441162e-06, "loss": 1.2752, "step": 1879 }, { "epoch": 0.47625079164027867, "grad_norm": 3.924584150314331, "learning_rate": 9.944124395124645e-06, "loss": 1.2219, "step": 1880 }, { "epoch": 0.476504116529449, "grad_norm": 3.9239914417266846, "learning_rate": 9.943999417979435e-06, "loss": 1.2603, "step": 1881 }, { "epoch": 0.47675744141861937, "grad_norm": 3.840116500854492, "learning_rate": 9.943874302009037e-06, "loss": 1.1768, "step": 1882 }, { "epoch": 0.4770107663077897, "grad_norm": 3.9847187995910645, "learning_rate": 9.943749047216966e-06, "loss": 1.0493, "step": 1883 }, { "epoch": 0.4772640911969601, "grad_norm": 3.9286274909973145, "learning_rate": 9.943623653606738e-06, "loss": 1.3563, "step": 1884 }, { "epoch": 0.47751741608613046, "grad_norm": 4.200928688049316, "learning_rate": 9.943498121181877e-06, "loss": 1.3595, "step": 1885 }, { "epoch": 0.4777707409753008, "grad_norm": 3.5353293418884277, "learning_rate": 
9.943372449945903e-06, "loss": 1.1743, "step": 1886 }, { "epoch": 0.47802406586447116, "grad_norm": 4.019369602203369, "learning_rate": 9.943246639902349e-06, "loss": 1.1256, "step": 1887 }, { "epoch": 0.47827739075364156, "grad_norm": 4.187241077423096, "learning_rate": 9.943120691054745e-06, "loss": 1.3265, "step": 1888 }, { "epoch": 0.4785307156428119, "grad_norm": 4.001984596252441, "learning_rate": 9.942994603406629e-06, "loss": 1.2638, "step": 1889 }, { "epoch": 0.47878404053198226, "grad_norm": 3.6993045806884766, "learning_rate": 9.942868376961542e-06, "loss": 1.1978, "step": 1890 }, { "epoch": 0.4790373654211526, "grad_norm": 3.9563772678375244, "learning_rate": 9.942742011723028e-06, "loss": 1.2217, "step": 1891 }, { "epoch": 0.479290690310323, "grad_norm": 3.6382575035095215, "learning_rate": 9.942615507694633e-06, "loss": 1.1652, "step": 1892 }, { "epoch": 0.47954401519949336, "grad_norm": 4.0197834968566895, "learning_rate": 9.942488864879912e-06, "loss": 1.3435, "step": 1893 }, { "epoch": 0.4797973400886637, "grad_norm": 3.5034239292144775, "learning_rate": 9.94236208328242e-06, "loss": 1.0797, "step": 1894 }, { "epoch": 0.48005066497783405, "grad_norm": 3.9032034873962402, "learning_rate": 9.942235162905719e-06, "loss": 1.2162, "step": 1895 }, { "epoch": 0.48030398986700445, "grad_norm": 3.9833247661590576, "learning_rate": 9.942108103753367e-06, "loss": 1.2685, "step": 1896 }, { "epoch": 0.4805573147561748, "grad_norm": 3.9508931636810303, "learning_rate": 9.941980905828939e-06, "loss": 1.1964, "step": 1897 }, { "epoch": 0.48081063964534515, "grad_norm": 4.35664176940918, "learning_rate": 9.941853569136001e-06, "loss": 1.3512, "step": 1898 }, { "epoch": 0.4810639645345155, "grad_norm": 4.374859809875488, "learning_rate": 9.941726093678132e-06, "loss": 1.2175, "step": 1899 }, { "epoch": 0.4813172894236859, "grad_norm": 3.94962215423584, "learning_rate": 9.941598479458911e-06, "loss": 1.3012, "step": 1900 }, { "epoch": 0.48157061431285625, "grad_norm": 3.9118425846099854, "learning_rate": 9.941470726481921e-06, "loss": 1.1469, "step": 1901 }, { "epoch": 0.4818239392020266, "grad_norm": 3.626525402069092, "learning_rate": 9.941342834750748e-06, "loss": 1.2502, "step": 1902 }, { "epoch": 0.48207726409119694, "grad_norm": 4.247342109680176, "learning_rate": 9.941214804268983e-06, "loss": 1.2068, "step": 1903 }, { "epoch": 0.48233058898036735, "grad_norm": 4.113563060760498, "learning_rate": 9.941086635040225e-06, "loss": 1.2238, "step": 1904 }, { "epoch": 0.4825839138695377, "grad_norm": 3.7747962474823, "learning_rate": 9.940958327068068e-06, "loss": 1.1895, "step": 1905 }, { "epoch": 0.48283723875870804, "grad_norm": 4.083578109741211, "learning_rate": 9.940829880356117e-06, "loss": 1.2219, "step": 1906 }, { "epoch": 0.4830905636478784, "grad_norm": 3.6183180809020996, "learning_rate": 9.940701294907979e-06, "loss": 1.2029, "step": 1907 }, { "epoch": 0.48334388853704874, "grad_norm": 4.016280651092529, "learning_rate": 9.940572570727265e-06, "loss": 1.2682, "step": 1908 }, { "epoch": 0.48359721342621914, "grad_norm": 3.8436830043792725, "learning_rate": 9.940443707817588e-06, "loss": 1.33, "step": 1909 }, { "epoch": 0.4838505383153895, "grad_norm": 3.845280408859253, "learning_rate": 9.940314706182566e-06, "loss": 1.2901, "step": 1910 }, { "epoch": 0.48410386320455984, "grad_norm": 3.605034589767456, "learning_rate": 9.940185565825824e-06, "loss": 1.3842, "step": 1911 }, { "epoch": 0.4843571880937302, "grad_norm": 3.5678157806396484, "learning_rate": 9.940056286750988e-06, 
"loss": 1.1572, "step": 1912 }, { "epoch": 0.4846105129829006, "grad_norm": 4.389228343963623, "learning_rate": 9.939926868961684e-06, "loss": 1.195, "step": 1913 }, { "epoch": 0.48486383787207094, "grad_norm": 3.625386953353882, "learning_rate": 9.93979731246155e-06, "loss": 1.2272, "step": 1914 }, { "epoch": 0.4851171627612413, "grad_norm": 3.577895164489746, "learning_rate": 9.939667617254222e-06, "loss": 1.0943, "step": 1915 }, { "epoch": 0.48537048765041163, "grad_norm": 4.115499019622803, "learning_rate": 9.939537783343342e-06, "loss": 1.198, "step": 1916 }, { "epoch": 0.48562381253958203, "grad_norm": 3.993464708328247, "learning_rate": 9.939407810732558e-06, "loss": 1.3738, "step": 1917 }, { "epoch": 0.4858771374287524, "grad_norm": 4.160882472991943, "learning_rate": 9.939277699425515e-06, "loss": 1.2951, "step": 1918 }, { "epoch": 0.48613046231792273, "grad_norm": 3.5682358741760254, "learning_rate": 9.939147449425873e-06, "loss": 1.1651, "step": 1919 }, { "epoch": 0.4863837872070931, "grad_norm": 3.803846836090088, "learning_rate": 9.939017060737283e-06, "loss": 1.2901, "step": 1920 }, { "epoch": 0.4866371120962635, "grad_norm": 4.027624130249023, "learning_rate": 9.938886533363408e-06, "loss": 1.2616, "step": 1921 }, { "epoch": 0.4868904369854338, "grad_norm": 3.9616568088531494, "learning_rate": 9.938755867307915e-06, "loss": 1.2761, "step": 1922 }, { "epoch": 0.4871437618746042, "grad_norm": 3.555142641067505, "learning_rate": 9.938625062574471e-06, "loss": 1.1668, "step": 1923 }, { "epoch": 0.4873970867637745, "grad_norm": 3.52968168258667, "learning_rate": 9.938494119166751e-06, "loss": 1.2268, "step": 1924 }, { "epoch": 0.4876504116529449, "grad_norm": 3.665621519088745, "learning_rate": 9.93836303708843e-06, "loss": 1.003, "step": 1925 }, { "epoch": 0.4879037365421153, "grad_norm": 3.53971004486084, "learning_rate": 9.938231816343191e-06, "loss": 0.9954, "step": 1926 }, { "epoch": 0.4881570614312856, "grad_norm": 4.071761608123779, "learning_rate": 9.938100456934716e-06, "loss": 1.2768, "step": 1927 }, { "epoch": 0.48841038632045597, "grad_norm": 3.6120855808258057, "learning_rate": 9.937968958866693e-06, "loss": 1.0892, "step": 1928 }, { "epoch": 0.4886637112096264, "grad_norm": 3.712071657180786, "learning_rate": 9.937837322142818e-06, "loss": 1.2015, "step": 1929 }, { "epoch": 0.4889170360987967, "grad_norm": 4.381753921508789, "learning_rate": 9.937705546766784e-06, "loss": 1.3256, "step": 1930 }, { "epoch": 0.48917036098796707, "grad_norm": 3.739407539367676, "learning_rate": 9.937573632742294e-06, "loss": 1.1897, "step": 1931 }, { "epoch": 0.4894236858771374, "grad_norm": 3.939709186553955, "learning_rate": 9.93744158007305e-06, "loss": 1.0873, "step": 1932 }, { "epoch": 0.48967701076630776, "grad_norm": 3.766883373260498, "learning_rate": 9.93730938876276e-06, "loss": 1.1832, "step": 1933 }, { "epoch": 0.48993033565547817, "grad_norm": 3.7371826171875, "learning_rate": 9.937177058815134e-06, "loss": 1.191, "step": 1934 }, { "epoch": 0.4901836605446485, "grad_norm": 3.86498761177063, "learning_rate": 9.937044590233895e-06, "loss": 1.3341, "step": 1935 }, { "epoch": 0.49043698543381886, "grad_norm": 3.599517583847046, "learning_rate": 9.936911983022755e-06, "loss": 1.109, "step": 1936 }, { "epoch": 0.4906903103229892, "grad_norm": 3.963528871536255, "learning_rate": 9.93677923718544e-06, "loss": 1.305, "step": 1937 }, { "epoch": 0.4909436352121596, "grad_norm": 3.53969669342041, "learning_rate": 9.936646352725678e-06, "loss": 1.1524, "step": 1938 }, { "epoch": 
0.49119696010132996, "grad_norm": 3.351635456085205, "learning_rate": 9.936513329647201e-06, "loss": 1.1303, "step": 1939 }, { "epoch": 0.4914502849905003, "grad_norm": 3.753661870956421, "learning_rate": 9.936380167953744e-06, "loss": 1.1581, "step": 1940 }, { "epoch": 0.49170360987967066, "grad_norm": 3.5876848697662354, "learning_rate": 9.936246867649044e-06, "loss": 1.0585, "step": 1941 }, { "epoch": 0.49195693476884106, "grad_norm": 3.771541118621826, "learning_rate": 9.936113428736845e-06, "loss": 1.1871, "step": 1942 }, { "epoch": 0.4922102596580114, "grad_norm": 3.8653862476348877, "learning_rate": 9.935979851220895e-06, "loss": 1.2944, "step": 1943 }, { "epoch": 0.49246358454718175, "grad_norm": 4.140165328979492, "learning_rate": 9.935846135104945e-06, "loss": 1.3218, "step": 1944 }, { "epoch": 0.4927169094363521, "grad_norm": 3.655492067337036, "learning_rate": 9.935712280392747e-06, "loss": 1.199, "step": 1945 }, { "epoch": 0.4929702343255225, "grad_norm": 3.7361409664154053, "learning_rate": 9.935578287088063e-06, "loss": 1.2055, "step": 1946 }, { "epoch": 0.49322355921469285, "grad_norm": 4.23722505569458, "learning_rate": 9.935444155194654e-06, "loss": 1.2338, "step": 1947 }, { "epoch": 0.4934768841038632, "grad_norm": 3.802419900894165, "learning_rate": 9.935309884716285e-06, "loss": 1.2822, "step": 1948 }, { "epoch": 0.49373020899303355, "grad_norm": 3.4896342754364014, "learning_rate": 9.93517547565673e-06, "loss": 1.1569, "step": 1949 }, { "epoch": 0.49398353388220395, "grad_norm": 4.055016040802002, "learning_rate": 9.935040928019756e-06, "loss": 1.106, "step": 1950 }, { "epoch": 0.4942368587713743, "grad_norm": 3.828601598739624, "learning_rate": 9.93490624180915e-06, "loss": 1.1133, "step": 1951 }, { "epoch": 0.49449018366054465, "grad_norm": 3.522599220275879, "learning_rate": 9.934771417028688e-06, "loss": 1.0369, "step": 1952 }, { "epoch": 0.494743508549715, "grad_norm": 3.7118875980377197, "learning_rate": 9.934636453682158e-06, "loss": 1.2343, "step": 1953 }, { "epoch": 0.4949968334388854, "grad_norm": 3.7711949348449707, "learning_rate": 9.93450135177335e-06, "loss": 1.1774, "step": 1954 }, { "epoch": 0.49525015832805575, "grad_norm": 3.6655616760253906, "learning_rate": 9.934366111306055e-06, "loss": 1.2547, "step": 1955 }, { "epoch": 0.4955034832172261, "grad_norm": 3.4832236766815186, "learning_rate": 9.934230732284072e-06, "loss": 1.1242, "step": 1956 }, { "epoch": 0.49575680810639644, "grad_norm": 3.6455090045928955, "learning_rate": 9.934095214711204e-06, "loss": 1.4028, "step": 1957 }, { "epoch": 0.4960101329955668, "grad_norm": 3.7010371685028076, "learning_rate": 9.933959558591254e-06, "loss": 1.3082, "step": 1958 }, { "epoch": 0.4962634578847372, "grad_norm": 4.023183345794678, "learning_rate": 9.933823763928032e-06, "loss": 1.282, "step": 1959 }, { "epoch": 0.49651678277390754, "grad_norm": 3.851107358932495, "learning_rate": 9.933687830725351e-06, "loss": 1.2865, "step": 1960 }, { "epoch": 0.4967701076630779, "grad_norm": 3.800704002380371, "learning_rate": 9.933551758987029e-06, "loss": 1.3246, "step": 1961 }, { "epoch": 0.49702343255224823, "grad_norm": 3.916658401489258, "learning_rate": 9.933415548716884e-06, "loss": 1.2298, "step": 1962 }, { "epoch": 0.49727675744141864, "grad_norm": 4.056824207305908, "learning_rate": 9.933279199918743e-06, "loss": 1.266, "step": 1963 }, { "epoch": 0.497530082330589, "grad_norm": 3.6966655254364014, "learning_rate": 9.933142712596435e-06, "loss": 1.0131, "step": 1964 }, { "epoch": 0.49778340721975933, 
"grad_norm": 3.6945905685424805, "learning_rate": 9.933006086753793e-06, "loss": 1.2248, "step": 1965 }, { "epoch": 0.4980367321089297, "grad_norm": 3.9814541339874268, "learning_rate": 9.93286932239465e-06, "loss": 1.3103, "step": 1966 }, { "epoch": 0.4982900569981001, "grad_norm": 3.521240472793579, "learning_rate": 9.932732419522849e-06, "loss": 1.0976, "step": 1967 }, { "epoch": 0.49854338188727043, "grad_norm": 3.8234946727752686, "learning_rate": 9.932595378142233e-06, "loss": 1.3093, "step": 1968 }, { "epoch": 0.4987967067764408, "grad_norm": 3.7615811824798584, "learning_rate": 9.932458198256652e-06, "loss": 1.1305, "step": 1969 }, { "epoch": 0.4990500316656111, "grad_norm": 3.895721912384033, "learning_rate": 9.932320879869956e-06, "loss": 1.1736, "step": 1970 }, { "epoch": 0.49930335655478153, "grad_norm": 3.6676993370056152, "learning_rate": 9.932183422986e-06, "loss": 1.2882, "step": 1971 }, { "epoch": 0.4995566814439519, "grad_norm": 3.9136924743652344, "learning_rate": 9.932045827608648e-06, "loss": 1.1333, "step": 1972 }, { "epoch": 0.4998100063331222, "grad_norm": 3.678321599960327, "learning_rate": 9.931908093741757e-06, "loss": 1.2515, "step": 1973 }, { "epoch": 0.5000633312222926, "grad_norm": 3.9502320289611816, "learning_rate": 9.931770221389201e-06, "loss": 1.1732, "step": 1974 }, { "epoch": 0.5003166561114629, "grad_norm": 4.168564796447754, "learning_rate": 9.931632210554846e-06, "loss": 1.477, "step": 1975 }, { "epoch": 0.5005699810006333, "grad_norm": 3.4833409786224365, "learning_rate": 9.931494061242573e-06, "loss": 1.154, "step": 1976 }, { "epoch": 0.5008233058898037, "grad_norm": 3.859236240386963, "learning_rate": 9.931355773456257e-06, "loss": 1.2578, "step": 1977 }, { "epoch": 0.501076630778974, "grad_norm": 4.015527248382568, "learning_rate": 9.93121734719978e-06, "loss": 1.2787, "step": 1978 }, { "epoch": 0.5013299556681444, "grad_norm": 3.5901248455047607, "learning_rate": 9.931078782477033e-06, "loss": 1.2092, "step": 1979 }, { "epoch": 0.5015832805573147, "grad_norm": 3.4743640422821045, "learning_rate": 9.930940079291904e-06, "loss": 1.1874, "step": 1980 }, { "epoch": 0.5018366054464851, "grad_norm": 3.7826244831085205, "learning_rate": 9.93080123764829e-06, "loss": 1.2899, "step": 1981 }, { "epoch": 0.5020899303356555, "grad_norm": 3.508268117904663, "learning_rate": 9.930662257550087e-06, "loss": 1.2104, "step": 1982 }, { "epoch": 0.5023432552248258, "grad_norm": 3.540865421295166, "learning_rate": 9.930523139001199e-06, "loss": 1.0924, "step": 1983 }, { "epoch": 0.5025965801139962, "grad_norm": 4.011488914489746, "learning_rate": 9.930383882005532e-06, "loss": 1.3308, "step": 1984 }, { "epoch": 0.5028499050031665, "grad_norm": 3.86618971824646, "learning_rate": 9.930244486566996e-06, "loss": 1.2516, "step": 1985 }, { "epoch": 0.5031032298923369, "grad_norm": 3.611562967300415, "learning_rate": 9.930104952689507e-06, "loss": 1.3106, "step": 1986 }, { "epoch": 0.5033565547815073, "grad_norm": 4.026989936828613, "learning_rate": 9.929965280376981e-06, "loss": 1.1976, "step": 1987 }, { "epoch": 0.5036098796706776, "grad_norm": 3.4937264919281006, "learning_rate": 9.929825469633338e-06, "loss": 1.0598, "step": 1988 }, { "epoch": 0.503863204559848, "grad_norm": 3.5130345821380615, "learning_rate": 9.929685520462508e-06, "loss": 1.1567, "step": 1989 }, { "epoch": 0.5041165294490184, "grad_norm": 3.598287343978882, "learning_rate": 9.929545432868422e-06, "loss": 1.1503, "step": 1990 }, { "epoch": 0.5043698543381887, "grad_norm": 3.6132335662841797, 
"learning_rate": 9.929405206855008e-06, "loss": 1.1466, "step": 1991 }, { "epoch": 0.5046231792273591, "grad_norm": 3.887468099594116, "learning_rate": 9.929264842426204e-06, "loss": 1.322, "step": 1992 }, { "epoch": 0.5048765041165294, "grad_norm": 3.5717601776123047, "learning_rate": 9.929124339585956e-06, "loss": 1.2215, "step": 1993 }, { "epoch": 0.5051298290056998, "grad_norm": 3.476271390914917, "learning_rate": 9.928983698338207e-06, "loss": 1.1382, "step": 1994 }, { "epoch": 0.5053831538948702, "grad_norm": 3.6219406127929688, "learning_rate": 9.928842918686905e-06, "loss": 1.1434, "step": 1995 }, { "epoch": 0.5056364787840405, "grad_norm": 3.7151741981506348, "learning_rate": 9.928702000636004e-06, "loss": 1.3485, "step": 1996 }, { "epoch": 0.5058898036732109, "grad_norm": 3.5612034797668457, "learning_rate": 9.92856094418946e-06, "loss": 1.326, "step": 1997 }, { "epoch": 0.5061431285623813, "grad_norm": 3.819822072982788, "learning_rate": 9.928419749351236e-06, "loss": 1.1866, "step": 1998 }, { "epoch": 0.5063964534515516, "grad_norm": 3.5734217166900635, "learning_rate": 9.928278416125294e-06, "loss": 1.1093, "step": 1999 }, { "epoch": 0.506649778340722, "grad_norm": 3.6447010040283203, "learning_rate": 9.928136944515605e-06, "loss": 1.0837, "step": 2000 }, { "epoch": 0.506649778340722, "eval_loss": 1.2390925884246826, "eval_runtime": 12.3897, "eval_samples_per_second": 32.285, "eval_steps_per_second": 4.036, "step": 2000 }, { "epoch": 0.5069031032298923, "grad_norm": 3.805245876312256, "learning_rate": 9.927995334526139e-06, "loss": 1.2013, "step": 2001 }, { "epoch": 0.5071564281190627, "grad_norm": 3.6440162658691406, "learning_rate": 9.927853586160876e-06, "loss": 1.098, "step": 2002 }, { "epoch": 0.5074097530082331, "grad_norm": 3.89577317237854, "learning_rate": 9.927711699423792e-06, "loss": 1.2889, "step": 2003 }, { "epoch": 0.5076630778974034, "grad_norm": 4.357210636138916, "learning_rate": 9.927569674318874e-06, "loss": 1.4048, "step": 2004 }, { "epoch": 0.5079164027865738, "grad_norm": 4.084841251373291, "learning_rate": 9.927427510850107e-06, "loss": 1.2109, "step": 2005 }, { "epoch": 0.5081697276757441, "grad_norm": 4.151208877563477, "learning_rate": 9.927285209021487e-06, "loss": 1.1832, "step": 2006 }, { "epoch": 0.5084230525649145, "grad_norm": 4.021500587463379, "learning_rate": 9.927142768837005e-06, "loss": 1.24, "step": 2007 }, { "epoch": 0.5086763774540849, "grad_norm": 3.5542354583740234, "learning_rate": 9.927000190300666e-06, "loss": 1.3194, "step": 2008 }, { "epoch": 0.5089297023432552, "grad_norm": 4.103028774261475, "learning_rate": 9.926857473416469e-06, "loss": 1.3017, "step": 2009 }, { "epoch": 0.5091830272324256, "grad_norm": 4.074617385864258, "learning_rate": 9.926714618188424e-06, "loss": 1.3239, "step": 2010 }, { "epoch": 0.509436352121596, "grad_norm": 3.6382956504821777, "learning_rate": 9.926571624620542e-06, "loss": 1.2841, "step": 2011 }, { "epoch": 0.5096896770107663, "grad_norm": 3.8066420555114746, "learning_rate": 9.926428492716838e-06, "loss": 1.2699, "step": 2012 }, { "epoch": 0.5099430018999367, "grad_norm": 3.729119300842285, "learning_rate": 9.92628522248133e-06, "loss": 1.2963, "step": 2013 }, { "epoch": 0.510196326789107, "grad_norm": 3.840654134750366, "learning_rate": 9.926141813918042e-06, "loss": 1.1904, "step": 2014 }, { "epoch": 0.5104496516782774, "grad_norm": 3.9306607246398926, "learning_rate": 9.925998267031001e-06, "loss": 1.2877, "step": 2015 }, { "epoch": 0.5107029765674478, "grad_norm": 3.945796489715576, 
"learning_rate": 9.925854581824236e-06, "loss": 1.1981, "step": 2016 }, { "epoch": 0.5109563014566181, "grad_norm": 3.8385891914367676, "learning_rate": 9.925710758301785e-06, "loss": 1.2959, "step": 2017 }, { "epoch": 0.5112096263457885, "grad_norm": 4.107153415679932, "learning_rate": 9.925566796467685e-06, "loss": 1.2408, "step": 2018 }, { "epoch": 0.5114629512349589, "grad_norm": 3.6010515689849854, "learning_rate": 9.925422696325976e-06, "loss": 1.1565, "step": 2019 }, { "epoch": 0.5117162761241292, "grad_norm": 4.021646976470947, "learning_rate": 9.925278457880706e-06, "loss": 1.1205, "step": 2020 }, { "epoch": 0.5119696010132996, "grad_norm": 3.8271992206573486, "learning_rate": 9.925134081135925e-06, "loss": 1.1259, "step": 2021 }, { "epoch": 0.5122229259024699, "grad_norm": 4.050345420837402, "learning_rate": 9.924989566095689e-06, "loss": 1.3793, "step": 2022 }, { "epoch": 0.5124762507916403, "grad_norm": 3.9474878311157227, "learning_rate": 9.924844912764053e-06, "loss": 1.3196, "step": 2023 }, { "epoch": 0.5127295756808107, "grad_norm": 3.6200332641601562, "learning_rate": 9.924700121145081e-06, "loss": 1.1426, "step": 2024 }, { "epoch": 0.512982900569981, "grad_norm": 3.7752346992492676, "learning_rate": 9.924555191242838e-06, "loss": 1.1998, "step": 2025 }, { "epoch": 0.5132362254591514, "grad_norm": 3.581876277923584, "learning_rate": 9.92441012306139e-06, "loss": 1.2416, "step": 2026 }, { "epoch": 0.5134895503483218, "grad_norm": 3.861626148223877, "learning_rate": 9.924264916604817e-06, "loss": 1.3283, "step": 2027 }, { "epoch": 0.5137428752374921, "grad_norm": 4.046294212341309, "learning_rate": 9.924119571877192e-06, "loss": 1.1203, "step": 2028 }, { "epoch": 0.5139962001266625, "grad_norm": 3.922940254211426, "learning_rate": 9.923974088882597e-06, "loss": 1.2, "step": 2029 }, { "epoch": 0.5142495250158328, "grad_norm": 3.821467161178589, "learning_rate": 9.923828467625118e-06, "loss": 1.1233, "step": 2030 }, { "epoch": 0.5145028499050032, "grad_norm": 3.7097089290618896, "learning_rate": 9.923682708108844e-06, "loss": 1.0941, "step": 2031 }, { "epoch": 0.5147561747941736, "grad_norm": 3.8376052379608154, "learning_rate": 9.923536810337866e-06, "loss": 1.3278, "step": 2032 }, { "epoch": 0.5150094996833439, "grad_norm": 3.7457728385925293, "learning_rate": 9.923390774316282e-06, "loss": 1.2232, "step": 2033 }, { "epoch": 0.5152628245725143, "grad_norm": 3.587247133255005, "learning_rate": 9.923244600048191e-06, "loss": 1.1964, "step": 2034 }, { "epoch": 0.5155161494616846, "grad_norm": 3.852628469467163, "learning_rate": 9.923098287537702e-06, "loss": 1.2074, "step": 2035 }, { "epoch": 0.515769474350855, "grad_norm": 3.785722494125366, "learning_rate": 9.92295183678892e-06, "loss": 1.2066, "step": 2036 }, { "epoch": 0.5160227992400254, "grad_norm": 4.016298770904541, "learning_rate": 9.922805247805956e-06, "loss": 1.1638, "step": 2037 }, { "epoch": 0.5162761241291957, "grad_norm": 3.6992557048797607, "learning_rate": 9.922658520592927e-06, "loss": 1.1199, "step": 2038 }, { "epoch": 0.5165294490183661, "grad_norm": 3.6224451065063477, "learning_rate": 9.922511655153957e-06, "loss": 1.1693, "step": 2039 }, { "epoch": 0.5167827739075365, "grad_norm": 3.499828815460205, "learning_rate": 9.922364651493165e-06, "loss": 1.1314, "step": 2040 }, { "epoch": 0.5170360987967068, "grad_norm": 3.856823682785034, "learning_rate": 9.92221750961468e-06, "loss": 1.172, "step": 2041 }, { "epoch": 0.5172894236858772, "grad_norm": 4.1732563972473145, "learning_rate": 9.922070229522636e-06, 
"loss": 1.3302, "step": 2042 }, { "epoch": 0.5175427485750475, "grad_norm": 3.7007861137390137, "learning_rate": 9.921922811221166e-06, "loss": 1.2326, "step": 2043 }, { "epoch": 0.5177960734642179, "grad_norm": 4.095267295837402, "learning_rate": 9.92177525471441e-06, "loss": 1.3154, "step": 2044 }, { "epoch": 0.5180493983533883, "grad_norm": 3.9190802574157715, "learning_rate": 9.921627560006511e-06, "loss": 1.0951, "step": 2045 }, { "epoch": 0.5183027232425585, "grad_norm": 3.760322332382202, "learning_rate": 9.921479727101619e-06, "loss": 1.2138, "step": 2046 }, { "epoch": 0.518556048131729, "grad_norm": 3.5061159133911133, "learning_rate": 9.921331756003881e-06, "loss": 1.2732, "step": 2047 }, { "epoch": 0.5188093730208994, "grad_norm": 3.5123519897460938, "learning_rate": 9.921183646717454e-06, "loss": 1.1772, "step": 2048 }, { "epoch": 0.5190626979100696, "grad_norm": 3.607067346572876, "learning_rate": 9.921035399246497e-06, "loss": 1.0284, "step": 2049 }, { "epoch": 0.51931602279924, "grad_norm": 3.7096149921417236, "learning_rate": 9.920887013595171e-06, "loss": 1.1604, "step": 2050 }, { "epoch": 0.5195693476884103, "grad_norm": 3.6085991859436035, "learning_rate": 9.920738489767646e-06, "loss": 1.2365, "step": 2051 }, { "epoch": 0.5198226725775807, "grad_norm": 3.6953377723693848, "learning_rate": 9.92058982776809e-06, "loss": 1.1491, "step": 2052 }, { "epoch": 0.5200759974667511, "grad_norm": 3.950957775115967, "learning_rate": 9.920441027600678e-06, "loss": 1.1382, "step": 2053 }, { "epoch": 0.5203293223559214, "grad_norm": 4.319118499755859, "learning_rate": 9.920292089269587e-06, "loss": 1.4027, "step": 2054 }, { "epoch": 0.5205826472450918, "grad_norm": 4.153669834136963, "learning_rate": 9.920143012778999e-06, "loss": 1.2138, "step": 2055 }, { "epoch": 0.5208359721342621, "grad_norm": 3.893864631652832, "learning_rate": 9.919993798133104e-06, "loss": 1.1122, "step": 2056 }, { "epoch": 0.5210892970234325, "grad_norm": 3.6461257934570312, "learning_rate": 9.919844445336088e-06, "loss": 1.1891, "step": 2057 }, { "epoch": 0.5213426219126029, "grad_norm": 4.094683647155762, "learning_rate": 9.919694954392145e-06, "loss": 1.2813, "step": 2058 }, { "epoch": 0.5215959468017732, "grad_norm": 3.255305051803589, "learning_rate": 9.919545325305475e-06, "loss": 1.0393, "step": 2059 }, { "epoch": 0.5218492716909436, "grad_norm": 3.8772292137145996, "learning_rate": 9.919395558080276e-06, "loss": 1.1041, "step": 2060 }, { "epoch": 0.522102596580114, "grad_norm": 3.944272994995117, "learning_rate": 9.919245652720756e-06, "loss": 1.2829, "step": 2061 }, { "epoch": 0.5223559214692843, "grad_norm": 3.7375152111053467, "learning_rate": 9.919095609231125e-06, "loss": 1.2344, "step": 2062 }, { "epoch": 0.5226092463584547, "grad_norm": 3.8056650161743164, "learning_rate": 9.918945427615594e-06, "loss": 1.2783, "step": 2063 }, { "epoch": 0.522862571247625, "grad_norm": 3.702904224395752, "learning_rate": 9.91879510787838e-06, "loss": 1.2879, "step": 2064 }, { "epoch": 0.5231158961367954, "grad_norm": 3.8003106117248535, "learning_rate": 9.918644650023706e-06, "loss": 1.1689, "step": 2065 }, { "epoch": 0.5233692210259658, "grad_norm": 3.9635705947875977, "learning_rate": 9.918494054055795e-06, "loss": 1.2963, "step": 2066 }, { "epoch": 0.5236225459151361, "grad_norm": 3.4321775436401367, "learning_rate": 9.918343319978877e-06, "loss": 1.0809, "step": 2067 }, { "epoch": 0.5238758708043065, "grad_norm": 3.361389636993408, "learning_rate": 9.918192447797182e-06, "loss": 1.1926, "step": 2068 }, { 
"epoch": 0.5241291956934769, "grad_norm": 3.9239723682403564, "learning_rate": 9.918041437514948e-06, "loss": 1.2748, "step": 2069 }, { "epoch": 0.5243825205826472, "grad_norm": 3.5856761932373047, "learning_rate": 9.917890289136416e-06, "loss": 1.254, "step": 2070 }, { "epoch": 0.5246358454718176, "grad_norm": 3.499878406524658, "learning_rate": 9.91773900266583e-06, "loss": 1.1641, "step": 2071 }, { "epoch": 0.5248891703609879, "grad_norm": 3.535977840423584, "learning_rate": 9.917587578107438e-06, "loss": 1.2673, "step": 2072 }, { "epoch": 0.5251424952501583, "grad_norm": 3.4507598876953125, "learning_rate": 9.91743601546549e-06, "loss": 1.1051, "step": 2073 }, { "epoch": 0.5253958201393287, "grad_norm": 3.9945662021636963, "learning_rate": 9.917284314744245e-06, "loss": 1.1079, "step": 2074 }, { "epoch": 0.525649145028499, "grad_norm": 3.716646671295166, "learning_rate": 9.91713247594796e-06, "loss": 1.2796, "step": 2075 }, { "epoch": 0.5259024699176694, "grad_norm": 3.8714919090270996, "learning_rate": 9.9169804990809e-06, "loss": 1.412, "step": 2076 }, { "epoch": 0.5261557948068398, "grad_norm": 3.7069480419158936, "learning_rate": 9.91682838414733e-06, "loss": 1.243, "step": 2077 }, { "epoch": 0.5264091196960101, "grad_norm": 3.471064329147339, "learning_rate": 9.916676131151528e-06, "loss": 1.2679, "step": 2078 }, { "epoch": 0.5266624445851805, "grad_norm": 3.772437334060669, "learning_rate": 9.91652374009776e-06, "loss": 1.1777, "step": 2079 }, { "epoch": 0.5269157694743508, "grad_norm": 4.003936290740967, "learning_rate": 9.916371210990313e-06, "loss": 1.2332, "step": 2080 }, { "epoch": 0.5271690943635212, "grad_norm": 4.371222972869873, "learning_rate": 9.916218543833464e-06, "loss": 1.2212, "step": 2081 }, { "epoch": 0.5274224192526916, "grad_norm": 3.849670648574829, "learning_rate": 9.916065738631504e-06, "loss": 1.3082, "step": 2082 }, { "epoch": 0.5276757441418619, "grad_norm": 3.9164035320281982, "learning_rate": 9.915912795388722e-06, "loss": 1.2778, "step": 2083 }, { "epoch": 0.5279290690310323, "grad_norm": 3.9645965099334717, "learning_rate": 9.915759714109412e-06, "loss": 1.2734, "step": 2084 }, { "epoch": 0.5281823939202026, "grad_norm": 3.515418529510498, "learning_rate": 9.915606494797874e-06, "loss": 1.1672, "step": 2085 }, { "epoch": 0.528435718809373, "grad_norm": 3.6071736812591553, "learning_rate": 9.915453137458409e-06, "loss": 1.2437, "step": 2086 }, { "epoch": 0.5286890436985434, "grad_norm": 3.903167247772217, "learning_rate": 9.915299642095323e-06, "loss": 1.2654, "step": 2087 }, { "epoch": 0.5289423685877137, "grad_norm": 3.7934257984161377, "learning_rate": 9.915146008712928e-06, "loss": 1.3173, "step": 2088 }, { "epoch": 0.5291956934768841, "grad_norm": 3.4777708053588867, "learning_rate": 9.914992237315535e-06, "loss": 1.1638, "step": 2089 }, { "epoch": 0.5294490183660545, "grad_norm": 3.345414161682129, "learning_rate": 9.914838327907466e-06, "loss": 1.2188, "step": 2090 }, { "epoch": 0.5297023432552248, "grad_norm": 3.4722683429718018, "learning_rate": 9.914684280493039e-06, "loss": 1.1764, "step": 2091 }, { "epoch": 0.5299556681443952, "grad_norm": 3.456489086151123, "learning_rate": 9.91453009507658e-06, "loss": 1.1269, "step": 2092 }, { "epoch": 0.5302089930335655, "grad_norm": 3.9334988594055176, "learning_rate": 9.91437577166242e-06, "loss": 1.2108, "step": 2093 }, { "epoch": 0.5304623179227359, "grad_norm": 3.5916223526000977, "learning_rate": 9.914221310254892e-06, "loss": 1.2412, "step": 2094 }, { "epoch": 0.5307156428119063, "grad_norm": 
3.69205904006958, "learning_rate": 9.914066710858333e-06, "loss": 1.251, "step": 2095 }, { "epoch": 0.5309689677010766, "grad_norm": 4.0429463386535645, "learning_rate": 9.913911973477082e-06, "loss": 1.2754, "step": 2096 }, { "epoch": 0.531222292590247, "grad_norm": 3.604990243911743, "learning_rate": 9.913757098115488e-06, "loss": 1.314, "step": 2097 }, { "epoch": 0.5314756174794174, "grad_norm": 3.89520001411438, "learning_rate": 9.913602084777896e-06, "loss": 1.2807, "step": 2098 }, { "epoch": 0.5317289423685877, "grad_norm": 3.5424606800079346, "learning_rate": 9.913446933468661e-06, "loss": 1.1046, "step": 2099 }, { "epoch": 0.5319822672577581, "grad_norm": 4.084542274475098, "learning_rate": 9.913291644192139e-06, "loss": 1.366, "step": 2100 }, { "epoch": 0.5322355921469284, "grad_norm": 3.751150608062744, "learning_rate": 9.91313621695269e-06, "loss": 1.2879, "step": 2101 }, { "epoch": 0.5324889170360988, "grad_norm": 3.7791383266448975, "learning_rate": 9.91298065175468e-06, "loss": 1.2437, "step": 2102 }, { "epoch": 0.5327422419252692, "grad_norm": 3.40948224067688, "learning_rate": 9.912824948602474e-06, "loss": 1.1915, "step": 2103 }, { "epoch": 0.5329955668144395, "grad_norm": 3.428567886352539, "learning_rate": 9.912669107500447e-06, "loss": 1.0372, "step": 2104 }, { "epoch": 0.5332488917036099, "grad_norm": 3.6524455547332764, "learning_rate": 9.912513128452974e-06, "loss": 1.2302, "step": 2105 }, { "epoch": 0.5335022165927802, "grad_norm": 3.7702980041503906, "learning_rate": 9.912357011464436e-06, "loss": 1.2113, "step": 2106 }, { "epoch": 0.5337555414819506, "grad_norm": 3.56003999710083, "learning_rate": 9.912200756539211e-06, "loss": 1.1612, "step": 2107 }, { "epoch": 0.534008866371121, "grad_norm": 3.6964781284332275, "learning_rate": 9.912044363681695e-06, "loss": 1.1697, "step": 2108 }, { "epoch": 0.5342621912602913, "grad_norm": 3.6726064682006836, "learning_rate": 9.911887832896274e-06, "loss": 1.1919, "step": 2109 }, { "epoch": 0.5345155161494617, "grad_norm": 3.5990684032440186, "learning_rate": 9.911731164187345e-06, "loss": 1.1649, "step": 2110 }, { "epoch": 0.5347688410386321, "grad_norm": 3.431689500808716, "learning_rate": 9.911574357559308e-06, "loss": 1.0571, "step": 2111 }, { "epoch": 0.5350221659278024, "grad_norm": 3.707425594329834, "learning_rate": 9.911417413016565e-06, "loss": 1.1837, "step": 2112 }, { "epoch": 0.5352754908169728, "grad_norm": 3.7318520545959473, "learning_rate": 9.911260330563522e-06, "loss": 1.3111, "step": 2113 }, { "epoch": 0.5355288157061431, "grad_norm": 3.8392438888549805, "learning_rate": 9.91110311020459e-06, "loss": 1.1554, "step": 2114 }, { "epoch": 0.5357821405953135, "grad_norm": 3.4397387504577637, "learning_rate": 9.910945751944185e-06, "loss": 1.1259, "step": 2115 }, { "epoch": 0.5360354654844839, "grad_norm": 3.693002223968506, "learning_rate": 9.910788255786725e-06, "loss": 1.3216, "step": 2116 }, { "epoch": 0.5362887903736542, "grad_norm": 3.924417018890381, "learning_rate": 9.910630621736632e-06, "loss": 1.3065, "step": 2117 }, { "epoch": 0.5365421152628246, "grad_norm": 4.017768383026123, "learning_rate": 9.910472849798333e-06, "loss": 1.2996, "step": 2118 }, { "epoch": 0.536795440151995, "grad_norm": 3.6276843547821045, "learning_rate": 9.910314939976257e-06, "loss": 1.1187, "step": 2119 }, { "epoch": 0.5370487650411653, "grad_norm": 3.8386552333831787, "learning_rate": 9.91015689227484e-06, "loss": 1.2779, "step": 2120 }, { "epoch": 0.5373020899303357, "grad_norm": 4.193774700164795, "learning_rate": 
9.909998706698519e-06, "loss": 1.1943, "step": 2121 }, { "epoch": 0.537555414819506, "grad_norm": 3.8841090202331543, "learning_rate": 9.909840383251735e-06, "loss": 1.1898, "step": 2122 }, { "epoch": 0.5378087397086764, "grad_norm": 3.499729633331299, "learning_rate": 9.909681921938934e-06, "loss": 1.0828, "step": 2123 }, { "epoch": 0.5380620645978468, "grad_norm": 3.5716724395751953, "learning_rate": 9.909523322764568e-06, "loss": 1.1872, "step": 2124 }, { "epoch": 0.5383153894870171, "grad_norm": 3.674340009689331, "learning_rate": 9.909364585733085e-06, "loss": 1.2995, "step": 2125 }, { "epoch": 0.5385687143761875, "grad_norm": 3.615060567855835, "learning_rate": 9.909205710848945e-06, "loss": 1.1923, "step": 2126 }, { "epoch": 0.5388220392653578, "grad_norm": 3.3412909507751465, "learning_rate": 9.90904669811661e-06, "loss": 1.1698, "step": 2127 }, { "epoch": 0.5390753641545282, "grad_norm": 4.253294944763184, "learning_rate": 9.908887547540546e-06, "loss": 1.271, "step": 2128 }, { "epoch": 0.5393286890436986, "grad_norm": 3.6456515789031982, "learning_rate": 9.90872825912522e-06, "loss": 1.2528, "step": 2129 }, { "epoch": 0.5395820139328689, "grad_norm": 3.7424333095550537, "learning_rate": 9.908568832875104e-06, "loss": 1.2329, "step": 2130 }, { "epoch": 0.5398353388220393, "grad_norm": 3.7191965579986572, "learning_rate": 9.908409268794677e-06, "loss": 1.1287, "step": 2131 }, { "epoch": 0.5400886637112097, "grad_norm": 3.479968786239624, "learning_rate": 9.908249566888416e-06, "loss": 1.1933, "step": 2132 }, { "epoch": 0.54034198860038, "grad_norm": 3.8091413974761963, "learning_rate": 9.90808972716081e-06, "loss": 1.2426, "step": 2133 }, { "epoch": 0.5405953134895504, "grad_norm": 3.447298765182495, "learning_rate": 9.907929749616345e-06, "loss": 1.2251, "step": 2134 }, { "epoch": 0.5408486383787207, "grad_norm": 3.6902029514312744, "learning_rate": 9.907769634259511e-06, "loss": 1.2301, "step": 2135 }, { "epoch": 0.5411019632678911, "grad_norm": 3.4941325187683105, "learning_rate": 9.907609381094807e-06, "loss": 1.1992, "step": 2136 }, { "epoch": 0.5413552881570615, "grad_norm": 3.628161668777466, "learning_rate": 9.907448990126732e-06, "loss": 1.1705, "step": 2137 }, { "epoch": 0.5416086130462318, "grad_norm": 4.226117134094238, "learning_rate": 9.907288461359788e-06, "loss": 1.3543, "step": 2138 }, { "epoch": 0.5418619379354022, "grad_norm": 3.90450382232666, "learning_rate": 9.907127794798483e-06, "loss": 1.2767, "step": 2139 }, { "epoch": 0.5421152628245726, "grad_norm": 3.9196813106536865, "learning_rate": 9.906966990447332e-06, "loss": 1.2252, "step": 2140 }, { "epoch": 0.5423685877137429, "grad_norm": 4.277498245239258, "learning_rate": 9.906806048310847e-06, "loss": 1.257, "step": 2141 }, { "epoch": 0.5426219126029133, "grad_norm": 3.480289936065674, "learning_rate": 9.906644968393546e-06, "loss": 1.2312, "step": 2142 }, { "epoch": 0.5428752374920836, "grad_norm": 3.5467991828918457, "learning_rate": 9.906483750699955e-06, "loss": 1.1649, "step": 2143 }, { "epoch": 0.543128562381254, "grad_norm": 4.222132682800293, "learning_rate": 9.906322395234601e-06, "loss": 1.348, "step": 2144 }, { "epoch": 0.5433818872704244, "grad_norm": 4.237131118774414, "learning_rate": 9.906160902002013e-06, "loss": 1.1572, "step": 2145 }, { "epoch": 0.5436352121595946, "grad_norm": 4.146684646606445, "learning_rate": 9.905999271006726e-06, "loss": 1.3532, "step": 2146 }, { "epoch": 0.543888537048765, "grad_norm": 3.6279287338256836, "learning_rate": 9.905837502253279e-06, "loss": 1.1156, 
"step": 2147 }, { "epoch": 0.5441418619379355, "grad_norm": 3.8298258781433105, "learning_rate": 9.905675595746214e-06, "loss": 1.2658, "step": 2148 }, { "epoch": 0.5443951868271057, "grad_norm": 3.568838119506836, "learning_rate": 9.905513551490078e-06, "loss": 1.282, "step": 2149 }, { "epoch": 0.5446485117162762, "grad_norm": 3.4449048042297363, "learning_rate": 9.90535136948942e-06, "loss": 1.3066, "step": 2150 }, { "epoch": 0.5449018366054464, "grad_norm": 3.561124801635742, "learning_rate": 9.905189049748796e-06, "loss": 1.0653, "step": 2151 }, { "epoch": 0.5451551614946168, "grad_norm": 3.7354698181152344, "learning_rate": 9.905026592272759e-06, "loss": 1.2388, "step": 2152 }, { "epoch": 0.5454084863837872, "grad_norm": 3.690352439880371, "learning_rate": 9.904863997065878e-06, "loss": 1.4449, "step": 2153 }, { "epoch": 0.5456618112729575, "grad_norm": 3.964592218399048, "learning_rate": 9.904701264132713e-06, "loss": 1.329, "step": 2154 }, { "epoch": 0.545915136162128, "grad_norm": 3.7120819091796875, "learning_rate": 9.904538393477835e-06, "loss": 1.1677, "step": 2155 }, { "epoch": 0.5461684610512982, "grad_norm": 3.5068633556365967, "learning_rate": 9.904375385105818e-06, "loss": 1.1439, "step": 2156 }, { "epoch": 0.5464217859404686, "grad_norm": 3.9299685955047607, "learning_rate": 9.904212239021238e-06, "loss": 1.1888, "step": 2157 }, { "epoch": 0.546675110829639, "grad_norm": 4.307981967926025, "learning_rate": 9.904048955228677e-06, "loss": 1.3852, "step": 2158 }, { "epoch": 0.5469284357188093, "grad_norm": 4.037630558013916, "learning_rate": 9.903885533732722e-06, "loss": 1.3964, "step": 2159 }, { "epoch": 0.5471817606079797, "grad_norm": 3.4218804836273193, "learning_rate": 9.903721974537956e-06, "loss": 1.0959, "step": 2160 }, { "epoch": 0.5474350854971501, "grad_norm": 3.514329671859741, "learning_rate": 9.903558277648979e-06, "loss": 1.1461, "step": 2161 }, { "epoch": 0.5476884103863204, "grad_norm": 3.447237730026245, "learning_rate": 9.903394443070381e-06, "loss": 1.1734, "step": 2162 }, { "epoch": 0.5479417352754908, "grad_norm": 3.6662721633911133, "learning_rate": 9.903230470806766e-06, "loss": 1.2235, "step": 2163 }, { "epoch": 0.5481950601646611, "grad_norm": 3.6063079833984375, "learning_rate": 9.903066360862736e-06, "loss": 1.2365, "step": 2164 }, { "epoch": 0.5484483850538315, "grad_norm": 3.9934699535369873, "learning_rate": 9.902902113242903e-06, "loss": 1.3201, "step": 2165 }, { "epoch": 0.5487017099430019, "grad_norm": 3.5313923358917236, "learning_rate": 9.902737727951876e-06, "loss": 1.1839, "step": 2166 }, { "epoch": 0.5489550348321722, "grad_norm": 3.5758824348449707, "learning_rate": 9.90257320499427e-06, "loss": 1.2516, "step": 2167 }, { "epoch": 0.5492083597213426, "grad_norm": 3.4398324489593506, "learning_rate": 9.902408544374706e-06, "loss": 1.2804, "step": 2168 }, { "epoch": 0.549461684610513, "grad_norm": 3.7697672843933105, "learning_rate": 9.90224374609781e-06, "loss": 1.2488, "step": 2169 }, { "epoch": 0.5497150094996833, "grad_norm": 3.538827419281006, "learning_rate": 9.902078810168206e-06, "loss": 1.2351, "step": 2170 }, { "epoch": 0.5499683343888537, "grad_norm": 3.9213740825653076, "learning_rate": 9.901913736590527e-06, "loss": 1.4626, "step": 2171 }, { "epoch": 0.550221659278024, "grad_norm": 3.5800392627716064, "learning_rate": 9.901748525369406e-06, "loss": 1.1471, "step": 2172 }, { "epoch": 0.5504749841671944, "grad_norm": 4.1332807540893555, "learning_rate": 9.901583176509485e-06, "loss": 1.3493, "step": 2173 }, { "epoch": 
0.5507283090563648, "grad_norm": 3.9190592765808105, "learning_rate": 9.901417690015405e-06, "loss": 1.3066, "step": 2174 }, { "epoch": 0.5509816339455351, "grad_norm": 3.843719244003296, "learning_rate": 9.901252065891814e-06, "loss": 1.1864, "step": 2175 }, { "epoch": 0.5512349588347055, "grad_norm": 3.5053625106811523, "learning_rate": 9.90108630414336e-06, "loss": 1.1446, "step": 2176 }, { "epoch": 0.5514882837238758, "grad_norm": 3.4328432083129883, "learning_rate": 9.900920404774703e-06, "loss": 1.1201, "step": 2177 }, { "epoch": 0.5517416086130462, "grad_norm": 4.106205463409424, "learning_rate": 9.900754367790497e-06, "loss": 1.3857, "step": 2178 }, { "epoch": 0.5519949335022166, "grad_norm": 3.870600461959839, "learning_rate": 9.900588193195405e-06, "loss": 1.2029, "step": 2179 }, { "epoch": 0.5522482583913869, "grad_norm": 3.6704447269439697, "learning_rate": 9.900421880994093e-06, "loss": 1.3875, "step": 2180 }, { "epoch": 0.5525015832805573, "grad_norm": 3.882249593734741, "learning_rate": 9.900255431191232e-06, "loss": 1.2162, "step": 2181 }, { "epoch": 0.5527549081697277, "grad_norm": 3.450150966644287, "learning_rate": 9.900088843791494e-06, "loss": 1.0647, "step": 2182 }, { "epoch": 0.553008233058898, "grad_norm": 4.354090690612793, "learning_rate": 9.899922118799559e-06, "loss": 1.2928, "step": 2183 }, { "epoch": 0.5532615579480684, "grad_norm": 3.895104169845581, "learning_rate": 9.899755256220107e-06, "loss": 1.1778, "step": 2184 }, { "epoch": 0.5535148828372387, "grad_norm": 3.9388809204101562, "learning_rate": 9.899588256057824e-06, "loss": 1.234, "step": 2185 }, { "epoch": 0.5537682077264091, "grad_norm": 3.7990224361419678, "learning_rate": 9.899421118317399e-06, "loss": 1.2266, "step": 2186 }, { "epoch": 0.5540215326155795, "grad_norm": 4.169088840484619, "learning_rate": 9.899253843003525e-06, "loss": 1.1988, "step": 2187 }, { "epoch": 0.5542748575047498, "grad_norm": 3.5473103523254395, "learning_rate": 9.899086430120898e-06, "loss": 1.1201, "step": 2188 }, { "epoch": 0.5545281823939202, "grad_norm": 3.514629364013672, "learning_rate": 9.898918879674223e-06, "loss": 1.0495, "step": 2189 }, { "epoch": 0.5547815072830906, "grad_norm": 3.4299962520599365, "learning_rate": 9.8987511916682e-06, "loss": 1.0817, "step": 2190 }, { "epoch": 0.5550348321722609, "grad_norm": 3.9319610595703125, "learning_rate": 9.898583366107539e-06, "loss": 1.2688, "step": 2191 }, { "epoch": 0.5552881570614313, "grad_norm": 3.759704113006592, "learning_rate": 9.898415402996952e-06, "loss": 1.2093, "step": 2192 }, { "epoch": 0.5555414819506016, "grad_norm": 3.3724687099456787, "learning_rate": 9.898247302341158e-06, "loss": 1.1356, "step": 2193 }, { "epoch": 0.555794806839772, "grad_norm": 3.661829948425293, "learning_rate": 9.898079064144877e-06, "loss": 1.2271, "step": 2194 }, { "epoch": 0.5560481317289424, "grad_norm": 3.6634035110473633, "learning_rate": 9.897910688412829e-06, "loss": 1.2451, "step": 2195 }, { "epoch": 0.5563014566181127, "grad_norm": 3.894113302230835, "learning_rate": 9.897742175149746e-06, "loss": 1.1522, "step": 2196 }, { "epoch": 0.5565547815072831, "grad_norm": 3.4600610733032227, "learning_rate": 9.897573524360357e-06, "loss": 1.1344, "step": 2197 }, { "epoch": 0.5568081063964535, "grad_norm": 3.658263683319092, "learning_rate": 9.897404736049399e-06, "loss": 1.26, "step": 2198 }, { "epoch": 0.5570614312856238, "grad_norm": 3.899393081665039, "learning_rate": 9.897235810221612e-06, "loss": 1.2925, "step": 2199 }, { "epoch": 0.5573147561747942, "grad_norm": 
3.949037790298462, "learning_rate": 9.897066746881738e-06, "loss": 1.1514, "step": 2200 }, { "epoch": 0.5575680810639645, "grad_norm": 3.4396145343780518, "learning_rate": 9.896897546034524e-06, "loss": 1.1929, "step": 2201 }, { "epoch": 0.5578214059531349, "grad_norm": 3.182199239730835, "learning_rate": 9.896728207684724e-06, "loss": 1.1388, "step": 2202 }, { "epoch": 0.5580747308423053, "grad_norm": 3.5556013584136963, "learning_rate": 9.89655873183709e-06, "loss": 1.202, "step": 2203 }, { "epoch": 0.5583280557314756, "grad_norm": 3.7224278450012207, "learning_rate": 9.896389118496381e-06, "loss": 1.1746, "step": 2204 }, { "epoch": 0.558581380620646, "grad_norm": 4.2328362464904785, "learning_rate": 9.896219367667362e-06, "loss": 1.3125, "step": 2205 }, { "epoch": 0.5588347055098163, "grad_norm": 4.193203926086426, "learning_rate": 9.896049479354797e-06, "loss": 1.2244, "step": 2206 }, { "epoch": 0.5590880303989867, "grad_norm": 3.8881187438964844, "learning_rate": 9.895879453563457e-06, "loss": 1.1759, "step": 2207 }, { "epoch": 0.5593413552881571, "grad_norm": 3.405766725540161, "learning_rate": 9.895709290298117e-06, "loss": 1.2021, "step": 2208 }, { "epoch": 0.5595946801773274, "grad_norm": 3.778693437576294, "learning_rate": 9.895538989563555e-06, "loss": 1.1735, "step": 2209 }, { "epoch": 0.5598480050664978, "grad_norm": 4.0944929122924805, "learning_rate": 9.895368551364551e-06, "loss": 1.3031, "step": 2210 }, { "epoch": 0.5601013299556682, "grad_norm": 3.705367088317871, "learning_rate": 9.895197975705895e-06, "loss": 1.1974, "step": 2211 }, { "epoch": 0.5603546548448385, "grad_norm": 3.4504377841949463, "learning_rate": 9.895027262592372e-06, "loss": 1.1317, "step": 2212 }, { "epoch": 0.5606079797340089, "grad_norm": 3.8827710151672363, "learning_rate": 9.894856412028778e-06, "loss": 1.1375, "step": 2213 }, { "epoch": 0.5608613046231792, "grad_norm": 4.1285858154296875, "learning_rate": 9.89468542401991e-06, "loss": 1.3678, "step": 2214 }, { "epoch": 0.5611146295123496, "grad_norm": 3.390394449234009, "learning_rate": 9.89451429857057e-06, "loss": 1.2629, "step": 2215 }, { "epoch": 0.56136795440152, "grad_norm": 3.5470921993255615, "learning_rate": 9.894343035685561e-06, "loss": 1.2365, "step": 2216 }, { "epoch": 0.5616212792906903, "grad_norm": 3.7412421703338623, "learning_rate": 9.894171635369697e-06, "loss": 1.3165, "step": 2217 }, { "epoch": 0.5618746041798607, "grad_norm": 3.2183799743652344, "learning_rate": 9.894000097627783e-06, "loss": 1.1686, "step": 2218 }, { "epoch": 0.5621279290690311, "grad_norm": 3.583022356033325, "learning_rate": 9.893828422464642e-06, "loss": 1.1512, "step": 2219 }, { "epoch": 0.5623812539582014, "grad_norm": 3.8245561122894287, "learning_rate": 9.893656609885092e-06, "loss": 1.1414, "step": 2220 }, { "epoch": 0.5626345788473718, "grad_norm": 3.6685290336608887, "learning_rate": 9.89348465989396e-06, "loss": 1.2011, "step": 2221 }, { "epoch": 0.5628879037365421, "grad_norm": 3.8434760570526123, "learning_rate": 9.89331257249607e-06, "loss": 1.1604, "step": 2222 }, { "epoch": 0.5631412286257125, "grad_norm": 3.6513028144836426, "learning_rate": 9.893140347696257e-06, "loss": 1.1783, "step": 2223 }, { "epoch": 0.5633945535148829, "grad_norm": 3.6049387454986572, "learning_rate": 9.892967985499356e-06, "loss": 1.1097, "step": 2224 }, { "epoch": 0.5636478784040532, "grad_norm": 3.929824113845825, "learning_rate": 9.892795485910207e-06, "loss": 1.1884, "step": 2225 }, { "epoch": 0.5639012032932236, "grad_norm": 3.6172595024108887, "learning_rate": 
9.892622848933653e-06, "loss": 1.186, "step": 2226 }, { "epoch": 0.5641545281823939, "grad_norm": 3.9310948848724365, "learning_rate": 9.892450074574545e-06, "loss": 1.1807, "step": 2227 }, { "epoch": 0.5644078530715643, "grad_norm": 3.604039430618286, "learning_rate": 9.892277162837731e-06, "loss": 1.0479, "step": 2228 }, { "epoch": 0.5646611779607347, "grad_norm": 3.72631573677063, "learning_rate": 9.892104113728065e-06, "loss": 1.2094, "step": 2229 }, { "epoch": 0.564914502849905, "grad_norm": 3.7389190196990967, "learning_rate": 9.891930927250411e-06, "loss": 1.1695, "step": 2230 }, { "epoch": 0.5651678277390754, "grad_norm": 3.9570655822753906, "learning_rate": 9.891757603409629e-06, "loss": 1.1978, "step": 2231 }, { "epoch": 0.5654211526282458, "grad_norm": 3.9663290977478027, "learning_rate": 9.891584142210584e-06, "loss": 1.2631, "step": 2232 }, { "epoch": 0.5656744775174161, "grad_norm": 3.927863121032715, "learning_rate": 9.89141054365815e-06, "loss": 1.1389, "step": 2233 }, { "epoch": 0.5659278024065865, "grad_norm": 3.885694742202759, "learning_rate": 9.891236807757201e-06, "loss": 1.2052, "step": 2234 }, { "epoch": 0.5661811272957568, "grad_norm": 3.5142667293548584, "learning_rate": 9.891062934512615e-06, "loss": 1.1599, "step": 2235 }, { "epoch": 0.5664344521849272, "grad_norm": 3.523827314376831, "learning_rate": 9.890888923929274e-06, "loss": 1.245, "step": 2236 }, { "epoch": 0.5666877770740976, "grad_norm": 3.3272361755371094, "learning_rate": 9.890714776012065e-06, "loss": 1.0359, "step": 2237 }, { "epoch": 0.5669411019632679, "grad_norm": 3.374920606613159, "learning_rate": 9.890540490765876e-06, "loss": 1.2326, "step": 2238 }, { "epoch": 0.5671944268524383, "grad_norm": 3.8165230751037598, "learning_rate": 9.890366068195603e-06, "loss": 1.3016, "step": 2239 }, { "epoch": 0.5674477517416087, "grad_norm": 3.5627212524414062, "learning_rate": 9.890191508306141e-06, "loss": 1.2669, "step": 2240 }, { "epoch": 0.567701076630779, "grad_norm": 3.4874629974365234, "learning_rate": 9.890016811102395e-06, "loss": 1.1789, "step": 2241 }, { "epoch": 0.5679544015199494, "grad_norm": 3.576786994934082, "learning_rate": 9.889841976589268e-06, "loss": 1.0605, "step": 2242 }, { "epoch": 0.5682077264091197, "grad_norm": 3.454319953918457, "learning_rate": 9.88966700477167e-06, "loss": 1.2683, "step": 2243 }, { "epoch": 0.56846105129829, "grad_norm": 3.5707461833953857, "learning_rate": 9.889491895654515e-06, "loss": 1.3246, "step": 2244 }, { "epoch": 0.5687143761874605, "grad_norm": 3.5469753742218018, "learning_rate": 9.88931664924272e-06, "loss": 1.109, "step": 2245 }, { "epoch": 0.5689677010766307, "grad_norm": 3.709312915802002, "learning_rate": 9.889141265541202e-06, "loss": 1.2081, "step": 2246 }, { "epoch": 0.5692210259658012, "grad_norm": 3.820150375366211, "learning_rate": 9.888965744554892e-06, "loss": 1.2152, "step": 2247 }, { "epoch": 0.5694743508549716, "grad_norm": 3.6582584381103516, "learning_rate": 9.888790086288714e-06, "loss": 1.2377, "step": 2248 }, { "epoch": 0.5697276757441418, "grad_norm": 3.495410442352295, "learning_rate": 9.8886142907476e-06, "loss": 1.1192, "step": 2249 }, { "epoch": 0.5699810006333123, "grad_norm": 3.2466015815734863, "learning_rate": 9.88843835793649e-06, "loss": 1.1125, "step": 2250 }, { "epoch": 0.5702343255224825, "grad_norm": 3.685121536254883, "learning_rate": 9.888262287860321e-06, "loss": 1.1283, "step": 2251 }, { "epoch": 0.570487650411653, "grad_norm": 3.65809965133667, "learning_rate": 9.888086080524039e-06, "loss": 1.3646, "step": 
2252 }, { "epoch": 0.5707409753008233, "grad_norm": 3.577664852142334, "learning_rate": 9.88790973593259e-06, "loss": 1.239, "step": 2253 }, { "epoch": 0.5709943001899936, "grad_norm": 3.3043811321258545, "learning_rate": 9.887733254090925e-06, "loss": 1.1543, "step": 2254 }, { "epoch": 0.571247625079164, "grad_norm": 3.625861406326294, "learning_rate": 9.887556635004003e-06, "loss": 1.1222, "step": 2255 }, { "epoch": 0.5715009499683343, "grad_norm": 3.6544387340545654, "learning_rate": 9.887379878676782e-06, "loss": 1.2513, "step": 2256 }, { "epoch": 0.5717542748575047, "grad_norm": 3.5436434745788574, "learning_rate": 9.887202985114223e-06, "loss": 1.199, "step": 2257 }, { "epoch": 0.5720075997466751, "grad_norm": 3.650327205657959, "learning_rate": 9.887025954321295e-06, "loss": 1.147, "step": 2258 }, { "epoch": 0.5722609246358454, "grad_norm": 3.910425901412964, "learning_rate": 9.88684878630297e-06, "loss": 1.3222, "step": 2259 }, { "epoch": 0.5725142495250158, "grad_norm": 4.051957130432129, "learning_rate": 9.88667148106422e-06, "loss": 1.2572, "step": 2260 }, { "epoch": 0.5727675744141862, "grad_norm": 3.374309778213501, "learning_rate": 9.886494038610025e-06, "loss": 1.1578, "step": 2261 }, { "epoch": 0.5730208993033565, "grad_norm": 3.7524333000183105, "learning_rate": 9.886316458945367e-06, "loss": 1.2844, "step": 2262 }, { "epoch": 0.5732742241925269, "grad_norm": 3.5143179893493652, "learning_rate": 9.886138742075235e-06, "loss": 1.1891, "step": 2263 }, { "epoch": 0.5735275490816972, "grad_norm": 3.6519126892089844, "learning_rate": 9.885960888004616e-06, "loss": 1.1665, "step": 2264 }, { "epoch": 0.5737808739708676, "grad_norm": 3.3841686248779297, "learning_rate": 9.885782896738504e-06, "loss": 1.2329, "step": 2265 }, { "epoch": 0.574034198860038, "grad_norm": 3.453477621078491, "learning_rate": 9.8856047682819e-06, "loss": 1.0778, "step": 2266 }, { "epoch": 0.5742875237492083, "grad_norm": 3.317171335220337, "learning_rate": 9.885426502639803e-06, "loss": 1.1258, "step": 2267 }, { "epoch": 0.5745408486383787, "grad_norm": 3.9248321056365967, "learning_rate": 9.88524809981722e-06, "loss": 1.2575, "step": 2268 }, { "epoch": 0.5747941735275491, "grad_norm": 3.7087948322296143, "learning_rate": 9.88506955981916e-06, "loss": 1.0535, "step": 2269 }, { "epoch": 0.5750474984167194, "grad_norm": 3.3787403106689453, "learning_rate": 9.884890882650635e-06, "loss": 1.1185, "step": 2270 }, { "epoch": 0.5753008233058898, "grad_norm": 3.9463608264923096, "learning_rate": 9.884712068316665e-06, "loss": 1.2782, "step": 2271 }, { "epoch": 0.5755541481950601, "grad_norm": 3.6454262733459473, "learning_rate": 9.88453311682227e-06, "loss": 1.071, "step": 2272 }, { "epoch": 0.5758074730842305, "grad_norm": 3.7849459648132324, "learning_rate": 9.884354028172472e-06, "loss": 1.0822, "step": 2273 }, { "epoch": 0.5760607979734009, "grad_norm": 3.8799235820770264, "learning_rate": 9.884174802372303e-06, "loss": 1.1646, "step": 2274 }, { "epoch": 0.5763141228625712, "grad_norm": 3.6872293949127197, "learning_rate": 9.883995439426797e-06, "loss": 1.2012, "step": 2275 }, { "epoch": 0.5765674477517416, "grad_norm": 3.7613775730133057, "learning_rate": 9.883815939340985e-06, "loss": 1.1613, "step": 2276 }, { "epoch": 0.5768207726409119, "grad_norm": 3.8921945095062256, "learning_rate": 9.883636302119911e-06, "loss": 1.4246, "step": 2277 }, { "epoch": 0.5770740975300823, "grad_norm": 3.689805746078491, "learning_rate": 9.88345652776862e-06, "loss": 1.1549, "step": 2278 }, { "epoch": 0.5773274224192527, 
"grad_norm": 3.6792287826538086, "learning_rate": 9.883276616292157e-06, "loss": 1.1565, "step": 2279 }, { "epoch": 0.577580747308423, "grad_norm": 3.658616781234741, "learning_rate": 9.883096567695575e-06, "loss": 1.333, "step": 2280 }, { "epoch": 0.5778340721975934, "grad_norm": 3.7376174926757812, "learning_rate": 9.882916381983931e-06, "loss": 1.2714, "step": 2281 }, { "epoch": 0.5780873970867638, "grad_norm": 3.3016133308410645, "learning_rate": 9.882736059162283e-06, "loss": 1.043, "step": 2282 }, { "epoch": 0.5783407219759341, "grad_norm": 3.7895755767822266, "learning_rate": 9.882555599235694e-06, "loss": 1.227, "step": 2283 }, { "epoch": 0.5785940468651045, "grad_norm": 3.860079765319824, "learning_rate": 9.882375002209235e-06, "loss": 1.1046, "step": 2284 }, { "epoch": 0.5788473717542748, "grad_norm": 3.4295051097869873, "learning_rate": 9.882194268087973e-06, "loss": 1.044, "step": 2285 }, { "epoch": 0.5791006966434452, "grad_norm": 3.6967933177948, "learning_rate": 9.882013396876983e-06, "loss": 1.1913, "step": 2286 }, { "epoch": 0.5793540215326156, "grad_norm": 3.7292520999908447, "learning_rate": 9.881832388581345e-06, "loss": 1.2879, "step": 2287 }, { "epoch": 0.5796073464217859, "grad_norm": 3.2542896270751953, "learning_rate": 9.881651243206142e-06, "loss": 1.0997, "step": 2288 }, { "epoch": 0.5798606713109563, "grad_norm": 3.7655575275421143, "learning_rate": 9.88146996075646e-06, "loss": 1.2617, "step": 2289 }, { "epoch": 0.5801139962001267, "grad_norm": 3.836667060852051, "learning_rate": 9.881288541237389e-06, "loss": 1.2397, "step": 2290 }, { "epoch": 0.580367321089297, "grad_norm": 3.9770896434783936, "learning_rate": 9.881106984654025e-06, "loss": 1.227, "step": 2291 }, { "epoch": 0.5806206459784674, "grad_norm": 4.11192512512207, "learning_rate": 9.880925291011463e-06, "loss": 1.1607, "step": 2292 }, { "epoch": 0.5808739708676377, "grad_norm": 3.859837532043457, "learning_rate": 9.880743460314806e-06, "loss": 1.1955, "step": 2293 }, { "epoch": 0.5811272957568081, "grad_norm": 3.834381580352783, "learning_rate": 9.88056149256916e-06, "loss": 1.3425, "step": 2294 }, { "epoch": 0.5813806206459785, "grad_norm": 3.7988929748535156, "learning_rate": 9.880379387779637e-06, "loss": 1.1974, "step": 2295 }, { "epoch": 0.5816339455351488, "grad_norm": 3.9919984340667725, "learning_rate": 9.880197145951347e-06, "loss": 1.1925, "step": 2296 }, { "epoch": 0.5818872704243192, "grad_norm": 3.8683440685272217, "learning_rate": 9.880014767089407e-06, "loss": 1.2232, "step": 2297 }, { "epoch": 0.5821405953134895, "grad_norm": 3.4029898643493652, "learning_rate": 9.879832251198941e-06, "loss": 1.2023, "step": 2298 }, { "epoch": 0.5823939202026599, "grad_norm": 3.969633102416992, "learning_rate": 9.879649598285073e-06, "loss": 1.2331, "step": 2299 }, { "epoch": 0.5826472450918303, "grad_norm": 4.044780731201172, "learning_rate": 9.879466808352928e-06, "loss": 1.3385, "step": 2300 }, { "epoch": 0.5829005699810006, "grad_norm": 3.8353214263916016, "learning_rate": 9.879283881407645e-06, "loss": 1.2206, "step": 2301 }, { "epoch": 0.583153894870171, "grad_norm": 3.7752926349639893, "learning_rate": 9.879100817454357e-06, "loss": 1.2771, "step": 2302 }, { "epoch": 0.5834072197593414, "grad_norm": 3.4767112731933594, "learning_rate": 9.878917616498205e-06, "loss": 1.2505, "step": 2303 }, { "epoch": 0.5836605446485117, "grad_norm": 3.8391661643981934, "learning_rate": 9.878734278544332e-06, "loss": 1.3158, "step": 2304 }, { "epoch": 0.5839138695376821, "grad_norm": 3.89296555519104, 
"learning_rate": 9.878550803597888e-06, "loss": 1.1653, "step": 2305 }, { "epoch": 0.5841671944268524, "grad_norm": 3.8957955837249756, "learning_rate": 9.878367191664022e-06, "loss": 1.2505, "step": 2306 }, { "epoch": 0.5844205193160228, "grad_norm": 3.8498334884643555, "learning_rate": 9.878183442747892e-06, "loss": 1.2245, "step": 2307 }, { "epoch": 0.5846738442051932, "grad_norm": 3.7277896404266357, "learning_rate": 9.87799955685466e-06, "loss": 1.2585, "step": 2308 }, { "epoch": 0.5849271690943635, "grad_norm": 3.3101165294647217, "learning_rate": 9.877815533989484e-06, "loss": 1.0909, "step": 2309 }, { "epoch": 0.5851804939835339, "grad_norm": 3.9764208793640137, "learning_rate": 9.877631374157534e-06, "loss": 1.1819, "step": 2310 }, { "epoch": 0.5854338188727043, "grad_norm": 3.7967846393585205, "learning_rate": 9.877447077363983e-06, "loss": 1.2923, "step": 2311 }, { "epoch": 0.5856871437618746, "grad_norm": 3.9297142028808594, "learning_rate": 9.877262643614003e-06, "loss": 1.1912, "step": 2312 }, { "epoch": 0.585940468651045, "grad_norm": 3.7486352920532227, "learning_rate": 9.877078072912773e-06, "loss": 1.121, "step": 2313 }, { "epoch": 0.5861937935402153, "grad_norm": 3.9516937732696533, "learning_rate": 9.876893365265477e-06, "loss": 1.3066, "step": 2314 }, { "epoch": 0.5864471184293857, "grad_norm": 3.692528247833252, "learning_rate": 9.876708520677302e-06, "loss": 1.2561, "step": 2315 }, { "epoch": 0.5867004433185561, "grad_norm": 3.739330530166626, "learning_rate": 9.876523539153436e-06, "loss": 1.1427, "step": 2316 }, { "epoch": 0.5869537682077264, "grad_norm": 3.611651659011841, "learning_rate": 9.876338420699076e-06, "loss": 1.2367, "step": 2317 }, { "epoch": 0.5872070930968968, "grad_norm": 3.9292173385620117, "learning_rate": 9.876153165319417e-06, "loss": 1.3312, "step": 2318 }, { "epoch": 0.5874604179860672, "grad_norm": 3.6973323822021484, "learning_rate": 9.875967773019664e-06, "loss": 1.2351, "step": 2319 }, { "epoch": 0.5877137428752375, "grad_norm": 3.271484613418579, "learning_rate": 9.875782243805019e-06, "loss": 1.1391, "step": 2320 }, { "epoch": 0.5879670677644079, "grad_norm": 3.175833225250244, "learning_rate": 9.875596577680695e-06, "loss": 1.0855, "step": 2321 }, { "epoch": 0.5882203926535782, "grad_norm": 3.5260026454925537, "learning_rate": 9.875410774651903e-06, "loss": 1.1706, "step": 2322 }, { "epoch": 0.5884737175427486, "grad_norm": 3.352823257446289, "learning_rate": 9.875224834723865e-06, "loss": 1.2847, "step": 2323 }, { "epoch": 0.588727042431919, "grad_norm": 3.724398374557495, "learning_rate": 9.875038757901795e-06, "loss": 1.2112, "step": 2324 }, { "epoch": 0.5889803673210893, "grad_norm": 3.6846039295196533, "learning_rate": 9.874852544190923e-06, "loss": 1.1993, "step": 2325 }, { "epoch": 0.5892336922102597, "grad_norm": 3.6119465827941895, "learning_rate": 9.874666193596476e-06, "loss": 1.1627, "step": 2326 }, { "epoch": 0.58948701709943, "grad_norm": 3.6697070598602295, "learning_rate": 9.874479706123686e-06, "loss": 1.193, "step": 2327 }, { "epoch": 0.5897403419886004, "grad_norm": 3.8406481742858887, "learning_rate": 9.87429308177779e-06, "loss": 1.0613, "step": 2328 }, { "epoch": 0.5899936668777708, "grad_norm": 3.6351852416992188, "learning_rate": 9.874106320564033e-06, "loss": 1.1984, "step": 2329 }, { "epoch": 0.5902469917669411, "grad_norm": 4.594501495361328, "learning_rate": 9.87391942248765e-06, "loss": 1.139, "step": 2330 }, { "epoch": 0.5905003166561115, "grad_norm": 3.439969062805176, "learning_rate": 
9.873732387553897e-06, "loss": 1.0793, "step": 2331 }, { "epoch": 0.5907536415452819, "grad_norm": 4.089970588684082, "learning_rate": 9.87354521576802e-06, "loss": 1.4473, "step": 2332 }, { "epoch": 0.5910069664344522, "grad_norm": 3.807089328765869, "learning_rate": 9.87335790713528e-06, "loss": 1.2171, "step": 2333 }, { "epoch": 0.5912602913236226, "grad_norm": 3.730450391769409, "learning_rate": 9.873170461660934e-06, "loss": 1.1716, "step": 2334 }, { "epoch": 0.5915136162127929, "grad_norm": 3.7394068241119385, "learning_rate": 9.872982879350243e-06, "loss": 1.3105, "step": 2335 }, { "epoch": 0.5917669411019633, "grad_norm": 3.285534143447876, "learning_rate": 9.872795160208478e-06, "loss": 1.1275, "step": 2336 }, { "epoch": 0.5920202659911337, "grad_norm": 3.6585047245025635, "learning_rate": 9.87260730424091e-06, "loss": 1.1234, "step": 2337 }, { "epoch": 0.592273590880304, "grad_norm": 3.874295473098755, "learning_rate": 9.872419311452811e-06, "loss": 1.1829, "step": 2338 }, { "epoch": 0.5925269157694744, "grad_norm": 3.504314422607422, "learning_rate": 9.872231181849461e-06, "loss": 1.1766, "step": 2339 }, { "epoch": 0.5927802406586448, "grad_norm": 3.646886110305786, "learning_rate": 9.872042915436144e-06, "loss": 1.2219, "step": 2340 }, { "epoch": 0.5930335655478151, "grad_norm": 4.184945106506348, "learning_rate": 9.871854512218145e-06, "loss": 1.2237, "step": 2341 }, { "epoch": 0.5932868904369855, "grad_norm": 3.251213312149048, "learning_rate": 9.871665972200754e-06, "loss": 0.9566, "step": 2342 }, { "epoch": 0.5935402153261558, "grad_norm": 3.8858537673950195, "learning_rate": 9.871477295389266e-06, "loss": 1.2412, "step": 2343 }, { "epoch": 0.5937935402153262, "grad_norm": 3.5049939155578613, "learning_rate": 9.87128848178898e-06, "loss": 1.0805, "step": 2344 }, { "epoch": 0.5940468651044966, "grad_norm": 4.045013904571533, "learning_rate": 9.871099531405195e-06, "loss": 1.3981, "step": 2345 }, { "epoch": 0.5943001899936668, "grad_norm": 3.804518461227417, "learning_rate": 9.870910444243219e-06, "loss": 1.1937, "step": 2346 }, { "epoch": 0.5945535148828373, "grad_norm": 3.7914228439331055, "learning_rate": 9.87072122030836e-06, "loss": 1.2388, "step": 2347 }, { "epoch": 0.5948068397720075, "grad_norm": 3.981391191482544, "learning_rate": 9.870531859605931e-06, "loss": 1.151, "step": 2348 }, { "epoch": 0.595060164661178, "grad_norm": 3.6058106422424316, "learning_rate": 9.870342362141252e-06, "loss": 1.0759, "step": 2349 }, { "epoch": 0.5953134895503484, "grad_norm": 3.439100742340088, "learning_rate": 9.870152727919642e-06, "loss": 1.196, "step": 2350 }, { "epoch": 0.5955668144395186, "grad_norm": 3.8293299674987793, "learning_rate": 9.869962956946426e-06, "loss": 1.2095, "step": 2351 }, { "epoch": 0.595820139328689, "grad_norm": 3.6827378273010254, "learning_rate": 9.869773049226932e-06, "loss": 1.0907, "step": 2352 }, { "epoch": 0.5960734642178595, "grad_norm": 3.8984475135803223, "learning_rate": 9.869583004766491e-06, "loss": 1.1624, "step": 2353 }, { "epoch": 0.5963267891070297, "grad_norm": 3.549410581588745, "learning_rate": 9.869392823570445e-06, "loss": 1.2173, "step": 2354 }, { "epoch": 0.5965801139962001, "grad_norm": 3.6839888095855713, "learning_rate": 9.869202505644128e-06, "loss": 1.179, "step": 2355 }, { "epoch": 0.5968334388853704, "grad_norm": 3.5096590518951416, "learning_rate": 9.869012050992889e-06, "loss": 1.1676, "step": 2356 }, { "epoch": 0.5970867637745408, "grad_norm": 3.367306709289551, "learning_rate": 9.868821459622071e-06, "loss": 1.0645, 
"step": 2357 }, { "epoch": 0.5973400886637112, "grad_norm": 3.5231404304504395, "learning_rate": 9.868630731537031e-06, "loss": 1.1651, "step": 2358 }, { "epoch": 0.5975934135528815, "grad_norm": 3.431152820587158, "learning_rate": 9.86843986674312e-06, "loss": 1.1637, "step": 2359 }, { "epoch": 0.5978467384420519, "grad_norm": 3.711580753326416, "learning_rate": 9.868248865245702e-06, "loss": 1.3629, "step": 2360 }, { "epoch": 0.5981000633312223, "grad_norm": 3.830181837081909, "learning_rate": 9.868057727050134e-06, "loss": 1.1988, "step": 2361 }, { "epoch": 0.5983533882203926, "grad_norm": 4.121151924133301, "learning_rate": 9.867866452161789e-06, "loss": 1.2656, "step": 2362 }, { "epoch": 0.598606713109563, "grad_norm": 3.4987690448760986, "learning_rate": 9.867675040586035e-06, "loss": 1.1397, "step": 2363 }, { "epoch": 0.5988600379987333, "grad_norm": 3.3809003829956055, "learning_rate": 9.867483492328246e-06, "loss": 1.1094, "step": 2364 }, { "epoch": 0.5991133628879037, "grad_norm": 3.502246618270874, "learning_rate": 9.867291807393803e-06, "loss": 1.1703, "step": 2365 }, { "epoch": 0.5993666877770741, "grad_norm": 3.4921200275421143, "learning_rate": 9.867099985788087e-06, "loss": 1.1978, "step": 2366 }, { "epoch": 0.5996200126662444, "grad_norm": 4.000209808349609, "learning_rate": 9.866908027516484e-06, "loss": 1.3362, "step": 2367 }, { "epoch": 0.5998733375554148, "grad_norm": 3.200626850128174, "learning_rate": 9.866715932584385e-06, "loss": 1.0594, "step": 2368 }, { "epoch": 0.6001266624445852, "grad_norm": 3.8519949913024902, "learning_rate": 9.866523700997183e-06, "loss": 1.2104, "step": 2369 }, { "epoch": 0.6003799873337555, "grad_norm": 3.4061481952667236, "learning_rate": 9.866331332760277e-06, "loss": 1.1132, "step": 2370 }, { "epoch": 0.6006333122229259, "grad_norm": 3.539203405380249, "learning_rate": 9.866138827879068e-06, "loss": 1.2231, "step": 2371 }, { "epoch": 0.6008866371120962, "grad_norm": 3.539919137954712, "learning_rate": 9.865946186358962e-06, "loss": 1.284, "step": 2372 }, { "epoch": 0.6011399620012666, "grad_norm": 3.8084702491760254, "learning_rate": 9.865753408205365e-06, "loss": 1.2749, "step": 2373 }, { "epoch": 0.601393286890437, "grad_norm": 3.732297420501709, "learning_rate": 9.865560493423695e-06, "loss": 1.0956, "step": 2374 }, { "epoch": 0.6016466117796073, "grad_norm": 3.882216691970825, "learning_rate": 9.865367442019366e-06, "loss": 1.386, "step": 2375 }, { "epoch": 0.6018999366687777, "grad_norm": 3.396533727645874, "learning_rate": 9.8651742539978e-06, "loss": 1.0325, "step": 2376 }, { "epoch": 0.602153261557948, "grad_norm": 4.000504016876221, "learning_rate": 9.86498092936442e-06, "loss": 1.3059, "step": 2377 }, { "epoch": 0.6024065864471184, "grad_norm": 3.0580947399139404, "learning_rate": 9.864787468124658e-06, "loss": 1.0508, "step": 2378 }, { "epoch": 0.6026599113362888, "grad_norm": 3.613420248031616, "learning_rate": 9.864593870283942e-06, "loss": 1.1817, "step": 2379 }, { "epoch": 0.6029132362254591, "grad_norm": 3.494577646255493, "learning_rate": 9.86440013584771e-06, "loss": 1.0924, "step": 2380 }, { "epoch": 0.6031665611146295, "grad_norm": 3.633143424987793, "learning_rate": 9.864206264821403e-06, "loss": 1.2451, "step": 2381 }, { "epoch": 0.6034198860037999, "grad_norm": 3.626067638397217, "learning_rate": 9.864012257210462e-06, "loss": 1.1005, "step": 2382 }, { "epoch": 0.6036732108929702, "grad_norm": 3.796259641647339, "learning_rate": 9.863818113020338e-06, "loss": 1.217, "step": 2383 }, { "epoch": 0.6039265357821406, 
"grad_norm": 3.874244451522827, "learning_rate": 9.86362383225648e-06, "loss": 1.2143, "step": 2384 }, { "epoch": 0.6041798606713109, "grad_norm": 3.2779505252838135, "learning_rate": 9.863429414924346e-06, "loss": 1.1315, "step": 2385 }, { "epoch": 0.6044331855604813, "grad_norm": 3.3644847869873047, "learning_rate": 9.863234861029393e-06, "loss": 1.1816, "step": 2386 }, { "epoch": 0.6046865104496517, "grad_norm": 3.528351306915283, "learning_rate": 9.863040170577084e-06, "loss": 1.1545, "step": 2387 }, { "epoch": 0.604939835338822, "grad_norm": 3.5509703159332275, "learning_rate": 9.862845343572885e-06, "loss": 1.1927, "step": 2388 }, { "epoch": 0.6051931602279924, "grad_norm": 3.670538902282715, "learning_rate": 9.86265038002227e-06, "loss": 1.1376, "step": 2389 }, { "epoch": 0.6054464851171628, "grad_norm": 3.4490549564361572, "learning_rate": 9.862455279930709e-06, "loss": 1.1018, "step": 2390 }, { "epoch": 0.6056998100063331, "grad_norm": 3.8076932430267334, "learning_rate": 9.862260043303685e-06, "loss": 1.2233, "step": 2391 }, { "epoch": 0.6059531348955035, "grad_norm": 3.9804649353027344, "learning_rate": 9.862064670146676e-06, "loss": 1.1906, "step": 2392 }, { "epoch": 0.6062064597846738, "grad_norm": 3.8885498046875, "learning_rate": 9.86186916046517e-06, "loss": 1.2377, "step": 2393 }, { "epoch": 0.6064597846738442, "grad_norm": 3.717961549758911, "learning_rate": 9.861673514264658e-06, "loss": 1.2273, "step": 2394 }, { "epoch": 0.6067131095630146, "grad_norm": 3.9650092124938965, "learning_rate": 9.861477731550631e-06, "loss": 1.1808, "step": 2395 }, { "epoch": 0.6069664344521849, "grad_norm": 3.5600218772888184, "learning_rate": 9.861281812328587e-06, "loss": 1.0836, "step": 2396 }, { "epoch": 0.6072197593413553, "grad_norm": 3.285839319229126, "learning_rate": 9.86108575660403e-06, "loss": 1.1076, "step": 2397 }, { "epoch": 0.6074730842305256, "grad_norm": 3.713930368423462, "learning_rate": 9.860889564382463e-06, "loss": 1.2391, "step": 2398 }, { "epoch": 0.607726409119696, "grad_norm": 3.4917590618133545, "learning_rate": 9.860693235669394e-06, "loss": 1.1042, "step": 2399 }, { "epoch": 0.6079797340088664, "grad_norm": 4.100406646728516, "learning_rate": 9.860496770470338e-06, "loss": 1.4159, "step": 2400 }, { "epoch": 0.6082330588980367, "grad_norm": 3.681727647781372, "learning_rate": 9.860300168790811e-06, "loss": 1.2346, "step": 2401 }, { "epoch": 0.6084863837872071, "grad_norm": 3.341277599334717, "learning_rate": 9.860103430636332e-06, "loss": 1.1499, "step": 2402 }, { "epoch": 0.6087397086763775, "grad_norm": 3.523472309112549, "learning_rate": 9.859906556012427e-06, "loss": 1.0684, "step": 2403 }, { "epoch": 0.6089930335655478, "grad_norm": 3.5669310092926025, "learning_rate": 9.859709544924624e-06, "loss": 1.2042, "step": 2404 }, { "epoch": 0.6092463584547182, "grad_norm": 3.4634294509887695, "learning_rate": 9.859512397378455e-06, "loss": 1.224, "step": 2405 }, { "epoch": 0.6094996833438885, "grad_norm": 3.555194139480591, "learning_rate": 9.859315113379455e-06, "loss": 1.2471, "step": 2406 }, { "epoch": 0.6097530082330589, "grad_norm": 3.5009043216705322, "learning_rate": 9.859117692933163e-06, "loss": 1.18, "step": 2407 }, { "epoch": 0.6100063331222293, "grad_norm": 3.4651875495910645, "learning_rate": 9.858920136045124e-06, "loss": 1.0705, "step": 2408 }, { "epoch": 0.6102596580113996, "grad_norm": 3.6112053394317627, "learning_rate": 9.858722442720885e-06, "loss": 1.1159, "step": 2409 }, { "epoch": 0.61051298290057, "grad_norm": 3.9805543422698975, 
"learning_rate": 9.858524612965997e-06, "loss": 1.3357, "step": 2410 }, { "epoch": 0.6107663077897404, "grad_norm": 3.884993076324463, "learning_rate": 9.858326646786017e-06, "loss": 1.2623, "step": 2411 }, { "epoch": 0.6110196326789107, "grad_norm": 3.940034866333008, "learning_rate": 9.858128544186498e-06, "loss": 1.2967, "step": 2412 }, { "epoch": 0.6112729575680811, "grad_norm": 3.6648826599121094, "learning_rate": 9.85793030517301e-06, "loss": 1.1501, "step": 2413 }, { "epoch": 0.6115262824572514, "grad_norm": 3.650735378265381, "learning_rate": 9.857731929751114e-06, "loss": 1.3022, "step": 2414 }, { "epoch": 0.6117796073464218, "grad_norm": 3.5040194988250732, "learning_rate": 9.857533417926382e-06, "loss": 1.1969, "step": 2415 }, { "epoch": 0.6120329322355922, "grad_norm": 3.9353554248809814, "learning_rate": 9.857334769704388e-06, "loss": 1.1229, "step": 2416 }, { "epoch": 0.6122862571247625, "grad_norm": 3.66317081451416, "learning_rate": 9.85713598509071e-06, "loss": 1.252, "step": 2417 }, { "epoch": 0.6125395820139329, "grad_norm": 3.60054349899292, "learning_rate": 9.856937064090931e-06, "loss": 1.1752, "step": 2418 }, { "epoch": 0.6127929069031033, "grad_norm": 3.854161262512207, "learning_rate": 9.856738006710636e-06, "loss": 1.1795, "step": 2419 }, { "epoch": 0.6130462317922736, "grad_norm": 3.4885568618774414, "learning_rate": 9.856538812955411e-06, "loss": 1.1409, "step": 2420 }, { "epoch": 0.613299556681444, "grad_norm": 3.6377522945404053, "learning_rate": 9.856339482830856e-06, "loss": 1.2984, "step": 2421 }, { "epoch": 0.6135528815706143, "grad_norm": 3.71474027633667, "learning_rate": 9.856140016342562e-06, "loss": 1.1658, "step": 2422 }, { "epoch": 0.6138062064597847, "grad_norm": 3.5078494548797607, "learning_rate": 9.855940413496134e-06, "loss": 1.0478, "step": 2423 }, { "epoch": 0.6140595313489551, "grad_norm": 3.417715072631836, "learning_rate": 9.855740674297173e-06, "loss": 1.0735, "step": 2424 }, { "epoch": 0.6143128562381254, "grad_norm": 4.048703193664551, "learning_rate": 9.855540798751292e-06, "loss": 1.2193, "step": 2425 }, { "epoch": 0.6145661811272958, "grad_norm": 4.096585750579834, "learning_rate": 9.8553407868641e-06, "loss": 1.4754, "step": 2426 }, { "epoch": 0.6148195060164661, "grad_norm": 3.781301975250244, "learning_rate": 9.855140638641213e-06, "loss": 1.1606, "step": 2427 }, { "epoch": 0.6150728309056365, "grad_norm": 4.041252136230469, "learning_rate": 9.854940354088253e-06, "loss": 1.2971, "step": 2428 }, { "epoch": 0.6153261557948069, "grad_norm": 3.318225383758545, "learning_rate": 9.854739933210846e-06, "loss": 1.0641, "step": 2429 }, { "epoch": 0.6155794806839772, "grad_norm": 3.967278003692627, "learning_rate": 9.854539376014614e-06, "loss": 1.3346, "step": 2430 }, { "epoch": 0.6158328055731476, "grad_norm": 3.648906707763672, "learning_rate": 9.854338682505193e-06, "loss": 1.2795, "step": 2431 }, { "epoch": 0.616086130462318, "grad_norm": 3.5962131023406982, "learning_rate": 9.854137852688216e-06, "loss": 1.1027, "step": 2432 }, { "epoch": 0.6163394553514883, "grad_norm": 3.73262619972229, "learning_rate": 9.853936886569324e-06, "loss": 1.2438, "step": 2433 }, { "epoch": 0.6165927802406587, "grad_norm": 3.7775135040283203, "learning_rate": 9.853735784154159e-06, "loss": 1.2827, "step": 2434 }, { "epoch": 0.616846105129829, "grad_norm": 4.0967302322387695, "learning_rate": 9.853534545448367e-06, "loss": 1.2889, "step": 2435 }, { "epoch": 0.6170994300189994, "grad_norm": 3.595000982284546, "learning_rate": 9.8533331704576e-06, "loss": 
1.2437, "step": 2436 }, { "epoch": 0.6173527549081698, "grad_norm": 3.4501607418060303, "learning_rate": 9.853131659187513e-06, "loss": 1.1672, "step": 2437 }, { "epoch": 0.6176060797973401, "grad_norm": 3.6820144653320312, "learning_rate": 9.852930011643763e-06, "loss": 1.2082, "step": 2438 }, { "epoch": 0.6178594046865105, "grad_norm": 3.65936279296875, "learning_rate": 9.852728227832013e-06, "loss": 1.2964, "step": 2439 }, { "epoch": 0.6181127295756809, "grad_norm": 3.977670431137085, "learning_rate": 9.852526307757928e-06, "loss": 1.2392, "step": 2440 }, { "epoch": 0.6183660544648512, "grad_norm": 3.5032083988189697, "learning_rate": 9.85232425142718e-06, "loss": 1.1944, "step": 2441 }, { "epoch": 0.6186193793540216, "grad_norm": 3.7888448238372803, "learning_rate": 9.852122058845439e-06, "loss": 1.2319, "step": 2442 }, { "epoch": 0.6188727042431919, "grad_norm": 3.6973185539245605, "learning_rate": 9.851919730018386e-06, "loss": 1.2063, "step": 2443 }, { "epoch": 0.6191260291323623, "grad_norm": 3.6707706451416016, "learning_rate": 9.851717264951702e-06, "loss": 1.1027, "step": 2444 }, { "epoch": 0.6193793540215327, "grad_norm": 3.313488721847534, "learning_rate": 9.85151466365107e-06, "loss": 1.1288, "step": 2445 }, { "epoch": 0.619632678910703, "grad_norm": 3.5334272384643555, "learning_rate": 9.851311926122179e-06, "loss": 1.1914, "step": 2446 }, { "epoch": 0.6198860037998734, "grad_norm": 3.603609323501587, "learning_rate": 9.851109052370725e-06, "loss": 1.2569, "step": 2447 }, { "epoch": 0.6201393286890436, "grad_norm": 3.827209234237671, "learning_rate": 9.850906042402399e-06, "loss": 1.223, "step": 2448 }, { "epoch": 0.620392653578214, "grad_norm": 3.4155426025390625, "learning_rate": 9.850702896222908e-06, "loss": 1.1548, "step": 2449 }, { "epoch": 0.6206459784673845, "grad_norm": 3.9286139011383057, "learning_rate": 9.850499613837952e-06, "loss": 1.3792, "step": 2450 }, { "epoch": 0.6208993033565547, "grad_norm": 3.6877617835998535, "learning_rate": 9.850296195253241e-06, "loss": 1.1077, "step": 2451 }, { "epoch": 0.6211526282457251, "grad_norm": 3.57601261138916, "learning_rate": 9.850092640474485e-06, "loss": 1.2984, "step": 2452 }, { "epoch": 0.6214059531348956, "grad_norm": 3.9190783500671387, "learning_rate": 9.849888949507402e-06, "loss": 1.3625, "step": 2453 }, { "epoch": 0.6216592780240658, "grad_norm": 3.610003709793091, "learning_rate": 9.849685122357708e-06, "loss": 1.2229, "step": 2454 }, { "epoch": 0.6219126029132362, "grad_norm": 3.735644578933716, "learning_rate": 9.849481159031131e-06, "loss": 1.1831, "step": 2455 }, { "epoch": 0.6221659278024065, "grad_norm": 3.2859835624694824, "learning_rate": 9.849277059533395e-06, "loss": 1.0491, "step": 2456 }, { "epoch": 0.6224192526915769, "grad_norm": 3.327444076538086, "learning_rate": 9.849072823870232e-06, "loss": 1.1315, "step": 2457 }, { "epoch": 0.6226725775807473, "grad_norm": 3.6521694660186768, "learning_rate": 9.848868452047378e-06, "loss": 1.1709, "step": 2458 }, { "epoch": 0.6229259024699176, "grad_norm": 3.5883758068084717, "learning_rate": 9.84866394407057e-06, "loss": 1.1678, "step": 2459 }, { "epoch": 0.623179227359088, "grad_norm": 3.2457404136657715, "learning_rate": 9.84845929994555e-06, "loss": 1.0339, "step": 2460 }, { "epoch": 0.6234325522482584, "grad_norm": 3.464756727218628, "learning_rate": 9.848254519678064e-06, "loss": 1.1122, "step": 2461 }, { "epoch": 0.6236858771374287, "grad_norm": 3.6326985359191895, "learning_rate": 9.848049603273865e-06, "loss": 1.1961, "step": 2462 }, { "epoch": 
0.6239392020265991, "grad_norm": 3.8263144493103027, "learning_rate": 9.847844550738706e-06, "loss": 1.1436, "step": 2463 }, { "epoch": 0.6241925269157694, "grad_norm": 3.556764602661133, "learning_rate": 9.847639362078344e-06, "loss": 1.1058, "step": 2464 }, { "epoch": 0.6244458518049398, "grad_norm": 3.9600555896759033, "learning_rate": 9.847434037298538e-06, "loss": 1.2834, "step": 2465 }, { "epoch": 0.6246991766941102, "grad_norm": 4.088186740875244, "learning_rate": 9.847228576405058e-06, "loss": 1.2617, "step": 2466 }, { "epoch": 0.6249525015832805, "grad_norm": 3.8825464248657227, "learning_rate": 9.847022979403671e-06, "loss": 1.1537, "step": 2467 }, { "epoch": 0.6252058264724509, "grad_norm": 3.252762794494629, "learning_rate": 9.84681724630015e-06, "loss": 1.1198, "step": 2468 }, { "epoch": 0.6254591513616212, "grad_norm": 3.690809488296509, "learning_rate": 9.846611377100274e-06, "loss": 1.2873, "step": 2469 }, { "epoch": 0.6257124762507916, "grad_norm": 3.537818193435669, "learning_rate": 9.846405371809821e-06, "loss": 1.2116, "step": 2470 }, { "epoch": 0.625965801139962, "grad_norm": 3.7453677654266357, "learning_rate": 9.846199230434576e-06, "loss": 1.2925, "step": 2471 }, { "epoch": 0.6262191260291323, "grad_norm": 3.2455244064331055, "learning_rate": 9.84599295298033e-06, "loss": 1.0063, "step": 2472 }, { "epoch": 0.6264724509183027, "grad_norm": 3.628256320953369, "learning_rate": 9.845786539452871e-06, "loss": 1.1957, "step": 2473 }, { "epoch": 0.6267257758074731, "grad_norm": 3.5005202293395996, "learning_rate": 9.845579989857998e-06, "loss": 1.1522, "step": 2474 }, { "epoch": 0.6269791006966434, "grad_norm": 3.5334384441375732, "learning_rate": 9.845373304201509e-06, "loss": 1.375, "step": 2475 }, { "epoch": 0.6272324255858138, "grad_norm": 3.3087430000305176, "learning_rate": 9.84516648248921e-06, "loss": 1.1753, "step": 2476 }, { "epoch": 0.6274857504749841, "grad_norm": 3.5373950004577637, "learning_rate": 9.844959524726907e-06, "loss": 1.1284, "step": 2477 }, { "epoch": 0.6277390753641545, "grad_norm": 3.617833375930786, "learning_rate": 9.84475243092041e-06, "loss": 1.0662, "step": 2478 }, { "epoch": 0.6279924002533249, "grad_norm": 3.5470521450042725, "learning_rate": 9.844545201075535e-06, "loss": 1.1782, "step": 2479 }, { "epoch": 0.6282457251424952, "grad_norm": 3.921121835708618, "learning_rate": 9.844337835198102e-06, "loss": 1.3281, "step": 2480 }, { "epoch": 0.6284990500316656, "grad_norm": 3.5617926120758057, "learning_rate": 9.844130333293932e-06, "loss": 1.2355, "step": 2481 }, { "epoch": 0.628752374920836, "grad_norm": 3.533834218978882, "learning_rate": 9.843922695368855e-06, "loss": 1.1639, "step": 2482 }, { "epoch": 0.6290056998100063, "grad_norm": 3.3362550735473633, "learning_rate": 9.843714921428698e-06, "loss": 1.2091, "step": 2483 }, { "epoch": 0.6292590246991767, "grad_norm": 3.5323235988616943, "learning_rate": 9.843507011479296e-06, "loss": 1.356, "step": 2484 }, { "epoch": 0.629512349588347, "grad_norm": 3.518071174621582, "learning_rate": 9.843298965526486e-06, "loss": 1.1021, "step": 2485 }, { "epoch": 0.6297656744775174, "grad_norm": 3.7014873027801514, "learning_rate": 9.843090783576112e-06, "loss": 1.3844, "step": 2486 }, { "epoch": 0.6300189993666878, "grad_norm": 3.5969622135162354, "learning_rate": 9.842882465634019e-06, "loss": 1.1927, "step": 2487 }, { "epoch": 0.6302723242558581, "grad_norm": 3.5788826942443848, "learning_rate": 9.842674011706057e-06, "loss": 1.2414, "step": 2488 }, { "epoch": 0.6305256491450285, "grad_norm": 
4.032215595245361, "learning_rate": 9.842465421798074e-06, "loss": 1.2238, "step": 2489 }, { "epoch": 0.6307789740341989, "grad_norm": 3.575622797012329, "learning_rate": 9.842256695915937e-06, "loss": 1.3002, "step": 2490 }, { "epoch": 0.6310322989233692, "grad_norm": 3.4500770568847656, "learning_rate": 9.842047834065498e-06, "loss": 1.1602, "step": 2491 }, { "epoch": 0.6312856238125396, "grad_norm": 3.9447834491729736, "learning_rate": 9.841838836252627e-06, "loss": 1.1706, "step": 2492 }, { "epoch": 0.6315389487017099, "grad_norm": 3.377351760864258, "learning_rate": 9.84162970248319e-06, "loss": 1.2073, "step": 2493 }, { "epoch": 0.6317922735908803, "grad_norm": 3.500148296356201, "learning_rate": 9.84142043276306e-06, "loss": 1.2556, "step": 2494 }, { "epoch": 0.6320455984800507, "grad_norm": 3.5882997512817383, "learning_rate": 9.841211027098114e-06, "loss": 1.1664, "step": 2495 }, { "epoch": 0.632298923369221, "grad_norm": 3.531613826751709, "learning_rate": 9.84100148549423e-06, "loss": 1.2383, "step": 2496 }, { "epoch": 0.6325522482583914, "grad_norm": 3.6397781372070312, "learning_rate": 9.840791807957294e-06, "loss": 1.2755, "step": 2497 }, { "epoch": 0.6328055731475617, "grad_norm": 3.521218776702881, "learning_rate": 9.840581994493193e-06, "loss": 1.1873, "step": 2498 }, { "epoch": 0.6330588980367321, "grad_norm": 3.705307960510254, "learning_rate": 9.840372045107818e-06, "loss": 1.2026, "step": 2499 }, { "epoch": 0.6333122229259025, "grad_norm": 3.428232192993164, "learning_rate": 9.840161959807064e-06, "loss": 1.1304, "step": 2500 }, { "epoch": 0.6333122229259025, "eval_loss": 1.2158032655715942, "eval_runtime": 12.2773, "eval_samples_per_second": 32.58, "eval_steps_per_second": 4.073, "step": 2500 }, { "epoch": 0.6335655478150728, "grad_norm": 3.6344573497772217, "learning_rate": 9.839951738596831e-06, "loss": 1.125, "step": 2501 }, { "epoch": 0.6338188727042432, "grad_norm": 3.926832437515259, "learning_rate": 9.839741381483021e-06, "loss": 1.1736, "step": 2502 }, { "epoch": 0.6340721975934136, "grad_norm": 3.35507869720459, "learning_rate": 9.839530888471542e-06, "loss": 1.0818, "step": 2503 }, { "epoch": 0.6343255224825839, "grad_norm": 3.9087727069854736, "learning_rate": 9.839320259568304e-06, "loss": 1.2332, "step": 2504 }, { "epoch": 0.6345788473717543, "grad_norm": 3.944812536239624, "learning_rate": 9.839109494779222e-06, "loss": 1.3249, "step": 2505 }, { "epoch": 0.6348321722609246, "grad_norm": 3.5641653537750244, "learning_rate": 9.838898594110211e-06, "loss": 1.2282, "step": 2506 }, { "epoch": 0.635085497150095, "grad_norm": 3.5591001510620117, "learning_rate": 9.838687557567199e-06, "loss": 1.2849, "step": 2507 }, { "epoch": 0.6353388220392654, "grad_norm": 4.062427520751953, "learning_rate": 9.838476385156103e-06, "loss": 1.2333, "step": 2508 }, { "epoch": 0.6355921469284357, "grad_norm": 3.5046918392181396, "learning_rate": 9.838265076882863e-06, "loss": 1.2505, "step": 2509 }, { "epoch": 0.6358454718176061, "grad_norm": 3.372955083847046, "learning_rate": 9.838053632753403e-06, "loss": 1.2336, "step": 2510 }, { "epoch": 0.6360987967067765, "grad_norm": 3.606010913848877, "learning_rate": 9.837842052773667e-06, "loss": 1.2544, "step": 2511 }, { "epoch": 0.6363521215959468, "grad_norm": 4.002732753753662, "learning_rate": 9.837630336949592e-06, "loss": 1.2273, "step": 2512 }, { "epoch": 0.6366054464851172, "grad_norm": 3.352332353591919, "learning_rate": 9.837418485287126e-06, "loss": 1.0451, "step": 2513 }, { "epoch": 0.6368587713742875, "grad_norm": 
3.3298330307006836, "learning_rate": 9.837206497792216e-06, "loss": 1.0997, "step": 2514 }, { "epoch": 0.6371120962634579, "grad_norm": 3.6441266536712646, "learning_rate": 9.836994374470814e-06, "loss": 1.219, "step": 2515 }, { "epoch": 0.6373654211526283, "grad_norm": 3.3209352493286133, "learning_rate": 9.836782115328878e-06, "loss": 1.0118, "step": 2516 }, { "epoch": 0.6376187460417986, "grad_norm": 3.3625969886779785, "learning_rate": 9.836569720372367e-06, "loss": 1.1919, "step": 2517 }, { "epoch": 0.637872070930969, "grad_norm": 3.6182706356048584, "learning_rate": 9.836357189607243e-06, "loss": 1.2894, "step": 2518 }, { "epoch": 0.6381253958201393, "grad_norm": 3.827185869216919, "learning_rate": 9.836144523039476e-06, "loss": 1.2509, "step": 2519 }, { "epoch": 0.6383787207093097, "grad_norm": 3.77411150932312, "learning_rate": 9.83593172067504e-06, "loss": 1.2143, "step": 2520 }, { "epoch": 0.6386320455984801, "grad_norm": 3.397799491882324, "learning_rate": 9.835718782519905e-06, "loss": 1.1697, "step": 2521 }, { "epoch": 0.6388853704876504, "grad_norm": 4.167684078216553, "learning_rate": 9.835505708580055e-06, "loss": 1.3649, "step": 2522 }, { "epoch": 0.6391386953768208, "grad_norm": 3.54966402053833, "learning_rate": 9.83529249886147e-06, "loss": 1.1086, "step": 2523 }, { "epoch": 0.6393920202659912, "grad_norm": 3.8356873989105225, "learning_rate": 9.83507915337014e-06, "loss": 1.2253, "step": 2524 }, { "epoch": 0.6396453451551615, "grad_norm": 3.2942631244659424, "learning_rate": 9.83486567211205e-06, "loss": 1.0629, "step": 2525 }, { "epoch": 0.6398986700443319, "grad_norm": 3.718168258666992, "learning_rate": 9.8346520550932e-06, "loss": 1.2178, "step": 2526 }, { "epoch": 0.6401519949335022, "grad_norm": 3.384877920150757, "learning_rate": 9.834438302319585e-06, "loss": 1.0572, "step": 2527 }, { "epoch": 0.6404053198226726, "grad_norm": 3.6040239334106445, "learning_rate": 9.834224413797208e-06, "loss": 1.2056, "step": 2528 }, { "epoch": 0.640658644711843, "grad_norm": 3.670182943344116, "learning_rate": 9.834010389532075e-06, "loss": 1.2164, "step": 2529 }, { "epoch": 0.6409119696010133, "grad_norm": 3.1873552799224854, "learning_rate": 9.833796229530199e-06, "loss": 1.0307, "step": 2530 }, { "epoch": 0.6411652944901837, "grad_norm": 3.485616445541382, "learning_rate": 9.833581933797586e-06, "loss": 1.1692, "step": 2531 }, { "epoch": 0.6414186193793541, "grad_norm": 3.664083957672119, "learning_rate": 9.833367502340261e-06, "loss": 1.1873, "step": 2532 }, { "epoch": 0.6416719442685244, "grad_norm": 3.7422778606414795, "learning_rate": 9.833152935164242e-06, "loss": 1.2442, "step": 2533 }, { "epoch": 0.6419252691576948, "grad_norm": 3.2830452919006348, "learning_rate": 9.832938232275551e-06, "loss": 1.1344, "step": 2534 }, { "epoch": 0.6421785940468651, "grad_norm": 3.661639451980591, "learning_rate": 9.832723393680222e-06, "loss": 1.3233, "step": 2535 }, { "epoch": 0.6424319189360355, "grad_norm": 3.579019784927368, "learning_rate": 9.832508419384284e-06, "loss": 1.2245, "step": 2536 }, { "epoch": 0.6426852438252059, "grad_norm": 3.857731580734253, "learning_rate": 9.832293309393775e-06, "loss": 1.3119, "step": 2537 }, { "epoch": 0.6429385687143762, "grad_norm": 3.6141717433929443, "learning_rate": 9.832078063714733e-06, "loss": 1.1423, "step": 2538 }, { "epoch": 0.6431918936035466, "grad_norm": 3.8558592796325684, "learning_rate": 9.831862682353206e-06, "loss": 1.2059, "step": 2539 }, { "epoch": 0.643445218492717, "grad_norm": 3.6421942710876465, "learning_rate": 
9.831647165315238e-06, "loss": 1.1619, "step": 2540 }, { "epoch": 0.6436985433818873, "grad_norm": 3.2293131351470947, "learning_rate": 9.831431512606883e-06, "loss": 1.1146, "step": 2541 }, { "epoch": 0.6439518682710577, "grad_norm": 3.673706531524658, "learning_rate": 9.831215724234194e-06, "loss": 1.2253, "step": 2542 }, { "epoch": 0.644205193160228, "grad_norm": 3.3997058868408203, "learning_rate": 9.830999800203233e-06, "loss": 1.2331, "step": 2543 }, { "epoch": 0.6444585180493984, "grad_norm": 3.706429958343506, "learning_rate": 9.830783740520063e-06, "loss": 1.2251, "step": 2544 }, { "epoch": 0.6447118429385688, "grad_norm": 3.5596835613250732, "learning_rate": 9.830567545190747e-06, "loss": 1.22, "step": 2545 }, { "epoch": 0.644965167827739, "grad_norm": 3.6280670166015625, "learning_rate": 9.830351214221359e-06, "loss": 1.2266, "step": 2546 }, { "epoch": 0.6452184927169095, "grad_norm": 3.31756329536438, "learning_rate": 9.830134747617975e-06, "loss": 1.0577, "step": 2547 }, { "epoch": 0.6454718176060797, "grad_norm": 3.772573709487915, "learning_rate": 9.829918145386668e-06, "loss": 1.2135, "step": 2548 }, { "epoch": 0.6457251424952501, "grad_norm": 3.4784507751464844, "learning_rate": 9.829701407533526e-06, "loss": 1.1664, "step": 2549 }, { "epoch": 0.6459784673844206, "grad_norm": 3.6505179405212402, "learning_rate": 9.829484534064628e-06, "loss": 1.1425, "step": 2550 }, { "epoch": 0.6462317922735908, "grad_norm": 3.907944440841675, "learning_rate": 9.829267524986071e-06, "loss": 1.3133, "step": 2551 }, { "epoch": 0.6464851171627612, "grad_norm": 4.5512261390686035, "learning_rate": 9.829050380303945e-06, "loss": 1.2907, "step": 2552 }, { "epoch": 0.6467384420519317, "grad_norm": 3.2760396003723145, "learning_rate": 9.828833100024347e-06, "loss": 1.0209, "step": 2553 }, { "epoch": 0.6469917669411019, "grad_norm": 3.457796096801758, "learning_rate": 9.828615684153379e-06, "loss": 1.2081, "step": 2554 }, { "epoch": 0.6472450918302723, "grad_norm": 3.9074411392211914, "learning_rate": 9.828398132697146e-06, "loss": 1.263, "step": 2555 }, { "epoch": 0.6474984167194426, "grad_norm": 3.8507144451141357, "learning_rate": 9.828180445661754e-06, "loss": 1.2712, "step": 2556 }, { "epoch": 0.647751741608613, "grad_norm": 3.44443678855896, "learning_rate": 9.82796262305332e-06, "loss": 1.1306, "step": 2557 }, { "epoch": 0.6480050664977834, "grad_norm": 3.4174551963806152, "learning_rate": 9.827744664877959e-06, "loss": 1.2463, "step": 2558 }, { "epoch": 0.6482583913869537, "grad_norm": 4.097899436950684, "learning_rate": 9.82752657114179e-06, "loss": 1.3898, "step": 2559 }, { "epoch": 0.6485117162761241, "grad_norm": 3.9730896949768066, "learning_rate": 9.827308341850936e-06, "loss": 1.2523, "step": 2560 }, { "epoch": 0.6487650411652945, "grad_norm": 3.5033178329467773, "learning_rate": 9.827089977011528e-06, "loss": 1.1796, "step": 2561 }, { "epoch": 0.6490183660544648, "grad_norm": 3.499201536178589, "learning_rate": 9.826871476629696e-06, "loss": 1.16, "step": 2562 }, { "epoch": 0.6492716909436352, "grad_norm": 3.558795690536499, "learning_rate": 9.826652840711573e-06, "loss": 1.1215, "step": 2563 }, { "epoch": 0.6495250158328055, "grad_norm": 3.6329128742218018, "learning_rate": 9.826434069263301e-06, "loss": 1.2845, "step": 2564 }, { "epoch": 0.6497783407219759, "grad_norm": 3.5413899421691895, "learning_rate": 9.826215162291023e-06, "loss": 1.1394, "step": 2565 }, { "epoch": 0.6500316656111463, "grad_norm": 3.2898104190826416, "learning_rate": 9.825996119800887e-06, "loss": 1.075, 
"step": 2566 }, { "epoch": 0.6502849905003166, "grad_norm": 3.540954113006592, "learning_rate": 9.825776941799038e-06, "loss": 1.1711, "step": 2567 }, { "epoch": 0.650538315389487, "grad_norm": 3.7790684700012207, "learning_rate": 9.825557628291637e-06, "loss": 1.1565, "step": 2568 }, { "epoch": 0.6507916402786573, "grad_norm": 3.7246615886688232, "learning_rate": 9.82533817928484e-06, "loss": 1.2207, "step": 2569 }, { "epoch": 0.6510449651678277, "grad_norm": 3.244691848754883, "learning_rate": 9.825118594784805e-06, "loss": 1.1929, "step": 2570 }, { "epoch": 0.6512982900569981, "grad_norm": 3.6630892753601074, "learning_rate": 9.824898874797704e-06, "loss": 1.136, "step": 2571 }, { "epoch": 0.6515516149461684, "grad_norm": 3.627481698989868, "learning_rate": 9.824679019329704e-06, "loss": 1.2029, "step": 2572 }, { "epoch": 0.6518049398353388, "grad_norm": 4.051472187042236, "learning_rate": 9.824459028386976e-06, "loss": 1.2067, "step": 2573 }, { "epoch": 0.6520582647245092, "grad_norm": 4.0756001472473145, "learning_rate": 9.824238901975703e-06, "loss": 1.3799, "step": 2574 }, { "epoch": 0.6523115896136795, "grad_norm": 3.7352709770202637, "learning_rate": 9.82401864010206e-06, "loss": 1.123, "step": 2575 }, { "epoch": 0.6525649145028499, "grad_norm": 3.5075106620788574, "learning_rate": 9.823798242772235e-06, "loss": 1.0726, "step": 2576 }, { "epoch": 0.6528182393920202, "grad_norm": 3.140592098236084, "learning_rate": 9.823577709992417e-06, "loss": 1.0517, "step": 2577 }, { "epoch": 0.6530715642811906, "grad_norm": 3.906625747680664, "learning_rate": 9.823357041768798e-06, "loss": 1.2148, "step": 2578 }, { "epoch": 0.653324889170361, "grad_norm": 3.9738869667053223, "learning_rate": 9.823136238107573e-06, "loss": 1.3406, "step": 2579 }, { "epoch": 0.6535782140595313, "grad_norm": 3.5039169788360596, "learning_rate": 9.822915299014941e-06, "loss": 1.2235, "step": 2580 }, { "epoch": 0.6538315389487017, "grad_norm": 3.6179444789886475, "learning_rate": 9.822694224497111e-06, "loss": 1.3082, "step": 2581 }, { "epoch": 0.6540848638378721, "grad_norm": 3.5878806114196777, "learning_rate": 9.822473014560285e-06, "loss": 1.1701, "step": 2582 }, { "epoch": 0.6543381887270424, "grad_norm": 3.4601778984069824, "learning_rate": 9.822251669210679e-06, "loss": 1.0544, "step": 2583 }, { "epoch": 0.6545915136162128, "grad_norm": 3.5456607341766357, "learning_rate": 9.822030188454506e-06, "loss": 1.2495, "step": 2584 }, { "epoch": 0.6548448385053831, "grad_norm": 4.021832466125488, "learning_rate": 9.821808572297984e-06, "loss": 1.4227, "step": 2585 }, { "epoch": 0.6550981633945535, "grad_norm": 3.634138345718384, "learning_rate": 9.821586820747337e-06, "loss": 1.1106, "step": 2586 }, { "epoch": 0.6553514882837239, "grad_norm": 3.8235223293304443, "learning_rate": 9.821364933808793e-06, "loss": 1.2882, "step": 2587 }, { "epoch": 0.6556048131728942, "grad_norm": 3.4546220302581787, "learning_rate": 9.821142911488582e-06, "loss": 1.1963, "step": 2588 }, { "epoch": 0.6558581380620646, "grad_norm": 3.7855916023254395, "learning_rate": 9.820920753792935e-06, "loss": 1.2486, "step": 2589 }, { "epoch": 0.656111462951235, "grad_norm": 3.4730772972106934, "learning_rate": 9.820698460728095e-06, "loss": 1.1228, "step": 2590 }, { "epoch": 0.6563647878404053, "grad_norm": 3.582882881164551, "learning_rate": 9.820476032300302e-06, "loss": 1.2618, "step": 2591 }, { "epoch": 0.6566181127295757, "grad_norm": 3.7244527339935303, "learning_rate": 9.8202534685158e-06, "loss": 1.2207, "step": 2592 }, { "epoch": 
0.656871437618746, "grad_norm": 3.8072774410247803, "learning_rate": 9.820030769380839e-06, "loss": 1.0822, "step": 2593 }, { "epoch": 0.6571247625079164, "grad_norm": 3.689227819442749, "learning_rate": 9.819807934901673e-06, "loss": 1.2445, "step": 2594 }, { "epoch": 0.6573780873970868, "grad_norm": 3.8133480548858643, "learning_rate": 9.81958496508456e-06, "loss": 1.2767, "step": 2595 }, { "epoch": 0.6576314122862571, "grad_norm": 3.3837785720825195, "learning_rate": 9.81936185993576e-06, "loss": 1.128, "step": 2596 }, { "epoch": 0.6578847371754275, "grad_norm": 3.8632192611694336, "learning_rate": 9.819138619461538e-06, "loss": 1.281, "step": 2597 }, { "epoch": 0.6581380620645978, "grad_norm": 3.616199254989624, "learning_rate": 9.818915243668163e-06, "loss": 1.1237, "step": 2598 }, { "epoch": 0.6583913869537682, "grad_norm": 3.4493207931518555, "learning_rate": 9.818691732561904e-06, "loss": 1.1529, "step": 2599 }, { "epoch": 0.6586447118429386, "grad_norm": 3.936377763748169, "learning_rate": 9.818468086149041e-06, "loss": 1.2162, "step": 2600 }, { "epoch": 0.6588980367321089, "grad_norm": 3.9524548053741455, "learning_rate": 9.818244304435853e-06, "loss": 1.4033, "step": 2601 }, { "epoch": 0.6591513616212793, "grad_norm": 3.612659215927124, "learning_rate": 9.818020387428621e-06, "loss": 1.1037, "step": 2602 }, { "epoch": 0.6594046865104497, "grad_norm": 3.4523441791534424, "learning_rate": 9.817796335133637e-06, "loss": 1.1582, "step": 2603 }, { "epoch": 0.65965801139962, "grad_norm": 3.481605052947998, "learning_rate": 9.817572147557189e-06, "loss": 1.1846, "step": 2604 }, { "epoch": 0.6599113362887904, "grad_norm": 3.3964247703552246, "learning_rate": 9.817347824705572e-06, "loss": 1.2143, "step": 2605 }, { "epoch": 0.6601646611779607, "grad_norm": 3.5022902488708496, "learning_rate": 9.817123366585088e-06, "loss": 1.1489, "step": 2606 }, { "epoch": 0.6604179860671311, "grad_norm": 3.4793701171875, "learning_rate": 9.816898773202037e-06, "loss": 1.2056, "step": 2607 }, { "epoch": 0.6606713109563015, "grad_norm": 3.454171657562256, "learning_rate": 9.816674044562724e-06, "loss": 1.1265, "step": 2608 }, { "epoch": 0.6609246358454718, "grad_norm": 3.443490982055664, "learning_rate": 9.816449180673465e-06, "loss": 1.2059, "step": 2609 }, { "epoch": 0.6611779607346422, "grad_norm": 3.181407928466797, "learning_rate": 9.816224181540567e-06, "loss": 1.0797, "step": 2610 }, { "epoch": 0.6614312856238126, "grad_norm": 3.7836201190948486, "learning_rate": 9.815999047170353e-06, "loss": 1.2255, "step": 2611 }, { "epoch": 0.6616846105129829, "grad_norm": 3.772714376449585, "learning_rate": 9.815773777569142e-06, "loss": 1.1066, "step": 2612 }, { "epoch": 0.6619379354021533, "grad_norm": 3.495640754699707, "learning_rate": 9.81554837274326e-06, "loss": 1.3562, "step": 2613 }, { "epoch": 0.6621912602913236, "grad_norm": 3.265284776687622, "learning_rate": 9.815322832699036e-06, "loss": 1.1142, "step": 2614 }, { "epoch": 0.662444585180494, "grad_norm": 3.3903627395629883, "learning_rate": 9.815097157442805e-06, "loss": 1.1176, "step": 2615 }, { "epoch": 0.6626979100696644, "grad_norm": 3.4801971912384033, "learning_rate": 9.814871346980903e-06, "loss": 1.3048, "step": 2616 }, { "epoch": 0.6629512349588347, "grad_norm": 3.5183205604553223, "learning_rate": 9.814645401319671e-06, "loss": 1.1921, "step": 2617 }, { "epoch": 0.6632045598480051, "grad_norm": 3.0776429176330566, "learning_rate": 9.814419320465452e-06, "loss": 1.1038, "step": 2618 }, { "epoch": 0.6634578847371754, "grad_norm": 
3.521026849746704, "learning_rate": 9.814193104424593e-06, "loss": 1.117, "step": 2619 }, { "epoch": 0.6637112096263458, "grad_norm": 3.6802165508270264, "learning_rate": 9.813966753203448e-06, "loss": 1.2133, "step": 2620 }, { "epoch": 0.6639645345155162, "grad_norm": 3.3408403396606445, "learning_rate": 9.813740266808375e-06, "loss": 1.211, "step": 2621 }, { "epoch": 0.6642178594046865, "grad_norm": 3.791865110397339, "learning_rate": 9.81351364524573e-06, "loss": 1.2252, "step": 2622 }, { "epoch": 0.6644711842938569, "grad_norm": 3.5978035926818848, "learning_rate": 9.813286888521878e-06, "loss": 1.2328, "step": 2623 }, { "epoch": 0.6647245091830273, "grad_norm": 3.6417248249053955, "learning_rate": 9.813059996643186e-06, "loss": 1.2067, "step": 2624 }, { "epoch": 0.6649778340721976, "grad_norm": 3.5736918449401855, "learning_rate": 9.812832969616026e-06, "loss": 1.1599, "step": 2625 }, { "epoch": 0.665231158961368, "grad_norm": 3.390369176864624, "learning_rate": 9.812605807446771e-06, "loss": 1.2273, "step": 2626 }, { "epoch": 0.6654844838505383, "grad_norm": 3.5803728103637695, "learning_rate": 9.8123785101418e-06, "loss": 1.1763, "step": 2627 }, { "epoch": 0.6657378087397087, "grad_norm": 3.7515909671783447, "learning_rate": 9.812151077707497e-06, "loss": 1.2424, "step": 2628 }, { "epoch": 0.6659911336288791, "grad_norm": 3.962169647216797, "learning_rate": 9.811923510150248e-06, "loss": 1.1588, "step": 2629 }, { "epoch": 0.6662444585180494, "grad_norm": 3.5884110927581787, "learning_rate": 9.81169580747644e-06, "loss": 1.1604, "step": 2630 }, { "epoch": 0.6664977834072198, "grad_norm": 3.8470706939697266, "learning_rate": 9.81146796969247e-06, "loss": 1.146, "step": 2631 }, { "epoch": 0.6667511082963902, "grad_norm": 3.6632542610168457, "learning_rate": 9.811239996804734e-06, "loss": 1.1298, "step": 2632 }, { "epoch": 0.6670044331855605, "grad_norm": 3.558820962905884, "learning_rate": 9.811011888819632e-06, "loss": 1.2359, "step": 2633 }, { "epoch": 0.6672577580747309, "grad_norm": 3.3591184616088867, "learning_rate": 9.810783645743573e-06, "loss": 1.203, "step": 2634 }, { "epoch": 0.6675110829639012, "grad_norm": 4.192767143249512, "learning_rate": 9.810555267582964e-06, "loss": 1.3851, "step": 2635 }, { "epoch": 0.6677644078530716, "grad_norm": 3.6012911796569824, "learning_rate": 9.810326754344217e-06, "loss": 1.1344, "step": 2636 }, { "epoch": 0.668017732742242, "grad_norm": 3.674480676651001, "learning_rate": 9.81009810603375e-06, "loss": 1.077, "step": 2637 }, { "epoch": 0.6682710576314123, "grad_norm": 3.7993180751800537, "learning_rate": 9.809869322657983e-06, "loss": 1.2632, "step": 2638 }, { "epoch": 0.6685243825205827, "grad_norm": 3.856072425842285, "learning_rate": 9.809640404223338e-06, "loss": 1.2595, "step": 2639 }, { "epoch": 0.668777707409753, "grad_norm": 3.7589426040649414, "learning_rate": 9.809411350736246e-06, "loss": 1.2555, "step": 2640 }, { "epoch": 0.6690310322989234, "grad_norm": 3.743173837661743, "learning_rate": 9.809182162203137e-06, "loss": 1.1436, "step": 2641 }, { "epoch": 0.6692843571880938, "grad_norm": 3.676936149597168, "learning_rate": 9.808952838630447e-06, "loss": 1.1748, "step": 2642 }, { "epoch": 0.669537682077264, "grad_norm": 3.525935173034668, "learning_rate": 9.808723380024614e-06, "loss": 1.1007, "step": 2643 }, { "epoch": 0.6697910069664345, "grad_norm": 3.3500614166259766, "learning_rate": 9.808493786392083e-06, "loss": 1.1616, "step": 2644 }, { "epoch": 0.6700443318556049, "grad_norm": 3.467604875564575, "learning_rate": 
9.8082640577393e-06, "loss": 1.0956, "step": 2645 }, { "epoch": 0.6702976567447751, "grad_norm": 3.592313051223755, "learning_rate": 9.808034194072717e-06, "loss": 1.2814, "step": 2646 }, { "epoch": 0.6705509816339456, "grad_norm": 3.6698861122131348, "learning_rate": 9.807804195398785e-06, "loss": 1.1094, "step": 2647 }, { "epoch": 0.6708043065231158, "grad_norm": 3.490830898284912, "learning_rate": 9.807574061723966e-06, "loss": 1.1319, "step": 2648 }, { "epoch": 0.6710576314122862, "grad_norm": 3.7101504802703857, "learning_rate": 9.80734379305472e-06, "loss": 1.1763, "step": 2649 }, { "epoch": 0.6713109563014567, "grad_norm": 3.7675065994262695, "learning_rate": 9.807113389397514e-06, "loss": 1.2364, "step": 2650 }, { "epoch": 0.6715642811906269, "grad_norm": 3.990852117538452, "learning_rate": 9.806882850758816e-06, "loss": 1.276, "step": 2651 }, { "epoch": 0.6718176060797973, "grad_norm": 3.472147226333618, "learning_rate": 9.806652177145102e-06, "loss": 1.146, "step": 2652 }, { "epoch": 0.6720709309689678, "grad_norm": 3.1356961727142334, "learning_rate": 9.806421368562847e-06, "loss": 1.046, "step": 2653 }, { "epoch": 0.672324255858138, "grad_norm": 3.3926939964294434, "learning_rate": 9.806190425018531e-06, "loss": 1.07, "step": 2654 }, { "epoch": 0.6725775807473084, "grad_norm": 3.6589784622192383, "learning_rate": 9.805959346518643e-06, "loss": 1.1224, "step": 2655 }, { "epoch": 0.6728309056364787, "grad_norm": 3.628971815109253, "learning_rate": 9.805728133069667e-06, "loss": 1.161, "step": 2656 }, { "epoch": 0.6730842305256491, "grad_norm": 3.4346976280212402, "learning_rate": 9.805496784678099e-06, "loss": 1.1655, "step": 2657 }, { "epoch": 0.6733375554148195, "grad_norm": 3.8982088565826416, "learning_rate": 9.805265301350433e-06, "loss": 1.1965, "step": 2658 }, { "epoch": 0.6735908803039898, "grad_norm": 3.2711942195892334, "learning_rate": 9.805033683093168e-06, "loss": 1.2469, "step": 2659 }, { "epoch": 0.6738442051931602, "grad_norm": 3.649364471435547, "learning_rate": 9.804801929912811e-06, "loss": 1.1161, "step": 2660 }, { "epoch": 0.6740975300823306, "grad_norm": 3.8847033977508545, "learning_rate": 9.804570041815866e-06, "loss": 1.2566, "step": 2661 }, { "epoch": 0.6743508549715009, "grad_norm": 3.7913880348205566, "learning_rate": 9.804338018808847e-06, "loss": 1.205, "step": 2662 }, { "epoch": 0.6746041798606713, "grad_norm": 3.595766305923462, "learning_rate": 9.80410586089827e-06, "loss": 1.1123, "step": 2663 }, { "epoch": 0.6748575047498416, "grad_norm": 3.2884340286254883, "learning_rate": 9.80387356809065e-06, "loss": 1.0508, "step": 2664 }, { "epoch": 0.675110829639012, "grad_norm": 3.183701992034912, "learning_rate": 9.80364114039251e-06, "loss": 1.1189, "step": 2665 }, { "epoch": 0.6753641545281824, "grad_norm": 3.781153917312622, "learning_rate": 9.803408577810381e-06, "loss": 1.3086, "step": 2666 }, { "epoch": 0.6756174794173527, "grad_norm": 3.7944130897521973, "learning_rate": 9.80317588035079e-06, "loss": 1.1294, "step": 2667 }, { "epoch": 0.6758708043065231, "grad_norm": 3.5396370887756348, "learning_rate": 9.80294304802027e-06, "loss": 1.186, "step": 2668 }, { "epoch": 0.6761241291956934, "grad_norm": 3.7316062450408936, "learning_rate": 9.802710080825362e-06, "loss": 1.2823, "step": 2669 }, { "epoch": 0.6763774540848638, "grad_norm": 3.5834367275238037, "learning_rate": 9.802476978772604e-06, "loss": 1.2484, "step": 2670 }, { "epoch": 0.6766307789740342, "grad_norm": 3.470984697341919, "learning_rate": 9.802243741868545e-06, "loss": 1.026, "step": 
2671 }, { "epoch": 0.6768841038632045, "grad_norm": 3.6198978424072266, "learning_rate": 9.80201037011973e-06, "loss": 1.1535, "step": 2672 }, { "epoch": 0.6771374287523749, "grad_norm": 3.994255304336548, "learning_rate": 9.801776863532716e-06, "loss": 1.1094, "step": 2673 }, { "epoch": 0.6773907536415453, "grad_norm": 3.4337193965911865, "learning_rate": 9.801543222114058e-06, "loss": 1.0807, "step": 2674 }, { "epoch": 0.6776440785307156, "grad_norm": 3.4465723037719727, "learning_rate": 9.801309445870317e-06, "loss": 1.1684, "step": 2675 }, { "epoch": 0.677897403419886, "grad_norm": 3.2914836406707764, "learning_rate": 9.801075534808058e-06, "loss": 1.0535, "step": 2676 }, { "epoch": 0.6781507283090563, "grad_norm": 3.766895294189453, "learning_rate": 9.800841488933846e-06, "loss": 1.2471, "step": 2677 }, { "epoch": 0.6784040531982267, "grad_norm": 3.9472076892852783, "learning_rate": 9.800607308254254e-06, "loss": 1.2071, "step": 2678 }, { "epoch": 0.6786573780873971, "grad_norm": 3.281208038330078, "learning_rate": 9.800372992775862e-06, "loss": 1.1713, "step": 2679 }, { "epoch": 0.6789107029765674, "grad_norm": 3.465557336807251, "learning_rate": 9.800138542505247e-06, "loss": 1.2152, "step": 2680 }, { "epoch": 0.6791640278657378, "grad_norm": 3.6775200366973877, "learning_rate": 9.799903957448988e-06, "loss": 1.1666, "step": 2681 }, { "epoch": 0.6794173527549082, "grad_norm": 3.829551935195923, "learning_rate": 9.799669237613678e-06, "loss": 1.0608, "step": 2682 }, { "epoch": 0.6796706776440785, "grad_norm": 3.5077948570251465, "learning_rate": 9.799434383005904e-06, "loss": 1.142, "step": 2683 }, { "epoch": 0.6799240025332489, "grad_norm": 3.613893508911133, "learning_rate": 9.799199393632262e-06, "loss": 1.1488, "step": 2684 }, { "epoch": 0.6801773274224192, "grad_norm": 3.767568826675415, "learning_rate": 9.798964269499348e-06, "loss": 1.343, "step": 2685 }, { "epoch": 0.6804306523115896, "grad_norm": 3.9153528213500977, "learning_rate": 9.79872901061377e-06, "loss": 1.1427, "step": 2686 }, { "epoch": 0.68068397720076, "grad_norm": 3.626418113708496, "learning_rate": 9.79849361698213e-06, "loss": 1.1733, "step": 2687 }, { "epoch": 0.6809373020899303, "grad_norm": 3.3231289386749268, "learning_rate": 9.798258088611036e-06, "loss": 1.0894, "step": 2688 }, { "epoch": 0.6811906269791007, "grad_norm": 3.4808497428894043, "learning_rate": 9.798022425507106e-06, "loss": 1.1191, "step": 2689 }, { "epoch": 0.681443951868271, "grad_norm": 3.5405893325805664, "learning_rate": 9.797786627676955e-06, "loss": 1.2735, "step": 2690 }, { "epoch": 0.6816972767574414, "grad_norm": 3.6188483238220215, "learning_rate": 9.797550695127203e-06, "loss": 1.2375, "step": 2691 }, { "epoch": 0.6819506016466118, "grad_norm": 3.759868860244751, "learning_rate": 9.797314627864477e-06, "loss": 1.2528, "step": 2692 }, { "epoch": 0.6822039265357821, "grad_norm": 3.7572758197784424, "learning_rate": 9.797078425895403e-06, "loss": 1.2812, "step": 2693 }, { "epoch": 0.6824572514249525, "grad_norm": 3.654799222946167, "learning_rate": 9.796842089226616e-06, "loss": 1.2315, "step": 2694 }, { "epoch": 0.6827105763141229, "grad_norm": 3.8572299480438232, "learning_rate": 9.796605617864752e-06, "loss": 1.1414, "step": 2695 }, { "epoch": 0.6829639012032932, "grad_norm": 3.588945150375366, "learning_rate": 9.796369011816449e-06, "loss": 1.128, "step": 2696 }, { "epoch": 0.6832172260924636, "grad_norm": 3.4814610481262207, "learning_rate": 9.796132271088351e-06, "loss": 1.1898, "step": 2697 }, { "epoch": 0.6834705509816339, 
"grad_norm": 3.417701482772827, "learning_rate": 9.79589539568711e-06, "loss": 1.1112, "step": 2698 }, { "epoch": 0.6837238758708043, "grad_norm": 3.6168627738952637, "learning_rate": 9.795658385619373e-06, "loss": 1.2118, "step": 2699 }, { "epoch": 0.6839772007599747, "grad_norm": 3.409858226776123, "learning_rate": 9.795421240891795e-06, "loss": 1.0221, "step": 2700 }, { "epoch": 0.684230525649145, "grad_norm": 3.630058765411377, "learning_rate": 9.795183961511037e-06, "loss": 1.1802, "step": 2701 }, { "epoch": 0.6844838505383154, "grad_norm": 3.1784121990203857, "learning_rate": 9.79494654748376e-06, "loss": 1.1309, "step": 2702 }, { "epoch": 0.6847371754274858, "grad_norm": 3.5996437072753906, "learning_rate": 9.794708998816631e-06, "loss": 1.2083, "step": 2703 }, { "epoch": 0.6849905003166561, "grad_norm": 3.6962954998016357, "learning_rate": 9.794471315516322e-06, "loss": 1.2055, "step": 2704 }, { "epoch": 0.6852438252058265, "grad_norm": 3.6837923526763916, "learning_rate": 9.794233497589505e-06, "loss": 1.1432, "step": 2705 }, { "epoch": 0.6854971500949968, "grad_norm": 3.7618539333343506, "learning_rate": 9.793995545042856e-06, "loss": 1.2378, "step": 2706 }, { "epoch": 0.6857504749841672, "grad_norm": 3.795304536819458, "learning_rate": 9.793757457883062e-06, "loss": 1.2583, "step": 2707 }, { "epoch": 0.6860037998733376, "grad_norm": 3.4354283809661865, "learning_rate": 9.793519236116804e-06, "loss": 1.1152, "step": 2708 }, { "epoch": 0.6862571247625079, "grad_norm": 3.2153573036193848, "learning_rate": 9.793280879750772e-06, "loss": 1.0758, "step": 2709 }, { "epoch": 0.6865104496516783, "grad_norm": 3.977475881576538, "learning_rate": 9.79304238879166e-06, "loss": 1.2517, "step": 2710 }, { "epoch": 0.6867637745408487, "grad_norm": 3.4537222385406494, "learning_rate": 9.792803763246166e-06, "loss": 1.2322, "step": 2711 }, { "epoch": 0.687017099430019, "grad_norm": 3.520698070526123, "learning_rate": 9.792565003120987e-06, "loss": 1.1549, "step": 2712 }, { "epoch": 0.6872704243191894, "grad_norm": 3.863520860671997, "learning_rate": 9.792326108422827e-06, "loss": 1.1825, "step": 2713 }, { "epoch": 0.6875237492083597, "grad_norm": 3.8098387718200684, "learning_rate": 9.792087079158399e-06, "loss": 1.2562, "step": 2714 }, { "epoch": 0.6877770740975301, "grad_norm": 3.588688373565674, "learning_rate": 9.79184791533441e-06, "loss": 1.1739, "step": 2715 }, { "epoch": 0.6880303989867005, "grad_norm": 3.77241849899292, "learning_rate": 9.791608616957577e-06, "loss": 1.1563, "step": 2716 }, { "epoch": 0.6882837238758708, "grad_norm": 3.7428314685821533, "learning_rate": 9.79136918403462e-06, "loss": 1.1555, "step": 2717 }, { "epoch": 0.6885370487650412, "grad_norm": 3.3901526927948, "learning_rate": 9.791129616572263e-06, "loss": 1.0641, "step": 2718 }, { "epoch": 0.6887903736542115, "grad_norm": 3.3258988857269287, "learning_rate": 9.790889914577231e-06, "loss": 0.9971, "step": 2719 }, { "epoch": 0.6890436985433819, "grad_norm": 3.6803858280181885, "learning_rate": 9.790650078056257e-06, "loss": 1.1599, "step": 2720 }, { "epoch": 0.6892970234325523, "grad_norm": 3.4460649490356445, "learning_rate": 9.790410107016073e-06, "loss": 1.068, "step": 2721 }, { "epoch": 0.6895503483217226, "grad_norm": 3.723104476928711, "learning_rate": 9.790170001463417e-06, "loss": 1.1796, "step": 2722 }, { "epoch": 0.689803673210893, "grad_norm": 3.571429491043091, "learning_rate": 9.789929761405035e-06, "loss": 1.1815, "step": 2723 }, { "epoch": 0.6900569981000634, "grad_norm": 3.5266213417053223, 
"learning_rate": 9.789689386847667e-06, "loss": 1.2039, "step": 2724 }, { "epoch": 0.6903103229892337, "grad_norm": 3.419090986251831, "learning_rate": 9.78944887779807e-06, "loss": 1.1417, "step": 2725 }, { "epoch": 0.6905636478784041, "grad_norm": 3.677717447280884, "learning_rate": 9.78920823426299e-06, "loss": 1.2448, "step": 2726 }, { "epoch": 0.6908169727675744, "grad_norm": 3.6117289066314697, "learning_rate": 9.788967456249188e-06, "loss": 1.2947, "step": 2727 }, { "epoch": 0.6910702976567448, "grad_norm": 3.5542232990264893, "learning_rate": 9.788726543763425e-06, "loss": 1.1518, "step": 2728 }, { "epoch": 0.6913236225459152, "grad_norm": 3.5767104625701904, "learning_rate": 9.788485496812464e-06, "loss": 1.1689, "step": 2729 }, { "epoch": 0.6915769474350855, "grad_norm": 3.090622663497925, "learning_rate": 9.788244315403075e-06, "loss": 1.0362, "step": 2730 }, { "epoch": 0.6918302723242559, "grad_norm": 3.714952230453491, "learning_rate": 9.78800299954203e-06, "loss": 1.3668, "step": 2731 }, { "epoch": 0.6920835972134263, "grad_norm": 3.6238505840301514, "learning_rate": 9.787761549236105e-06, "loss": 1.245, "step": 2732 }, { "epoch": 0.6923369221025966, "grad_norm": 3.8209900856018066, "learning_rate": 9.787519964492081e-06, "loss": 1.3525, "step": 2733 }, { "epoch": 0.692590246991767, "grad_norm": 3.626279830932617, "learning_rate": 9.787278245316739e-06, "loss": 1.1803, "step": 2734 }, { "epoch": 0.6928435718809373, "grad_norm": 3.9143478870391846, "learning_rate": 9.787036391716866e-06, "loss": 1.2359, "step": 2735 }, { "epoch": 0.6930968967701077, "grad_norm": 3.4293344020843506, "learning_rate": 9.786794403699256e-06, "loss": 1.1852, "step": 2736 }, { "epoch": 0.6933502216592781, "grad_norm": 3.490367889404297, "learning_rate": 9.786552281270701e-06, "loss": 1.1433, "step": 2737 }, { "epoch": 0.6936035465484484, "grad_norm": 3.5443949699401855, "learning_rate": 9.786310024438004e-06, "loss": 1.333, "step": 2738 }, { "epoch": 0.6938568714376188, "grad_norm": 3.587047576904297, "learning_rate": 9.786067633207963e-06, "loss": 1.1288, "step": 2739 }, { "epoch": 0.694110196326789, "grad_norm": 3.672039747238159, "learning_rate": 9.785825107587386e-06, "loss": 1.1176, "step": 2740 }, { "epoch": 0.6943635212159595, "grad_norm": 4.216299057006836, "learning_rate": 9.785582447583084e-06, "loss": 1.3564, "step": 2741 }, { "epoch": 0.6946168461051299, "grad_norm": 3.2558181285858154, "learning_rate": 9.785339653201869e-06, "loss": 1.1666, "step": 2742 }, { "epoch": 0.6948701709943002, "grad_norm": 3.5010135173797607, "learning_rate": 9.78509672445056e-06, "loss": 1.2109, "step": 2743 }, { "epoch": 0.6951234958834706, "grad_norm": 3.6624481678009033, "learning_rate": 9.784853661335976e-06, "loss": 1.2024, "step": 2744 }, { "epoch": 0.695376820772641, "grad_norm": 3.223618268966675, "learning_rate": 9.784610463864946e-06, "loss": 1.1214, "step": 2745 }, { "epoch": 0.6956301456618112, "grad_norm": 3.5028629302978516, "learning_rate": 9.784367132044295e-06, "loss": 1.2009, "step": 2746 }, { "epoch": 0.6958834705509817, "grad_norm": 3.3813838958740234, "learning_rate": 9.784123665880858e-06, "loss": 1.1444, "step": 2747 }, { "epoch": 0.696136795440152, "grad_norm": 3.6025495529174805, "learning_rate": 9.78388006538147e-06, "loss": 1.2635, "step": 2748 }, { "epoch": 0.6963901203293223, "grad_norm": 4.06139612197876, "learning_rate": 9.783636330552972e-06, "loss": 1.2937, "step": 2749 }, { "epoch": 0.6966434452184928, "grad_norm": 3.7361483573913574, "learning_rate": 9.783392461402208e-06, 
"loss": 1.331, "step": 2750 }, { "epoch": 0.696896770107663, "grad_norm": 3.60343074798584, "learning_rate": 9.783148457936028e-06, "loss": 1.1484, "step": 2751 }, { "epoch": 0.6971500949968334, "grad_norm": 3.524378776550293, "learning_rate": 9.782904320161278e-06, "loss": 1.3156, "step": 2752 }, { "epoch": 0.6974034198860039, "grad_norm": 3.6785829067230225, "learning_rate": 9.782660048084816e-06, "loss": 1.103, "step": 2753 }, { "epoch": 0.6976567447751741, "grad_norm": 3.7482783794403076, "learning_rate": 9.782415641713503e-06, "loss": 1.207, "step": 2754 }, { "epoch": 0.6979100696643445, "grad_norm": 3.5252530574798584, "learning_rate": 9.7821711010542e-06, "loss": 1.2077, "step": 2755 }, { "epoch": 0.6981633945535148, "grad_norm": 3.8628737926483154, "learning_rate": 9.781926426113773e-06, "loss": 1.2113, "step": 2756 }, { "epoch": 0.6984167194426852, "grad_norm": 3.483025074005127, "learning_rate": 9.781681616899094e-06, "loss": 1.1702, "step": 2757 }, { "epoch": 0.6986700443318556, "grad_norm": 3.4358625411987305, "learning_rate": 9.781436673417035e-06, "loss": 1.1687, "step": 2758 }, { "epoch": 0.6989233692210259, "grad_norm": 3.8206863403320312, "learning_rate": 9.781191595674476e-06, "loss": 1.3216, "step": 2759 }, { "epoch": 0.6991766941101963, "grad_norm": 3.408923625946045, "learning_rate": 9.780946383678297e-06, "loss": 1.0955, "step": 2760 }, { "epoch": 0.6994300189993667, "grad_norm": 3.7028443813323975, "learning_rate": 9.780701037435386e-06, "loss": 1.2207, "step": 2761 }, { "epoch": 0.699683343888537, "grad_norm": 3.5919699668884277, "learning_rate": 9.78045555695263e-06, "loss": 1.2388, "step": 2762 }, { "epoch": 0.6999366687777074, "grad_norm": 3.6181886196136475, "learning_rate": 9.780209942236923e-06, "loss": 1.2301, "step": 2763 }, { "epoch": 0.7001899936668777, "grad_norm": 3.928760528564453, "learning_rate": 9.77996419329516e-06, "loss": 1.3502, "step": 2764 }, { "epoch": 0.7004433185560481, "grad_norm": 3.586662769317627, "learning_rate": 9.779718310134242e-06, "loss": 1.2067, "step": 2765 }, { "epoch": 0.7006966434452185, "grad_norm": 3.925246000289917, "learning_rate": 9.779472292761075e-06, "loss": 1.3005, "step": 2766 }, { "epoch": 0.7009499683343888, "grad_norm": 3.557842493057251, "learning_rate": 9.779226141182566e-06, "loss": 1.1391, "step": 2767 }, { "epoch": 0.7012032932235592, "grad_norm": 3.4727470874786377, "learning_rate": 9.778979855405627e-06, "loss": 1.0853, "step": 2768 }, { "epoch": 0.7014566181127295, "grad_norm": 3.460678815841675, "learning_rate": 9.778733435437174e-06, "loss": 1.2103, "step": 2769 }, { "epoch": 0.7017099430018999, "grad_norm": 3.586535930633545, "learning_rate": 9.778486881284123e-06, "loss": 1.2026, "step": 2770 }, { "epoch": 0.7019632678910703, "grad_norm": 3.348304033279419, "learning_rate": 9.778240192953402e-06, "loss": 1.1491, "step": 2771 }, { "epoch": 0.7022165927802406, "grad_norm": 3.8354218006134033, "learning_rate": 9.777993370451938e-06, "loss": 1.2024, "step": 2772 }, { "epoch": 0.702469917669411, "grad_norm": 3.896153211593628, "learning_rate": 9.777746413786657e-06, "loss": 1.0675, "step": 2773 }, { "epoch": 0.7027232425585814, "grad_norm": 3.4157509803771973, "learning_rate": 9.777499322964496e-06, "loss": 1.1816, "step": 2774 }, { "epoch": 0.7029765674477517, "grad_norm": 3.68452525138855, "learning_rate": 9.777252097992394e-06, "loss": 1.3039, "step": 2775 }, { "epoch": 0.7032298923369221, "grad_norm": 3.712641954421997, "learning_rate": 9.777004738877291e-06, "loss": 1.3429, "step": 2776 }, { "epoch": 
0.7034832172260924, "grad_norm": 3.383347272872925, "learning_rate": 9.776757245626134e-06, "loss": 1.072, "step": 2777 }, { "epoch": 0.7037365421152628, "grad_norm": 3.540350914001465, "learning_rate": 9.776509618245874e-06, "loss": 1.2046, "step": 2778 }, { "epoch": 0.7039898670044332, "grad_norm": 3.4994916915893555, "learning_rate": 9.776261856743462e-06, "loss": 1.1806, "step": 2779 }, { "epoch": 0.7042431918936035, "grad_norm": 3.762624740600586, "learning_rate": 9.776013961125852e-06, "loss": 1.1689, "step": 2780 }, { "epoch": 0.7044965167827739, "grad_norm": 3.590169668197632, "learning_rate": 9.775765931400012e-06, "loss": 1.2333, "step": 2781 }, { "epoch": 0.7047498416719443, "grad_norm": 3.553405284881592, "learning_rate": 9.775517767572901e-06, "loss": 1.1121, "step": 2782 }, { "epoch": 0.7050031665611146, "grad_norm": 3.6617019176483154, "learning_rate": 9.775269469651492e-06, "loss": 1.2301, "step": 2783 }, { "epoch": 0.705256491450285, "grad_norm": 3.711812973022461, "learning_rate": 9.775021037642752e-06, "loss": 1.2415, "step": 2784 }, { "epoch": 0.7055098163394553, "grad_norm": 4.336576461791992, "learning_rate": 9.77477247155366e-06, "loss": 1.2206, "step": 2785 }, { "epoch": 0.7057631412286257, "grad_norm": 3.625121831893921, "learning_rate": 9.774523771391196e-06, "loss": 1.1155, "step": 2786 }, { "epoch": 0.7060164661177961, "grad_norm": 3.3995773792266846, "learning_rate": 9.77427493716234e-06, "loss": 1.0699, "step": 2787 }, { "epoch": 0.7062697910069664, "grad_norm": 3.3976480960845947, "learning_rate": 9.774025968874083e-06, "loss": 1.0927, "step": 2788 }, { "epoch": 0.7065231158961368, "grad_norm": 3.8355326652526855, "learning_rate": 9.773776866533413e-06, "loss": 1.2444, "step": 2789 }, { "epoch": 0.7067764407853071, "grad_norm": 3.6447906494140625, "learning_rate": 9.773527630147326e-06, "loss": 1.1609, "step": 2790 }, { "epoch": 0.7070297656744775, "grad_norm": 3.718270778656006, "learning_rate": 9.773278259722822e-06, "loss": 1.1833, "step": 2791 }, { "epoch": 0.7072830905636479, "grad_norm": 3.87910795211792, "learning_rate": 9.7730287552669e-06, "loss": 1.3377, "step": 2792 }, { "epoch": 0.7075364154528182, "grad_norm": 3.5926077365875244, "learning_rate": 9.772779116786568e-06, "loss": 1.1916, "step": 2793 }, { "epoch": 0.7077897403419886, "grad_norm": 3.551798105239868, "learning_rate": 9.772529344288836e-06, "loss": 1.1198, "step": 2794 }, { "epoch": 0.708043065231159, "grad_norm": 3.351592779159546, "learning_rate": 9.772279437780716e-06, "loss": 1.1733, "step": 2795 }, { "epoch": 0.7082963901203293, "grad_norm": 3.3235034942626953, "learning_rate": 9.772029397269226e-06, "loss": 1.0887, "step": 2796 }, { "epoch": 0.7085497150094997, "grad_norm": 3.4611363410949707, "learning_rate": 9.771779222761389e-06, "loss": 1.1947, "step": 2797 }, { "epoch": 0.70880303989867, "grad_norm": 3.4118261337280273, "learning_rate": 9.771528914264225e-06, "loss": 1.1529, "step": 2798 }, { "epoch": 0.7090563647878404, "grad_norm": 3.7440123558044434, "learning_rate": 9.771278471784767e-06, "loss": 1.1056, "step": 2799 }, { "epoch": 0.7093096896770108, "grad_norm": 3.0236973762512207, "learning_rate": 9.771027895330045e-06, "loss": 1.0729, "step": 2800 }, { "epoch": 0.7095630145661811, "grad_norm": 3.7565345764160156, "learning_rate": 9.770777184907096e-06, "loss": 1.2416, "step": 2801 }, { "epoch": 0.7098163394553515, "grad_norm": 3.542041063308716, "learning_rate": 9.770526340522959e-06, "loss": 1.1375, "step": 2802 }, { "epoch": 0.7100696643445219, "grad_norm": 
3.2771825790405273, "learning_rate": 9.77027536218468e-06, "loss": 1.1273, "step": 2803 }, { "epoch": 0.7103229892336922, "grad_norm": 3.5278711318969727, "learning_rate": 9.770024249899302e-06, "loss": 1.0621, "step": 2804 }, { "epoch": 0.7105763141228626, "grad_norm": 3.2327733039855957, "learning_rate": 9.769773003673882e-06, "loss": 1.1363, "step": 2805 }, { "epoch": 0.7108296390120329, "grad_norm": 3.598832130432129, "learning_rate": 9.769521623515468e-06, "loss": 1.1317, "step": 2806 }, { "epoch": 0.7110829639012033, "grad_norm": 3.827223300933838, "learning_rate": 9.769270109431123e-06, "loss": 1.2074, "step": 2807 }, { "epoch": 0.7113362887903737, "grad_norm": 3.5710222721099854, "learning_rate": 9.76901846142791e-06, "loss": 1.1924, "step": 2808 }, { "epoch": 0.711589613679544, "grad_norm": 3.319575786590576, "learning_rate": 9.768766679512894e-06, "loss": 1.1732, "step": 2809 }, { "epoch": 0.7118429385687144, "grad_norm": 3.6563777923583984, "learning_rate": 9.768514763693143e-06, "loss": 1.2641, "step": 2810 }, { "epoch": 0.7120962634578847, "grad_norm": 3.330498695373535, "learning_rate": 9.768262713975734e-06, "loss": 1.1456, "step": 2811 }, { "epoch": 0.7123495883470551, "grad_norm": 3.920604705810547, "learning_rate": 9.768010530367741e-06, "loss": 1.3083, "step": 2812 }, { "epoch": 0.7126029132362255, "grad_norm": 3.7744476795196533, "learning_rate": 9.767758212876247e-06, "loss": 1.1667, "step": 2813 }, { "epoch": 0.7128562381253958, "grad_norm": 3.3494551181793213, "learning_rate": 9.767505761508338e-06, "loss": 1.1604, "step": 2814 }, { "epoch": 0.7131095630145662, "grad_norm": 3.6803066730499268, "learning_rate": 9.767253176271104e-06, "loss": 1.1327, "step": 2815 }, { "epoch": 0.7133628879037366, "grad_norm": 3.979126214981079, "learning_rate": 9.767000457171632e-06, "loss": 1.2729, "step": 2816 }, { "epoch": 0.7136162127929069, "grad_norm": 3.2043516635894775, "learning_rate": 9.76674760421702e-06, "loss": 1.1035, "step": 2817 }, { "epoch": 0.7138695376820773, "grad_norm": 3.6564838886260986, "learning_rate": 9.766494617414373e-06, "loss": 1.2312, "step": 2818 }, { "epoch": 0.7141228625712476, "grad_norm": 3.200101137161255, "learning_rate": 9.766241496770787e-06, "loss": 1.1007, "step": 2819 }, { "epoch": 0.714376187460418, "grad_norm": 3.8117692470550537, "learning_rate": 9.765988242293378e-06, "loss": 1.2331, "step": 2820 }, { "epoch": 0.7146295123495884, "grad_norm": 3.5853116512298584, "learning_rate": 9.765734853989251e-06, "loss": 1.2502, "step": 2821 }, { "epoch": 0.7148828372387587, "grad_norm": 3.3093082904815674, "learning_rate": 9.765481331865521e-06, "loss": 1.1271, "step": 2822 }, { "epoch": 0.7151361621279291, "grad_norm": 3.530261516571045, "learning_rate": 9.76522767592931e-06, "loss": 1.3105, "step": 2823 }, { "epoch": 0.7153894870170995, "grad_norm": 3.4714860916137695, "learning_rate": 9.764973886187741e-06, "loss": 1.1998, "step": 2824 }, { "epoch": 0.7156428119062698, "grad_norm": 3.621464252471924, "learning_rate": 9.764719962647937e-06, "loss": 1.2733, "step": 2825 }, { "epoch": 0.7158961367954402, "grad_norm": 3.5986428260803223, "learning_rate": 9.764465905317029e-06, "loss": 1.3084, "step": 2826 }, { "epoch": 0.7161494616846105, "grad_norm": 3.1292741298675537, "learning_rate": 9.764211714202152e-06, "loss": 1.1816, "step": 2827 }, { "epoch": 0.7164027865737809, "grad_norm": 3.384155511856079, "learning_rate": 9.763957389310443e-06, "loss": 1.1063, "step": 2828 }, { "epoch": 0.7166561114629513, "grad_norm": 3.7090487480163574, 
"learning_rate": 9.763702930649045e-06, "loss": 1.1882, "step": 2829 }, { "epoch": 0.7169094363521216, "grad_norm": 3.265096426010132, "learning_rate": 9.763448338225098e-06, "loss": 1.059, "step": 2830 }, { "epoch": 0.717162761241292, "grad_norm": 3.665614604949951, "learning_rate": 9.763193612045756e-06, "loss": 1.1052, "step": 2831 }, { "epoch": 0.7174160861304624, "grad_norm": 3.465980291366577, "learning_rate": 9.762938752118169e-06, "loss": 1.1833, "step": 2832 }, { "epoch": 0.7176694110196327, "grad_norm": 3.3577919006347656, "learning_rate": 9.762683758449495e-06, "loss": 1.0934, "step": 2833 }, { "epoch": 0.7179227359088031, "grad_norm": 3.931393623352051, "learning_rate": 9.762428631046893e-06, "loss": 1.255, "step": 2834 }, { "epoch": 0.7181760607979734, "grad_norm": 3.6076884269714355, "learning_rate": 9.762173369917527e-06, "loss": 1.2592, "step": 2835 }, { "epoch": 0.7184293856871438, "grad_norm": 3.668667793273926, "learning_rate": 9.761917975068564e-06, "loss": 1.2569, "step": 2836 }, { "epoch": 0.7186827105763142, "grad_norm": 3.767733097076416, "learning_rate": 9.761662446507177e-06, "loss": 1.1592, "step": 2837 }, { "epoch": 0.7189360354654845, "grad_norm": 3.57222056388855, "learning_rate": 9.761406784240539e-06, "loss": 1.242, "step": 2838 }, { "epoch": 0.7191893603546549, "grad_norm": 4.031567573547363, "learning_rate": 9.76115098827583e-06, "loss": 1.2613, "step": 2839 }, { "epoch": 0.7194426852438252, "grad_norm": 3.574033260345459, "learning_rate": 9.760895058620236e-06, "loss": 1.3366, "step": 2840 }, { "epoch": 0.7196960101329956, "grad_norm": 3.2835404872894287, "learning_rate": 9.760638995280938e-06, "loss": 1.0849, "step": 2841 }, { "epoch": 0.719949335022166, "grad_norm": 3.594667911529541, "learning_rate": 9.760382798265127e-06, "loss": 1.2336, "step": 2842 }, { "epoch": 0.7202026599113363, "grad_norm": 3.377202272415161, "learning_rate": 9.76012646758e-06, "loss": 1.1906, "step": 2843 }, { "epoch": 0.7204559848005067, "grad_norm": 3.599886178970337, "learning_rate": 9.759870003232751e-06, "loss": 1.3008, "step": 2844 }, { "epoch": 0.7207093096896771, "grad_norm": 3.5252554416656494, "learning_rate": 9.759613405230583e-06, "loss": 1.2417, "step": 2845 }, { "epoch": 0.7209626345788474, "grad_norm": 3.7451412677764893, "learning_rate": 9.759356673580703e-06, "loss": 1.2919, "step": 2846 }, { "epoch": 0.7212159594680178, "grad_norm": 3.5595405101776123, "learning_rate": 9.759099808290318e-06, "loss": 1.0684, "step": 2847 }, { "epoch": 0.721469284357188, "grad_norm": 3.7966434955596924, "learning_rate": 9.75884280936664e-06, "loss": 1.2092, "step": 2848 }, { "epoch": 0.7217226092463584, "grad_norm": 3.775515556335449, "learning_rate": 9.758585676816888e-06, "loss": 1.2583, "step": 2849 }, { "epoch": 0.7219759341355289, "grad_norm": 3.613001585006714, "learning_rate": 9.758328410648277e-06, "loss": 1.2235, "step": 2850 }, { "epoch": 0.7222292590246991, "grad_norm": 3.4368202686309814, "learning_rate": 9.758071010868037e-06, "loss": 1.2107, "step": 2851 }, { "epoch": 0.7224825839138695, "grad_norm": 3.6963143348693848, "learning_rate": 9.757813477483393e-06, "loss": 1.1236, "step": 2852 }, { "epoch": 0.72273590880304, "grad_norm": 3.0793519020080566, "learning_rate": 9.757555810501575e-06, "loss": 1.1591, "step": 2853 }, { "epoch": 0.7229892336922102, "grad_norm": 3.4738950729370117, "learning_rate": 9.757298009929822e-06, "loss": 1.2413, "step": 2854 }, { "epoch": 0.7232425585813806, "grad_norm": 3.767167568206787, "learning_rate": 9.757040075775369e-06, "loss": 
1.1664, "step": 2855 }, { "epoch": 0.7234958834705509, "grad_norm": 3.5329854488372803, "learning_rate": 9.75678200804546e-06, "loss": 1.1411, "step": 2856 }, { "epoch": 0.7237492083597213, "grad_norm": 3.2329213619232178, "learning_rate": 9.75652380674734e-06, "loss": 1.0596, "step": 2857 }, { "epoch": 0.7240025332488917, "grad_norm": 3.5320141315460205, "learning_rate": 9.756265471888263e-06, "loss": 1.2004, "step": 2858 }, { "epoch": 0.724255858138062, "grad_norm": 3.182819366455078, "learning_rate": 9.75600700347548e-06, "loss": 1.0744, "step": 2859 }, { "epoch": 0.7245091830272324, "grad_norm": 3.5611660480499268, "learning_rate": 9.755748401516252e-06, "loss": 1.1452, "step": 2860 }, { "epoch": 0.7247625079164027, "grad_norm": 3.2804763317108154, "learning_rate": 9.755489666017834e-06, "loss": 1.0568, "step": 2861 }, { "epoch": 0.7250158328055731, "grad_norm": 3.84488582611084, "learning_rate": 9.755230796987496e-06, "loss": 1.2335, "step": 2862 }, { "epoch": 0.7252691576947435, "grad_norm": 4.201401233673096, "learning_rate": 9.754971794432506e-06, "loss": 1.1728, "step": 2863 }, { "epoch": 0.7255224825839138, "grad_norm": 3.7625200748443604, "learning_rate": 9.754712658360137e-06, "loss": 1.3335, "step": 2864 }, { "epoch": 0.7257758074730842, "grad_norm": 3.607170820236206, "learning_rate": 9.754453388777665e-06, "loss": 1.3213, "step": 2865 }, { "epoch": 0.7260291323622546, "grad_norm": 3.8149611949920654, "learning_rate": 9.754193985692371e-06, "loss": 1.1848, "step": 2866 }, { "epoch": 0.7262824572514249, "grad_norm": 3.745016098022461, "learning_rate": 9.753934449111535e-06, "loss": 1.1727, "step": 2867 }, { "epoch": 0.7265357821405953, "grad_norm": 3.7872586250305176, "learning_rate": 9.753674779042451e-06, "loss": 1.1979, "step": 2868 }, { "epoch": 0.7267891070297656, "grad_norm": 3.3696279525756836, "learning_rate": 9.753414975492406e-06, "loss": 1.0847, "step": 2869 }, { "epoch": 0.727042431918936, "grad_norm": 3.3734793663024902, "learning_rate": 9.753155038468696e-06, "loss": 1.1775, "step": 2870 }, { "epoch": 0.7272957568081064, "grad_norm": 3.693936347961426, "learning_rate": 9.752894967978622e-06, "loss": 1.2687, "step": 2871 }, { "epoch": 0.7275490816972767, "grad_norm": 3.2961838245391846, "learning_rate": 9.752634764029483e-06, "loss": 1.1288, "step": 2872 }, { "epoch": 0.7278024065864471, "grad_norm": 3.4125046730041504, "learning_rate": 9.752374426628587e-06, "loss": 1.1906, "step": 2873 }, { "epoch": 0.7280557314756175, "grad_norm": 3.271693468093872, "learning_rate": 9.752113955783245e-06, "loss": 1.1654, "step": 2874 }, { "epoch": 0.7283090563647878, "grad_norm": 3.64925479888916, "learning_rate": 9.75185335150077e-06, "loss": 1.2178, "step": 2875 }, { "epoch": 0.7285623812539582, "grad_norm": 3.1804234981536865, "learning_rate": 9.751592613788481e-06, "loss": 0.9787, "step": 2876 }, { "epoch": 0.7288157061431285, "grad_norm": 3.6267011165618896, "learning_rate": 9.751331742653698e-06, "loss": 1.0725, "step": 2877 }, { "epoch": 0.7290690310322989, "grad_norm": 3.5391435623168945, "learning_rate": 9.751070738103745e-06, "loss": 1.1544, "step": 2878 }, { "epoch": 0.7293223559214693, "grad_norm": 3.3422415256500244, "learning_rate": 9.750809600145955e-06, "loss": 1.1848, "step": 2879 }, { "epoch": 0.7295756808106396, "grad_norm": 3.2491321563720703, "learning_rate": 9.750548328787657e-06, "loss": 1.1193, "step": 2880 }, { "epoch": 0.72982900569981, "grad_norm": 3.7429699897766113, "learning_rate": 9.750286924036188e-06, "loss": 1.2345, "step": 2881 }, { "epoch": 
0.7300823305889804, "grad_norm": 3.055997133255005, "learning_rate": 9.750025385898887e-06, "loss": 1.1035, "step": 2882 }, { "epoch": 0.7303356554781507, "grad_norm": 3.5209739208221436, "learning_rate": 9.749763714383102e-06, "loss": 1.2324, "step": 2883 }, { "epoch": 0.7305889803673211, "grad_norm": 3.2700893878936768, "learning_rate": 9.749501909496177e-06, "loss": 1.0234, "step": 2884 }, { "epoch": 0.7308423052564914, "grad_norm": 3.586759328842163, "learning_rate": 9.749239971245463e-06, "loss": 1.3001, "step": 2885 }, { "epoch": 0.7310956301456618, "grad_norm": 3.6156978607177734, "learning_rate": 9.74897789963832e-06, "loss": 1.2048, "step": 2886 }, { "epoch": 0.7313489550348322, "grad_norm": 3.196514844894409, "learning_rate": 9.748715694682101e-06, "loss": 1.0979, "step": 2887 }, { "epoch": 0.7316022799240025, "grad_norm": 3.7128384113311768, "learning_rate": 9.74845335638417e-06, "loss": 1.1782, "step": 2888 }, { "epoch": 0.7318556048131729, "grad_norm": 3.2188334465026855, "learning_rate": 9.748190884751896e-06, "loss": 1.0616, "step": 2889 }, { "epoch": 0.7321089297023432, "grad_norm": 4.012807846069336, "learning_rate": 9.747928279792647e-06, "loss": 1.2046, "step": 2890 }, { "epoch": 0.7323622545915136, "grad_norm": 3.4193642139434814, "learning_rate": 9.747665541513795e-06, "loss": 1.2761, "step": 2891 }, { "epoch": 0.732615579480684, "grad_norm": 3.9362986087799072, "learning_rate": 9.747402669922723e-06, "loss": 1.1693, "step": 2892 }, { "epoch": 0.7328689043698543, "grad_norm": 3.805783987045288, "learning_rate": 9.747139665026807e-06, "loss": 1.3688, "step": 2893 }, { "epoch": 0.7331222292590247, "grad_norm": 3.6252715587615967, "learning_rate": 9.746876526833435e-06, "loss": 1.214, "step": 2894 }, { "epoch": 0.7333755541481951, "grad_norm": 3.744133234024048, "learning_rate": 9.746613255349994e-06, "loss": 1.1945, "step": 2895 }, { "epoch": 0.7336288790373654, "grad_norm": 3.763392210006714, "learning_rate": 9.746349850583878e-06, "loss": 1.1511, "step": 2896 }, { "epoch": 0.7338822039265358, "grad_norm": 3.8932294845581055, "learning_rate": 9.746086312542482e-06, "loss": 1.2599, "step": 2897 }, { "epoch": 0.7341355288157061, "grad_norm": 3.539275884628296, "learning_rate": 9.745822641233209e-06, "loss": 1.0907, "step": 2898 }, { "epoch": 0.7343888537048765, "grad_norm": 3.518880844116211, "learning_rate": 9.745558836663459e-06, "loss": 1.0641, "step": 2899 }, { "epoch": 0.7346421785940469, "grad_norm": 3.6900782585144043, "learning_rate": 9.74529489884064e-06, "loss": 1.2881, "step": 2900 }, { "epoch": 0.7348955034832172, "grad_norm": 3.250474214553833, "learning_rate": 9.745030827772165e-06, "loss": 1.0688, "step": 2901 }, { "epoch": 0.7351488283723876, "grad_norm": 3.5695221424102783, "learning_rate": 9.744766623465449e-06, "loss": 1.1915, "step": 2902 }, { "epoch": 0.735402153261558, "grad_norm": 3.4501566886901855, "learning_rate": 9.744502285927908e-06, "loss": 1.0965, "step": 2903 }, { "epoch": 0.7356554781507283, "grad_norm": 3.3264076709747314, "learning_rate": 9.744237815166968e-06, "loss": 1.1917, "step": 2904 }, { "epoch": 0.7359088030398987, "grad_norm": 3.6294021606445312, "learning_rate": 9.743973211190054e-06, "loss": 1.1603, "step": 2905 }, { "epoch": 0.736162127929069, "grad_norm": 3.9763689041137695, "learning_rate": 9.743708474004594e-06, "loss": 1.1595, "step": 2906 }, { "epoch": 0.7364154528182394, "grad_norm": 3.597679853439331, "learning_rate": 9.743443603618026e-06, "loss": 1.1928, "step": 2907 }, { "epoch": 0.7366687777074098, "grad_norm": 
3.656299591064453, "learning_rate": 9.74317860003778e-06, "loss": 1.1791, "step": 2908 }, { "epoch": 0.7369221025965801, "grad_norm": 3.7492666244506836, "learning_rate": 9.742913463271308e-06, "loss": 1.1845, "step": 2909 }, { "epoch": 0.7371754274857505, "grad_norm": 3.099785089492798, "learning_rate": 9.742648193326044e-06, "loss": 1.023, "step": 2910 }, { "epoch": 0.7374287523749208, "grad_norm": 3.5840766429901123, "learning_rate": 9.742382790209445e-06, "loss": 1.1468, "step": 2911 }, { "epoch": 0.7376820772640912, "grad_norm": 3.1212668418884277, "learning_rate": 9.742117253928957e-06, "loss": 1.0754, "step": 2912 }, { "epoch": 0.7379354021532616, "grad_norm": 3.5566112995147705, "learning_rate": 9.741851584492041e-06, "loss": 1.2996, "step": 2913 }, { "epoch": 0.7381887270424319, "grad_norm": 3.684170961380005, "learning_rate": 9.741585781906155e-06, "loss": 1.2737, "step": 2914 }, { "epoch": 0.7384420519316023, "grad_norm": 3.6651434898376465, "learning_rate": 9.741319846178762e-06, "loss": 1.1011, "step": 2915 }, { "epoch": 0.7386953768207727, "grad_norm": 3.631960391998291, "learning_rate": 9.741053777317328e-06, "loss": 1.1804, "step": 2916 }, { "epoch": 0.738948701709943, "grad_norm": 3.5752904415130615, "learning_rate": 9.74078757532933e-06, "loss": 1.1606, "step": 2917 }, { "epoch": 0.7392020265991134, "grad_norm": 3.315155029296875, "learning_rate": 9.740521240222235e-06, "loss": 1.0884, "step": 2918 }, { "epoch": 0.7394553514882837, "grad_norm": 3.4121146202087402, "learning_rate": 9.740254772003527e-06, "loss": 1.2873, "step": 2919 }, { "epoch": 0.7397086763774541, "grad_norm": 3.9900479316711426, "learning_rate": 9.739988170680687e-06, "loss": 1.2129, "step": 2920 }, { "epoch": 0.7399620012666245, "grad_norm": 3.557452440261841, "learning_rate": 9.7397214362612e-06, "loss": 1.2383, "step": 2921 }, { "epoch": 0.7402153261557948, "grad_norm": 3.6594953536987305, "learning_rate": 9.739454568752556e-06, "loss": 1.2351, "step": 2922 }, { "epoch": 0.7404686510449652, "grad_norm": 3.352383852005005, "learning_rate": 9.73918756816225e-06, "loss": 1.0694, "step": 2923 }, { "epoch": 0.7407219759341356, "grad_norm": 3.5007543563842773, "learning_rate": 9.738920434497777e-06, "loss": 1.2174, "step": 2924 }, { "epoch": 0.7409753008233059, "grad_norm": 3.6896722316741943, "learning_rate": 9.73865316776664e-06, "loss": 1.2207, "step": 2925 }, { "epoch": 0.7412286257124763, "grad_norm": 3.894541025161743, "learning_rate": 9.738385767976344e-06, "loss": 1.2838, "step": 2926 }, { "epoch": 0.7414819506016466, "grad_norm": 3.4676342010498047, "learning_rate": 9.738118235134395e-06, "loss": 1.2436, "step": 2927 }, { "epoch": 0.741735275490817, "grad_norm": 3.736973285675049, "learning_rate": 9.737850569248308e-06, "loss": 1.0835, "step": 2928 }, { "epoch": 0.7419886003799874, "grad_norm": 3.6173088550567627, "learning_rate": 9.737582770325595e-06, "loss": 1.2395, "step": 2929 }, { "epoch": 0.7422419252691577, "grad_norm": 3.260591506958008, "learning_rate": 9.737314838373781e-06, "loss": 1.0065, "step": 2930 }, { "epoch": 0.7424952501583281, "grad_norm": 3.230839252471924, "learning_rate": 9.737046773400384e-06, "loss": 1.1127, "step": 2931 }, { "epoch": 0.7427485750474985, "grad_norm": 3.7157719135284424, "learning_rate": 9.736778575412935e-06, "loss": 1.1872, "step": 2932 }, { "epoch": 0.7430018999366688, "grad_norm": 3.453279495239258, "learning_rate": 9.736510244418965e-06, "loss": 1.0701, "step": 2933 }, { "epoch": 0.7432552248258392, "grad_norm": 3.4291446208953857, "learning_rate": 
9.736241780426005e-06, "loss": 1.0969, "step": 2934 }, { "epoch": 0.7435085497150095, "grad_norm": 3.3004000186920166, "learning_rate": 9.735973183441598e-06, "loss": 1.1602, "step": 2935 }, { "epoch": 0.7437618746041799, "grad_norm": 3.5002408027648926, "learning_rate": 9.735704453473281e-06, "loss": 1.1217, "step": 2936 }, { "epoch": 0.7440151994933503, "grad_norm": 3.2200589179992676, "learning_rate": 9.735435590528603e-06, "loss": 1.065, "step": 2937 }, { "epoch": 0.7442685243825206, "grad_norm": 3.6667120456695557, "learning_rate": 9.735166594615115e-06, "loss": 1.208, "step": 2938 }, { "epoch": 0.744521849271691, "grad_norm": 3.5607337951660156, "learning_rate": 9.734897465740367e-06, "loss": 1.1361, "step": 2939 }, { "epoch": 0.7447751741608613, "grad_norm": 3.6154489517211914, "learning_rate": 9.734628203911916e-06, "loss": 1.1662, "step": 2940 }, { "epoch": 0.7450284990500317, "grad_norm": 3.8687450885772705, "learning_rate": 9.734358809137325e-06, "loss": 1.2813, "step": 2941 }, { "epoch": 0.7452818239392021, "grad_norm": 3.4730746746063232, "learning_rate": 9.73408928142416e-06, "loss": 1.1927, "step": 2942 }, { "epoch": 0.7455351488283724, "grad_norm": 3.4987568855285645, "learning_rate": 9.733819620779983e-06, "loss": 1.263, "step": 2943 }, { "epoch": 0.7457884737175428, "grad_norm": 4.5206074714660645, "learning_rate": 9.733549827212371e-06, "loss": 1.5617, "step": 2944 }, { "epoch": 0.7460417986067132, "grad_norm": 3.6188478469848633, "learning_rate": 9.7332799007289e-06, "loss": 1.2127, "step": 2945 }, { "epoch": 0.7462951234958835, "grad_norm": 3.842108964920044, "learning_rate": 9.733009841337145e-06, "loss": 1.1828, "step": 2946 }, { "epoch": 0.7465484483850539, "grad_norm": 3.4908463954925537, "learning_rate": 9.732739649044694e-06, "loss": 1.1087, "step": 2947 }, { "epoch": 0.7468017732742241, "grad_norm": 3.5372354984283447, "learning_rate": 9.732469323859131e-06, "loss": 1.1885, "step": 2948 }, { "epoch": 0.7470550981633945, "grad_norm": 3.726322889328003, "learning_rate": 9.732198865788047e-06, "loss": 1.1577, "step": 2949 }, { "epoch": 0.747308423052565, "grad_norm": 3.757126808166504, "learning_rate": 9.731928274839038e-06, "loss": 1.3131, "step": 2950 }, { "epoch": 0.7475617479417352, "grad_norm": 3.244915246963501, "learning_rate": 9.7316575510197e-06, "loss": 1.139, "step": 2951 }, { "epoch": 0.7478150728309056, "grad_norm": 3.993474245071411, "learning_rate": 9.731386694337635e-06, "loss": 1.2573, "step": 2952 }, { "epoch": 0.748068397720076, "grad_norm": 3.6339077949523926, "learning_rate": 9.73111570480045e-06, "loss": 1.3047, "step": 2953 }, { "epoch": 0.7483217226092463, "grad_norm": 3.532299518585205, "learning_rate": 9.730844582415752e-06, "loss": 1.2002, "step": 2954 }, { "epoch": 0.7485750474984167, "grad_norm": 3.2947475910186768, "learning_rate": 9.730573327191158e-06, "loss": 1.0655, "step": 2955 }, { "epoch": 0.748828372387587, "grad_norm": 3.738976240158081, "learning_rate": 9.73030193913428e-06, "loss": 1.1959, "step": 2956 }, { "epoch": 0.7490816972767574, "grad_norm": 3.7513082027435303, "learning_rate": 9.73003041825274e-06, "loss": 1.1723, "step": 2957 }, { "epoch": 0.7493350221659278, "grad_norm": 3.2699825763702393, "learning_rate": 9.729758764554164e-06, "loss": 1.1177, "step": 2958 }, { "epoch": 0.7495883470550981, "grad_norm": 3.1440470218658447, "learning_rate": 9.729486978046178e-06, "loss": 1.0667, "step": 2959 }, { "epoch": 0.7498416719442685, "grad_norm": 3.0764389038085938, "learning_rate": 9.729215058736417e-06, "loss": 1.0814, 
"step": 2960 }, { "epoch": 0.7500949968334388, "grad_norm": 3.6139206886291504, "learning_rate": 9.72894300663251e-06, "loss": 1.1392, "step": 2961 }, { "epoch": 0.7503483217226092, "grad_norm": 3.397728443145752, "learning_rate": 9.7286708217421e-06, "loss": 1.201, "step": 2962 }, { "epoch": 0.7506016466117796, "grad_norm": 3.492866039276123, "learning_rate": 9.728398504072832e-06, "loss": 1.0876, "step": 2963 }, { "epoch": 0.7508549715009499, "grad_norm": 3.6088707447052, "learning_rate": 9.728126053632348e-06, "loss": 1.1156, "step": 2964 }, { "epoch": 0.7511082963901203, "grad_norm": 3.5072734355926514, "learning_rate": 9.727853470428301e-06, "loss": 1.1877, "step": 2965 }, { "epoch": 0.7513616212792907, "grad_norm": 3.3493130207061768, "learning_rate": 9.727580754468345e-06, "loss": 1.0638, "step": 2966 }, { "epoch": 0.751614946168461, "grad_norm": 3.3511264324188232, "learning_rate": 9.727307905760137e-06, "loss": 1.0617, "step": 2967 }, { "epoch": 0.7518682710576314, "grad_norm": 3.3056769371032715, "learning_rate": 9.727034924311337e-06, "loss": 0.9994, "step": 2968 }, { "epoch": 0.7521215959468017, "grad_norm": 3.7182092666625977, "learning_rate": 9.726761810129614e-06, "loss": 1.2064, "step": 2969 }, { "epoch": 0.7523749208359721, "grad_norm": 4.280904769897461, "learning_rate": 9.726488563222633e-06, "loss": 1.5103, "step": 2970 }, { "epoch": 0.7526282457251425, "grad_norm": 3.617830991744995, "learning_rate": 9.726215183598069e-06, "loss": 1.1906, "step": 2971 }, { "epoch": 0.7528815706143128, "grad_norm": 3.109330177307129, "learning_rate": 9.725941671263597e-06, "loss": 1.0851, "step": 2972 }, { "epoch": 0.7531348955034832, "grad_norm": 3.392521381378174, "learning_rate": 9.7256680262269e-06, "loss": 1.2138, "step": 2973 }, { "epoch": 0.7533882203926536, "grad_norm": 3.6669206619262695, "learning_rate": 9.725394248495657e-06, "loss": 1.1091, "step": 2974 }, { "epoch": 0.7536415452818239, "grad_norm": 3.238131523132324, "learning_rate": 9.72512033807756e-06, "loss": 1.1352, "step": 2975 }, { "epoch": 0.7538948701709943, "grad_norm": 3.459148645401001, "learning_rate": 9.724846294980298e-06, "loss": 1.2146, "step": 2976 }, { "epoch": 0.7541481950601646, "grad_norm": 3.5758607387542725, "learning_rate": 9.724572119211566e-06, "loss": 1.1659, "step": 2977 }, { "epoch": 0.754401519949335, "grad_norm": 3.6841254234313965, "learning_rate": 9.724297810779064e-06, "loss": 1.2396, "step": 2978 }, { "epoch": 0.7546548448385054, "grad_norm": 3.3286004066467285, "learning_rate": 9.724023369690493e-06, "loss": 1.1295, "step": 2979 }, { "epoch": 0.7549081697276757, "grad_norm": 3.5271801948547363, "learning_rate": 9.72374879595356e-06, "loss": 1.2116, "step": 2980 }, { "epoch": 0.7551614946168461, "grad_norm": 3.4255433082580566, "learning_rate": 9.723474089575975e-06, "loss": 1.1897, "step": 2981 }, { "epoch": 0.7554148195060165, "grad_norm": 3.3398475646972656, "learning_rate": 9.723199250565449e-06, "loss": 1.0863, "step": 2982 }, { "epoch": 0.7556681443951868, "grad_norm": 3.2170045375823975, "learning_rate": 9.722924278929705e-06, "loss": 0.9774, "step": 2983 }, { "epoch": 0.7559214692843572, "grad_norm": 3.3909413814544678, "learning_rate": 9.722649174676459e-06, "loss": 1.081, "step": 2984 }, { "epoch": 0.7561747941735275, "grad_norm": 3.376862049102783, "learning_rate": 9.722373937813439e-06, "loss": 1.1951, "step": 2985 }, { "epoch": 0.7564281190626979, "grad_norm": 3.3181986808776855, "learning_rate": 9.72209856834837e-06, "loss": 1.1478, "step": 2986 }, { "epoch": 
0.7566814439518683, "grad_norm": 3.5607292652130127, "learning_rate": 9.721823066288988e-06, "loss": 1.0894, "step": 2987 }, { "epoch": 0.7569347688410386, "grad_norm": 3.5868828296661377, "learning_rate": 9.721547431643027e-06, "loss": 1.1139, "step": 2988 }, { "epoch": 0.757188093730209, "grad_norm": 4.153842449188232, "learning_rate": 9.721271664418226e-06, "loss": 1.2214, "step": 2989 }, { "epoch": 0.7574414186193793, "grad_norm": 3.2191667556762695, "learning_rate": 9.72099576462233e-06, "loss": 1.1246, "step": 2990 }, { "epoch": 0.7576947435085497, "grad_norm": 3.7205142974853516, "learning_rate": 9.720719732263087e-06, "loss": 1.188, "step": 2991 }, { "epoch": 0.7579480683977201, "grad_norm": 3.6832919120788574, "learning_rate": 9.720443567348245e-06, "loss": 1.2336, "step": 2992 }, { "epoch": 0.7582013932868904, "grad_norm": 3.2360286712646484, "learning_rate": 9.720167269885561e-06, "loss": 1.0151, "step": 2993 }, { "epoch": 0.7584547181760608, "grad_norm": 3.7540252208709717, "learning_rate": 9.71989083988279e-06, "loss": 1.0989, "step": 2994 }, { "epoch": 0.7587080430652312, "grad_norm": 3.3956358432769775, "learning_rate": 9.719614277347697e-06, "loss": 1.1714, "step": 2995 }, { "epoch": 0.7589613679544015, "grad_norm": 3.719648838043213, "learning_rate": 9.71933758228805e-06, "loss": 1.0855, "step": 2996 }, { "epoch": 0.7592146928435719, "grad_norm": 3.271644353866577, "learning_rate": 9.719060754711613e-06, "loss": 1.0492, "step": 2997 }, { "epoch": 0.7594680177327422, "grad_norm": 3.6907081604003906, "learning_rate": 9.718783794626163e-06, "loss": 1.2695, "step": 2998 }, { "epoch": 0.7597213426219126, "grad_norm": 3.3592963218688965, "learning_rate": 9.718506702039474e-06, "loss": 1.1613, "step": 2999 }, { "epoch": 0.759974667511083, "grad_norm": 3.1232833862304688, "learning_rate": 9.718229476959329e-06, "loss": 1.0907, "step": 3000 }, { "epoch": 0.759974667511083, "eval_loss": 1.1983312368392944, "eval_runtime": 11.8746, "eval_samples_per_second": 33.685, "eval_steps_per_second": 4.211, "step": 3000 }, { "epoch": 0.7602279924002533, "grad_norm": 3.976891279220581, "learning_rate": 9.717952119393512e-06, "loss": 1.3394, "step": 3001 }, { "epoch": 0.7604813172894237, "grad_norm": 3.916884183883667, "learning_rate": 9.717674629349809e-06, "loss": 1.1595, "step": 3002 }, { "epoch": 0.7607346421785941, "grad_norm": 3.557054281234741, "learning_rate": 9.717397006836016e-06, "loss": 1.2948, "step": 3003 }, { "epoch": 0.7609879670677644, "grad_norm": 3.4140894412994385, "learning_rate": 9.717119251859925e-06, "loss": 1.1628, "step": 3004 }, { "epoch": 0.7612412919569348, "grad_norm": 3.865302085876465, "learning_rate": 9.716841364429334e-06, "loss": 1.2314, "step": 3005 }, { "epoch": 0.7614946168461051, "grad_norm": 3.5563416481018066, "learning_rate": 9.716563344552052e-06, "loss": 1.1763, "step": 3006 }, { "epoch": 0.7617479417352755, "grad_norm": 3.6717288494110107, "learning_rate": 9.716285192235878e-06, "loss": 1.2297, "step": 3007 }, { "epoch": 0.7620012666244459, "grad_norm": 3.1846835613250732, "learning_rate": 9.716006907488629e-06, "loss": 1.1082, "step": 3008 }, { "epoch": 0.7622545915136162, "grad_norm": 3.318401575088501, "learning_rate": 9.715728490318117e-06, "loss": 1.0138, "step": 3009 }, { "epoch": 0.7625079164027866, "grad_norm": 3.2839062213897705, "learning_rate": 9.715449940732158e-06, "loss": 1.0879, "step": 3010 }, { "epoch": 0.7627612412919569, "grad_norm": 3.480377435684204, "learning_rate": 9.715171258738574e-06, "loss": 1.2739, "step": 3011 }, { 
"epoch": 0.7630145661811273, "grad_norm": 3.6452996730804443, "learning_rate": 9.71489244434519e-06, "loss": 1.1813, "step": 3012 }, { "epoch": 0.7632678910702977, "grad_norm": 3.612215042114258, "learning_rate": 9.714613497559839e-06, "loss": 1.2545, "step": 3013 }, { "epoch": 0.763521215959468, "grad_norm": 3.9662351608276367, "learning_rate": 9.714334418390348e-06, "loss": 1.3321, "step": 3014 }, { "epoch": 0.7637745408486384, "grad_norm": 3.423196792602539, "learning_rate": 9.714055206844557e-06, "loss": 1.2222, "step": 3015 }, { "epoch": 0.7640278657378088, "grad_norm": 3.5687761306762695, "learning_rate": 9.713775862930306e-06, "loss": 1.2045, "step": 3016 }, { "epoch": 0.7642811906269791, "grad_norm": 3.524186849594116, "learning_rate": 9.713496386655436e-06, "loss": 1.1724, "step": 3017 }, { "epoch": 0.7645345155161495, "grad_norm": 3.4388513565063477, "learning_rate": 9.713216778027798e-06, "loss": 1.2327, "step": 3018 }, { "epoch": 0.7647878404053198, "grad_norm": 3.0279359817504883, "learning_rate": 9.712937037055241e-06, "loss": 1.1481, "step": 3019 }, { "epoch": 0.7650411652944902, "grad_norm": 3.2815046310424805, "learning_rate": 9.712657163745623e-06, "loss": 1.1834, "step": 3020 }, { "epoch": 0.7652944901836606, "grad_norm": 3.692152976989746, "learning_rate": 9.712377158106798e-06, "loss": 1.3233, "step": 3021 }, { "epoch": 0.7655478150728309, "grad_norm": 3.515498399734497, "learning_rate": 9.712097020146631e-06, "loss": 1.2536, "step": 3022 }, { "epoch": 0.7658011399620013, "grad_norm": 3.546562671661377, "learning_rate": 9.711816749872989e-06, "loss": 1.1545, "step": 3023 }, { "epoch": 0.7660544648511717, "grad_norm": 3.4301865100860596, "learning_rate": 9.711536347293742e-06, "loss": 1.1617, "step": 3024 }, { "epoch": 0.766307789740342, "grad_norm": 3.3776097297668457, "learning_rate": 9.711255812416762e-06, "loss": 1.158, "step": 3025 }, { "epoch": 0.7665611146295124, "grad_norm": 3.4715332984924316, "learning_rate": 9.710975145249925e-06, "loss": 1.0201, "step": 3026 }, { "epoch": 0.7668144395186827, "grad_norm": 3.1569130420684814, "learning_rate": 9.710694345801116e-06, "loss": 1.0537, "step": 3027 }, { "epoch": 0.7670677644078531, "grad_norm": 3.8341550827026367, "learning_rate": 9.710413414078218e-06, "loss": 1.2747, "step": 3028 }, { "epoch": 0.7673210892970235, "grad_norm": 4.095462799072266, "learning_rate": 9.710132350089117e-06, "loss": 1.2228, "step": 3029 }, { "epoch": 0.7675744141861938, "grad_norm": 3.852132797241211, "learning_rate": 9.709851153841708e-06, "loss": 1.2327, "step": 3030 }, { "epoch": 0.7678277390753642, "grad_norm": 3.2577106952667236, "learning_rate": 9.709569825343886e-06, "loss": 1.061, "step": 3031 }, { "epoch": 0.7680810639645345, "grad_norm": 3.1022679805755615, "learning_rate": 9.709288364603551e-06, "loss": 1.0671, "step": 3032 }, { "epoch": 0.7683343888537049, "grad_norm": 3.4080400466918945, "learning_rate": 9.709006771628605e-06, "loss": 1.0825, "step": 3033 }, { "epoch": 0.7685877137428753, "grad_norm": 3.4398090839385986, "learning_rate": 9.708725046426957e-06, "loss": 1.1026, "step": 3034 }, { "epoch": 0.7688410386320456, "grad_norm": 3.5645711421966553, "learning_rate": 9.708443189006516e-06, "loss": 1.0719, "step": 3035 }, { "epoch": 0.769094363521216, "grad_norm": 3.712003231048584, "learning_rate": 9.708161199375198e-06, "loss": 1.3359, "step": 3036 }, { "epoch": 0.7693476884103864, "grad_norm": 3.857830762863159, "learning_rate": 9.707879077540918e-06, "loss": 1.3231, "step": 3037 }, { "epoch": 0.7696010132995567, 
"grad_norm": 3.32967472076416, "learning_rate": 9.7075968235116e-06, "loss": 1.0123, "step": 3038 }, { "epoch": 0.7698543381887271, "grad_norm": 3.3485281467437744, "learning_rate": 9.70731443729517e-06, "loss": 1.1482, "step": 3039 }, { "epoch": 0.7701076630778974, "grad_norm": 3.2829980850219727, "learning_rate": 9.707031918899558e-06, "loss": 1.0776, "step": 3040 }, { "epoch": 0.7703609879670678, "grad_norm": 3.3828749656677246, "learning_rate": 9.706749268332694e-06, "loss": 1.2159, "step": 3041 }, { "epoch": 0.7706143128562382, "grad_norm": 3.7079780101776123, "learning_rate": 9.70646648560252e-06, "loss": 1.0764, "step": 3042 }, { "epoch": 0.7708676377454085, "grad_norm": 3.597647190093994, "learning_rate": 9.70618357071697e-06, "loss": 1.2618, "step": 3043 }, { "epoch": 0.7711209626345789, "grad_norm": 3.4452812671661377, "learning_rate": 9.705900523683991e-06, "loss": 1.1679, "step": 3044 }, { "epoch": 0.7713742875237493, "grad_norm": 3.8126072883605957, "learning_rate": 9.705617344511531e-06, "loss": 1.3734, "step": 3045 }, { "epoch": 0.7716276124129196, "grad_norm": 3.520598888397217, "learning_rate": 9.705334033207542e-06, "loss": 1.0812, "step": 3046 }, { "epoch": 0.77188093730209, "grad_norm": 3.625828981399536, "learning_rate": 9.705050589779979e-06, "loss": 1.2304, "step": 3047 }, { "epoch": 0.7721342621912602, "grad_norm": 3.4245150089263916, "learning_rate": 9.7047670142368e-06, "loss": 1.0356, "step": 3048 }, { "epoch": 0.7723875870804306, "grad_norm": 3.5724411010742188, "learning_rate": 9.704483306585967e-06, "loss": 1.1305, "step": 3049 }, { "epoch": 0.772640911969601, "grad_norm": 3.7136075496673584, "learning_rate": 9.70419946683545e-06, "loss": 1.2277, "step": 3050 }, { "epoch": 0.7728942368587713, "grad_norm": 3.5142602920532227, "learning_rate": 9.703915494993215e-06, "loss": 1.2297, "step": 3051 }, { "epoch": 0.7731475617479417, "grad_norm": 3.6985182762145996, "learning_rate": 9.703631391067239e-06, "loss": 1.1701, "step": 3052 }, { "epoch": 0.7734008866371122, "grad_norm": 3.6202380657196045, "learning_rate": 9.703347155065496e-06, "loss": 1.1452, "step": 3053 }, { "epoch": 0.7736542115262824, "grad_norm": 3.7811033725738525, "learning_rate": 9.703062786995972e-06, "loss": 1.2575, "step": 3054 }, { "epoch": 0.7739075364154528, "grad_norm": 3.7027955055236816, "learning_rate": 9.702778286866647e-06, "loss": 1.1537, "step": 3055 }, { "epoch": 0.7741608613046231, "grad_norm": 3.3426177501678467, "learning_rate": 9.702493654685512e-06, "loss": 1.2052, "step": 3056 }, { "epoch": 0.7744141861937935, "grad_norm": 3.939532518386841, "learning_rate": 9.702208890460559e-06, "loss": 1.22, "step": 3057 }, { "epoch": 0.7746675110829639, "grad_norm": 3.596379280090332, "learning_rate": 9.701923994199784e-06, "loss": 1.1168, "step": 3058 }, { "epoch": 0.7749208359721342, "grad_norm": 3.7136929035186768, "learning_rate": 9.701638965911188e-06, "loss": 1.2842, "step": 3059 }, { "epoch": 0.7751741608613046, "grad_norm": 3.8441104888916016, "learning_rate": 9.701353805602773e-06, "loss": 1.3659, "step": 3060 }, { "epoch": 0.7754274857504749, "grad_norm": 3.539217472076416, "learning_rate": 9.701068513282547e-06, "loss": 1.1411, "step": 3061 }, { "epoch": 0.7756808106396453, "grad_norm": 3.299401044845581, "learning_rate": 9.70078308895852e-06, "loss": 1.1182, "step": 3062 }, { "epoch": 0.7759341355288157, "grad_norm": 3.4412131309509277, "learning_rate": 9.700497532638707e-06, "loss": 1.1753, "step": 3063 }, { "epoch": 0.776187460417986, "grad_norm": 3.202305793762207, 
"learning_rate": 9.700211844331126e-06, "loss": 1.128, "step": 3064 }, { "epoch": 0.7764407853071564, "grad_norm": 3.5436625480651855, "learning_rate": 9.6999260240438e-06, "loss": 1.2065, "step": 3065 }, { "epoch": 0.7766941101963268, "grad_norm": 3.8974997997283936, "learning_rate": 9.699640071784752e-06, "loss": 1.1363, "step": 3066 }, { "epoch": 0.7769474350854971, "grad_norm": 3.397515058517456, "learning_rate": 9.699353987562017e-06, "loss": 1.0489, "step": 3067 }, { "epoch": 0.7772007599746675, "grad_norm": 3.2989842891693115, "learning_rate": 9.699067771383621e-06, "loss": 1.2765, "step": 3068 }, { "epoch": 0.7774540848638378, "grad_norm": 3.317469596862793, "learning_rate": 9.698781423257606e-06, "loss": 1.1563, "step": 3069 }, { "epoch": 0.7777074097530082, "grad_norm": 3.4486303329467773, "learning_rate": 9.698494943192011e-06, "loss": 1.265, "step": 3070 }, { "epoch": 0.7779607346421786, "grad_norm": 3.5988612174987793, "learning_rate": 9.69820833119488e-06, "loss": 1.1572, "step": 3071 }, { "epoch": 0.7782140595313489, "grad_norm": 3.2691543102264404, "learning_rate": 9.697921587274261e-06, "loss": 1.1893, "step": 3072 }, { "epoch": 0.7784673844205193, "grad_norm": 3.7137327194213867, "learning_rate": 9.697634711438205e-06, "loss": 1.1896, "step": 3073 }, { "epoch": 0.7787207093096897, "grad_norm": 3.6938259601593018, "learning_rate": 9.697347703694769e-06, "loss": 1.1371, "step": 3074 }, { "epoch": 0.77897403419886, "grad_norm": 3.327613353729248, "learning_rate": 9.697060564052009e-06, "loss": 1.2833, "step": 3075 }, { "epoch": 0.7792273590880304, "grad_norm": 3.5304393768310547, "learning_rate": 9.696773292517991e-06, "loss": 1.152, "step": 3076 }, { "epoch": 0.7794806839772007, "grad_norm": 3.6134355068206787, "learning_rate": 9.696485889100781e-06, "loss": 1.2214, "step": 3077 }, { "epoch": 0.7797340088663711, "grad_norm": 3.2280755043029785, "learning_rate": 9.696198353808449e-06, "loss": 1.1215, "step": 3078 }, { "epoch": 0.7799873337555415, "grad_norm": 3.1979382038116455, "learning_rate": 9.695910686649067e-06, "loss": 1.1125, "step": 3079 }, { "epoch": 0.7802406586447118, "grad_norm": 3.7828009128570557, "learning_rate": 9.695622887630714e-06, "loss": 1.2805, "step": 3080 }, { "epoch": 0.7804939835338822, "grad_norm": 3.492016077041626, "learning_rate": 9.69533495676147e-06, "loss": 1.2535, "step": 3081 }, { "epoch": 0.7807473084230525, "grad_norm": 3.493319034576416, "learning_rate": 9.695046894049422e-06, "loss": 1.1868, "step": 3082 }, { "epoch": 0.7810006333122229, "grad_norm": 3.061150312423706, "learning_rate": 9.694758699502658e-06, "loss": 1.0062, "step": 3083 }, { "epoch": 0.7812539582013933, "grad_norm": 3.4540719985961914, "learning_rate": 9.694470373129268e-06, "loss": 1.1865, "step": 3084 }, { "epoch": 0.7815072830905636, "grad_norm": 3.6647727489471436, "learning_rate": 9.694181914937353e-06, "loss": 1.2265, "step": 3085 }, { "epoch": 0.781760607979734, "grad_norm": 3.4149343967437744, "learning_rate": 9.693893324935008e-06, "loss": 1.2191, "step": 3086 }, { "epoch": 0.7820139328689044, "grad_norm": 3.4975831508636475, "learning_rate": 9.69360460313034e-06, "loss": 1.1281, "step": 3087 }, { "epoch": 0.7822672577580747, "grad_norm": 3.4812533855438232, "learning_rate": 9.693315749531452e-06, "loss": 1.0671, "step": 3088 }, { "epoch": 0.7825205826472451, "grad_norm": 3.6105175018310547, "learning_rate": 9.69302676414646e-06, "loss": 1.2766, "step": 3089 }, { "epoch": 0.7827739075364154, "grad_norm": 3.2064576148986816, "learning_rate": 
9.692737646983475e-06, "loss": 1.2355, "step": 3090 }, { "epoch": 0.7830272324255858, "grad_norm": 3.4227678775787354, "learning_rate": 9.692448398050616e-06, "loss": 1.1434, "step": 3091 }, { "epoch": 0.7832805573147562, "grad_norm": 3.613091468811035, "learning_rate": 9.692159017356005e-06, "loss": 1.2833, "step": 3092 }, { "epoch": 0.7835338822039265, "grad_norm": 3.376112937927246, "learning_rate": 9.691869504907768e-06, "loss": 1.3671, "step": 3093 }, { "epoch": 0.7837872070930969, "grad_norm": 3.616713047027588, "learning_rate": 9.691579860714033e-06, "loss": 1.1647, "step": 3094 }, { "epoch": 0.7840405319822673, "grad_norm": 3.1492090225219727, "learning_rate": 9.691290084782935e-06, "loss": 0.9979, "step": 3095 }, { "epoch": 0.7842938568714376, "grad_norm": 3.5917351245880127, "learning_rate": 9.691000177122613e-06, "loss": 1.2316, "step": 3096 }, { "epoch": 0.784547181760608, "grad_norm": 3.565091609954834, "learning_rate": 9.690710137741202e-06, "loss": 1.2166, "step": 3097 }, { "epoch": 0.7848005066497783, "grad_norm": 3.124160051345825, "learning_rate": 9.690419966646849e-06, "loss": 1.0655, "step": 3098 }, { "epoch": 0.7850538315389487, "grad_norm": 3.484919309616089, "learning_rate": 9.690129663847703e-06, "loss": 1.1891, "step": 3099 }, { "epoch": 0.7853071564281191, "grad_norm": 3.617344856262207, "learning_rate": 9.689839229351912e-06, "loss": 1.1958, "step": 3100 }, { "epoch": 0.7855604813172894, "grad_norm": 3.445941686630249, "learning_rate": 9.689548663167636e-06, "loss": 1.2537, "step": 3101 }, { "epoch": 0.7858138062064598, "grad_norm": 3.6886892318725586, "learning_rate": 9.689257965303029e-06, "loss": 1.3261, "step": 3102 }, { "epoch": 0.7860671310956302, "grad_norm": 3.6095004081726074, "learning_rate": 9.688967135766257e-06, "loss": 1.1573, "step": 3103 }, { "epoch": 0.7863204559848005, "grad_norm": 3.524705171585083, "learning_rate": 9.688676174565486e-06, "loss": 1.1003, "step": 3104 }, { "epoch": 0.7865737808739709, "grad_norm": 3.440295934677124, "learning_rate": 9.688385081708883e-06, "loss": 1.2146, "step": 3105 }, { "epoch": 0.7868271057631412, "grad_norm": 3.291731119155884, "learning_rate": 9.688093857204628e-06, "loss": 1.0812, "step": 3106 }, { "epoch": 0.7870804306523116, "grad_norm": 3.2207679748535156, "learning_rate": 9.687802501060893e-06, "loss": 0.9835, "step": 3107 }, { "epoch": 0.787333755541482, "grad_norm": 3.5070319175720215, "learning_rate": 9.687511013285863e-06, "loss": 1.2079, "step": 3108 }, { "epoch": 0.7875870804306523, "grad_norm": 3.92917799949646, "learning_rate": 9.687219393887716e-06, "loss": 1.2231, "step": 3109 }, { "epoch": 0.7878404053198227, "grad_norm": 3.5776121616363525, "learning_rate": 9.686927642874648e-06, "loss": 1.1356, "step": 3110 }, { "epoch": 0.788093730208993, "grad_norm": 3.3320472240448, "learning_rate": 9.68663576025485e-06, "loss": 1.1073, "step": 3111 }, { "epoch": 0.7883470550981634, "grad_norm": 3.826519012451172, "learning_rate": 9.686343746036513e-06, "loss": 1.3251, "step": 3112 }, { "epoch": 0.7886003799873338, "grad_norm": 3.5765302181243896, "learning_rate": 9.686051600227841e-06, "loss": 1.1568, "step": 3113 }, { "epoch": 0.7888537048765041, "grad_norm": 3.455152988433838, "learning_rate": 9.685759322837039e-06, "loss": 1.2745, "step": 3114 }, { "epoch": 0.7891070297656745, "grad_norm": 3.5755326747894287, "learning_rate": 9.685466913872308e-06, "loss": 1.0886, "step": 3115 }, { "epoch": 0.7893603546548449, "grad_norm": 3.298093795776367, "learning_rate": 9.685174373341864e-06, "loss": 1.2409, 
"step": 3116 }, { "epoch": 0.7896136795440152, "grad_norm": 3.254582643508911, "learning_rate": 9.684881701253917e-06, "loss": 1.1229, "step": 3117 }, { "epoch": 0.7898670044331856, "grad_norm": 3.6387033462524414, "learning_rate": 9.684588897616689e-06, "loss": 1.1904, "step": 3118 }, { "epoch": 0.7901203293223559, "grad_norm": 3.4219889640808105, "learning_rate": 9.6842959624384e-06, "loss": 1.2364, "step": 3119 }, { "epoch": 0.7903736542115263, "grad_norm": 3.6625919342041016, "learning_rate": 9.684002895727279e-06, "loss": 1.2403, "step": 3120 }, { "epoch": 0.7906269791006967, "grad_norm": 3.738839864730835, "learning_rate": 9.683709697491549e-06, "loss": 1.1652, "step": 3121 }, { "epoch": 0.790880303989867, "grad_norm": 3.450275421142578, "learning_rate": 9.683416367739444e-06, "loss": 1.0929, "step": 3122 }, { "epoch": 0.7911336288790374, "grad_norm": 3.7725143432617188, "learning_rate": 9.683122906479206e-06, "loss": 1.2139, "step": 3123 }, { "epoch": 0.7913869537682078, "grad_norm": 3.659973621368408, "learning_rate": 9.68282931371907e-06, "loss": 1.2173, "step": 3124 }, { "epoch": 0.7916402786573781, "grad_norm": 3.4902517795562744, "learning_rate": 9.68253558946728e-06, "loss": 1.2356, "step": 3125 }, { "epoch": 0.7918936035465485, "grad_norm": 3.636298418045044, "learning_rate": 9.682241733732088e-06, "loss": 1.2319, "step": 3126 }, { "epoch": 0.7921469284357188, "grad_norm": 3.2394468784332275, "learning_rate": 9.681947746521742e-06, "loss": 0.9985, "step": 3127 }, { "epoch": 0.7924002533248892, "grad_norm": 3.2394330501556396, "learning_rate": 9.681653627844497e-06, "loss": 1.1402, "step": 3128 }, { "epoch": 0.7926535782140596, "grad_norm": 3.2934374809265137, "learning_rate": 9.681359377708613e-06, "loss": 1.0669, "step": 3129 }, { "epoch": 0.7929069031032299, "grad_norm": 3.6023757457733154, "learning_rate": 9.681064996122351e-06, "loss": 1.1956, "step": 3130 }, { "epoch": 0.7931602279924003, "grad_norm": 3.362863302230835, "learning_rate": 9.680770483093978e-06, "loss": 1.1133, "step": 3131 }, { "epoch": 0.7934135528815706, "grad_norm": 3.4116978645324707, "learning_rate": 9.680475838631764e-06, "loss": 1.3068, "step": 3132 }, { "epoch": 0.793666877770741, "grad_norm": 3.557642936706543, "learning_rate": 9.680181062743981e-06, "loss": 1.2215, "step": 3133 }, { "epoch": 0.7939202026599114, "grad_norm": 3.188054323196411, "learning_rate": 9.67988615543891e-06, "loss": 1.1557, "step": 3134 }, { "epoch": 0.7941735275490817, "grad_norm": 3.318195104598999, "learning_rate": 9.679591116724826e-06, "loss": 1.132, "step": 3135 }, { "epoch": 0.7944268524382521, "grad_norm": 3.3555331230163574, "learning_rate": 9.679295946610017e-06, "loss": 1.1461, "step": 3136 }, { "epoch": 0.7946801773274225, "grad_norm": 3.305663585662842, "learning_rate": 9.679000645102771e-06, "loss": 1.2936, "step": 3137 }, { "epoch": 0.7949335022165928, "grad_norm": 3.2727150917053223, "learning_rate": 9.67870521221138e-06, "loss": 1.0754, "step": 3138 }, { "epoch": 0.7951868271057632, "grad_norm": 3.497584581375122, "learning_rate": 9.678409647944141e-06, "loss": 1.1606, "step": 3139 }, { "epoch": 0.7954401519949335, "grad_norm": 3.5710928440093994, "learning_rate": 9.678113952309351e-06, "loss": 1.2395, "step": 3140 }, { "epoch": 0.7956934768841039, "grad_norm": 3.847907304763794, "learning_rate": 9.677818125315314e-06, "loss": 1.2105, "step": 3141 }, { "epoch": 0.7959468017732743, "grad_norm": 3.30964732170105, "learning_rate": 9.677522166970335e-06, "loss": 1.1297, "step": 3142 }, { "epoch": 
0.7962001266624446, "grad_norm": 3.6384894847869873, "learning_rate": 9.677226077282728e-06, "loss": 1.2604, "step": 3143 }, { "epoch": 0.796453451551615, "grad_norm": 3.5480246543884277, "learning_rate": 9.676929856260803e-06, "loss": 1.2013, "step": 3144 }, { "epoch": 0.7967067764407854, "grad_norm": 3.394646167755127, "learning_rate": 9.67663350391288e-06, "loss": 1.0893, "step": 3145 }, { "epoch": 0.7969601013299557, "grad_norm": 3.6400444507598877, "learning_rate": 9.67633702024728e-06, "loss": 1.1384, "step": 3146 }, { "epoch": 0.797213426219126, "grad_norm": 3.373098850250244, "learning_rate": 9.676040405272329e-06, "loss": 1.2476, "step": 3147 }, { "epoch": 0.7974667511082963, "grad_norm": 3.505863666534424, "learning_rate": 9.675743658996353e-06, "loss": 1.1156, "step": 3148 }, { "epoch": 0.7977200759974667, "grad_norm": 3.760483980178833, "learning_rate": 9.675446781427689e-06, "loss": 1.3028, "step": 3149 }, { "epoch": 0.7979734008866372, "grad_norm": 3.562391519546509, "learning_rate": 9.675149772574669e-06, "loss": 1.2152, "step": 3150 }, { "epoch": 0.7982267257758074, "grad_norm": 3.8508145809173584, "learning_rate": 9.674852632445635e-06, "loss": 1.2636, "step": 3151 }, { "epoch": 0.7984800506649778, "grad_norm": 3.880608081817627, "learning_rate": 9.674555361048931e-06, "loss": 1.1694, "step": 3152 }, { "epoch": 0.7987333755541483, "grad_norm": 3.732882022857666, "learning_rate": 9.674257958392901e-06, "loss": 1.1474, "step": 3153 }, { "epoch": 0.7989867004433185, "grad_norm": 3.0603837966918945, "learning_rate": 9.6739604244859e-06, "loss": 1.034, "step": 3154 }, { "epoch": 0.799240025332489, "grad_norm": 3.607832431793213, "learning_rate": 9.67366275933628e-06, "loss": 1.2098, "step": 3155 }, { "epoch": 0.7994933502216592, "grad_norm": 3.4668948650360107, "learning_rate": 9.673364962952402e-06, "loss": 1.0646, "step": 3156 }, { "epoch": 0.7997466751108296, "grad_norm": 3.265519142150879, "learning_rate": 9.673067035342625e-06, "loss": 1.2244, "step": 3157 }, { "epoch": 0.8, "grad_norm": 3.534250020980835, "learning_rate": 9.672768976515314e-06, "loss": 1.1848, "step": 3158 }, { "epoch": 0.8002533248891703, "grad_norm": 3.676901340484619, "learning_rate": 9.672470786478842e-06, "loss": 1.1528, "step": 3159 }, { "epoch": 0.8005066497783407, "grad_norm": 3.2827255725860596, "learning_rate": 9.672172465241581e-06, "loss": 1.1754, "step": 3160 }, { "epoch": 0.800759974667511, "grad_norm": 3.2366580963134766, "learning_rate": 9.671874012811905e-06, "loss": 1.0439, "step": 3161 }, { "epoch": 0.8010132995566814, "grad_norm": 3.3683462142944336, "learning_rate": 9.671575429198198e-06, "loss": 1.0951, "step": 3162 }, { "epoch": 0.8012666244458518, "grad_norm": 3.380063772201538, "learning_rate": 9.671276714408841e-06, "loss": 1.3027, "step": 3163 }, { "epoch": 0.8015199493350221, "grad_norm": 3.5820157527923584, "learning_rate": 9.670977868452225e-06, "loss": 1.2704, "step": 3164 }, { "epoch": 0.8017732742241925, "grad_norm": 3.5308854579925537, "learning_rate": 9.670678891336738e-06, "loss": 1.1675, "step": 3165 }, { "epoch": 0.8020265991133629, "grad_norm": 3.722160577774048, "learning_rate": 9.670379783070776e-06, "loss": 1.2754, "step": 3166 }, { "epoch": 0.8022799240025332, "grad_norm": 3.3937900066375732, "learning_rate": 9.670080543662742e-06, "loss": 1.1771, "step": 3167 }, { "epoch": 0.8025332488917036, "grad_norm": 3.163614511489868, "learning_rate": 9.66978117312103e-06, "loss": 1.0594, "step": 3168 }, { "epoch": 0.8027865737808739, "grad_norm": 3.295989990234375, 
"learning_rate": 9.669481671454055e-06, "loss": 1.0515, "step": 3169 }, { "epoch": 0.8030398986700443, "grad_norm": 3.417457103729248, "learning_rate": 9.669182038670223e-06, "loss": 1.1258, "step": 3170 }, { "epoch": 0.8032932235592147, "grad_norm": 3.2533841133117676, "learning_rate": 9.668882274777946e-06, "loss": 1.1411, "step": 3171 }, { "epoch": 0.803546548448385, "grad_norm": 3.379027843475342, "learning_rate": 9.668582379785645e-06, "loss": 1.241, "step": 3172 }, { "epoch": 0.8037998733375554, "grad_norm": 3.3672876358032227, "learning_rate": 9.668282353701737e-06, "loss": 1.2533, "step": 3173 }, { "epoch": 0.8040531982267258, "grad_norm": 3.446272373199463, "learning_rate": 9.667982196534651e-06, "loss": 1.1312, "step": 3174 }, { "epoch": 0.8043065231158961, "grad_norm": 3.0003163814544678, "learning_rate": 9.66768190829281e-06, "loss": 1.0172, "step": 3175 }, { "epoch": 0.8045598480050665, "grad_norm": 3.13234281539917, "learning_rate": 9.66738148898465e-06, "loss": 1.2439, "step": 3176 }, { "epoch": 0.8048131728942368, "grad_norm": 3.442028045654297, "learning_rate": 9.667080938618605e-06, "loss": 1.2135, "step": 3177 }, { "epoch": 0.8050664977834072, "grad_norm": 3.6103761196136475, "learning_rate": 9.666780257203114e-06, "loss": 1.1319, "step": 3178 }, { "epoch": 0.8053198226725776, "grad_norm": 3.420414686203003, "learning_rate": 9.666479444746622e-06, "loss": 1.1977, "step": 3179 }, { "epoch": 0.8055731475617479, "grad_norm": 3.5578372478485107, "learning_rate": 9.666178501257573e-06, "loss": 1.0822, "step": 3180 }, { "epoch": 0.8058264724509183, "grad_norm": 3.520909547805786, "learning_rate": 9.665877426744418e-06, "loss": 1.2723, "step": 3181 }, { "epoch": 0.8060797973400886, "grad_norm": 3.3848018646240234, "learning_rate": 9.665576221215614e-06, "loss": 1.0662, "step": 3182 }, { "epoch": 0.806333122229259, "grad_norm": 3.620361804962158, "learning_rate": 9.665274884679614e-06, "loss": 1.1763, "step": 3183 }, { "epoch": 0.8065864471184294, "grad_norm": 3.8704168796539307, "learning_rate": 9.664973417144883e-06, "loss": 1.3097, "step": 3184 }, { "epoch": 0.8068397720075997, "grad_norm": 3.535381317138672, "learning_rate": 9.664671818619884e-06, "loss": 1.1958, "step": 3185 }, { "epoch": 0.8070930968967701, "grad_norm": 3.782193422317505, "learning_rate": 9.664370089113087e-06, "loss": 1.2697, "step": 3186 }, { "epoch": 0.8073464217859405, "grad_norm": 3.414486885070801, "learning_rate": 9.664068228632963e-06, "loss": 1.1252, "step": 3187 }, { "epoch": 0.8075997466751108, "grad_norm": 3.460775852203369, "learning_rate": 9.663766237187991e-06, "loss": 1.1037, "step": 3188 }, { "epoch": 0.8078530715642812, "grad_norm": 3.7717158794403076, "learning_rate": 9.663464114786649e-06, "loss": 1.2827, "step": 3189 }, { "epoch": 0.8081063964534515, "grad_norm": 3.7486562728881836, "learning_rate": 9.66316186143742e-06, "loss": 1.2179, "step": 3190 }, { "epoch": 0.8083597213426219, "grad_norm": 3.182420015335083, "learning_rate": 9.662859477148789e-06, "loss": 0.9427, "step": 3191 }, { "epoch": 0.8086130462317923, "grad_norm": 3.69174861907959, "learning_rate": 9.66255696192925e-06, "loss": 1.2672, "step": 3192 }, { "epoch": 0.8088663711209626, "grad_norm": 3.5637874603271484, "learning_rate": 9.6622543157873e-06, "loss": 1.2077, "step": 3193 }, { "epoch": 0.809119696010133, "grad_norm": 3.373786211013794, "learning_rate": 9.661951538731431e-06, "loss": 1.1181, "step": 3194 }, { "epoch": 0.8093730208993034, "grad_norm": 3.3571665287017822, "learning_rate": 9.661648630770149e-06, 
"loss": 1.1348, "step": 3195 }, { "epoch": 0.8096263457884737, "grad_norm": 3.5804684162139893, "learning_rate": 9.661345591911956e-06, "loss": 1.13, "step": 3196 }, { "epoch": 0.8098796706776441, "grad_norm": 3.5872457027435303, "learning_rate": 9.661042422165366e-06, "loss": 1.271, "step": 3197 }, { "epoch": 0.8101329955668144, "grad_norm": 4.017947196960449, "learning_rate": 9.66073912153889e-06, "loss": 1.2686, "step": 3198 }, { "epoch": 0.8103863204559848, "grad_norm": 4.318078994750977, "learning_rate": 9.66043569004104e-06, "loss": 1.2008, "step": 3199 }, { "epoch": 0.8106396453451552, "grad_norm": 3.1133437156677246, "learning_rate": 9.660132127680344e-06, "loss": 1.1175, "step": 3200 }, { "epoch": 0.8108929702343255, "grad_norm": 3.359477996826172, "learning_rate": 9.65982843446532e-06, "loss": 1.1499, "step": 3201 }, { "epoch": 0.8111462951234959, "grad_norm": 3.635134696960449, "learning_rate": 9.659524610404499e-06, "loss": 1.1756, "step": 3202 }, { "epoch": 0.8113996200126662, "grad_norm": 3.313199281692505, "learning_rate": 9.659220655506408e-06, "loss": 1.141, "step": 3203 }, { "epoch": 0.8116529449018366, "grad_norm": 3.8395190238952637, "learning_rate": 9.658916569779586e-06, "loss": 1.275, "step": 3204 }, { "epoch": 0.811906269791007, "grad_norm": 3.571878433227539, "learning_rate": 9.65861235323257e-06, "loss": 1.2219, "step": 3205 }, { "epoch": 0.8121595946801773, "grad_norm": 3.641077995300293, "learning_rate": 9.658308005873905e-06, "loss": 1.2264, "step": 3206 }, { "epoch": 0.8124129195693477, "grad_norm": 3.2891337871551514, "learning_rate": 9.658003527712132e-06, "loss": 1.1374, "step": 3207 }, { "epoch": 0.8126662444585181, "grad_norm": 3.634793996810913, "learning_rate": 9.657698918755803e-06, "loss": 1.2506, "step": 3208 }, { "epoch": 0.8129195693476884, "grad_norm": 3.4935338497161865, "learning_rate": 9.657394179013471e-06, "loss": 1.2953, "step": 3209 }, { "epoch": 0.8131728942368588, "grad_norm": 3.2657690048217773, "learning_rate": 9.657089308493695e-06, "loss": 1.0885, "step": 3210 }, { "epoch": 0.8134262191260291, "grad_norm": 3.4446094036102295, "learning_rate": 9.656784307205033e-06, "loss": 1.3055, "step": 3211 }, { "epoch": 0.8136795440151995, "grad_norm": 3.265702962875366, "learning_rate": 9.65647917515605e-06, "loss": 0.9897, "step": 3212 }, { "epoch": 0.8139328689043699, "grad_norm": 3.2030014991760254, "learning_rate": 9.656173912355314e-06, "loss": 1.1032, "step": 3213 }, { "epoch": 0.8141861937935402, "grad_norm": 3.6671595573425293, "learning_rate": 9.655868518811398e-06, "loss": 1.2168, "step": 3214 }, { "epoch": 0.8144395186827106, "grad_norm": 3.2315893173217773, "learning_rate": 9.655562994532875e-06, "loss": 1.0976, "step": 3215 }, { "epoch": 0.814692843571881, "grad_norm": 3.3605079650878906, "learning_rate": 9.655257339528325e-06, "loss": 1.1803, "step": 3216 }, { "epoch": 0.8149461684610513, "grad_norm": 3.871366500854492, "learning_rate": 9.65495155380633e-06, "loss": 1.3661, "step": 3217 }, { "epoch": 0.8151994933502217, "grad_norm": 3.7332444190979004, "learning_rate": 9.65464563737548e-06, "loss": 1.1392, "step": 3218 }, { "epoch": 0.815452818239392, "grad_norm": 4.088546276092529, "learning_rate": 9.654339590244359e-06, "loss": 1.3065, "step": 3219 }, { "epoch": 0.8157061431285624, "grad_norm": 3.6960456371307373, "learning_rate": 9.654033412421565e-06, "loss": 1.1731, "step": 3220 }, { "epoch": 0.8159594680177328, "grad_norm": 3.8195888996124268, "learning_rate": 9.653727103915694e-06, "loss": 1.0466, "step": 3221 }, { "epoch": 
0.8162127929069031, "grad_norm": 3.41788649559021, "learning_rate": 9.653420664735348e-06, "loss": 1.1442, "step": 3222 }, { "epoch": 0.8164661177960735, "grad_norm": 3.5325682163238525, "learning_rate": 9.653114094889128e-06, "loss": 1.1169, "step": 3223 }, { "epoch": 0.8167194426852439, "grad_norm": 3.577907085418701, "learning_rate": 9.652807394385646e-06, "loss": 1.1823, "step": 3224 }, { "epoch": 0.8169727675744142, "grad_norm": 3.28833270072937, "learning_rate": 9.652500563233513e-06, "loss": 1.1912, "step": 3225 }, { "epoch": 0.8172260924635846, "grad_norm": 3.2663886547088623, "learning_rate": 9.652193601441346e-06, "loss": 1.2565, "step": 3226 }, { "epoch": 0.8174794173527549, "grad_norm": 3.691718101501465, "learning_rate": 9.651886509017763e-06, "loss": 1.2236, "step": 3227 }, { "epoch": 0.8177327422419253, "grad_norm": 3.51438570022583, "learning_rate": 9.651579285971386e-06, "loss": 1.232, "step": 3228 }, { "epoch": 0.8179860671310957, "grad_norm": 3.36354660987854, "learning_rate": 9.651271932310843e-06, "loss": 1.1005, "step": 3229 }, { "epoch": 0.818239392020266, "grad_norm": 2.835439443588257, "learning_rate": 9.650964448044763e-06, "loss": 1.1692, "step": 3230 }, { "epoch": 0.8184927169094364, "grad_norm": 3.549734354019165, "learning_rate": 9.650656833181784e-06, "loss": 1.1792, "step": 3231 }, { "epoch": 0.8187460417986067, "grad_norm": 3.8005549907684326, "learning_rate": 9.650349087730538e-06, "loss": 1.0825, "step": 3232 }, { "epoch": 0.8189993666877771, "grad_norm": 3.6286110877990723, "learning_rate": 9.65004121169967e-06, "loss": 1.219, "step": 3233 }, { "epoch": 0.8192526915769475, "grad_norm": 3.3073770999908447, "learning_rate": 9.649733205097824e-06, "loss": 1.0917, "step": 3234 }, { "epoch": 0.8195060164661178, "grad_norm": 3.6399085521698, "learning_rate": 9.64942506793365e-06, "loss": 1.3087, "step": 3235 }, { "epoch": 0.8197593413552882, "grad_norm": 3.6974353790283203, "learning_rate": 9.649116800215799e-06, "loss": 1.1865, "step": 3236 }, { "epoch": 0.8200126662444586, "grad_norm": 3.6860082149505615, "learning_rate": 9.648808401952926e-06, "loss": 1.239, "step": 3237 }, { "epoch": 0.8202659911336289, "grad_norm": 3.564912796020508, "learning_rate": 9.648499873153691e-06, "loss": 1.1653, "step": 3238 }, { "epoch": 0.8205193160227993, "grad_norm": 3.8110899925231934, "learning_rate": 9.648191213826761e-06, "loss": 1.1491, "step": 3239 }, { "epoch": 0.8207726409119696, "grad_norm": 3.462819814682007, "learning_rate": 9.647882423980799e-06, "loss": 1.2072, "step": 3240 }, { "epoch": 0.82102596580114, "grad_norm": 3.7578890323638916, "learning_rate": 9.647573503624477e-06, "loss": 1.2355, "step": 3241 }, { "epoch": 0.8212792906903104, "grad_norm": 3.286555051803589, "learning_rate": 9.647264452766468e-06, "loss": 1.1327, "step": 3242 }, { "epoch": 0.8215326155794807, "grad_norm": 3.6600263118743896, "learning_rate": 9.646955271415453e-06, "loss": 1.2181, "step": 3243 }, { "epoch": 0.821785940468651, "grad_norm": 3.9920105934143066, "learning_rate": 9.64664595958011e-06, "loss": 1.3502, "step": 3244 }, { "epoch": 0.8220392653578215, "grad_norm": 3.3619606494903564, "learning_rate": 9.646336517269127e-06, "loss": 1.2424, "step": 3245 }, { "epoch": 0.8222925902469918, "grad_norm": 3.620398759841919, "learning_rate": 9.646026944491194e-06, "loss": 1.1896, "step": 3246 }, { "epoch": 0.8225459151361622, "grad_norm": 3.2635204792022705, "learning_rate": 9.645717241255e-06, "loss": 1.1473, "step": 3247 }, { "epoch": 0.8227992400253324, "grad_norm": 
3.320261001586914, "learning_rate": 9.645407407569244e-06, "loss": 1.0127, "step": 3248 }, { "epoch": 0.8230525649145028, "grad_norm": 3.4120755195617676, "learning_rate": 9.645097443442624e-06, "loss": 1.0719, "step": 3249 }, { "epoch": 0.8233058898036733, "grad_norm": 3.1058967113494873, "learning_rate": 9.644787348883846e-06, "loss": 1.1065, "step": 3250 }, { "epoch": 0.8235592146928435, "grad_norm": 3.425712823867798, "learning_rate": 9.644477123901617e-06, "loss": 1.034, "step": 3251 }, { "epoch": 0.823812539582014, "grad_norm": 3.5326826572418213, "learning_rate": 9.644166768504645e-06, "loss": 1.1458, "step": 3252 }, { "epoch": 0.8240658644711842, "grad_norm": 3.7466468811035156, "learning_rate": 9.643856282701646e-06, "loss": 1.3668, "step": 3253 }, { "epoch": 0.8243191893603546, "grad_norm": 3.811091184616089, "learning_rate": 9.643545666501342e-06, "loss": 1.2014, "step": 3254 }, { "epoch": 0.824572514249525, "grad_norm": 3.382639169692993, "learning_rate": 9.64323491991245e-06, "loss": 1.0997, "step": 3255 }, { "epoch": 0.8248258391386953, "grad_norm": 3.555203437805176, "learning_rate": 9.642924042943699e-06, "loss": 1.2725, "step": 3256 }, { "epoch": 0.8250791640278657, "grad_norm": 3.4761922359466553, "learning_rate": 9.642613035603816e-06, "loss": 1.1242, "step": 3257 }, { "epoch": 0.8253324889170361, "grad_norm": 3.351353168487549, "learning_rate": 9.642301897901537e-06, "loss": 1.0991, "step": 3258 }, { "epoch": 0.8255858138062064, "grad_norm": 3.5288312435150146, "learning_rate": 9.641990629845593e-06, "loss": 1.1853, "step": 3259 }, { "epoch": 0.8258391386953768, "grad_norm": 3.377979040145874, "learning_rate": 9.64167923144473e-06, "loss": 1.1463, "step": 3260 }, { "epoch": 0.8260924635845471, "grad_norm": 3.269613265991211, "learning_rate": 9.64136770270769e-06, "loss": 1.1406, "step": 3261 }, { "epoch": 0.8263457884737175, "grad_norm": 3.4501872062683105, "learning_rate": 9.641056043643218e-06, "loss": 1.1931, "step": 3262 }, { "epoch": 0.8265991133628879, "grad_norm": 3.291609764099121, "learning_rate": 9.640744254260068e-06, "loss": 1.082, "step": 3263 }, { "epoch": 0.8268524382520582, "grad_norm": 3.530622720718384, "learning_rate": 9.640432334566995e-06, "loss": 1.1391, "step": 3264 }, { "epoch": 0.8271057631412286, "grad_norm": 3.626964807510376, "learning_rate": 9.640120284572757e-06, "loss": 1.1773, "step": 3265 }, { "epoch": 0.827359088030399, "grad_norm": 3.473663091659546, "learning_rate": 9.639808104286118e-06, "loss": 1.234, "step": 3266 }, { "epoch": 0.8276124129195693, "grad_norm": 3.2221481800079346, "learning_rate": 9.639495793715838e-06, "loss": 0.9925, "step": 3267 }, { "epoch": 0.8278657378087397, "grad_norm": 3.36942458152771, "learning_rate": 9.639183352870693e-06, "loss": 1.1158, "step": 3268 }, { "epoch": 0.82811906269791, "grad_norm": 3.7078704833984375, "learning_rate": 9.638870781759453e-06, "loss": 1.1496, "step": 3269 }, { "epoch": 0.8283723875870804, "grad_norm": 3.411898374557495, "learning_rate": 9.638558080390895e-06, "loss": 1.1096, "step": 3270 }, { "epoch": 0.8286257124762508, "grad_norm": 3.7567858695983887, "learning_rate": 9.638245248773804e-06, "loss": 1.2055, "step": 3271 }, { "epoch": 0.8288790373654211, "grad_norm": 3.509984016418457, "learning_rate": 9.637932286916955e-06, "loss": 1.1556, "step": 3272 }, { "epoch": 0.8291323622545915, "grad_norm": 3.6461474895477295, "learning_rate": 9.637619194829144e-06, "loss": 1.2767, "step": 3273 }, { "epoch": 0.8293856871437619, "grad_norm": 3.354914665222168, "learning_rate": 
9.63730597251916e-06, "loss": 1.1551, "step": 3274 }, { "epoch": 0.8296390120329322, "grad_norm": 3.547229766845703, "learning_rate": 9.636992619995795e-06, "loss": 1.1532, "step": 3275 }, { "epoch": 0.8298923369221026, "grad_norm": 3.6905324459075928, "learning_rate": 9.636679137267852e-06, "loss": 1.2091, "step": 3276 }, { "epoch": 0.8301456618112729, "grad_norm": 3.495739698410034, "learning_rate": 9.636365524344132e-06, "loss": 1.2042, "step": 3277 }, { "epoch": 0.8303989867004433, "grad_norm": 3.5037713050842285, "learning_rate": 9.636051781233443e-06, "loss": 1.2253, "step": 3278 }, { "epoch": 0.8306523115896137, "grad_norm": 3.280710220336914, "learning_rate": 9.635737907944589e-06, "loss": 1.2191, "step": 3279 }, { "epoch": 0.830905636478784, "grad_norm": 3.6910886764526367, "learning_rate": 9.63542390448639e-06, "loss": 1.179, "step": 3280 }, { "epoch": 0.8311589613679544, "grad_norm": 3.32639217376709, "learning_rate": 9.635109770867658e-06, "loss": 1.1353, "step": 3281 }, { "epoch": 0.8314122862571247, "grad_norm": 3.2570884227752686, "learning_rate": 9.634795507097217e-06, "loss": 1.0916, "step": 3282 }, { "epoch": 0.8316656111462951, "grad_norm": 3.7500569820404053, "learning_rate": 9.634481113183892e-06, "loss": 1.1985, "step": 3283 }, { "epoch": 0.8319189360354655, "grad_norm": 3.468489646911621, "learning_rate": 9.634166589136508e-06, "loss": 1.1431, "step": 3284 }, { "epoch": 0.8321722609246358, "grad_norm": 3.260855197906494, "learning_rate": 9.633851934963899e-06, "loss": 1.1575, "step": 3285 }, { "epoch": 0.8324255858138062, "grad_norm": 3.2026495933532715, "learning_rate": 9.633537150674898e-06, "loss": 1.0722, "step": 3286 }, { "epoch": 0.8326789107029766, "grad_norm": 3.4730541706085205, "learning_rate": 9.633222236278346e-06, "loss": 1.2272, "step": 3287 }, { "epoch": 0.8329322355921469, "grad_norm": 3.312720537185669, "learning_rate": 9.632907191783085e-06, "loss": 1.239, "step": 3288 }, { "epoch": 0.8331855604813173, "grad_norm": 3.187642812728882, "learning_rate": 9.632592017197962e-06, "loss": 1.059, "step": 3289 }, { "epoch": 0.8334388853704876, "grad_norm": 4.059429168701172, "learning_rate": 9.632276712531825e-06, "loss": 1.3552, "step": 3290 }, { "epoch": 0.833692210259658, "grad_norm": 3.5450494289398193, "learning_rate": 9.63196127779353e-06, "loss": 1.2119, "step": 3291 }, { "epoch": 0.8339455351488284, "grad_norm": 3.40283465385437, "learning_rate": 9.631645712991932e-06, "loss": 1.0008, "step": 3292 }, { "epoch": 0.8341988600379987, "grad_norm": 3.6970903873443604, "learning_rate": 9.631330018135896e-06, "loss": 1.1442, "step": 3293 }, { "epoch": 0.8344521849271691, "grad_norm": 3.3924965858459473, "learning_rate": 9.631014193234282e-06, "loss": 1.1336, "step": 3294 }, { "epoch": 0.8347055098163395, "grad_norm": 3.460174083709717, "learning_rate": 9.630698238295959e-06, "loss": 1.2668, "step": 3295 }, { "epoch": 0.8349588347055098, "grad_norm": 3.0121147632598877, "learning_rate": 9.6303821533298e-06, "loss": 1.0716, "step": 3296 }, { "epoch": 0.8352121595946802, "grad_norm": 3.6534409523010254, "learning_rate": 9.630065938344682e-06, "loss": 1.2435, "step": 3297 }, { "epoch": 0.8354654844838505, "grad_norm": 3.2021796703338623, "learning_rate": 9.62974959334948e-06, "loss": 1.0484, "step": 3298 }, { "epoch": 0.8357188093730209, "grad_norm": 4.067572116851807, "learning_rate": 9.629433118353083e-06, "loss": 1.3855, "step": 3299 }, { "epoch": 0.8359721342621913, "grad_norm": 3.870067596435547, "learning_rate": 9.62911651336437e-06, "loss": 1.2859, 
"step": 3300 }, { "epoch": 0.8362254591513616, "grad_norm": 3.280607223510742, "learning_rate": 9.628799778392237e-06, "loss": 1.2571, "step": 3301 }, { "epoch": 0.836478784040532, "grad_norm": 3.271177291870117, "learning_rate": 9.628482913445575e-06, "loss": 0.9535, "step": 3302 }, { "epoch": 0.8367321089297023, "grad_norm": 3.8014323711395264, "learning_rate": 9.628165918533282e-06, "loss": 1.2103, "step": 3303 }, { "epoch": 0.8369854338188727, "grad_norm": 4.148789882659912, "learning_rate": 9.627848793664258e-06, "loss": 1.2324, "step": 3304 }, { "epoch": 0.8372387587080431, "grad_norm": 3.280813217163086, "learning_rate": 9.62753153884741e-06, "loss": 1.1898, "step": 3305 }, { "epoch": 0.8374920835972134, "grad_norm": 3.4878618717193604, "learning_rate": 9.627214154091646e-06, "loss": 1.2181, "step": 3306 }, { "epoch": 0.8377454084863838, "grad_norm": 3.2640535831451416, "learning_rate": 9.626896639405876e-06, "loss": 1.1118, "step": 3307 }, { "epoch": 0.8379987333755542, "grad_norm": 3.645920753479004, "learning_rate": 9.626578994799017e-06, "loss": 1.2364, "step": 3308 }, { "epoch": 0.8382520582647245, "grad_norm": 3.916548490524292, "learning_rate": 9.62626122027999e-06, "loss": 1.2849, "step": 3309 }, { "epoch": 0.8385053831538949, "grad_norm": 3.662060499191284, "learning_rate": 9.625943315857713e-06, "loss": 1.2489, "step": 3310 }, { "epoch": 0.8387587080430652, "grad_norm": 3.293536424636841, "learning_rate": 9.625625281541117e-06, "loss": 1.1696, "step": 3311 }, { "epoch": 0.8390120329322356, "grad_norm": 3.3884665966033936, "learning_rate": 9.625307117339132e-06, "loss": 1.3609, "step": 3312 }, { "epoch": 0.839265357821406, "grad_norm": 3.5792012214660645, "learning_rate": 9.62498882326069e-06, "loss": 1.1006, "step": 3313 }, { "epoch": 0.8395186827105763, "grad_norm": 3.2106194496154785, "learning_rate": 9.62467039931473e-06, "loss": 1.0974, "step": 3314 }, { "epoch": 0.8397720075997467, "grad_norm": 3.4125583171844482, "learning_rate": 9.624351845510192e-06, "loss": 1.1999, "step": 3315 }, { "epoch": 0.8400253324889171, "grad_norm": 3.447925090789795, "learning_rate": 9.624033161856024e-06, "loss": 1.1, "step": 3316 }, { "epoch": 0.8402786573780874, "grad_norm": 3.7276673316955566, "learning_rate": 9.623714348361169e-06, "loss": 1.3541, "step": 3317 }, { "epoch": 0.8405319822672578, "grad_norm": 3.6366593837738037, "learning_rate": 9.623395405034584e-06, "loss": 1.183, "step": 3318 }, { "epoch": 0.8407853071564281, "grad_norm": 3.335683822631836, "learning_rate": 9.623076331885222e-06, "loss": 1.1631, "step": 3319 }, { "epoch": 0.8410386320455985, "grad_norm": 3.652543783187866, "learning_rate": 9.622757128922043e-06, "loss": 1.1235, "step": 3320 }, { "epoch": 0.8412919569347689, "grad_norm": 3.2850959300994873, "learning_rate": 9.62243779615401e-06, "loss": 1.0402, "step": 3321 }, { "epoch": 0.8415452818239392, "grad_norm": 3.1912031173706055, "learning_rate": 9.62211833359009e-06, "loss": 1.0177, "step": 3322 }, { "epoch": 0.8417986067131096, "grad_norm": 3.3349733352661133, "learning_rate": 9.621798741239255e-06, "loss": 1.1605, "step": 3323 }, { "epoch": 0.84205193160228, "grad_norm": 3.359973907470703, "learning_rate": 9.621479019110476e-06, "loss": 1.1057, "step": 3324 }, { "epoch": 0.8423052564914503, "grad_norm": 3.6060352325439453, "learning_rate": 9.621159167212735e-06, "loss": 1.2396, "step": 3325 }, { "epoch": 0.8425585813806207, "grad_norm": 3.468721866607666, "learning_rate": 9.620839185555006e-06, "loss": 1.1335, "step": 3326 }, { "epoch": 0.842811906269791, 
"grad_norm": 3.759596824645996, "learning_rate": 9.620519074146282e-06, "loss": 1.2117, "step": 3327 }, { "epoch": 0.8430652311589614, "grad_norm": 3.733689546585083, "learning_rate": 9.620198832995547e-06, "loss": 1.1796, "step": 3328 }, { "epoch": 0.8433185560481318, "grad_norm": 3.7993173599243164, "learning_rate": 9.619878462111793e-06, "loss": 1.1737, "step": 3329 }, { "epoch": 0.8435718809373021, "grad_norm": 2.978166103363037, "learning_rate": 9.619557961504018e-06, "loss": 0.9284, "step": 3330 }, { "epoch": 0.8438252058264725, "grad_norm": 3.3335835933685303, "learning_rate": 9.619237331181221e-06, "loss": 1.189, "step": 3331 }, { "epoch": 0.8440785307156428, "grad_norm": 3.68396258354187, "learning_rate": 9.618916571152403e-06, "loss": 1.1267, "step": 3332 }, { "epoch": 0.8443318556048132, "grad_norm": 3.612015962600708, "learning_rate": 9.618595681426574e-06, "loss": 1.3243, "step": 3333 }, { "epoch": 0.8445851804939836, "grad_norm": 3.8254354000091553, "learning_rate": 9.618274662012743e-06, "loss": 1.3139, "step": 3334 }, { "epoch": 0.8448385053831539, "grad_norm": 3.5294089317321777, "learning_rate": 9.617953512919922e-06, "loss": 1.1345, "step": 3335 }, { "epoch": 0.8450918302723243, "grad_norm": 3.3894784450531006, "learning_rate": 9.617632234157132e-06, "loss": 1.0853, "step": 3336 }, { "epoch": 0.8453451551614947, "grad_norm": 3.574516534805298, "learning_rate": 9.617310825733395e-06, "loss": 1.1328, "step": 3337 }, { "epoch": 0.845598480050665, "grad_norm": 3.29719877243042, "learning_rate": 9.616989287657731e-06, "loss": 1.0126, "step": 3338 }, { "epoch": 0.8458518049398354, "grad_norm": 3.667088031768799, "learning_rate": 9.616667619939172e-06, "loss": 1.3743, "step": 3339 }, { "epoch": 0.8461051298290057, "grad_norm": 3.3549182415008545, "learning_rate": 9.616345822586753e-06, "loss": 1.1181, "step": 3340 }, { "epoch": 0.8463584547181761, "grad_norm": 3.507493257522583, "learning_rate": 9.616023895609503e-06, "loss": 1.2276, "step": 3341 }, { "epoch": 0.8466117796073465, "grad_norm": 3.775092601776123, "learning_rate": 9.615701839016468e-06, "loss": 1.2453, "step": 3342 }, { "epoch": 0.8468651044965168, "grad_norm": 3.3038055896759033, "learning_rate": 9.615379652816687e-06, "loss": 1.2524, "step": 3343 }, { "epoch": 0.8471184293856872, "grad_norm": 3.6604628562927246, "learning_rate": 9.615057337019208e-06, "loss": 1.0756, "step": 3344 }, { "epoch": 0.8473717542748576, "grad_norm": 3.2440614700317383, "learning_rate": 9.614734891633084e-06, "loss": 1.1165, "step": 3345 }, { "epoch": 0.8476250791640279, "grad_norm": 3.37060284614563, "learning_rate": 9.614412316667367e-06, "loss": 1.1274, "step": 3346 }, { "epoch": 0.8478784040531983, "grad_norm": 3.8288700580596924, "learning_rate": 9.614089612131114e-06, "loss": 1.1934, "step": 3347 }, { "epoch": 0.8481317289423685, "grad_norm": 3.8033788204193115, "learning_rate": 9.613766778033387e-06, "loss": 1.1799, "step": 3348 }, { "epoch": 0.848385053831539, "grad_norm": 3.9271960258483887, "learning_rate": 9.613443814383252e-06, "loss": 1.1305, "step": 3349 }, { "epoch": 0.8486383787207094, "grad_norm": 3.70815372467041, "learning_rate": 9.613120721189776e-06, "loss": 1.1846, "step": 3350 }, { "epoch": 0.8488917036098796, "grad_norm": 3.861818790435791, "learning_rate": 9.612797498462032e-06, "loss": 1.1439, "step": 3351 }, { "epoch": 0.84914502849905, "grad_norm": 3.6337101459503174, "learning_rate": 9.612474146209097e-06, "loss": 1.1044, "step": 3352 }, { "epoch": 0.8493983533882203, "grad_norm": 3.3341217041015625, 
"learning_rate": 9.61215066444005e-06, "loss": 1.0371, "step": 3353 }, { "epoch": 0.8496516782773907, "grad_norm": 3.7437963485717773, "learning_rate": 9.611827053163973e-06, "loss": 1.317, "step": 3354 }, { "epoch": 0.8499050031665611, "grad_norm": 3.634654998779297, "learning_rate": 9.611503312389953e-06, "loss": 1.2687, "step": 3355 }, { "epoch": 0.8501583280557314, "grad_norm": 3.5133156776428223, "learning_rate": 9.611179442127083e-06, "loss": 1.2252, "step": 3356 }, { "epoch": 0.8504116529449018, "grad_norm": 3.691375255584717, "learning_rate": 9.610855442384456e-06, "loss": 1.2031, "step": 3357 }, { "epoch": 0.8506649778340722, "grad_norm": 3.2914648056030273, "learning_rate": 9.610531313171168e-06, "loss": 1.1457, "step": 3358 }, { "epoch": 0.8509183027232425, "grad_norm": 3.2313108444213867, "learning_rate": 9.610207054496322e-06, "loss": 1.101, "step": 3359 }, { "epoch": 0.8511716276124129, "grad_norm": 3.529951333999634, "learning_rate": 9.609882666369022e-06, "loss": 1.1806, "step": 3360 }, { "epoch": 0.8514249525015832, "grad_norm": 3.5383546352386475, "learning_rate": 9.609558148798378e-06, "loss": 1.145, "step": 3361 }, { "epoch": 0.8516782773907536, "grad_norm": 3.3370304107666016, "learning_rate": 9.609233501793502e-06, "loss": 1.0638, "step": 3362 }, { "epoch": 0.851931602279924, "grad_norm": 3.65524959564209, "learning_rate": 9.608908725363509e-06, "loss": 1.3135, "step": 3363 }, { "epoch": 0.8521849271690943, "grad_norm": 3.2601053714752197, "learning_rate": 9.608583819517519e-06, "loss": 1.1432, "step": 3364 }, { "epoch": 0.8524382520582647, "grad_norm": 3.9082705974578857, "learning_rate": 9.608258784264654e-06, "loss": 1.36, "step": 3365 }, { "epoch": 0.8526915769474351, "grad_norm": 3.53440260887146, "learning_rate": 9.607933619614042e-06, "loss": 1.268, "step": 3366 }, { "epoch": 0.8529449018366054, "grad_norm": 3.2626724243164062, "learning_rate": 9.607608325574816e-06, "loss": 1.1265, "step": 3367 }, { "epoch": 0.8531982267257758, "grad_norm": 3.4613711833953857, "learning_rate": 9.607282902156106e-06, "loss": 1.1979, "step": 3368 }, { "epoch": 0.8534515516149461, "grad_norm": 3.542390823364258, "learning_rate": 9.606957349367052e-06, "loss": 1.198, "step": 3369 }, { "epoch": 0.8537048765041165, "grad_norm": 3.2955868244171143, "learning_rate": 9.606631667216794e-06, "loss": 1.2156, "step": 3370 }, { "epoch": 0.8539582013932869, "grad_norm": 3.3685765266418457, "learning_rate": 9.60630585571448e-06, "loss": 1.2304, "step": 3371 }, { "epoch": 0.8542115262824572, "grad_norm": 4.13430643081665, "learning_rate": 9.605979914869255e-06, "loss": 1.3528, "step": 3372 }, { "epoch": 0.8544648511716276, "grad_norm": 3.3133087158203125, "learning_rate": 9.605653844690273e-06, "loss": 1.1131, "step": 3373 }, { "epoch": 0.8547181760607979, "grad_norm": 3.21870756149292, "learning_rate": 9.605327645186688e-06, "loss": 1.1227, "step": 3374 }, { "epoch": 0.8549715009499683, "grad_norm": 3.2606735229492188, "learning_rate": 9.605001316367664e-06, "loss": 1.2406, "step": 3375 }, { "epoch": 0.8552248258391387, "grad_norm": 3.4934375286102295, "learning_rate": 9.60467485824236e-06, "loss": 1.2796, "step": 3376 }, { "epoch": 0.855478150728309, "grad_norm": 3.60872220993042, "learning_rate": 9.604348270819944e-06, "loss": 1.0723, "step": 3377 }, { "epoch": 0.8557314756174794, "grad_norm": 3.738271951675415, "learning_rate": 9.604021554109586e-06, "loss": 1.3091, "step": 3378 }, { "epoch": 0.8559848005066498, "grad_norm": 3.7667062282562256, "learning_rate": 9.60369470812046e-06, 
"loss": 1.2022, "step": 3379 }, { "epoch": 0.8562381253958201, "grad_norm": 3.4908809661865234, "learning_rate": 9.603367732861746e-06, "loss": 1.2256, "step": 3380 }, { "epoch": 0.8564914502849905, "grad_norm": 3.327702522277832, "learning_rate": 9.603040628342622e-06, "loss": 1.2269, "step": 3381 }, { "epoch": 0.8567447751741608, "grad_norm": 3.4891254901885986, "learning_rate": 9.602713394572276e-06, "loss": 1.1644, "step": 3382 }, { "epoch": 0.8569981000633312, "grad_norm": 3.4369208812713623, "learning_rate": 9.602386031559893e-06, "loss": 1.1514, "step": 3383 }, { "epoch": 0.8572514249525016, "grad_norm": 3.2946858406066895, "learning_rate": 9.602058539314669e-06, "loss": 1.158, "step": 3384 }, { "epoch": 0.8575047498416719, "grad_norm": 3.3024790287017822, "learning_rate": 9.601730917845798e-06, "loss": 1.2727, "step": 3385 }, { "epoch": 0.8577580747308423, "grad_norm": 3.251995325088501, "learning_rate": 9.60140316716248e-06, "loss": 1.0886, "step": 3386 }, { "epoch": 0.8580113996200127, "grad_norm": 3.3264102935791016, "learning_rate": 9.601075287273916e-06, "loss": 1.1008, "step": 3387 }, { "epoch": 0.858264724509183, "grad_norm": 3.395895481109619, "learning_rate": 9.600747278189314e-06, "loss": 1.0521, "step": 3388 }, { "epoch": 0.8585180493983534, "grad_norm": 3.5084939002990723, "learning_rate": 9.600419139917887e-06, "loss": 1.1628, "step": 3389 }, { "epoch": 0.8587713742875237, "grad_norm": 3.2303173542022705, "learning_rate": 9.600090872468846e-06, "loss": 1.1065, "step": 3390 }, { "epoch": 0.8590246991766941, "grad_norm": 3.6353373527526855, "learning_rate": 9.599762475851409e-06, "loss": 1.2276, "step": 3391 }, { "epoch": 0.8592780240658645, "grad_norm": 3.4630491733551025, "learning_rate": 9.599433950074797e-06, "loss": 1.1645, "step": 3392 }, { "epoch": 0.8595313489550348, "grad_norm": 3.725224256515503, "learning_rate": 9.599105295148235e-06, "loss": 1.1279, "step": 3393 }, { "epoch": 0.8597846738442052, "grad_norm": 3.003977060317993, "learning_rate": 9.598776511080954e-06, "loss": 1.0819, "step": 3394 }, { "epoch": 0.8600379987333756, "grad_norm": 3.839994192123413, "learning_rate": 9.598447597882181e-06, "loss": 1.3302, "step": 3395 }, { "epoch": 0.8602913236225459, "grad_norm": 3.295487880706787, "learning_rate": 9.598118555561156e-06, "loss": 1.0836, "step": 3396 }, { "epoch": 0.8605446485117163, "grad_norm": 3.553488254547119, "learning_rate": 9.597789384127117e-06, "loss": 1.2063, "step": 3397 }, { "epoch": 0.8607979734008866, "grad_norm": 3.2332942485809326, "learning_rate": 9.597460083589307e-06, "loss": 1.0509, "step": 3398 }, { "epoch": 0.861051298290057, "grad_norm": 3.3764026165008545, "learning_rate": 9.597130653956973e-06, "loss": 1.153, "step": 3399 }, { "epoch": 0.8613046231792274, "grad_norm": 3.295069932937622, "learning_rate": 9.596801095239365e-06, "loss": 1.0382, "step": 3400 }, { "epoch": 0.8615579480683977, "grad_norm": 3.451786518096924, "learning_rate": 9.596471407445736e-06, "loss": 1.1697, "step": 3401 }, { "epoch": 0.8618112729575681, "grad_norm": 3.5099878311157227, "learning_rate": 9.596141590585344e-06, "loss": 1.1123, "step": 3402 }, { "epoch": 0.8620645978467384, "grad_norm": 3.3034534454345703, "learning_rate": 9.59581164466745e-06, "loss": 1.1429, "step": 3403 }, { "epoch": 0.8623179227359088, "grad_norm": 3.3229706287384033, "learning_rate": 9.595481569701319e-06, "loss": 1.0715, "step": 3404 }, { "epoch": 0.8625712476250792, "grad_norm": 3.2438437938690186, "learning_rate": 9.595151365696221e-06, "loss": 1.1276, "step": 3405 }, { 
"epoch": 0.8628245725142495, "grad_norm": 3.7240092754364014, "learning_rate": 9.594821032661425e-06, "loss": 1.1568, "step": 3406 }, { "epoch": 0.8630778974034199, "grad_norm": 3.175704002380371, "learning_rate": 9.594490570606207e-06, "loss": 1.1228, "step": 3407 }, { "epoch": 0.8633312222925903, "grad_norm": 3.8279178142547607, "learning_rate": 9.594159979539849e-06, "loss": 1.2746, "step": 3408 }, { "epoch": 0.8635845471817606, "grad_norm": 3.5796291828155518, "learning_rate": 9.59382925947163e-06, "loss": 1.2341, "step": 3409 }, { "epoch": 0.863837872070931, "grad_norm": 3.5394248962402344, "learning_rate": 9.59349841041084e-06, "loss": 1.1411, "step": 3410 }, { "epoch": 0.8640911969601013, "grad_norm": 3.572301149368286, "learning_rate": 9.593167432366766e-06, "loss": 1.1513, "step": 3411 }, { "epoch": 0.8643445218492717, "grad_norm": 3.1407394409179688, "learning_rate": 9.592836325348705e-06, "loss": 1.1336, "step": 3412 }, { "epoch": 0.8645978467384421, "grad_norm": 3.3476498126983643, "learning_rate": 9.59250508936595e-06, "loss": 1.1538, "step": 3413 }, { "epoch": 0.8648511716276124, "grad_norm": 3.4111223220825195, "learning_rate": 9.592173724427809e-06, "loss": 1.056, "step": 3414 }, { "epoch": 0.8651044965167828, "grad_norm": 3.2360401153564453, "learning_rate": 9.591842230543578e-06, "loss": 1.0997, "step": 3415 }, { "epoch": 0.8653578214059532, "grad_norm": 3.479116678237915, "learning_rate": 9.59151060772257e-06, "loss": 1.294, "step": 3416 }, { "epoch": 0.8656111462951235, "grad_norm": 3.418973922729492, "learning_rate": 9.591178855974097e-06, "loss": 1.2281, "step": 3417 }, { "epoch": 0.8658644711842939, "grad_norm": 3.6004345417022705, "learning_rate": 9.590846975307473e-06, "loss": 1.2213, "step": 3418 }, { "epoch": 0.8661177960734642, "grad_norm": 3.6040594577789307, "learning_rate": 9.590514965732017e-06, "loss": 1.0826, "step": 3419 }, { "epoch": 0.8663711209626346, "grad_norm": 3.5817062854766846, "learning_rate": 9.590182827257053e-06, "loss": 1.2093, "step": 3420 }, { "epoch": 0.866624445851805, "grad_norm": 3.6036312580108643, "learning_rate": 9.589850559891906e-06, "loss": 1.2953, "step": 3421 }, { "epoch": 0.8668777707409753, "grad_norm": 3.4746053218841553, "learning_rate": 9.589518163645908e-06, "loss": 1.1615, "step": 3422 }, { "epoch": 0.8671310956301457, "grad_norm": 3.4524717330932617, "learning_rate": 9.58918563852839e-06, "loss": 1.1426, "step": 3423 }, { "epoch": 0.867384420519316, "grad_norm": 3.8653552532196045, "learning_rate": 9.58885298454869e-06, "loss": 1.1985, "step": 3424 }, { "epoch": 0.8676377454084864, "grad_norm": 3.478992462158203, "learning_rate": 9.588520201716149e-06, "loss": 1.2534, "step": 3425 }, { "epoch": 0.8678910702976568, "grad_norm": 3.3428542613983154, "learning_rate": 9.588187290040109e-06, "loss": 0.9823, "step": 3426 }, { "epoch": 0.8681443951868271, "grad_norm": 3.336794137954712, "learning_rate": 9.587854249529924e-06, "loss": 1.2026, "step": 3427 }, { "epoch": 0.8683977200759975, "grad_norm": 3.6661384105682373, "learning_rate": 9.58752108019494e-06, "loss": 1.1801, "step": 3428 }, { "epoch": 0.8686510449651679, "grad_norm": 3.672661066055298, "learning_rate": 9.587187782044514e-06, "loss": 1.2013, "step": 3429 }, { "epoch": 0.8689043698543382, "grad_norm": 3.4144299030303955, "learning_rate": 9.586854355088006e-06, "loss": 1.386, "step": 3430 }, { "epoch": 0.8691576947435086, "grad_norm": 3.670971393585205, "learning_rate": 9.586520799334776e-06, "loss": 1.1359, "step": 3431 }, { "epoch": 0.8694110196326789, 
"grad_norm": 3.665709972381592, "learning_rate": 9.586187114794192e-06, "loss": 1.1387, "step": 3432 }, { "epoch": 0.8696643445218493, "grad_norm": 3.3912973403930664, "learning_rate": 9.585853301475625e-06, "loss": 1.1105, "step": 3433 }, { "epoch": 0.8699176694110197, "grad_norm": 3.3353540897369385, "learning_rate": 9.585519359388445e-06, "loss": 1.2105, "step": 3434 }, { "epoch": 0.87017099430019, "grad_norm": 3.507322072982788, "learning_rate": 9.585185288542031e-06, "loss": 1.0176, "step": 3435 }, { "epoch": 0.8704243191893604, "grad_norm": 3.2737700939178467, "learning_rate": 9.584851088945762e-06, "loss": 1.0474, "step": 3436 }, { "epoch": 0.8706776440785308, "grad_norm": 3.285423517227173, "learning_rate": 9.584516760609024e-06, "loss": 1.1533, "step": 3437 }, { "epoch": 0.8709309689677011, "grad_norm": 3.5063467025756836, "learning_rate": 9.584182303541205e-06, "loss": 1.2178, "step": 3438 }, { "epoch": 0.8711842938568715, "grad_norm": 3.2288620471954346, "learning_rate": 9.583847717751694e-06, "loss": 1.0731, "step": 3439 }, { "epoch": 0.8714376187460418, "grad_norm": 3.816467523574829, "learning_rate": 9.583513003249889e-06, "loss": 1.2776, "step": 3440 }, { "epoch": 0.8716909436352122, "grad_norm": 3.81528902053833, "learning_rate": 9.583178160045186e-06, "loss": 1.1239, "step": 3441 }, { "epoch": 0.8719442685243826, "grad_norm": 3.3365797996520996, "learning_rate": 9.582843188146987e-06, "loss": 1.2304, "step": 3442 }, { "epoch": 0.8721975934135529, "grad_norm": 3.1397223472595215, "learning_rate": 9.582508087564701e-06, "loss": 1.0956, "step": 3443 }, { "epoch": 0.8724509183027233, "grad_norm": 3.297811985015869, "learning_rate": 9.582172858307735e-06, "loss": 1.1335, "step": 3444 }, { "epoch": 0.8727042431918937, "grad_norm": 3.520543336868286, "learning_rate": 9.581837500385503e-06, "loss": 1.2358, "step": 3445 }, { "epoch": 0.872957568081064, "grad_norm": 3.4179327487945557, "learning_rate": 9.581502013807422e-06, "loss": 1.142, "step": 3446 }, { "epoch": 0.8732108929702344, "grad_norm": 3.337519407272339, "learning_rate": 9.58116639858291e-06, "loss": 1.1565, "step": 3447 }, { "epoch": 0.8734642178594046, "grad_norm": 3.9267821311950684, "learning_rate": 9.580830654721393e-06, "loss": 1.1889, "step": 3448 }, { "epoch": 0.873717542748575, "grad_norm": 3.376887559890747, "learning_rate": 9.580494782232299e-06, "loss": 1.0976, "step": 3449 }, { "epoch": 0.8739708676377455, "grad_norm": 3.618590831756592, "learning_rate": 9.580158781125058e-06, "loss": 1.211, "step": 3450 }, { "epoch": 0.8742241925269157, "grad_norm": 3.1070926189422607, "learning_rate": 9.579822651409105e-06, "loss": 1.2301, "step": 3451 }, { "epoch": 0.8744775174160861, "grad_norm": 3.3936069011688232, "learning_rate": 9.579486393093878e-06, "loss": 1.1922, "step": 3452 }, { "epoch": 0.8747308423052564, "grad_norm": 3.7701573371887207, "learning_rate": 9.57915000618882e-06, "loss": 1.1539, "step": 3453 }, { "epoch": 0.8749841671944268, "grad_norm": 3.4328768253326416, "learning_rate": 9.578813490703375e-06, "loss": 1.1615, "step": 3454 }, { "epoch": 0.8752374920835972, "grad_norm": 3.3606112003326416, "learning_rate": 9.578476846646994e-06, "loss": 1.0825, "step": 3455 }, { "epoch": 0.8754908169727675, "grad_norm": 3.7307562828063965, "learning_rate": 9.57814007402913e-06, "loss": 1.2022, "step": 3456 }, { "epoch": 0.8757441418619379, "grad_norm": 3.4997165203094482, "learning_rate": 9.577803172859236e-06, "loss": 1.1344, "step": 3457 }, { "epoch": 0.8759974667511083, "grad_norm": 3.4206204414367676, 
"learning_rate": 9.577466143146777e-06, "loss": 1.2185, "step": 3458 }, { "epoch": 0.8762507916402786, "grad_norm": 3.629504919052124, "learning_rate": 9.577128984901212e-06, "loss": 1.0822, "step": 3459 }, { "epoch": 0.876504116529449, "grad_norm": 3.5562291145324707, "learning_rate": 9.57679169813201e-06, "loss": 1.2349, "step": 3460 }, { "epoch": 0.8767574414186193, "grad_norm": 3.643573045730591, "learning_rate": 9.576454282848645e-06, "loss": 1.2338, "step": 3461 }, { "epoch": 0.8770107663077897, "grad_norm": 3.4442660808563232, "learning_rate": 9.576116739060585e-06, "loss": 1.0719, "step": 3462 }, { "epoch": 0.8772640911969601, "grad_norm": 3.5599753856658936, "learning_rate": 9.575779066777316e-06, "loss": 1.1835, "step": 3463 }, { "epoch": 0.8775174160861304, "grad_norm": 3.6639044284820557, "learning_rate": 9.575441266008312e-06, "loss": 1.2526, "step": 3464 }, { "epoch": 0.8777707409753008, "grad_norm": 3.6009585857391357, "learning_rate": 9.575103336763063e-06, "loss": 1.102, "step": 3465 }, { "epoch": 0.8780240658644712, "grad_norm": 3.8054094314575195, "learning_rate": 9.574765279051055e-06, "loss": 1.1178, "step": 3466 }, { "epoch": 0.8782773907536415, "grad_norm": 3.4370920658111572, "learning_rate": 9.574427092881784e-06, "loss": 1.0412, "step": 3467 }, { "epoch": 0.8785307156428119, "grad_norm": 3.3152363300323486, "learning_rate": 9.574088778264744e-06, "loss": 1.1768, "step": 3468 }, { "epoch": 0.8787840405319822, "grad_norm": 3.4283127784729004, "learning_rate": 9.573750335209433e-06, "loss": 1.1101, "step": 3469 }, { "epoch": 0.8790373654211526, "grad_norm": 3.6179544925689697, "learning_rate": 9.573411763725358e-06, "loss": 1.2943, "step": 3470 }, { "epoch": 0.879290690310323, "grad_norm": 3.568488359451294, "learning_rate": 9.573073063822023e-06, "loss": 1.2498, "step": 3471 }, { "epoch": 0.8795440151994933, "grad_norm": 3.5222697257995605, "learning_rate": 9.572734235508941e-06, "loss": 1.0618, "step": 3472 }, { "epoch": 0.8797973400886637, "grad_norm": 3.1682956218719482, "learning_rate": 9.572395278795622e-06, "loss": 1.0945, "step": 3473 }, { "epoch": 0.880050664977834, "grad_norm": 3.5997045040130615, "learning_rate": 9.57205619369159e-06, "loss": 1.1523, "step": 3474 }, { "epoch": 0.8803039898670044, "grad_norm": 3.4013473987579346, "learning_rate": 9.57171698020636e-06, "loss": 1.0699, "step": 3475 }, { "epoch": 0.8805573147561748, "grad_norm": 3.2505035400390625, "learning_rate": 9.571377638349462e-06, "loss": 1.0694, "step": 3476 }, { "epoch": 0.8808106396453451, "grad_norm": 3.628488779067993, "learning_rate": 9.571038168130422e-06, "loss": 1.1776, "step": 3477 }, { "epoch": 0.8810639645345155, "grad_norm": 3.3734829425811768, "learning_rate": 9.570698569558771e-06, "loss": 1.102, "step": 3478 }, { "epoch": 0.8813172894236859, "grad_norm": 6.371610641479492, "learning_rate": 9.57035884264405e-06, "loss": 1.1883, "step": 3479 }, { "epoch": 0.8815706143128562, "grad_norm": 3.6515114307403564, "learning_rate": 9.57001898739579e-06, "loss": 1.1875, "step": 3480 }, { "epoch": 0.8818239392020266, "grad_norm": 3.4682676792144775, "learning_rate": 9.569679003823542e-06, "loss": 1.1922, "step": 3481 }, { "epoch": 0.8820772640911969, "grad_norm": 3.526710033416748, "learning_rate": 9.56933889193685e-06, "loss": 1.202, "step": 3482 }, { "epoch": 0.8823305889803673, "grad_norm": 3.291552782058716, "learning_rate": 9.56899865174526e-06, "loss": 1.0547, "step": 3483 }, { "epoch": 0.8825839138695377, "grad_norm": 3.222761631011963, "learning_rate": 9.568658283258331e-06, 
"loss": 1.1274, "step": 3484 }, { "epoch": 0.882837238758708, "grad_norm": 3.328718900680542, "learning_rate": 9.568317786485619e-06, "loss": 1.2029, "step": 3485 }, { "epoch": 0.8830905636478784, "grad_norm": 3.3436994552612305, "learning_rate": 9.567977161436685e-06, "loss": 1.2401, "step": 3486 }, { "epoch": 0.8833438885370488, "grad_norm": 3.443335771560669, "learning_rate": 9.567636408121092e-06, "loss": 1.1754, "step": 3487 }, { "epoch": 0.8835972134262191, "grad_norm": 3.813403606414795, "learning_rate": 9.56729552654841e-06, "loss": 1.2644, "step": 3488 }, { "epoch": 0.8838505383153895, "grad_norm": 3.697558879852295, "learning_rate": 9.56695451672821e-06, "loss": 1.1746, "step": 3489 }, { "epoch": 0.8841038632045598, "grad_norm": 3.2442736625671387, "learning_rate": 9.566613378670068e-06, "loss": 1.1262, "step": 3490 }, { "epoch": 0.8843571880937302, "grad_norm": 3.6892635822296143, "learning_rate": 9.566272112383563e-06, "loss": 1.1524, "step": 3491 }, { "epoch": 0.8846105129829006, "grad_norm": 3.149174928665161, "learning_rate": 9.565930717878276e-06, "loss": 1.0615, "step": 3492 }, { "epoch": 0.8848638378720709, "grad_norm": 3.460679531097412, "learning_rate": 9.565589195163796e-06, "loss": 1.2664, "step": 3493 }, { "epoch": 0.8851171627612413, "grad_norm": 3.4815971851348877, "learning_rate": 9.565247544249709e-06, "loss": 1.2627, "step": 3494 }, { "epoch": 0.8853704876504117, "grad_norm": 3.1610448360443115, "learning_rate": 9.564905765145611e-06, "loss": 1.0562, "step": 3495 }, { "epoch": 0.885623812539582, "grad_norm": 3.1781082153320312, "learning_rate": 9.5645638578611e-06, "loss": 1.1922, "step": 3496 }, { "epoch": 0.8858771374287524, "grad_norm": 3.4381654262542725, "learning_rate": 9.564221822405774e-06, "loss": 1.1594, "step": 3497 }, { "epoch": 0.8861304623179227, "grad_norm": 3.464570999145508, "learning_rate": 9.563879658789239e-06, "loss": 1.1795, "step": 3498 }, { "epoch": 0.8863837872070931, "grad_norm": 3.122692823410034, "learning_rate": 9.563537367021103e-06, "loss": 1.1004, "step": 3499 }, { "epoch": 0.8866371120962635, "grad_norm": 3.4979028701782227, "learning_rate": 9.563194947110975e-06, "loss": 1.2198, "step": 3500 }, { "epoch": 0.8866371120962635, "eval_loss": 1.1842656135559082, "eval_runtime": 12.3015, "eval_samples_per_second": 32.516, "eval_steps_per_second": 4.065, "step": 3500 }, { "epoch": 0.8868904369854338, "grad_norm": 3.350447416305542, "learning_rate": 9.562852399068472e-06, "loss": 1.1479, "step": 3501 }, { "epoch": 0.8871437618746042, "grad_norm": 3.1752302646636963, "learning_rate": 9.562509722903213e-06, "loss": 1.1348, "step": 3502 }, { "epoch": 0.8873970867637745, "grad_norm": 3.252607583999634, "learning_rate": 9.562166918624817e-06, "loss": 1.1539, "step": 3503 }, { "epoch": 0.8876504116529449, "grad_norm": 3.5568649768829346, "learning_rate": 9.561823986242916e-06, "loss": 1.1659, "step": 3504 }, { "epoch": 0.8879037365421153, "grad_norm": 3.367532968521118, "learning_rate": 9.561480925767133e-06, "loss": 1.2785, "step": 3505 }, { "epoch": 0.8881570614312856, "grad_norm": 3.3485682010650635, "learning_rate": 9.561137737207103e-06, "loss": 1.2095, "step": 3506 }, { "epoch": 0.888410386320456, "grad_norm": 3.486233949661255, "learning_rate": 9.560794420572464e-06, "loss": 1.2842, "step": 3507 }, { "epoch": 0.8886637112096264, "grad_norm": 3.257869243621826, "learning_rate": 9.560450975872855e-06, "loss": 1.0405, "step": 3508 }, { "epoch": 0.8889170360987967, "grad_norm": 3.318464756011963, "learning_rate": 9.56010740311792e-06, 
"loss": 1.1972, "step": 3509 }, { "epoch": 0.8891703609879671, "grad_norm": 3.7074925899505615, "learning_rate": 9.559763702317306e-06, "loss": 1.2003, "step": 3510 }, { "epoch": 0.8894236858771374, "grad_norm": 3.423823356628418, "learning_rate": 9.559419873480664e-06, "loss": 1.1482, "step": 3511 }, { "epoch": 0.8896770107663078, "grad_norm": 3.256509304046631, "learning_rate": 9.559075916617649e-06, "loss": 1.0957, "step": 3512 }, { "epoch": 0.8899303356554782, "grad_norm": 3.308105707168579, "learning_rate": 9.55873183173792e-06, "loss": 1.0717, "step": 3513 }, { "epoch": 0.8901836605446485, "grad_norm": 3.4461209774017334, "learning_rate": 9.558387618851137e-06, "loss": 1.1767, "step": 3514 }, { "epoch": 0.8904369854338189, "grad_norm": 3.324324131011963, "learning_rate": 9.558043277966967e-06, "loss": 1.0425, "step": 3515 }, { "epoch": 0.8906903103229893, "grad_norm": 3.5780386924743652, "learning_rate": 9.557698809095076e-06, "loss": 1.2086, "step": 3516 }, { "epoch": 0.8909436352121596, "grad_norm": 3.3247432708740234, "learning_rate": 9.55735421224514e-06, "loss": 1.1104, "step": 3517 }, { "epoch": 0.89119696010133, "grad_norm": 3.3876378536224365, "learning_rate": 9.557009487426834e-06, "loss": 1.2138, "step": 3518 }, { "epoch": 0.8914502849905003, "grad_norm": 3.313143730163574, "learning_rate": 9.556664634649837e-06, "loss": 1.1164, "step": 3519 }, { "epoch": 0.8917036098796707, "grad_norm": 3.8288326263427734, "learning_rate": 9.55631965392383e-06, "loss": 1.2153, "step": 3520 }, { "epoch": 0.8919569347688411, "grad_norm": 3.245234966278076, "learning_rate": 9.555974545258507e-06, "loss": 1.1173, "step": 3521 }, { "epoch": 0.8922102596580114, "grad_norm": 3.3028581142425537, "learning_rate": 9.555629308663553e-06, "loss": 1.2273, "step": 3522 }, { "epoch": 0.8924635845471818, "grad_norm": 3.3003835678100586, "learning_rate": 9.555283944148661e-06, "loss": 1.1889, "step": 3523 }, { "epoch": 0.8927169094363521, "grad_norm": 3.2953989505767822, "learning_rate": 9.554938451723533e-06, "loss": 1.0949, "step": 3524 }, { "epoch": 0.8929702343255225, "grad_norm": 3.2672576904296875, "learning_rate": 9.554592831397866e-06, "loss": 1.1747, "step": 3525 }, { "epoch": 0.8932235592146929, "grad_norm": 3.283121109008789, "learning_rate": 9.554247083181369e-06, "loss": 1.0598, "step": 3526 }, { "epoch": 0.8934768841038632, "grad_norm": 3.359567880630493, "learning_rate": 9.553901207083746e-06, "loss": 1.1151, "step": 3527 }, { "epoch": 0.8937302089930336, "grad_norm": 3.2975292205810547, "learning_rate": 9.553555203114713e-06, "loss": 1.0455, "step": 3528 }, { "epoch": 0.893983533882204, "grad_norm": 3.4688210487365723, "learning_rate": 9.553209071283984e-06, "loss": 1.1231, "step": 3529 }, { "epoch": 0.8942368587713743, "grad_norm": 3.655614137649536, "learning_rate": 9.552862811601279e-06, "loss": 1.2476, "step": 3530 }, { "epoch": 0.8944901836605447, "grad_norm": 3.1691958904266357, "learning_rate": 9.552516424076318e-06, "loss": 1.1201, "step": 3531 }, { "epoch": 0.894743508549715, "grad_norm": 3.238905191421509, "learning_rate": 9.552169908718831e-06, "loss": 1.1532, "step": 3532 }, { "epoch": 0.8949968334388854, "grad_norm": 3.3464627265930176, "learning_rate": 9.551823265538546e-06, "loss": 1.1665, "step": 3533 }, { "epoch": 0.8952501583280558, "grad_norm": 3.670006036758423, "learning_rate": 9.551476494545198e-06, "loss": 1.251, "step": 3534 }, { "epoch": 0.8955034832172261, "grad_norm": 3.0911617279052734, "learning_rate": 9.551129595748521e-06, "loss": 1.1598, "step": 3535 }, { 
"epoch": 0.8957568081063965, "grad_norm": 3.6140267848968506, "learning_rate": 9.55078256915826e-06, "loss": 1.1254, "step": 3536 }, { "epoch": 0.8960101329955669, "grad_norm": 4.0415778160095215, "learning_rate": 9.550435414784157e-06, "loss": 1.4153, "step": 3537 }, { "epoch": 0.8962634578847372, "grad_norm": 3.4572088718414307, "learning_rate": 9.55008813263596e-06, "loss": 1.1791, "step": 3538 }, { "epoch": 0.8965167827739076, "grad_norm": 3.47280216217041, "learning_rate": 9.549740722723419e-06, "loss": 1.1283, "step": 3539 }, { "epoch": 0.8967701076630779, "grad_norm": 3.799362897872925, "learning_rate": 9.549393185056292e-06, "loss": 1.2423, "step": 3540 }, { "epoch": 0.8970234325522483, "grad_norm": 3.3728370666503906, "learning_rate": 9.549045519644338e-06, "loss": 1.1087, "step": 3541 }, { "epoch": 0.8972767574414187, "grad_norm": 3.5982282161712646, "learning_rate": 9.548697726497319e-06, "loss": 1.2767, "step": 3542 }, { "epoch": 0.897530082330589, "grad_norm": 3.420250654220581, "learning_rate": 9.548349805624997e-06, "loss": 1.213, "step": 3543 }, { "epoch": 0.8977834072197594, "grad_norm": 3.512740135192871, "learning_rate": 9.548001757037143e-06, "loss": 1.1563, "step": 3544 }, { "epoch": 0.8980367321089296, "grad_norm": 3.475491523742676, "learning_rate": 9.547653580743534e-06, "loss": 1.3159, "step": 3545 }, { "epoch": 0.8982900569981, "grad_norm": 3.273994207382202, "learning_rate": 9.547305276753942e-06, "loss": 1.114, "step": 3546 }, { "epoch": 0.8985433818872705, "grad_norm": 3.3316612243652344, "learning_rate": 9.546956845078151e-06, "loss": 1.1047, "step": 3547 }, { "epoch": 0.8987967067764407, "grad_norm": 3.2396793365478516, "learning_rate": 9.54660828572594e-06, "loss": 1.2443, "step": 3548 }, { "epoch": 0.8990500316656111, "grad_norm": 3.4848010540008545, "learning_rate": 9.546259598707102e-06, "loss": 1.1434, "step": 3549 }, { "epoch": 0.8993033565547816, "grad_norm": 3.5532639026641846, "learning_rate": 9.545910784031425e-06, "loss": 1.2154, "step": 3550 }, { "epoch": 0.8995566814439518, "grad_norm": 3.2470998764038086, "learning_rate": 9.545561841708702e-06, "loss": 1.1101, "step": 3551 }, { "epoch": 0.8998100063331222, "grad_norm": 3.5670552253723145, "learning_rate": 9.545212771748734e-06, "loss": 1.2471, "step": 3552 }, { "epoch": 0.9000633312222925, "grad_norm": 3.370518207550049, "learning_rate": 9.544863574161322e-06, "loss": 1.1992, "step": 3553 }, { "epoch": 0.9003166561114629, "grad_norm": 3.3459818363189697, "learning_rate": 9.54451424895627e-06, "loss": 1.1867, "step": 3554 }, { "epoch": 0.9005699810006333, "grad_norm": 3.3378310203552246, "learning_rate": 9.544164796143386e-06, "loss": 1.1293, "step": 3555 }, { "epoch": 0.9008233058898036, "grad_norm": 3.452312469482422, "learning_rate": 9.543815215732488e-06, "loss": 1.2222, "step": 3556 }, { "epoch": 0.901076630778974, "grad_norm": 3.7270290851593018, "learning_rate": 9.543465507733387e-06, "loss": 1.0793, "step": 3557 }, { "epoch": 0.9013299556681444, "grad_norm": 3.4278666973114014, "learning_rate": 9.543115672155903e-06, "loss": 1.1119, "step": 3558 }, { "epoch": 0.9015832805573147, "grad_norm": 3.2745046615600586, "learning_rate": 9.54276570900986e-06, "loss": 1.1795, "step": 3559 }, { "epoch": 0.9018366054464851, "grad_norm": 3.317507743835449, "learning_rate": 9.542415618305088e-06, "loss": 1.1609, "step": 3560 }, { "epoch": 0.9020899303356554, "grad_norm": 3.190793991088867, "learning_rate": 9.542065400051412e-06, "loss": 1.0849, "step": 3561 }, { "epoch": 0.9023432552248258, "grad_norm": 
3.4192872047424316, "learning_rate": 9.541715054258667e-06, "loss": 1.2198, "step": 3562 }, { "epoch": 0.9025965801139962, "grad_norm": 3.187180757522583, "learning_rate": 9.541364580936694e-06, "loss": 1.0867, "step": 3563 }, { "epoch": 0.9028499050031665, "grad_norm": 3.841717481613159, "learning_rate": 9.541013980095331e-06, "loss": 1.1032, "step": 3564 }, { "epoch": 0.9031032298923369, "grad_norm": 3.3154666423797607, "learning_rate": 9.540663251744425e-06, "loss": 1.2579, "step": 3565 }, { "epoch": 0.9033565547815073, "grad_norm": 3.5662498474121094, "learning_rate": 9.540312395893823e-06, "loss": 1.1479, "step": 3566 }, { "epoch": 0.9036098796706776, "grad_norm": 3.458418369293213, "learning_rate": 9.539961412553375e-06, "loss": 1.0921, "step": 3567 }, { "epoch": 0.903863204559848, "grad_norm": 2.8619308471679688, "learning_rate": 9.53961030173294e-06, "loss": 1.0249, "step": 3568 }, { "epoch": 0.9041165294490183, "grad_norm": 3.324341297149658, "learning_rate": 9.539259063442375e-06, "loss": 1.1141, "step": 3569 }, { "epoch": 0.9043698543381887, "grad_norm": 3.535216808319092, "learning_rate": 9.538907697691542e-06, "loss": 1.3503, "step": 3570 }, { "epoch": 0.9046231792273591, "grad_norm": 3.4408106803894043, "learning_rate": 9.538556204490308e-06, "loss": 1.1379, "step": 3571 }, { "epoch": 0.9048765041165294, "grad_norm": 3.6078426837921143, "learning_rate": 9.538204583848544e-06, "loss": 1.3365, "step": 3572 }, { "epoch": 0.9051298290056998, "grad_norm": 3.498480796813965, "learning_rate": 9.537852835776123e-06, "loss": 1.1158, "step": 3573 }, { "epoch": 0.9053831538948701, "grad_norm": 3.3247604370117188, "learning_rate": 9.537500960282919e-06, "loss": 1.1844, "step": 3574 }, { "epoch": 0.9056364787840405, "grad_norm": 3.5207560062408447, "learning_rate": 9.537148957378816e-06, "loss": 1.2253, "step": 3575 }, { "epoch": 0.9058898036732109, "grad_norm": 3.20007586479187, "learning_rate": 9.536796827073696e-06, "loss": 1.089, "step": 3576 }, { "epoch": 0.9061431285623812, "grad_norm": 3.0342764854431152, "learning_rate": 9.536444569377447e-06, "loss": 1.0194, "step": 3577 }, { "epoch": 0.9063964534515516, "grad_norm": 3.244046926498413, "learning_rate": 9.536092184299963e-06, "loss": 1.04, "step": 3578 }, { "epoch": 0.906649778340722, "grad_norm": 3.3093714714050293, "learning_rate": 9.535739671851134e-06, "loss": 1.0467, "step": 3579 }, { "epoch": 0.9069031032298923, "grad_norm": 3.905623435974121, "learning_rate": 9.53538703204086e-06, "loss": 1.2277, "step": 3580 }, { "epoch": 0.9071564281190627, "grad_norm": 3.680220127105713, "learning_rate": 9.535034264879047e-06, "loss": 1.2586, "step": 3581 }, { "epoch": 0.907409753008233, "grad_norm": 3.434997797012329, "learning_rate": 9.534681370375595e-06, "loss": 1.0123, "step": 3582 }, { "epoch": 0.9076630778974034, "grad_norm": 3.155743360519409, "learning_rate": 9.534328348540417e-06, "loss": 1.172, "step": 3583 }, { "epoch": 0.9079164027865738, "grad_norm": 3.8581161499023438, "learning_rate": 9.533975199383423e-06, "loss": 1.2394, "step": 3584 }, { "epoch": 0.9081697276757441, "grad_norm": 3.4210739135742188, "learning_rate": 9.533621922914532e-06, "loss": 1.1136, "step": 3585 }, { "epoch": 0.9084230525649145, "grad_norm": 3.2781097888946533, "learning_rate": 9.53326851914366e-06, "loss": 1.1038, "step": 3586 }, { "epoch": 0.9086763774540849, "grad_norm": 3.327735662460327, "learning_rate": 9.532914988080734e-06, "loss": 1.0744, "step": 3587 }, { "epoch": 0.9089297023432552, "grad_norm": 3.6758453845977783, "learning_rate": 
9.532561329735678e-06, "loss": 1.1279, "step": 3588 }, { "epoch": 0.9091830272324256, "grad_norm": 3.8179948329925537, "learning_rate": 9.532207544118426e-06, "loss": 1.1381, "step": 3589 }, { "epoch": 0.9094363521215959, "grad_norm": 3.188765287399292, "learning_rate": 9.53185363123891e-06, "loss": 1.1702, "step": 3590 }, { "epoch": 0.9096896770107663, "grad_norm": 4.352060317993164, "learning_rate": 9.531499591107068e-06, "loss": 1.5593, "step": 3591 }, { "epoch": 0.9099430018999367, "grad_norm": 3.6396377086639404, "learning_rate": 9.531145423732839e-06, "loss": 1.221, "step": 3592 }, { "epoch": 0.910196326789107, "grad_norm": 3.6435651779174805, "learning_rate": 9.530791129126174e-06, "loss": 1.265, "step": 3593 }, { "epoch": 0.9104496516782774, "grad_norm": 3.252295970916748, "learning_rate": 9.530436707297015e-06, "loss": 1.0908, "step": 3594 }, { "epoch": 0.9107029765674477, "grad_norm": 3.793752431869507, "learning_rate": 9.530082158255317e-06, "loss": 1.2533, "step": 3595 }, { "epoch": 0.9109563014566181, "grad_norm": 3.2656681537628174, "learning_rate": 9.529727482011036e-06, "loss": 1.204, "step": 3596 }, { "epoch": 0.9112096263457885, "grad_norm": 3.5367040634155273, "learning_rate": 9.529372678574129e-06, "loss": 1.125, "step": 3597 }, { "epoch": 0.9114629512349588, "grad_norm": 3.358474016189575, "learning_rate": 9.529017747954561e-06, "loss": 1.0224, "step": 3598 }, { "epoch": 0.9117162761241292, "grad_norm": 3.8379125595092773, "learning_rate": 9.528662690162296e-06, "loss": 1.2502, "step": 3599 }, { "epoch": 0.9119696010132996, "grad_norm": 3.674384355545044, "learning_rate": 9.528307505207307e-06, "loss": 1.294, "step": 3600 }, { "epoch": 0.9122229259024699, "grad_norm": 3.563316822052002, "learning_rate": 9.527952193099564e-06, "loss": 1.3051, "step": 3601 }, { "epoch": 0.9124762507916403, "grad_norm": 3.5329408645629883, "learning_rate": 9.527596753849046e-06, "loss": 1.232, "step": 3602 }, { "epoch": 0.9127295756808106, "grad_norm": 3.2041163444519043, "learning_rate": 9.527241187465735e-06, "loss": 0.9945, "step": 3603 }, { "epoch": 0.912982900569981, "grad_norm": 3.312466859817505, "learning_rate": 9.52688549395961e-06, "loss": 1.1157, "step": 3604 }, { "epoch": 0.9132362254591514, "grad_norm": 2.954261302947998, "learning_rate": 9.526529673340665e-06, "loss": 1.0741, "step": 3605 }, { "epoch": 0.9134895503483217, "grad_norm": 3.649989604949951, "learning_rate": 9.526173725618885e-06, "loss": 1.1959, "step": 3606 }, { "epoch": 0.9137428752374921, "grad_norm": 3.1549792289733887, "learning_rate": 9.525817650804271e-06, "loss": 1.1578, "step": 3607 }, { "epoch": 0.9139962001266625, "grad_norm": 3.561000347137451, "learning_rate": 9.525461448906817e-06, "loss": 1.1482, "step": 3608 }, { "epoch": 0.9142495250158328, "grad_norm": 3.3429903984069824, "learning_rate": 9.525105119936528e-06, "loss": 1.1744, "step": 3609 }, { "epoch": 0.9145028499050032, "grad_norm": 3.5031373500823975, "learning_rate": 9.524748663903408e-06, "loss": 1.0668, "step": 3610 }, { "epoch": 0.9147561747941735, "grad_norm": 3.495927572250366, "learning_rate": 9.524392080817465e-06, "loss": 1.283, "step": 3611 }, { "epoch": 0.9150094996833439, "grad_norm": 3.4305431842803955, "learning_rate": 9.52403537068871e-06, "loss": 1.1616, "step": 3612 }, { "epoch": 0.9152628245725143, "grad_norm": 3.8657901287078857, "learning_rate": 9.523678533527166e-06, "loss": 1.3108, "step": 3613 }, { "epoch": 0.9155161494616846, "grad_norm": 3.1496613025665283, "learning_rate": 9.52332156934285e-06, "loss": 1.0218, 
"step": 3614 }, { "epoch": 0.915769474350855, "grad_norm": 3.193406343460083, "learning_rate": 9.52296447814578e-06, "loss": 1.0279, "step": 3615 }, { "epoch": 0.9160227992400254, "grad_norm": 3.3336079120635986, "learning_rate": 9.52260725994599e-06, "loss": 1.1338, "step": 3616 }, { "epoch": 0.9162761241291957, "grad_norm": 3.353907823562622, "learning_rate": 9.522249914753507e-06, "loss": 1.0635, "step": 3617 }, { "epoch": 0.9165294490183661, "grad_norm": 3.2871077060699463, "learning_rate": 9.521892442578368e-06, "loss": 1.1136, "step": 3618 }, { "epoch": 0.9167827739075364, "grad_norm": 3.2008469104766846, "learning_rate": 9.521534843430606e-06, "loss": 1.1852, "step": 3619 }, { "epoch": 0.9170360987967068, "grad_norm": 4.270013809204102, "learning_rate": 9.521177117320267e-06, "loss": 1.301, "step": 3620 }, { "epoch": 0.9172894236858772, "grad_norm": 3.3976364135742188, "learning_rate": 9.52081926425739e-06, "loss": 1.1561, "step": 3621 }, { "epoch": 0.9175427485750475, "grad_norm": 3.1098673343658447, "learning_rate": 9.520461284252029e-06, "loss": 1.1531, "step": 3622 }, { "epoch": 0.9177960734642179, "grad_norm": 3.628095865249634, "learning_rate": 9.520103177314235e-06, "loss": 1.1812, "step": 3623 }, { "epoch": 0.9180493983533882, "grad_norm": 3.203521251678467, "learning_rate": 9.519744943454062e-06, "loss": 1.016, "step": 3624 }, { "epoch": 0.9183027232425586, "grad_norm": 3.242738962173462, "learning_rate": 9.519386582681566e-06, "loss": 1.2456, "step": 3625 }, { "epoch": 0.918556048131729, "grad_norm": 3.5318028926849365, "learning_rate": 9.519028095006817e-06, "loss": 1.104, "step": 3626 }, { "epoch": 0.9188093730208993, "grad_norm": 3.5023603439331055, "learning_rate": 9.518669480439875e-06, "loss": 1.1201, "step": 3627 }, { "epoch": 0.9190626979100697, "grad_norm": 3.1643176078796387, "learning_rate": 9.51831073899081e-06, "loss": 1.1703, "step": 3628 }, { "epoch": 0.9193160227992401, "grad_norm": 3.1051294803619385, "learning_rate": 9.5179518706697e-06, "loss": 1.1061, "step": 3629 }, { "epoch": 0.9195693476884104, "grad_norm": 3.1064279079437256, "learning_rate": 9.517592875486619e-06, "loss": 1.0738, "step": 3630 }, { "epoch": 0.9198226725775808, "grad_norm": 3.454463005065918, "learning_rate": 9.517233753451646e-06, "loss": 1.1865, "step": 3631 }, { "epoch": 0.9200759974667511, "grad_norm": 3.2854862213134766, "learning_rate": 9.516874504574866e-06, "loss": 1.0756, "step": 3632 }, { "epoch": 0.9203293223559215, "grad_norm": 3.3770976066589355, "learning_rate": 9.516515128866368e-06, "loss": 1.1921, "step": 3633 }, { "epoch": 0.9205826472450919, "grad_norm": 3.346442937850952, "learning_rate": 9.51615562633624e-06, "loss": 1.2264, "step": 3634 }, { "epoch": 0.9208359721342622, "grad_norm": 3.339327096939087, "learning_rate": 9.515795996994582e-06, "loss": 1.2777, "step": 3635 }, { "epoch": 0.9210892970234326, "grad_norm": 3.4643490314483643, "learning_rate": 9.515436240851486e-06, "loss": 0.9849, "step": 3636 }, { "epoch": 0.921342621912603, "grad_norm": 3.3188140392303467, "learning_rate": 9.515076357917059e-06, "loss": 1.1642, "step": 3637 }, { "epoch": 0.9215959468017733, "grad_norm": 3.399202585220337, "learning_rate": 9.514716348201403e-06, "loss": 1.1187, "step": 3638 }, { "epoch": 0.9218492716909437, "grad_norm": 3.731959581375122, "learning_rate": 9.514356211714626e-06, "loss": 1.1744, "step": 3639 }, { "epoch": 0.922102596580114, "grad_norm": 3.2492988109588623, "learning_rate": 9.513995948466844e-06, "loss": 1.1625, "step": 3640 }, { "epoch": 
0.9223559214692844, "grad_norm": 3.396430492401123, "learning_rate": 9.513635558468172e-06, "loss": 1.2304, "step": 3641 }, { "epoch": 0.9226092463584548, "grad_norm": 3.5798380374908447, "learning_rate": 9.513275041728728e-06, "loss": 1.1121, "step": 3642 }, { "epoch": 0.922862571247625, "grad_norm": 3.0054409503936768, "learning_rate": 9.512914398258637e-06, "loss": 1.0951, "step": 3643 }, { "epoch": 0.9231158961367955, "grad_norm": 3.5808658599853516, "learning_rate": 9.512553628068024e-06, "loss": 1.2808, "step": 3644 }, { "epoch": 0.9233692210259657, "grad_norm": 3.384359359741211, "learning_rate": 9.51219273116702e-06, "loss": 1.1461, "step": 3645 }, { "epoch": 0.9236225459151362, "grad_norm": 3.1516263484954834, "learning_rate": 9.511831707565758e-06, "loss": 1.0265, "step": 3646 }, { "epoch": 0.9238758708043066, "grad_norm": 3.3705384731292725, "learning_rate": 9.511470557274379e-06, "loss": 1.1197, "step": 3647 }, { "epoch": 0.9241291956934768, "grad_norm": 3.4362244606018066, "learning_rate": 9.511109280303018e-06, "loss": 1.0621, "step": 3648 }, { "epoch": 0.9243825205826472, "grad_norm": 3.3836193084716797, "learning_rate": 9.510747876661825e-06, "loss": 1.2772, "step": 3649 }, { "epoch": 0.9246358454718177, "grad_norm": 3.540104627609253, "learning_rate": 9.510386346360945e-06, "loss": 1.2575, "step": 3650 }, { "epoch": 0.9248891703609879, "grad_norm": 3.4364829063415527, "learning_rate": 9.51002468941053e-06, "loss": 1.105, "step": 3651 }, { "epoch": 0.9251424952501583, "grad_norm": 3.6412746906280518, "learning_rate": 9.509662905820736e-06, "loss": 1.2104, "step": 3652 }, { "epoch": 0.9253958201393286, "grad_norm": 3.5720605850219727, "learning_rate": 9.50930099560172e-06, "loss": 1.2214, "step": 3653 }, { "epoch": 0.925649145028499, "grad_norm": 3.4222095012664795, "learning_rate": 9.508938958763647e-06, "loss": 1.1668, "step": 3654 }, { "epoch": 0.9259024699176694, "grad_norm": 3.6412322521209717, "learning_rate": 9.50857679531668e-06, "loss": 1.2624, "step": 3655 }, { "epoch": 0.9261557948068397, "grad_norm": 3.187004566192627, "learning_rate": 9.508214505270989e-06, "loss": 1.1644, "step": 3656 }, { "epoch": 0.9264091196960101, "grad_norm": 3.4312942028045654, "learning_rate": 9.50785208863675e-06, "loss": 1.1829, "step": 3657 }, { "epoch": 0.9266624445851805, "grad_norm": 3.1643357276916504, "learning_rate": 9.507489545424135e-06, "loss": 1.1241, "step": 3658 }, { "epoch": 0.9269157694743508, "grad_norm": 3.2400124073028564, "learning_rate": 9.507126875643328e-06, "loss": 1.2106, "step": 3659 }, { "epoch": 0.9271690943635212, "grad_norm": 3.43381667137146, "learning_rate": 9.506764079304508e-06, "loss": 1.2335, "step": 3660 }, { "epoch": 0.9274224192526915, "grad_norm": 3.0889716148376465, "learning_rate": 9.506401156417868e-06, "loss": 1.0938, "step": 3661 }, { "epoch": 0.9276757441418619, "grad_norm": 3.585270404815674, "learning_rate": 9.506038106993594e-06, "loss": 1.1914, "step": 3662 }, { "epoch": 0.9279290690310323, "grad_norm": 3.6273610591888428, "learning_rate": 9.505674931041882e-06, "loss": 1.184, "step": 3663 }, { "epoch": 0.9281823939202026, "grad_norm": 3.4800095558166504, "learning_rate": 9.50531162857293e-06, "loss": 1.1435, "step": 3664 }, { "epoch": 0.928435718809373, "grad_norm": 3.5298163890838623, "learning_rate": 9.504948199596938e-06, "loss": 1.2875, "step": 3665 }, { "epoch": 0.9286890436985434, "grad_norm": 3.3472063541412354, "learning_rate": 9.504584644124113e-06, "loss": 1.0663, "step": 3666 }, { "epoch": 0.9289423685877137, "grad_norm": 
3.5220470428466797, "learning_rate": 9.504220962164662e-06, "loss": 1.1808, "step": 3667 }, { "epoch": 0.9291956934768841, "grad_norm": 3.6811187267303467, "learning_rate": 9.503857153728797e-06, "loss": 1.2056, "step": 3668 }, { "epoch": 0.9294490183660544, "grad_norm": 3.4177627563476562, "learning_rate": 9.503493218826734e-06, "loss": 1.1911, "step": 3669 }, { "epoch": 0.9297023432552248, "grad_norm": 3.5350520610809326, "learning_rate": 9.503129157468693e-06, "loss": 1.2154, "step": 3670 }, { "epoch": 0.9299556681443952, "grad_norm": 3.4798848628997803, "learning_rate": 9.502764969664895e-06, "loss": 1.3087, "step": 3671 }, { "epoch": 0.9302089930335655, "grad_norm": 3.2628650665283203, "learning_rate": 9.502400655425567e-06, "loss": 1.1072, "step": 3672 }, { "epoch": 0.9304623179227359, "grad_norm": 3.29524827003479, "learning_rate": 9.50203621476094e-06, "loss": 1.1156, "step": 3673 }, { "epoch": 0.9307156428119062, "grad_norm": 3.172996997833252, "learning_rate": 9.501671647681246e-06, "loss": 1.0985, "step": 3674 }, { "epoch": 0.9309689677010766, "grad_norm": 3.3947994709014893, "learning_rate": 9.50130695419672e-06, "loss": 1.1692, "step": 3675 }, { "epoch": 0.931222292590247, "grad_norm": 3.2290871143341064, "learning_rate": 9.500942134317605e-06, "loss": 1.1601, "step": 3676 }, { "epoch": 0.9314756174794173, "grad_norm": 3.762852907180786, "learning_rate": 9.500577188054146e-06, "loss": 1.3201, "step": 3677 }, { "epoch": 0.9317289423685877, "grad_norm": 3.3253703117370605, "learning_rate": 9.500212115416588e-06, "loss": 1.287, "step": 3678 }, { "epoch": 0.9319822672577581, "grad_norm": 3.639549970626831, "learning_rate": 9.49984691641518e-06, "loss": 1.2524, "step": 3679 }, { "epoch": 0.9322355921469284, "grad_norm": 3.2327334880828857, "learning_rate": 9.499481591060183e-06, "loss": 1.1269, "step": 3680 }, { "epoch": 0.9324889170360988, "grad_norm": 3.175328493118286, "learning_rate": 9.499116139361852e-06, "loss": 1.0656, "step": 3681 }, { "epoch": 0.9327422419252691, "grad_norm": 3.2034428119659424, "learning_rate": 9.498750561330448e-06, "loss": 1.0962, "step": 3682 }, { "epoch": 0.9329955668144395, "grad_norm": 3.437746286392212, "learning_rate": 9.498384856976235e-06, "loss": 1.1874, "step": 3683 }, { "epoch": 0.9332488917036099, "grad_norm": 3.2602434158325195, "learning_rate": 9.498019026309484e-06, "loss": 1.1412, "step": 3684 }, { "epoch": 0.9335022165927802, "grad_norm": 3.566904067993164, "learning_rate": 9.497653069340467e-06, "loss": 1.2866, "step": 3685 }, { "epoch": 0.9337555414819506, "grad_norm": 3.1169989109039307, "learning_rate": 9.497286986079462e-06, "loss": 1.1, "step": 3686 }, { "epoch": 0.934008866371121, "grad_norm": 3.468841314315796, "learning_rate": 9.496920776536745e-06, "loss": 1.0942, "step": 3687 }, { "epoch": 0.9342621912602913, "grad_norm": 3.2861669063568115, "learning_rate": 9.496554440722598e-06, "loss": 1.1, "step": 3688 }, { "epoch": 0.9345155161494617, "grad_norm": 3.364629030227661, "learning_rate": 9.49618797864731e-06, "loss": 1.0949, "step": 3689 }, { "epoch": 0.934768841038632, "grad_norm": 3.7341485023498535, "learning_rate": 9.495821390321173e-06, "loss": 1.1573, "step": 3690 }, { "epoch": 0.9350221659278024, "grad_norm": 3.2584400177001953, "learning_rate": 9.49545467575448e-06, "loss": 1.0675, "step": 3691 }, { "epoch": 0.9352754908169728, "grad_norm": 3.204885244369507, "learning_rate": 9.495087834957523e-06, "loss": 1.1662, "step": 3692 }, { "epoch": 0.9355288157061431, "grad_norm": 3.42419695854187, "learning_rate": 
9.494720867940608e-06, "loss": 1.1485, "step": 3693 }, { "epoch": 0.9357821405953135, "grad_norm": 3.1303205490112305, "learning_rate": 9.494353774714036e-06, "loss": 1.0381, "step": 3694 }, { "epoch": 0.9360354654844838, "grad_norm": 3.7704291343688965, "learning_rate": 9.493986555288118e-06, "loss": 1.293, "step": 3695 }, { "epoch": 0.9362887903736542, "grad_norm": 3.3077592849731445, "learning_rate": 9.493619209673164e-06, "loss": 1.0866, "step": 3696 }, { "epoch": 0.9365421152628246, "grad_norm": 3.1638870239257812, "learning_rate": 9.493251737879488e-06, "loss": 1.1623, "step": 3697 }, { "epoch": 0.9367954401519949, "grad_norm": 3.3037116527557373, "learning_rate": 9.492884139917408e-06, "loss": 1.0958, "step": 3698 }, { "epoch": 0.9370487650411653, "grad_norm": 3.504441499710083, "learning_rate": 9.492516415797249e-06, "loss": 1.0941, "step": 3699 }, { "epoch": 0.9373020899303357, "grad_norm": 3.4657788276672363, "learning_rate": 9.492148565529333e-06, "loss": 1.0397, "step": 3700 }, { "epoch": 0.937555414819506, "grad_norm": 3.318652868270874, "learning_rate": 9.491780589123991e-06, "loss": 1.1455, "step": 3701 }, { "epoch": 0.9378087397086764, "grad_norm": 3.411210775375366, "learning_rate": 9.491412486591556e-06, "loss": 1.1889, "step": 3702 }, { "epoch": 0.9380620645978467, "grad_norm": 3.2670881748199463, "learning_rate": 9.491044257942364e-06, "loss": 1.0567, "step": 3703 }, { "epoch": 0.9383153894870171, "grad_norm": 3.5867013931274414, "learning_rate": 9.490675903186753e-06, "loss": 1.2733, "step": 3704 }, { "epoch": 0.9385687143761875, "grad_norm": 3.686847686767578, "learning_rate": 9.490307422335068e-06, "loss": 1.0941, "step": 3705 }, { "epoch": 0.9388220392653578, "grad_norm": 3.306431293487549, "learning_rate": 9.489938815397655e-06, "loss": 1.2053, "step": 3706 }, { "epoch": 0.9390753641545282, "grad_norm": 3.5809178352355957, "learning_rate": 9.489570082384863e-06, "loss": 1.1933, "step": 3707 }, { "epoch": 0.9393286890436986, "grad_norm": 3.156501293182373, "learning_rate": 9.489201223307048e-06, "loss": 1.062, "step": 3708 }, { "epoch": 0.9395820139328689, "grad_norm": 3.27620005607605, "learning_rate": 9.488832238174568e-06, "loss": 1.1342, "step": 3709 }, { "epoch": 0.9398353388220393, "grad_norm": 3.3848562240600586, "learning_rate": 9.48846312699778e-06, "loss": 1.1495, "step": 3710 }, { "epoch": 0.9400886637112096, "grad_norm": 3.2755961418151855, "learning_rate": 9.488093889787053e-06, "loss": 1.1533, "step": 3711 }, { "epoch": 0.94034198860038, "grad_norm": 3.1494967937469482, "learning_rate": 9.487724526552753e-06, "loss": 1.1192, "step": 3712 }, { "epoch": 0.9405953134895504, "grad_norm": 3.6119613647460938, "learning_rate": 9.48735503730525e-06, "loss": 1.1016, "step": 3713 }, { "epoch": 0.9408486383787207, "grad_norm": 3.4080798625946045, "learning_rate": 9.486985422054921e-06, "loss": 1.2853, "step": 3714 }, { "epoch": 0.9411019632678911, "grad_norm": 3.7177653312683105, "learning_rate": 9.486615680812146e-06, "loss": 1.1727, "step": 3715 }, { "epoch": 0.9413552881570614, "grad_norm": 3.400573492050171, "learning_rate": 9.486245813587305e-06, "loss": 1.1045, "step": 3716 }, { "epoch": 0.9416086130462318, "grad_norm": 3.753406286239624, "learning_rate": 9.485875820390783e-06, "loss": 1.33, "step": 3717 }, { "epoch": 0.9418619379354022, "grad_norm": 3.4892420768737793, "learning_rate": 9.48550570123297e-06, "loss": 1.2081, "step": 3718 }, { "epoch": 0.9421152628245725, "grad_norm": 3.635849952697754, "learning_rate": 9.485135456124261e-06, "loss": 1.128, 
"step": 3719 }, { "epoch": 0.9423685877137429, "grad_norm": 3.3166277408599854, "learning_rate": 9.48476508507505e-06, "loss": 1.1779, "step": 3720 }, { "epoch": 0.9426219126029133, "grad_norm": 3.4502623081207275, "learning_rate": 9.484394588095738e-06, "loss": 1.2057, "step": 3721 }, { "epoch": 0.9428752374920836, "grad_norm": 3.455456256866455, "learning_rate": 9.484023965196726e-06, "loss": 1.2209, "step": 3722 }, { "epoch": 0.943128562381254, "grad_norm": 3.287853717803955, "learning_rate": 9.483653216388424e-06, "loss": 1.1678, "step": 3723 }, { "epoch": 0.9433818872704243, "grad_norm": 3.548009157180786, "learning_rate": 9.48328234168124e-06, "loss": 1.2503, "step": 3724 }, { "epoch": 0.9436352121595947, "grad_norm": 3.4027111530303955, "learning_rate": 9.48291134108559e-06, "loss": 1.3112, "step": 3725 }, { "epoch": 0.9438885370487651, "grad_norm": 3.3963234424591064, "learning_rate": 9.482540214611888e-06, "loss": 1.1574, "step": 3726 }, { "epoch": 0.9441418619379354, "grad_norm": 3.3159518241882324, "learning_rate": 9.482168962270561e-06, "loss": 1.1946, "step": 3727 }, { "epoch": 0.9443951868271058, "grad_norm": 3.2820098400115967, "learning_rate": 9.48179758407203e-06, "loss": 1.1389, "step": 3728 }, { "epoch": 0.9446485117162762, "grad_norm": 3.4625706672668457, "learning_rate": 9.48142608002672e-06, "loss": 1.1033, "step": 3729 }, { "epoch": 0.9449018366054465, "grad_norm": 3.100806951522827, "learning_rate": 9.481054450145069e-06, "loss": 1.0783, "step": 3730 }, { "epoch": 0.9451551614946169, "grad_norm": 3.291769504547119, "learning_rate": 9.480682694437508e-06, "loss": 1.2077, "step": 3731 }, { "epoch": 0.9454084863837872, "grad_norm": 3.1551342010498047, "learning_rate": 9.480310812914477e-06, "loss": 1.0391, "step": 3732 }, { "epoch": 0.9456618112729576, "grad_norm": 3.392557382583618, "learning_rate": 9.47993880558642e-06, "loss": 1.2441, "step": 3733 }, { "epoch": 0.945915136162128, "grad_norm": 3.3054678440093994, "learning_rate": 9.47956667246378e-06, "loss": 1.1569, "step": 3734 }, { "epoch": 0.9461684610512983, "grad_norm": 3.2276017665863037, "learning_rate": 9.479194413557007e-06, "loss": 1.2043, "step": 3735 }, { "epoch": 0.9464217859404687, "grad_norm": 3.59144926071167, "learning_rate": 9.478822028876553e-06, "loss": 1.2623, "step": 3736 }, { "epoch": 0.9466751108296391, "grad_norm": 3.2537569999694824, "learning_rate": 9.478449518432878e-06, "loss": 1.1272, "step": 3737 }, { "epoch": 0.9469284357188094, "grad_norm": 3.278841257095337, "learning_rate": 9.478076882236437e-06, "loss": 1.3562, "step": 3738 }, { "epoch": 0.9471817606079798, "grad_norm": 3.6335675716400146, "learning_rate": 9.477704120297698e-06, "loss": 1.1144, "step": 3739 }, { "epoch": 0.94743508549715, "grad_norm": 3.020799160003662, "learning_rate": 9.477331232627124e-06, "loss": 1.0823, "step": 3740 }, { "epoch": 0.9476884103863205, "grad_norm": 3.571873664855957, "learning_rate": 9.476958219235189e-06, "loss": 1.1729, "step": 3741 }, { "epoch": 0.9479417352754909, "grad_norm": 3.5291309356689453, "learning_rate": 9.476585080132365e-06, "loss": 1.1524, "step": 3742 }, { "epoch": 0.9481950601646612, "grad_norm": 3.2429070472717285, "learning_rate": 9.47621181532913e-06, "loss": 1.2193, "step": 3743 }, { "epoch": 0.9484483850538316, "grad_norm": 3.3645012378692627, "learning_rate": 9.475838424835964e-06, "loss": 1.1676, "step": 3744 }, { "epoch": 0.9487017099430018, "grad_norm": 3.4241702556610107, "learning_rate": 9.475464908663355e-06, "loss": 1.0979, "step": 3745 }, { "epoch": 
0.9489550348321723, "grad_norm": 3.4654715061187744, "learning_rate": 9.475091266821786e-06, "loss": 1.1196, "step": 3746 }, { "epoch": 0.9492083597213427, "grad_norm": 3.587653160095215, "learning_rate": 9.474717499321754e-06, "loss": 1.2102, "step": 3747 }, { "epoch": 0.949461684610513, "grad_norm": 3.1175477504730225, "learning_rate": 9.474343606173751e-06, "loss": 1.0849, "step": 3748 }, { "epoch": 0.9497150094996833, "grad_norm": 3.8891992568969727, "learning_rate": 9.473969587388277e-06, "loss": 1.2371, "step": 3749 }, { "epoch": 0.9499683343888538, "grad_norm": 3.7875583171844482, "learning_rate": 9.473595442975834e-06, "loss": 1.2844, "step": 3750 }, { "epoch": 0.950221659278024, "grad_norm": 3.4973838329315186, "learning_rate": 9.473221172946926e-06, "loss": 1.1984, "step": 3751 }, { "epoch": 0.9504749841671944, "grad_norm": 3.3866705894470215, "learning_rate": 9.472846777312065e-06, "loss": 1.1878, "step": 3752 }, { "epoch": 0.9507283090563647, "grad_norm": 3.5690484046936035, "learning_rate": 9.472472256081765e-06, "loss": 1.1684, "step": 3753 }, { "epoch": 0.9509816339455351, "grad_norm": 3.505452871322632, "learning_rate": 9.472097609266535e-06, "loss": 1.1924, "step": 3754 }, { "epoch": 0.9512349588347055, "grad_norm": 3.671903610229492, "learning_rate": 9.471722836876905e-06, "loss": 1.1182, "step": 3755 }, { "epoch": 0.9514882837238758, "grad_norm": 3.6271207332611084, "learning_rate": 9.471347938923392e-06, "loss": 1.1628, "step": 3756 }, { "epoch": 0.9517416086130462, "grad_norm": 3.468125104904175, "learning_rate": 9.470972915416522e-06, "loss": 1.0835, "step": 3757 }, { "epoch": 0.9519949335022166, "grad_norm": 3.480942726135254, "learning_rate": 9.47059776636683e-06, "loss": 1.1908, "step": 3758 }, { "epoch": 0.9522482583913869, "grad_norm": 3.083986759185791, "learning_rate": 9.470222491784849e-06, "loss": 1.0621, "step": 3759 }, { "epoch": 0.9525015832805573, "grad_norm": 3.3976480960845947, "learning_rate": 9.469847091681115e-06, "loss": 1.0349, "step": 3760 }, { "epoch": 0.9527549081697276, "grad_norm": 3.461347818374634, "learning_rate": 9.46947156606617e-06, "loss": 1.2073, "step": 3761 }, { "epoch": 0.953008233058898, "grad_norm": 3.341705560684204, "learning_rate": 9.469095914950556e-06, "loss": 1.0939, "step": 3762 }, { "epoch": 0.9532615579480684, "grad_norm": 3.6789677143096924, "learning_rate": 9.468720138344825e-06, "loss": 1.1137, "step": 3763 }, { "epoch": 0.9535148828372387, "grad_norm": 3.5974793434143066, "learning_rate": 9.468344236259528e-06, "loss": 1.2334, "step": 3764 }, { "epoch": 0.9537682077264091, "grad_norm": 3.2468185424804688, "learning_rate": 9.467968208705217e-06, "loss": 1.2708, "step": 3765 }, { "epoch": 0.9540215326155794, "grad_norm": 3.4909257888793945, "learning_rate": 9.467592055692454e-06, "loss": 1.1995, "step": 3766 }, { "epoch": 0.9542748575047498, "grad_norm": 3.160750150680542, "learning_rate": 9.4672157772318e-06, "loss": 0.9491, "step": 3767 }, { "epoch": 0.9545281823939202, "grad_norm": 3.3668110370635986, "learning_rate": 9.466839373333818e-06, "loss": 1.1766, "step": 3768 }, { "epoch": 0.9547815072830905, "grad_norm": 3.286818742752075, "learning_rate": 9.466462844009083e-06, "loss": 1.0569, "step": 3769 }, { "epoch": 0.9550348321722609, "grad_norm": 3.1187360286712646, "learning_rate": 9.466086189268163e-06, "loss": 1.022, "step": 3770 }, { "epoch": 0.9552881570614313, "grad_norm": 3.311356544494629, "learning_rate": 9.465709409121638e-06, "loss": 1.1193, "step": 3771 }, { "epoch": 0.9555414819506016, "grad_norm": 
3.3468382358551025, "learning_rate": 9.465332503580083e-06, "loss": 1.0767, "step": 3772 }, { "epoch": 0.955794806839772, "grad_norm": 3.2264561653137207, "learning_rate": 9.464955472654085e-06, "loss": 1.2398, "step": 3773 }, { "epoch": 0.9560481317289423, "grad_norm": 3.4525232315063477, "learning_rate": 9.464578316354231e-06, "loss": 1.289, "step": 3774 }, { "epoch": 0.9563014566181127, "grad_norm": 3.4776265621185303, "learning_rate": 9.46420103469111e-06, "loss": 1.1543, "step": 3775 }, { "epoch": 0.9565547815072831, "grad_norm": 3.537991762161255, "learning_rate": 9.463823627675314e-06, "loss": 1.1817, "step": 3776 }, { "epoch": 0.9568081063964534, "grad_norm": 3.2594330310821533, "learning_rate": 9.463446095317445e-06, "loss": 1.107, "step": 3777 }, { "epoch": 0.9570614312856238, "grad_norm": 3.2585158348083496, "learning_rate": 9.463068437628102e-06, "loss": 1.1603, "step": 3778 }, { "epoch": 0.9573147561747942, "grad_norm": 3.54490327835083, "learning_rate": 9.462690654617888e-06, "loss": 1.1565, "step": 3779 }, { "epoch": 0.9575680810639645, "grad_norm": 3.3803181648254395, "learning_rate": 9.462312746297413e-06, "loss": 1.082, "step": 3780 }, { "epoch": 0.9578214059531349, "grad_norm": 3.3924155235290527, "learning_rate": 9.461934712677286e-06, "loss": 1.0383, "step": 3781 }, { "epoch": 0.9580747308423052, "grad_norm": 3.3958303928375244, "learning_rate": 9.461556553768124e-06, "loss": 1.1977, "step": 3782 }, { "epoch": 0.9583280557314756, "grad_norm": 3.1743788719177246, "learning_rate": 9.461178269580546e-06, "loss": 1.0083, "step": 3783 }, { "epoch": 0.958581380620646, "grad_norm": 3.194161891937256, "learning_rate": 9.460799860125171e-06, "loss": 1.0622, "step": 3784 }, { "epoch": 0.9588347055098163, "grad_norm": 3.3265326023101807, "learning_rate": 9.460421325412627e-06, "loss": 1.1222, "step": 3785 }, { "epoch": 0.9590880303989867, "grad_norm": 3.457919120788574, "learning_rate": 9.460042665453543e-06, "loss": 1.103, "step": 3786 }, { "epoch": 0.9593413552881571, "grad_norm": 3.9475507736206055, "learning_rate": 9.459663880258554e-06, "loss": 1.1429, "step": 3787 }, { "epoch": 0.9595946801773274, "grad_norm": 3.5583839416503906, "learning_rate": 9.45928496983829e-06, "loss": 1.2336, "step": 3788 }, { "epoch": 0.9598480050664978, "grad_norm": 3.488065242767334, "learning_rate": 9.458905934203395e-06, "loss": 1.272, "step": 3789 }, { "epoch": 0.9601013299556681, "grad_norm": 3.486656427383423, "learning_rate": 9.458526773364512e-06, "loss": 1.1709, "step": 3790 }, { "epoch": 0.9603546548448385, "grad_norm": 3.8320512771606445, "learning_rate": 9.458147487332285e-06, "loss": 1.4177, "step": 3791 }, { "epoch": 0.9606079797340089, "grad_norm": 3.292505979537964, "learning_rate": 9.457768076117368e-06, "loss": 1.2246, "step": 3792 }, { "epoch": 0.9608613046231792, "grad_norm": 3.4512226581573486, "learning_rate": 9.457388539730411e-06, "loss": 1.3752, "step": 3793 }, { "epoch": 0.9611146295123496, "grad_norm": 3.332423210144043, "learning_rate": 9.457008878182074e-06, "loss": 1.0874, "step": 3794 }, { "epoch": 0.9613679544015199, "grad_norm": 3.1790709495544434, "learning_rate": 9.456629091483015e-06, "loss": 1.0322, "step": 3795 }, { "epoch": 0.9616212792906903, "grad_norm": 3.306790590286255, "learning_rate": 9.456249179643901e-06, "loss": 1.1335, "step": 3796 }, { "epoch": 0.9618746041798607, "grad_norm": 3.4400649070739746, "learning_rate": 9.4558691426754e-06, "loss": 1.0695, "step": 3797 }, { "epoch": 0.962127929069031, "grad_norm": 3.24611234664917, "learning_rate": 
9.45548898058818e-06, "loss": 1.1332, "step": 3798 }, { "epoch": 0.9623812539582014, "grad_norm": 3.5424106121063232, "learning_rate": 9.455108693392918e-06, "loss": 1.1823, "step": 3799 }, { "epoch": 0.9626345788473718, "grad_norm": 3.662250280380249, "learning_rate": 9.454728281100292e-06, "loss": 1.1708, "step": 3800 }, { "epoch": 0.9628879037365421, "grad_norm": 3.3638834953308105, "learning_rate": 9.454347743720985e-06, "loss": 1.1165, "step": 3801 }, { "epoch": 0.9631412286257125, "grad_norm": 3.294171094894409, "learning_rate": 9.453967081265679e-06, "loss": 1.0476, "step": 3802 }, { "epoch": 0.9633945535148828, "grad_norm": 3.36968994140625, "learning_rate": 9.453586293745065e-06, "loss": 0.9926, "step": 3803 }, { "epoch": 0.9636478784040532, "grad_norm": 3.604736566543579, "learning_rate": 9.453205381169836e-06, "loss": 1.1909, "step": 3804 }, { "epoch": 0.9639012032932236, "grad_norm": 3.4355592727661133, "learning_rate": 9.452824343550686e-06, "loss": 1.245, "step": 3805 }, { "epoch": 0.9641545281823939, "grad_norm": 3.1951956748962402, "learning_rate": 9.452443180898316e-06, "loss": 1.1425, "step": 3806 }, { "epoch": 0.9644078530715643, "grad_norm": 3.562960386276245, "learning_rate": 9.452061893223428e-06, "loss": 1.2107, "step": 3807 }, { "epoch": 0.9646611779607347, "grad_norm": 3.605611801147461, "learning_rate": 9.45168048053673e-06, "loss": 1.0697, "step": 3808 }, { "epoch": 0.964914502849905, "grad_norm": 3.229020357131958, "learning_rate": 9.451298942848928e-06, "loss": 1.0341, "step": 3809 }, { "epoch": 0.9651678277390754, "grad_norm": 3.192826271057129, "learning_rate": 9.45091728017074e-06, "loss": 1.1174, "step": 3810 }, { "epoch": 0.9654211526282457, "grad_norm": 3.4178497791290283, "learning_rate": 9.45053549251288e-06, "loss": 1.0368, "step": 3811 }, { "epoch": 0.9656744775174161, "grad_norm": 3.4938018321990967, "learning_rate": 9.45015357988607e-06, "loss": 1.3107, "step": 3812 }, { "epoch": 0.9659278024065865, "grad_norm": 3.2385363578796387, "learning_rate": 9.449771542301031e-06, "loss": 1.1503, "step": 3813 }, { "epoch": 0.9661811272957568, "grad_norm": 3.6335394382476807, "learning_rate": 9.449389379768495e-06, "loss": 1.2816, "step": 3814 }, { "epoch": 0.9664344521849272, "grad_norm": 3.1078078746795654, "learning_rate": 9.44900709229919e-06, "loss": 1.1068, "step": 3815 }, { "epoch": 0.9666877770740975, "grad_norm": 3.5607919692993164, "learning_rate": 9.44862467990385e-06, "loss": 1.2817, "step": 3816 }, { "epoch": 0.9669411019632679, "grad_norm": 3.2077252864837646, "learning_rate": 9.448242142593216e-06, "loss": 1.0986, "step": 3817 }, { "epoch": 0.9671944268524383, "grad_norm": 3.0507113933563232, "learning_rate": 9.447859480378025e-06, "loss": 1.1553, "step": 3818 }, { "epoch": 0.9674477517416086, "grad_norm": 3.2941465377807617, "learning_rate": 9.447476693269027e-06, "loss": 1.2877, "step": 3819 }, { "epoch": 0.967701076630779, "grad_norm": 3.351145029067993, "learning_rate": 9.447093781276965e-06, "loss": 1.2199, "step": 3820 }, { "epoch": 0.9679544015199494, "grad_norm": 3.4783740043640137, "learning_rate": 9.446710744412595e-06, "loss": 1.1059, "step": 3821 }, { "epoch": 0.9682077264091197, "grad_norm": 3.4025323390960693, "learning_rate": 9.446327582686672e-06, "loss": 1.2659, "step": 3822 }, { "epoch": 0.9684610512982901, "grad_norm": 3.8878393173217773, "learning_rate": 9.445944296109954e-06, "loss": 1.4049, "step": 3823 }, { "epoch": 0.9687143761874604, "grad_norm": 3.270028829574585, "learning_rate": 9.445560884693203e-06, "loss": 1.2016, 
"step": 3824 }, { "epoch": 0.9689677010766308, "grad_norm": 3.1180226802825928, "learning_rate": 9.445177348447187e-06, "loss": 1.1235, "step": 3825 }, { "epoch": 0.9692210259658012, "grad_norm": 3.4842357635498047, "learning_rate": 9.444793687382674e-06, "loss": 1.1148, "step": 3826 }, { "epoch": 0.9694743508549715, "grad_norm": 3.5103421211242676, "learning_rate": 9.444409901510439e-06, "loss": 1.1294, "step": 3827 }, { "epoch": 0.9697276757441419, "grad_norm": 3.630034923553467, "learning_rate": 9.444025990841254e-06, "loss": 1.2975, "step": 3828 }, { "epoch": 0.9699810006333123, "grad_norm": 3.3602428436279297, "learning_rate": 9.443641955385904e-06, "loss": 1.1799, "step": 3829 }, { "epoch": 0.9702343255224826, "grad_norm": 3.3463146686553955, "learning_rate": 9.44325779515517e-06, "loss": 1.2184, "step": 3830 }, { "epoch": 0.970487650411653, "grad_norm": 3.0899195671081543, "learning_rate": 9.44287351015984e-06, "loss": 0.9963, "step": 3831 }, { "epoch": 0.9707409753008233, "grad_norm": 2.956557273864746, "learning_rate": 9.442489100410704e-06, "loss": 1.0106, "step": 3832 }, { "epoch": 0.9709943001899937, "grad_norm": 3.286870002746582, "learning_rate": 9.442104565918555e-06, "loss": 1.2033, "step": 3833 }, { "epoch": 0.9712476250791641, "grad_norm": 3.317532777786255, "learning_rate": 9.441719906694194e-06, "loss": 1.0545, "step": 3834 }, { "epoch": 0.9715009499683344, "grad_norm": 3.214738130569458, "learning_rate": 9.441335122748418e-06, "loss": 1.2286, "step": 3835 }, { "epoch": 0.9717542748575048, "grad_norm": 3.272801160812378, "learning_rate": 9.440950214092033e-06, "loss": 1.0557, "step": 3836 }, { "epoch": 0.9720075997466752, "grad_norm": 3.406696319580078, "learning_rate": 9.440565180735848e-06, "loss": 1.1168, "step": 3837 }, { "epoch": 0.9722609246358455, "grad_norm": 3.1612672805786133, "learning_rate": 9.440180022690674e-06, "loss": 1.1292, "step": 3838 }, { "epoch": 0.9725142495250159, "grad_norm": 3.3064706325531006, "learning_rate": 9.439794739967326e-06, "loss": 1.1617, "step": 3839 }, { "epoch": 0.9727675744141862, "grad_norm": 3.067596197128296, "learning_rate": 9.439409332576624e-06, "loss": 1.0532, "step": 3840 }, { "epoch": 0.9730208993033566, "grad_norm": 3.350423812866211, "learning_rate": 9.439023800529385e-06, "loss": 1.2133, "step": 3841 }, { "epoch": 0.973274224192527, "grad_norm": 3.269676446914673, "learning_rate": 9.43863814383644e-06, "loss": 1.0703, "step": 3842 }, { "epoch": 0.9735275490816973, "grad_norm": 3.0289134979248047, "learning_rate": 9.438252362508617e-06, "loss": 1.0969, "step": 3843 }, { "epoch": 0.9737808739708677, "grad_norm": 3.3679349422454834, "learning_rate": 9.437866456556747e-06, "loss": 1.1468, "step": 3844 }, { "epoch": 0.974034198860038, "grad_norm": 3.594999074935913, "learning_rate": 9.437480425991668e-06, "loss": 1.1801, "step": 3845 }, { "epoch": 0.9742875237492084, "grad_norm": 3.608299493789673, "learning_rate": 9.437094270824218e-06, "loss": 1.244, "step": 3846 }, { "epoch": 0.9745408486383788, "grad_norm": 3.7407684326171875, "learning_rate": 9.43670799106524e-06, "loss": 1.186, "step": 3847 }, { "epoch": 0.974794173527549, "grad_norm": 3.3501274585723877, "learning_rate": 9.43632158672558e-06, "loss": 1.1088, "step": 3848 }, { "epoch": 0.9750474984167194, "grad_norm": 3.390684127807617, "learning_rate": 9.43593505781609e-06, "loss": 1.2245, "step": 3849 }, { "epoch": 0.9753008233058899, "grad_norm": 3.3741683959960938, "learning_rate": 9.435548404347623e-06, "loss": 1.1435, "step": 3850 }, { "epoch": 
0.9755541481950601, "grad_norm": 3.3280506134033203, "learning_rate": 9.435161626331034e-06, "loss": 1.037, "step": 3851 }, { "epoch": 0.9758074730842305, "grad_norm": 3.345827579498291, "learning_rate": 9.434774723777187e-06, "loss": 1.168, "step": 3852 }, { "epoch": 0.9760607979734008, "grad_norm": 3.2142157554626465, "learning_rate": 9.434387696696942e-06, "loss": 1.0419, "step": 3853 }, { "epoch": 0.9763141228625712, "grad_norm": 3.3951430320739746, "learning_rate": 9.434000545101172e-06, "loss": 1.1057, "step": 3854 }, { "epoch": 0.9765674477517416, "grad_norm": 3.477745294570923, "learning_rate": 9.433613269000743e-06, "loss": 1.1029, "step": 3855 }, { "epoch": 0.9768207726409119, "grad_norm": 3.197359800338745, "learning_rate": 9.433225868406531e-06, "loss": 1.0358, "step": 3856 }, { "epoch": 0.9770740975300823, "grad_norm": 3.2064270973205566, "learning_rate": 9.432838343329416e-06, "loss": 1.1123, "step": 3857 }, { "epoch": 0.9773274224192527, "grad_norm": 3.4815673828125, "learning_rate": 9.432450693780275e-06, "loss": 1.1419, "step": 3858 }, { "epoch": 0.977580747308423, "grad_norm": 2.9322943687438965, "learning_rate": 9.43206291977e-06, "loss": 1.0544, "step": 3859 }, { "epoch": 0.9778340721975934, "grad_norm": 3.3347134590148926, "learning_rate": 9.431675021309472e-06, "loss": 1.1945, "step": 3860 }, { "epoch": 0.9780873970867637, "grad_norm": 3.537545919418335, "learning_rate": 9.431286998409587e-06, "loss": 1.0261, "step": 3861 }, { "epoch": 0.9783407219759341, "grad_norm": 3.597611904144287, "learning_rate": 9.43089885108124e-06, "loss": 1.2227, "step": 3862 }, { "epoch": 0.9785940468651045, "grad_norm": 2.9854297637939453, "learning_rate": 9.430510579335331e-06, "loss": 1.0479, "step": 3863 }, { "epoch": 0.9788473717542748, "grad_norm": 3.229724645614624, "learning_rate": 9.43012218318276e-06, "loss": 1.0965, "step": 3864 }, { "epoch": 0.9791006966434452, "grad_norm": 3.2314443588256836, "learning_rate": 9.429733662634436e-06, "loss": 1.0474, "step": 3865 }, { "epoch": 0.9793540215326155, "grad_norm": 3.617342233657837, "learning_rate": 9.429345017701265e-06, "loss": 1.2463, "step": 3866 }, { "epoch": 0.9796073464217859, "grad_norm": 3.136746644973755, "learning_rate": 9.428956248394164e-06, "loss": 1.1454, "step": 3867 }, { "epoch": 0.9798606713109563, "grad_norm": 2.977908134460449, "learning_rate": 9.428567354724047e-06, "loss": 1.1663, "step": 3868 }, { "epoch": 0.9801139962001266, "grad_norm": 3.4196927547454834, "learning_rate": 9.428178336701833e-06, "loss": 1.0333, "step": 3869 }, { "epoch": 0.980367321089297, "grad_norm": 3.3801467418670654, "learning_rate": 9.427789194338447e-06, "loss": 1.0466, "step": 3870 }, { "epoch": 0.9806206459784674, "grad_norm": 3.2432374954223633, "learning_rate": 9.427399927644817e-06, "loss": 1.1126, "step": 3871 }, { "epoch": 0.9808739708676377, "grad_norm": 3.3430604934692383, "learning_rate": 9.42701053663187e-06, "loss": 1.3454, "step": 3872 }, { "epoch": 0.9811272957568081, "grad_norm": 3.461338520050049, "learning_rate": 9.426621021310542e-06, "loss": 0.9893, "step": 3873 }, { "epoch": 0.9813806206459784, "grad_norm": 3.28102707862854, "learning_rate": 9.42623138169177e-06, "loss": 1.087, "step": 3874 }, { "epoch": 0.9816339455351488, "grad_norm": 3.4910247325897217, "learning_rate": 9.425841617786498e-06, "loss": 1.1913, "step": 3875 }, { "epoch": 0.9818872704243192, "grad_norm": 3.680506706237793, "learning_rate": 9.425451729605665e-06, "loss": 1.234, "step": 3876 }, { "epoch": 0.9821405953134895, "grad_norm": 
3.419874668121338, "learning_rate": 9.425061717160224e-06, "loss": 1.1164, "step": 3877 }, { "epoch": 0.9823939202026599, "grad_norm": 3.5245721340179443, "learning_rate": 9.424671580461122e-06, "loss": 1.213, "step": 3878 }, { "epoch": 0.9826472450918303, "grad_norm": 3.2231924533843994, "learning_rate": 9.424281319519315e-06, "loss": 1.1113, "step": 3879 }, { "epoch": 0.9829005699810006, "grad_norm": 3.562605857849121, "learning_rate": 9.423890934345765e-06, "loss": 1.2658, "step": 3880 }, { "epoch": 0.983153894870171, "grad_norm": 3.5420823097229004, "learning_rate": 9.423500424951428e-06, "loss": 1.1124, "step": 3881 }, { "epoch": 0.9834072197593413, "grad_norm": 3.412123680114746, "learning_rate": 9.423109791347274e-06, "loss": 1.0965, "step": 3882 }, { "epoch": 0.9836605446485117, "grad_norm": 3.1747586727142334, "learning_rate": 9.422719033544269e-06, "loss": 0.9925, "step": 3883 }, { "epoch": 0.9839138695376821, "grad_norm": 3.5493829250335693, "learning_rate": 9.422328151553388e-06, "loss": 1.0916, "step": 3884 }, { "epoch": 0.9841671944268524, "grad_norm": 3.297175168991089, "learning_rate": 9.421937145385604e-06, "loss": 1.0271, "step": 3885 }, { "epoch": 0.9844205193160228, "grad_norm": 3.282716989517212, "learning_rate": 9.4215460150519e-06, "loss": 1.0088, "step": 3886 }, { "epoch": 0.9846738442051931, "grad_norm": 3.3660576343536377, "learning_rate": 9.421154760563253e-06, "loss": 1.1502, "step": 3887 }, { "epoch": 0.9849271690943635, "grad_norm": 3.6032137870788574, "learning_rate": 9.420763381930653e-06, "loss": 1.1942, "step": 3888 }, { "epoch": 0.9851804939835339, "grad_norm": 3.5022025108337402, "learning_rate": 9.420371879165089e-06, "loss": 1.0985, "step": 3889 }, { "epoch": 0.9854338188727042, "grad_norm": 3.558393955230713, "learning_rate": 9.419980252277554e-06, "loss": 1.1885, "step": 3890 }, { "epoch": 0.9856871437618746, "grad_norm": 3.6664602756500244, "learning_rate": 9.419588501279047e-06, "loss": 1.247, "step": 3891 }, { "epoch": 0.985940468651045, "grad_norm": 3.4450783729553223, "learning_rate": 9.419196626180565e-06, "loss": 1.1347, "step": 3892 }, { "epoch": 0.9861937935402153, "grad_norm": 3.165170192718506, "learning_rate": 9.418804626993111e-06, "loss": 1.1621, "step": 3893 }, { "epoch": 0.9864471184293857, "grad_norm": 3.789806842803955, "learning_rate": 9.418412503727697e-06, "loss": 1.2786, "step": 3894 }, { "epoch": 0.986700443318556, "grad_norm": 3.4419732093811035, "learning_rate": 9.41802025639533e-06, "loss": 1.2254, "step": 3895 }, { "epoch": 0.9869537682077264, "grad_norm": 3.2875359058380127, "learning_rate": 9.417627885007025e-06, "loss": 1.1569, "step": 3896 }, { "epoch": 0.9872070930968968, "grad_norm": 2.901073455810547, "learning_rate": 9.4172353895738e-06, "loss": 1.0742, "step": 3897 }, { "epoch": 0.9874604179860671, "grad_norm": 3.3430562019348145, "learning_rate": 9.416842770106673e-06, "loss": 1.1992, "step": 3898 }, { "epoch": 0.9877137428752375, "grad_norm": 3.46517014503479, "learning_rate": 9.416450026616672e-06, "loss": 1.139, "step": 3899 }, { "epoch": 0.9879670677644079, "grad_norm": 3.2958526611328125, "learning_rate": 9.416057159114826e-06, "loss": 1.283, "step": 3900 }, { "epoch": 0.9882203926535782, "grad_norm": 3.2788097858428955, "learning_rate": 9.415664167612164e-06, "loss": 1.1059, "step": 3901 }, { "epoch": 0.9884737175427486, "grad_norm": 3.61752986907959, "learning_rate": 9.415271052119721e-06, "loss": 1.1303, "step": 3902 }, { "epoch": 0.9887270424319189, "grad_norm": 3.313946485519409, "learning_rate": 
9.414877812648535e-06, "loss": 1.1279, "step": 3903 }, { "epoch": 0.9889803673210893, "grad_norm": 3.685398817062378, "learning_rate": 9.414484449209652e-06, "loss": 1.1137, "step": 3904 }, { "epoch": 0.9892336922102597, "grad_norm": 3.203425168991089, "learning_rate": 9.414090961814114e-06, "loss": 1.1434, "step": 3905 }, { "epoch": 0.98948701709943, "grad_norm": 3.2387688159942627, "learning_rate": 9.413697350472968e-06, "loss": 1.1431, "step": 3906 }, { "epoch": 0.9897403419886004, "grad_norm": 3.46217942237854, "learning_rate": 9.413303615197272e-06, "loss": 1.166, "step": 3907 }, { "epoch": 0.9899936668777708, "grad_norm": 2.9703526496887207, "learning_rate": 9.412909755998077e-06, "loss": 1.0074, "step": 3908 }, { "epoch": 0.9902469917669411, "grad_norm": 3.5861029624938965, "learning_rate": 9.412515772886446e-06, "loss": 1.1414, "step": 3909 }, { "epoch": 0.9905003166561115, "grad_norm": 3.4567534923553467, "learning_rate": 9.412121665873437e-06, "loss": 1.2878, "step": 3910 }, { "epoch": 0.9907536415452818, "grad_norm": 3.2463295459747314, "learning_rate": 9.411727434970121e-06, "loss": 1.1724, "step": 3911 }, { "epoch": 0.9910069664344522, "grad_norm": 3.3823342323303223, "learning_rate": 9.411333080187568e-06, "loss": 1.267, "step": 3912 }, { "epoch": 0.9912602913236226, "grad_norm": 3.239588499069214, "learning_rate": 9.410938601536848e-06, "loss": 1.0305, "step": 3913 }, { "epoch": 0.9915136162127929, "grad_norm": 3.7073850631713867, "learning_rate": 9.41054399902904e-06, "loss": 1.2407, "step": 3914 }, { "epoch": 0.9917669411019633, "grad_norm": 3.311387538909912, "learning_rate": 9.410149272675224e-06, "loss": 1.0787, "step": 3915 }, { "epoch": 0.9920202659911336, "grad_norm": 3.822373390197754, "learning_rate": 9.409754422486482e-06, "loss": 1.2348, "step": 3916 }, { "epoch": 0.992273590880304, "grad_norm": 3.1513609886169434, "learning_rate": 9.409359448473904e-06, "loss": 1.1415, "step": 3917 }, { "epoch": 0.9925269157694744, "grad_norm": 3.7293031215667725, "learning_rate": 9.40896435064858e-06, "loss": 1.4051, "step": 3918 }, { "epoch": 0.9927802406586447, "grad_norm": 3.0820281505584717, "learning_rate": 9.4085691290216e-06, "loss": 1.0717, "step": 3919 }, { "epoch": 0.9930335655478151, "grad_norm": 3.3282504081726074, "learning_rate": 9.408173783604068e-06, "loss": 1.1197, "step": 3920 }, { "epoch": 0.9932868904369855, "grad_norm": 3.198674201965332, "learning_rate": 9.407778314407081e-06, "loss": 1.1, "step": 3921 }, { "epoch": 0.9935402153261558, "grad_norm": 3.5387020111083984, "learning_rate": 9.407382721441744e-06, "loss": 1.1417, "step": 3922 }, { "epoch": 0.9937935402153262, "grad_norm": 3.6065056324005127, "learning_rate": 9.406987004719169e-06, "loss": 1.232, "step": 3923 }, { "epoch": 0.9940468651044965, "grad_norm": 3.1034014225006104, "learning_rate": 9.406591164250462e-06, "loss": 1.0804, "step": 3924 }, { "epoch": 0.9943001899936669, "grad_norm": 3.61995005607605, "learning_rate": 9.406195200046739e-06, "loss": 1.2058, "step": 3925 }, { "epoch": 0.9945535148828373, "grad_norm": 3.545271635055542, "learning_rate": 9.405799112119123e-06, "loss": 1.1187, "step": 3926 }, { "epoch": 0.9948068397720076, "grad_norm": 3.5722460746765137, "learning_rate": 9.405402900478731e-06, "loss": 1.2531, "step": 3927 }, { "epoch": 0.995060164661178, "grad_norm": 3.2239632606506348, "learning_rate": 9.40500656513669e-06, "loss": 1.1244, "step": 3928 }, { "epoch": 0.9953134895503484, "grad_norm": 3.4617607593536377, "learning_rate": 9.404610106104131e-06, "loss": 1.1761, 
"step": 3929 }, { "epoch": 0.9955668144395187, "grad_norm": 3.228865146636963, "learning_rate": 9.404213523392183e-06, "loss": 1.015, "step": 3930 }, { "epoch": 0.9958201393286891, "grad_norm": 3.3742833137512207, "learning_rate": 9.403816817011984e-06, "loss": 1.1929, "step": 3931 }, { "epoch": 0.9960734642178594, "grad_norm": 3.2746987342834473, "learning_rate": 9.403419986974671e-06, "loss": 1.039, "step": 3932 }, { "epoch": 0.9963267891070298, "grad_norm": 3.239696502685547, "learning_rate": 9.40302303329139e-06, "loss": 1.0192, "step": 3933 }, { "epoch": 0.9965801139962002, "grad_norm": 3.384113073348999, "learning_rate": 9.402625955973286e-06, "loss": 0.9779, "step": 3934 }, { "epoch": 0.9968334388853705, "grad_norm": 2.995961904525757, "learning_rate": 9.402228755031508e-06, "loss": 1.0088, "step": 3935 }, { "epoch": 0.9970867637745409, "grad_norm": 3.698370933532715, "learning_rate": 9.401831430477211e-06, "loss": 1.2532, "step": 3936 }, { "epoch": 0.9973400886637112, "grad_norm": 3.454690933227539, "learning_rate": 9.40143398232155e-06, "loss": 1.1475, "step": 3937 }, { "epoch": 0.9975934135528816, "grad_norm": 3.397371292114258, "learning_rate": 9.401036410575686e-06, "loss": 1.1621, "step": 3938 }, { "epoch": 0.997846738442052, "grad_norm": 3.0821456909179688, "learning_rate": 9.40063871525078e-06, "loss": 1.0157, "step": 3939 }, { "epoch": 0.9981000633312223, "grad_norm": 3.279803991317749, "learning_rate": 9.400240896358003e-06, "loss": 1.1192, "step": 3940 }, { "epoch": 0.9983533882203927, "grad_norm": 3.269456624984741, "learning_rate": 9.399842953908525e-06, "loss": 1.19, "step": 3941 }, { "epoch": 0.9986067131095631, "grad_norm": 3.564135789871216, "learning_rate": 9.399444887913517e-06, "loss": 1.0596, "step": 3942 }, { "epoch": 0.9988600379987334, "grad_norm": 3.460651397705078, "learning_rate": 9.39904669838416e-06, "loss": 1.1363, "step": 3943 }, { "epoch": 0.9991133628879038, "grad_norm": 3.2880966663360596, "learning_rate": 9.398648385331632e-06, "loss": 1.1396, "step": 3944 }, { "epoch": 0.999366687777074, "grad_norm": 3.545489549636841, "learning_rate": 9.39824994876712e-06, "loss": 1.1993, "step": 3945 }, { "epoch": 0.9996200126662445, "grad_norm": 3.4143259525299072, "learning_rate": 9.397851388701811e-06, "loss": 1.042, "step": 3946 }, { "epoch": 0.9998733375554149, "grad_norm": 3.38020920753479, "learning_rate": 9.397452705146895e-06, "loss": 1.2462, "step": 3947 }, { "epoch": 1.0001266624445853, "grad_norm": 3.2661054134368896, "learning_rate": 9.397053898113569e-06, "loss": 1.0958, "step": 3948 }, { "epoch": 1.0003799873337555, "grad_norm": 3.2052109241485596, "learning_rate": 9.39665496761303e-06, "loss": 0.6504, "step": 3949 }, { "epoch": 1.0006333122229258, "grad_norm": 3.3197989463806152, "learning_rate": 9.39625591365648e-06, "loss": 0.7845, "step": 3950 }, { "epoch": 1.0008866371120964, "grad_norm": 3.427189588546753, "learning_rate": 9.395856736255125e-06, "loss": 0.9611, "step": 3951 }, { "epoch": 1.0011399620012666, "grad_norm": 3.1614127159118652, "learning_rate": 9.395457435420172e-06, "loss": 0.7962, "step": 3952 }, { "epoch": 1.001393286890437, "grad_norm": 3.2238099575042725, "learning_rate": 9.395058011162835e-06, "loss": 0.9196, "step": 3953 }, { "epoch": 1.0016466117796075, "grad_norm": 2.831684112548828, "learning_rate": 9.394658463494328e-06, "loss": 0.7302, "step": 3954 }, { "epoch": 1.0018999366687777, "grad_norm": 3.56166934967041, "learning_rate": 9.394258792425873e-06, "loss": 0.8171, "step": 3955 }, { "epoch": 1.002153261557948, 
"grad_norm": 3.648761510848999, "learning_rate": 9.393858997968687e-06, "loss": 0.8623, "step": 3956 }, { "epoch": 1.0024065864471183, "grad_norm": 3.7296135425567627, "learning_rate": 9.393459080134003e-06, "loss": 0.7937, "step": 3957 }, { "epoch": 1.0026599113362888, "grad_norm": 3.5527822971343994, "learning_rate": 9.393059038933046e-06, "loss": 0.7415, "step": 3958 }, { "epoch": 1.0029132362254591, "grad_norm": 4.277530193328857, "learning_rate": 9.39265887437705e-06, "loss": 0.9787, "step": 3959 }, { "epoch": 1.0031665611146294, "grad_norm": 4.242221832275391, "learning_rate": 9.392258586477252e-06, "loss": 0.9013, "step": 3960 }, { "epoch": 1.0034198860038, "grad_norm": 3.581080675125122, "learning_rate": 9.391858175244892e-06, "loss": 0.722, "step": 3961 }, { "epoch": 1.0036732108929702, "grad_norm": 3.8707706928253174, "learning_rate": 9.391457640691212e-06, "loss": 0.7524, "step": 3962 }, { "epoch": 1.0039265357821405, "grad_norm": 4.110296726226807, "learning_rate": 9.39105698282746e-06, "loss": 0.9165, "step": 3963 }, { "epoch": 1.004179860671311, "grad_norm": 3.7063021659851074, "learning_rate": 9.390656201664885e-06, "loss": 0.7811, "step": 3964 }, { "epoch": 1.0044331855604813, "grad_norm": 3.5401947498321533, "learning_rate": 9.390255297214743e-06, "loss": 0.7897, "step": 3965 }, { "epoch": 1.0046865104496516, "grad_norm": 3.8825721740722656, "learning_rate": 9.389854269488288e-06, "loss": 0.9127, "step": 3966 }, { "epoch": 1.0049398353388221, "grad_norm": 3.7624917030334473, "learning_rate": 9.389453118496784e-06, "loss": 0.7913, "step": 3967 }, { "epoch": 1.0051931602279924, "grad_norm": 3.513160228729248, "learning_rate": 9.389051844251493e-06, "loss": 0.8007, "step": 3968 }, { "epoch": 1.0054464851171627, "grad_norm": 3.7435648441314697, "learning_rate": 9.388650446763685e-06, "loss": 0.8864, "step": 3969 }, { "epoch": 1.005699810006333, "grad_norm": 3.542984962463379, "learning_rate": 9.388248926044627e-06, "loss": 0.7748, "step": 3970 }, { "epoch": 1.0059531348955035, "grad_norm": 3.4242076873779297, "learning_rate": 9.387847282105597e-06, "loss": 0.7556, "step": 3971 }, { "epoch": 1.0062064597846738, "grad_norm": 3.671903610229492, "learning_rate": 9.387445514957872e-06, "loss": 0.8695, "step": 3972 }, { "epoch": 1.0064597846738441, "grad_norm": 3.238417625427246, "learning_rate": 9.387043624612733e-06, "loss": 0.8229, "step": 3973 }, { "epoch": 1.0067131095630146, "grad_norm": 3.5176565647125244, "learning_rate": 9.386641611081464e-06, "loss": 0.8807, "step": 3974 }, { "epoch": 1.006966434452185, "grad_norm": 3.416443347930908, "learning_rate": 9.386239474375354e-06, "loss": 0.7816, "step": 3975 }, { "epoch": 1.0072197593413552, "grad_norm": 3.7881276607513428, "learning_rate": 9.385837214505697e-06, "loss": 0.898, "step": 3976 }, { "epoch": 1.0074730842305257, "grad_norm": 4.878559112548828, "learning_rate": 9.385434831483787e-06, "loss": 0.7363, "step": 3977 }, { "epoch": 1.007726409119696, "grad_norm": 3.616326093673706, "learning_rate": 9.385032325320921e-06, "loss": 0.8663, "step": 3978 }, { "epoch": 1.0079797340088663, "grad_norm": 3.512878179550171, "learning_rate": 9.384629696028404e-06, "loss": 0.7816, "step": 3979 }, { "epoch": 1.0082330588980368, "grad_norm": 3.7067666053771973, "learning_rate": 9.384226943617538e-06, "loss": 0.7679, "step": 3980 }, { "epoch": 1.0084863837872071, "grad_norm": 3.615403413772583, "learning_rate": 9.383824068099637e-06, "loss": 0.7131, "step": 3981 }, { "epoch": 1.0087397086763774, "grad_norm": 3.7785820960998535, 
"learning_rate": 9.383421069486009e-06, "loss": 0.895, "step": 3982 }, { "epoch": 1.008993033565548, "grad_norm": 3.4927902221679688, "learning_rate": 9.383017947787972e-06, "loss": 0.8078, "step": 3983 }, { "epoch": 1.0092463584547182, "grad_norm": 3.422529458999634, "learning_rate": 9.382614703016845e-06, "loss": 0.7221, "step": 3984 }, { "epoch": 1.0094996833438885, "grad_norm": 3.8402881622314453, "learning_rate": 9.382211335183951e-06, "loss": 0.9036, "step": 3985 }, { "epoch": 1.0097530082330588, "grad_norm": 3.8119280338287354, "learning_rate": 9.381807844300617e-06, "loss": 0.7648, "step": 3986 }, { "epoch": 1.0100063331222293, "grad_norm": 3.590049982070923, "learning_rate": 9.381404230378171e-06, "loss": 0.7746, "step": 3987 }, { "epoch": 1.0102596580113996, "grad_norm": 3.670738935470581, "learning_rate": 9.38100049342795e-06, "loss": 0.8161, "step": 3988 }, { "epoch": 1.01051298290057, "grad_norm": 3.62028431892395, "learning_rate": 9.380596633461288e-06, "loss": 0.7307, "step": 3989 }, { "epoch": 1.0107663077897404, "grad_norm": 3.922258138656616, "learning_rate": 9.380192650489523e-06, "loss": 0.9206, "step": 3990 }, { "epoch": 1.0110196326789107, "grad_norm": 3.51464581489563, "learning_rate": 9.379788544524004e-06, "loss": 0.825, "step": 3991 }, { "epoch": 1.011272957568081, "grad_norm": 3.8684113025665283, "learning_rate": 9.379384315576075e-06, "loss": 0.9172, "step": 3992 }, { "epoch": 1.0115262824572515, "grad_norm": 3.779548406600952, "learning_rate": 9.378979963657087e-06, "loss": 0.7806, "step": 3993 }, { "epoch": 1.0117796073464218, "grad_norm": 3.2644236087799072, "learning_rate": 9.378575488778392e-06, "loss": 0.8051, "step": 3994 }, { "epoch": 1.012032932235592, "grad_norm": 3.8024179935455322, "learning_rate": 9.378170890951352e-06, "loss": 0.8841, "step": 3995 }, { "epoch": 1.0122862571247626, "grad_norm": 3.60563588142395, "learning_rate": 9.377766170187324e-06, "loss": 0.7384, "step": 3996 }, { "epoch": 1.012539582013933, "grad_norm": 3.503539562225342, "learning_rate": 9.377361326497673e-06, "loss": 0.7236, "step": 3997 }, { "epoch": 1.0127929069031032, "grad_norm": 3.932176113128662, "learning_rate": 9.376956359893769e-06, "loss": 0.8586, "step": 3998 }, { "epoch": 1.0130462317922735, "grad_norm": 3.7912957668304443, "learning_rate": 9.376551270386983e-06, "loss": 0.8211, "step": 3999 }, { "epoch": 1.013299556681444, "grad_norm": 3.5588326454162598, "learning_rate": 9.376146057988686e-06, "loss": 0.7828, "step": 4000 }, { "epoch": 1.013299556681444, "eval_loss": 1.1961894035339355, "eval_runtime": 13.9493, "eval_samples_per_second": 28.675, "eval_steps_per_second": 3.584, "step": 4000 }, { "epoch": 1.0135528815706143, "grad_norm": 3.7038869857788086, "learning_rate": 9.375740722710257e-06, "loss": 0.8325, "step": 4001 }, { "epoch": 1.0138062064597846, "grad_norm": 3.4084620475769043, "learning_rate": 9.375335264563083e-06, "loss": 0.6979, "step": 4002 }, { "epoch": 1.014059531348955, "grad_norm": 3.8237576484680176, "learning_rate": 9.374929683558545e-06, "loss": 0.8107, "step": 4003 }, { "epoch": 1.0143128562381254, "grad_norm": 3.8058247566223145, "learning_rate": 9.37452397970803e-06, "loss": 0.7667, "step": 4004 }, { "epoch": 1.0145661811272957, "grad_norm": 3.383089065551758, "learning_rate": 9.374118153022934e-06, "loss": 0.7403, "step": 4005 }, { "epoch": 1.0148195060164662, "grad_norm": 4.054951190948486, "learning_rate": 9.373712203514649e-06, "loss": 0.8392, "step": 4006 }, { "epoch": 1.0150728309056365, "grad_norm": 3.5813748836517334, 
"learning_rate": 9.373306131194575e-06, "loss": 0.8302, "step": 4007 }, { "epoch": 1.0153261557948068, "grad_norm": 3.783907651901245, "learning_rate": 9.372899936074117e-06, "loss": 0.7773, "step": 4008 }, { "epoch": 1.0155794806839773, "grad_norm": 3.4219818115234375, "learning_rate": 9.372493618164675e-06, "loss": 0.7994, "step": 4009 }, { "epoch": 1.0158328055731476, "grad_norm": 3.5228030681610107, "learning_rate": 9.372087177477662e-06, "loss": 0.7912, "step": 4010 }, { "epoch": 1.0160861304623179, "grad_norm": 3.52055025100708, "learning_rate": 9.371680614024493e-06, "loss": 0.6163, "step": 4011 }, { "epoch": 1.0163394553514882, "grad_norm": 3.9310102462768555, "learning_rate": 9.371273927816577e-06, "loss": 0.8431, "step": 4012 }, { "epoch": 1.0165927802406587, "grad_norm": 3.408386468887329, "learning_rate": 9.370867118865341e-06, "loss": 0.7651, "step": 4013 }, { "epoch": 1.016846105129829, "grad_norm": 3.851137161254883, "learning_rate": 9.370460187182204e-06, "loss": 0.8768, "step": 4014 }, { "epoch": 1.0170994300189993, "grad_norm": 3.3667516708374023, "learning_rate": 9.370053132778594e-06, "loss": 0.7137, "step": 4015 }, { "epoch": 1.0173527549081698, "grad_norm": 4.034343719482422, "learning_rate": 9.369645955665938e-06, "loss": 0.973, "step": 4016 }, { "epoch": 1.01760607979734, "grad_norm": 3.683751344680786, "learning_rate": 9.369238655855672e-06, "loss": 0.7618, "step": 4017 }, { "epoch": 1.0178594046865104, "grad_norm": 4.034209728240967, "learning_rate": 9.368831233359234e-06, "loss": 0.8932, "step": 4018 }, { "epoch": 1.0181127295756809, "grad_norm": 3.8687524795532227, "learning_rate": 9.368423688188062e-06, "loss": 0.9488, "step": 4019 }, { "epoch": 1.0183660544648512, "grad_norm": 4.022247791290283, "learning_rate": 9.3680160203536e-06, "loss": 0.8182, "step": 4020 }, { "epoch": 1.0186193793540215, "grad_norm": 3.3809804916381836, "learning_rate": 9.367608229867295e-06, "loss": 0.7617, "step": 4021 }, { "epoch": 1.018872704243192, "grad_norm": 3.5665273666381836, "learning_rate": 9.367200316740597e-06, "loss": 0.7599, "step": 4022 }, { "epoch": 1.0191260291323623, "grad_norm": 3.5940616130828857, "learning_rate": 9.366792280984964e-06, "loss": 0.7481, "step": 4023 }, { "epoch": 1.0193793540215326, "grad_norm": 3.7060680389404297, "learning_rate": 9.366384122611846e-06, "loss": 0.8695, "step": 4024 }, { "epoch": 1.019632678910703, "grad_norm": 3.90010142326355, "learning_rate": 9.365975841632712e-06, "loss": 0.7519, "step": 4025 }, { "epoch": 1.0198860037998734, "grad_norm": 3.875436544418335, "learning_rate": 9.365567438059022e-06, "loss": 0.7557, "step": 4026 }, { "epoch": 1.0201393286890437, "grad_norm": 3.612962245941162, "learning_rate": 9.365158911902244e-06, "loss": 0.8537, "step": 4027 }, { "epoch": 1.020392653578214, "grad_norm": 3.8356757164001465, "learning_rate": 9.364750263173849e-06, "loss": 0.816, "step": 4028 }, { "epoch": 1.0206459784673845, "grad_norm": 3.585089683532715, "learning_rate": 9.364341491885312e-06, "loss": 0.7298, "step": 4029 }, { "epoch": 1.0208993033565548, "grad_norm": 3.722064971923828, "learning_rate": 9.363932598048112e-06, "loss": 0.7424, "step": 4030 }, { "epoch": 1.021152628245725, "grad_norm": 3.741924285888672, "learning_rate": 9.363523581673731e-06, "loss": 0.7275, "step": 4031 }, { "epoch": 1.0214059531348956, "grad_norm": 4.047133922576904, "learning_rate": 9.363114442773651e-06, "loss": 0.9372, "step": 4032 }, { "epoch": 1.0216592780240659, "grad_norm": 3.6290786266326904, "learning_rate": 9.362705181359362e-06, 
"loss": 0.716, "step": 4033 }, { "epoch": 1.0219126029132362, "grad_norm": 3.848945379257202, "learning_rate": 9.362295797442358e-06, "loss": 0.7539, "step": 4034 }, { "epoch": 1.0221659278024067, "grad_norm": 3.30218505859375, "learning_rate": 9.361886291034132e-06, "loss": 0.6905, "step": 4035 }, { "epoch": 1.022419252691577, "grad_norm": 3.9459948539733887, "learning_rate": 9.361476662146183e-06, "loss": 0.6884, "step": 4036 }, { "epoch": 1.0226725775807473, "grad_norm": 3.8152194023132324, "learning_rate": 9.361066910790013e-06, "loss": 0.8528, "step": 4037 }, { "epoch": 1.0229259024699178, "grad_norm": 3.5387020111083984, "learning_rate": 9.36065703697713e-06, "loss": 0.8407, "step": 4038 }, { "epoch": 1.023179227359088, "grad_norm": 3.615628242492676, "learning_rate": 9.36024704071904e-06, "loss": 0.7512, "step": 4039 }, { "epoch": 1.0234325522482584, "grad_norm": 3.804381847381592, "learning_rate": 9.359836922027255e-06, "loss": 0.8224, "step": 4040 }, { "epoch": 1.0236858771374286, "grad_norm": 3.6101276874542236, "learning_rate": 9.359426680913295e-06, "loss": 0.7711, "step": 4041 }, { "epoch": 1.0239392020265992, "grad_norm": 3.5813560485839844, "learning_rate": 9.359016317388677e-06, "loss": 0.9614, "step": 4042 }, { "epoch": 1.0241925269157695, "grad_norm": 3.7858824729919434, "learning_rate": 9.358605831464921e-06, "loss": 0.7475, "step": 4043 }, { "epoch": 1.0244458518049397, "grad_norm": 3.810811996459961, "learning_rate": 9.358195223153558e-06, "loss": 0.862, "step": 4044 }, { "epoch": 1.0246991766941103, "grad_norm": 3.565434455871582, "learning_rate": 9.357784492466116e-06, "loss": 0.798, "step": 4045 }, { "epoch": 1.0249525015832806, "grad_norm": 3.3674070835113525, "learning_rate": 9.357373639414127e-06, "loss": 0.816, "step": 4046 }, { "epoch": 1.0252058264724508, "grad_norm": 3.9376728534698486, "learning_rate": 9.356962664009129e-06, "loss": 0.7253, "step": 4047 }, { "epoch": 1.0254591513616214, "grad_norm": 4.085208892822266, "learning_rate": 9.356551566262661e-06, "loss": 0.7994, "step": 4048 }, { "epoch": 1.0257124762507916, "grad_norm": 3.700139045715332, "learning_rate": 9.356140346186266e-06, "loss": 0.7552, "step": 4049 }, { "epoch": 1.025965801139962, "grad_norm": 3.817131280899048, "learning_rate": 9.355729003791494e-06, "loss": 0.7647, "step": 4050 }, { "epoch": 1.0262191260291325, "grad_norm": 3.4104766845703125, "learning_rate": 9.35531753908989e-06, "loss": 0.6676, "step": 4051 }, { "epoch": 1.0264724509183027, "grad_norm": 3.7993674278259277, "learning_rate": 9.354905952093015e-06, "loss": 0.8102, "step": 4052 }, { "epoch": 1.026725775807473, "grad_norm": 3.708205223083496, "learning_rate": 9.354494242812418e-06, "loss": 0.7942, "step": 4053 }, { "epoch": 1.0269791006966436, "grad_norm": 4.100223064422607, "learning_rate": 9.354082411259664e-06, "loss": 0.8309, "step": 4054 }, { "epoch": 1.0272324255858138, "grad_norm": 3.8497891426086426, "learning_rate": 9.353670457446318e-06, "loss": 0.8815, "step": 4055 }, { "epoch": 1.0274857504749841, "grad_norm": 3.486553430557251, "learning_rate": 9.353258381383944e-06, "loss": 0.7857, "step": 4056 }, { "epoch": 1.0277390753641544, "grad_norm": 3.7564120292663574, "learning_rate": 9.352846183084119e-06, "loss": 0.7394, "step": 4057 }, { "epoch": 1.027992400253325, "grad_norm": 3.5400476455688477, "learning_rate": 9.35243386255841e-06, "loss": 0.7344, "step": 4058 }, { "epoch": 1.0282457251424952, "grad_norm": 3.2935454845428467, "learning_rate": 9.352021419818398e-06, "loss": 0.7182, "step": 4059 }, { "epoch": 
1.0284990500316655, "grad_norm": 3.8735382556915283, "learning_rate": 9.351608854875665e-06, "loss": 0.7742, "step": 4060 }, { "epoch": 1.028752374920836, "grad_norm": 3.4187848567962646, "learning_rate": 9.351196167741796e-06, "loss": 0.7383, "step": 4061 }, { "epoch": 1.0290056998100063, "grad_norm": 3.6276071071624756, "learning_rate": 9.350783358428375e-06, "loss": 0.7414, "step": 4062 }, { "epoch": 1.0292590246991766, "grad_norm": 4.093981742858887, "learning_rate": 9.350370426946998e-06, "loss": 0.7992, "step": 4063 }, { "epoch": 1.0295123495883471, "grad_norm": 4.762815952301025, "learning_rate": 9.349957373309259e-06, "loss": 0.8498, "step": 4064 }, { "epoch": 1.0297656744775174, "grad_norm": 3.806854009628296, "learning_rate": 9.349544197526755e-06, "loss": 0.7386, "step": 4065 }, { "epoch": 1.0300189993666877, "grad_norm": 3.7432122230529785, "learning_rate": 9.349130899611088e-06, "loss": 0.8265, "step": 4066 }, { "epoch": 1.0302723242558582, "grad_norm": 4.310154914855957, "learning_rate": 9.348717479573865e-06, "loss": 0.8894, "step": 4067 }, { "epoch": 1.0305256491450285, "grad_norm": 3.9261465072631836, "learning_rate": 9.348303937426692e-06, "loss": 0.9846, "step": 4068 }, { "epoch": 1.0307789740341988, "grad_norm": 4.200754642486572, "learning_rate": 9.347890273181183e-06, "loss": 0.8683, "step": 4069 }, { "epoch": 1.0310322989233691, "grad_norm": 3.166792631149292, "learning_rate": 9.347476486848954e-06, "loss": 0.7617, "step": 4070 }, { "epoch": 1.0312856238125396, "grad_norm": 3.516883611679077, "learning_rate": 9.347062578441622e-06, "loss": 0.7223, "step": 4071 }, { "epoch": 1.03153894870171, "grad_norm": 3.4659571647644043, "learning_rate": 9.346648547970809e-06, "loss": 0.7658, "step": 4072 }, { "epoch": 1.0317922735908802, "grad_norm": 3.5516104698181152, "learning_rate": 9.346234395448143e-06, "loss": 0.9041, "step": 4073 }, { "epoch": 1.0320455984800507, "grad_norm": 3.813924789428711, "learning_rate": 9.345820120885252e-06, "loss": 0.8464, "step": 4074 }, { "epoch": 1.032298923369221, "grad_norm": 4.0255937576293945, "learning_rate": 9.345405724293767e-06, "loss": 0.9182, "step": 4075 }, { "epoch": 1.0325522482583913, "grad_norm": 3.270711898803711, "learning_rate": 9.34499120568533e-06, "loss": 0.644, "step": 4076 }, { "epoch": 1.0328055731475618, "grad_norm": 3.5418875217437744, "learning_rate": 9.344576565071572e-06, "loss": 0.7468, "step": 4077 }, { "epoch": 1.0330588980367321, "grad_norm": 3.934638738632202, "learning_rate": 9.344161802464143e-06, "loss": 0.7684, "step": 4078 }, { "epoch": 1.0333122229259024, "grad_norm": 3.533428430557251, "learning_rate": 9.343746917874686e-06, "loss": 0.8012, "step": 4079 }, { "epoch": 1.033565547815073, "grad_norm": 3.596932888031006, "learning_rate": 9.34333191131485e-06, "loss": 0.7605, "step": 4080 }, { "epoch": 1.0338188727042432, "grad_norm": 3.7631914615631104, "learning_rate": 9.342916782796291e-06, "loss": 0.8154, "step": 4081 }, { "epoch": 1.0340721975934135, "grad_norm": 3.557291030883789, "learning_rate": 9.342501532330666e-06, "loss": 0.8532, "step": 4082 }, { "epoch": 1.0343255224825838, "grad_norm": 3.152371406555176, "learning_rate": 9.342086159929629e-06, "loss": 0.7563, "step": 4083 }, { "epoch": 1.0345788473717543, "grad_norm": 3.6007800102233887, "learning_rate": 9.34167066560485e-06, "loss": 0.647, "step": 4084 }, { "epoch": 1.0348321722609246, "grad_norm": 3.5349671840667725, "learning_rate": 9.341255049367994e-06, "loss": 0.6733, "step": 4085 }, { "epoch": 1.035085497150095, "grad_norm": 
3.530944347381592, "learning_rate": 9.34083931123073e-06, "loss": 0.7737, "step": 4086 }, { "epoch": 1.0353388220392654, "grad_norm": 3.9375393390655518, "learning_rate": 9.340423451204733e-06, "loss": 0.857, "step": 4087 }, { "epoch": 1.0355921469284357, "grad_norm": 3.490175247192383, "learning_rate": 9.34000746930168e-06, "loss": 0.7594, "step": 4088 }, { "epoch": 1.035845471817606, "grad_norm": 3.2738397121429443, "learning_rate": 9.33959136553325e-06, "loss": 0.7471, "step": 4089 }, { "epoch": 1.0360987967067765, "grad_norm": 3.5194242000579834, "learning_rate": 9.33917513991113e-06, "loss": 0.6387, "step": 4090 }, { "epoch": 1.0363521215959468, "grad_norm": 3.405498504638672, "learning_rate": 9.338758792447005e-06, "loss": 0.7327, "step": 4091 }, { "epoch": 1.036605446485117, "grad_norm": 4.062540531158447, "learning_rate": 9.338342323152566e-06, "loss": 0.8626, "step": 4092 }, { "epoch": 1.0368587713742876, "grad_norm": 3.594367742538452, "learning_rate": 9.337925732039508e-06, "loss": 0.7588, "step": 4093 }, { "epoch": 1.037112096263458, "grad_norm": 3.48313307762146, "learning_rate": 9.337509019119529e-06, "loss": 0.7734, "step": 4094 }, { "epoch": 1.0373654211526282, "grad_norm": 3.8747398853302, "learning_rate": 9.33709218440433e-06, "loss": 0.7602, "step": 4095 }, { "epoch": 1.0376187460417987, "grad_norm": 3.6556296348571777, "learning_rate": 9.336675227905614e-06, "loss": 0.7492, "step": 4096 }, { "epoch": 1.037872070930969, "grad_norm": 3.5307297706604004, "learning_rate": 9.336258149635091e-06, "loss": 0.6898, "step": 4097 }, { "epoch": 1.0381253958201393, "grad_norm": 3.6485016345977783, "learning_rate": 9.33584094960447e-06, "loss": 0.8027, "step": 4098 }, { "epoch": 1.0383787207093096, "grad_norm": 3.7099509239196777, "learning_rate": 9.33542362782547e-06, "loss": 0.8457, "step": 4099 }, { "epoch": 1.03863204559848, "grad_norm": 3.612884283065796, "learning_rate": 9.335006184309805e-06, "loss": 0.7799, "step": 4100 }, { "epoch": 1.0388853704876504, "grad_norm": 3.448322057723999, "learning_rate": 9.334588619069197e-06, "loss": 0.7126, "step": 4101 }, { "epoch": 1.0391386953768207, "grad_norm": 3.669529914855957, "learning_rate": 9.334170932115375e-06, "loss": 0.6772, "step": 4102 }, { "epoch": 1.0393920202659912, "grad_norm": 3.4435253143310547, "learning_rate": 9.333753123460061e-06, "loss": 0.6991, "step": 4103 }, { "epoch": 1.0396453451551615, "grad_norm": 3.9661593437194824, "learning_rate": 9.333335193114992e-06, "loss": 0.8298, "step": 4104 }, { "epoch": 1.0398986700443318, "grad_norm": 3.6540608406066895, "learning_rate": 9.332917141091903e-06, "loss": 0.7116, "step": 4105 }, { "epoch": 1.0401519949335023, "grad_norm": 3.6850311756134033, "learning_rate": 9.33249896740253e-06, "loss": 0.6842, "step": 4106 }, { "epoch": 1.0404053198226726, "grad_norm": 3.594804286956787, "learning_rate": 9.332080672058617e-06, "loss": 0.708, "step": 4107 }, { "epoch": 1.0406586447118429, "grad_norm": 3.6079134941101074, "learning_rate": 9.331662255071909e-06, "loss": 0.8033, "step": 4108 }, { "epoch": 1.0409119696010134, "grad_norm": 4.036489963531494, "learning_rate": 9.331243716454154e-06, "loss": 0.9127, "step": 4109 }, { "epoch": 1.0411652944901837, "grad_norm": 4.392895221710205, "learning_rate": 9.330825056217105e-06, "loss": 0.7351, "step": 4110 }, { "epoch": 1.041418619379354, "grad_norm": 3.399625062942505, "learning_rate": 9.330406274372521e-06, "loss": 0.7421, "step": 4111 }, { "epoch": 1.0416719442685243, "grad_norm": 3.749479055404663, "learning_rate": 
9.329987370932157e-06, "loss": 0.8002, "step": 4112 }, { "epoch": 1.0419252691576948, "grad_norm": 3.7531774044036865, "learning_rate": 9.329568345907776e-06, "loss": 0.7491, "step": 4113 }, { "epoch": 1.042178594046865, "grad_norm": 4.106726169586182, "learning_rate": 9.329149199311148e-06, "loss": 0.7408, "step": 4114 }, { "epoch": 1.0424319189360354, "grad_norm": 3.7518279552459717, "learning_rate": 9.328729931154036e-06, "loss": 0.7299, "step": 4115 }, { "epoch": 1.0426852438252059, "grad_norm": 3.997710704803467, "learning_rate": 9.328310541448218e-06, "loss": 0.7315, "step": 4116 }, { "epoch": 1.0429385687143762, "grad_norm": 3.344733476638794, "learning_rate": 9.327891030205467e-06, "loss": 0.7416, "step": 4117 }, { "epoch": 1.0431918936035465, "grad_norm": 3.4776430130004883, "learning_rate": 9.327471397437567e-06, "loss": 0.7233, "step": 4118 }, { "epoch": 1.043445218492717, "grad_norm": 4.643021583557129, "learning_rate": 9.327051643156295e-06, "loss": 0.9628, "step": 4119 }, { "epoch": 1.0436985433818873, "grad_norm": 3.613051414489746, "learning_rate": 9.326631767373444e-06, "loss": 0.8216, "step": 4120 }, { "epoch": 1.0439518682710576, "grad_norm": 4.063212871551514, "learning_rate": 9.3262117701008e-06, "loss": 0.7857, "step": 4121 }, { "epoch": 1.044205193160228, "grad_norm": 3.6081511974334717, "learning_rate": 9.325791651350156e-06, "loss": 0.6622, "step": 4122 }, { "epoch": 1.0444585180493984, "grad_norm": 3.584641933441162, "learning_rate": 9.325371411133309e-06, "loss": 0.6775, "step": 4123 }, { "epoch": 1.0447118429385687, "grad_norm": 3.308696985244751, "learning_rate": 9.324951049462063e-06, "loss": 0.7193, "step": 4124 }, { "epoch": 1.0449651678277392, "grad_norm": 3.8555233478546143, "learning_rate": 9.324530566348215e-06, "loss": 0.857, "step": 4125 }, { "epoch": 1.0452184927169095, "grad_norm": 3.670700788497925, "learning_rate": 9.324109961803578e-06, "loss": 0.8167, "step": 4126 }, { "epoch": 1.0454718176060798, "grad_norm": 3.8492424488067627, "learning_rate": 9.32368923583996e-06, "loss": 0.7953, "step": 4127 }, { "epoch": 1.04572514249525, "grad_norm": 3.4988362789154053, "learning_rate": 9.323268388469173e-06, "loss": 0.8068, "step": 4128 }, { "epoch": 1.0459784673844206, "grad_norm": 3.37612247467041, "learning_rate": 9.322847419703037e-06, "loss": 0.7807, "step": 4129 }, { "epoch": 1.0462317922735909, "grad_norm": 4.227519512176514, "learning_rate": 9.322426329553371e-06, "loss": 0.8334, "step": 4130 }, { "epoch": 1.0464851171627612, "grad_norm": 3.745054244995117, "learning_rate": 9.322005118032e-06, "loss": 0.77, "step": 4131 }, { "epoch": 1.0467384420519317, "grad_norm": 3.4740939140319824, "learning_rate": 9.32158378515075e-06, "loss": 0.8007, "step": 4132 }, { "epoch": 1.046991766941102, "grad_norm": 3.788421154022217, "learning_rate": 9.321162330921453e-06, "loss": 0.7977, "step": 4133 }, { "epoch": 1.0472450918302723, "grad_norm": 3.160989999771118, "learning_rate": 9.320740755355944e-06, "loss": 0.6265, "step": 4134 }, { "epoch": 1.0474984167194428, "grad_norm": 3.3964905738830566, "learning_rate": 9.32031905846606e-06, "loss": 0.8452, "step": 4135 }, { "epoch": 1.047751741608613, "grad_norm": 3.5575075149536133, "learning_rate": 9.31989724026364e-06, "loss": 0.8274, "step": 4136 }, { "epoch": 1.0480050664977834, "grad_norm": 4.0009942054748535, "learning_rate": 9.319475300760531e-06, "loss": 0.9045, "step": 4137 }, { "epoch": 1.0482583913869539, "grad_norm": 3.404421329498291, "learning_rate": 9.319053239968581e-06, "loss": 0.7146, "step": 4138 
}, { "epoch": 1.0485117162761242, "grad_norm": 3.66922664642334, "learning_rate": 9.318631057899639e-06, "loss": 0.7487, "step": 4139 }, { "epoch": 1.0487650411652945, "grad_norm": 3.483755350112915, "learning_rate": 9.31820875456556e-06, "loss": 0.6974, "step": 4140 }, { "epoch": 1.0490183660544647, "grad_norm": 3.725436210632324, "learning_rate": 9.317786329978204e-06, "loss": 0.8188, "step": 4141 }, { "epoch": 1.0492716909436353, "grad_norm": 3.7148492336273193, "learning_rate": 9.317363784149432e-06, "loss": 0.7631, "step": 4142 }, { "epoch": 1.0495250158328056, "grad_norm": 3.5920324325561523, "learning_rate": 9.316941117091107e-06, "loss": 0.7792, "step": 4143 }, { "epoch": 1.0497783407219758, "grad_norm": 3.592470407485962, "learning_rate": 9.3165183288151e-06, "loss": 0.7027, "step": 4144 }, { "epoch": 1.0500316656111464, "grad_norm": 3.8422300815582275, "learning_rate": 9.316095419333281e-06, "loss": 0.7744, "step": 4145 }, { "epoch": 1.0502849905003167, "grad_norm": 3.559274196624756, "learning_rate": 9.315672388657527e-06, "loss": 0.7609, "step": 4146 }, { "epoch": 1.050538315389487, "grad_norm": 3.2121517658233643, "learning_rate": 9.315249236799713e-06, "loss": 0.7698, "step": 4147 }, { "epoch": 1.0507916402786575, "grad_norm": 3.810584306716919, "learning_rate": 9.314825963771724e-06, "loss": 0.7424, "step": 4148 }, { "epoch": 1.0510449651678277, "grad_norm": 4.001245021820068, "learning_rate": 9.314402569585443e-06, "loss": 0.8759, "step": 4149 }, { "epoch": 1.051298290056998, "grad_norm": 4.0187506675720215, "learning_rate": 9.31397905425276e-06, "loss": 0.8156, "step": 4150 }, { "epoch": 1.0515516149461686, "grad_norm": 3.8050217628479004, "learning_rate": 9.313555417785568e-06, "loss": 0.7594, "step": 4151 }, { "epoch": 1.0518049398353388, "grad_norm": 3.811541795730591, "learning_rate": 9.31313166019576e-06, "loss": 0.7524, "step": 4152 }, { "epoch": 1.0520582647245091, "grad_norm": 3.808600664138794, "learning_rate": 9.312707781495239e-06, "loss": 0.869, "step": 4153 }, { "epoch": 1.0523115896136797, "grad_norm": 3.384950876235962, "learning_rate": 9.312283781695903e-06, "loss": 0.6546, "step": 4154 }, { "epoch": 1.05256491450285, "grad_norm": 3.976621389389038, "learning_rate": 9.31185966080966e-06, "loss": 0.8433, "step": 4155 }, { "epoch": 1.0528182393920202, "grad_norm": 3.789478302001953, "learning_rate": 9.311435418848419e-06, "loss": 0.8427, "step": 4156 }, { "epoch": 1.0530715642811905, "grad_norm": 4.111769199371338, "learning_rate": 9.311011055824089e-06, "loss": 0.9546, "step": 4157 }, { "epoch": 1.053324889170361, "grad_norm": 3.746201515197754, "learning_rate": 9.310586571748592e-06, "loss": 0.8288, "step": 4158 }, { "epoch": 1.0535782140595313, "grad_norm": 3.287363052368164, "learning_rate": 9.310161966633845e-06, "loss": 0.6317, "step": 4159 }, { "epoch": 1.0538315389487016, "grad_norm": 3.618128538131714, "learning_rate": 9.309737240491767e-06, "loss": 0.8061, "step": 4160 }, { "epoch": 1.0540848638378721, "grad_norm": 3.80902361869812, "learning_rate": 9.30931239333429e-06, "loss": 0.749, "step": 4161 }, { "epoch": 1.0543381887270424, "grad_norm": 3.424065113067627, "learning_rate": 9.308887425173339e-06, "loss": 0.7547, "step": 4162 }, { "epoch": 1.0545915136162127, "grad_norm": 3.779470443725586, "learning_rate": 9.308462336020849e-06, "loss": 0.8846, "step": 4163 }, { "epoch": 1.0548448385053832, "grad_norm": 3.467844247817993, "learning_rate": 9.308037125888756e-06, "loss": 0.7141, "step": 4164 }, { "epoch": 1.0550981633945535, "grad_norm": 
3.766876697540283, "learning_rate": 9.307611794789001e-06, "loss": 0.8758, "step": 4165 }, { "epoch": 1.0553514882837238, "grad_norm": 3.8177969455718994, "learning_rate": 9.307186342733525e-06, "loss": 0.7208, "step": 4166 }, { "epoch": 1.0556048131728943, "grad_norm": 3.287339925765991, "learning_rate": 9.306760769734273e-06, "loss": 0.7097, "step": 4167 }, { "epoch": 1.0558581380620646, "grad_norm": 3.8283634185791016, "learning_rate": 9.3063350758032e-06, "loss": 0.8535, "step": 4168 }, { "epoch": 1.056111462951235, "grad_norm": 3.535898447036743, "learning_rate": 9.305909260952255e-06, "loss": 0.6666, "step": 4169 }, { "epoch": 1.0563647878404052, "grad_norm": 3.6630406379699707, "learning_rate": 9.305483325193397e-06, "loss": 0.8016, "step": 4170 }, { "epoch": 1.0566181127295757, "grad_norm": 4.214574337005615, "learning_rate": 9.305057268538581e-06, "loss": 0.8248, "step": 4171 }, { "epoch": 1.056871437618746, "grad_norm": 3.893692970275879, "learning_rate": 9.304631090999779e-06, "loss": 0.8624, "step": 4172 }, { "epoch": 1.0571247625079163, "grad_norm": 3.6482512950897217, "learning_rate": 9.304204792588953e-06, "loss": 0.8034, "step": 4173 }, { "epoch": 1.0573780873970868, "grad_norm": 4.257894515991211, "learning_rate": 9.303778373318073e-06, "loss": 0.7821, "step": 4174 }, { "epoch": 1.0576314122862571, "grad_norm": 3.569042682647705, "learning_rate": 9.303351833199113e-06, "loss": 0.8068, "step": 4175 }, { "epoch": 1.0578847371754274, "grad_norm": 3.841977596282959, "learning_rate": 9.30292517224405e-06, "loss": 0.8009, "step": 4176 }, { "epoch": 1.058138062064598, "grad_norm": 4.106107711791992, "learning_rate": 9.302498390464868e-06, "loss": 0.7948, "step": 4177 }, { "epoch": 1.0583913869537682, "grad_norm": 3.645066022872925, "learning_rate": 9.302071487873543e-06, "loss": 0.7916, "step": 4178 }, { "epoch": 1.0586447118429385, "grad_norm": 3.900665283203125, "learning_rate": 9.30164446448207e-06, "loss": 0.7928, "step": 4179 }, { "epoch": 1.058898036732109, "grad_norm": 3.4762229919433594, "learning_rate": 9.301217320302436e-06, "loss": 0.7257, "step": 4180 }, { "epoch": 1.0591513616212793, "grad_norm": 3.898374557495117, "learning_rate": 9.300790055346634e-06, "loss": 0.7139, "step": 4181 }, { "epoch": 1.0594046865104496, "grad_norm": 3.7925961017608643, "learning_rate": 9.300362669626665e-06, "loss": 0.8422, "step": 4182 }, { "epoch": 1.0596580113996201, "grad_norm": 3.7468175888061523, "learning_rate": 9.299935163154527e-06, "loss": 0.78, "step": 4183 }, { "epoch": 1.0599113362887904, "grad_norm": 3.210975170135498, "learning_rate": 9.299507535942224e-06, "loss": 0.7004, "step": 4184 }, { "epoch": 1.0601646611779607, "grad_norm": 3.7563984394073486, "learning_rate": 9.299079788001766e-06, "loss": 0.788, "step": 4185 }, { "epoch": 1.060417986067131, "grad_norm": 3.920912742614746, "learning_rate": 9.29865191934516e-06, "loss": 0.7621, "step": 4186 }, { "epoch": 1.0606713109563015, "grad_norm": 3.855454921722412, "learning_rate": 9.298223929984425e-06, "loss": 0.6967, "step": 4187 }, { "epoch": 1.0609246358454718, "grad_norm": 4.34567403793335, "learning_rate": 9.297795819931576e-06, "loss": 0.962, "step": 4188 }, { "epoch": 1.061177960734642, "grad_norm": 3.305553436279297, "learning_rate": 9.297367589198635e-06, "loss": 0.7294, "step": 4189 }, { "epoch": 1.0614312856238126, "grad_norm": 3.634536027908325, "learning_rate": 9.296939237797626e-06, "loss": 0.8723, "step": 4190 }, { "epoch": 1.061684610512983, "grad_norm": 3.124066114425659, "learning_rate": 
9.296510765740577e-06, "loss": 0.7415, "step": 4191 }, { "epoch": 1.0619379354021532, "grad_norm": 3.7073328495025635, "learning_rate": 9.296082173039519e-06, "loss": 0.7426, "step": 4192 }, { "epoch": 1.0621912602913237, "grad_norm": 3.358052968978882, "learning_rate": 9.295653459706488e-06, "loss": 0.8083, "step": 4193 }, { "epoch": 1.062444585180494, "grad_norm": 3.457728624343872, "learning_rate": 9.29522462575352e-06, "loss": 0.7909, "step": 4194 }, { "epoch": 1.0626979100696643, "grad_norm": 3.4607529640197754, "learning_rate": 9.294795671192657e-06, "loss": 0.8017, "step": 4195 }, { "epoch": 1.0629512349588346, "grad_norm": 3.8210277557373047, "learning_rate": 9.294366596035947e-06, "loss": 0.8498, "step": 4196 }, { "epoch": 1.063204559848005, "grad_norm": 3.636653423309326, "learning_rate": 9.293937400295433e-06, "loss": 0.8401, "step": 4197 }, { "epoch": 1.0634578847371754, "grad_norm": 3.7521755695343018, "learning_rate": 9.293508083983171e-06, "loss": 0.6958, "step": 4198 }, { "epoch": 1.0637112096263457, "grad_norm": 3.7031736373901367, "learning_rate": 9.293078647111214e-06, "loss": 0.7144, "step": 4199 }, { "epoch": 1.0639645345155162, "grad_norm": 3.623506546020508, "learning_rate": 9.29264908969162e-06, "loss": 0.6882, "step": 4200 }, { "epoch": 1.0642178594046865, "grad_norm": 3.9294145107269287, "learning_rate": 9.292219411736452e-06, "loss": 0.7163, "step": 4201 }, { "epoch": 1.0644711842938568, "grad_norm": 4.184792995452881, "learning_rate": 9.291789613257774e-06, "loss": 0.8392, "step": 4202 }, { "epoch": 1.0647245091830273, "grad_norm": 4.025232791900635, "learning_rate": 9.291359694267655e-06, "loss": 0.6967, "step": 4203 }, { "epoch": 1.0649778340721976, "grad_norm": 4.411888599395752, "learning_rate": 9.290929654778168e-06, "loss": 0.9006, "step": 4204 }, { "epoch": 1.0652311589613679, "grad_norm": 3.92000412940979, "learning_rate": 9.290499494801387e-06, "loss": 0.7887, "step": 4205 }, { "epoch": 1.0654844838505384, "grad_norm": 3.5988378524780273, "learning_rate": 9.290069214349391e-06, "loss": 0.7804, "step": 4206 }, { "epoch": 1.0657378087397087, "grad_norm": 3.8604774475097656, "learning_rate": 9.289638813434261e-06, "loss": 0.8488, "step": 4207 }, { "epoch": 1.065991133628879, "grad_norm": 3.5935158729553223, "learning_rate": 9.289208292068086e-06, "loss": 0.7512, "step": 4208 }, { "epoch": 1.0662444585180495, "grad_norm": 3.192046880722046, "learning_rate": 9.28877765026295e-06, "loss": 0.69, "step": 4209 }, { "epoch": 1.0664977834072198, "grad_norm": 3.207685708999634, "learning_rate": 9.288346888030948e-06, "loss": 0.7661, "step": 4210 }, { "epoch": 1.06675110829639, "grad_norm": 3.6800575256347656, "learning_rate": 9.287916005384177e-06, "loss": 0.7619, "step": 4211 }, { "epoch": 1.0670044331855606, "grad_norm": 3.293300151824951, "learning_rate": 9.287485002334732e-06, "loss": 0.7534, "step": 4212 }, { "epoch": 1.0672577580747309, "grad_norm": 3.7135701179504395, "learning_rate": 9.28705387889472e-06, "loss": 0.8989, "step": 4213 }, { "epoch": 1.0675110829639012, "grad_norm": 3.69555926322937, "learning_rate": 9.286622635076242e-06, "loss": 0.7713, "step": 4214 }, { "epoch": 1.0677644078530715, "grad_norm": 3.442887783050537, "learning_rate": 9.28619127089141e-06, "loss": 0.7726, "step": 4215 }, { "epoch": 1.068017732742242, "grad_norm": 3.5746614933013916, "learning_rate": 9.285759786352337e-06, "loss": 0.7489, "step": 4216 }, { "epoch": 1.0682710576314123, "grad_norm": 3.65659499168396, "learning_rate": 9.285328181471138e-06, "loss": 0.7464, "step": 
4217 }, { "epoch": 1.0685243825205826, "grad_norm": 3.24165940284729, "learning_rate": 9.28489645625993e-06, "loss": 0.6833, "step": 4218 }, { "epoch": 1.068777707409753, "grad_norm": 3.432204008102417, "learning_rate": 9.284464610730842e-06, "loss": 0.7081, "step": 4219 }, { "epoch": 1.0690310322989234, "grad_norm": 3.7620961666107178, "learning_rate": 9.28403264489599e-06, "loss": 0.7481, "step": 4220 }, { "epoch": 1.0692843571880937, "grad_norm": 3.600041389465332, "learning_rate": 9.283600558767514e-06, "loss": 0.753, "step": 4221 }, { "epoch": 1.0695376820772642, "grad_norm": 3.680054187774658, "learning_rate": 9.28316835235754e-06, "loss": 0.765, "step": 4222 }, { "epoch": 1.0697910069664345, "grad_norm": 4.144865036010742, "learning_rate": 9.282736025678206e-06, "loss": 0.9797, "step": 4223 }, { "epoch": 1.0700443318556048, "grad_norm": 3.513779640197754, "learning_rate": 9.282303578741652e-06, "loss": 0.8246, "step": 4224 }, { "epoch": 1.070297656744775, "grad_norm": 3.870331048965454, "learning_rate": 9.28187101156002e-06, "loss": 0.8284, "step": 4225 }, { "epoch": 1.0705509816339456, "grad_norm": 3.7399344444274902, "learning_rate": 9.281438324145454e-06, "loss": 0.745, "step": 4226 }, { "epoch": 1.0708043065231159, "grad_norm": 3.684959650039673, "learning_rate": 9.28100551651011e-06, "loss": 0.7379, "step": 4227 }, { "epoch": 1.0710576314122862, "grad_norm": 3.779805898666382, "learning_rate": 9.280572588666139e-06, "loss": 0.8191, "step": 4228 }, { "epoch": 1.0713109563014567, "grad_norm": 3.8943259716033936, "learning_rate": 9.280139540625693e-06, "loss": 0.9724, "step": 4229 }, { "epoch": 1.071564281190627, "grad_norm": 4.2728705406188965, "learning_rate": 9.279706372400936e-06, "loss": 0.7738, "step": 4230 }, { "epoch": 1.0718176060797973, "grad_norm": 3.7429397106170654, "learning_rate": 9.27927308400403e-06, "loss": 0.8156, "step": 4231 }, { "epoch": 1.0720709309689678, "grad_norm": 3.7998812198638916, "learning_rate": 9.27883967544714e-06, "loss": 0.7416, "step": 4232 }, { "epoch": 1.072324255858138, "grad_norm": 3.844186305999756, "learning_rate": 9.278406146742438e-06, "loss": 0.8633, "step": 4233 }, { "epoch": 1.0725775807473084, "grad_norm": 3.5452141761779785, "learning_rate": 9.277972497902097e-06, "loss": 0.7541, "step": 4234 }, { "epoch": 1.0728309056364789, "grad_norm": 3.764220952987671, "learning_rate": 9.277538728938294e-06, "loss": 0.8352, "step": 4235 }, { "epoch": 1.0730842305256492, "grad_norm": 3.211428642272949, "learning_rate": 9.277104839863207e-06, "loss": 0.6962, "step": 4236 }, { "epoch": 1.0733375554148195, "grad_norm": 3.6833016872406006, "learning_rate": 9.276670830689021e-06, "loss": 0.8521, "step": 4237 }, { "epoch": 1.07359088030399, "grad_norm": 3.590811252593994, "learning_rate": 9.276236701427924e-06, "loss": 0.8868, "step": 4238 }, { "epoch": 1.0738442051931603, "grad_norm": 3.729499578475952, "learning_rate": 9.275802452092101e-06, "loss": 0.874, "step": 4239 }, { "epoch": 1.0740975300823306, "grad_norm": 3.695556163787842, "learning_rate": 9.275368082693752e-06, "loss": 0.737, "step": 4240 }, { "epoch": 1.0743508549715008, "grad_norm": 3.678680896759033, "learning_rate": 9.27493359324507e-06, "loss": 0.7896, "step": 4241 }, { "epoch": 1.0746041798606714, "grad_norm": 3.8089263439178467, "learning_rate": 9.274498983758255e-06, "loss": 0.8519, "step": 4242 }, { "epoch": 1.0748575047498417, "grad_norm": 3.6422061920166016, "learning_rate": 9.274064254245514e-06, "loss": 0.8217, "step": 4243 }, { "epoch": 1.075110829639012, "grad_norm": 
3.469003200531006, "learning_rate": 9.27362940471905e-06, "loss": 0.695, "step": 4244 }, { "epoch": 1.0753641545281825, "grad_norm": 3.448909044265747, "learning_rate": 9.273194435191078e-06, "loss": 0.7327, "step": 4245 }, { "epoch": 1.0756174794173528, "grad_norm": 3.759049654006958, "learning_rate": 9.272759345673807e-06, "loss": 0.925, "step": 4246 }, { "epoch": 1.075870804306523, "grad_norm": 3.563659906387329, "learning_rate": 9.272324136179459e-06, "loss": 0.7678, "step": 4247 }, { "epoch": 1.0761241291956936, "grad_norm": 3.738771438598633, "learning_rate": 9.271888806720248e-06, "loss": 0.7844, "step": 4248 }, { "epoch": 1.0763774540848638, "grad_norm": 3.7911908626556396, "learning_rate": 9.271453357308401e-06, "loss": 0.9547, "step": 4249 }, { "epoch": 1.0766307789740341, "grad_norm": 3.996831178665161, "learning_rate": 9.27101778795615e-06, "loss": 0.8331, "step": 4250 }, { "epoch": 1.0768841038632047, "grad_norm": 3.803457021713257, "learning_rate": 9.270582098675718e-06, "loss": 0.7729, "step": 4251 }, { "epoch": 1.077137428752375, "grad_norm": 3.7224628925323486, "learning_rate": 9.270146289479343e-06, "loss": 0.7928, "step": 4252 }, { "epoch": 1.0773907536415452, "grad_norm": 3.6929430961608887, "learning_rate": 9.26971036037926e-06, "loss": 0.8014, "step": 4253 }, { "epoch": 1.0776440785307155, "grad_norm": 3.998534917831421, "learning_rate": 9.269274311387712e-06, "loss": 0.7043, "step": 4254 }, { "epoch": 1.077897403419886, "grad_norm": 3.6632144451141357, "learning_rate": 9.268838142516943e-06, "loss": 0.7599, "step": 4255 }, { "epoch": 1.0781507283090563, "grad_norm": 3.255547523498535, "learning_rate": 9.2684018537792e-06, "loss": 0.7823, "step": 4256 }, { "epoch": 1.0784040531982266, "grad_norm": 4.060213565826416, "learning_rate": 9.267965445186733e-06, "loss": 0.8022, "step": 4257 }, { "epoch": 1.0786573780873971, "grad_norm": 3.7716310024261475, "learning_rate": 9.267528916751796e-06, "loss": 0.8277, "step": 4258 }, { "epoch": 1.0789107029765674, "grad_norm": 3.8046209812164307, "learning_rate": 9.267092268486648e-06, "loss": 0.7937, "step": 4259 }, { "epoch": 1.0791640278657377, "grad_norm": 3.2104485034942627, "learning_rate": 9.266655500403549e-06, "loss": 0.6956, "step": 4260 }, { "epoch": 1.0794173527549082, "grad_norm": 3.81565523147583, "learning_rate": 9.266218612514763e-06, "loss": 0.9505, "step": 4261 }, { "epoch": 1.0796706776440785, "grad_norm": 3.8160922527313232, "learning_rate": 9.265781604832558e-06, "loss": 0.8061, "step": 4262 }, { "epoch": 1.0799240025332488, "grad_norm": 3.815609931945801, "learning_rate": 9.265344477369203e-06, "loss": 0.7305, "step": 4263 }, { "epoch": 1.0801773274224193, "grad_norm": 4.010029315948486, "learning_rate": 9.264907230136977e-06, "loss": 0.8887, "step": 4264 }, { "epoch": 1.0804306523115896, "grad_norm": 4.082635879516602, "learning_rate": 9.264469863148152e-06, "loss": 0.8839, "step": 4265 }, { "epoch": 1.08068397720076, "grad_norm": 3.54335618019104, "learning_rate": 9.264032376415013e-06, "loss": 0.6708, "step": 4266 }, { "epoch": 1.0809373020899304, "grad_norm": 3.5170133113861084, "learning_rate": 9.263594769949845e-06, "loss": 0.7863, "step": 4267 }, { "epoch": 1.0811906269791007, "grad_norm": 4.150489807128906, "learning_rate": 9.263157043764932e-06, "loss": 0.8792, "step": 4268 }, { "epoch": 1.081443951868271, "grad_norm": 3.6395976543426514, "learning_rate": 9.262719197872569e-06, "loss": 0.7188, "step": 4269 }, { "epoch": 1.0816972767574413, "grad_norm": 3.2906672954559326, "learning_rate": 
9.262281232285048e-06, "loss": 0.7369, "step": 4270 }, { "epoch": 1.0819506016466118, "grad_norm": 3.4226391315460205, "learning_rate": 9.261843147014666e-06, "loss": 0.7741, "step": 4271 }, { "epoch": 1.0822039265357821, "grad_norm": 3.590555191040039, "learning_rate": 9.261404942073729e-06, "loss": 0.7568, "step": 4272 }, { "epoch": 1.0824572514249524, "grad_norm": 3.4189095497131348, "learning_rate": 9.260966617474535e-06, "loss": 0.7826, "step": 4273 }, { "epoch": 1.082710576314123, "grad_norm": 4.2814130783081055, "learning_rate": 9.260528173229399e-06, "loss": 0.7474, "step": 4274 }, { "epoch": 1.0829639012032932, "grad_norm": 3.827186346054077, "learning_rate": 9.260089609350626e-06, "loss": 0.7962, "step": 4275 }, { "epoch": 1.0832172260924635, "grad_norm": 4.3773298263549805, "learning_rate": 9.259650925850534e-06, "loss": 0.9521, "step": 4276 }, { "epoch": 1.083470550981634, "grad_norm": 3.643171548843384, "learning_rate": 9.259212122741441e-06, "loss": 0.8232, "step": 4277 }, { "epoch": 1.0837238758708043, "grad_norm": 4.062769412994385, "learning_rate": 9.258773200035666e-06, "loss": 0.9115, "step": 4278 }, { "epoch": 1.0839772007599746, "grad_norm": 3.6062681674957275, "learning_rate": 9.258334157745538e-06, "loss": 0.8954, "step": 4279 }, { "epoch": 1.0842305256491451, "grad_norm": 3.7051634788513184, "learning_rate": 9.257894995883382e-06, "loss": 0.7412, "step": 4280 }, { "epoch": 1.0844838505383154, "grad_norm": 3.7709696292877197, "learning_rate": 9.25745571446153e-06, "loss": 0.7234, "step": 4281 }, { "epoch": 1.0847371754274857, "grad_norm": 3.7264342308044434, "learning_rate": 9.257016313492317e-06, "loss": 0.7977, "step": 4282 }, { "epoch": 1.084990500316656, "grad_norm": 3.64595103263855, "learning_rate": 9.25657679298808e-06, "loss": 0.8048, "step": 4283 }, { "epoch": 1.0852438252058265, "grad_norm": 3.6637768745422363, "learning_rate": 9.256137152961162e-06, "loss": 0.8779, "step": 4284 }, { "epoch": 1.0854971500949968, "grad_norm": 3.79441499710083, "learning_rate": 9.255697393423907e-06, "loss": 0.71, "step": 4285 }, { "epoch": 1.085750474984167, "grad_norm": 4.033945560455322, "learning_rate": 9.255257514388665e-06, "loss": 0.8279, "step": 4286 }, { "epoch": 1.0860037998733376, "grad_norm": 3.7677478790283203, "learning_rate": 9.254817515867786e-06, "loss": 0.8403, "step": 4287 }, { "epoch": 1.086257124762508, "grad_norm": 3.4312496185302734, "learning_rate": 9.254377397873626e-06, "loss": 0.6923, "step": 4288 }, { "epoch": 1.0865104496516782, "grad_norm": 3.560711622238159, "learning_rate": 9.253937160418542e-06, "loss": 0.8237, "step": 4289 }, { "epoch": 1.0867637745408487, "grad_norm": 3.5195810794830322, "learning_rate": 9.253496803514896e-06, "loss": 0.6966, "step": 4290 }, { "epoch": 1.087017099430019, "grad_norm": 4.035468101501465, "learning_rate": 9.253056327175054e-06, "loss": 0.7802, "step": 4291 }, { "epoch": 1.0872704243191893, "grad_norm": 3.76127552986145, "learning_rate": 9.252615731411382e-06, "loss": 0.8538, "step": 4292 }, { "epoch": 1.0875237492083598, "grad_norm": 3.775036334991455, "learning_rate": 9.252175016236254e-06, "loss": 0.787, "step": 4293 }, { "epoch": 1.08777707409753, "grad_norm": 3.646313190460205, "learning_rate": 9.251734181662046e-06, "loss": 0.7934, "step": 4294 }, { "epoch": 1.0880303989867004, "grad_norm": 3.2216339111328125, "learning_rate": 9.251293227701133e-06, "loss": 0.65, "step": 4295 }, { "epoch": 1.088283723875871, "grad_norm": 3.8053488731384277, "learning_rate": 9.250852154365898e-06, "loss": 0.8331, "step": 
4296 }, { "epoch": 1.0885370487650412, "grad_norm": 3.5517189502716064, "learning_rate": 9.25041096166873e-06, "loss": 0.7879, "step": 4297 }, { "epoch": 1.0887903736542115, "grad_norm": 3.8531367778778076, "learning_rate": 9.249969649622013e-06, "loss": 0.8339, "step": 4298 }, { "epoch": 1.0890436985433818, "grad_norm": 3.5429439544677734, "learning_rate": 9.249528218238139e-06, "loss": 0.7619, "step": 4299 }, { "epoch": 1.0892970234325523, "grad_norm": 3.2794110774993896, "learning_rate": 9.249086667529504e-06, "loss": 0.7274, "step": 4300 }, { "epoch": 1.0895503483217226, "grad_norm": 3.6612181663513184, "learning_rate": 9.248644997508506e-06, "loss": 0.7675, "step": 4301 }, { "epoch": 1.0898036732108929, "grad_norm": 3.957714557647705, "learning_rate": 9.248203208187551e-06, "loss": 0.9582, "step": 4302 }, { "epoch": 1.0900569981000634, "grad_norm": 3.532169818878174, "learning_rate": 9.24776129957904e-06, "loss": 0.6714, "step": 4303 }, { "epoch": 1.0903103229892337, "grad_norm": 3.4459142684936523, "learning_rate": 9.247319271695382e-06, "loss": 0.7545, "step": 4304 }, { "epoch": 1.090563647878404, "grad_norm": 3.7273051738739014, "learning_rate": 9.246877124548988e-06, "loss": 0.6319, "step": 4305 }, { "epoch": 1.0908169727675745, "grad_norm": 4.333641529083252, "learning_rate": 9.246434858152277e-06, "loss": 1.0639, "step": 4306 }, { "epoch": 1.0910702976567448, "grad_norm": 3.7943003177642822, "learning_rate": 9.245992472517664e-06, "loss": 0.7914, "step": 4307 }, { "epoch": 1.091323622545915, "grad_norm": 3.623751163482666, "learning_rate": 9.245549967657572e-06, "loss": 0.7618, "step": 4308 }, { "epoch": 1.0915769474350856, "grad_norm": 3.7345826625823975, "learning_rate": 9.245107343584427e-06, "loss": 0.8078, "step": 4309 }, { "epoch": 1.091830272324256, "grad_norm": 3.5183663368225098, "learning_rate": 9.244664600310659e-06, "loss": 0.7537, "step": 4310 }, { "epoch": 1.0920835972134262, "grad_norm": 3.544497489929199, "learning_rate": 9.244221737848697e-06, "loss": 0.7847, "step": 4311 }, { "epoch": 1.0923369221025965, "grad_norm": 3.587022304534912, "learning_rate": 9.243778756210979e-06, "loss": 0.8602, "step": 4312 }, { "epoch": 1.092590246991767, "grad_norm": 3.5137152671813965, "learning_rate": 9.243335655409941e-06, "loss": 0.7009, "step": 4313 }, { "epoch": 1.0928435718809373, "grad_norm": 3.9993653297424316, "learning_rate": 9.242892435458026e-06, "loss": 0.7765, "step": 4314 }, { "epoch": 1.0930968967701076, "grad_norm": 3.4785187244415283, "learning_rate": 9.242449096367683e-06, "loss": 0.6808, "step": 4315 }, { "epoch": 1.093350221659278, "grad_norm": 3.496853828430176, "learning_rate": 9.242005638151354e-06, "loss": 0.7718, "step": 4316 }, { "epoch": 1.0936035465484484, "grad_norm": 3.3387272357940674, "learning_rate": 9.241562060821498e-06, "loss": 0.7598, "step": 4317 }, { "epoch": 1.0938568714376187, "grad_norm": 4.0735859870910645, "learning_rate": 9.241118364390565e-06, "loss": 0.9256, "step": 4318 }, { "epoch": 1.0941101963267892, "grad_norm": 3.7407758235931396, "learning_rate": 9.240674548871017e-06, "loss": 0.6623, "step": 4319 }, { "epoch": 1.0943635212159595, "grad_norm": 3.656137228012085, "learning_rate": 9.240230614275316e-06, "loss": 0.7394, "step": 4320 }, { "epoch": 1.0946168461051298, "grad_norm": 3.55597186088562, "learning_rate": 9.239786560615925e-06, "loss": 0.7031, "step": 4321 }, { "epoch": 1.0948701709943003, "grad_norm": 4.085122108459473, "learning_rate": 9.239342387905314e-06, "loss": 0.768, "step": 4322 }, { "epoch": 
1.0951234958834706, "grad_norm": 4.219598293304443, "learning_rate": 9.238898096155958e-06, "loss": 0.8081, "step": 4323 }, { "epoch": 1.0953768207726409, "grad_norm": 3.7133283615112305, "learning_rate": 9.238453685380329e-06, "loss": 0.6618, "step": 4324 }, { "epoch": 1.0956301456618114, "grad_norm": 3.9537534713745117, "learning_rate": 9.238009155590906e-06, "loss": 0.8014, "step": 4325 }, { "epoch": 1.0958834705509817, "grad_norm": 3.4699196815490723, "learning_rate": 9.237564506800174e-06, "loss": 0.7975, "step": 4326 }, { "epoch": 1.096136795440152, "grad_norm": 3.6257779598236084, "learning_rate": 9.237119739020616e-06, "loss": 0.7796, "step": 4327 }, { "epoch": 1.0963901203293223, "grad_norm": 3.7349250316619873, "learning_rate": 9.23667485226472e-06, "loss": 0.7903, "step": 4328 }, { "epoch": 1.0966434452184928, "grad_norm": 3.5463314056396484, "learning_rate": 9.236229846544983e-06, "loss": 0.8036, "step": 4329 }, { "epoch": 1.096896770107663, "grad_norm": 3.45389461517334, "learning_rate": 9.235784721873895e-06, "loss": 0.8413, "step": 4330 }, { "epoch": 1.0971500949968334, "grad_norm": 3.915783166885376, "learning_rate": 9.235339478263958e-06, "loss": 0.7464, "step": 4331 }, { "epoch": 1.0974034198860039, "grad_norm": 3.667205810546875, "learning_rate": 9.234894115727673e-06, "loss": 0.8264, "step": 4332 }, { "epoch": 1.0976567447751742, "grad_norm": 3.0400261878967285, "learning_rate": 9.234448634277547e-06, "loss": 0.6946, "step": 4333 }, { "epoch": 1.0979100696643445, "grad_norm": 3.7669754028320312, "learning_rate": 9.234003033926087e-06, "loss": 0.7198, "step": 4334 }, { "epoch": 1.098163394553515, "grad_norm": 3.7670071125030518, "learning_rate": 9.233557314685806e-06, "loss": 0.7183, "step": 4335 }, { "epoch": 1.0984167194426853, "grad_norm": 4.155233860015869, "learning_rate": 9.23311147656922e-06, "loss": 0.8613, "step": 4336 }, { "epoch": 1.0986700443318556, "grad_norm": 3.7832155227661133, "learning_rate": 9.232665519588848e-06, "loss": 0.7233, "step": 4337 }, { "epoch": 1.098923369221026, "grad_norm": 3.5161235332489014, "learning_rate": 9.232219443757212e-06, "loss": 0.808, "step": 4338 }, { "epoch": 1.0991766941101964, "grad_norm": 3.6883702278137207, "learning_rate": 9.231773249086838e-06, "loss": 0.7893, "step": 4339 }, { "epoch": 1.0994300189993667, "grad_norm": 3.724613904953003, "learning_rate": 9.231326935590252e-06, "loss": 0.7927, "step": 4340 }, { "epoch": 1.099683343888537, "grad_norm": 4.20599365234375, "learning_rate": 9.230880503279991e-06, "loss": 0.8809, "step": 4341 }, { "epoch": 1.0999366687777075, "grad_norm": 3.3758602142333984, "learning_rate": 9.230433952168588e-06, "loss": 0.7751, "step": 4342 }, { "epoch": 1.1001899936668778, "grad_norm": 4.082566261291504, "learning_rate": 9.229987282268582e-06, "loss": 0.9636, "step": 4343 }, { "epoch": 1.100443318556048, "grad_norm": 3.775946617126465, "learning_rate": 9.229540493592517e-06, "loss": 0.762, "step": 4344 }, { "epoch": 1.1006966434452186, "grad_norm": 3.816842794418335, "learning_rate": 9.229093586152936e-06, "loss": 0.811, "step": 4345 }, { "epoch": 1.1009499683343889, "grad_norm": 3.738156795501709, "learning_rate": 9.228646559962388e-06, "loss": 0.7242, "step": 4346 }, { "epoch": 1.1012032932235591, "grad_norm": 3.452763557434082, "learning_rate": 9.22819941503343e-06, "loss": 0.7588, "step": 4347 }, { "epoch": 1.1014566181127297, "grad_norm": 3.708622932434082, "learning_rate": 9.22775215137861e-06, "loss": 0.8296, "step": 4348 }, { "epoch": 1.1017099430019, "grad_norm": 
3.7021007537841797, "learning_rate": 9.227304769010494e-06, "loss": 0.8233, "step": 4349 }, { "epoch": 1.1019632678910702, "grad_norm": 3.600891351699829, "learning_rate": 9.22685726794164e-06, "loss": 0.6824, "step": 4350 }, { "epoch": 1.1022165927802408, "grad_norm": 3.5252370834350586, "learning_rate": 9.226409648184616e-06, "loss": 0.7261, "step": 4351 }, { "epoch": 1.102469917669411, "grad_norm": 3.488820791244507, "learning_rate": 9.225961909751987e-06, "loss": 0.6659, "step": 4352 }, { "epoch": 1.1027232425585813, "grad_norm": 3.602959394454956, "learning_rate": 9.225514052656332e-06, "loss": 0.7595, "step": 4353 }, { "epoch": 1.1029765674477519, "grad_norm": 3.727651834487915, "learning_rate": 9.22506607691022e-06, "loss": 0.8803, "step": 4354 }, { "epoch": 1.1032298923369221, "grad_norm": 4.054435729980469, "learning_rate": 9.224617982526236e-06, "loss": 0.8313, "step": 4355 }, { "epoch": 1.1034832172260924, "grad_norm": 3.574852466583252, "learning_rate": 9.224169769516957e-06, "loss": 0.7235, "step": 4356 }, { "epoch": 1.1037365421152627, "grad_norm": 3.9324018955230713, "learning_rate": 9.22372143789497e-06, "loss": 0.7576, "step": 4357 }, { "epoch": 1.1039898670044332, "grad_norm": 3.902390718460083, "learning_rate": 9.223272987672865e-06, "loss": 0.9231, "step": 4358 }, { "epoch": 1.1042431918936035, "grad_norm": 3.8140463829040527, "learning_rate": 9.222824418863234e-06, "loss": 0.6814, "step": 4359 }, { "epoch": 1.1044965167827738, "grad_norm": 4.1273722648620605, "learning_rate": 9.222375731478673e-06, "loss": 0.8463, "step": 4360 }, { "epoch": 1.1047498416719443, "grad_norm": 3.649075746536255, "learning_rate": 9.221926925531778e-06, "loss": 0.6866, "step": 4361 }, { "epoch": 1.1050031665611146, "grad_norm": 3.250063180923462, "learning_rate": 9.221478001035158e-06, "loss": 0.8044, "step": 4362 }, { "epoch": 1.105256491450285, "grad_norm": 3.6262922286987305, "learning_rate": 9.221028958001411e-06, "loss": 0.7421, "step": 4363 }, { "epoch": 1.1055098163394554, "grad_norm": 3.5227932929992676, "learning_rate": 9.22057979644315e-06, "loss": 0.7195, "step": 4364 }, { "epoch": 1.1057631412286257, "grad_norm": 3.579219102859497, "learning_rate": 9.220130516372986e-06, "loss": 0.8423, "step": 4365 }, { "epoch": 1.106016466117796, "grad_norm": 3.35093355178833, "learning_rate": 9.219681117803537e-06, "loss": 0.7214, "step": 4366 }, { "epoch": 1.1062697910069663, "grad_norm": 3.8864123821258545, "learning_rate": 9.219231600747418e-06, "loss": 0.686, "step": 4367 }, { "epoch": 1.1065231158961368, "grad_norm": 3.799591302871704, "learning_rate": 9.218781965217252e-06, "loss": 0.7165, "step": 4368 }, { "epoch": 1.1067764407853071, "grad_norm": 3.962235450744629, "learning_rate": 9.218332211225667e-06, "loss": 0.8037, "step": 4369 }, { "epoch": 1.1070297656744774, "grad_norm": 3.679168939590454, "learning_rate": 9.21788233878529e-06, "loss": 0.7558, "step": 4370 }, { "epoch": 1.107283090563648, "grad_norm": 3.9936811923980713, "learning_rate": 9.217432347908754e-06, "loss": 0.8641, "step": 4371 }, { "epoch": 1.1075364154528182, "grad_norm": 3.7278389930725098, "learning_rate": 9.216982238608696e-06, "loss": 0.8261, "step": 4372 }, { "epoch": 1.1077897403419885, "grad_norm": 4.024977684020996, "learning_rate": 9.216532010897751e-06, "loss": 0.8757, "step": 4373 }, { "epoch": 1.108043065231159, "grad_norm": 3.846114158630371, "learning_rate": 9.216081664788565e-06, "loss": 0.8498, "step": 4374 }, { "epoch": 1.1082963901203293, "grad_norm": 3.6417713165283203, "learning_rate": 
9.21563120029378e-06, "loss": 0.7203, "step": 4375 }, { "epoch": 1.1085497150094996, "grad_norm": 3.7245826721191406, "learning_rate": 9.215180617426047e-06, "loss": 0.8541, "step": 4376 }, { "epoch": 1.1088030398986701, "grad_norm": 3.698324680328369, "learning_rate": 9.21472991619802e-06, "loss": 0.8451, "step": 4377 }, { "epoch": 1.1090563647878404, "grad_norm": 4.14930534362793, "learning_rate": 9.21427909662235e-06, "loss": 0.8199, "step": 4378 }, { "epoch": 1.1093096896770107, "grad_norm": 3.709798574447632, "learning_rate": 9.213828158711699e-06, "loss": 0.755, "step": 4379 }, { "epoch": 1.1095630145661812, "grad_norm": 3.5829548835754395, "learning_rate": 9.213377102478728e-06, "loss": 0.7758, "step": 4380 }, { "epoch": 1.1098163394553515, "grad_norm": 4.226644992828369, "learning_rate": 9.212925927936103e-06, "loss": 1.0089, "step": 4381 }, { "epoch": 1.1100696643445218, "grad_norm": 3.9692909717559814, "learning_rate": 9.21247463509649e-06, "loss": 0.8052, "step": 4382 }, { "epoch": 1.1103229892336923, "grad_norm": 4.0409111976623535, "learning_rate": 9.212023223972568e-06, "loss": 0.8921, "step": 4383 }, { "epoch": 1.1105763141228626, "grad_norm": 3.563741445541382, "learning_rate": 9.211571694577006e-06, "loss": 0.7763, "step": 4384 }, { "epoch": 1.110829639012033, "grad_norm": 4.491901397705078, "learning_rate": 9.211120046922483e-06, "loss": 0.731, "step": 4385 }, { "epoch": 1.1110829639012032, "grad_norm": 3.8411054611206055, "learning_rate": 9.210668281021683e-06, "loss": 0.8005, "step": 4386 }, { "epoch": 1.1113362887903737, "grad_norm": 4.127683639526367, "learning_rate": 9.210216396887293e-06, "loss": 0.8026, "step": 4387 }, { "epoch": 1.111589613679544, "grad_norm": 3.6287953853607178, "learning_rate": 9.209764394532e-06, "loss": 0.7264, "step": 4388 }, { "epoch": 1.1118429385687143, "grad_norm": 4.0084733963012695, "learning_rate": 9.209312273968493e-06, "loss": 0.8431, "step": 4389 }, { "epoch": 1.1120962634578848, "grad_norm": 3.1336419582366943, "learning_rate": 9.208860035209472e-06, "loss": 0.687, "step": 4390 }, { "epoch": 1.112349588347055, "grad_norm": 3.6700150966644287, "learning_rate": 9.208407678267635e-06, "loss": 0.8033, "step": 4391 }, { "epoch": 1.1126029132362254, "grad_norm": 3.8876240253448486, "learning_rate": 9.207955203155681e-06, "loss": 0.7619, "step": 4392 }, { "epoch": 1.112856238125396, "grad_norm": 3.584474802017212, "learning_rate": 9.20750260988632e-06, "loss": 0.7602, "step": 4393 }, { "epoch": 1.1131095630145662, "grad_norm": 3.9350109100341797, "learning_rate": 9.207049898472253e-06, "loss": 0.8195, "step": 4394 }, { "epoch": 1.1133628879037365, "grad_norm": 3.685990571975708, "learning_rate": 9.2065970689262e-06, "loss": 0.8204, "step": 4395 }, { "epoch": 1.1136162127929068, "grad_norm": 4.11929178237915, "learning_rate": 9.206144121260871e-06, "loss": 0.755, "step": 4396 }, { "epoch": 1.1138695376820773, "grad_norm": 3.9196510314941406, "learning_rate": 9.205691055488987e-06, "loss": 0.8139, "step": 4397 }, { "epoch": 1.1141228625712476, "grad_norm": 3.9029970169067383, "learning_rate": 9.20523787162327e-06, "loss": 0.7376, "step": 4398 }, { "epoch": 1.1143761874604179, "grad_norm": 3.4818239212036133, "learning_rate": 9.204784569676444e-06, "loss": 0.7489, "step": 4399 }, { "epoch": 1.1146295123495884, "grad_norm": 3.872526168823242, "learning_rate": 9.20433114966124e-06, "loss": 0.6817, "step": 4400 }, { "epoch": 1.1148828372387587, "grad_norm": 3.580820083618164, "learning_rate": 9.203877611590384e-06, "loss": 0.833, "step": 4401 
}, { "epoch": 1.115136162127929, "grad_norm": 3.9937806129455566, "learning_rate": 9.203423955476616e-06, "loss": 0.7673, "step": 4402 }, { "epoch": 1.1153894870170995, "grad_norm": 3.680983066558838, "learning_rate": 9.202970181332674e-06, "loss": 0.6896, "step": 4403 }, { "epoch": 1.1156428119062698, "grad_norm": 3.7662012577056885, "learning_rate": 9.2025162891713e-06, "loss": 0.8438, "step": 4404 }, { "epoch": 1.11589613679544, "grad_norm": 3.911691188812256, "learning_rate": 9.202062279005237e-06, "loss": 0.9176, "step": 4405 }, { "epoch": 1.1161494616846106, "grad_norm": 3.595184326171875, "learning_rate": 9.201608150847234e-06, "loss": 0.7462, "step": 4406 }, { "epoch": 1.116402786573781, "grad_norm": 4.015063762664795, "learning_rate": 9.201153904710045e-06, "loss": 0.8581, "step": 4407 }, { "epoch": 1.1166561114629512, "grad_norm": 3.923367500305176, "learning_rate": 9.200699540606423e-06, "loss": 0.7719, "step": 4408 }, { "epoch": 1.1169094363521217, "grad_norm": 3.704188823699951, "learning_rate": 9.200245058549127e-06, "loss": 0.7193, "step": 4409 }, { "epoch": 1.117162761241292, "grad_norm": 4.123978137969971, "learning_rate": 9.199790458550917e-06, "loss": 0.7962, "step": 4410 }, { "epoch": 1.1174160861304623, "grad_norm": 3.5524094104766846, "learning_rate": 9.19933574062456e-06, "loss": 0.7907, "step": 4411 }, { "epoch": 1.1176694110196326, "grad_norm": 3.885824203491211, "learning_rate": 9.198880904782823e-06, "loss": 0.7299, "step": 4412 }, { "epoch": 1.117922735908803, "grad_norm": 3.6361563205718994, "learning_rate": 9.19842595103848e-06, "loss": 0.7897, "step": 4413 }, { "epoch": 1.1181760607979734, "grad_norm": 3.3709397315979004, "learning_rate": 9.1979708794043e-06, "loss": 0.6722, "step": 4414 }, { "epoch": 1.1184293856871437, "grad_norm": 3.6834933757781982, "learning_rate": 9.197515689893068e-06, "loss": 0.7568, "step": 4415 }, { "epoch": 1.1186827105763142, "grad_norm": 3.6808292865753174, "learning_rate": 9.197060382517563e-06, "loss": 0.682, "step": 4416 }, { "epoch": 1.1189360354654845, "grad_norm": 3.3640832901000977, "learning_rate": 9.196604957290569e-06, "loss": 0.7307, "step": 4417 }, { "epoch": 1.1191893603546548, "grad_norm": 3.6205482482910156, "learning_rate": 9.196149414224875e-06, "loss": 0.7453, "step": 4418 }, { "epoch": 1.1194426852438253, "grad_norm": 3.865307092666626, "learning_rate": 9.195693753333271e-06, "loss": 0.8446, "step": 4419 }, { "epoch": 1.1196960101329956, "grad_norm": 3.702802896499634, "learning_rate": 9.195237974628555e-06, "loss": 0.6828, "step": 4420 }, { "epoch": 1.1199493350221659, "grad_norm": 3.967543125152588, "learning_rate": 9.19478207812352e-06, "loss": 0.8518, "step": 4421 }, { "epoch": 1.1202026599113364, "grad_norm": 3.5002949237823486, "learning_rate": 9.194326063830972e-06, "loss": 0.6606, "step": 4422 }, { "epoch": 1.1204559848005067, "grad_norm": 3.9500465393066406, "learning_rate": 9.193869931763715e-06, "loss": 0.8085, "step": 4423 }, { "epoch": 1.120709309689677, "grad_norm": 3.9784748554229736, "learning_rate": 9.193413681934553e-06, "loss": 0.6886, "step": 4424 }, { "epoch": 1.1209626345788473, "grad_norm": 3.8072926998138428, "learning_rate": 9.192957314356303e-06, "loss": 0.8054, "step": 4425 }, { "epoch": 1.1212159594680178, "grad_norm": 3.774982452392578, "learning_rate": 9.192500829041775e-06, "loss": 0.8279, "step": 4426 }, { "epoch": 1.121469284357188, "grad_norm": 3.39636492729187, "learning_rate": 9.19204422600379e-06, "loss": 0.7936, "step": 4427 }, { "epoch": 1.1217226092463584, "grad_norm": 
3.933150053024292, "learning_rate": 9.191587505255166e-06, "loss": 0.7482, "step": 4428 }, { "epoch": 1.1219759341355289, "grad_norm": 3.645341634750366, "learning_rate": 9.19113066680873e-06, "loss": 0.8449, "step": 4429 }, { "epoch": 1.1222292590246992, "grad_norm": 3.5464110374450684, "learning_rate": 9.190673710677308e-06, "loss": 0.7274, "step": 4430 }, { "epoch": 1.1224825839138695, "grad_norm": 3.369464635848999, "learning_rate": 9.190216636873735e-06, "loss": 0.749, "step": 4431 }, { "epoch": 1.12273590880304, "grad_norm": 3.8639848232269287, "learning_rate": 9.189759445410841e-06, "loss": 0.8226, "step": 4432 }, { "epoch": 1.1229892336922103, "grad_norm": 3.737210988998413, "learning_rate": 9.189302136301466e-06, "loss": 0.7601, "step": 4433 }, { "epoch": 1.1232425585813806, "grad_norm": 4.071779251098633, "learning_rate": 9.18884470955845e-06, "loss": 0.9538, "step": 4434 }, { "epoch": 1.123495883470551, "grad_norm": 3.3898377418518066, "learning_rate": 9.18838716519464e-06, "loss": 0.7252, "step": 4435 }, { "epoch": 1.1237492083597214, "grad_norm": 3.7422585487365723, "learning_rate": 9.187929503222879e-06, "loss": 0.7957, "step": 4436 }, { "epoch": 1.1240025332488917, "grad_norm": 3.3443500995635986, "learning_rate": 9.18747172365602e-06, "loss": 0.7858, "step": 4437 }, { "epoch": 1.1242558581380622, "grad_norm": 3.7862918376922607, "learning_rate": 9.187013826506919e-06, "loss": 0.7939, "step": 4438 }, { "epoch": 1.1245091830272325, "grad_norm": 3.5818467140197754, "learning_rate": 9.186555811788431e-06, "loss": 0.8862, "step": 4439 }, { "epoch": 1.1247625079164028, "grad_norm": 3.5962002277374268, "learning_rate": 9.186097679513419e-06, "loss": 0.8394, "step": 4440 }, { "epoch": 1.1250158328055733, "grad_norm": 3.585381031036377, "learning_rate": 9.185639429694744e-06, "loss": 0.6962, "step": 4441 }, { "epoch": 1.1252691576947436, "grad_norm": 3.802293300628662, "learning_rate": 9.185181062345278e-06, "loss": 0.8747, "step": 4442 }, { "epoch": 1.1255224825839139, "grad_norm": 3.661857843399048, "learning_rate": 9.184722577477889e-06, "loss": 0.7727, "step": 4443 }, { "epoch": 1.1257758074730841, "grad_norm": 3.70066237449646, "learning_rate": 9.18426397510545e-06, "loss": 0.7525, "step": 4444 }, { "epoch": 1.1260291323622547, "grad_norm": 3.3943498134613037, "learning_rate": 9.183805255240841e-06, "loss": 0.6573, "step": 4445 }, { "epoch": 1.126282457251425, "grad_norm": 3.671624183654785, "learning_rate": 9.183346417896938e-06, "loss": 0.8727, "step": 4446 }, { "epoch": 1.1265357821405952, "grad_norm": 4.023936748504639, "learning_rate": 9.182887463086633e-06, "loss": 0.8415, "step": 4447 }, { "epoch": 1.1267891070297658, "grad_norm": 3.890685796737671, "learning_rate": 9.182428390822806e-06, "loss": 0.9273, "step": 4448 }, { "epoch": 1.127042431918936, "grad_norm": 3.540053129196167, "learning_rate": 9.18196920111835e-06, "loss": 0.7377, "step": 4449 }, { "epoch": 1.1272957568081063, "grad_norm": 3.9170687198638916, "learning_rate": 9.181509893986158e-06, "loss": 0.7624, "step": 4450 }, { "epoch": 1.1275490816972766, "grad_norm": 3.3918919563293457, "learning_rate": 9.18105046943913e-06, "loss": 0.7004, "step": 4451 }, { "epoch": 1.1278024065864471, "grad_norm": 3.777207136154175, "learning_rate": 9.180590927490163e-06, "loss": 0.8214, "step": 4452 }, { "epoch": 1.1280557314756174, "grad_norm": 3.8879199028015137, "learning_rate": 9.180131268152164e-06, "loss": 0.8458, "step": 4453 }, { "epoch": 1.1283090563647877, "grad_norm": 4.480652332305908, "learning_rate": 
9.179671491438035e-06, "loss": 0.8267, "step": 4454 }, { "epoch": 1.1285623812539582, "grad_norm": 3.8782782554626465, "learning_rate": 9.179211597360693e-06, "loss": 0.8176, "step": 4455 }, { "epoch": 1.1288157061431285, "grad_norm": 4.272160530090332, "learning_rate": 9.178751585933046e-06, "loss": 0.8025, "step": 4456 }, { "epoch": 1.1290690310322988, "grad_norm": 3.7379496097564697, "learning_rate": 9.178291457168012e-06, "loss": 0.8301, "step": 4457 }, { "epoch": 1.1293223559214693, "grad_norm": 3.911961317062378, "learning_rate": 9.177831211078513e-06, "loss": 0.6796, "step": 4458 }, { "epoch": 1.1295756808106396, "grad_norm": 3.614490032196045, "learning_rate": 9.177370847677472e-06, "loss": 0.7909, "step": 4459 }, { "epoch": 1.12982900569981, "grad_norm": 4.423027038574219, "learning_rate": 9.176910366977816e-06, "loss": 0.7512, "step": 4460 }, { "epoch": 1.1300823305889804, "grad_norm": 3.891252279281616, "learning_rate": 9.176449768992474e-06, "loss": 0.9182, "step": 4461 }, { "epoch": 1.1303356554781507, "grad_norm": 4.018182754516602, "learning_rate": 9.175989053734379e-06, "loss": 0.8079, "step": 4462 }, { "epoch": 1.130588980367321, "grad_norm": 3.9636101722717285, "learning_rate": 9.17552822121647e-06, "loss": 0.8613, "step": 4463 }, { "epoch": 1.1308423052564915, "grad_norm": 3.582052230834961, "learning_rate": 9.175067271451685e-06, "loss": 0.9291, "step": 4464 }, { "epoch": 1.1310956301456618, "grad_norm": 3.36541748046875, "learning_rate": 9.174606204452967e-06, "loss": 0.7971, "step": 4465 }, { "epoch": 1.1313489550348321, "grad_norm": 3.6572256088256836, "learning_rate": 9.174145020233265e-06, "loss": 0.8778, "step": 4466 }, { "epoch": 1.1316022799240026, "grad_norm": 3.8592522144317627, "learning_rate": 9.173683718805525e-06, "loss": 0.9217, "step": 4467 }, { "epoch": 1.131855604813173, "grad_norm": 3.251784563064575, "learning_rate": 9.173222300182706e-06, "loss": 0.7041, "step": 4468 }, { "epoch": 1.1321089297023432, "grad_norm": 3.740300178527832, "learning_rate": 9.172760764377755e-06, "loss": 0.8344, "step": 4469 }, { "epoch": 1.1323622545915137, "grad_norm": 3.6480960845947266, "learning_rate": 9.172299111403643e-06, "loss": 0.7285, "step": 4470 }, { "epoch": 1.132615579480684, "grad_norm": 3.844107151031494, "learning_rate": 9.171837341273324e-06, "loss": 0.7676, "step": 4471 }, { "epoch": 1.1328689043698543, "grad_norm": 3.5058791637420654, "learning_rate": 9.17137545399977e-06, "loss": 0.8908, "step": 4472 }, { "epoch": 1.1331222292590246, "grad_norm": 4.316401958465576, "learning_rate": 9.170913449595945e-06, "loss": 0.9873, "step": 4473 }, { "epoch": 1.1333755541481951, "grad_norm": 3.805889368057251, "learning_rate": 9.170451328074828e-06, "loss": 0.8489, "step": 4474 }, { "epoch": 1.1336288790373654, "grad_norm": 4.286030292510986, "learning_rate": 9.16998908944939e-06, "loss": 0.9839, "step": 4475 }, { "epoch": 1.1338822039265357, "grad_norm": 3.9902102947235107, "learning_rate": 9.169526733732614e-06, "loss": 0.7551, "step": 4476 }, { "epoch": 1.1341355288157062, "grad_norm": 3.488490581512451, "learning_rate": 9.169064260937483e-06, "loss": 0.8311, "step": 4477 }, { "epoch": 1.1343888537048765, "grad_norm": 4.064438343048096, "learning_rate": 9.16860167107698e-06, "loss": 0.885, "step": 4478 }, { "epoch": 1.1346421785940468, "grad_norm": 3.749305248260498, "learning_rate": 9.168138964164096e-06, "loss": 0.8578, "step": 4479 }, { "epoch": 1.134895503483217, "grad_norm": 3.4141809940338135, "learning_rate": 9.167676140211823e-06, "loss": 0.8509, "step": 
4480 }, { "epoch": 1.1351488283723876, "grad_norm": 4.0905303955078125, "learning_rate": 9.167213199233159e-06, "loss": 0.905, "step": 4481 }, { "epoch": 1.135402153261558, "grad_norm": 4.127016544342041, "learning_rate": 9.166750141241099e-06, "loss": 0.8783, "step": 4482 }, { "epoch": 1.1356554781507282, "grad_norm": 3.519977569580078, "learning_rate": 9.166286966248648e-06, "loss": 0.7849, "step": 4483 }, { "epoch": 1.1359088030398987, "grad_norm": 3.529370069503784, "learning_rate": 9.165823674268812e-06, "loss": 0.7462, "step": 4484 }, { "epoch": 1.136162127929069, "grad_norm": 3.660217761993408, "learning_rate": 9.165360265314601e-06, "loss": 0.6998, "step": 4485 }, { "epoch": 1.1364154528182393, "grad_norm": 3.851372718811035, "learning_rate": 9.164896739399026e-06, "loss": 0.7012, "step": 4486 }, { "epoch": 1.1366687777074098, "grad_norm": 3.846855401992798, "learning_rate": 9.164433096535102e-06, "loss": 0.6293, "step": 4487 }, { "epoch": 1.13692210259658, "grad_norm": 3.5173442363739014, "learning_rate": 9.163969336735847e-06, "loss": 0.7445, "step": 4488 }, { "epoch": 1.1371754274857504, "grad_norm": 3.500772714614868, "learning_rate": 9.163505460014286e-06, "loss": 0.8659, "step": 4489 }, { "epoch": 1.137428752374921, "grad_norm": 4.18075704574585, "learning_rate": 9.163041466383443e-06, "loss": 0.7625, "step": 4490 }, { "epoch": 1.1376820772640912, "grad_norm": 3.931058168411255, "learning_rate": 9.162577355856346e-06, "loss": 0.862, "step": 4491 }, { "epoch": 1.1379354021532615, "grad_norm": 3.5401864051818848, "learning_rate": 9.162113128446028e-06, "loss": 0.6734, "step": 4492 }, { "epoch": 1.138188727042432, "grad_norm": 3.6016790866851807, "learning_rate": 9.161648784165525e-06, "loss": 0.7529, "step": 4493 }, { "epoch": 1.1384420519316023, "grad_norm": 4.87125301361084, "learning_rate": 9.161184323027874e-06, "loss": 0.8357, "step": 4494 }, { "epoch": 1.1386953768207726, "grad_norm": 3.4067635536193848, "learning_rate": 9.160719745046117e-06, "loss": 0.7535, "step": 4495 }, { "epoch": 1.1389487017099431, "grad_norm": 3.5701889991760254, "learning_rate": 9.1602550502333e-06, "loss": 0.8978, "step": 4496 }, { "epoch": 1.1392020265991134, "grad_norm": 3.7475717067718506, "learning_rate": 9.15979023860247e-06, "loss": 0.7395, "step": 4497 }, { "epoch": 1.1394553514882837, "grad_norm": 3.598468542098999, "learning_rate": 9.159325310166683e-06, "loss": 0.7488, "step": 4498 }, { "epoch": 1.139708676377454, "grad_norm": 3.6979329586029053, "learning_rate": 9.158860264938987e-06, "loss": 0.7688, "step": 4499 }, { "epoch": 1.1399620012666245, "grad_norm": 4.062190055847168, "learning_rate": 9.158395102932445e-06, "loss": 0.9437, "step": 4500 }, { "epoch": 1.1399620012666245, "eval_loss": 1.1921050548553467, "eval_runtime": 13.9602, "eval_samples_per_second": 28.653, "eval_steps_per_second": 3.582, "step": 4500 }, { "epoch": 1.1402153261557948, "grad_norm": 3.9083962440490723, "learning_rate": 9.157929824160117e-06, "loss": 0.7845, "step": 4501 }, { "epoch": 1.140468651044965, "grad_norm": 4.207871437072754, "learning_rate": 9.15746442863507e-06, "loss": 0.9516, "step": 4502 }, { "epoch": 1.1407219759341356, "grad_norm": 4.152109622955322, "learning_rate": 9.15699891637037e-06, "loss": 0.9171, "step": 4503 }, { "epoch": 1.140975300823306, "grad_norm": 3.7536070346832275, "learning_rate": 9.156533287379088e-06, "loss": 0.7167, "step": 4504 }, { "epoch": 1.1412286257124762, "grad_norm": 4.014231204986572, "learning_rate": 9.1560675416743e-06, "loss": 0.7548, "step": 4505 }, { 
"epoch": 1.1414819506016467, "grad_norm": 3.6225619316101074, "learning_rate": 9.155601679269082e-06, "loss": 0.7183, "step": 4506 }, { "epoch": 1.141735275490817, "grad_norm": 3.1658689975738525, "learning_rate": 9.155135700176517e-06, "loss": 0.7276, "step": 4507 }, { "epoch": 1.1419886003799873, "grad_norm": 3.6831867694854736, "learning_rate": 9.15466960440969e-06, "loss": 0.7494, "step": 4508 }, { "epoch": 1.1422419252691576, "grad_norm": 3.932875871658325, "learning_rate": 9.154203391981687e-06, "loss": 0.9567, "step": 4509 }, { "epoch": 1.142495250158328, "grad_norm": 3.699507236480713, "learning_rate": 9.1537370629056e-06, "loss": 0.6713, "step": 4510 }, { "epoch": 1.1427485750474984, "grad_norm": 3.869405746459961, "learning_rate": 9.15327061719452e-06, "loss": 0.7468, "step": 4511 }, { "epoch": 1.1430018999366687, "grad_norm": 3.7737667560577393, "learning_rate": 9.152804054861552e-06, "loss": 0.884, "step": 4512 }, { "epoch": 1.1432552248258392, "grad_norm": 4.137895107269287, "learning_rate": 9.152337375919792e-06, "loss": 0.7774, "step": 4513 }, { "epoch": 1.1435085497150095, "grad_norm": 3.4484589099884033, "learning_rate": 9.151870580382343e-06, "loss": 0.7318, "step": 4514 }, { "epoch": 1.1437618746041798, "grad_norm": 3.6685168743133545, "learning_rate": 9.151403668262314e-06, "loss": 0.7481, "step": 4515 }, { "epoch": 1.1440151994933503, "grad_norm": 3.924276351928711, "learning_rate": 9.150936639572816e-06, "loss": 0.8262, "step": 4516 }, { "epoch": 1.1442685243825206, "grad_norm": 3.643343210220337, "learning_rate": 9.150469494326961e-06, "loss": 0.8521, "step": 4517 }, { "epoch": 1.1445218492716909, "grad_norm": 3.7538487911224365, "learning_rate": 9.15000223253787e-06, "loss": 0.856, "step": 4518 }, { "epoch": 1.1447751741608614, "grad_norm": 3.8249545097351074, "learning_rate": 9.14953485421866e-06, "loss": 0.7416, "step": 4519 }, { "epoch": 1.1450284990500317, "grad_norm": 3.8859763145446777, "learning_rate": 9.149067359382457e-06, "loss": 0.8388, "step": 4520 }, { "epoch": 1.145281823939202, "grad_norm": 3.4407546520233154, "learning_rate": 9.148599748042388e-06, "loss": 0.8163, "step": 4521 }, { "epoch": 1.1455351488283725, "grad_norm": 3.633143186569214, "learning_rate": 9.148132020211582e-06, "loss": 0.7831, "step": 4522 }, { "epoch": 1.1457884737175428, "grad_norm": 4.0248188972473145, "learning_rate": 9.147664175903172e-06, "loss": 0.9102, "step": 4523 }, { "epoch": 1.146041798606713, "grad_norm": 3.8170149326324463, "learning_rate": 9.147196215130295e-06, "loss": 0.7716, "step": 4524 }, { "epoch": 1.1462951234958836, "grad_norm": 4.042696475982666, "learning_rate": 9.146728137906093e-06, "loss": 0.8385, "step": 4525 }, { "epoch": 1.1465484483850539, "grad_norm": 3.540112257003784, "learning_rate": 9.146259944243709e-06, "loss": 0.7392, "step": 4526 }, { "epoch": 1.1468017732742242, "grad_norm": 3.671898126602173, "learning_rate": 9.14579163415629e-06, "loss": 0.7941, "step": 4527 }, { "epoch": 1.1470550981633945, "grad_norm": 4.141158103942871, "learning_rate": 9.145323207656983e-06, "loss": 0.8339, "step": 4528 }, { "epoch": 1.147308423052565, "grad_norm": 3.43936824798584, "learning_rate": 9.144854664758943e-06, "loss": 0.6344, "step": 4529 }, { "epoch": 1.1475617479417353, "grad_norm": 3.8091511726379395, "learning_rate": 9.144386005475328e-06, "loss": 0.8165, "step": 4530 }, { "epoch": 1.1478150728309056, "grad_norm": 3.5215532779693604, "learning_rate": 9.143917229819296e-06, "loss": 0.7855, "step": 4531 }, { "epoch": 1.148068397720076, "grad_norm": 
3.872860908508301, "learning_rate": 9.143448337804011e-06, "loss": 0.8541, "step": 4532 }, { "epoch": 1.1483217226092464, "grad_norm": 3.8704535961151123, "learning_rate": 9.142979329442636e-06, "loss": 0.8286, "step": 4533 }, { "epoch": 1.1485750474984167, "grad_norm": 3.385845422744751, "learning_rate": 9.142510204748345e-06, "loss": 0.6775, "step": 4534 }, { "epoch": 1.1488283723875872, "grad_norm": 3.9064671993255615, "learning_rate": 9.142040963734308e-06, "loss": 0.7115, "step": 4535 }, { "epoch": 1.1490816972767575, "grad_norm": 3.4068045616149902, "learning_rate": 9.141571606413704e-06, "loss": 0.7802, "step": 4536 }, { "epoch": 1.1493350221659278, "grad_norm": 3.667557716369629, "learning_rate": 9.141102132799708e-06, "loss": 0.704, "step": 4537 }, { "epoch": 1.149588347055098, "grad_norm": 3.244607925415039, "learning_rate": 9.140632542905508e-06, "loss": 0.7116, "step": 4538 }, { "epoch": 1.1498416719442686, "grad_norm": 3.388017416000366, "learning_rate": 9.140162836744284e-06, "loss": 0.6418, "step": 4539 }, { "epoch": 1.1500949968334389, "grad_norm": 3.6708273887634277, "learning_rate": 9.13969301432923e-06, "loss": 0.7388, "step": 4540 }, { "epoch": 1.1503483217226091, "grad_norm": 3.588341236114502, "learning_rate": 9.139223075673534e-06, "loss": 0.7893, "step": 4541 }, { "epoch": 1.1506016466117797, "grad_norm": 3.6004638671875, "learning_rate": 9.138753020790396e-06, "loss": 0.7852, "step": 4542 }, { "epoch": 1.15085497150095, "grad_norm": 4.104678630828857, "learning_rate": 9.138282849693013e-06, "loss": 0.8434, "step": 4543 }, { "epoch": 1.1511082963901202, "grad_norm": 4.082339286804199, "learning_rate": 9.137812562394585e-06, "loss": 0.7289, "step": 4544 }, { "epoch": 1.1513616212792908, "grad_norm": 3.8769257068634033, "learning_rate": 9.13734215890832e-06, "loss": 0.7936, "step": 4545 }, { "epoch": 1.151614946168461, "grad_norm": 4.094728469848633, "learning_rate": 9.136871639247428e-06, "loss": 0.8328, "step": 4546 }, { "epoch": 1.1518682710576313, "grad_norm": 3.725374698638916, "learning_rate": 9.136401003425117e-06, "loss": 0.7151, "step": 4547 }, { "epoch": 1.1521215959468019, "grad_norm": 3.8494014739990234, "learning_rate": 9.135930251454607e-06, "loss": 0.8312, "step": 4548 }, { "epoch": 1.1523749208359721, "grad_norm": 3.7849738597869873, "learning_rate": 9.135459383349113e-06, "loss": 0.8568, "step": 4549 }, { "epoch": 1.1526282457251424, "grad_norm": 3.453423261642456, "learning_rate": 9.134988399121856e-06, "loss": 0.7342, "step": 4550 }, { "epoch": 1.152881570614313, "grad_norm": 3.2565577030181885, "learning_rate": 9.134517298786065e-06, "loss": 0.8274, "step": 4551 }, { "epoch": 1.1531348955034832, "grad_norm": 3.9076638221740723, "learning_rate": 9.134046082354965e-06, "loss": 0.9612, "step": 4552 }, { "epoch": 1.1533882203926535, "grad_norm": 3.7145302295684814, "learning_rate": 9.133574749841789e-06, "loss": 0.8045, "step": 4553 }, { "epoch": 1.153641545281824, "grad_norm": 3.959188222885132, "learning_rate": 9.133103301259772e-06, "loss": 0.7974, "step": 4554 }, { "epoch": 1.1538948701709943, "grad_norm": 3.694349765777588, "learning_rate": 9.132631736622148e-06, "loss": 0.8629, "step": 4555 }, { "epoch": 1.1541481950601646, "grad_norm": 3.4247512817382812, "learning_rate": 9.132160055942165e-06, "loss": 0.8294, "step": 4556 }, { "epoch": 1.154401519949335, "grad_norm": 3.6549019813537598, "learning_rate": 9.131688259233063e-06, "loss": 0.7506, "step": 4557 }, { "epoch": 1.1546548448385054, "grad_norm": 3.674513339996338, "learning_rate": 
9.131216346508092e-06, "loss": 0.7861, "step": 4558 }, { "epoch": 1.1549081697276757, "grad_norm": 3.335111379623413, "learning_rate": 9.130744317780503e-06, "loss": 0.6282, "step": 4559 }, { "epoch": 1.155161494616846, "grad_norm": 3.6455440521240234, "learning_rate": 9.130272173063547e-06, "loss": 0.8454, "step": 4560 }, { "epoch": 1.1554148195060165, "grad_norm": 3.5580334663391113, "learning_rate": 9.129799912370485e-06, "loss": 0.8475, "step": 4561 }, { "epoch": 1.1556681443951868, "grad_norm": 3.539486885070801, "learning_rate": 9.129327535714578e-06, "loss": 0.6811, "step": 4562 }, { "epoch": 1.1559214692843571, "grad_norm": 3.76601243019104, "learning_rate": 9.128855043109088e-06, "loss": 0.8203, "step": 4563 }, { "epoch": 1.1561747941735276, "grad_norm": 3.333263397216797, "learning_rate": 9.128382434567285e-06, "loss": 0.7483, "step": 4564 }, { "epoch": 1.156428119062698, "grad_norm": 3.8397083282470703, "learning_rate": 9.127909710102435e-06, "loss": 0.8516, "step": 4565 }, { "epoch": 1.1566814439518682, "grad_norm": 3.8690736293792725, "learning_rate": 9.12743686972782e-06, "loss": 0.7916, "step": 4566 }, { "epoch": 1.1569347688410385, "grad_norm": 3.472472906112671, "learning_rate": 9.126963913456708e-06, "loss": 0.7204, "step": 4567 }, { "epoch": 1.157188093730209, "grad_norm": 3.4159295558929443, "learning_rate": 9.126490841302384e-06, "loss": 0.7895, "step": 4568 }, { "epoch": 1.1574414186193793, "grad_norm": 3.65435528755188, "learning_rate": 9.126017653278132e-06, "loss": 0.855, "step": 4569 }, { "epoch": 1.1576947435085496, "grad_norm": 3.797349214553833, "learning_rate": 9.125544349397238e-06, "loss": 0.791, "step": 4570 }, { "epoch": 1.1579480683977201, "grad_norm": 3.4813103675842285, "learning_rate": 9.125070929672993e-06, "loss": 0.8287, "step": 4571 }, { "epoch": 1.1582013932868904, "grad_norm": 3.6932687759399414, "learning_rate": 9.124597394118688e-06, "loss": 0.7619, "step": 4572 }, { "epoch": 1.1584547181760607, "grad_norm": 3.941201686859131, "learning_rate": 9.124123742747622e-06, "loss": 0.8806, "step": 4573 }, { "epoch": 1.1587080430652312, "grad_norm": 3.5929315090179443, "learning_rate": 9.123649975573095e-06, "loss": 0.7398, "step": 4574 }, { "epoch": 1.1589613679544015, "grad_norm": 3.7371160984039307, "learning_rate": 9.123176092608408e-06, "loss": 0.7426, "step": 4575 }, { "epoch": 1.1592146928435718, "grad_norm": 3.800302267074585, "learning_rate": 9.12270209386687e-06, "loss": 0.866, "step": 4576 }, { "epoch": 1.1594680177327423, "grad_norm": 3.735410213470459, "learning_rate": 9.122227979361789e-06, "loss": 0.9066, "step": 4577 }, { "epoch": 1.1597213426219126, "grad_norm": 3.0655813217163086, "learning_rate": 9.121753749106478e-06, "loss": 0.6971, "step": 4578 }, { "epoch": 1.159974667511083, "grad_norm": 4.177846431732178, "learning_rate": 9.121279403114253e-06, "loss": 0.8673, "step": 4579 }, { "epoch": 1.1602279924002534, "grad_norm": 4.221340656280518, "learning_rate": 9.120804941398435e-06, "loss": 0.7972, "step": 4580 }, { "epoch": 1.1604813172894237, "grad_norm": 3.9534647464752197, "learning_rate": 9.120330363972345e-06, "loss": 0.7697, "step": 4581 }, { "epoch": 1.160734642178594, "grad_norm": 3.3120903968811035, "learning_rate": 9.11985567084931e-06, "loss": 0.7504, "step": 4582 }, { "epoch": 1.1609879670677645, "grad_norm": 3.8544650077819824, "learning_rate": 9.119380862042659e-06, "loss": 0.794, "step": 4583 }, { "epoch": 1.1612412919569348, "grad_norm": 4.032211780548096, "learning_rate": 9.118905937565723e-06, "loss": 0.8554, 
"step": 4584 }, { "epoch": 1.161494616846105, "grad_norm": 3.4280614852905273, "learning_rate": 9.118430897431839e-06, "loss": 0.7677, "step": 4585 }, { "epoch": 1.1617479417352754, "grad_norm": 3.9021902084350586, "learning_rate": 9.117955741654346e-06, "loss": 0.758, "step": 4586 }, { "epoch": 1.162001266624446, "grad_norm": 3.649503469467163, "learning_rate": 9.117480470246584e-06, "loss": 0.7399, "step": 4587 }, { "epoch": 1.1622545915136162, "grad_norm": 3.409973382949829, "learning_rate": 9.117005083221903e-06, "loss": 0.7951, "step": 4588 }, { "epoch": 1.1625079164027865, "grad_norm": 3.909235715866089, "learning_rate": 9.116529580593647e-06, "loss": 0.9294, "step": 4589 }, { "epoch": 1.162761241291957, "grad_norm": 4.342448711395264, "learning_rate": 9.11605396237517e-06, "loss": 0.8511, "step": 4590 }, { "epoch": 1.1630145661811273, "grad_norm": 3.296447515487671, "learning_rate": 9.115578228579826e-06, "loss": 0.6998, "step": 4591 }, { "epoch": 1.1632678910702976, "grad_norm": 3.938145160675049, "learning_rate": 9.115102379220978e-06, "loss": 0.9381, "step": 4592 }, { "epoch": 1.1635212159594681, "grad_norm": 3.656583786010742, "learning_rate": 9.11462641431198e-06, "loss": 0.907, "step": 4593 }, { "epoch": 1.1637745408486384, "grad_norm": 4.242737293243408, "learning_rate": 9.114150333866201e-06, "loss": 0.7964, "step": 4594 }, { "epoch": 1.1640278657378087, "grad_norm": 3.5653722286224365, "learning_rate": 9.11367413789701e-06, "loss": 0.795, "step": 4595 }, { "epoch": 1.164281190626979, "grad_norm": 3.39385724067688, "learning_rate": 9.113197826417776e-06, "loss": 0.7885, "step": 4596 }, { "epoch": 1.1645345155161495, "grad_norm": 3.6165449619293213, "learning_rate": 9.112721399441877e-06, "loss": 0.6814, "step": 4597 }, { "epoch": 1.1647878404053198, "grad_norm": 4.119375705718994, "learning_rate": 9.112244856982687e-06, "loss": 0.8861, "step": 4598 }, { "epoch": 1.16504116529449, "grad_norm": 3.5786070823669434, "learning_rate": 9.111768199053588e-06, "loss": 0.7515, "step": 4599 }, { "epoch": 1.1652944901836606, "grad_norm": 3.670085906982422, "learning_rate": 9.111291425667967e-06, "loss": 0.844, "step": 4600 }, { "epoch": 1.165547815072831, "grad_norm": 3.777029514312744, "learning_rate": 9.110814536839208e-06, "loss": 1.0063, "step": 4601 }, { "epoch": 1.1658011399620012, "grad_norm": 3.5752599239349365, "learning_rate": 9.110337532580705e-06, "loss": 0.9175, "step": 4602 }, { "epoch": 1.1660544648511717, "grad_norm": 4.313035488128662, "learning_rate": 9.109860412905849e-06, "loss": 0.8021, "step": 4603 }, { "epoch": 1.166307789740342, "grad_norm": 3.4963371753692627, "learning_rate": 9.10938317782804e-06, "loss": 0.7787, "step": 4604 }, { "epoch": 1.1665611146295123, "grad_norm": 3.543499708175659, "learning_rate": 9.108905827360677e-06, "loss": 0.8232, "step": 4605 }, { "epoch": 1.1668144395186828, "grad_norm": 4.013868808746338, "learning_rate": 9.108428361517163e-06, "loss": 0.8854, "step": 4606 }, { "epoch": 1.167067764407853, "grad_norm": 3.909543037414551, "learning_rate": 9.107950780310908e-06, "loss": 0.8123, "step": 4607 }, { "epoch": 1.1673210892970234, "grad_norm": 3.912917137145996, "learning_rate": 9.107473083755317e-06, "loss": 0.9191, "step": 4608 }, { "epoch": 1.167574414186194, "grad_norm": 3.570385694503784, "learning_rate": 9.10699527186381e-06, "loss": 0.7581, "step": 4609 }, { "epoch": 1.1678277390753642, "grad_norm": 3.5049171447753906, "learning_rate": 9.106517344649802e-06, "loss": 0.8042, "step": 4610 }, { "epoch": 1.1680810639645345, 
"grad_norm": 3.562222957611084, "learning_rate": 9.106039302126709e-06, "loss": 0.8154, "step": 4611 }, { "epoch": 1.168334388853705, "grad_norm": 3.671994686126709, "learning_rate": 9.105561144307958e-06, "loss": 0.809, "step": 4612 }, { "epoch": 1.1685877137428753, "grad_norm": 3.3304800987243652, "learning_rate": 9.105082871206972e-06, "loss": 0.77, "step": 4613 }, { "epoch": 1.1688410386320456, "grad_norm": 3.5379252433776855, "learning_rate": 9.104604482837184e-06, "loss": 0.7443, "step": 4614 }, { "epoch": 1.1690943635212159, "grad_norm": 3.332242965698242, "learning_rate": 9.104125979212027e-06, "loss": 0.8041, "step": 4615 }, { "epoch": 1.1693476884103864, "grad_norm": 3.415736675262451, "learning_rate": 9.103647360344937e-06, "loss": 0.6914, "step": 4616 }, { "epoch": 1.1696010132995567, "grad_norm": 3.767634153366089, "learning_rate": 9.10316862624935e-06, "loss": 0.809, "step": 4617 }, { "epoch": 1.169854338188727, "grad_norm": 3.434682846069336, "learning_rate": 9.102689776938711e-06, "loss": 0.698, "step": 4618 }, { "epoch": 1.1701076630778975, "grad_norm": 4.115676403045654, "learning_rate": 9.102210812426467e-06, "loss": 0.834, "step": 4619 }, { "epoch": 1.1703609879670678, "grad_norm": 3.654546022415161, "learning_rate": 9.101731732726066e-06, "loss": 0.7559, "step": 4620 }, { "epoch": 1.170614312856238, "grad_norm": 3.954991340637207, "learning_rate": 9.10125253785096e-06, "loss": 0.8116, "step": 4621 }, { "epoch": 1.1708676377454084, "grad_norm": 3.3564887046813965, "learning_rate": 9.100773227814606e-06, "loss": 0.7457, "step": 4622 }, { "epoch": 1.1711209626345789, "grad_norm": 4.064774036407471, "learning_rate": 9.10029380263046e-06, "loss": 0.8447, "step": 4623 }, { "epoch": 1.1713742875237492, "grad_norm": 3.9697160720825195, "learning_rate": 9.099814262311986e-06, "loss": 0.856, "step": 4624 }, { "epoch": 1.1716276124129195, "grad_norm": 3.4555749893188477, "learning_rate": 9.09933460687265e-06, "loss": 0.7495, "step": 4625 }, { "epoch": 1.17188093730209, "grad_norm": 4.025459289550781, "learning_rate": 9.098854836325918e-06, "loss": 0.7282, "step": 4626 }, { "epoch": 1.1721342621912603, "grad_norm": 3.783754587173462, "learning_rate": 9.098374950685265e-06, "loss": 0.8051, "step": 4627 }, { "epoch": 1.1723875870804306, "grad_norm": 3.386687755584717, "learning_rate": 9.097894949964164e-06, "loss": 0.7492, "step": 4628 }, { "epoch": 1.172640911969601, "grad_norm": 3.3886213302612305, "learning_rate": 9.097414834176092e-06, "loss": 0.8315, "step": 4629 }, { "epoch": 1.1728942368587714, "grad_norm": 3.9223880767822266, "learning_rate": 9.096934603334533e-06, "loss": 0.7318, "step": 4630 }, { "epoch": 1.1731475617479417, "grad_norm": 3.5498759746551514, "learning_rate": 9.096454257452968e-06, "loss": 0.7495, "step": 4631 }, { "epoch": 1.1734008866371122, "grad_norm": 3.227283239364624, "learning_rate": 9.095973796544891e-06, "loss": 0.7463, "step": 4632 }, { "epoch": 1.1736542115262825, "grad_norm": 3.749796152114868, "learning_rate": 9.095493220623787e-06, "loss": 0.7447, "step": 4633 }, { "epoch": 1.1739075364154528, "grad_norm": 3.7740070819854736, "learning_rate": 9.095012529703156e-06, "loss": 0.8366, "step": 4634 }, { "epoch": 1.1741608613046233, "grad_norm": 3.5309395790100098, "learning_rate": 9.09453172379649e-06, "loss": 0.7738, "step": 4635 }, { "epoch": 1.1744141861937936, "grad_norm": 3.762718677520752, "learning_rate": 9.094050802917291e-06, "loss": 0.8424, "step": 4636 }, { "epoch": 1.1746675110829639, "grad_norm": 3.685856580734253, "learning_rate": 
9.093569767079065e-06, "loss": 0.7974, "step": 4637 }, { "epoch": 1.1749208359721344, "grad_norm": 3.5668835639953613, "learning_rate": 9.093088616295321e-06, "loss": 0.7287, "step": 4638 }, { "epoch": 1.1751741608613047, "grad_norm": 3.8816702365875244, "learning_rate": 9.092607350579563e-06, "loss": 0.7434, "step": 4639 }, { "epoch": 1.175427485750475, "grad_norm": 3.4125139713287354, "learning_rate": 9.092125969945311e-06, "loss": 0.7396, "step": 4640 }, { "epoch": 1.1756808106396455, "grad_norm": 3.3155534267425537, "learning_rate": 9.09164447440608e-06, "loss": 0.6941, "step": 4641 }, { "epoch": 1.1759341355288158, "grad_norm": 3.691645383834839, "learning_rate": 9.09116286397539e-06, "loss": 0.7019, "step": 4642 }, { "epoch": 1.176187460417986, "grad_norm": 3.39418363571167, "learning_rate": 9.090681138666763e-06, "loss": 0.6927, "step": 4643 }, { "epoch": 1.1764407853071563, "grad_norm": 3.94716739654541, "learning_rate": 9.090199298493727e-06, "loss": 0.7232, "step": 4644 }, { "epoch": 1.1766941101963269, "grad_norm": 4.030582904815674, "learning_rate": 9.089717343469812e-06, "loss": 0.9173, "step": 4645 }, { "epoch": 1.1769474350854972, "grad_norm": 3.814612865447998, "learning_rate": 9.089235273608552e-06, "loss": 0.7907, "step": 4646 }, { "epoch": 1.1772007599746674, "grad_norm": 3.907712697982788, "learning_rate": 9.08875308892348e-06, "loss": 0.7244, "step": 4647 }, { "epoch": 1.177454084863838, "grad_norm": 3.4128739833831787, "learning_rate": 9.08827078942814e-06, "loss": 0.7244, "step": 4648 }, { "epoch": 1.1777074097530082, "grad_norm": 3.8643686771392822, "learning_rate": 9.08778837513607e-06, "loss": 0.7758, "step": 4649 }, { "epoch": 1.1779607346421785, "grad_norm": 3.5134050846099854, "learning_rate": 9.087305846060818e-06, "loss": 0.7238, "step": 4650 }, { "epoch": 1.1782140595313488, "grad_norm": 3.5021824836730957, "learning_rate": 9.086823202215935e-06, "loss": 0.7189, "step": 4651 }, { "epoch": 1.1784673844205193, "grad_norm": 3.862809181213379, "learning_rate": 9.086340443614972e-06, "loss": 0.7798, "step": 4652 }, { "epoch": 1.1787207093096896, "grad_norm": 3.533787727355957, "learning_rate": 9.085857570271484e-06, "loss": 0.6933, "step": 4653 }, { "epoch": 1.17897403419886, "grad_norm": 3.475367307662964, "learning_rate": 9.08537458219903e-06, "loss": 0.7725, "step": 4654 }, { "epoch": 1.1792273590880304, "grad_norm": 3.678919792175293, "learning_rate": 9.084891479411172e-06, "loss": 0.8077, "step": 4655 }, { "epoch": 1.1794806839772007, "grad_norm": 3.676244020462036, "learning_rate": 9.084408261921477e-06, "loss": 0.6353, "step": 4656 }, { "epoch": 1.179734008866371, "grad_norm": 3.4303646087646484, "learning_rate": 9.083924929743512e-06, "loss": 0.8347, "step": 4657 }, { "epoch": 1.1799873337555415, "grad_norm": 4.191209316253662, "learning_rate": 9.08344148289085e-06, "loss": 0.8738, "step": 4658 }, { "epoch": 1.1802406586447118, "grad_norm": 4.154456615447998, "learning_rate": 9.082957921377063e-06, "loss": 0.8625, "step": 4659 }, { "epoch": 1.1804939835338821, "grad_norm": 3.559074878692627, "learning_rate": 9.08247424521573e-06, "loss": 0.7643, "step": 4660 }, { "epoch": 1.1807473084230526, "grad_norm": 3.6135287284851074, "learning_rate": 9.081990454420437e-06, "loss": 0.8673, "step": 4661 }, { "epoch": 1.181000633312223, "grad_norm": 3.6605186462402344, "learning_rate": 9.081506549004763e-06, "loss": 0.6727, "step": 4662 }, { "epoch": 1.1812539582013932, "grad_norm": 4.238001346588135, "learning_rate": 9.081022528982298e-06, "loss": 0.8279, "step": 
4663 }, { "epoch": 1.1815072830905637, "grad_norm": 3.378873586654663, "learning_rate": 9.080538394366636e-06, "loss": 0.7547, "step": 4664 }, { "epoch": 1.181760607979734, "grad_norm": 3.2367618083953857, "learning_rate": 9.080054145171365e-06, "loss": 0.7417, "step": 4665 }, { "epoch": 1.1820139328689043, "grad_norm": 3.37278413772583, "learning_rate": 9.079569781410087e-06, "loss": 0.7176, "step": 4666 }, { "epoch": 1.1822672577580748, "grad_norm": 3.4264819622039795, "learning_rate": 9.079085303096401e-06, "loss": 0.7502, "step": 4667 }, { "epoch": 1.1825205826472451, "grad_norm": 3.859360933303833, "learning_rate": 9.078600710243912e-06, "loss": 0.7934, "step": 4668 }, { "epoch": 1.1827739075364154, "grad_norm": 3.580442428588867, "learning_rate": 9.078116002866226e-06, "loss": 0.8133, "step": 4669 }, { "epoch": 1.1830272324255857, "grad_norm": 3.8393847942352295, "learning_rate": 9.077631180976955e-06, "loss": 0.758, "step": 4670 }, { "epoch": 1.1832805573147562, "grad_norm": 3.4573380947113037, "learning_rate": 9.077146244589712e-06, "loss": 0.6716, "step": 4671 }, { "epoch": 1.1835338822039265, "grad_norm": 3.5827207565307617, "learning_rate": 9.076661193718111e-06, "loss": 0.8539, "step": 4672 }, { "epoch": 1.1837872070930968, "grad_norm": 3.2626802921295166, "learning_rate": 9.076176028375775e-06, "loss": 0.6498, "step": 4673 }, { "epoch": 1.1840405319822673, "grad_norm": 3.4087109565734863, "learning_rate": 9.075690748576327e-06, "loss": 0.6383, "step": 4674 }, { "epoch": 1.1842938568714376, "grad_norm": 3.3773810863494873, "learning_rate": 9.075205354333394e-06, "loss": 0.6406, "step": 4675 }, { "epoch": 1.184547181760608, "grad_norm": 3.88681960105896, "learning_rate": 9.074719845660605e-06, "loss": 0.79, "step": 4676 }, { "epoch": 1.1848005066497784, "grad_norm": 3.5406460762023926, "learning_rate": 9.07423422257159e-06, "loss": 0.7606, "step": 4677 }, { "epoch": 1.1850538315389487, "grad_norm": 3.60103178024292, "learning_rate": 9.07374848507999e-06, "loss": 0.8023, "step": 4678 }, { "epoch": 1.185307156428119, "grad_norm": 3.9661853313446045, "learning_rate": 9.07326263319944e-06, "loss": 0.8699, "step": 4679 }, { "epoch": 1.1855604813172893, "grad_norm": 4.237826347351074, "learning_rate": 9.072776666943583e-06, "loss": 0.7779, "step": 4680 }, { "epoch": 1.1858138062064598, "grad_norm": 3.834801435470581, "learning_rate": 9.07229058632607e-06, "loss": 0.8709, "step": 4681 }, { "epoch": 1.1860671310956301, "grad_norm": 3.58707332611084, "learning_rate": 9.071804391360544e-06, "loss": 0.8227, "step": 4682 }, { "epoch": 1.1863204559848004, "grad_norm": 3.898444414138794, "learning_rate": 9.071318082060659e-06, "loss": 0.8405, "step": 4683 }, { "epoch": 1.186573780873971, "grad_norm": 3.8865280151367188, "learning_rate": 9.070831658440068e-06, "loss": 0.8815, "step": 4684 }, { "epoch": 1.1868271057631412, "grad_norm": 3.698413848876953, "learning_rate": 9.070345120512436e-06, "loss": 0.8733, "step": 4685 }, { "epoch": 1.1870804306523115, "grad_norm": 4.0502800941467285, "learning_rate": 9.069858468291417e-06, "loss": 0.9704, "step": 4686 }, { "epoch": 1.187333755541482, "grad_norm": 3.8300623893737793, "learning_rate": 9.069371701790684e-06, "loss": 0.8485, "step": 4687 }, { "epoch": 1.1875870804306523, "grad_norm": 3.7541427612304688, "learning_rate": 9.068884821023898e-06, "loss": 0.8075, "step": 4688 }, { "epoch": 1.1878404053198226, "grad_norm": 4.094725131988525, "learning_rate": 9.068397826004734e-06, "loss": 0.8, "step": 4689 }, { "epoch": 1.1880937302089931, 
"grad_norm": 3.4738681316375732, "learning_rate": 9.067910716746863e-06, "loss": 0.8071, "step": 4690 }, { "epoch": 1.1883470550981634, "grad_norm": 3.5837690830230713, "learning_rate": 9.067423493263969e-06, "loss": 0.7731, "step": 4691 }, { "epoch": 1.1886003799873337, "grad_norm": 4.020441055297852, "learning_rate": 9.066936155569728e-06, "loss": 0.7003, "step": 4692 }, { "epoch": 1.1888537048765042, "grad_norm": 3.8502564430236816, "learning_rate": 9.066448703677828e-06, "loss": 0.7282, "step": 4693 }, { "epoch": 1.1891070297656745, "grad_norm": 3.1409623622894287, "learning_rate": 9.065961137601953e-06, "loss": 0.6211, "step": 4694 }, { "epoch": 1.1893603546548448, "grad_norm": 3.890843152999878, "learning_rate": 9.065473457355793e-06, "loss": 0.8454, "step": 4695 }, { "epoch": 1.1896136795440153, "grad_norm": 3.9443559646606445, "learning_rate": 9.064985662953046e-06, "loss": 0.7721, "step": 4696 }, { "epoch": 1.1898670044331856, "grad_norm": 3.839155673980713, "learning_rate": 9.064497754407407e-06, "loss": 0.8109, "step": 4697 }, { "epoch": 1.190120329322356, "grad_norm": 3.730426549911499, "learning_rate": 9.064009731732574e-06, "loss": 0.7671, "step": 4698 }, { "epoch": 1.1903736542115262, "grad_norm": 3.659146308898926, "learning_rate": 9.063521594942254e-06, "loss": 0.7655, "step": 4699 }, { "epoch": 1.1906269791006967, "grad_norm": 3.4877068996429443, "learning_rate": 9.063033344050151e-06, "loss": 0.7731, "step": 4700 }, { "epoch": 1.190880303989867, "grad_norm": 3.3288068771362305, "learning_rate": 9.062544979069977e-06, "loss": 0.65, "step": 4701 }, { "epoch": 1.1911336288790373, "grad_norm": 3.840134382247925, "learning_rate": 9.062056500015443e-06, "loss": 0.8196, "step": 4702 }, { "epoch": 1.1913869537682078, "grad_norm": 3.9276678562164307, "learning_rate": 9.061567906900268e-06, "loss": 0.721, "step": 4703 }, { "epoch": 1.191640278657378, "grad_norm": 4.0413360595703125, "learning_rate": 9.061079199738168e-06, "loss": 0.807, "step": 4704 }, { "epoch": 1.1918936035465484, "grad_norm": 3.896489381790161, "learning_rate": 9.060590378542868e-06, "loss": 0.6801, "step": 4705 }, { "epoch": 1.192146928435719, "grad_norm": 3.962916612625122, "learning_rate": 9.060101443328092e-06, "loss": 0.8777, "step": 4706 }, { "epoch": 1.1924002533248892, "grad_norm": 3.965481758117676, "learning_rate": 9.059612394107573e-06, "loss": 0.7118, "step": 4707 }, { "epoch": 1.1926535782140595, "grad_norm": 3.788297653198242, "learning_rate": 9.05912323089504e-06, "loss": 0.7165, "step": 4708 }, { "epoch": 1.1929069031032298, "grad_norm": 3.574528217315674, "learning_rate": 9.058633953704228e-06, "loss": 0.8802, "step": 4709 }, { "epoch": 1.1931602279924003, "grad_norm": 3.7018349170684814, "learning_rate": 9.058144562548876e-06, "loss": 0.8945, "step": 4710 }, { "epoch": 1.1934135528815706, "grad_norm": 3.8939034938812256, "learning_rate": 9.057655057442728e-06, "loss": 0.7497, "step": 4711 }, { "epoch": 1.1936668777707409, "grad_norm": 3.595820188522339, "learning_rate": 9.057165438399528e-06, "loss": 0.8388, "step": 4712 }, { "epoch": 1.1939202026599114, "grad_norm": 3.4211912155151367, "learning_rate": 9.056675705433024e-06, "loss": 0.6714, "step": 4713 }, { "epoch": 1.1941735275490817, "grad_norm": 3.458573341369629, "learning_rate": 9.056185858556966e-06, "loss": 0.6966, "step": 4714 }, { "epoch": 1.194426852438252, "grad_norm": 3.368644952774048, "learning_rate": 9.055695897785113e-06, "loss": 0.8037, "step": 4715 }, { "epoch": 1.1946801773274225, "grad_norm": 3.442203998565674, 
"learning_rate": 9.055205823131217e-06, "loss": 0.7374, "step": 4716 }, { "epoch": 1.1949335022165928, "grad_norm": 3.0957603454589844, "learning_rate": 9.054715634609043e-06, "loss": 0.7183, "step": 4717 }, { "epoch": 1.195186827105763, "grad_norm": 4.103257179260254, "learning_rate": 9.054225332232355e-06, "loss": 0.8436, "step": 4718 }, { "epoch": 1.1954401519949336, "grad_norm": 3.9489400386810303, "learning_rate": 9.053734916014918e-06, "loss": 0.8258, "step": 4719 }, { "epoch": 1.1956934768841039, "grad_norm": 4.136053562164307, "learning_rate": 9.053244385970507e-06, "loss": 0.719, "step": 4720 }, { "epoch": 1.1959468017732742, "grad_norm": 3.3901286125183105, "learning_rate": 9.052753742112893e-06, "loss": 0.7553, "step": 4721 }, { "epoch": 1.1962001266624447, "grad_norm": 3.6780190467834473, "learning_rate": 9.052262984455851e-06, "loss": 0.8364, "step": 4722 }, { "epoch": 1.196453451551615, "grad_norm": 3.696352481842041, "learning_rate": 9.051772113013166e-06, "loss": 0.8566, "step": 4723 }, { "epoch": 1.1967067764407853, "grad_norm": 3.751959800720215, "learning_rate": 9.051281127798617e-06, "loss": 0.8296, "step": 4724 }, { "epoch": 1.1969601013299558, "grad_norm": 4.118260383605957, "learning_rate": 9.050790028825994e-06, "loss": 0.849, "step": 4725 }, { "epoch": 1.197213426219126, "grad_norm": 3.5884218215942383, "learning_rate": 9.050298816109085e-06, "loss": 0.8572, "step": 4726 }, { "epoch": 1.1974667511082964, "grad_norm": 3.55743145942688, "learning_rate": 9.049807489661683e-06, "loss": 0.823, "step": 4727 }, { "epoch": 1.1977200759974667, "grad_norm": 4.042413234710693, "learning_rate": 9.049316049497587e-06, "loss": 0.9174, "step": 4728 }, { "epoch": 1.1979734008866372, "grad_norm": 3.994415044784546, "learning_rate": 9.048824495630593e-06, "loss": 0.9475, "step": 4729 }, { "epoch": 1.1982267257758075, "grad_norm": 3.970856189727783, "learning_rate": 9.048332828074504e-06, "loss": 0.8147, "step": 4730 }, { "epoch": 1.1984800506649778, "grad_norm": 3.8265199661254883, "learning_rate": 9.047841046843126e-06, "loss": 0.7644, "step": 4731 }, { "epoch": 1.1987333755541483, "grad_norm": 3.4341936111450195, "learning_rate": 9.047349151950272e-06, "loss": 0.7143, "step": 4732 }, { "epoch": 1.1989867004433186, "grad_norm": 3.607229232788086, "learning_rate": 9.046857143409746e-06, "loss": 0.65, "step": 4733 }, { "epoch": 1.1992400253324889, "grad_norm": 3.93404221534729, "learning_rate": 9.04636502123537e-06, "loss": 0.9532, "step": 4734 }, { "epoch": 1.1994933502216594, "grad_norm": 4.027116298675537, "learning_rate": 9.045872785440961e-06, "loss": 0.8919, "step": 4735 }, { "epoch": 1.1997466751108297, "grad_norm": 3.4875340461730957, "learning_rate": 9.04538043604034e-06, "loss": 0.7551, "step": 4736 }, { "epoch": 1.2, "grad_norm": 4.019157886505127, "learning_rate": 9.044887973047335e-06, "loss": 0.858, "step": 4737 }, { "epoch": 1.2002533248891702, "grad_norm": 3.2707157135009766, "learning_rate": 9.044395396475767e-06, "loss": 0.7807, "step": 4738 }, { "epoch": 1.2005066497783408, "grad_norm": 4.049706935882568, "learning_rate": 9.043902706339474e-06, "loss": 0.9211, "step": 4739 }, { "epoch": 1.200759974667511, "grad_norm": 3.1721930503845215, "learning_rate": 9.043409902652288e-06, "loss": 0.7179, "step": 4740 }, { "epoch": 1.2010132995566813, "grad_norm": 4.242766857147217, "learning_rate": 9.042916985428048e-06, "loss": 0.7944, "step": 4741 }, { "epoch": 1.2012666244458519, "grad_norm": 3.860823154449463, "learning_rate": 9.042423954680592e-06, "loss": 0.8658, "step": 
4742 }, { "epoch": 1.2015199493350222, "grad_norm": 4.302134990692139, "learning_rate": 9.041930810423768e-06, "loss": 0.75, "step": 4743 }, { "epoch": 1.2017732742241924, "grad_norm": 3.4918928146362305, "learning_rate": 9.041437552671421e-06, "loss": 0.7826, "step": 4744 }, { "epoch": 1.202026599113363, "grad_norm": 3.3742682933807373, "learning_rate": 9.0409441814374e-06, "loss": 0.8313, "step": 4745 }, { "epoch": 1.2022799240025333, "grad_norm": 3.8758504390716553, "learning_rate": 9.04045069673556e-06, "loss": 0.8295, "step": 4746 }, { "epoch": 1.2025332488917035, "grad_norm": 3.509568214416504, "learning_rate": 9.039957098579762e-06, "loss": 0.8013, "step": 4747 }, { "epoch": 1.202786573780874, "grad_norm": 3.639634609222412, "learning_rate": 9.039463386983857e-06, "loss": 0.8307, "step": 4748 }, { "epoch": 1.2030398986700443, "grad_norm": 3.486271619796753, "learning_rate": 9.038969561961716e-06, "loss": 0.6925, "step": 4749 }, { "epoch": 1.2032932235592146, "grad_norm": 3.6544554233551025, "learning_rate": 9.038475623527204e-06, "loss": 0.8655, "step": 4750 }, { "epoch": 1.2035465484483852, "grad_norm": 3.636603832244873, "learning_rate": 9.037981571694187e-06, "loss": 0.7629, "step": 4751 }, { "epoch": 1.2037998733375554, "grad_norm": 3.749607801437378, "learning_rate": 9.037487406476541e-06, "loss": 0.9063, "step": 4752 }, { "epoch": 1.2040531982267257, "grad_norm": 4.07143497467041, "learning_rate": 9.03699312788814e-06, "loss": 0.8602, "step": 4753 }, { "epoch": 1.2043065231158963, "grad_norm": 3.489014148712158, "learning_rate": 9.036498735942865e-06, "loss": 0.8314, "step": 4754 }, { "epoch": 1.2045598480050665, "grad_norm": 3.5965168476104736, "learning_rate": 9.036004230654595e-06, "loss": 0.8041, "step": 4755 }, { "epoch": 1.2048131728942368, "grad_norm": 3.958611011505127, "learning_rate": 9.03550961203722e-06, "loss": 0.9175, "step": 4756 }, { "epoch": 1.2050664977834071, "grad_norm": 4.254213333129883, "learning_rate": 9.035014880104626e-06, "loss": 0.9666, "step": 4757 }, { "epoch": 1.2053198226725776, "grad_norm": 3.5343542098999023, "learning_rate": 9.034520034870706e-06, "loss": 0.7728, "step": 4758 }, { "epoch": 1.205573147561748, "grad_norm": 4.204711437225342, "learning_rate": 9.034025076349353e-06, "loss": 0.8475, "step": 4759 }, { "epoch": 1.2058264724509182, "grad_norm": 3.3257641792297363, "learning_rate": 9.033530004554467e-06, "loss": 0.7982, "step": 4760 }, { "epoch": 1.2060797973400887, "grad_norm": 3.382373332977295, "learning_rate": 9.033034819499951e-06, "loss": 0.6749, "step": 4761 }, { "epoch": 1.206333122229259, "grad_norm": 3.4364678859710693, "learning_rate": 9.032539521199705e-06, "loss": 0.8166, "step": 4762 }, { "epoch": 1.2065864471184293, "grad_norm": 3.706695556640625, "learning_rate": 9.032044109667639e-06, "loss": 0.8283, "step": 4763 }, { "epoch": 1.2068397720075998, "grad_norm": 3.9466264247894287, "learning_rate": 9.031548584917666e-06, "loss": 0.7703, "step": 4764 }, { "epoch": 1.2070930968967701, "grad_norm": 3.926689624786377, "learning_rate": 9.031052946963697e-06, "loss": 0.7708, "step": 4765 }, { "epoch": 1.2073464217859404, "grad_norm": 4.062969207763672, "learning_rate": 9.030557195819649e-06, "loss": 0.8716, "step": 4766 }, { "epoch": 1.2075997466751107, "grad_norm": 4.117637634277344, "learning_rate": 9.030061331499446e-06, "loss": 0.7546, "step": 4767 }, { "epoch": 1.2078530715642812, "grad_norm": 3.3397328853607178, "learning_rate": 9.029565354017009e-06, "loss": 0.753, "step": 4768 }, { "epoch": 1.2081063964534515, 
"grad_norm": 4.444040298461914, "learning_rate": 9.029069263386267e-06, "loss": 0.7658, "step": 4769 }, { "epoch": 1.2083597213426218, "grad_norm": 3.76849627494812, "learning_rate": 9.028573059621146e-06, "loss": 0.7475, "step": 4770 }, { "epoch": 1.2086130462317923, "grad_norm": 3.8576104640960693, "learning_rate": 9.028076742735583e-06, "loss": 0.7425, "step": 4771 }, { "epoch": 1.2088663711209626, "grad_norm": 3.9090235233306885, "learning_rate": 9.027580312743512e-06, "loss": 0.8225, "step": 4772 }, { "epoch": 1.209119696010133, "grad_norm": 3.612471103668213, "learning_rate": 9.027083769658875e-06, "loss": 0.7807, "step": 4773 }, { "epoch": 1.2093730208993034, "grad_norm": 3.902747631072998, "learning_rate": 9.026587113495612e-06, "loss": 0.7209, "step": 4774 }, { "epoch": 1.2096263457884737, "grad_norm": 3.997373342514038, "learning_rate": 9.026090344267669e-06, "loss": 0.8836, "step": 4775 }, { "epoch": 1.209879670677644, "grad_norm": 3.82385516166687, "learning_rate": 9.025593461988998e-06, "loss": 0.7881, "step": 4776 }, { "epoch": 1.2101329955668145, "grad_norm": 3.9499361515045166, "learning_rate": 9.025096466673548e-06, "loss": 0.8651, "step": 4777 }, { "epoch": 1.2103863204559848, "grad_norm": 4.139962196350098, "learning_rate": 9.024599358335278e-06, "loss": 0.9294, "step": 4778 }, { "epoch": 1.2106396453451551, "grad_norm": 3.218672037124634, "learning_rate": 9.024102136988141e-06, "loss": 0.6617, "step": 4779 }, { "epoch": 1.2108929702343256, "grad_norm": 3.538665294647217, "learning_rate": 9.023604802646104e-06, "loss": 0.686, "step": 4780 }, { "epoch": 1.211146295123496, "grad_norm": 3.94869065284729, "learning_rate": 9.02310735532313e-06, "loss": 0.8399, "step": 4781 }, { "epoch": 1.2113996200126662, "grad_norm": 3.490705966949463, "learning_rate": 9.022609795033187e-06, "loss": 0.7787, "step": 4782 }, { "epoch": 1.2116529449018367, "grad_norm": 3.956037998199463, "learning_rate": 9.022112121790243e-06, "loss": 0.8172, "step": 4783 }, { "epoch": 1.211906269791007, "grad_norm": 2.635892868041992, "learning_rate": 9.021614335608279e-06, "loss": 0.6711, "step": 4784 }, { "epoch": 1.2121595946801773, "grad_norm": 3.9293594360351562, "learning_rate": 9.02111643650127e-06, "loss": 0.7689, "step": 4785 }, { "epoch": 1.2124129195693476, "grad_norm": 3.5471110343933105, "learning_rate": 9.020618424483195e-06, "loss": 0.6563, "step": 4786 }, { "epoch": 1.2126662444585181, "grad_norm": 4.197956562042236, "learning_rate": 9.020120299568038e-06, "loss": 0.9316, "step": 4787 }, { "epoch": 1.2129195693476884, "grad_norm": 3.879873037338257, "learning_rate": 9.019622061769789e-06, "loss": 0.9289, "step": 4788 }, { "epoch": 1.2131728942368587, "grad_norm": 3.908384323120117, "learning_rate": 9.019123711102434e-06, "loss": 0.7307, "step": 4789 }, { "epoch": 1.2134262191260292, "grad_norm": 3.6532509326934814, "learning_rate": 9.01862524757997e-06, "loss": 0.731, "step": 4790 }, { "epoch": 1.2136795440151995, "grad_norm": 3.716970205307007, "learning_rate": 9.018126671216392e-06, "loss": 0.7126, "step": 4791 }, { "epoch": 1.2139328689043698, "grad_norm": 3.8923962116241455, "learning_rate": 9.017627982025701e-06, "loss": 0.7515, "step": 4792 }, { "epoch": 1.21418619379354, "grad_norm": 4.065270900726318, "learning_rate": 9.0171291800219e-06, "loss": 0.9012, "step": 4793 }, { "epoch": 1.2144395186827106, "grad_norm": 3.6236536502838135, "learning_rate": 9.016630265218994e-06, "loss": 0.7596, "step": 4794 }, { "epoch": 1.214692843571881, "grad_norm": 3.9246580600738525, "learning_rate": 
9.016131237630992e-06, "loss": 0.9081, "step": 4795 }, { "epoch": 1.2149461684610512, "grad_norm": 3.6482737064361572, "learning_rate": 9.015632097271906e-06, "loss": 0.7758, "step": 4796 }, { "epoch": 1.2151994933502217, "grad_norm": 3.962167978286743, "learning_rate": 9.015132844155755e-06, "loss": 0.7765, "step": 4797 }, { "epoch": 1.215452818239392, "grad_norm": 3.6573903560638428, "learning_rate": 9.014633478296554e-06, "loss": 0.8502, "step": 4798 }, { "epoch": 1.2157061431285623, "grad_norm": 3.6809349060058594, "learning_rate": 9.014133999708328e-06, "loss": 0.8705, "step": 4799 }, { "epoch": 1.2159594680177328, "grad_norm": 4.255880832672119, "learning_rate": 9.013634408405098e-06, "loss": 0.7849, "step": 4800 }, { "epoch": 1.216212792906903, "grad_norm": 4.187831401824951, "learning_rate": 9.013134704400898e-06, "loss": 0.8874, "step": 4801 }, { "epoch": 1.2164661177960734, "grad_norm": 3.6912200450897217, "learning_rate": 9.012634887709755e-06, "loss": 0.7232, "step": 4802 }, { "epoch": 1.216719442685244, "grad_norm": 3.335015296936035, "learning_rate": 9.012134958345703e-06, "loss": 0.7511, "step": 4803 }, { "epoch": 1.2169727675744142, "grad_norm": 3.792112112045288, "learning_rate": 9.011634916322785e-06, "loss": 0.7904, "step": 4804 }, { "epoch": 1.2172260924635845, "grad_norm": 4.308746814727783, "learning_rate": 9.011134761655039e-06, "loss": 0.8286, "step": 4805 }, { "epoch": 1.217479417352755, "grad_norm": 3.910618782043457, "learning_rate": 9.010634494356507e-06, "loss": 0.8875, "step": 4806 }, { "epoch": 1.2177327422419253, "grad_norm": 3.9517557621002197, "learning_rate": 9.010134114441237e-06, "loss": 0.8079, "step": 4807 }, { "epoch": 1.2179860671310956, "grad_norm": 3.4831385612487793, "learning_rate": 9.009633621923282e-06, "loss": 0.7058, "step": 4808 }, { "epoch": 1.218239392020266, "grad_norm": 3.7121262550354004, "learning_rate": 9.009133016816694e-06, "loss": 0.7318, "step": 4809 }, { "epoch": 1.2184927169094364, "grad_norm": 3.54034161567688, "learning_rate": 9.00863229913553e-06, "loss": 0.7555, "step": 4810 }, { "epoch": 1.2187460417986067, "grad_norm": 3.896744966506958, "learning_rate": 9.00813146889385e-06, "loss": 0.908, "step": 4811 }, { "epoch": 1.2189993666877772, "grad_norm": 3.977553129196167, "learning_rate": 9.007630526105718e-06, "loss": 0.6372, "step": 4812 }, { "epoch": 1.2192526915769475, "grad_norm": 5.474034786224365, "learning_rate": 9.007129470785196e-06, "loss": 0.9058, "step": 4813 }, { "epoch": 1.2195060164661178, "grad_norm": 3.76747989654541, "learning_rate": 9.00662830294636e-06, "loss": 0.8183, "step": 4814 }, { "epoch": 1.219759341355288, "grad_norm": 3.620443105697632, "learning_rate": 9.006127022603276e-06, "loss": 0.6893, "step": 4815 }, { "epoch": 1.2200126662444586, "grad_norm": 3.8057548999786377, "learning_rate": 9.005625629770024e-06, "loss": 0.8761, "step": 4816 }, { "epoch": 1.2202659911336289, "grad_norm": 3.5878427028656006, "learning_rate": 9.005124124460682e-06, "loss": 0.7336, "step": 4817 }, { "epoch": 1.2205193160227992, "grad_norm": 4.016486167907715, "learning_rate": 9.004622506689331e-06, "loss": 0.7536, "step": 4818 }, { "epoch": 1.2207726409119697, "grad_norm": 3.7963743209838867, "learning_rate": 9.004120776470058e-06, "loss": 0.7734, "step": 4819 }, { "epoch": 1.22102596580114, "grad_norm": 3.6075387001037598, "learning_rate": 9.003618933816948e-06, "loss": 0.7353, "step": 4820 }, { "epoch": 1.2212792906903103, "grad_norm": 4.058751583099365, "learning_rate": 9.003116978744098e-06, "loss": 0.8964, "step": 
4821 }, { "epoch": 1.2215326155794806, "grad_norm": 3.674062967300415, "learning_rate": 9.002614911265598e-06, "loss": 0.6853, "step": 4822 }, { "epoch": 1.221785940468651, "grad_norm": 3.7696738243103027, "learning_rate": 9.002112731395544e-06, "loss": 0.7808, "step": 4823 }, { "epoch": 1.2220392653578214, "grad_norm": 4.296994686126709, "learning_rate": 9.001610439148046e-06, "loss": 0.9033, "step": 4824 }, { "epoch": 1.2222925902469917, "grad_norm": 3.666095018386841, "learning_rate": 9.001108034537199e-06, "loss": 0.6923, "step": 4825 }, { "epoch": 1.2225459151361622, "grad_norm": 3.87882137298584, "learning_rate": 9.000605517577113e-06, "loss": 0.7728, "step": 4826 }, { "epoch": 1.2227992400253325, "grad_norm": 3.883906126022339, "learning_rate": 9.000102888281901e-06, "loss": 0.7317, "step": 4827 }, { "epoch": 1.2230525649145028, "grad_norm": 4.187375068664551, "learning_rate": 8.999600146665672e-06, "loss": 0.8786, "step": 4828 }, { "epoch": 1.2233058898036733, "grad_norm": 3.488924741744995, "learning_rate": 8.999097292742549e-06, "loss": 0.8264, "step": 4829 }, { "epoch": 1.2235592146928436, "grad_norm": 3.888960599899292, "learning_rate": 8.998594326526647e-06, "loss": 0.8025, "step": 4830 }, { "epoch": 1.2238125395820139, "grad_norm": 3.666673183441162, "learning_rate": 8.998091248032089e-06, "loss": 0.7531, "step": 4831 }, { "epoch": 1.2240658644711844, "grad_norm": 3.860265016555786, "learning_rate": 8.997588057273004e-06, "loss": 0.7896, "step": 4832 }, { "epoch": 1.2243191893603547, "grad_norm": 3.5526556968688965, "learning_rate": 8.997084754263519e-06, "loss": 0.7484, "step": 4833 }, { "epoch": 1.224572514249525, "grad_norm": 3.7557437419891357, "learning_rate": 8.996581339017767e-06, "loss": 0.7255, "step": 4834 }, { "epoch": 1.2248258391386955, "grad_norm": 3.388437271118164, "learning_rate": 8.996077811549886e-06, "loss": 0.8126, "step": 4835 }, { "epoch": 1.2250791640278658, "grad_norm": 3.607306718826294, "learning_rate": 8.995574171874011e-06, "loss": 0.8345, "step": 4836 }, { "epoch": 1.225332488917036, "grad_norm": 3.809396982192993, "learning_rate": 8.995070420004286e-06, "loss": 0.8228, "step": 4837 }, { "epoch": 1.2255858138062066, "grad_norm": 4.179544448852539, "learning_rate": 8.994566555954858e-06, "loss": 0.8792, "step": 4838 }, { "epoch": 1.2258391386953769, "grad_norm": 3.894911766052246, "learning_rate": 8.994062579739871e-06, "loss": 0.7968, "step": 4839 }, { "epoch": 1.2260924635845472, "grad_norm": 3.6074442863464355, "learning_rate": 8.993558491373479e-06, "loss": 0.7674, "step": 4840 }, { "epoch": 1.2263457884737174, "grad_norm": 3.7121362686157227, "learning_rate": 8.993054290869838e-06, "loss": 0.9287, "step": 4841 }, { "epoch": 1.226599113362888, "grad_norm": 3.6276259422302246, "learning_rate": 8.992549978243104e-06, "loss": 0.7449, "step": 4842 }, { "epoch": 1.2268524382520583, "grad_norm": 3.604879856109619, "learning_rate": 8.992045553507436e-06, "loss": 0.858, "step": 4843 }, { "epoch": 1.2271057631412285, "grad_norm": 3.6814112663269043, "learning_rate": 8.991541016677002e-06, "loss": 0.8183, "step": 4844 }, { "epoch": 1.227359088030399, "grad_norm": 3.4473230838775635, "learning_rate": 8.991036367765964e-06, "loss": 0.7232, "step": 4845 }, { "epoch": 1.2276124129195694, "grad_norm": 3.860246181488037, "learning_rate": 8.990531606788497e-06, "loss": 0.773, "step": 4846 }, { "epoch": 1.2278657378087396, "grad_norm": 3.4309897422790527, "learning_rate": 8.990026733758772e-06, "loss": 0.7406, "step": 4847 }, { "epoch": 1.2281190626979102, 
"grad_norm": 3.4666717052459717, "learning_rate": 8.989521748690969e-06, "loss": 0.8216, "step": 4848 }, { "epoch": 1.2283723875870804, "grad_norm": 3.6101233959198, "learning_rate": 8.989016651599262e-06, "loss": 0.7905, "step": 4849 }, { "epoch": 1.2286257124762507, "grad_norm": 3.8484609127044678, "learning_rate": 8.988511442497839e-06, "loss": 0.8098, "step": 4850 }, { "epoch": 1.228879037365421, "grad_norm": 3.559965133666992, "learning_rate": 8.988006121400881e-06, "loss": 0.7643, "step": 4851 }, { "epoch": 1.2291323622545915, "grad_norm": 3.5645501613616943, "learning_rate": 8.987500688322583e-06, "loss": 0.6875, "step": 4852 }, { "epoch": 1.2293856871437618, "grad_norm": 3.5796780586242676, "learning_rate": 8.986995143277134e-06, "loss": 0.741, "step": 4853 }, { "epoch": 1.2296390120329321, "grad_norm": 3.296039581298828, "learning_rate": 8.98648948627873e-06, "loss": 0.7354, "step": 4854 }, { "epoch": 1.2298923369221026, "grad_norm": 3.3527204990386963, "learning_rate": 8.985983717341568e-06, "loss": 0.7294, "step": 4855 }, { "epoch": 1.230145661811273, "grad_norm": 4.831778526306152, "learning_rate": 8.985477836479855e-06, "loss": 1.0119, "step": 4856 }, { "epoch": 1.2303989867004432, "grad_norm": 3.175151824951172, "learning_rate": 8.984971843707787e-06, "loss": 0.7934, "step": 4857 }, { "epoch": 1.2306523115896137, "grad_norm": 3.8539159297943115, "learning_rate": 8.984465739039583e-06, "loss": 0.9443, "step": 4858 }, { "epoch": 1.230905636478784, "grad_norm": 3.8513283729553223, "learning_rate": 8.983959522489445e-06, "loss": 0.7991, "step": 4859 }, { "epoch": 1.2311589613679543, "grad_norm": 3.5532569885253906, "learning_rate": 8.983453194071592e-06, "loss": 0.883, "step": 4860 }, { "epoch": 1.2314122862571248, "grad_norm": 3.6331706047058105, "learning_rate": 8.982946753800238e-06, "loss": 0.8583, "step": 4861 }, { "epoch": 1.2316656111462951, "grad_norm": 3.5020289421081543, "learning_rate": 8.982440201689609e-06, "loss": 0.7938, "step": 4862 }, { "epoch": 1.2319189360354654, "grad_norm": 3.7837648391723633, "learning_rate": 8.981933537753925e-06, "loss": 0.7653, "step": 4863 }, { "epoch": 1.232172260924636, "grad_norm": 3.6381020545959473, "learning_rate": 8.981426762007412e-06, "loss": 0.8849, "step": 4864 }, { "epoch": 1.2324255858138062, "grad_norm": 3.292473077774048, "learning_rate": 8.980919874464302e-06, "loss": 0.7163, "step": 4865 }, { "epoch": 1.2326789107029765, "grad_norm": 3.657729148864746, "learning_rate": 8.98041287513883e-06, "loss": 0.7659, "step": 4866 }, { "epoch": 1.232932235592147, "grad_norm": 3.368623971939087, "learning_rate": 8.979905764045227e-06, "loss": 0.7878, "step": 4867 }, { "epoch": 1.2331855604813173, "grad_norm": 3.797635793685913, "learning_rate": 8.979398541197736e-06, "loss": 0.7507, "step": 4868 }, { "epoch": 1.2334388853704876, "grad_norm": 3.610055446624756, "learning_rate": 8.9788912066106e-06, "loss": 0.9107, "step": 4869 }, { "epoch": 1.233692210259658, "grad_norm": 3.4671168327331543, "learning_rate": 8.978383760298063e-06, "loss": 0.7952, "step": 4870 }, { "epoch": 1.2339455351488284, "grad_norm": 3.547825336456299, "learning_rate": 8.977876202274377e-06, "loss": 0.7668, "step": 4871 }, { "epoch": 1.2341988600379987, "grad_norm": 3.617056369781494, "learning_rate": 8.977368532553787e-06, "loss": 0.7, "step": 4872 }, { "epoch": 1.234452184927169, "grad_norm": 3.9896669387817383, "learning_rate": 8.976860751150555e-06, "loss": 0.822, "step": 4873 }, { "epoch": 1.2347055098163395, "grad_norm": 3.564788341522217, "learning_rate": 
8.976352858078938e-06, "loss": 0.8184, "step": 4874 }, { "epoch": 1.2349588347055098, "grad_norm": 3.5641210079193115, "learning_rate": 8.975844853353195e-06, "loss": 0.848, "step": 4875 }, { "epoch": 1.2352121595946801, "grad_norm": 3.7378146648406982, "learning_rate": 8.975336736987593e-06, "loss": 0.7395, "step": 4876 }, { "epoch": 1.2354654844838506, "grad_norm": 3.9268436431884766, "learning_rate": 8.974828508996398e-06, "loss": 0.7369, "step": 4877 }, { "epoch": 1.235718809373021, "grad_norm": 3.9070422649383545, "learning_rate": 8.974320169393882e-06, "loss": 0.7026, "step": 4878 }, { "epoch": 1.2359721342621912, "grad_norm": 3.6587164402008057, "learning_rate": 8.973811718194317e-06, "loss": 0.8198, "step": 4879 }, { "epoch": 1.2362254591513615, "grad_norm": 3.9976251125335693, "learning_rate": 8.973303155411981e-06, "loss": 0.847, "step": 4880 }, { "epoch": 1.236478784040532, "grad_norm": 3.8499391078948975, "learning_rate": 8.972794481061156e-06, "loss": 0.8002, "step": 4881 }, { "epoch": 1.2367321089297023, "grad_norm": 3.5910487174987793, "learning_rate": 8.972285695156124e-06, "loss": 0.8595, "step": 4882 }, { "epoch": 1.2369854338188726, "grad_norm": 3.729759931564331, "learning_rate": 8.971776797711171e-06, "loss": 0.7493, "step": 4883 }, { "epoch": 1.2372387587080431, "grad_norm": 3.6986725330352783, "learning_rate": 8.971267788740587e-06, "loss": 0.8213, "step": 4884 }, { "epoch": 1.2374920835972134, "grad_norm": 4.1502909660339355, "learning_rate": 8.970758668258665e-06, "loss": 0.7147, "step": 4885 }, { "epoch": 1.2377454084863837, "grad_norm": 3.77043080329895, "learning_rate": 8.970249436279702e-06, "loss": 0.7631, "step": 4886 }, { "epoch": 1.2379987333755542, "grad_norm": 3.1170456409454346, "learning_rate": 8.969740092817992e-06, "loss": 0.691, "step": 4887 }, { "epoch": 1.2382520582647245, "grad_norm": 3.482490301132202, "learning_rate": 8.969230637887842e-06, "loss": 0.7393, "step": 4888 }, { "epoch": 1.2385053831538948, "grad_norm": 4.178841590881348, "learning_rate": 8.968721071503557e-06, "loss": 0.9059, "step": 4889 }, { "epoch": 1.2387587080430653, "grad_norm": 4.210383415222168, "learning_rate": 8.968211393679445e-06, "loss": 0.8748, "step": 4890 }, { "epoch": 1.2390120329322356, "grad_norm": 4.279092311859131, "learning_rate": 8.967701604429814e-06, "loss": 0.8227, "step": 4891 }, { "epoch": 1.239265357821406, "grad_norm": 3.8279335498809814, "learning_rate": 8.967191703768984e-06, "loss": 0.7862, "step": 4892 }, { "epoch": 1.2395186827105764, "grad_norm": 3.838911533355713, "learning_rate": 8.96668169171127e-06, "loss": 0.7087, "step": 4893 }, { "epoch": 1.2397720075997467, "grad_norm": 3.7137227058410645, "learning_rate": 8.966171568270994e-06, "loss": 0.7594, "step": 4894 }, { "epoch": 1.240025332488917, "grad_norm": 3.868379831314087, "learning_rate": 8.965661333462478e-06, "loss": 0.6866, "step": 4895 }, { "epoch": 1.2402786573780875, "grad_norm": 3.4717066287994385, "learning_rate": 8.965150987300052e-06, "loss": 0.7789, "step": 4896 }, { "epoch": 1.2405319822672578, "grad_norm": 3.9209249019622803, "learning_rate": 8.964640529798041e-06, "loss": 0.8094, "step": 4897 }, { "epoch": 1.240785307156428, "grad_norm": 3.8062753677368164, "learning_rate": 8.964129960970785e-06, "loss": 0.8163, "step": 4898 }, { "epoch": 1.2410386320455984, "grad_norm": 3.9788119792938232, "learning_rate": 8.963619280832617e-06, "loss": 0.8038, "step": 4899 }, { "epoch": 1.241291956934769, "grad_norm": 3.948587417602539, "learning_rate": 8.963108489397875e-06, "loss": 0.9206, 
"step": 4900 }, { "epoch": 1.2415452818239392, "grad_norm": 3.2996327877044678, "learning_rate": 8.962597586680908e-06, "loss": 0.8708, "step": 4901 }, { "epoch": 1.2417986067131095, "grad_norm": 3.3904151916503906, "learning_rate": 8.962086572696055e-06, "loss": 0.7771, "step": 4902 }, { "epoch": 1.24205193160228, "grad_norm": 3.5035574436187744, "learning_rate": 8.961575447457669e-06, "loss": 0.8709, "step": 4903 }, { "epoch": 1.2423052564914503, "grad_norm": 3.2244651317596436, "learning_rate": 8.961064210980101e-06, "loss": 0.7146, "step": 4904 }, { "epoch": 1.2425585813806206, "grad_norm": 3.9018938541412354, "learning_rate": 8.960552863277707e-06, "loss": 0.8699, "step": 4905 }, { "epoch": 1.242811906269791, "grad_norm": 3.8801350593566895, "learning_rate": 8.960041404364845e-06, "loss": 0.866, "step": 4906 }, { "epoch": 1.2430652311589614, "grad_norm": 3.586246967315674, "learning_rate": 8.959529834255876e-06, "loss": 0.7806, "step": 4907 }, { "epoch": 1.2433185560481317, "grad_norm": 3.703272581100464, "learning_rate": 8.959018152965164e-06, "loss": 0.7762, "step": 4908 }, { "epoch": 1.243571880937302, "grad_norm": 3.905339479446411, "learning_rate": 8.958506360507077e-06, "loss": 0.7887, "step": 4909 }, { "epoch": 1.2438252058264725, "grad_norm": 3.5449533462524414, "learning_rate": 8.957994456895989e-06, "loss": 0.6952, "step": 4910 }, { "epoch": 1.2440785307156428, "grad_norm": 3.859621524810791, "learning_rate": 8.957482442146271e-06, "loss": 0.8361, "step": 4911 }, { "epoch": 1.244331855604813, "grad_norm": 3.862034797668457, "learning_rate": 8.956970316272301e-06, "loss": 0.7858, "step": 4912 }, { "epoch": 1.2445851804939836, "grad_norm": 3.644862651824951, "learning_rate": 8.956458079288459e-06, "loss": 0.8152, "step": 4913 }, { "epoch": 1.2448385053831539, "grad_norm": 3.7374160289764404, "learning_rate": 8.955945731209128e-06, "loss": 0.7904, "step": 4914 }, { "epoch": 1.2450918302723242, "grad_norm": 3.565932035446167, "learning_rate": 8.955433272048694e-06, "loss": 0.7909, "step": 4915 }, { "epoch": 1.2453451551614947, "grad_norm": 3.4232900142669678, "learning_rate": 8.95492070182155e-06, "loss": 0.7504, "step": 4916 }, { "epoch": 1.245598480050665, "grad_norm": 3.5261430740356445, "learning_rate": 8.954408020542084e-06, "loss": 0.6876, "step": 4917 }, { "epoch": 1.2458518049398353, "grad_norm": 3.4949758052825928, "learning_rate": 8.953895228224697e-06, "loss": 0.6847, "step": 4918 }, { "epoch": 1.2461051298290058, "grad_norm": 3.655601739883423, "learning_rate": 8.953382324883782e-06, "loss": 0.7402, "step": 4919 }, { "epoch": 1.246358454718176, "grad_norm": 3.472031593322754, "learning_rate": 8.952869310533744e-06, "loss": 0.6989, "step": 4920 }, { "epoch": 1.2466117796073464, "grad_norm": 3.8667638301849365, "learning_rate": 8.95235618518899e-06, "loss": 0.8203, "step": 4921 }, { "epoch": 1.2468651044965169, "grad_norm": 3.60306978225708, "learning_rate": 8.951842948863927e-06, "loss": 0.8294, "step": 4922 }, { "epoch": 1.2471184293856872, "grad_norm": 3.7473983764648438, "learning_rate": 8.951329601572965e-06, "loss": 0.7854, "step": 4923 }, { "epoch": 1.2473717542748575, "grad_norm": 3.660061836242676, "learning_rate": 8.95081614333052e-06, "loss": 0.8648, "step": 4924 }, { "epoch": 1.247625079164028, "grad_norm": 3.6376841068267822, "learning_rate": 8.95030257415101e-06, "loss": 0.8261, "step": 4925 }, { "epoch": 1.2478784040531983, "grad_norm": 3.5895543098449707, "learning_rate": 8.949788894048853e-06, "loss": 0.811, "step": 4926 }, { "epoch": 
1.2481317289423686, "grad_norm": 3.393707752227783, "learning_rate": 8.949275103038479e-06, "loss": 0.8807, "step": 4927 }, { "epoch": 1.2483850538315389, "grad_norm": 3.668567657470703, "learning_rate": 8.948761201134309e-06, "loss": 0.6899, "step": 4928 }, { "epoch": 1.2486383787207094, "grad_norm": 3.5369865894317627, "learning_rate": 8.948247188350773e-06, "loss": 0.8388, "step": 4929 }, { "epoch": 1.2488917036098797, "grad_norm": 3.3084373474121094, "learning_rate": 8.947733064702308e-06, "loss": 0.7367, "step": 4930 }, { "epoch": 1.24914502849905, "grad_norm": 3.5315730571746826, "learning_rate": 8.94721883020335e-06, "loss": 0.7929, "step": 4931 }, { "epoch": 1.2493983533882205, "grad_norm": 3.397585391998291, "learning_rate": 8.946704484868337e-06, "loss": 0.8362, "step": 4932 }, { "epoch": 1.2496516782773908, "grad_norm": 4.010646343231201, "learning_rate": 8.946190028711712e-06, "loss": 0.7752, "step": 4933 }, { "epoch": 1.249905003166561, "grad_norm": 4.345476150512695, "learning_rate": 8.945675461747919e-06, "loss": 0.7777, "step": 4934 }, { "epoch": 1.2501583280557313, "grad_norm": 3.873446464538574, "learning_rate": 8.94516078399141e-06, "loss": 0.7596, "step": 4935 }, { "epoch": 1.2504116529449019, "grad_norm": 3.6672351360321045, "learning_rate": 8.944645995456634e-06, "loss": 0.7857, "step": 4936 }, { "epoch": 1.2506649778340722, "grad_norm": 3.8857362270355225, "learning_rate": 8.944131096158046e-06, "loss": 0.7407, "step": 4937 }, { "epoch": 1.2509183027232424, "grad_norm": 4.123960494995117, "learning_rate": 8.943616086110107e-06, "loss": 0.8535, "step": 4938 }, { "epoch": 1.251171627612413, "grad_norm": 4.322494983673096, "learning_rate": 8.943100965327276e-06, "loss": 0.7959, "step": 4939 }, { "epoch": 1.2514249525015833, "grad_norm": 3.6760008335113525, "learning_rate": 8.942585733824018e-06, "loss": 0.7764, "step": 4940 }, { "epoch": 1.2516782773907535, "grad_norm": 3.91406512260437, "learning_rate": 8.9420703916148e-06, "loss": 0.9002, "step": 4941 }, { "epoch": 1.251931602279924, "grad_norm": 3.5883524417877197, "learning_rate": 8.941554938714094e-06, "loss": 0.65, "step": 4942 }, { "epoch": 1.2521849271690944, "grad_norm": 3.617281675338745, "learning_rate": 8.94103937513637e-06, "loss": 0.7746, "step": 4943 }, { "epoch": 1.2524382520582646, "grad_norm": 3.4269120693206787, "learning_rate": 8.940523700896111e-06, "loss": 0.8321, "step": 4944 }, { "epoch": 1.2526915769474352, "grad_norm": 3.3844025135040283, "learning_rate": 8.940007916007792e-06, "loss": 0.8338, "step": 4945 }, { "epoch": 1.2529449018366055, "grad_norm": 4.153983116149902, "learning_rate": 8.939492020485898e-06, "loss": 0.9274, "step": 4946 }, { "epoch": 1.2531982267257757, "grad_norm": 3.826216459274292, "learning_rate": 8.938976014344913e-06, "loss": 0.6986, "step": 4947 }, { "epoch": 1.2534515516149463, "grad_norm": 3.4436357021331787, "learning_rate": 8.938459897599327e-06, "loss": 0.6969, "step": 4948 }, { "epoch": 1.2537048765041165, "grad_norm": 4.002293586730957, "learning_rate": 8.937943670263636e-06, "loss": 0.6836, "step": 4949 }, { "epoch": 1.2539582013932868, "grad_norm": 3.919435501098633, "learning_rate": 8.93742733235233e-06, "loss": 0.7949, "step": 4950 }, { "epoch": 1.2542115262824574, "grad_norm": 3.728658437728882, "learning_rate": 8.93691088387991e-06, "loss": 0.7893, "step": 4951 }, { "epoch": 1.2544648511716276, "grad_norm": 3.820829153060913, "learning_rate": 8.93639432486088e-06, "loss": 0.8745, "step": 4952 }, { "epoch": 1.254718176060798, "grad_norm": 
3.7555582523345947, "learning_rate": 8.935877655309739e-06, "loss": 0.7299, "step": 4953 }, { "epoch": 1.2549715009499685, "grad_norm": 3.7295260429382324, "learning_rate": 8.935360875241e-06, "loss": 0.8915, "step": 4954 }, { "epoch": 1.2552248258391387, "grad_norm": 3.7932441234588623, "learning_rate": 8.934843984669171e-06, "loss": 0.7672, "step": 4955 }, { "epoch": 1.255478150728309, "grad_norm": 4.009216785430908, "learning_rate": 8.934326983608768e-06, "loss": 0.8522, "step": 4956 }, { "epoch": 1.2557314756174796, "grad_norm": 3.5970778465270996, "learning_rate": 8.933809872074306e-06, "loss": 0.8272, "step": 4957 }, { "epoch": 1.2559848005066498, "grad_norm": 3.852548599243164, "learning_rate": 8.933292650080307e-06, "loss": 0.7751, "step": 4958 }, { "epoch": 1.2562381253958201, "grad_norm": 3.8101701736450195, "learning_rate": 8.932775317641294e-06, "loss": 0.7083, "step": 4959 }, { "epoch": 1.2564914502849904, "grad_norm": 4.030754566192627, "learning_rate": 8.932257874771792e-06, "loss": 0.9273, "step": 4960 }, { "epoch": 1.256744775174161, "grad_norm": 4.145448684692383, "learning_rate": 8.931740321486335e-06, "loss": 0.7546, "step": 4961 }, { "epoch": 1.2569981000633312, "grad_norm": 3.3041367530822754, "learning_rate": 8.93122265779945e-06, "loss": 0.857, "step": 4962 }, { "epoch": 1.2572514249525015, "grad_norm": 3.5868442058563232, "learning_rate": 8.930704883725676e-06, "loss": 0.7319, "step": 4963 }, { "epoch": 1.2575047498416718, "grad_norm": 3.6959526538848877, "learning_rate": 8.93018699927955e-06, "loss": 0.7746, "step": 4964 }, { "epoch": 1.2577580747308423, "grad_norm": 3.8091931343078613, "learning_rate": 8.929669004475616e-06, "loss": 0.8816, "step": 4965 }, { "epoch": 1.2580113996200126, "grad_norm": 3.836026430130005, "learning_rate": 8.929150899328418e-06, "loss": 0.7411, "step": 4966 }, { "epoch": 1.258264724509183, "grad_norm": 3.737752914428711, "learning_rate": 8.928632683852504e-06, "loss": 0.7715, "step": 4967 }, { "epoch": 1.2585180493983534, "grad_norm": 3.351428270339966, "learning_rate": 8.928114358062428e-06, "loss": 0.7465, "step": 4968 }, { "epoch": 1.2587713742875237, "grad_norm": 4.012657165527344, "learning_rate": 8.927595921972738e-06, "loss": 0.7875, "step": 4969 }, { "epoch": 1.259024699176694, "grad_norm": 3.795178174972534, "learning_rate": 8.927077375597997e-06, "loss": 0.8054, "step": 4970 }, { "epoch": 1.2592780240658645, "grad_norm": 3.275935173034668, "learning_rate": 8.926558718952765e-06, "loss": 0.7714, "step": 4971 }, { "epoch": 1.2595313489550348, "grad_norm": 4.029940128326416, "learning_rate": 8.926039952051603e-06, "loss": 0.8743, "step": 4972 }, { "epoch": 1.2597846738442051, "grad_norm": 3.473362922668457, "learning_rate": 8.925521074909078e-06, "loss": 0.6853, "step": 4973 }, { "epoch": 1.2600379987333756, "grad_norm": 3.401608467102051, "learning_rate": 8.925002087539763e-06, "loss": 0.7765, "step": 4974 }, { "epoch": 1.260291323622546, "grad_norm": 4.135184288024902, "learning_rate": 8.92448298995823e-06, "loss": 0.7321, "step": 4975 }, { "epoch": 1.2605446485117162, "grad_norm": 3.2951271533966064, "learning_rate": 8.923963782179051e-06, "loss": 0.7111, "step": 4976 }, { "epoch": 1.2607979734008867, "grad_norm": 3.373471975326538, "learning_rate": 8.92344446421681e-06, "loss": 0.7699, "step": 4977 }, { "epoch": 1.261051298290057, "grad_norm": 3.8345279693603516, "learning_rate": 8.922925036086086e-06, "loss": 0.7885, "step": 4978 }, { "epoch": 1.2613046231792273, "grad_norm": 3.6304073333740234, "learning_rate": 
8.922405497801468e-06, "loss": 0.8356, "step": 4979 }, { "epoch": 1.2615579480683978, "grad_norm": 3.9937851428985596, "learning_rate": 8.921885849377539e-06, "loss": 0.8689, "step": 4980 }, { "epoch": 1.2618112729575681, "grad_norm": 4.083383560180664, "learning_rate": 8.921366090828896e-06, "loss": 0.8164, "step": 4981 }, { "epoch": 1.2620645978467384, "grad_norm": 3.7671871185302734, "learning_rate": 8.920846222170129e-06, "loss": 0.7979, "step": 4982 }, { "epoch": 1.262317922735909, "grad_norm": 4.072494029998779, "learning_rate": 8.920326243415839e-06, "loss": 0.8004, "step": 4983 }, { "epoch": 1.2625712476250792, "grad_norm": 3.728637933731079, "learning_rate": 8.919806154580623e-06, "loss": 0.8023, "step": 4984 }, { "epoch": 1.2628245725142495, "grad_norm": 3.6757984161376953, "learning_rate": 8.919285955679092e-06, "loss": 0.8087, "step": 4985 }, { "epoch": 1.26307789740342, "grad_norm": 3.940227508544922, "learning_rate": 8.918765646725845e-06, "loss": 0.7942, "step": 4986 }, { "epoch": 1.2633312222925903, "grad_norm": 3.649754762649536, "learning_rate": 8.918245227735494e-06, "loss": 0.727, "step": 4987 }, { "epoch": 1.2635845471817606, "grad_norm": 3.6697895526885986, "learning_rate": 8.917724698722657e-06, "loss": 0.683, "step": 4988 }, { "epoch": 1.263837872070931, "grad_norm": 4.008494853973389, "learning_rate": 8.917204059701942e-06, "loss": 0.7574, "step": 4989 }, { "epoch": 1.2640911969601012, "grad_norm": 3.700213670730591, "learning_rate": 8.916683310687977e-06, "loss": 0.6426, "step": 4990 }, { "epoch": 1.2643445218492717, "grad_norm": 4.02217435836792, "learning_rate": 8.916162451695378e-06, "loss": 0.7463, "step": 4991 }, { "epoch": 1.264597846738442, "grad_norm": 3.6228909492492676, "learning_rate": 8.915641482738775e-06, "loss": 0.7692, "step": 4992 }, { "epoch": 1.2648511716276123, "grad_norm": 3.844886541366577, "learning_rate": 8.915120403832793e-06, "loss": 0.7234, "step": 4993 }, { "epoch": 1.2651044965167828, "grad_norm": 3.5308997631073, "learning_rate": 8.914599214992065e-06, "loss": 0.8154, "step": 4994 }, { "epoch": 1.265357821405953, "grad_norm": 4.345712184906006, "learning_rate": 8.914077916231225e-06, "loss": 0.8576, "step": 4995 }, { "epoch": 1.2656111462951234, "grad_norm": 3.661726474761963, "learning_rate": 8.913556507564914e-06, "loss": 0.6412, "step": 4996 }, { "epoch": 1.265864471184294, "grad_norm": 3.592749834060669, "learning_rate": 8.913034989007767e-06, "loss": 0.8385, "step": 4997 }, { "epoch": 1.2661177960734642, "grad_norm": 3.512347459793091, "learning_rate": 8.912513360574435e-06, "loss": 0.8836, "step": 4998 }, { "epoch": 1.2663711209626345, "grad_norm": 3.67021107673645, "learning_rate": 8.911991622279559e-06, "loss": 0.7278, "step": 4999 }, { "epoch": 1.266624445851805, "grad_norm": 3.180130958557129, "learning_rate": 8.911469774137793e-06, "loss": 0.7645, "step": 5000 }, { "epoch": 1.266624445851805, "eval_loss": 1.1878291368484497, "eval_runtime": 13.9366, "eval_samples_per_second": 28.701, "eval_steps_per_second": 3.588, "step": 5000 }, { "epoch": 1.2668777707409753, "grad_norm": 3.49995493888855, "learning_rate": 8.910947816163787e-06, "loss": 0.7172, "step": 5001 }, { "epoch": 1.2671310956301456, "grad_norm": 3.2964305877685547, "learning_rate": 8.910425748372202e-06, "loss": 0.7972, "step": 5002 }, { "epoch": 1.267384420519316, "grad_norm": 4.145137310028076, "learning_rate": 8.909903570777692e-06, "loss": 0.8347, "step": 5003 }, { "epoch": 1.2676377454084864, "grad_norm": 3.467320442199707, "learning_rate": 
8.909381283394925e-06, "loss": 0.8494, "step": 5004 }, { "epoch": 1.2678910702976567, "grad_norm": 3.2528982162475586, "learning_rate": 8.908858886238562e-06, "loss": 0.7902, "step": 5005 }, { "epoch": 1.2681443951868272, "grad_norm": 3.5220718383789062, "learning_rate": 8.908336379323274e-06, "loss": 0.7353, "step": 5006 }, { "epoch": 1.2683977200759975, "grad_norm": 3.4364192485809326, "learning_rate": 8.907813762663731e-06, "loss": 0.8121, "step": 5007 }, { "epoch": 1.2686510449651678, "grad_norm": 3.683168411254883, "learning_rate": 8.907291036274612e-06, "loss": 0.7455, "step": 5008 }, { "epoch": 1.2689043698543383, "grad_norm": 3.633958101272583, "learning_rate": 8.906768200170589e-06, "loss": 0.6622, "step": 5009 }, { "epoch": 1.2691576947435086, "grad_norm": 3.8055920600891113, "learning_rate": 8.906245254366348e-06, "loss": 0.8261, "step": 5010 }, { "epoch": 1.2694110196326789, "grad_norm": 3.2654600143432617, "learning_rate": 8.90572219887657e-06, "loss": 0.6898, "step": 5011 }, { "epoch": 1.2696643445218494, "grad_norm": 4.933192253112793, "learning_rate": 8.905199033715943e-06, "loss": 0.8274, "step": 5012 }, { "epoch": 1.2699176694110197, "grad_norm": 3.869718074798584, "learning_rate": 8.904675758899157e-06, "loss": 0.7575, "step": 5013 }, { "epoch": 1.27017099430019, "grad_norm": 3.6533114910125732, "learning_rate": 8.904152374440908e-06, "loss": 0.8566, "step": 5014 }, { "epoch": 1.2704243191893603, "grad_norm": 3.694594383239746, "learning_rate": 8.903628880355888e-06, "loss": 0.6968, "step": 5015 }, { "epoch": 1.2706776440785308, "grad_norm": 3.6560215950012207, "learning_rate": 8.903105276658799e-06, "loss": 0.8285, "step": 5016 }, { "epoch": 1.270930968967701, "grad_norm": 4.171489238739014, "learning_rate": 8.902581563364344e-06, "loss": 0.8613, "step": 5017 }, { "epoch": 1.2711842938568714, "grad_norm": 3.6669111251831055, "learning_rate": 8.902057740487227e-06, "loss": 0.6709, "step": 5018 }, { "epoch": 1.2714376187460417, "grad_norm": 3.866795301437378, "learning_rate": 8.901533808042157e-06, "loss": 0.8865, "step": 5019 }, { "epoch": 1.2716909436352122, "grad_norm": 4.075090408325195, "learning_rate": 8.901009766043846e-06, "loss": 0.7882, "step": 5020 }, { "epoch": 1.2719442685243825, "grad_norm": 3.5593883991241455, "learning_rate": 8.90048561450701e-06, "loss": 0.8008, "step": 5021 }, { "epoch": 1.2721975934135528, "grad_norm": 4.1571478843688965, "learning_rate": 8.899961353446367e-06, "loss": 0.6434, "step": 5022 }, { "epoch": 1.2724509183027233, "grad_norm": 3.5136566162109375, "learning_rate": 8.899436982876635e-06, "loss": 0.753, "step": 5023 }, { "epoch": 1.2727042431918936, "grad_norm": 4.385815620422363, "learning_rate": 8.89891250281254e-06, "loss": 0.814, "step": 5024 }, { "epoch": 1.2729575680810639, "grad_norm": 4.039702892303467, "learning_rate": 8.898387913268812e-06, "loss": 0.9063, "step": 5025 }, { "epoch": 1.2732108929702344, "grad_norm": 3.526564121246338, "learning_rate": 8.897863214260176e-06, "loss": 0.7967, "step": 5026 }, { "epoch": 1.2734642178594047, "grad_norm": 3.5580739974975586, "learning_rate": 8.897338405801368e-06, "loss": 0.8186, "step": 5027 }, { "epoch": 1.273717542748575, "grad_norm": 3.797952175140381, "learning_rate": 8.896813487907125e-06, "loss": 0.6974, "step": 5028 }, { "epoch": 1.2739708676377455, "grad_norm": 3.9370381832122803, "learning_rate": 8.896288460592187e-06, "loss": 0.7742, "step": 5029 }, { "epoch": 1.2742241925269158, "grad_norm": 3.9475274085998535, "learning_rate": 8.895763323871292e-06, "loss": 0.8672, 
"step": 5030 }, { "epoch": 1.274477517416086, "grad_norm": 3.5705692768096924, "learning_rate": 8.89523807775919e-06, "loss": 0.7926, "step": 5031 }, { "epoch": 1.2747308423052566, "grad_norm": 5.121907711029053, "learning_rate": 8.89471272227063e-06, "loss": 0.7513, "step": 5032 }, { "epoch": 1.2749841671944269, "grad_norm": 3.21358060836792, "learning_rate": 8.89418725742036e-06, "loss": 0.8127, "step": 5033 }, { "epoch": 1.2752374920835972, "grad_norm": 3.715261220932007, "learning_rate": 8.893661683223136e-06, "loss": 0.8201, "step": 5034 }, { "epoch": 1.2754908169727677, "grad_norm": 3.8979718685150146, "learning_rate": 8.89313599969372e-06, "loss": 0.8807, "step": 5035 }, { "epoch": 1.275744141861938, "grad_norm": 3.51139497756958, "learning_rate": 8.892610206846868e-06, "loss": 0.7374, "step": 5036 }, { "epoch": 1.2759974667511083, "grad_norm": 3.727140188217163, "learning_rate": 8.892084304697347e-06, "loss": 0.8441, "step": 5037 }, { "epoch": 1.2762507916402788, "grad_norm": 3.341991424560547, "learning_rate": 8.891558293259921e-06, "loss": 0.6944, "step": 5038 }, { "epoch": 1.276504116529449, "grad_norm": 3.6908040046691895, "learning_rate": 8.891032172549362e-06, "loss": 0.7233, "step": 5039 }, { "epoch": 1.2767574414186194, "grad_norm": 3.654240608215332, "learning_rate": 8.890505942580443e-06, "loss": 0.8648, "step": 5040 }, { "epoch": 1.2770107663077899, "grad_norm": 3.500528335571289, "learning_rate": 8.889979603367942e-06, "loss": 0.7325, "step": 5041 }, { "epoch": 1.2772640911969602, "grad_norm": 3.7596962451934814, "learning_rate": 8.889453154926637e-06, "loss": 0.96, "step": 5042 }, { "epoch": 1.2775174160861305, "grad_norm": 3.9248828887939453, "learning_rate": 8.88892659727131e-06, "loss": 0.8253, "step": 5043 }, { "epoch": 1.2777707409753007, "grad_norm": 3.842301845550537, "learning_rate": 8.888399930416746e-06, "loss": 0.7854, "step": 5044 }, { "epoch": 1.2780240658644713, "grad_norm": 3.546661138534546, "learning_rate": 8.887873154377736e-06, "loss": 0.7607, "step": 5045 }, { "epoch": 1.2782773907536416, "grad_norm": 3.448086977005005, "learning_rate": 8.887346269169067e-06, "loss": 0.7556, "step": 5046 }, { "epoch": 1.2785307156428118, "grad_norm": 3.7830214500427246, "learning_rate": 8.88681927480554e-06, "loss": 0.8122, "step": 5047 }, { "epoch": 1.2787840405319821, "grad_norm": 3.8307809829711914, "learning_rate": 8.886292171301947e-06, "loss": 0.9047, "step": 5048 }, { "epoch": 1.2790373654211526, "grad_norm": 3.741891622543335, "learning_rate": 8.885764958673093e-06, "loss": 0.7657, "step": 5049 }, { "epoch": 1.279290690310323, "grad_norm": 3.4128377437591553, "learning_rate": 8.885237636933779e-06, "loss": 0.7411, "step": 5050 }, { "epoch": 1.2795440151994932, "grad_norm": 3.633951187133789, "learning_rate": 8.884710206098814e-06, "loss": 0.8994, "step": 5051 }, { "epoch": 1.2797973400886637, "grad_norm": 3.897345542907715, "learning_rate": 8.884182666183005e-06, "loss": 0.8265, "step": 5052 }, { "epoch": 1.280050664977834, "grad_norm": 3.6153879165649414, "learning_rate": 8.883655017201169e-06, "loss": 0.8037, "step": 5053 }, { "epoch": 1.2803039898670043, "grad_norm": 4.0165791511535645, "learning_rate": 8.883127259168121e-06, "loss": 0.8108, "step": 5054 }, { "epoch": 1.2805573147561748, "grad_norm": 3.8056552410125732, "learning_rate": 8.882599392098677e-06, "loss": 0.7588, "step": 5055 }, { "epoch": 1.2808106396453451, "grad_norm": 4.42263126373291, "learning_rate": 8.882071416007664e-06, "loss": 1.0186, "step": 5056 }, { "epoch": 1.2810639645345154, 
"grad_norm": 3.960272789001465, "learning_rate": 8.881543330909905e-06, "loss": 0.7764, "step": 5057 }, { "epoch": 1.281317289423686, "grad_norm": 4.283044338226318, "learning_rate": 8.881015136820227e-06, "loss": 0.7066, "step": 5058 }, { "epoch": 1.2815706143128562, "grad_norm": 3.494903087615967, "learning_rate": 8.880486833753464e-06, "loss": 0.6419, "step": 5059 }, { "epoch": 1.2818239392020265, "grad_norm": 3.93373441696167, "learning_rate": 8.879958421724448e-06, "loss": 0.6975, "step": 5060 }, { "epoch": 1.282077264091197, "grad_norm": 3.516568422317505, "learning_rate": 8.87942990074802e-06, "loss": 0.6778, "step": 5061 }, { "epoch": 1.2823305889803673, "grad_norm": 3.7583367824554443, "learning_rate": 8.878901270839017e-06, "loss": 0.772, "step": 5062 }, { "epoch": 1.2825839138695376, "grad_norm": 3.836733102798462, "learning_rate": 8.878372532012285e-06, "loss": 0.7258, "step": 5063 }, { "epoch": 1.2828372387587081, "grad_norm": 3.661405086517334, "learning_rate": 8.87784368428267e-06, "loss": 0.7599, "step": 5064 }, { "epoch": 1.2830905636478784, "grad_norm": 3.767148733139038, "learning_rate": 8.877314727665021e-06, "loss": 0.7297, "step": 5065 }, { "epoch": 1.2833438885370487, "grad_norm": 4.096794605255127, "learning_rate": 8.876785662174193e-06, "loss": 0.7756, "step": 5066 }, { "epoch": 1.2835972134262192, "grad_norm": 3.8847033977508545, "learning_rate": 8.876256487825041e-06, "loss": 0.7152, "step": 5067 }, { "epoch": 1.2838505383153895, "grad_norm": 3.8769469261169434, "learning_rate": 8.875727204632422e-06, "loss": 0.8622, "step": 5068 }, { "epoch": 1.2841038632045598, "grad_norm": 3.176262378692627, "learning_rate": 8.8751978126112e-06, "loss": 0.7312, "step": 5069 }, { "epoch": 1.2843571880937303, "grad_norm": 3.7866973876953125, "learning_rate": 8.874668311776239e-06, "loss": 0.8017, "step": 5070 }, { "epoch": 1.2846105129829006, "grad_norm": 3.927074670791626, "learning_rate": 8.87413870214241e-06, "loss": 0.8997, "step": 5071 }, { "epoch": 1.284863837872071, "grad_norm": 3.751995086669922, "learning_rate": 8.87360898372458e-06, "loss": 0.8129, "step": 5072 }, { "epoch": 1.2851171627612412, "grad_norm": 3.669092893600464, "learning_rate": 8.873079156537625e-06, "loss": 0.7774, "step": 5073 }, { "epoch": 1.2853704876504117, "grad_norm": 3.3392302989959717, "learning_rate": 8.872549220596422e-06, "loss": 0.7465, "step": 5074 }, { "epoch": 1.285623812539582, "grad_norm": 3.6528210639953613, "learning_rate": 8.872019175915854e-06, "loss": 0.8081, "step": 5075 }, { "epoch": 1.2858771374287523, "grad_norm": 3.4234163761138916, "learning_rate": 8.871489022510801e-06, "loss": 0.7314, "step": 5076 }, { "epoch": 1.2861304623179226, "grad_norm": 3.5174355506896973, "learning_rate": 8.870958760396151e-06, "loss": 0.7822, "step": 5077 }, { "epoch": 1.2863837872070931, "grad_norm": 3.6532506942749023, "learning_rate": 8.870428389586794e-06, "loss": 0.766, "step": 5078 }, { "epoch": 1.2866371120962634, "grad_norm": 3.323868989944458, "learning_rate": 8.86989791009762e-06, "loss": 0.7974, "step": 5079 }, { "epoch": 1.2868904369854337, "grad_norm": 3.9011785984039307, "learning_rate": 8.869367321943527e-06, "loss": 0.6893, "step": 5080 }, { "epoch": 1.2871437618746042, "grad_norm": 3.767474412918091, "learning_rate": 8.868836625139415e-06, "loss": 0.8577, "step": 5081 }, { "epoch": 1.2873970867637745, "grad_norm": 4.177369594573975, "learning_rate": 8.868305819700181e-06, "loss": 0.7859, "step": 5082 }, { "epoch": 1.2876504116529448, "grad_norm": 3.96870493888855, "learning_rate": 
8.867774905640733e-06, "loss": 0.7837, "step": 5083 }, { "epoch": 1.2879037365421153, "grad_norm": 3.806058645248413, "learning_rate": 8.86724388297598e-06, "loss": 0.6973, "step": 5084 }, { "epoch": 1.2881570614312856, "grad_norm": 3.4279656410217285, "learning_rate": 8.866712751720831e-06, "loss": 0.8016, "step": 5085 }, { "epoch": 1.288410386320456, "grad_norm": 3.970402479171753, "learning_rate": 8.8661815118902e-06, "loss": 0.7277, "step": 5086 }, { "epoch": 1.2886637112096264, "grad_norm": 3.720019817352295, "learning_rate": 8.865650163499004e-06, "loss": 0.8066, "step": 5087 }, { "epoch": 1.2889170360987967, "grad_norm": 4.123616695404053, "learning_rate": 8.865118706562164e-06, "loss": 0.8142, "step": 5088 }, { "epoch": 1.289170360987967, "grad_norm": 4.003778457641602, "learning_rate": 8.8645871410946e-06, "loss": 0.8571, "step": 5089 }, { "epoch": 1.2894236858771375, "grad_norm": 3.4247050285339355, "learning_rate": 8.86405546711124e-06, "loss": 0.8019, "step": 5090 }, { "epoch": 1.2896770107663078, "grad_norm": 3.3717615604400635, "learning_rate": 8.863523684627018e-06, "loss": 0.727, "step": 5091 }, { "epoch": 1.289930335655478, "grad_norm": 3.3375675678253174, "learning_rate": 8.862991793656858e-06, "loss": 0.753, "step": 5092 }, { "epoch": 1.2901836605446486, "grad_norm": 3.5768208503723145, "learning_rate": 8.8624597942157e-06, "loss": 0.8196, "step": 5093 }, { "epoch": 1.290436985433819, "grad_norm": 3.681488275527954, "learning_rate": 8.86192768631848e-06, "loss": 0.8417, "step": 5094 }, { "epoch": 1.2906903103229892, "grad_norm": 3.708117961883545, "learning_rate": 8.861395469980142e-06, "loss": 0.8578, "step": 5095 }, { "epoch": 1.2909436352121597, "grad_norm": 3.418780565261841, "learning_rate": 8.860863145215627e-06, "loss": 0.7713, "step": 5096 }, { "epoch": 1.29119696010133, "grad_norm": 3.7200613021850586, "learning_rate": 8.860330712039886e-06, "loss": 0.7198, "step": 5097 }, { "epoch": 1.2914502849905003, "grad_norm": 3.6819093227386475, "learning_rate": 8.859798170467868e-06, "loss": 0.8028, "step": 5098 }, { "epoch": 1.2917036098796708, "grad_norm": 3.46396541595459, "learning_rate": 8.859265520514525e-06, "loss": 0.8006, "step": 5099 }, { "epoch": 1.291956934768841, "grad_norm": 3.258542537689209, "learning_rate": 8.858732762194816e-06, "loss": 0.6876, "step": 5100 }, { "epoch": 1.2922102596580114, "grad_norm": 3.6014840602874756, "learning_rate": 8.8581998955237e-06, "loss": 0.6743, "step": 5101 }, { "epoch": 1.2924635845471817, "grad_norm": 4.297863006591797, "learning_rate": 8.857666920516139e-06, "loss": 0.8732, "step": 5102 }, { "epoch": 1.2927169094363522, "grad_norm": 3.884458065032959, "learning_rate": 8.8571338371871e-06, "loss": 0.9417, "step": 5103 }, { "epoch": 1.2929702343255225, "grad_norm": 3.376234769821167, "learning_rate": 8.856600645551549e-06, "loss": 0.6684, "step": 5104 }, { "epoch": 1.2932235592146928, "grad_norm": 3.6533520221710205, "learning_rate": 8.85606734562446e-06, "loss": 0.7386, "step": 5105 }, { "epoch": 1.293476884103863, "grad_norm": 3.8629558086395264, "learning_rate": 8.855533937420806e-06, "loss": 0.7538, "step": 5106 }, { "epoch": 1.2937302089930336, "grad_norm": 3.934384346008301, "learning_rate": 8.855000420955567e-06, "loss": 0.8641, "step": 5107 }, { "epoch": 1.2939835338822039, "grad_norm": 3.7146103382110596, "learning_rate": 8.854466796243722e-06, "loss": 0.8123, "step": 5108 }, { "epoch": 1.2942368587713742, "grad_norm": 3.9950599670410156, "learning_rate": 8.853933063300258e-06, "loss": 0.6525, "step": 5109 }, { 
"epoch": 1.2944901836605447, "grad_norm": 3.913001537322998, "learning_rate": 8.853399222140158e-06, "loss": 0.8264, "step": 5110 }, { "epoch": 1.294743508549715, "grad_norm": 3.609727382659912, "learning_rate": 8.852865272778413e-06, "loss": 0.7613, "step": 5111 }, { "epoch": 1.2949968334388853, "grad_norm": 3.5861222743988037, "learning_rate": 8.85233121523002e-06, "loss": 0.9255, "step": 5112 }, { "epoch": 1.2952501583280558, "grad_norm": 4.302942276000977, "learning_rate": 8.85179704950997e-06, "loss": 0.8833, "step": 5113 }, { "epoch": 1.295503483217226, "grad_norm": 3.647034168243408, "learning_rate": 8.851262775633263e-06, "loss": 0.7407, "step": 5114 }, { "epoch": 1.2957568081063964, "grad_norm": 3.4388327598571777, "learning_rate": 8.850728393614903e-06, "loss": 0.8952, "step": 5115 }, { "epoch": 1.2960101329955669, "grad_norm": 3.5317115783691406, "learning_rate": 8.850193903469895e-06, "loss": 0.8687, "step": 5116 }, { "epoch": 1.2962634578847372, "grad_norm": 3.7548828125, "learning_rate": 8.849659305213248e-06, "loss": 0.7272, "step": 5117 }, { "epoch": 1.2965167827739075, "grad_norm": 3.6353676319122314, "learning_rate": 8.84912459885997e-06, "loss": 0.7473, "step": 5118 }, { "epoch": 1.296770107663078, "grad_norm": 3.826050043106079, "learning_rate": 8.848589784425076e-06, "loss": 0.8157, "step": 5119 }, { "epoch": 1.2970234325522483, "grad_norm": 4.017717361450195, "learning_rate": 8.848054861923587e-06, "loss": 0.9376, "step": 5120 }, { "epoch": 1.2972767574414186, "grad_norm": 3.9414191246032715, "learning_rate": 8.847519831370522e-06, "loss": 0.7703, "step": 5121 }, { "epoch": 1.297530082330589, "grad_norm": 3.7812705039978027, "learning_rate": 8.8469846927809e-06, "loss": 0.7178, "step": 5122 }, { "epoch": 1.2977834072197594, "grad_norm": 3.833151340484619, "learning_rate": 8.846449446169754e-06, "loss": 0.7842, "step": 5123 }, { "epoch": 1.2980367321089297, "grad_norm": 4.485802173614502, "learning_rate": 8.845914091552108e-06, "loss": 0.9194, "step": 5124 }, { "epoch": 1.2982900569981002, "grad_norm": 3.8566842079162598, "learning_rate": 8.845378628942998e-06, "loss": 0.7548, "step": 5125 }, { "epoch": 1.2985433818872705, "grad_norm": 3.787710666656494, "learning_rate": 8.844843058357458e-06, "loss": 0.7981, "step": 5126 }, { "epoch": 1.2987967067764408, "grad_norm": 3.9272103309631348, "learning_rate": 8.844307379810526e-06, "loss": 0.7384, "step": 5127 }, { "epoch": 1.2990500316656113, "grad_norm": 4.102965831756592, "learning_rate": 8.843771593317246e-06, "loss": 0.7695, "step": 5128 }, { "epoch": 1.2993033565547816, "grad_norm": 4.810306072235107, "learning_rate": 8.843235698892661e-06, "loss": 0.8306, "step": 5129 }, { "epoch": 1.2995566814439519, "grad_norm": 3.666740655899048, "learning_rate": 8.842699696551818e-06, "loss": 0.7999, "step": 5130 }, { "epoch": 1.2998100063331222, "grad_norm": 3.9185757637023926, "learning_rate": 8.84216358630977e-06, "loss": 0.7595, "step": 5131 }, { "epoch": 1.3000633312222927, "grad_norm": 3.7936949729919434, "learning_rate": 8.841627368181569e-06, "loss": 0.8162, "step": 5132 }, { "epoch": 1.300316656111463, "grad_norm": 3.8769707679748535, "learning_rate": 8.84109104218227e-06, "loss": 0.7763, "step": 5133 }, { "epoch": 1.3005699810006333, "grad_norm": 3.700146436691284, "learning_rate": 8.840554608326935e-06, "loss": 0.7458, "step": 5134 }, { "epoch": 1.3008233058898035, "grad_norm": 3.6501848697662354, "learning_rate": 8.84001806663063e-06, "loss": 0.7223, "step": 5135 }, { "epoch": 1.301076630778974, "grad_norm": 
3.628479242324829, "learning_rate": 8.839481417108414e-06, "loss": 0.8136, "step": 5136 }, { "epoch": 1.3013299556681444, "grad_norm": 3.7592058181762695, "learning_rate": 8.838944659775358e-06, "loss": 0.7518, "step": 5137 }, { "epoch": 1.3015832805573146, "grad_norm": 3.9640681743621826, "learning_rate": 8.83840779464654e-06, "loss": 0.8824, "step": 5138 }, { "epoch": 1.3018366054464852, "grad_norm": 3.3016247749328613, "learning_rate": 8.837870821737027e-06, "loss": 0.7037, "step": 5139 }, { "epoch": 1.3020899303356555, "grad_norm": 3.74817156791687, "learning_rate": 8.8373337410619e-06, "loss": 0.7651, "step": 5140 }, { "epoch": 1.3023432552248257, "grad_norm": 3.2207884788513184, "learning_rate": 8.83679655263624e-06, "loss": 0.6062, "step": 5141 }, { "epoch": 1.3025965801139963, "grad_norm": 3.3142166137695312, "learning_rate": 8.836259256475132e-06, "loss": 0.6988, "step": 5142 }, { "epoch": 1.3028499050031666, "grad_norm": 4.2149434089660645, "learning_rate": 8.835721852593661e-06, "loss": 0.8357, "step": 5143 }, { "epoch": 1.3031032298923368, "grad_norm": 4.146454334259033, "learning_rate": 8.835184341006919e-06, "loss": 0.7967, "step": 5144 }, { "epoch": 1.3033565547815074, "grad_norm": 4.046872138977051, "learning_rate": 8.834646721729999e-06, "loss": 0.817, "step": 5145 }, { "epoch": 1.3036098796706777, "grad_norm": 3.705402374267578, "learning_rate": 8.834108994777995e-06, "loss": 0.7499, "step": 5146 }, { "epoch": 1.303863204559848, "grad_norm": 3.4352595806121826, "learning_rate": 8.833571160166007e-06, "loss": 0.7402, "step": 5147 }, { "epoch": 1.3041165294490185, "grad_norm": 3.3411202430725098, "learning_rate": 8.83303321790914e-06, "loss": 0.8052, "step": 5148 }, { "epoch": 1.3043698543381888, "grad_norm": 3.522878408432007, "learning_rate": 8.832495168022494e-06, "loss": 0.6031, "step": 5149 }, { "epoch": 1.304623179227359, "grad_norm": 4.035628795623779, "learning_rate": 8.83195701052118e-06, "loss": 0.7687, "step": 5150 }, { "epoch": 1.3048765041165296, "grad_norm": 3.939291477203369, "learning_rate": 8.83141874542031e-06, "loss": 0.8159, "step": 5151 }, { "epoch": 1.3051298290056998, "grad_norm": 3.952495574951172, "learning_rate": 8.830880372734998e-06, "loss": 0.7899, "step": 5152 }, { "epoch": 1.3053831538948701, "grad_norm": 3.5980591773986816, "learning_rate": 8.830341892480359e-06, "loss": 0.8068, "step": 5153 }, { "epoch": 1.3056364787840407, "grad_norm": 3.4994215965270996, "learning_rate": 8.829803304671515e-06, "loss": 0.6223, "step": 5154 }, { "epoch": 1.305889803673211, "grad_norm": 4.123564720153809, "learning_rate": 8.82926460932359e-06, "loss": 0.8317, "step": 5155 }, { "epoch": 1.3061431285623812, "grad_norm": 4.1981987953186035, "learning_rate": 8.82872580645171e-06, "loss": 0.779, "step": 5156 }, { "epoch": 1.3063964534515518, "grad_norm": 3.631875514984131, "learning_rate": 8.828186896071003e-06, "loss": 0.785, "step": 5157 }, { "epoch": 1.306649778340722, "grad_norm": 3.7511484622955322, "learning_rate": 8.8276478781966e-06, "loss": 0.6887, "step": 5158 }, { "epoch": 1.3069031032298923, "grad_norm": 4.104414939880371, "learning_rate": 8.827108752843642e-06, "loss": 0.8932, "step": 5159 }, { "epoch": 1.3071564281190626, "grad_norm": 3.691878318786621, "learning_rate": 8.826569520027262e-06, "loss": 0.7989, "step": 5160 }, { "epoch": 1.307409753008233, "grad_norm": 3.4031450748443604, "learning_rate": 8.826030179762604e-06, "loss": 0.6762, "step": 5161 }, { "epoch": 1.3076630778974034, "grad_norm": 4.122935771942139, "learning_rate": 
8.825490732064813e-06, "loss": 0.7819, "step": 5162 }, { "epoch": 1.3079164027865737, "grad_norm": 3.300018787384033, "learning_rate": 8.824951176949032e-06, "loss": 0.6875, "step": 5163 }, { "epoch": 1.308169727675744, "grad_norm": 3.7323172092437744, "learning_rate": 8.824411514430417e-06, "loss": 0.8033, "step": 5164 }, { "epoch": 1.3084230525649145, "grad_norm": 3.7336745262145996, "learning_rate": 8.823871744524118e-06, "loss": 0.7597, "step": 5165 }, { "epoch": 1.3086763774540848, "grad_norm": 3.9146523475646973, "learning_rate": 8.823331867245293e-06, "loss": 0.7806, "step": 5166 }, { "epoch": 1.3089297023432551, "grad_norm": 3.8385565280914307, "learning_rate": 8.8227918826091e-06, "loss": 0.7252, "step": 5167 }, { "epoch": 1.3091830272324256, "grad_norm": 4.111937999725342, "learning_rate": 8.822251790630705e-06, "loss": 0.9118, "step": 5168 }, { "epoch": 1.309436352121596, "grad_norm": 3.8348045349121094, "learning_rate": 8.82171159132527e-06, "loss": 0.7847, "step": 5169 }, { "epoch": 1.3096896770107662, "grad_norm": 4.485629558563232, "learning_rate": 8.821171284707964e-06, "loss": 0.829, "step": 5170 }, { "epoch": 1.3099430018999367, "grad_norm": 3.9793319702148438, "learning_rate": 8.82063087079396e-06, "loss": 0.8615, "step": 5171 }, { "epoch": 1.310196326789107, "grad_norm": 3.95802903175354, "learning_rate": 8.82009034959843e-06, "loss": 0.7396, "step": 5172 }, { "epoch": 1.3104496516782773, "grad_norm": 3.820927143096924, "learning_rate": 8.819549721136555e-06, "loss": 0.6987, "step": 5173 }, { "epoch": 1.3107029765674478, "grad_norm": 3.222822427749634, "learning_rate": 8.819008985423514e-06, "loss": 0.6992, "step": 5174 }, { "epoch": 1.3109563014566181, "grad_norm": 3.582071542739868, "learning_rate": 8.818468142474492e-06, "loss": 0.8175, "step": 5175 }, { "epoch": 1.3112096263457884, "grad_norm": 3.913431167602539, "learning_rate": 8.817927192304671e-06, "loss": 0.8305, "step": 5176 }, { "epoch": 1.311462951234959, "grad_norm": 3.6144256591796875, "learning_rate": 8.817386134929246e-06, "loss": 0.7904, "step": 5177 }, { "epoch": 1.3117162761241292, "grad_norm": 4.031813144683838, "learning_rate": 8.816844970363408e-06, "loss": 0.8826, "step": 5178 }, { "epoch": 1.3119696010132995, "grad_norm": 3.829005002975464, "learning_rate": 8.81630369862235e-06, "loss": 0.763, "step": 5179 }, { "epoch": 1.31222292590247, "grad_norm": 3.531787633895874, "learning_rate": 8.815762319721274e-06, "loss": 0.8515, "step": 5180 }, { "epoch": 1.3124762507916403, "grad_norm": 3.567216634750366, "learning_rate": 8.81522083367538e-06, "loss": 0.7158, "step": 5181 }, { "epoch": 1.3127295756808106, "grad_norm": 3.6887264251708984, "learning_rate": 8.814679240499872e-06, "loss": 0.7349, "step": 5182 }, { "epoch": 1.3129829005699811, "grad_norm": 3.498737335205078, "learning_rate": 8.814137540209962e-06, "loss": 0.7118, "step": 5183 }, { "epoch": 1.3132362254591514, "grad_norm": 3.7857372760772705, "learning_rate": 8.813595732820854e-06, "loss": 0.8331, "step": 5184 }, { "epoch": 1.3134895503483217, "grad_norm": 3.5319156646728516, "learning_rate": 8.813053818347768e-06, "loss": 0.735, "step": 5185 }, { "epoch": 1.313742875237492, "grad_norm": 3.691115617752075, "learning_rate": 8.812511796805915e-06, "loss": 0.7919, "step": 5186 }, { "epoch": 1.3139962001266625, "grad_norm": 4.09224271774292, "learning_rate": 8.811969668210521e-06, "loss": 0.8109, "step": 5187 }, { "epoch": 1.3142495250158328, "grad_norm": 3.2587223052978516, "learning_rate": 8.811427432576803e-06, "loss": 0.6939, "step": 5188 
}, { "epoch": 1.314502849905003, "grad_norm": 3.6646640300750732, "learning_rate": 8.810885089919989e-06, "loss": 0.8137, "step": 5189 }, { "epoch": 1.3147561747941734, "grad_norm": 3.475882053375244, "learning_rate": 8.810342640255307e-06, "loss": 0.7515, "step": 5190 }, { "epoch": 1.315009499683344, "grad_norm": 3.985877752304077, "learning_rate": 8.809800083597991e-06, "loss": 0.8788, "step": 5191 }, { "epoch": 1.3152628245725142, "grad_norm": 3.800947666168213, "learning_rate": 8.809257419963275e-06, "loss": 0.8266, "step": 5192 }, { "epoch": 1.3155161494616845, "grad_norm": 3.7874348163604736, "learning_rate": 8.808714649366395e-06, "loss": 0.8686, "step": 5193 }, { "epoch": 1.315769474350855, "grad_norm": 3.6544885635375977, "learning_rate": 8.808171771822592e-06, "loss": 0.6952, "step": 5194 }, { "epoch": 1.3160227992400253, "grad_norm": 3.4128148555755615, "learning_rate": 8.807628787347111e-06, "loss": 0.7234, "step": 5195 }, { "epoch": 1.3162761241291956, "grad_norm": 3.8209598064422607, "learning_rate": 8.807085695955197e-06, "loss": 0.8171, "step": 5196 }, { "epoch": 1.316529449018366, "grad_norm": 4.04181432723999, "learning_rate": 8.806542497662103e-06, "loss": 0.7815, "step": 5197 }, { "epoch": 1.3167827739075364, "grad_norm": 3.8541886806488037, "learning_rate": 8.80599919248308e-06, "loss": 0.8588, "step": 5198 }, { "epoch": 1.3170360987967067, "grad_norm": 3.6714396476745605, "learning_rate": 8.805455780433382e-06, "loss": 0.8267, "step": 5199 }, { "epoch": 1.3172894236858772, "grad_norm": 4.040272235870361, "learning_rate": 8.804912261528269e-06, "loss": 0.7836, "step": 5200 }, { "epoch": 1.3175427485750475, "grad_norm": 3.6184239387512207, "learning_rate": 8.804368635783002e-06, "loss": 0.7391, "step": 5201 }, { "epoch": 1.3177960734642178, "grad_norm": 3.2228989601135254, "learning_rate": 8.80382490321285e-06, "loss": 0.7524, "step": 5202 }, { "epoch": 1.3180493983533883, "grad_norm": 3.5631792545318604, "learning_rate": 8.803281063833076e-06, "loss": 0.708, "step": 5203 }, { "epoch": 1.3183027232425586, "grad_norm": 3.6151304244995117, "learning_rate": 8.802737117658952e-06, "loss": 0.8097, "step": 5204 }, { "epoch": 1.3185560481317289, "grad_norm": 3.557908535003662, "learning_rate": 8.802193064705753e-06, "loss": 0.9571, "step": 5205 }, { "epoch": 1.3188093730208994, "grad_norm": 3.8364086151123047, "learning_rate": 8.801648904988753e-06, "loss": 0.6698, "step": 5206 }, { "epoch": 1.3190626979100697, "grad_norm": 3.4950175285339355, "learning_rate": 8.801104638523233e-06, "loss": 0.7809, "step": 5207 }, { "epoch": 1.31931602279924, "grad_norm": 3.430807113647461, "learning_rate": 8.800560265324477e-06, "loss": 0.7195, "step": 5208 }, { "epoch": 1.3195693476884105, "grad_norm": 4.011314868927002, "learning_rate": 8.800015785407771e-06, "loss": 0.8731, "step": 5209 }, { "epoch": 1.3198226725775808, "grad_norm": 3.559040069580078, "learning_rate": 8.799471198788402e-06, "loss": 0.6956, "step": 5210 }, { "epoch": 1.320075997466751, "grad_norm": 3.3630099296569824, "learning_rate": 8.798926505481664e-06, "loss": 0.6953, "step": 5211 }, { "epoch": 1.3203293223559216, "grad_norm": 3.2390987873077393, "learning_rate": 8.798381705502847e-06, "loss": 0.7014, "step": 5212 }, { "epoch": 1.320582647245092, "grad_norm": 3.863812208175659, "learning_rate": 8.797836798867255e-06, "loss": 0.7841, "step": 5213 }, { "epoch": 1.3208359721342622, "grad_norm": 3.636995792388916, "learning_rate": 8.797291785590184e-06, "loss": 0.7871, "step": 5214 }, { "epoch": 1.3210892970234325, 
"grad_norm": 3.817232608795166, "learning_rate": 8.79674666568694e-06, "loss": 0.8858, "step": 5215 }, { "epoch": 1.321342621912603, "grad_norm": 3.6206257343292236, "learning_rate": 8.796201439172829e-06, "loss": 0.7299, "step": 5216 }, { "epoch": 1.3215959468017733, "grad_norm": 3.469076156616211, "learning_rate": 8.795656106063161e-06, "loss": 0.7902, "step": 5217 }, { "epoch": 1.3218492716909436, "grad_norm": 3.588515281677246, "learning_rate": 8.795110666373248e-06, "loss": 0.7992, "step": 5218 }, { "epoch": 1.3221025965801139, "grad_norm": 3.2227203845977783, "learning_rate": 8.794565120118406e-06, "loss": 0.6846, "step": 5219 }, { "epoch": 1.3223559214692844, "grad_norm": 3.3197062015533447, "learning_rate": 8.794019467313955e-06, "loss": 0.7482, "step": 5220 }, { "epoch": 1.3226092463584547, "grad_norm": 3.9905765056610107, "learning_rate": 8.793473707975214e-06, "loss": 0.8468, "step": 5221 }, { "epoch": 1.322862571247625, "grad_norm": 3.633476734161377, "learning_rate": 8.792927842117509e-06, "loss": 0.6167, "step": 5222 }, { "epoch": 1.3231158961367955, "grad_norm": 3.5059194564819336, "learning_rate": 8.792381869756168e-06, "loss": 0.8105, "step": 5223 }, { "epoch": 1.3233692210259658, "grad_norm": 3.969255208969116, "learning_rate": 8.791835790906525e-06, "loss": 0.92, "step": 5224 }, { "epoch": 1.323622545915136, "grad_norm": 4.204358100891113, "learning_rate": 8.791289605583907e-06, "loss": 0.8562, "step": 5225 }, { "epoch": 1.3238758708043066, "grad_norm": 4.095635414123535, "learning_rate": 8.790743313803653e-06, "loss": 0.9031, "step": 5226 }, { "epoch": 1.3241291956934769, "grad_norm": 3.7705609798431396, "learning_rate": 8.790196915581104e-06, "loss": 0.8469, "step": 5227 }, { "epoch": 1.3243825205826472, "grad_norm": 3.862409830093384, "learning_rate": 8.789650410931603e-06, "loss": 0.7578, "step": 5228 }, { "epoch": 1.3246358454718177, "grad_norm": 3.427957057952881, "learning_rate": 8.789103799870493e-06, "loss": 0.6602, "step": 5229 }, { "epoch": 1.324889170360988, "grad_norm": 4.275664806365967, "learning_rate": 8.788557082413128e-06, "loss": 0.8078, "step": 5230 }, { "epoch": 1.3251424952501583, "grad_norm": 3.465346097946167, "learning_rate": 8.788010258574851e-06, "loss": 0.6734, "step": 5231 }, { "epoch": 1.3253958201393288, "grad_norm": 3.960186719894409, "learning_rate": 8.787463328371023e-06, "loss": 0.6724, "step": 5232 }, { "epoch": 1.325649145028499, "grad_norm": 3.8265140056610107, "learning_rate": 8.786916291817e-06, "loss": 0.8207, "step": 5233 }, { "epoch": 1.3259024699176694, "grad_norm": 3.872192859649658, "learning_rate": 8.786369148928143e-06, "loss": 0.7123, "step": 5234 }, { "epoch": 1.3261557948068399, "grad_norm": 4.1361188888549805, "learning_rate": 8.785821899719815e-06, "loss": 0.866, "step": 5235 }, { "epoch": 1.3264091196960102, "grad_norm": 3.589270830154419, "learning_rate": 8.785274544207382e-06, "loss": 0.8116, "step": 5236 }, { "epoch": 1.3266624445851805, "grad_norm": 3.4907267093658447, "learning_rate": 8.784727082406214e-06, "loss": 0.7012, "step": 5237 }, { "epoch": 1.326915769474351, "grad_norm": 3.7468090057373047, "learning_rate": 8.784179514331683e-06, "loss": 0.883, "step": 5238 }, { "epoch": 1.3271690943635213, "grad_norm": 3.560201644897461, "learning_rate": 8.783631839999163e-06, "loss": 0.7044, "step": 5239 }, { "epoch": 1.3274224192526916, "grad_norm": 3.9083845615386963, "learning_rate": 8.783084059424037e-06, "loss": 0.7542, "step": 5240 }, { "epoch": 1.327675744141862, "grad_norm": 3.9807448387145996, 
"learning_rate": 8.782536172621682e-06, "loss": 0.8834, "step": 5241 }, { "epoch": 1.3279290690310324, "grad_norm": 3.6231117248535156, "learning_rate": 8.781988179607486e-06, "loss": 0.8607, "step": 5242 }, { "epoch": 1.3281823939202027, "grad_norm": 4.24287748336792, "learning_rate": 8.781440080396833e-06, "loss": 0.9153, "step": 5243 }, { "epoch": 1.328435718809373, "grad_norm": 3.889406442642212, "learning_rate": 8.780891875005116e-06, "loss": 0.7577, "step": 5244 }, { "epoch": 1.3286890436985435, "grad_norm": 3.5610406398773193, "learning_rate": 8.780343563447725e-06, "loss": 0.7354, "step": 5245 }, { "epoch": 1.3289423685877138, "grad_norm": 3.858607769012451, "learning_rate": 8.77979514574006e-06, "loss": 0.8152, "step": 5246 }, { "epoch": 1.329195693476884, "grad_norm": 3.9125075340270996, "learning_rate": 8.779246621897519e-06, "loss": 0.7909, "step": 5247 }, { "epoch": 1.3294490183660543, "grad_norm": 3.6272919178009033, "learning_rate": 8.778697991935503e-06, "loss": 0.6938, "step": 5248 }, { "epoch": 1.3297023432552249, "grad_norm": 3.786247491836548, "learning_rate": 8.77814925586942e-06, "loss": 0.7825, "step": 5249 }, { "epoch": 1.3299556681443951, "grad_norm": 4.02630615234375, "learning_rate": 8.777600413714675e-06, "loss": 0.765, "step": 5250 }, { "epoch": 1.3302089930335654, "grad_norm": 3.590681791305542, "learning_rate": 8.77705146548668e-06, "loss": 0.7976, "step": 5251 }, { "epoch": 1.330462317922736, "grad_norm": 3.867377996444702, "learning_rate": 8.776502411200852e-06, "loss": 0.7073, "step": 5252 }, { "epoch": 1.3307156428119062, "grad_norm": 3.9471781253814697, "learning_rate": 8.775953250872606e-06, "loss": 0.7559, "step": 5253 }, { "epoch": 1.3309689677010765, "grad_norm": 3.415630340576172, "learning_rate": 8.775403984517363e-06, "loss": 0.7754, "step": 5254 }, { "epoch": 1.331222292590247, "grad_norm": 3.7324411869049072, "learning_rate": 8.774854612150544e-06, "loss": 0.8234, "step": 5255 }, { "epoch": 1.3314756174794173, "grad_norm": 3.692962408065796, "learning_rate": 8.774305133787577e-06, "loss": 0.7491, "step": 5256 }, { "epoch": 1.3317289423685876, "grad_norm": 4.279314041137695, "learning_rate": 8.773755549443892e-06, "loss": 0.928, "step": 5257 }, { "epoch": 1.3319822672577581, "grad_norm": 3.6146011352539062, "learning_rate": 8.773205859134918e-06, "loss": 0.7835, "step": 5258 }, { "epoch": 1.3322355921469284, "grad_norm": 3.7248826026916504, "learning_rate": 8.772656062876094e-06, "loss": 0.8181, "step": 5259 }, { "epoch": 1.3324889170360987, "grad_norm": 4.029184818267822, "learning_rate": 8.772106160682853e-06, "loss": 0.8218, "step": 5260 }, { "epoch": 1.3327422419252692, "grad_norm": 3.4886903762817383, "learning_rate": 8.771556152570643e-06, "loss": 0.8564, "step": 5261 }, { "epoch": 1.3329955668144395, "grad_norm": 4.029396057128906, "learning_rate": 8.771006038554901e-06, "loss": 0.716, "step": 5262 }, { "epoch": 1.3332488917036098, "grad_norm": 3.870342254638672, "learning_rate": 8.770455818651078e-06, "loss": 0.8832, "step": 5263 }, { "epoch": 1.3335022165927803, "grad_norm": 3.8330764770507812, "learning_rate": 8.769905492874622e-06, "loss": 0.6664, "step": 5264 }, { "epoch": 1.3337555414819506, "grad_norm": 3.253298044204712, "learning_rate": 8.769355061240987e-06, "loss": 0.7199, "step": 5265 }, { "epoch": 1.334008866371121, "grad_norm": 4.0499467849731445, "learning_rate": 8.768804523765629e-06, "loss": 0.8866, "step": 5266 }, { "epoch": 1.3342621912602914, "grad_norm": 3.5482170581817627, "learning_rate": 8.768253880464005e-06, 
"loss": 0.6303, "step": 5267 }, { "epoch": 1.3345155161494617, "grad_norm": 4.1437249183654785, "learning_rate": 8.76770313135158e-06, "loss": 0.7959, "step": 5268 }, { "epoch": 1.334768841038632, "grad_norm": 3.7345595359802246, "learning_rate": 8.767152276443814e-06, "loss": 0.7115, "step": 5269 }, { "epoch": 1.3350221659278025, "grad_norm": 3.7965118885040283, "learning_rate": 8.766601315756181e-06, "loss": 0.7337, "step": 5270 }, { "epoch": 1.3352754908169728, "grad_norm": 3.638084888458252, "learning_rate": 8.766050249304145e-06, "loss": 0.8228, "step": 5271 }, { "epoch": 1.3355288157061431, "grad_norm": 3.803407669067383, "learning_rate": 8.765499077103185e-06, "loss": 0.8366, "step": 5272 }, { "epoch": 1.3357821405953134, "grad_norm": 4.4520134925842285, "learning_rate": 8.764947799168777e-06, "loss": 0.824, "step": 5273 }, { "epoch": 1.336035465484484, "grad_norm": 3.487675189971924, "learning_rate": 8.764396415516396e-06, "loss": 0.6718, "step": 5274 }, { "epoch": 1.3362887903736542, "grad_norm": 3.448404312133789, "learning_rate": 8.763844926161531e-06, "loss": 0.721, "step": 5275 }, { "epoch": 1.3365421152628245, "grad_norm": 3.6722261905670166, "learning_rate": 8.763293331119664e-06, "loss": 0.854, "step": 5276 }, { "epoch": 1.3367954401519948, "grad_norm": 3.83449125289917, "learning_rate": 8.762741630406282e-06, "loss": 0.7718, "step": 5277 }, { "epoch": 1.3370487650411653, "grad_norm": 4.610817909240723, "learning_rate": 8.76218982403688e-06, "loss": 0.9348, "step": 5278 }, { "epoch": 1.3373020899303356, "grad_norm": 3.6970901489257812, "learning_rate": 8.761637912026951e-06, "loss": 0.7857, "step": 5279 }, { "epoch": 1.337555414819506, "grad_norm": 3.252974033355713, "learning_rate": 8.761085894391991e-06, "loss": 0.6503, "step": 5280 }, { "epoch": 1.3378087397086764, "grad_norm": 3.4152369499206543, "learning_rate": 8.760533771147505e-06, "loss": 0.6993, "step": 5281 }, { "epoch": 1.3380620645978467, "grad_norm": 3.610792636871338, "learning_rate": 8.759981542308991e-06, "loss": 0.7445, "step": 5282 }, { "epoch": 1.338315389487017, "grad_norm": 3.5054173469543457, "learning_rate": 8.759429207891959e-06, "loss": 0.7702, "step": 5283 }, { "epoch": 1.3385687143761875, "grad_norm": 3.4513771533966064, "learning_rate": 8.758876767911914e-06, "loss": 0.6192, "step": 5284 }, { "epoch": 1.3388220392653578, "grad_norm": 4.195075035095215, "learning_rate": 8.758324222384373e-06, "loss": 0.8552, "step": 5285 }, { "epoch": 1.339075364154528, "grad_norm": 3.7252817153930664, "learning_rate": 8.75777157132485e-06, "loss": 0.743, "step": 5286 }, { "epoch": 1.3393286890436986, "grad_norm": 3.722015142440796, "learning_rate": 8.75721881474886e-06, "loss": 0.8452, "step": 5287 }, { "epoch": 1.339582013932869, "grad_norm": 3.226900577545166, "learning_rate": 8.756665952671928e-06, "loss": 0.7867, "step": 5288 }, { "epoch": 1.3398353388220392, "grad_norm": 3.866802930831909, "learning_rate": 8.756112985109577e-06, "loss": 0.7965, "step": 5289 }, { "epoch": 1.3400886637112097, "grad_norm": 3.816505193710327, "learning_rate": 8.755559912077333e-06, "loss": 0.7942, "step": 5290 }, { "epoch": 1.34034198860038, "grad_norm": 3.6338398456573486, "learning_rate": 8.755006733590729e-06, "loss": 0.803, "step": 5291 }, { "epoch": 1.3405953134895503, "grad_norm": 3.7093474864959717, "learning_rate": 8.754453449665294e-06, "loss": 0.8079, "step": 5292 }, { "epoch": 1.3408486383787208, "grad_norm": 3.889493227005005, "learning_rate": 8.753900060316565e-06, "loss": 0.9149, "step": 5293 }, { "epoch": 
1.341101963267891, "grad_norm": 3.9428391456604004, "learning_rate": 8.753346565560084e-06, "loss": 0.7434, "step": 5294 }, { "epoch": 1.3413552881570614, "grad_norm": 3.623548984527588, "learning_rate": 8.752792965411389e-06, "loss": 0.702, "step": 5295 }, { "epoch": 1.341608613046232, "grad_norm": 3.752361536026001, "learning_rate": 8.752239259886026e-06, "loss": 0.7456, "step": 5296 }, { "epoch": 1.3418619379354022, "grad_norm": 3.6794497966766357, "learning_rate": 8.751685448999545e-06, "loss": 0.6938, "step": 5297 }, { "epoch": 1.3421152628245725, "grad_norm": 3.465989351272583, "learning_rate": 8.751131532767491e-06, "loss": 0.7443, "step": 5298 }, { "epoch": 1.342368587713743, "grad_norm": 3.6318061351776123, "learning_rate": 8.750577511205425e-06, "loss": 0.7404, "step": 5299 }, { "epoch": 1.3426219126029133, "grad_norm": 3.705566167831421, "learning_rate": 8.750023384328901e-06, "loss": 0.7912, "step": 5300 }, { "epoch": 1.3428752374920836, "grad_norm": 3.9725582599639893, "learning_rate": 8.749469152153475e-06, "loss": 0.8353, "step": 5301 }, { "epoch": 1.3431285623812539, "grad_norm": 3.7491159439086914, "learning_rate": 8.748914814694715e-06, "loss": 0.7185, "step": 5302 }, { "epoch": 1.3433818872704244, "grad_norm": 3.601637125015259, "learning_rate": 8.748360371968183e-06, "loss": 0.774, "step": 5303 }, { "epoch": 1.3436352121595947, "grad_norm": 3.9110145568847656, "learning_rate": 8.747805823989448e-06, "loss": 0.8749, "step": 5304 }, { "epoch": 1.343888537048765, "grad_norm": 3.7703335285186768, "learning_rate": 8.747251170774083e-06, "loss": 0.8924, "step": 5305 }, { "epoch": 1.3441418619379353, "grad_norm": 3.8879683017730713, "learning_rate": 8.746696412337661e-06, "loss": 0.9428, "step": 5306 }, { "epoch": 1.3443951868271058, "grad_norm": 3.670351505279541, "learning_rate": 8.74614154869576e-06, "loss": 0.7602, "step": 5307 }, { "epoch": 1.344648511716276, "grad_norm": 3.466641426086426, "learning_rate": 8.745586579863958e-06, "loss": 0.7725, "step": 5308 }, { "epoch": 1.3449018366054464, "grad_norm": 3.7397964000701904, "learning_rate": 8.745031505857842e-06, "loss": 0.7807, "step": 5309 }, { "epoch": 1.345155161494617, "grad_norm": 3.730457067489624, "learning_rate": 8.744476326692998e-06, "loss": 0.7872, "step": 5310 }, { "epoch": 1.3454084863837872, "grad_norm": 3.326808452606201, "learning_rate": 8.743921042385012e-06, "loss": 0.7866, "step": 5311 }, { "epoch": 1.3456618112729575, "grad_norm": 3.538274049758911, "learning_rate": 8.743365652949479e-06, "loss": 0.7308, "step": 5312 }, { "epoch": 1.345915136162128, "grad_norm": 4.167849063873291, "learning_rate": 8.742810158401991e-06, "loss": 0.9237, "step": 5313 }, { "epoch": 1.3461684610512983, "grad_norm": 3.679926872253418, "learning_rate": 8.742254558758147e-06, "loss": 0.7973, "step": 5314 }, { "epoch": 1.3464217859404686, "grad_norm": 3.9718079566955566, "learning_rate": 8.741698854033552e-06, "loss": 0.732, "step": 5315 }, { "epoch": 1.346675110829639, "grad_norm": 3.6564199924468994, "learning_rate": 8.741143044243805e-06, "loss": 0.7546, "step": 5316 }, { "epoch": 1.3469284357188094, "grad_norm": 3.748065710067749, "learning_rate": 8.740587129404517e-06, "loss": 0.751, "step": 5317 }, { "epoch": 1.3471817606079797, "grad_norm": 4.218088150024414, "learning_rate": 8.740031109531293e-06, "loss": 0.8092, "step": 5318 }, { "epoch": 1.3474350854971502, "grad_norm": 3.18046236038208, "learning_rate": 8.739474984639749e-06, "loss": 0.671, "step": 5319 }, { "epoch": 1.3476884103863205, "grad_norm": 
4.337594032287598, "learning_rate": 8.7389187547455e-06, "loss": 0.9311, "step": 5320 }, { "epoch": 1.3479417352754908, "grad_norm": 3.796656370162964, "learning_rate": 8.738362419864164e-06, "loss": 0.8098, "step": 5321 }, { "epoch": 1.3481950601646613, "grad_norm": 3.755025863647461, "learning_rate": 8.737805980011365e-06, "loss": 0.7505, "step": 5322 }, { "epoch": 1.3484483850538316, "grad_norm": 3.7076237201690674, "learning_rate": 8.737249435202725e-06, "loss": 0.8071, "step": 5323 }, { "epoch": 1.3487017099430019, "grad_norm": 3.968423843383789, "learning_rate": 8.736692785453873e-06, "loss": 0.9095, "step": 5324 }, { "epoch": 1.3489550348321724, "grad_norm": 3.9761288166046143, "learning_rate": 8.736136030780438e-06, "loss": 0.7993, "step": 5325 }, { "epoch": 1.3492083597213427, "grad_norm": 4.578857421875, "learning_rate": 8.735579171198055e-06, "loss": 0.9795, "step": 5326 }, { "epoch": 1.349461684610513, "grad_norm": 3.433751106262207, "learning_rate": 8.735022206722359e-06, "loss": 0.7931, "step": 5327 }, { "epoch": 1.3497150094996835, "grad_norm": 3.4457149505615234, "learning_rate": 8.73446513736899e-06, "loss": 0.7807, "step": 5328 }, { "epoch": 1.3499683343888538, "grad_norm": 4.20115852355957, "learning_rate": 8.733907963153592e-06, "loss": 0.8967, "step": 5329 }, { "epoch": 1.350221659278024, "grad_norm": 3.8374433517456055, "learning_rate": 8.733350684091806e-06, "loss": 0.7224, "step": 5330 }, { "epoch": 1.3504749841671944, "grad_norm": 3.5178754329681396, "learning_rate": 8.732793300199284e-06, "loss": 0.7479, "step": 5331 }, { "epoch": 1.3507283090563647, "grad_norm": 3.638697624206543, "learning_rate": 8.732235811491675e-06, "loss": 0.8221, "step": 5332 }, { "epoch": 1.3509816339455352, "grad_norm": 4.061330795288086, "learning_rate": 8.731678217984633e-06, "loss": 0.7904, "step": 5333 }, { "epoch": 1.3512349588347055, "grad_norm": 3.571578025817871, "learning_rate": 8.731120519693817e-06, "loss": 0.6903, "step": 5334 }, { "epoch": 1.3514882837238757, "grad_norm": 3.828092575073242, "learning_rate": 8.730562716634885e-06, "loss": 0.8504, "step": 5335 }, { "epoch": 1.3517416086130463, "grad_norm": 3.6241204738616943, "learning_rate": 8.7300048088235e-06, "loss": 0.7688, "step": 5336 }, { "epoch": 1.3519949335022166, "grad_norm": 3.739471435546875, "learning_rate": 8.729446796275329e-06, "loss": 0.8968, "step": 5337 }, { "epoch": 1.3522482583913868, "grad_norm": 3.1405632495880127, "learning_rate": 8.728888679006038e-06, "loss": 0.705, "step": 5338 }, { "epoch": 1.3525015832805574, "grad_norm": 4.076598167419434, "learning_rate": 8.728330457031302e-06, "loss": 0.8274, "step": 5339 }, { "epoch": 1.3527549081697277, "grad_norm": 3.6819043159484863, "learning_rate": 8.727772130366793e-06, "loss": 0.6646, "step": 5340 }, { "epoch": 1.353008233058898, "grad_norm": 4.0459442138671875, "learning_rate": 8.727213699028193e-06, "loss": 0.8875, "step": 5341 }, { "epoch": 1.3532615579480685, "grad_norm": 3.763627052307129, "learning_rate": 8.726655163031175e-06, "loss": 0.771, "step": 5342 }, { "epoch": 1.3535148828372388, "grad_norm": 3.540144920349121, "learning_rate": 8.72609652239143e-06, "loss": 0.7245, "step": 5343 }, { "epoch": 1.353768207726409, "grad_norm": 3.6217358112335205, "learning_rate": 8.725537777124639e-06, "loss": 0.7704, "step": 5344 }, { "epoch": 1.3540215326155796, "grad_norm": 3.867257595062256, "learning_rate": 8.724978927246493e-06, "loss": 0.7022, "step": 5345 }, { "epoch": 1.3542748575047499, "grad_norm": 3.4698808193206787, "learning_rate": 
8.724419972772685e-06, "loss": 0.8088, "step": 5346 }, { "epoch": 1.3545281823939201, "grad_norm": 3.513871669769287, "learning_rate": 8.72386091371891e-06, "loss": 0.6515, "step": 5347 }, { "epoch": 1.3547815072830907, "grad_norm": 3.530122756958008, "learning_rate": 8.723301750100866e-06, "loss": 0.7009, "step": 5348 }, { "epoch": 1.355034832172261, "grad_norm": 3.7477927207946777, "learning_rate": 8.722742481934253e-06, "loss": 0.7698, "step": 5349 }, { "epoch": 1.3552881570614312, "grad_norm": 3.7715811729431152, "learning_rate": 8.722183109234776e-06, "loss": 0.7976, "step": 5350 }, { "epoch": 1.3555414819506018, "grad_norm": 3.772736072540283, "learning_rate": 8.721623632018144e-06, "loss": 0.8296, "step": 5351 }, { "epoch": 1.355794806839772, "grad_norm": 4.373363971710205, "learning_rate": 8.721064050300062e-06, "loss": 0.8761, "step": 5352 }, { "epoch": 1.3560481317289423, "grad_norm": 4.46722936630249, "learning_rate": 8.720504364096247e-06, "loss": 0.8954, "step": 5353 }, { "epoch": 1.3563014566181129, "grad_norm": 3.9622766971588135, "learning_rate": 8.719944573422413e-06, "loss": 0.8905, "step": 5354 }, { "epoch": 1.3565547815072831, "grad_norm": 4.295718669891357, "learning_rate": 8.719384678294278e-06, "loss": 0.8181, "step": 5355 }, { "epoch": 1.3568081063964534, "grad_norm": 3.521519184112549, "learning_rate": 8.718824678727564e-06, "loss": 0.6659, "step": 5356 }, { "epoch": 1.3570614312856237, "grad_norm": 3.8578474521636963, "learning_rate": 8.718264574737998e-06, "loss": 0.7713, "step": 5357 }, { "epoch": 1.3573147561747942, "grad_norm": 3.9559175968170166, "learning_rate": 8.717704366341305e-06, "loss": 0.9289, "step": 5358 }, { "epoch": 1.3575680810639645, "grad_norm": 3.6543405055999756, "learning_rate": 8.717144053553213e-06, "loss": 0.7082, "step": 5359 }, { "epoch": 1.3578214059531348, "grad_norm": 3.4608185291290283, "learning_rate": 8.71658363638946e-06, "loss": 0.8287, "step": 5360 }, { "epoch": 1.3580747308423051, "grad_norm": 3.783193349838257, "learning_rate": 8.716023114865781e-06, "loss": 0.8052, "step": 5361 }, { "epoch": 1.3583280557314756, "grad_norm": 4.195127964019775, "learning_rate": 8.715462488997915e-06, "loss": 0.8677, "step": 5362 }, { "epoch": 1.358581380620646, "grad_norm": 3.9031248092651367, "learning_rate": 8.714901758801602e-06, "loss": 0.887, "step": 5363 }, { "epoch": 1.3588347055098162, "grad_norm": 3.688333034515381, "learning_rate": 8.71434092429259e-06, "loss": 0.8154, "step": 5364 }, { "epoch": 1.3590880303989867, "grad_norm": 4.0218071937561035, "learning_rate": 8.713779985486624e-06, "loss": 0.7567, "step": 5365 }, { "epoch": 1.359341355288157, "grad_norm": 3.3400766849517822, "learning_rate": 8.71321894239946e-06, "loss": 0.7612, "step": 5366 }, { "epoch": 1.3595946801773273, "grad_norm": 3.4791815280914307, "learning_rate": 8.712657795046843e-06, "loss": 0.742, "step": 5367 }, { "epoch": 1.3598480050664978, "grad_norm": 3.78662109375, "learning_rate": 8.71209654344454e-06, "loss": 0.8712, "step": 5368 }, { "epoch": 1.3601013299556681, "grad_norm": 3.7294909954071045, "learning_rate": 8.711535187608303e-06, "loss": 0.8447, "step": 5369 }, { "epoch": 1.3603546548448384, "grad_norm": 3.913294792175293, "learning_rate": 8.710973727553898e-06, "loss": 0.816, "step": 5370 }, { "epoch": 1.360607979734009, "grad_norm": 2.970475435256958, "learning_rate": 8.710412163297092e-06, "loss": 0.6601, "step": 5371 }, { "epoch": 1.3608613046231792, "grad_norm": 3.540560245513916, "learning_rate": 8.709850494853649e-06, "loss": 0.8807, "step": 
5372 }, { "epoch": 1.3611146295123495, "grad_norm": 3.5531113147735596, "learning_rate": 8.709288722239345e-06, "loss": 0.6864, "step": 5373 }, { "epoch": 1.36136795440152, "grad_norm": 3.8616247177124023, "learning_rate": 8.70872684546995e-06, "loss": 0.7388, "step": 5374 }, { "epoch": 1.3616212792906903, "grad_norm": 3.792313575744629, "learning_rate": 8.708164864561244e-06, "loss": 0.7444, "step": 5375 }, { "epoch": 1.3618746041798606, "grad_norm": 4.210550308227539, "learning_rate": 8.707602779529007e-06, "loss": 0.9281, "step": 5376 }, { "epoch": 1.3621279290690311, "grad_norm": 3.5212748050689697, "learning_rate": 8.707040590389023e-06, "loss": 0.7365, "step": 5377 }, { "epoch": 1.3623812539582014, "grad_norm": 3.9914774894714355, "learning_rate": 8.706478297157075e-06, "loss": 0.8508, "step": 5378 }, { "epoch": 1.3626345788473717, "grad_norm": 3.8686389923095703, "learning_rate": 8.705915899848952e-06, "loss": 0.7543, "step": 5379 }, { "epoch": 1.3628879037365422, "grad_norm": 3.6494946479797363, "learning_rate": 8.70535339848045e-06, "loss": 0.7657, "step": 5380 }, { "epoch": 1.3631412286257125, "grad_norm": 4.332513332366943, "learning_rate": 8.70479079306736e-06, "loss": 0.8377, "step": 5381 }, { "epoch": 1.3633945535148828, "grad_norm": 3.740971326828003, "learning_rate": 8.70422808362548e-06, "loss": 0.7013, "step": 5382 }, { "epoch": 1.3636478784040533, "grad_norm": 3.3125898838043213, "learning_rate": 8.703665270170613e-06, "loss": 0.7548, "step": 5383 }, { "epoch": 1.3639012032932236, "grad_norm": 3.634493589401245, "learning_rate": 8.70310235271856e-06, "loss": 0.7967, "step": 5384 }, { "epoch": 1.364154528182394, "grad_norm": 3.581219434738159, "learning_rate": 8.70253933128513e-06, "loss": 0.8396, "step": 5385 }, { "epoch": 1.3644078530715642, "grad_norm": 3.6488876342773438, "learning_rate": 8.701976205886128e-06, "loss": 0.7908, "step": 5386 }, { "epoch": 1.3646611779607347, "grad_norm": 3.6243093013763428, "learning_rate": 8.701412976537374e-06, "loss": 0.7404, "step": 5387 }, { "epoch": 1.364914502849905, "grad_norm": 3.7664456367492676, "learning_rate": 8.700849643254674e-06, "loss": 0.7708, "step": 5388 }, { "epoch": 1.3651678277390753, "grad_norm": 3.6289851665496826, "learning_rate": 8.700286206053851e-06, "loss": 0.8135, "step": 5389 }, { "epoch": 1.3654211526282456, "grad_norm": 4.124152183532715, "learning_rate": 8.699722664950728e-06, "loss": 0.8847, "step": 5390 }, { "epoch": 1.365674477517416, "grad_norm": 4.026323318481445, "learning_rate": 8.699159019961125e-06, "loss": 0.962, "step": 5391 }, { "epoch": 1.3659278024065864, "grad_norm": 3.9823355674743652, "learning_rate": 8.698595271100872e-06, "loss": 0.752, "step": 5392 }, { "epoch": 1.3661811272957567, "grad_norm": 3.1845438480377197, "learning_rate": 8.698031418385795e-06, "loss": 0.733, "step": 5393 }, { "epoch": 1.3664344521849272, "grad_norm": 3.4990522861480713, "learning_rate": 8.69746746183173e-06, "loss": 0.6656, "step": 5394 }, { "epoch": 1.3666877770740975, "grad_norm": 3.5090506076812744, "learning_rate": 8.69690340145451e-06, "loss": 0.753, "step": 5395 }, { "epoch": 1.3669411019632678, "grad_norm": 3.798260450363159, "learning_rate": 8.696339237269976e-06, "loss": 0.868, "step": 5396 }, { "epoch": 1.3671944268524383, "grad_norm": 3.562178134918213, "learning_rate": 8.69577496929397e-06, "loss": 0.9206, "step": 5397 }, { "epoch": 1.3674477517416086, "grad_norm": 3.871551752090454, "learning_rate": 8.695210597542335e-06, "loss": 0.831, "step": 5398 }, { "epoch": 1.367701076630779, 
"grad_norm": 3.3575351238250732, "learning_rate": 8.694646122030918e-06, "loss": 0.7636, "step": 5399 }, { "epoch": 1.3679544015199494, "grad_norm": 4.550872802734375, "learning_rate": 8.694081542775568e-06, "loss": 0.7773, "step": 5400 }, { "epoch": 1.3682077264091197, "grad_norm": 3.6302905082702637, "learning_rate": 8.693516859792141e-06, "loss": 0.763, "step": 5401 }, { "epoch": 1.36846105129829, "grad_norm": 3.4151811599731445, "learning_rate": 8.69295207309649e-06, "loss": 0.7456, "step": 5402 }, { "epoch": 1.3687143761874605, "grad_norm": 3.618662118911743, "learning_rate": 8.692387182704478e-06, "loss": 0.7602, "step": 5403 }, { "epoch": 1.3689677010766308, "grad_norm": 3.916361093521118, "learning_rate": 8.691822188631963e-06, "loss": 0.7612, "step": 5404 }, { "epoch": 1.369221025965801, "grad_norm": 3.935793161392212, "learning_rate": 8.69125709089481e-06, "loss": 0.7893, "step": 5405 }, { "epoch": 1.3694743508549716, "grad_norm": 4.025719165802002, "learning_rate": 8.690691889508892e-06, "loss": 0.7205, "step": 5406 }, { "epoch": 1.369727675744142, "grad_norm": 3.435882806777954, "learning_rate": 8.690126584490072e-06, "loss": 0.7244, "step": 5407 }, { "epoch": 1.3699810006333122, "grad_norm": 3.6226847171783447, "learning_rate": 8.689561175854227e-06, "loss": 0.7393, "step": 5408 }, { "epoch": 1.3702343255224827, "grad_norm": 3.647475004196167, "learning_rate": 8.688995663617235e-06, "loss": 0.8259, "step": 5409 }, { "epoch": 1.370487650411653, "grad_norm": 3.8193819522857666, "learning_rate": 8.688430047794974e-06, "loss": 0.8489, "step": 5410 }, { "epoch": 1.3707409753008233, "grad_norm": 3.629404306411743, "learning_rate": 8.687864328403324e-06, "loss": 0.7589, "step": 5411 }, { "epoch": 1.3709943001899938, "grad_norm": 3.832561492919922, "learning_rate": 8.687298505458173e-06, "loss": 0.82, "step": 5412 }, { "epoch": 1.371247625079164, "grad_norm": 3.3676552772521973, "learning_rate": 8.686732578975407e-06, "loss": 0.6939, "step": 5413 }, { "epoch": 1.3715009499683344, "grad_norm": 3.587935447692871, "learning_rate": 8.68616654897092e-06, "loss": 0.6813, "step": 5414 }, { "epoch": 1.3717542748575047, "grad_norm": 3.995159864425659, "learning_rate": 8.685600415460603e-06, "loss": 0.774, "step": 5415 }, { "epoch": 1.3720075997466752, "grad_norm": 3.79481840133667, "learning_rate": 8.685034178460354e-06, "loss": 0.7407, "step": 5416 }, { "epoch": 1.3722609246358455, "grad_norm": 4.003698348999023, "learning_rate": 8.684467837986072e-06, "loss": 0.8186, "step": 5417 }, { "epoch": 1.3725142495250158, "grad_norm": 4.8956708908081055, "learning_rate": 8.68390139405366e-06, "loss": 0.7143, "step": 5418 }, { "epoch": 1.372767574414186, "grad_norm": 3.907975196838379, "learning_rate": 8.683334846679023e-06, "loss": 0.7404, "step": 5419 }, { "epoch": 1.3730208993033566, "grad_norm": 3.841737747192383, "learning_rate": 8.68276819587807e-06, "loss": 0.8271, "step": 5420 }, { "epoch": 1.3732742241925269, "grad_norm": 3.39402437210083, "learning_rate": 8.682201441666713e-06, "loss": 0.7675, "step": 5421 }, { "epoch": 1.3735275490816972, "grad_norm": 3.759916305541992, "learning_rate": 8.681634584060865e-06, "loss": 0.7823, "step": 5422 }, { "epoch": 1.3737808739708677, "grad_norm": 3.8228251934051514, "learning_rate": 8.681067623076442e-06, "loss": 0.8867, "step": 5423 }, { "epoch": 1.374034198860038, "grad_norm": 3.8812716007232666, "learning_rate": 8.680500558729367e-06, "loss": 0.7029, "step": 5424 }, { "epoch": 1.3742875237492083, "grad_norm": 3.287914991378784, "learning_rate": 
8.67993339103556e-06, "loss": 0.7383, "step": 5425 }, { "epoch": 1.3745408486383788, "grad_norm": 3.307253837585449, "learning_rate": 8.67936612001095e-06, "loss": 0.7984, "step": 5426 }, { "epoch": 1.374794173527549, "grad_norm": 3.8354382514953613, "learning_rate": 8.678798745671462e-06, "loss": 0.9244, "step": 5427 }, { "epoch": 1.3750474984167194, "grad_norm": 3.7706069946289062, "learning_rate": 8.67823126803303e-06, "loss": 0.7722, "step": 5428 }, { "epoch": 1.3753008233058899, "grad_norm": 4.056471347808838, "learning_rate": 8.677663687111589e-06, "loss": 0.6343, "step": 5429 }, { "epoch": 1.3755541481950602, "grad_norm": 3.8904762268066406, "learning_rate": 8.677096002923073e-06, "loss": 0.8716, "step": 5430 }, { "epoch": 1.3758074730842305, "grad_norm": 3.622706174850464, "learning_rate": 8.676528215483426e-06, "loss": 0.7528, "step": 5431 }, { "epoch": 1.376060797973401, "grad_norm": 3.5586719512939453, "learning_rate": 8.67596032480859e-06, "loss": 0.6346, "step": 5432 }, { "epoch": 1.3763141228625713, "grad_norm": 3.39465069770813, "learning_rate": 8.67539233091451e-06, "loss": 0.8285, "step": 5433 }, { "epoch": 1.3765674477517416, "grad_norm": 3.934678554534912, "learning_rate": 8.674824233817136e-06, "loss": 0.7948, "step": 5434 }, { "epoch": 1.376820772640912, "grad_norm": 3.390430450439453, "learning_rate": 8.67425603353242e-06, "loss": 0.8003, "step": 5435 }, { "epoch": 1.3770740975300824, "grad_norm": 3.2829158306121826, "learning_rate": 8.673687730076317e-06, "loss": 0.6213, "step": 5436 }, { "epoch": 1.3773274224192527, "grad_norm": 3.317941427230835, "learning_rate": 8.673119323464785e-06, "loss": 0.7675, "step": 5437 }, { "epoch": 1.3775807473084232, "grad_norm": 3.8105340003967285, "learning_rate": 8.672550813713782e-06, "loss": 0.7542, "step": 5438 }, { "epoch": 1.3778340721975935, "grad_norm": 3.4200289249420166, "learning_rate": 8.671982200839276e-06, "loss": 0.7855, "step": 5439 }, { "epoch": 1.3780873970867638, "grad_norm": 4.118893146514893, "learning_rate": 8.67141348485723e-06, "loss": 0.8599, "step": 5440 }, { "epoch": 1.3783407219759343, "grad_norm": 3.455399990081787, "learning_rate": 8.670844665783613e-06, "loss": 0.8014, "step": 5441 }, { "epoch": 1.3785940468651046, "grad_norm": 3.6838300228118896, "learning_rate": 8.670275743634398e-06, "loss": 0.8718, "step": 5442 }, { "epoch": 1.3788473717542749, "grad_norm": 3.4987316131591797, "learning_rate": 8.66970671842556e-06, "loss": 0.7655, "step": 5443 }, { "epoch": 1.3791006966434451, "grad_norm": 3.2173705101013184, "learning_rate": 8.669137590173078e-06, "loss": 0.7933, "step": 5444 }, { "epoch": 1.3793540215326157, "grad_norm": 3.6370177268981934, "learning_rate": 8.668568358892933e-06, "loss": 0.8318, "step": 5445 }, { "epoch": 1.379607346421786, "grad_norm": 3.6624679565429688, "learning_rate": 8.667999024601106e-06, "loss": 0.8758, "step": 5446 }, { "epoch": 1.3798606713109562, "grad_norm": 3.8879568576812744, "learning_rate": 8.667429587313588e-06, "loss": 0.8166, "step": 5447 }, { "epoch": 1.3801139962001265, "grad_norm": 3.623021125793457, "learning_rate": 8.666860047046364e-06, "loss": 0.8983, "step": 5448 }, { "epoch": 1.380367321089297, "grad_norm": 3.325852632522583, "learning_rate": 8.666290403815429e-06, "loss": 0.7732, "step": 5449 }, { "epoch": 1.3806206459784673, "grad_norm": 3.675074815750122, "learning_rate": 8.665720657636779e-06, "loss": 0.9026, "step": 5450 }, { "epoch": 1.3808739708676376, "grad_norm": 3.3649652004241943, "learning_rate": 8.665150808526407e-06, "loss": 0.7851, 
"step": 5451 }, { "epoch": 1.3811272957568081, "grad_norm": 4.415377140045166, "learning_rate": 8.664580856500321e-06, "loss": 1.0493, "step": 5452 }, { "epoch": 1.3813806206459784, "grad_norm": 3.6024553775787354, "learning_rate": 8.664010801574523e-06, "loss": 0.8162, "step": 5453 }, { "epoch": 1.3816339455351487, "grad_norm": 3.582939386367798, "learning_rate": 8.663440643765018e-06, "loss": 0.6934, "step": 5454 }, { "epoch": 1.3818872704243192, "grad_norm": 3.8280136585235596, "learning_rate": 8.662870383087816e-06, "loss": 0.9264, "step": 5455 }, { "epoch": 1.3821405953134895, "grad_norm": 3.5490760803222656, "learning_rate": 8.662300019558931e-06, "loss": 0.6925, "step": 5456 }, { "epoch": 1.3823939202026598, "grad_norm": 3.7981576919555664, "learning_rate": 8.661729553194378e-06, "loss": 0.7711, "step": 5457 }, { "epoch": 1.3826472450918303, "grad_norm": 3.8355305194854736, "learning_rate": 8.661158984010177e-06, "loss": 0.9728, "step": 5458 }, { "epoch": 1.3829005699810006, "grad_norm": 4.154177665710449, "learning_rate": 8.660588312022345e-06, "loss": 0.798, "step": 5459 }, { "epoch": 1.383153894870171, "grad_norm": 3.3235461711883545, "learning_rate": 8.660017537246908e-06, "loss": 0.6501, "step": 5460 }, { "epoch": 1.3834072197593414, "grad_norm": 4.393592357635498, "learning_rate": 8.659446659699896e-06, "loss": 0.8839, "step": 5461 }, { "epoch": 1.3836605446485117, "grad_norm": 4.016276836395264, "learning_rate": 8.658875679397335e-06, "loss": 0.7344, "step": 5462 }, { "epoch": 1.383913869537682, "grad_norm": 3.751662492752075, "learning_rate": 8.658304596355263e-06, "loss": 0.7765, "step": 5463 }, { "epoch": 1.3841671944268525, "grad_norm": 3.7409374713897705, "learning_rate": 8.657733410589711e-06, "loss": 0.7355, "step": 5464 }, { "epoch": 1.3844205193160228, "grad_norm": 3.8616786003112793, "learning_rate": 8.657162122116718e-06, "loss": 0.7349, "step": 5465 }, { "epoch": 1.3846738442051931, "grad_norm": 4.051181793212891, "learning_rate": 8.656590730952327e-06, "loss": 0.7971, "step": 5466 }, { "epoch": 1.3849271690943636, "grad_norm": 3.5221352577209473, "learning_rate": 8.656019237112583e-06, "loss": 0.6992, "step": 5467 }, { "epoch": 1.385180493983534, "grad_norm": 3.9331021308898926, "learning_rate": 8.655447640613532e-06, "loss": 0.928, "step": 5468 }, { "epoch": 1.3854338188727042, "grad_norm": 3.5726869106292725, "learning_rate": 8.654875941471223e-06, "loss": 0.6088, "step": 5469 }, { "epoch": 1.3856871437618747, "grad_norm": 3.962658405303955, "learning_rate": 8.654304139701712e-06, "loss": 0.811, "step": 5470 }, { "epoch": 1.385940468651045, "grad_norm": 3.7248291969299316, "learning_rate": 8.653732235321054e-06, "loss": 0.719, "step": 5471 }, { "epoch": 1.3861937935402153, "grad_norm": 4.063478469848633, "learning_rate": 8.653160228345306e-06, "loss": 0.8795, "step": 5472 }, { "epoch": 1.3864471184293856, "grad_norm": 3.4277076721191406, "learning_rate": 8.652588118790531e-06, "loss": 0.7127, "step": 5473 }, { "epoch": 1.3867004433185561, "grad_norm": 3.921884298324585, "learning_rate": 8.652015906672795e-06, "loss": 0.8055, "step": 5474 }, { "epoch": 1.3869537682077264, "grad_norm": 3.5158169269561768, "learning_rate": 8.651443592008162e-06, "loss": 0.7718, "step": 5475 }, { "epoch": 1.3872070930968967, "grad_norm": 3.964005470275879, "learning_rate": 8.650871174812706e-06, "loss": 0.8302, "step": 5476 }, { "epoch": 1.387460417986067, "grad_norm": 3.9888625144958496, "learning_rate": 8.650298655102498e-06, "loss": 0.8448, "step": 5477 }, { "epoch": 
1.3877137428752375, "grad_norm": 3.528465747833252, "learning_rate": 8.649726032893614e-06, "loss": 0.7621, "step": 5478 }, { "epoch": 1.3879670677644078, "grad_norm": 3.5892975330352783, "learning_rate": 8.649153308202133e-06, "loss": 0.7764, "step": 5479 }, { "epoch": 1.388220392653578, "grad_norm": 3.736577033996582, "learning_rate": 8.648580481044138e-06, "loss": 0.69, "step": 5480 }, { "epoch": 1.3884737175427486, "grad_norm": 4.016088008880615, "learning_rate": 8.648007551435713e-06, "loss": 0.9458, "step": 5481 }, { "epoch": 1.388727042431919, "grad_norm": 4.121703624725342, "learning_rate": 8.647434519392947e-06, "loss": 0.8649, "step": 5482 }, { "epoch": 1.3889803673210892, "grad_norm": 3.927715539932251, "learning_rate": 8.646861384931928e-06, "loss": 0.7293, "step": 5483 }, { "epoch": 1.3892336922102597, "grad_norm": 3.837312936782837, "learning_rate": 8.646288148068751e-06, "loss": 0.8717, "step": 5484 }, { "epoch": 1.38948701709943, "grad_norm": 4.266937255859375, "learning_rate": 8.64571480881951e-06, "loss": 1.0201, "step": 5485 }, { "epoch": 1.3897403419886003, "grad_norm": 3.5972275733947754, "learning_rate": 8.645141367200307e-06, "loss": 0.8153, "step": 5486 }, { "epoch": 1.3899936668777708, "grad_norm": 4.050802230834961, "learning_rate": 8.644567823227244e-06, "loss": 0.7836, "step": 5487 }, { "epoch": 1.390246991766941, "grad_norm": 3.532475233078003, "learning_rate": 8.643994176916423e-06, "loss": 0.8349, "step": 5488 }, { "epoch": 1.3905003166561114, "grad_norm": 3.3756234645843506, "learning_rate": 8.643420428283955e-06, "loss": 0.7766, "step": 5489 }, { "epoch": 1.390753641545282, "grad_norm": 3.807551860809326, "learning_rate": 8.642846577345948e-06, "loss": 0.8755, "step": 5490 }, { "epoch": 1.3910069664344522, "grad_norm": 3.5619986057281494, "learning_rate": 8.642272624118516e-06, "loss": 0.8169, "step": 5491 }, { "epoch": 1.3912602913236225, "grad_norm": 3.6280767917633057, "learning_rate": 8.641698568617776e-06, "loss": 0.9237, "step": 5492 }, { "epoch": 1.391513616212793, "grad_norm": 3.6135356426239014, "learning_rate": 8.641124410859846e-06, "loss": 0.7958, "step": 5493 }, { "epoch": 1.3917669411019633, "grad_norm": 3.8960044384002686, "learning_rate": 8.640550150860852e-06, "loss": 0.8649, "step": 5494 }, { "epoch": 1.3920202659911336, "grad_norm": 3.3087053298950195, "learning_rate": 8.639975788636914e-06, "loss": 0.7286, "step": 5495 }, { "epoch": 1.3922735908803041, "grad_norm": 3.6514222621917725, "learning_rate": 8.639401324204161e-06, "loss": 0.8522, "step": 5496 }, { "epoch": 1.3925269157694744, "grad_norm": 3.496098279953003, "learning_rate": 8.638826757578727e-06, "loss": 0.701, "step": 5497 }, { "epoch": 1.3927802406586447, "grad_norm": 3.282693386077881, "learning_rate": 8.638252088776743e-06, "loss": 0.6856, "step": 5498 }, { "epoch": 1.3930335655478152, "grad_norm": 3.991990804672241, "learning_rate": 8.637677317814342e-06, "loss": 0.8066, "step": 5499 }, { "epoch": 1.3932868904369855, "grad_norm": 4.0754218101501465, "learning_rate": 8.63710244470767e-06, "loss": 0.9027, "step": 5500 }, { "epoch": 1.3932868904369855, "eval_loss": 1.1785508394241333, "eval_runtime": 13.9529, "eval_samples_per_second": 28.668, "eval_steps_per_second": 3.583, "step": 5500 }, { "epoch": 1.3935402153261558, "grad_norm": 3.994100332260132, "learning_rate": 8.636527469472864e-06, "loss": 0.8649, "step": 5501 }, { "epoch": 1.393793540215326, "grad_norm": 3.872138738632202, "learning_rate": 8.635952392126072e-06, "loss": 0.8447, "step": 5502 }, { "epoch": 
1.3940468651044964, "grad_norm": 4.0767059326171875, "learning_rate": 8.635377212683442e-06, "loss": 0.737, "step": 5503 }, { "epoch": 1.394300189993667, "grad_norm": 3.502206325531006, "learning_rate": 8.634801931161122e-06, "loss": 0.6714, "step": 5504 }, { "epoch": 1.3945535148828372, "grad_norm": 3.764141082763672, "learning_rate": 8.634226547575268e-06, "loss": 0.7611, "step": 5505 }, { "epoch": 1.3948068397720075, "grad_norm": 3.837657928466797, "learning_rate": 8.633651061942035e-06, "loss": 0.7611, "step": 5506 }, { "epoch": 1.395060164661178, "grad_norm": 3.842958450317383, "learning_rate": 8.633075474277585e-06, "loss": 0.7583, "step": 5507 }, { "epoch": 1.3953134895503483, "grad_norm": 3.8636865615844727, "learning_rate": 8.632499784598076e-06, "loss": 0.8368, "step": 5508 }, { "epoch": 1.3955668144395186, "grad_norm": 3.5453438758850098, "learning_rate": 8.631923992919677e-06, "loss": 0.7305, "step": 5509 }, { "epoch": 1.395820139328689, "grad_norm": 3.8817951679229736, "learning_rate": 8.631348099258556e-06, "loss": 0.7234, "step": 5510 }, { "epoch": 1.3960734642178594, "grad_norm": 3.8134021759033203, "learning_rate": 8.63077210363088e-06, "loss": 0.8138, "step": 5511 }, { "epoch": 1.3963267891070297, "grad_norm": 3.794355630874634, "learning_rate": 8.630196006052826e-06, "loss": 0.7811, "step": 5512 }, { "epoch": 1.3965801139962002, "grad_norm": 3.3116185665130615, "learning_rate": 8.629619806540568e-06, "loss": 0.7572, "step": 5513 }, { "epoch": 1.3968334388853705, "grad_norm": 3.5003790855407715, "learning_rate": 8.629043505110288e-06, "loss": 0.761, "step": 5514 }, { "epoch": 1.3970867637745408, "grad_norm": 3.635342597961426, "learning_rate": 8.628467101778168e-06, "loss": 0.6976, "step": 5515 }, { "epoch": 1.3973400886637113, "grad_norm": 3.8918471336364746, "learning_rate": 8.627890596560392e-06, "loss": 0.8624, "step": 5516 }, { "epoch": 1.3975934135528816, "grad_norm": 4.08282470703125, "learning_rate": 8.627313989473147e-06, "loss": 0.8639, "step": 5517 }, { "epoch": 1.3978467384420519, "grad_norm": 3.855114221572876, "learning_rate": 8.626737280532626e-06, "loss": 0.8408, "step": 5518 }, { "epoch": 1.3981000633312224, "grad_norm": 3.85269832611084, "learning_rate": 8.626160469755024e-06, "loss": 0.9654, "step": 5519 }, { "epoch": 1.3983533882203927, "grad_norm": 4.240458965301514, "learning_rate": 8.625583557156534e-06, "loss": 0.888, "step": 5520 }, { "epoch": 1.398606713109563, "grad_norm": 3.8642497062683105, "learning_rate": 8.625006542753355e-06, "loss": 0.7675, "step": 5521 }, { "epoch": 1.3988600379987335, "grad_norm": 3.7889339923858643, "learning_rate": 8.624429426561694e-06, "loss": 0.7812, "step": 5522 }, { "epoch": 1.3991133628879038, "grad_norm": 3.741114854812622, "learning_rate": 8.623852208597751e-06, "loss": 0.7559, "step": 5523 }, { "epoch": 1.399366687777074, "grad_norm": 3.518021821975708, "learning_rate": 8.62327488887774e-06, "loss": 0.8203, "step": 5524 }, { "epoch": 1.3996200126662446, "grad_norm": 3.691943883895874, "learning_rate": 8.622697467417864e-06, "loss": 0.7384, "step": 5525 }, { "epoch": 1.3998733375554149, "grad_norm": 3.277179718017578, "learning_rate": 8.622119944234344e-06, "loss": 0.7238, "step": 5526 }, { "epoch": 1.4001266624445852, "grad_norm": 3.4786086082458496, "learning_rate": 8.621542319343392e-06, "loss": 0.8367, "step": 5527 }, { "epoch": 1.4003799873337555, "grad_norm": 3.466938018798828, "learning_rate": 8.62096459276123e-06, "loss": 0.7996, "step": 5528 }, { "epoch": 1.400633312222926, "grad_norm": 
4.1230549812316895, "learning_rate": 8.620386764504079e-06, "loss": 0.9145, "step": 5529 }, { "epoch": 1.4008866371120963, "grad_norm": 3.90274977684021, "learning_rate": 8.619808834588163e-06, "loss": 0.7999, "step": 5530 }, { "epoch": 1.4011399620012666, "grad_norm": 3.6755170822143555, "learning_rate": 8.619230803029715e-06, "loss": 0.747, "step": 5531 }, { "epoch": 1.4013932868904369, "grad_norm": 3.832758665084839, "learning_rate": 8.618652669844959e-06, "loss": 0.7941, "step": 5532 }, { "epoch": 1.4016466117796074, "grad_norm": 3.6194913387298584, "learning_rate": 8.618074435050134e-06, "loss": 0.8249, "step": 5533 }, { "epoch": 1.4018999366687777, "grad_norm": 3.7348620891571045, "learning_rate": 8.617496098661474e-06, "loss": 0.8321, "step": 5534 }, { "epoch": 1.402153261557948, "grad_norm": 4.187718391418457, "learning_rate": 8.616917660695218e-06, "loss": 0.9079, "step": 5535 }, { "epoch": 1.4024065864471185, "grad_norm": 3.6806905269622803, "learning_rate": 8.61633912116761e-06, "loss": 0.6829, "step": 5536 }, { "epoch": 1.4026599113362888, "grad_norm": 3.9087767601013184, "learning_rate": 8.615760480094893e-06, "loss": 0.7842, "step": 5537 }, { "epoch": 1.402913236225459, "grad_norm": 3.9356274604797363, "learning_rate": 8.615181737493318e-06, "loss": 0.9304, "step": 5538 }, { "epoch": 1.4031665611146296, "grad_norm": 3.6604814529418945, "learning_rate": 8.614602893379134e-06, "loss": 0.7697, "step": 5539 }, { "epoch": 1.4034198860037999, "grad_norm": 3.9226155281066895, "learning_rate": 8.614023947768596e-06, "loss": 0.766, "step": 5540 }, { "epoch": 1.4036732108929701, "grad_norm": 4.183846950531006, "learning_rate": 8.613444900677956e-06, "loss": 0.8535, "step": 5541 }, { "epoch": 1.4039265357821407, "grad_norm": 3.5136399269104004, "learning_rate": 8.61286575212348e-06, "loss": 0.7414, "step": 5542 }, { "epoch": 1.404179860671311, "grad_norm": 3.9637434482574463, "learning_rate": 8.612286502121425e-06, "loss": 0.7569, "step": 5543 }, { "epoch": 1.4044331855604812, "grad_norm": 4.18505859375, "learning_rate": 8.61170715068806e-06, "loss": 0.886, "step": 5544 }, { "epoch": 1.4046865104496518, "grad_norm": 3.609485626220703, "learning_rate": 8.611127697839649e-06, "loss": 0.8163, "step": 5545 }, { "epoch": 1.404939835338822, "grad_norm": 4.104346752166748, "learning_rate": 8.610548143592465e-06, "loss": 0.7772, "step": 5546 }, { "epoch": 1.4051931602279923, "grad_norm": 3.5270981788635254, "learning_rate": 8.60996848796278e-06, "loss": 0.78, "step": 5547 }, { "epoch": 1.4054464851171629, "grad_norm": 3.4089438915252686, "learning_rate": 8.609388730966875e-06, "loss": 0.7123, "step": 5548 }, { "epoch": 1.4056998100063332, "grad_norm": 3.5410640239715576, "learning_rate": 8.608808872621025e-06, "loss": 0.7352, "step": 5549 }, { "epoch": 1.4059531348955034, "grad_norm": 3.7271721363067627, "learning_rate": 8.608228912941513e-06, "loss": 0.7688, "step": 5550 }, { "epoch": 1.406206459784674, "grad_norm": 4.498554706573486, "learning_rate": 8.607648851944624e-06, "loss": 0.798, "step": 5551 }, { "epoch": 1.4064597846738442, "grad_norm": 3.568249225616455, "learning_rate": 8.607068689646645e-06, "loss": 0.8086, "step": 5552 }, { "epoch": 1.4067131095630145, "grad_norm": 3.853929042816162, "learning_rate": 8.606488426063868e-06, "loss": 0.7863, "step": 5553 }, { "epoch": 1.406966434452185, "grad_norm": 4.166558742523193, "learning_rate": 8.605908061212588e-06, "loss": 0.8253, "step": 5554 }, { "epoch": 1.4072197593413553, "grad_norm": 3.6892173290252686, "learning_rate": 
8.605327595109099e-06, "loss": 0.7393, "step": 5555 }, { "epoch": 1.4074730842305256, "grad_norm": 3.8383421897888184, "learning_rate": 8.6047470277697e-06, "loss": 0.7559, "step": 5556 }, { "epoch": 1.407726409119696, "grad_norm": 3.5189883708953857, "learning_rate": 8.604166359210695e-06, "loss": 0.8178, "step": 5557 }, { "epoch": 1.4079797340088664, "grad_norm": 4.024250030517578, "learning_rate": 8.603585589448387e-06, "loss": 0.8893, "step": 5558 }, { "epoch": 1.4082330588980367, "grad_norm": 4.513950824737549, "learning_rate": 8.603004718499084e-06, "loss": 0.8941, "step": 5559 }, { "epoch": 1.408486383787207, "grad_norm": 3.8571219444274902, "learning_rate": 8.602423746379098e-06, "loss": 0.7141, "step": 5560 }, { "epoch": 1.4087397086763773, "grad_norm": 3.1437816619873047, "learning_rate": 8.601842673104743e-06, "loss": 0.7115, "step": 5561 }, { "epoch": 1.4089930335655478, "grad_norm": 3.529634714126587, "learning_rate": 8.601261498692332e-06, "loss": 0.8655, "step": 5562 }, { "epoch": 1.4092463584547181, "grad_norm": 3.311220645904541, "learning_rate": 8.600680223158186e-06, "loss": 0.7382, "step": 5563 }, { "epoch": 1.4094996833438884, "grad_norm": 3.498736619949341, "learning_rate": 8.600098846518628e-06, "loss": 0.7842, "step": 5564 }, { "epoch": 1.409753008233059, "grad_norm": 4.130739688873291, "learning_rate": 8.599517368789981e-06, "loss": 0.83, "step": 5565 }, { "epoch": 1.4100063331222292, "grad_norm": 3.8548920154571533, "learning_rate": 8.598935789988572e-06, "loss": 0.9314, "step": 5566 }, { "epoch": 1.4102596580113995, "grad_norm": 3.650334358215332, "learning_rate": 8.598354110130734e-06, "loss": 0.7953, "step": 5567 }, { "epoch": 1.41051298290057, "grad_norm": 3.7701852321624756, "learning_rate": 8.597772329232799e-06, "loss": 0.8699, "step": 5568 }, { "epoch": 1.4107663077897403, "grad_norm": 4.091150760650635, "learning_rate": 8.597190447311104e-06, "loss": 0.8185, "step": 5569 }, { "epoch": 1.4110196326789106, "grad_norm": 3.8292407989501953, "learning_rate": 8.596608464381987e-06, "loss": 0.7758, "step": 5570 }, { "epoch": 1.4112729575680811, "grad_norm": 3.3151817321777344, "learning_rate": 8.596026380461789e-06, "loss": 0.7212, "step": 5571 }, { "epoch": 1.4115262824572514, "grad_norm": 3.7377889156341553, "learning_rate": 8.595444195566856e-06, "loss": 0.7684, "step": 5572 }, { "epoch": 1.4117796073464217, "grad_norm": 3.6139307022094727, "learning_rate": 8.594861909713534e-06, "loss": 0.7257, "step": 5573 }, { "epoch": 1.4120329322355922, "grad_norm": 3.971952438354492, "learning_rate": 8.594279522918176e-06, "loss": 0.8327, "step": 5574 }, { "epoch": 1.4122862571247625, "grad_norm": 4.215395927429199, "learning_rate": 8.593697035197133e-06, "loss": 0.8302, "step": 5575 }, { "epoch": 1.4125395820139328, "grad_norm": 3.487239122390747, "learning_rate": 8.593114446566762e-06, "loss": 0.6953, "step": 5576 }, { "epoch": 1.4127929069031033, "grad_norm": 3.808400869369507, "learning_rate": 8.59253175704342e-06, "loss": 0.7871, "step": 5577 }, { "epoch": 1.4130462317922736, "grad_norm": 3.6702208518981934, "learning_rate": 8.59194896664347e-06, "loss": 0.8086, "step": 5578 }, { "epoch": 1.413299556681444, "grad_norm": 3.7358663082122803, "learning_rate": 8.591366075383278e-06, "loss": 0.7254, "step": 5579 }, { "epoch": 1.4135528815706144, "grad_norm": 3.666933059692383, "learning_rate": 8.59078308327921e-06, "loss": 0.8223, "step": 5580 }, { "epoch": 1.4138062064597847, "grad_norm": 3.9592697620391846, "learning_rate": 8.590199990347634e-06, "loss": 0.8267, 
"step": 5581 }, { "epoch": 1.414059531348955, "grad_norm": 3.7985219955444336, "learning_rate": 8.589616796604927e-06, "loss": 0.8266, "step": 5582 }, { "epoch": 1.4143128562381255, "grad_norm": 3.6899030208587646, "learning_rate": 8.589033502067462e-06, "loss": 0.7936, "step": 5583 }, { "epoch": 1.4145661811272958, "grad_norm": 3.7015511989593506, "learning_rate": 8.588450106751617e-06, "loss": 0.7876, "step": 5584 }, { "epoch": 1.414819506016466, "grad_norm": 3.870482921600342, "learning_rate": 8.587866610673777e-06, "loss": 0.8474, "step": 5585 }, { "epoch": 1.4150728309056364, "grad_norm": 4.06734037399292, "learning_rate": 8.587283013850322e-06, "loss": 0.7302, "step": 5586 }, { "epoch": 1.415326155794807, "grad_norm": 3.912153959274292, "learning_rate": 8.586699316297645e-06, "loss": 0.8961, "step": 5587 }, { "epoch": 1.4155794806839772, "grad_norm": 3.841379165649414, "learning_rate": 8.586115518032128e-06, "loss": 0.8071, "step": 5588 }, { "epoch": 1.4158328055731475, "grad_norm": 3.9042487144470215, "learning_rate": 8.585531619070168e-06, "loss": 0.8927, "step": 5589 }, { "epoch": 1.4160861304623178, "grad_norm": 3.9281656742095947, "learning_rate": 8.584947619428164e-06, "loss": 0.6475, "step": 5590 }, { "epoch": 1.4163394553514883, "grad_norm": 3.717174768447876, "learning_rate": 8.584363519122508e-06, "loss": 0.7899, "step": 5591 }, { "epoch": 1.4165927802406586, "grad_norm": 3.817960500717163, "learning_rate": 8.583779318169605e-06, "loss": 0.7954, "step": 5592 }, { "epoch": 1.416846105129829, "grad_norm": 3.9455277919769287, "learning_rate": 8.58319501658586e-06, "loss": 0.7176, "step": 5593 }, { "epoch": 1.4170994300189994, "grad_norm": 3.388749361038208, "learning_rate": 8.582610614387676e-06, "loss": 0.8285, "step": 5594 }, { "epoch": 1.4173527549081697, "grad_norm": 3.77699613571167, "learning_rate": 8.582026111591468e-06, "loss": 0.7809, "step": 5595 }, { "epoch": 1.41760607979734, "grad_norm": 3.567117929458618, "learning_rate": 8.581441508213644e-06, "loss": 0.7515, "step": 5596 }, { "epoch": 1.4178594046865105, "grad_norm": 3.967083215713501, "learning_rate": 8.58085680427062e-06, "loss": 0.9218, "step": 5597 }, { "epoch": 1.4181127295756808, "grad_norm": 3.3822312355041504, "learning_rate": 8.580271999778816e-06, "loss": 0.7725, "step": 5598 }, { "epoch": 1.418366054464851, "grad_norm": 3.7598750591278076, "learning_rate": 8.579687094754651e-06, "loss": 0.8146, "step": 5599 }, { "epoch": 1.4186193793540216, "grad_norm": 3.576932668685913, "learning_rate": 8.579102089214553e-06, "loss": 0.7237, "step": 5600 }, { "epoch": 1.418872704243192, "grad_norm": 3.6534159183502197, "learning_rate": 8.578516983174943e-06, "loss": 0.6223, "step": 5601 }, { "epoch": 1.4191260291323622, "grad_norm": 3.956620216369629, "learning_rate": 8.577931776652256e-06, "loss": 0.8515, "step": 5602 }, { "epoch": 1.4193793540215327, "grad_norm": 3.514312505722046, "learning_rate": 8.57734646966292e-06, "loss": 0.7917, "step": 5603 }, { "epoch": 1.419632678910703, "grad_norm": 3.4261457920074463, "learning_rate": 8.576761062223371e-06, "loss": 0.7618, "step": 5604 }, { "epoch": 1.4198860037998733, "grad_norm": 3.548288106918335, "learning_rate": 8.576175554350048e-06, "loss": 0.7737, "step": 5605 }, { "epoch": 1.4201393286890438, "grad_norm": 4.057507038116455, "learning_rate": 8.57558994605939e-06, "loss": 0.7519, "step": 5606 }, { "epoch": 1.420392653578214, "grad_norm": 3.3332347869873047, "learning_rate": 8.575004237367845e-06, "loss": 0.7509, "step": 5607 }, { "epoch": 1.4206459784673844, 
"grad_norm": 3.819678783416748, "learning_rate": 8.574418428291857e-06, "loss": 0.7421, "step": 5608 }, { "epoch": 1.420899303356555, "grad_norm": 3.7176005840301514, "learning_rate": 8.573832518847874e-06, "loss": 0.7297, "step": 5609 }, { "epoch": 1.4211526282457252, "grad_norm": 4.009670734405518, "learning_rate": 8.573246509052346e-06, "loss": 0.8571, "step": 5610 }, { "epoch": 1.4214059531348955, "grad_norm": 3.8512253761291504, "learning_rate": 8.572660398921734e-06, "loss": 0.7957, "step": 5611 }, { "epoch": 1.421659278024066, "grad_norm": 4.28138542175293, "learning_rate": 8.572074188472492e-06, "loss": 0.8452, "step": 5612 }, { "epoch": 1.4219126029132363, "grad_norm": 3.7458267211914062, "learning_rate": 8.571487877721082e-06, "loss": 0.7774, "step": 5613 }, { "epoch": 1.4221659278024066, "grad_norm": 3.1351423263549805, "learning_rate": 8.570901466683963e-06, "loss": 0.7704, "step": 5614 }, { "epoch": 1.4224192526915769, "grad_norm": 3.2749578952789307, "learning_rate": 8.570314955377606e-06, "loss": 0.6954, "step": 5615 }, { "epoch": 1.4226725775807474, "grad_norm": 3.894059419631958, "learning_rate": 8.569728343818478e-06, "loss": 0.8329, "step": 5616 }, { "epoch": 1.4229259024699177, "grad_norm": 3.810101270675659, "learning_rate": 8.569141632023052e-06, "loss": 0.8064, "step": 5617 }, { "epoch": 1.423179227359088, "grad_norm": 4.048166751861572, "learning_rate": 8.5685548200078e-06, "loss": 0.8891, "step": 5618 }, { "epoch": 1.4234325522482583, "grad_norm": 3.691485643386841, "learning_rate": 8.567967907789202e-06, "loss": 0.7965, "step": 5619 }, { "epoch": 1.4236858771374288, "grad_norm": 3.894789457321167, "learning_rate": 8.567380895383738e-06, "loss": 0.8681, "step": 5620 }, { "epoch": 1.423939202026599, "grad_norm": 3.653616428375244, "learning_rate": 8.566793782807892e-06, "loss": 0.7101, "step": 5621 }, { "epoch": 1.4241925269157694, "grad_norm": 3.6345016956329346, "learning_rate": 8.566206570078147e-06, "loss": 0.8477, "step": 5622 }, { "epoch": 1.4244458518049399, "grad_norm": 4.286646366119385, "learning_rate": 8.56561925721099e-06, "loss": 0.8333, "step": 5623 }, { "epoch": 1.4246991766941102, "grad_norm": 3.6579551696777344, "learning_rate": 8.56503184422292e-06, "loss": 0.7263, "step": 5624 }, { "epoch": 1.4249525015832805, "grad_norm": 3.4948604106903076, "learning_rate": 8.564444331130423e-06, "loss": 0.7977, "step": 5625 }, { "epoch": 1.425205826472451, "grad_norm": 3.829361915588379, "learning_rate": 8.563856717950002e-06, "loss": 0.8717, "step": 5626 }, { "epoch": 1.4254591513616213, "grad_norm": 3.8146557807922363, "learning_rate": 8.563269004698153e-06, "loss": 0.8735, "step": 5627 }, { "epoch": 1.4257124762507916, "grad_norm": 3.6773152351379395, "learning_rate": 8.562681191391382e-06, "loss": 0.7744, "step": 5628 }, { "epoch": 1.425965801139962, "grad_norm": 4.1448564529418945, "learning_rate": 8.562093278046193e-06, "loss": 0.8276, "step": 5629 }, { "epoch": 1.4262191260291324, "grad_norm": 4.072507381439209, "learning_rate": 8.561505264679093e-06, "loss": 0.7953, "step": 5630 }, { "epoch": 1.4264724509183027, "grad_norm": 3.472202777862549, "learning_rate": 8.560917151306594e-06, "loss": 0.8121, "step": 5631 }, { "epoch": 1.4267257758074732, "grad_norm": 3.6753032207489014, "learning_rate": 8.560328937945212e-06, "loss": 0.929, "step": 5632 }, { "epoch": 1.4269791006966435, "grad_norm": 3.6754982471466064, "learning_rate": 8.559740624611462e-06, "loss": 0.8513, "step": 5633 }, { "epoch": 1.4272324255858138, "grad_norm": 3.8619563579559326, 
"learning_rate": 8.559152211321862e-06, "loss": 0.7544, "step": 5634 }, { "epoch": 1.4274857504749843, "grad_norm": 3.8075153827667236, "learning_rate": 8.558563698092937e-06, "loss": 0.7834, "step": 5635 }, { "epoch": 1.4277390753641546, "grad_norm": 3.598496437072754, "learning_rate": 8.557975084941211e-06, "loss": 0.7952, "step": 5636 }, { "epoch": 1.4279924002533249, "grad_norm": 3.260812759399414, "learning_rate": 8.557386371883212e-06, "loss": 0.8042, "step": 5637 }, { "epoch": 1.4282457251424954, "grad_norm": 3.1854093074798584, "learning_rate": 8.55679755893547e-06, "loss": 0.7317, "step": 5638 }, { "epoch": 1.4284990500316657, "grad_norm": 3.2150797843933105, "learning_rate": 8.556208646114521e-06, "loss": 0.6752, "step": 5639 }, { "epoch": 1.428752374920836, "grad_norm": 3.4865453243255615, "learning_rate": 8.5556196334369e-06, "loss": 0.6955, "step": 5640 }, { "epoch": 1.4290056998100065, "grad_norm": 3.760668992996216, "learning_rate": 8.555030520919146e-06, "loss": 0.9463, "step": 5641 }, { "epoch": 1.4292590246991768, "grad_norm": 3.4559175968170166, "learning_rate": 8.554441308577799e-06, "loss": 0.7351, "step": 5642 }, { "epoch": 1.429512349588347, "grad_norm": 4.000383377075195, "learning_rate": 8.553851996429407e-06, "loss": 0.8208, "step": 5643 }, { "epoch": 1.4297656744775173, "grad_norm": 3.9102861881256104, "learning_rate": 8.553262584490517e-06, "loss": 0.7239, "step": 5644 }, { "epoch": 1.4300189993666879, "grad_norm": 3.3569469451904297, "learning_rate": 8.55267307277768e-06, "loss": 0.7529, "step": 5645 }, { "epoch": 1.4302723242558582, "grad_norm": 3.330298662185669, "learning_rate": 8.552083461307447e-06, "loss": 0.7734, "step": 5646 }, { "epoch": 1.4305256491450284, "grad_norm": 4.039393901824951, "learning_rate": 8.551493750096376e-06, "loss": 0.7744, "step": 5647 }, { "epoch": 1.4307789740341987, "grad_norm": 3.752800941467285, "learning_rate": 8.550903939161024e-06, "loss": 0.8754, "step": 5648 }, { "epoch": 1.4310322989233693, "grad_norm": 3.302180051803589, "learning_rate": 8.550314028517956e-06, "loss": 0.8537, "step": 5649 }, { "epoch": 1.4312856238125395, "grad_norm": 3.8774683475494385, "learning_rate": 8.549724018183732e-06, "loss": 0.7596, "step": 5650 }, { "epoch": 1.4315389487017098, "grad_norm": 3.963120698928833, "learning_rate": 8.54913390817492e-06, "loss": 0.8536, "step": 5651 }, { "epoch": 1.4317922735908803, "grad_norm": 3.766277551651001, "learning_rate": 8.548543698508094e-06, "loss": 0.7581, "step": 5652 }, { "epoch": 1.4320455984800506, "grad_norm": 3.777329683303833, "learning_rate": 8.547953389199823e-06, "loss": 0.769, "step": 5653 }, { "epoch": 1.432298923369221, "grad_norm": 3.928480625152588, "learning_rate": 8.547362980266683e-06, "loss": 0.8342, "step": 5654 }, { "epoch": 1.4325522482583914, "grad_norm": 3.643662452697754, "learning_rate": 8.546772471725253e-06, "loss": 0.7035, "step": 5655 }, { "epoch": 1.4328055731475617, "grad_norm": 3.8660430908203125, "learning_rate": 8.546181863592115e-06, "loss": 0.823, "step": 5656 }, { "epoch": 1.433058898036732, "grad_norm": 3.334833860397339, "learning_rate": 8.545591155883853e-06, "loss": 0.7723, "step": 5657 }, { "epoch": 1.4333122229259025, "grad_norm": 3.6953554153442383, "learning_rate": 8.545000348617052e-06, "loss": 0.808, "step": 5658 }, { "epoch": 1.4335655478150728, "grad_norm": 3.6601805686950684, "learning_rate": 8.544409441808303e-06, "loss": 0.8103, "step": 5659 }, { "epoch": 1.4338188727042431, "grad_norm": 3.3238003253936768, "learning_rate": 8.543818435474199e-06, 
"loss": 0.723, "step": 5660 }, { "epoch": 1.4340721975934136, "grad_norm": 3.308959722518921, "learning_rate": 8.543227329631332e-06, "loss": 0.7424, "step": 5661 }, { "epoch": 1.434325522482584, "grad_norm": 3.794067144393921, "learning_rate": 8.542636124296303e-06, "loss": 0.8461, "step": 5662 }, { "epoch": 1.4345788473717542, "grad_norm": 3.4544320106506348, "learning_rate": 8.542044819485714e-06, "loss": 0.7192, "step": 5663 }, { "epoch": 1.4348321722609247, "grad_norm": 4.148547649383545, "learning_rate": 8.541453415216165e-06, "loss": 0.9252, "step": 5664 }, { "epoch": 1.435085497150095, "grad_norm": 3.8484859466552734, "learning_rate": 8.540861911504264e-06, "loss": 0.7667, "step": 5665 }, { "epoch": 1.4353388220392653, "grad_norm": 3.4362473487854004, "learning_rate": 8.540270308366618e-06, "loss": 0.6631, "step": 5666 }, { "epoch": 1.4355921469284358, "grad_norm": 3.6026856899261475, "learning_rate": 8.539678605819843e-06, "loss": 0.6789, "step": 5667 }, { "epoch": 1.4358454718176061, "grad_norm": 3.5054380893707275, "learning_rate": 8.539086803880552e-06, "loss": 0.7149, "step": 5668 }, { "epoch": 1.4360987967067764, "grad_norm": 3.5280587673187256, "learning_rate": 8.538494902565362e-06, "loss": 0.8204, "step": 5669 }, { "epoch": 1.436352121595947, "grad_norm": 3.6347293853759766, "learning_rate": 8.537902901890893e-06, "loss": 0.7337, "step": 5670 }, { "epoch": 1.4366054464851172, "grad_norm": 3.789095401763916, "learning_rate": 8.53731080187377e-06, "loss": 0.8534, "step": 5671 }, { "epoch": 1.4368587713742875, "grad_norm": 3.7971978187561035, "learning_rate": 8.536718602530616e-06, "loss": 0.7479, "step": 5672 }, { "epoch": 1.4371120962634578, "grad_norm": 3.743420124053955, "learning_rate": 8.536126303878063e-06, "loss": 0.8196, "step": 5673 }, { "epoch": 1.437365421152628, "grad_norm": 3.237114191055298, "learning_rate": 8.535533905932739e-06, "loss": 0.718, "step": 5674 }, { "epoch": 1.4376187460417986, "grad_norm": 3.5328898429870605, "learning_rate": 8.534941408711279e-06, "loss": 0.6758, "step": 5675 }, { "epoch": 1.437872070930969, "grad_norm": 3.87256121635437, "learning_rate": 8.534348812230324e-06, "loss": 0.7721, "step": 5676 }, { "epoch": 1.4381253958201392, "grad_norm": 3.472590923309326, "learning_rate": 8.533756116506508e-06, "loss": 0.7263, "step": 5677 }, { "epoch": 1.4383787207093097, "grad_norm": 3.7455976009368896, "learning_rate": 8.533163321556479e-06, "loss": 0.7487, "step": 5678 }, { "epoch": 1.43863204559848, "grad_norm": 4.153329849243164, "learning_rate": 8.532570427396877e-06, "loss": 0.8924, "step": 5679 }, { "epoch": 1.4388853704876503, "grad_norm": 4.1146240234375, "learning_rate": 8.531977434044356e-06, "loss": 0.7607, "step": 5680 }, { "epoch": 1.4391386953768208, "grad_norm": 3.8720202445983887, "learning_rate": 8.531384341515562e-06, "loss": 0.7506, "step": 5681 }, { "epoch": 1.4393920202659911, "grad_norm": 3.8613243103027344, "learning_rate": 8.530791149827152e-06, "loss": 0.8279, "step": 5682 }, { "epoch": 1.4396453451551614, "grad_norm": 3.2245066165924072, "learning_rate": 8.53019785899578e-06, "loss": 0.7867, "step": 5683 }, { "epoch": 1.439898670044332, "grad_norm": 3.619999647140503, "learning_rate": 8.529604469038106e-06, "loss": 0.6425, "step": 5684 }, { "epoch": 1.4401519949335022, "grad_norm": 3.314887762069702, "learning_rate": 8.529010979970796e-06, "loss": 0.8123, "step": 5685 }, { "epoch": 1.4404053198226725, "grad_norm": 3.6747682094573975, "learning_rate": 8.528417391810508e-06, "loss": 0.6957, "step": 5686 }, { "epoch": 
1.440658644711843, "grad_norm": 4.630380153656006, "learning_rate": 8.527823704573916e-06, "loss": 0.8388, "step": 5687 }, { "epoch": 1.4409119696010133, "grad_norm": 3.862455368041992, "learning_rate": 8.527229918277688e-06, "loss": 0.761, "step": 5688 }, { "epoch": 1.4411652944901836, "grad_norm": 3.640131950378418, "learning_rate": 8.526636032938495e-06, "loss": 0.7136, "step": 5689 }, { "epoch": 1.4414186193793541, "grad_norm": 3.6640071868896484, "learning_rate": 8.526042048573016e-06, "loss": 0.8372, "step": 5690 }, { "epoch": 1.4416719442685244, "grad_norm": 4.051027774810791, "learning_rate": 8.525447965197928e-06, "loss": 0.7735, "step": 5691 }, { "epoch": 1.4419252691576947, "grad_norm": 3.6243577003479004, "learning_rate": 8.524853782829915e-06, "loss": 0.8937, "step": 5692 }, { "epoch": 1.4421785940468652, "grad_norm": 3.4633452892303467, "learning_rate": 8.524259501485658e-06, "loss": 0.7356, "step": 5693 }, { "epoch": 1.4424319189360355, "grad_norm": 4.155950546264648, "learning_rate": 8.523665121181848e-06, "loss": 0.8695, "step": 5694 }, { "epoch": 1.4426852438252058, "grad_norm": 3.9396297931671143, "learning_rate": 8.523070641935172e-06, "loss": 0.7235, "step": 5695 }, { "epoch": 1.4429385687143763, "grad_norm": 3.9359302520751953, "learning_rate": 8.522476063762322e-06, "loss": 0.7997, "step": 5696 }, { "epoch": 1.4431918936035466, "grad_norm": 3.7869248390197754, "learning_rate": 8.521881386679997e-06, "loss": 0.8, "step": 5697 }, { "epoch": 1.443445218492717, "grad_norm": 3.976806640625, "learning_rate": 8.521286610704893e-06, "loss": 0.78, "step": 5698 }, { "epoch": 1.4436985433818872, "grad_norm": 3.9138104915618896, "learning_rate": 8.520691735853712e-06, "loss": 0.8162, "step": 5699 }, { "epoch": 1.4439518682710577, "grad_norm": 3.900667667388916, "learning_rate": 8.520096762143155e-06, "loss": 0.7667, "step": 5700 }, { "epoch": 1.444205193160228, "grad_norm": 3.848956823348999, "learning_rate": 8.519501689589932e-06, "loss": 0.9847, "step": 5701 }, { "epoch": 1.4444585180493983, "grad_norm": 4.102713108062744, "learning_rate": 8.518906518210751e-06, "loss": 0.9319, "step": 5702 }, { "epoch": 1.4447118429385686, "grad_norm": 4.555942058563232, "learning_rate": 8.518311248022324e-06, "loss": 0.8847, "step": 5703 }, { "epoch": 1.444965167827739, "grad_norm": 3.8152759075164795, "learning_rate": 8.517715879041366e-06, "loss": 0.8646, "step": 5704 }, { "epoch": 1.4452184927169094, "grad_norm": 3.4794349670410156, "learning_rate": 8.517120411284594e-06, "loss": 0.8137, "step": 5705 }, { "epoch": 1.4454718176060797, "grad_norm": 3.4123058319091797, "learning_rate": 8.516524844768733e-06, "loss": 0.7919, "step": 5706 }, { "epoch": 1.4457251424952502, "grad_norm": 3.657371997833252, "learning_rate": 8.515929179510498e-06, "loss": 0.8292, "step": 5707 }, { "epoch": 1.4459784673844205, "grad_norm": 3.533569574356079, "learning_rate": 8.515333415526622e-06, "loss": 0.7663, "step": 5708 }, { "epoch": 1.4462317922735908, "grad_norm": 3.5966460704803467, "learning_rate": 8.51473755283383e-06, "loss": 0.7478, "step": 5709 }, { "epoch": 1.4464851171627613, "grad_norm": 3.9291951656341553, "learning_rate": 8.514141591448854e-06, "loss": 0.7542, "step": 5710 }, { "epoch": 1.4467384420519316, "grad_norm": 3.6165096759796143, "learning_rate": 8.513545531388432e-06, "loss": 0.7914, "step": 5711 }, { "epoch": 1.4469917669411019, "grad_norm": 3.74674391746521, "learning_rate": 8.512949372669298e-06, "loss": 0.7878, "step": 5712 }, { "epoch": 1.4472450918302724, "grad_norm": 
3.324605703353882, "learning_rate": 8.512353115308189e-06, "loss": 0.7061, "step": 5713 }, { "epoch": 1.4474984167194427, "grad_norm": 3.780344247817993, "learning_rate": 8.511756759321852e-06, "loss": 0.8317, "step": 5714 }, { "epoch": 1.447751741608613, "grad_norm": 3.839493751525879, "learning_rate": 8.511160304727031e-06, "loss": 0.8069, "step": 5715 }, { "epoch": 1.4480050664977835, "grad_norm": 3.744105339050293, "learning_rate": 8.510563751540475e-06, "loss": 0.7982, "step": 5716 }, { "epoch": 1.4482583913869538, "grad_norm": 3.512763261795044, "learning_rate": 8.509967099778934e-06, "loss": 0.8294, "step": 5717 }, { "epoch": 1.448511716276124, "grad_norm": 3.56489634513855, "learning_rate": 8.509370349459161e-06, "loss": 0.8668, "step": 5718 }, { "epoch": 1.4487650411652946, "grad_norm": 3.4126763343811035, "learning_rate": 8.508773500597915e-06, "loss": 0.6083, "step": 5719 }, { "epoch": 1.4490183660544649, "grad_norm": 3.7884819507598877, "learning_rate": 8.508176553211951e-06, "loss": 0.7594, "step": 5720 }, { "epoch": 1.4492716909436352, "grad_norm": 3.8706231117248535, "learning_rate": 8.507579507318036e-06, "loss": 0.7924, "step": 5721 }, { "epoch": 1.4495250158328057, "grad_norm": 4.001307010650635, "learning_rate": 8.506982362932932e-06, "loss": 0.8956, "step": 5722 }, { "epoch": 1.449778340721976, "grad_norm": 3.834406852722168, "learning_rate": 8.506385120073406e-06, "loss": 0.7862, "step": 5723 }, { "epoch": 1.4500316656111463, "grad_norm": 3.5482423305511475, "learning_rate": 8.50578777875623e-06, "loss": 0.7288, "step": 5724 }, { "epoch": 1.4502849905003168, "grad_norm": 3.502732515335083, "learning_rate": 8.505190338998176e-06, "loss": 0.7459, "step": 5725 }, { "epoch": 1.450538315389487, "grad_norm": 3.8053665161132812, "learning_rate": 8.50459280081602e-06, "loss": 0.7643, "step": 5726 }, { "epoch": 1.4507916402786574, "grad_norm": 4.070937633514404, "learning_rate": 8.503995164226542e-06, "loss": 0.7852, "step": 5727 }, { "epoch": 1.4510449651678277, "grad_norm": 3.704869270324707, "learning_rate": 8.50339742924652e-06, "loss": 0.6839, "step": 5728 }, { "epoch": 1.4512982900569982, "grad_norm": 3.3609752655029297, "learning_rate": 8.50279959589274e-06, "loss": 0.7276, "step": 5729 }, { "epoch": 1.4515516149461685, "grad_norm": 3.966762065887451, "learning_rate": 8.50220166418199e-06, "loss": 0.9289, "step": 5730 }, { "epoch": 1.4518049398353388, "grad_norm": 3.9899070262908936, "learning_rate": 8.50160363413106e-06, "loss": 0.7779, "step": 5731 }, { "epoch": 1.452058264724509, "grad_norm": 3.5301764011383057, "learning_rate": 8.501005505756738e-06, "loss": 0.7488, "step": 5732 }, { "epoch": 1.4523115896136796, "grad_norm": 3.3429617881774902, "learning_rate": 8.500407279075824e-06, "loss": 0.7595, "step": 5733 }, { "epoch": 1.4525649145028499, "grad_norm": 3.5351319313049316, "learning_rate": 8.499808954105115e-06, "loss": 0.8505, "step": 5734 }, { "epoch": 1.4528182393920201, "grad_norm": 3.7764217853546143, "learning_rate": 8.499210530861409e-06, "loss": 0.9221, "step": 5735 }, { "epoch": 1.4530715642811907, "grad_norm": 4.354663848876953, "learning_rate": 8.498612009361513e-06, "loss": 0.9962, "step": 5736 }, { "epoch": 1.453324889170361, "grad_norm": 3.265813112258911, "learning_rate": 8.498013389622231e-06, "loss": 0.7001, "step": 5737 }, { "epoch": 1.4535782140595312, "grad_norm": 4.211230278015137, "learning_rate": 8.497414671660372e-06, "loss": 0.8179, "step": 5738 }, { "epoch": 1.4538315389487018, "grad_norm": 3.9012246131896973, "learning_rate": 
8.496815855492749e-06, "loss": 0.8726, "step": 5739 }, { "epoch": 1.454084863837872, "grad_norm": 3.661397695541382, "learning_rate": 8.496216941136174e-06, "loss": 0.774, "step": 5740 }, { "epoch": 1.4543381887270423, "grad_norm": 3.973367929458618, "learning_rate": 8.495617928607467e-06, "loss": 0.7984, "step": 5741 }, { "epoch": 1.4545915136162129, "grad_norm": 3.723512887954712, "learning_rate": 8.495018817923445e-06, "loss": 0.9125, "step": 5742 }, { "epoch": 1.4548448385053832, "grad_norm": 3.5836737155914307, "learning_rate": 8.494419609100936e-06, "loss": 0.7135, "step": 5743 }, { "epoch": 1.4550981633945534, "grad_norm": 3.5797481536865234, "learning_rate": 8.49382030215676e-06, "loss": 0.6496, "step": 5744 }, { "epoch": 1.455351488283724, "grad_norm": 3.581573009490967, "learning_rate": 8.493220897107748e-06, "loss": 0.7338, "step": 5745 }, { "epoch": 1.4556048131728943, "grad_norm": 3.822969675064087, "learning_rate": 8.492621393970731e-06, "loss": 0.8432, "step": 5746 }, { "epoch": 1.4558581380620645, "grad_norm": 3.8776535987854004, "learning_rate": 8.492021792762542e-06, "loss": 0.7807, "step": 5747 }, { "epoch": 1.456111462951235, "grad_norm": 3.600105047225952, "learning_rate": 8.491422093500016e-06, "loss": 0.7384, "step": 5748 }, { "epoch": 1.4563647878404054, "grad_norm": 3.469967842102051, "learning_rate": 8.490822296199995e-06, "loss": 0.7266, "step": 5749 }, { "epoch": 1.4566181127295756, "grad_norm": 4.049229621887207, "learning_rate": 8.49022240087932e-06, "loss": 0.7418, "step": 5750 }, { "epoch": 1.4568714376187462, "grad_norm": 3.541820764541626, "learning_rate": 8.489622407554837e-06, "loss": 0.8721, "step": 5751 }, { "epoch": 1.4571247625079164, "grad_norm": 3.556708812713623, "learning_rate": 8.489022316243391e-06, "loss": 0.72, "step": 5752 }, { "epoch": 1.4573780873970867, "grad_norm": 3.6263678073883057, "learning_rate": 8.488422126961833e-06, "loss": 0.6651, "step": 5753 }, { "epoch": 1.4576314122862573, "grad_norm": 3.4090118408203125, "learning_rate": 8.487821839727018e-06, "loss": 0.7079, "step": 5754 }, { "epoch": 1.4578847371754275, "grad_norm": 3.7343039512634277, "learning_rate": 8.4872214545558e-06, "loss": 0.7699, "step": 5755 }, { "epoch": 1.4581380620645978, "grad_norm": 3.486272096633911, "learning_rate": 8.486620971465039e-06, "loss": 0.7504, "step": 5756 }, { "epoch": 1.4583913869537681, "grad_norm": 3.899477005004883, "learning_rate": 8.486020390471593e-06, "loss": 0.7199, "step": 5757 }, { "epoch": 1.4586447118429386, "grad_norm": 3.771094799041748, "learning_rate": 8.485419711592329e-06, "loss": 0.7408, "step": 5758 }, { "epoch": 1.458898036732109, "grad_norm": 3.6506221294403076, "learning_rate": 8.484818934844112e-06, "loss": 0.7438, "step": 5759 }, { "epoch": 1.4591513616212792, "grad_norm": 3.7030067443847656, "learning_rate": 8.484218060243816e-06, "loss": 0.7507, "step": 5760 }, { "epoch": 1.4594046865104495, "grad_norm": 3.648280143737793, "learning_rate": 8.483617087808307e-06, "loss": 0.6922, "step": 5761 }, { "epoch": 1.45965801139962, "grad_norm": 3.9507155418395996, "learning_rate": 8.483016017554463e-06, "loss": 0.8309, "step": 5762 }, { "epoch": 1.4599113362887903, "grad_norm": 3.9942426681518555, "learning_rate": 8.482414849499164e-06, "loss": 0.8634, "step": 5763 }, { "epoch": 1.4601646611779606, "grad_norm": 4.629600524902344, "learning_rate": 8.481813583659285e-06, "loss": 0.8879, "step": 5764 }, { "epoch": 1.4604179860671311, "grad_norm": 3.8053503036499023, "learning_rate": 8.481212220051713e-06, "loss": 0.7588, "step": 
5765 }, { "epoch": 1.4606713109563014, "grad_norm": 3.998711347579956, "learning_rate": 8.480610758693335e-06, "loss": 0.743, "step": 5766 }, { "epoch": 1.4609246358454717, "grad_norm": 4.426061153411865, "learning_rate": 8.480009199601039e-06, "loss": 0.8845, "step": 5767 }, { "epoch": 1.4611779607346422, "grad_norm": 3.610963821411133, "learning_rate": 8.479407542791712e-06, "loss": 0.6508, "step": 5768 }, { "epoch": 1.4614312856238125, "grad_norm": 4.172120571136475, "learning_rate": 8.478805788282254e-06, "loss": 0.8132, "step": 5769 }, { "epoch": 1.4616846105129828, "grad_norm": 3.924996852874756, "learning_rate": 8.47820393608956e-06, "loss": 0.8374, "step": 5770 }, { "epoch": 1.4619379354021533, "grad_norm": 3.486543655395508, "learning_rate": 8.47760198623053e-06, "loss": 0.7782, "step": 5771 }, { "epoch": 1.4621912602913236, "grad_norm": 3.5728392601013184, "learning_rate": 8.476999938722066e-06, "loss": 0.8747, "step": 5772 }, { "epoch": 1.462444585180494, "grad_norm": 3.4813034534454346, "learning_rate": 8.476397793581073e-06, "loss": 0.7351, "step": 5773 }, { "epoch": 1.4626979100696644, "grad_norm": 3.6758840084075928, "learning_rate": 8.475795550824459e-06, "loss": 0.8178, "step": 5774 }, { "epoch": 1.4629512349588347, "grad_norm": 3.9572718143463135, "learning_rate": 8.475193210469135e-06, "loss": 0.8281, "step": 5775 }, { "epoch": 1.463204559848005, "grad_norm": 3.5994718074798584, "learning_rate": 8.474590772532015e-06, "loss": 0.7396, "step": 5776 }, { "epoch": 1.4634578847371755, "grad_norm": 3.2121098041534424, "learning_rate": 8.473988237030013e-06, "loss": 0.8072, "step": 5777 }, { "epoch": 1.4637112096263458, "grad_norm": 3.8271384239196777, "learning_rate": 8.473385603980051e-06, "loss": 1.0376, "step": 5778 }, { "epoch": 1.4639645345155161, "grad_norm": 3.357984781265259, "learning_rate": 8.472782873399049e-06, "loss": 0.6981, "step": 5779 }, { "epoch": 1.4642178594046866, "grad_norm": 3.8599822521209717, "learning_rate": 8.472180045303932e-06, "loss": 0.8763, "step": 5780 }, { "epoch": 1.464471184293857, "grad_norm": 3.2101244926452637, "learning_rate": 8.471577119711625e-06, "loss": 0.6959, "step": 5781 }, { "epoch": 1.4647245091830272, "grad_norm": 3.952605724334717, "learning_rate": 8.470974096639061e-06, "loss": 0.786, "step": 5782 }, { "epoch": 1.4649778340721977, "grad_norm": 3.574605703353882, "learning_rate": 8.470370976103171e-06, "loss": 0.7881, "step": 5783 }, { "epoch": 1.465231158961368, "grad_norm": 4.299081802368164, "learning_rate": 8.469767758120888e-06, "loss": 0.8145, "step": 5784 }, { "epoch": 1.4654844838505383, "grad_norm": 3.7053792476654053, "learning_rate": 8.469164442709156e-06, "loss": 0.8087, "step": 5785 }, { "epoch": 1.4657378087397086, "grad_norm": 3.570216417312622, "learning_rate": 8.468561029884912e-06, "loss": 0.7369, "step": 5786 }, { "epoch": 1.4659911336288791, "grad_norm": 3.463874340057373, "learning_rate": 8.467957519665098e-06, "loss": 0.8314, "step": 5787 }, { "epoch": 1.4662444585180494, "grad_norm": 3.9500622749328613, "learning_rate": 8.467353912066662e-06, "loss": 0.914, "step": 5788 }, { "epoch": 1.4664977834072197, "grad_norm": 3.8794491291046143, "learning_rate": 8.466750207106555e-06, "loss": 0.7971, "step": 5789 }, { "epoch": 1.46675110829639, "grad_norm": 3.631758451461792, "learning_rate": 8.466146404801727e-06, "loss": 0.7149, "step": 5790 }, { "epoch": 1.4670044331855605, "grad_norm": 3.9623847007751465, "learning_rate": 8.46554250516913e-06, "loss": 0.7581, "step": 5791 }, { "epoch": 1.4672577580747308, 
"grad_norm": 3.570362091064453, "learning_rate": 8.464938508225726e-06, "loss": 0.6783, "step": 5792 }, { "epoch": 1.467511082963901, "grad_norm": 4.077816009521484, "learning_rate": 8.464334413988474e-06, "loss": 0.7831, "step": 5793 }, { "epoch": 1.4677644078530716, "grad_norm": 4.263125419616699, "learning_rate": 8.463730222474332e-06, "loss": 0.9054, "step": 5794 }, { "epoch": 1.468017732742242, "grad_norm": 3.4157943725585938, "learning_rate": 8.463125933700271e-06, "loss": 0.674, "step": 5795 }, { "epoch": 1.4682710576314122, "grad_norm": 3.7338168621063232, "learning_rate": 8.462521547683255e-06, "loss": 0.8226, "step": 5796 }, { "epoch": 1.4685243825205827, "grad_norm": 3.9859232902526855, "learning_rate": 8.461917064440258e-06, "loss": 0.8197, "step": 5797 }, { "epoch": 1.468777707409753, "grad_norm": 3.4105770587921143, "learning_rate": 8.461312483988252e-06, "loss": 0.7145, "step": 5798 }, { "epoch": 1.4690310322989233, "grad_norm": 4.011654376983643, "learning_rate": 8.460707806344215e-06, "loss": 0.909, "step": 5799 }, { "epoch": 1.4692843571880938, "grad_norm": 3.96620774269104, "learning_rate": 8.460103031525123e-06, "loss": 0.8062, "step": 5800 }, { "epoch": 1.469537682077264, "grad_norm": 4.274330139160156, "learning_rate": 8.45949815954796e-06, "loss": 0.901, "step": 5801 }, { "epoch": 1.4697910069664344, "grad_norm": 4.059905052185059, "learning_rate": 8.458893190429709e-06, "loss": 0.7766, "step": 5802 }, { "epoch": 1.470044331855605, "grad_norm": 3.70166277885437, "learning_rate": 8.45828812418736e-06, "loss": 0.7859, "step": 5803 }, { "epoch": 1.4702976567447752, "grad_norm": 3.623785972595215, "learning_rate": 8.457682960837901e-06, "loss": 0.866, "step": 5804 }, { "epoch": 1.4705509816339455, "grad_norm": 3.6237847805023193, "learning_rate": 8.457077700398325e-06, "loss": 0.763, "step": 5805 }, { "epoch": 1.470804306523116, "grad_norm": 3.97985577583313, "learning_rate": 8.456472342885626e-06, "loss": 0.9793, "step": 5806 }, { "epoch": 1.4710576314122863, "grad_norm": 3.4481773376464844, "learning_rate": 8.455866888316806e-06, "loss": 0.7253, "step": 5807 }, { "epoch": 1.4713109563014566, "grad_norm": 3.6168854236602783, "learning_rate": 8.455261336708861e-06, "loss": 0.8586, "step": 5808 }, { "epoch": 1.471564281190627, "grad_norm": 3.81908917427063, "learning_rate": 8.4546556880788e-06, "loss": 0.7193, "step": 5809 }, { "epoch": 1.4718176060797974, "grad_norm": 3.9625723361968994, "learning_rate": 8.454049942443624e-06, "loss": 0.7025, "step": 5810 }, { "epoch": 1.4720709309689677, "grad_norm": 3.6989450454711914, "learning_rate": 8.453444099820346e-06, "loss": 0.805, "step": 5811 }, { "epoch": 1.4723242558581382, "grad_norm": 4.027205944061279, "learning_rate": 8.452838160225974e-06, "loss": 0.7952, "step": 5812 }, { "epoch": 1.4725775807473085, "grad_norm": 3.91361665725708, "learning_rate": 8.452232123677526e-06, "loss": 0.8229, "step": 5813 }, { "epoch": 1.4728309056364788, "grad_norm": 3.8778324127197266, "learning_rate": 8.451625990192019e-06, "loss": 0.8737, "step": 5814 }, { "epoch": 1.473084230525649, "grad_norm": 4.140817642211914, "learning_rate": 8.451019759786471e-06, "loss": 0.8426, "step": 5815 }, { "epoch": 1.4733375554148196, "grad_norm": 3.918105125427246, "learning_rate": 8.450413432477904e-06, "loss": 0.7913, "step": 5816 }, { "epoch": 1.4735908803039899, "grad_norm": 3.3620104789733887, "learning_rate": 8.449807008283348e-06, "loss": 0.6683, "step": 5817 }, { "epoch": 1.4738442051931602, "grad_norm": 3.3116416931152344, "learning_rate": 
8.449200487219826e-06, "loss": 0.7992, "step": 5818 }, { "epoch": 1.4740975300823305, "grad_norm": 3.637850046157837, "learning_rate": 8.44859386930437e-06, "loss": 0.8076, "step": 5819 }, { "epoch": 1.474350854971501, "grad_norm": 3.6628856658935547, "learning_rate": 8.447987154554018e-06, "loss": 0.6968, "step": 5820 }, { "epoch": 1.4746041798606713, "grad_norm": 3.631540060043335, "learning_rate": 8.4473803429858e-06, "loss": 0.7686, "step": 5821 }, { "epoch": 1.4748575047498416, "grad_norm": 3.455829620361328, "learning_rate": 8.446773434616757e-06, "loss": 0.7153, "step": 5822 }, { "epoch": 1.475110829639012, "grad_norm": 3.7421679496765137, "learning_rate": 8.446166429463933e-06, "loss": 0.7868, "step": 5823 }, { "epoch": 1.4753641545281824, "grad_norm": 4.175968170166016, "learning_rate": 8.445559327544372e-06, "loss": 0.8762, "step": 5824 }, { "epoch": 1.4756174794173527, "grad_norm": 4.218408584594727, "learning_rate": 8.444952128875118e-06, "loss": 1.0124, "step": 5825 }, { "epoch": 1.4758708043065232, "grad_norm": 3.955867290496826, "learning_rate": 8.444344833473222e-06, "loss": 0.7887, "step": 5826 }, { "epoch": 1.4761241291956935, "grad_norm": 4.2899651527404785, "learning_rate": 8.44373744135574e-06, "loss": 0.8016, "step": 5827 }, { "epoch": 1.4763774540848638, "grad_norm": 3.8998396396636963, "learning_rate": 8.443129952539722e-06, "loss": 0.7813, "step": 5828 }, { "epoch": 1.4766307789740343, "grad_norm": 3.8248023986816406, "learning_rate": 8.44252236704223e-06, "loss": 0.7968, "step": 5829 }, { "epoch": 1.4768841038632046, "grad_norm": 3.8769917488098145, "learning_rate": 8.441914684880324e-06, "loss": 0.7902, "step": 5830 }, { "epoch": 1.4771374287523749, "grad_norm": 4.380746841430664, "learning_rate": 8.441306906071065e-06, "loss": 0.8351, "step": 5831 }, { "epoch": 1.4773907536415454, "grad_norm": 3.37375545501709, "learning_rate": 8.440699030631523e-06, "loss": 0.7702, "step": 5832 }, { "epoch": 1.4776440785307157, "grad_norm": 3.931594133377075, "learning_rate": 8.440091058578763e-06, "loss": 0.8683, "step": 5833 }, { "epoch": 1.477897403419886, "grad_norm": 3.6735098361968994, "learning_rate": 8.439482989929859e-06, "loss": 0.8508, "step": 5834 }, { "epoch": 1.4781507283090565, "grad_norm": 3.7484383583068848, "learning_rate": 8.438874824701884e-06, "loss": 0.7568, "step": 5835 }, { "epoch": 1.4784040531982268, "grad_norm": 4.0925211906433105, "learning_rate": 8.438266562911917e-06, "loss": 0.8033, "step": 5836 }, { "epoch": 1.478657378087397, "grad_norm": 3.8534984588623047, "learning_rate": 8.437658204577035e-06, "loss": 0.7125, "step": 5837 }, { "epoch": 1.4789107029765676, "grad_norm": 3.55655837059021, "learning_rate": 8.437049749714323e-06, "loss": 0.7976, "step": 5838 }, { "epoch": 1.4791640278657379, "grad_norm": 3.5970618724823, "learning_rate": 8.436441198340864e-06, "loss": 0.8552, "step": 5839 }, { "epoch": 1.4794173527549082, "grad_norm": 3.8830974102020264, "learning_rate": 8.435832550473748e-06, "loss": 0.7632, "step": 5840 }, { "epoch": 1.4796706776440787, "grad_norm": 3.6924026012420654, "learning_rate": 8.435223806130063e-06, "loss": 0.8214, "step": 5841 }, { "epoch": 1.479924002533249, "grad_norm": 3.2904129028320312, "learning_rate": 8.434614965326904e-06, "loss": 0.6991, "step": 5842 }, { "epoch": 1.4801773274224193, "grad_norm": 3.5483806133270264, "learning_rate": 8.434006028081365e-06, "loss": 0.7987, "step": 5843 }, { "epoch": 1.4804306523115895, "grad_norm": 3.480278730392456, "learning_rate": 8.433396994410548e-06, "loss": 0.8048, 
"step": 5844 }, { "epoch": 1.4806839772007598, "grad_norm": 4.472566604614258, "learning_rate": 8.432787864331553e-06, "loss": 0.9373, "step": 5845 }, { "epoch": 1.4809373020899304, "grad_norm": 3.7041289806365967, "learning_rate": 8.432178637861483e-06, "loss": 0.6865, "step": 5846 }, { "epoch": 1.4811906269791006, "grad_norm": 3.6985971927642822, "learning_rate": 8.431569315017444e-06, "loss": 0.7535, "step": 5847 }, { "epoch": 1.481443951868271, "grad_norm": 3.5373966693878174, "learning_rate": 8.430959895816548e-06, "loss": 0.8311, "step": 5848 }, { "epoch": 1.4816972767574415, "grad_norm": 3.5233519077301025, "learning_rate": 8.430350380275907e-06, "loss": 0.8306, "step": 5849 }, { "epoch": 1.4819506016466117, "grad_norm": 3.967841625213623, "learning_rate": 8.429740768412636e-06, "loss": 0.734, "step": 5850 }, { "epoch": 1.482203926535782, "grad_norm": 3.6912450790405273, "learning_rate": 8.42913106024385e-06, "loss": 0.7144, "step": 5851 }, { "epoch": 1.4824572514249525, "grad_norm": 3.5270895957946777, "learning_rate": 8.42852125578667e-06, "loss": 0.7088, "step": 5852 }, { "epoch": 1.4827105763141228, "grad_norm": 3.1713204383850098, "learning_rate": 8.42791135505822e-06, "loss": 0.7399, "step": 5853 }, { "epoch": 1.4829639012032931, "grad_norm": 4.049830436706543, "learning_rate": 8.427301358075627e-06, "loss": 0.8691, "step": 5854 }, { "epoch": 1.4832172260924636, "grad_norm": 3.835407018661499, "learning_rate": 8.426691264856019e-06, "loss": 0.7234, "step": 5855 }, { "epoch": 1.483470550981634, "grad_norm": 3.9003965854644775, "learning_rate": 8.426081075416524e-06, "loss": 0.881, "step": 5856 }, { "epoch": 1.4837238758708042, "grad_norm": 3.7081782817840576, "learning_rate": 8.42547078977428e-06, "loss": 0.8501, "step": 5857 }, { "epoch": 1.4839772007599747, "grad_norm": 4.005224227905273, "learning_rate": 8.424860407946421e-06, "loss": 0.8632, "step": 5858 }, { "epoch": 1.484230525649145, "grad_norm": 4.061598300933838, "learning_rate": 8.424249929950087e-06, "loss": 0.7495, "step": 5859 }, { "epoch": 1.4844838505383153, "grad_norm": 3.4367449283599854, "learning_rate": 8.42363935580242e-06, "loss": 0.7512, "step": 5860 }, { "epoch": 1.4847371754274858, "grad_norm": 3.680394411087036, "learning_rate": 8.423028685520565e-06, "loss": 0.6946, "step": 5861 }, { "epoch": 1.4849905003166561, "grad_norm": 3.8052899837493896, "learning_rate": 8.422417919121666e-06, "loss": 0.911, "step": 5862 }, { "epoch": 1.4852438252058264, "grad_norm": 3.931506633758545, "learning_rate": 8.421807056622879e-06, "loss": 0.9362, "step": 5863 }, { "epoch": 1.485497150094997, "grad_norm": 4.07252836227417, "learning_rate": 8.421196098041352e-06, "loss": 0.8866, "step": 5864 }, { "epoch": 1.4857504749841672, "grad_norm": 3.3964853286743164, "learning_rate": 8.420585043394243e-06, "loss": 0.8341, "step": 5865 }, { "epoch": 1.4860037998733375, "grad_norm": 3.8817386627197266, "learning_rate": 8.419973892698708e-06, "loss": 0.7812, "step": 5866 }, { "epoch": 1.486257124762508, "grad_norm": 3.665032386779785, "learning_rate": 8.419362645971909e-06, "loss": 0.6833, "step": 5867 }, { "epoch": 1.4865104496516783, "grad_norm": 3.324069023132324, "learning_rate": 8.41875130323101e-06, "loss": 0.856, "step": 5868 }, { "epoch": 1.4867637745408486, "grad_norm": 3.504530906677246, "learning_rate": 8.418139864493178e-06, "loss": 0.6517, "step": 5869 }, { "epoch": 1.487017099430019, "grad_norm": 3.9483985900878906, "learning_rate": 8.41752832977558e-06, "loss": 0.699, "step": 5870 }, { "epoch": 1.4872704243191894, 
"grad_norm": 3.775846481323242, "learning_rate": 8.416916699095385e-06, "loss": 0.8915, "step": 5871 }, { "epoch": 1.4875237492083597, "grad_norm": 3.772552728652954, "learning_rate": 8.416304972469774e-06, "loss": 0.7129, "step": 5872 }, { "epoch": 1.48777707409753, "grad_norm": 4.275998115539551, "learning_rate": 8.41569314991592e-06, "loss": 0.8379, "step": 5873 }, { "epoch": 1.4880303989867003, "grad_norm": 3.6327340602874756, "learning_rate": 8.415081231451004e-06, "loss": 0.6871, "step": 5874 }, { "epoch": 1.4882837238758708, "grad_norm": 3.631073236465454, "learning_rate": 8.414469217092206e-06, "loss": 0.7939, "step": 5875 }, { "epoch": 1.4885370487650411, "grad_norm": 3.3416197299957275, "learning_rate": 8.413857106856711e-06, "loss": 0.7443, "step": 5876 }, { "epoch": 1.4887903736542114, "grad_norm": 3.8389337062835693, "learning_rate": 8.41324490076171e-06, "loss": 0.8646, "step": 5877 }, { "epoch": 1.489043698543382, "grad_norm": 3.729714870452881, "learning_rate": 8.412632598824395e-06, "loss": 0.753, "step": 5878 }, { "epoch": 1.4892970234325522, "grad_norm": 3.532261848449707, "learning_rate": 8.412020201061952e-06, "loss": 0.7507, "step": 5879 }, { "epoch": 1.4895503483217225, "grad_norm": 3.9118940830230713, "learning_rate": 8.411407707491584e-06, "loss": 0.8067, "step": 5880 }, { "epoch": 1.489803673210893, "grad_norm": 3.997044324874878, "learning_rate": 8.410795118130483e-06, "loss": 0.756, "step": 5881 }, { "epoch": 1.4900569981000633, "grad_norm": 3.8973376750946045, "learning_rate": 8.410182432995855e-06, "loss": 0.7656, "step": 5882 }, { "epoch": 1.4903103229892336, "grad_norm": 3.140139579772949, "learning_rate": 8.409569652104905e-06, "loss": 0.7283, "step": 5883 }, { "epoch": 1.4905636478784041, "grad_norm": 3.708293914794922, "learning_rate": 8.408956775474835e-06, "loss": 0.7921, "step": 5884 }, { "epoch": 1.4908169727675744, "grad_norm": 3.818350315093994, "learning_rate": 8.408343803122856e-06, "loss": 0.8596, "step": 5885 }, { "epoch": 1.4910702976567447, "grad_norm": 3.35256028175354, "learning_rate": 8.407730735066179e-06, "loss": 0.7102, "step": 5886 }, { "epoch": 1.4913236225459152, "grad_norm": 3.895524740219116, "learning_rate": 8.407117571322023e-06, "loss": 0.7286, "step": 5887 }, { "epoch": 1.4915769474350855, "grad_norm": 3.782482624053955, "learning_rate": 8.406504311907602e-06, "loss": 0.8667, "step": 5888 }, { "epoch": 1.4918302723242558, "grad_norm": 3.7339634895324707, "learning_rate": 8.405890956840136e-06, "loss": 0.8495, "step": 5889 }, { "epoch": 1.4920835972134263, "grad_norm": 3.5503053665161133, "learning_rate": 8.40527750613685e-06, "loss": 0.8218, "step": 5890 }, { "epoch": 1.4923369221025966, "grad_norm": 3.9296751022338867, "learning_rate": 8.404663959814964e-06, "loss": 0.8036, "step": 5891 }, { "epoch": 1.492590246991767, "grad_norm": 4.208617687225342, "learning_rate": 8.40405031789171e-06, "loss": 0.8858, "step": 5892 }, { "epoch": 1.4928435718809374, "grad_norm": 3.8549439907073975, "learning_rate": 8.403436580384321e-06, "loss": 0.7757, "step": 5893 }, { "epoch": 1.4930968967701077, "grad_norm": 3.352830410003662, "learning_rate": 8.402822747310026e-06, "loss": 0.7826, "step": 5894 }, { "epoch": 1.493350221659278, "grad_norm": 3.9335927963256836, "learning_rate": 8.402208818686066e-06, "loss": 0.8549, "step": 5895 }, { "epoch": 1.4936035465484485, "grad_norm": 3.6129844188690186, "learning_rate": 8.401594794529674e-06, "loss": 0.6835, "step": 5896 }, { "epoch": 1.4938568714376188, "grad_norm": 3.4007370471954346, 
"learning_rate": 8.400980674858095e-06, "loss": 0.8272, "step": 5897 }, { "epoch": 1.494110196326789, "grad_norm": 3.628884792327881, "learning_rate": 8.400366459688574e-06, "loss": 0.7141, "step": 5898 }, { "epoch": 1.4943635212159594, "grad_norm": 3.4113211631774902, "learning_rate": 8.399752149038355e-06, "loss": 0.6891, "step": 5899 }, { "epoch": 1.49461684610513, "grad_norm": 4.004037857055664, "learning_rate": 8.39913774292469e-06, "loss": 0.7637, "step": 5900 }, { "epoch": 1.4948701709943002, "grad_norm": 3.8062174320220947, "learning_rate": 8.398523241364829e-06, "loss": 0.7391, "step": 5901 }, { "epoch": 1.4951234958834705, "grad_norm": 3.5870893001556396, "learning_rate": 8.397908644376029e-06, "loss": 0.8925, "step": 5902 }, { "epoch": 1.4953768207726408, "grad_norm": 3.6207046508789062, "learning_rate": 8.397293951975546e-06, "loss": 0.7037, "step": 5903 }, { "epoch": 1.4956301456618113, "grad_norm": 3.3794548511505127, "learning_rate": 8.396679164180641e-06, "loss": 0.6752, "step": 5904 }, { "epoch": 1.4958834705509816, "grad_norm": 3.95609974861145, "learning_rate": 8.39606428100858e-06, "loss": 0.7661, "step": 5905 }, { "epoch": 1.4961367954401519, "grad_norm": 3.7931015491485596, "learning_rate": 8.395449302476623e-06, "loss": 0.8, "step": 5906 }, { "epoch": 1.4963901203293224, "grad_norm": 3.8280766010284424, "learning_rate": 8.39483422860204e-06, "loss": 0.8512, "step": 5907 }, { "epoch": 1.4966434452184927, "grad_norm": 4.196775913238525, "learning_rate": 8.394219059402106e-06, "loss": 0.7962, "step": 5908 }, { "epoch": 1.496896770107663, "grad_norm": 3.526451587677002, "learning_rate": 8.39360379489409e-06, "loss": 0.7187, "step": 5909 }, { "epoch": 1.4971500949968335, "grad_norm": 3.7106826305389404, "learning_rate": 8.392988435095268e-06, "loss": 0.7334, "step": 5910 }, { "epoch": 1.4974034198860038, "grad_norm": 3.939286231994629, "learning_rate": 8.392372980022923e-06, "loss": 0.8712, "step": 5911 }, { "epoch": 1.497656744775174, "grad_norm": 3.819657802581787, "learning_rate": 8.391757429694336e-06, "loss": 0.7607, "step": 5912 }, { "epoch": 1.4979100696643446, "grad_norm": 4.027267932891846, "learning_rate": 8.391141784126789e-06, "loss": 0.7891, "step": 5913 }, { "epoch": 1.4981633945535149, "grad_norm": 3.8232924938201904, "learning_rate": 8.390526043337568e-06, "loss": 0.8584, "step": 5914 }, { "epoch": 1.4984167194426852, "grad_norm": 4.053662300109863, "learning_rate": 8.389910207343967e-06, "loss": 0.8404, "step": 5915 }, { "epoch": 1.4986700443318557, "grad_norm": 4.3264360427856445, "learning_rate": 8.389294276163276e-06, "loss": 0.8765, "step": 5916 }, { "epoch": 1.498923369221026, "grad_norm": 3.9155824184417725, "learning_rate": 8.388678249812789e-06, "loss": 0.8662, "step": 5917 }, { "epoch": 1.4991766941101963, "grad_norm": 3.5717077255249023, "learning_rate": 8.388062128309806e-06, "loss": 0.7999, "step": 5918 }, { "epoch": 1.4994300189993668, "grad_norm": 3.320936918258667, "learning_rate": 8.387445911671626e-06, "loss": 0.7039, "step": 5919 }, { "epoch": 1.499683343888537, "grad_norm": 3.697159767150879, "learning_rate": 8.38682959991555e-06, "loss": 0.7215, "step": 5920 }, { "epoch": 1.4999366687777074, "grad_norm": 3.7197892665863037, "learning_rate": 8.38621319305889e-06, "loss": 0.7379, "step": 5921 }, { "epoch": 1.5001899936668779, "grad_norm": 3.9652554988861084, "learning_rate": 8.38559669111895e-06, "loss": 0.6469, "step": 5922 }, { "epoch": 1.5004433185560482, "grad_norm": 3.6747124195098877, "learning_rate": 8.384980094113042e-06, "loss": 
0.7645, "step": 5923 }, { "epoch": 1.5006966434452185, "grad_norm": 3.62958025932312, "learning_rate": 8.384363402058477e-06, "loss": 0.7681, "step": 5924 }, { "epoch": 1.500949968334389, "grad_norm": 3.7773592472076416, "learning_rate": 8.383746614972574e-06, "loss": 0.8069, "step": 5925 }, { "epoch": 1.501203293223559, "grad_norm": 3.7049543857574463, "learning_rate": 8.383129732872654e-06, "loss": 0.8527, "step": 5926 }, { "epoch": 1.5014566181127296, "grad_norm": 3.6561789512634277, "learning_rate": 8.382512755776036e-06, "loss": 0.728, "step": 5927 }, { "epoch": 1.5017099430019, "grad_norm": 3.594363212585449, "learning_rate": 8.381895683700045e-06, "loss": 0.7722, "step": 5928 }, { "epoch": 1.5019632678910702, "grad_norm": 3.8756494522094727, "learning_rate": 8.381278516662009e-06, "loss": 0.7587, "step": 5929 }, { "epoch": 1.5022165927802407, "grad_norm": 3.8373332023620605, "learning_rate": 8.380661254679254e-06, "loss": 0.8258, "step": 5930 }, { "epoch": 1.5024699176694112, "grad_norm": 3.442307233810425, "learning_rate": 8.380043897769118e-06, "loss": 0.7129, "step": 5931 }, { "epoch": 1.5027232425585813, "grad_norm": 4.300326824188232, "learning_rate": 8.379426445948933e-06, "loss": 0.8127, "step": 5932 }, { "epoch": 1.5029765674477518, "grad_norm": 4.1712422370910645, "learning_rate": 8.378808899236037e-06, "loss": 0.7044, "step": 5933 }, { "epoch": 1.503229892336922, "grad_norm": 3.6050972938537598, "learning_rate": 8.378191257647772e-06, "loss": 0.7865, "step": 5934 }, { "epoch": 1.5034832172260923, "grad_norm": 4.093749046325684, "learning_rate": 8.377573521201479e-06, "loss": 0.8434, "step": 5935 }, { "epoch": 1.5037365421152629, "grad_norm": 3.5932419300079346, "learning_rate": 8.376955689914503e-06, "loss": 0.8215, "step": 5936 }, { "epoch": 1.5039898670044332, "grad_norm": 3.8248860836029053, "learning_rate": 8.376337763804196e-06, "loss": 0.7794, "step": 5937 }, { "epoch": 1.5042431918936034, "grad_norm": 3.567612648010254, "learning_rate": 8.375719742887906e-06, "loss": 0.6918, "step": 5938 }, { "epoch": 1.504496516782774, "grad_norm": 3.5386483669281006, "learning_rate": 8.375101627182986e-06, "loss": 0.8891, "step": 5939 }, { "epoch": 1.5047498416719443, "grad_norm": 3.645420551300049, "learning_rate": 8.374483416706797e-06, "loss": 0.8245, "step": 5940 }, { "epoch": 1.5050031665611145, "grad_norm": 3.855353593826294, "learning_rate": 8.373865111476694e-06, "loss": 0.9123, "step": 5941 }, { "epoch": 1.505256491450285, "grad_norm": 3.625429391860962, "learning_rate": 8.373246711510041e-06, "loss": 0.7567, "step": 5942 }, { "epoch": 1.5055098163394554, "grad_norm": 3.689708948135376, "learning_rate": 8.372628216824202e-06, "loss": 0.845, "step": 5943 }, { "epoch": 1.5057631412286256, "grad_norm": 3.7238686084747314, "learning_rate": 8.372009627436539e-06, "loss": 0.7627, "step": 5944 }, { "epoch": 1.5060164661177962, "grad_norm": 3.5374462604522705, "learning_rate": 8.37139094336443e-06, "loss": 0.7364, "step": 5945 }, { "epoch": 1.5062697910069665, "grad_norm": 3.7947185039520264, "learning_rate": 8.370772164625242e-06, "loss": 0.726, "step": 5946 }, { "epoch": 1.5065231158961367, "grad_norm": 3.910332441329956, "learning_rate": 8.370153291236351e-06, "loss": 0.7871, "step": 5947 }, { "epoch": 1.5067764407853073, "grad_norm": 4.113910675048828, "learning_rate": 8.369534323215136e-06, "loss": 0.841, "step": 5948 }, { "epoch": 1.5070297656744776, "grad_norm": 3.6673974990844727, "learning_rate": 8.368915260578976e-06, "loss": 0.8295, "step": 5949 }, { "epoch": 
1.5072830905636478, "grad_norm": 3.6641221046447754, "learning_rate": 8.368296103345255e-06, "loss": 0.7011, "step": 5950 }, { "epoch": 1.5075364154528184, "grad_norm": 3.5324556827545166, "learning_rate": 8.367676851531359e-06, "loss": 0.8896, "step": 5951 }, { "epoch": 1.5077897403419886, "grad_norm": 3.4693355560302734, "learning_rate": 8.367057505154674e-06, "loss": 0.8551, "step": 5952 }, { "epoch": 1.508043065231159, "grad_norm": 3.685776710510254, "learning_rate": 8.366438064232591e-06, "loss": 0.7481, "step": 5953 }, { "epoch": 1.5082963901203295, "grad_norm": 3.870347738265991, "learning_rate": 8.365818528782506e-06, "loss": 0.9026, "step": 5954 }, { "epoch": 1.5085497150094995, "grad_norm": 3.93721079826355, "learning_rate": 8.365198898821816e-06, "loss": 0.6967, "step": 5955 }, { "epoch": 1.50880303989867, "grad_norm": 4.287343978881836, "learning_rate": 8.364579174367914e-06, "loss": 0.7866, "step": 5956 }, { "epoch": 1.5090563647878406, "grad_norm": 3.6782515048980713, "learning_rate": 8.36395935543821e-06, "loss": 0.819, "step": 5957 }, { "epoch": 1.5093096896770106, "grad_norm": 3.3889825344085693, "learning_rate": 8.363339442050102e-06, "loss": 0.7005, "step": 5958 }, { "epoch": 1.5095630145661811, "grad_norm": 3.817948341369629, "learning_rate": 8.362719434220999e-06, "loss": 0.77, "step": 5959 }, { "epoch": 1.5098163394553517, "grad_norm": 3.713702440261841, "learning_rate": 8.36209933196831e-06, "loss": 0.7629, "step": 5960 }, { "epoch": 1.5100696643445217, "grad_norm": 3.784099817276001, "learning_rate": 8.361479135309448e-06, "loss": 0.9719, "step": 5961 }, { "epoch": 1.5103229892336922, "grad_norm": 3.7557411193847656, "learning_rate": 8.360858844261828e-06, "loss": 0.7447, "step": 5962 }, { "epoch": 1.5105763141228625, "grad_norm": 4.17030668258667, "learning_rate": 8.360238458842866e-06, "loss": 0.7602, "step": 5963 }, { "epoch": 1.5108296390120328, "grad_norm": 3.6455342769622803, "learning_rate": 8.359617979069983e-06, "loss": 0.7874, "step": 5964 }, { "epoch": 1.5110829639012033, "grad_norm": 4.094343662261963, "learning_rate": 8.3589974049606e-06, "loss": 0.7528, "step": 5965 }, { "epoch": 1.5113362887903736, "grad_norm": 3.721818685531616, "learning_rate": 8.358376736532147e-06, "loss": 0.7711, "step": 5966 }, { "epoch": 1.511589613679544, "grad_norm": 3.379783868789673, "learning_rate": 8.357755973802048e-06, "loss": 0.8169, "step": 5967 }, { "epoch": 1.5118429385687144, "grad_norm": 3.173121213912964, "learning_rate": 8.357135116787736e-06, "loss": 0.8007, "step": 5968 }, { "epoch": 1.5120962634578847, "grad_norm": 3.5667548179626465, "learning_rate": 8.356514165506642e-06, "loss": 0.7001, "step": 5969 }, { "epoch": 1.512349588347055, "grad_norm": 3.823620557785034, "learning_rate": 8.355893119976203e-06, "loss": 0.7559, "step": 5970 }, { "epoch": 1.5126029132362255, "grad_norm": 4.1985931396484375, "learning_rate": 8.355271980213859e-06, "loss": 0.756, "step": 5971 }, { "epoch": 1.5128562381253958, "grad_norm": 4.071039199829102, "learning_rate": 8.35465074623705e-06, "loss": 0.7873, "step": 5972 }, { "epoch": 1.5131095630145661, "grad_norm": 4.075260639190674, "learning_rate": 8.35402941806322e-06, "loss": 0.7944, "step": 5973 }, { "epoch": 1.5133628879037366, "grad_norm": 3.681140899658203, "learning_rate": 8.353407995709818e-06, "loss": 0.8534, "step": 5974 }, { "epoch": 1.513616212792907, "grad_norm": 3.6451416015625, "learning_rate": 8.352786479194288e-06, "loss": 0.831, "step": 5975 }, { "epoch": 1.5138695376820772, "grad_norm": 3.862677574157715, 
"learning_rate": 8.352164868534085e-06, "loss": 0.7449, "step": 5976 }, { "epoch": 1.5141228625712477, "grad_norm": 3.7428784370422363, "learning_rate": 8.351543163746667e-06, "loss": 0.7445, "step": 5977 }, { "epoch": 1.514376187460418, "grad_norm": 4.085680961608887, "learning_rate": 8.350921364849485e-06, "loss": 0.857, "step": 5978 }, { "epoch": 1.5146295123495883, "grad_norm": 3.952937364578247, "learning_rate": 8.350299471860003e-06, "loss": 0.7919, "step": 5979 }, { "epoch": 1.5148828372387588, "grad_norm": 3.6056969165802, "learning_rate": 8.34967748479568e-06, "loss": 0.8, "step": 5980 }, { "epoch": 1.5151361621279291, "grad_norm": 3.8927102088928223, "learning_rate": 8.349055403673984e-06, "loss": 0.8612, "step": 5981 }, { "epoch": 1.5153894870170994, "grad_norm": 4.043025970458984, "learning_rate": 8.348433228512382e-06, "loss": 0.7934, "step": 5982 }, { "epoch": 1.51564281190627, "grad_norm": 3.595097780227661, "learning_rate": 8.347810959328346e-06, "loss": 0.878, "step": 5983 }, { "epoch": 1.51589613679544, "grad_norm": 3.671607732772827, "learning_rate": 8.347188596139346e-06, "loss": 0.7493, "step": 5984 }, { "epoch": 1.5161494616846105, "grad_norm": 3.481942892074585, "learning_rate": 8.34656613896286e-06, "loss": 0.7861, "step": 5985 }, { "epoch": 1.516402786573781, "grad_norm": 3.783560037612915, "learning_rate": 8.345943587816363e-06, "loss": 0.7773, "step": 5986 }, { "epoch": 1.516656111462951, "grad_norm": 3.979797840118408, "learning_rate": 8.345320942717339e-06, "loss": 0.814, "step": 5987 }, { "epoch": 1.5169094363521216, "grad_norm": 3.455920934677124, "learning_rate": 8.344698203683273e-06, "loss": 0.7721, "step": 5988 }, { "epoch": 1.517162761241292, "grad_norm": 4.146459579467773, "learning_rate": 8.344075370731646e-06, "loss": 0.9018, "step": 5989 }, { "epoch": 1.5174160861304622, "grad_norm": 4.218571662902832, "learning_rate": 8.343452443879951e-06, "loss": 0.9181, "step": 5990 }, { "epoch": 1.5176694110196327, "grad_norm": 3.3880550861358643, "learning_rate": 8.34282942314568e-06, "loss": 0.784, "step": 5991 }, { "epoch": 1.517922735908803, "grad_norm": 3.512906789779663, "learning_rate": 8.342206308546323e-06, "loss": 0.8124, "step": 5992 }, { "epoch": 1.5181760607979733, "grad_norm": 4.195394515991211, "learning_rate": 8.341583100099379e-06, "loss": 0.8566, "step": 5993 }, { "epoch": 1.5184293856871438, "grad_norm": 3.700171947479248, "learning_rate": 8.34095979782235e-06, "loss": 0.7759, "step": 5994 }, { "epoch": 1.518682710576314, "grad_norm": 3.8103458881378174, "learning_rate": 8.340336401732733e-06, "loss": 0.8734, "step": 5995 }, { "epoch": 1.5189360354654844, "grad_norm": 3.7539734840393066, "learning_rate": 8.339712911848039e-06, "loss": 0.8815, "step": 5996 }, { "epoch": 1.519189360354655, "grad_norm": 3.4617977142333984, "learning_rate": 8.33908932818577e-06, "loss": 0.7813, "step": 5997 }, { "epoch": 1.5194426852438252, "grad_norm": 3.9092464447021484, "learning_rate": 8.338465650763437e-06, "loss": 0.7633, "step": 5998 }, { "epoch": 1.5196960101329955, "grad_norm": 3.7686939239501953, "learning_rate": 8.337841879598554e-06, "loss": 0.8092, "step": 5999 }, { "epoch": 1.519949335022166, "grad_norm": 3.54663348197937, "learning_rate": 8.337218014708635e-06, "loss": 0.7971, "step": 6000 }, { "epoch": 1.519949335022166, "eval_loss": 1.1638351678848267, "eval_runtime": 13.1002, "eval_samples_per_second": 30.534, "eval_steps_per_second": 3.817, "step": 6000 }, { "epoch": 1.5202026599113363, "grad_norm": 3.7645788192749023, "learning_rate": 
8.336594056111197e-06, "loss": 0.9124, "step": 6001 }, { "epoch": 1.5204559848005066, "grad_norm": 4.3909149169921875, "learning_rate": 8.335970003823763e-06, "loss": 0.8832, "step": 6002 }, { "epoch": 1.520709309689677, "grad_norm": 3.7304556369781494, "learning_rate": 8.335345857863855e-06, "loss": 0.706, "step": 6003 }, { "epoch": 1.5209626345788474, "grad_norm": 3.730750560760498, "learning_rate": 8.334721618248998e-06, "loss": 0.9056, "step": 6004 }, { "epoch": 1.5212159594680177, "grad_norm": 3.7754292488098145, "learning_rate": 8.334097284996721e-06, "loss": 0.799, "step": 6005 }, { "epoch": 1.5214692843571882, "grad_norm": 3.810028076171875, "learning_rate": 8.333472858124557e-06, "loss": 0.8023, "step": 6006 }, { "epoch": 1.5217226092463585, "grad_norm": 3.8349390029907227, "learning_rate": 8.332848337650034e-06, "loss": 0.8124, "step": 6007 }, { "epoch": 1.5219759341355288, "grad_norm": 3.920043468475342, "learning_rate": 8.332223723590693e-06, "loss": 0.8803, "step": 6008 }, { "epoch": 1.5222292590246993, "grad_norm": 3.2182974815368652, "learning_rate": 8.331599015964071e-06, "loss": 0.6727, "step": 6009 }, { "epoch": 1.5224825839138696, "grad_norm": 3.548264265060425, "learning_rate": 8.330974214787712e-06, "loss": 0.7973, "step": 6010 }, { "epoch": 1.5227359088030399, "grad_norm": 3.722736358642578, "learning_rate": 8.330349320079156e-06, "loss": 0.8003, "step": 6011 }, { "epoch": 1.5229892336922104, "grad_norm": 3.8040225505828857, "learning_rate": 8.329724331855953e-06, "loss": 0.6691, "step": 6012 }, { "epoch": 1.5232425585813805, "grad_norm": 3.4361138343811035, "learning_rate": 8.329099250135652e-06, "loss": 0.7329, "step": 6013 }, { "epoch": 1.523495883470551, "grad_norm": 4.150000095367432, "learning_rate": 8.328474074935803e-06, "loss": 0.7398, "step": 6014 }, { "epoch": 1.5237492083597215, "grad_norm": 4.133837699890137, "learning_rate": 8.327848806273962e-06, "loss": 0.8963, "step": 6015 }, { "epoch": 1.5240025332488916, "grad_norm": 3.950253486633301, "learning_rate": 8.327223444167688e-06, "loss": 0.8378, "step": 6016 }, { "epoch": 1.524255858138062, "grad_norm": 3.9877703189849854, "learning_rate": 8.326597988634538e-06, "loss": 0.8598, "step": 6017 }, { "epoch": 1.5245091830272324, "grad_norm": 3.686001777648926, "learning_rate": 8.325972439692075e-06, "loss": 0.7589, "step": 6018 }, { "epoch": 1.5247625079164027, "grad_norm": 3.9073050022125244, "learning_rate": 8.325346797357865e-06, "loss": 0.9342, "step": 6019 }, { "epoch": 1.5250158328055732, "grad_norm": 3.502190113067627, "learning_rate": 8.324721061649475e-06, "loss": 0.6362, "step": 6020 }, { "epoch": 1.5252691576947435, "grad_norm": 3.5541303157806396, "learning_rate": 8.324095232584477e-06, "loss": 0.6861, "step": 6021 }, { "epoch": 1.5255224825839138, "grad_norm": 3.7686498165130615, "learning_rate": 8.323469310180442e-06, "loss": 0.8424, "step": 6022 }, { "epoch": 1.5257758074730843, "grad_norm": 3.8931078910827637, "learning_rate": 8.322843294454946e-06, "loss": 0.8232, "step": 6023 }, { "epoch": 1.5260291323622546, "grad_norm": 4.055556774139404, "learning_rate": 8.322217185425568e-06, "loss": 0.8232, "step": 6024 }, { "epoch": 1.5262824572514249, "grad_norm": 3.690748453140259, "learning_rate": 8.321590983109889e-06, "loss": 0.7097, "step": 6025 }, { "epoch": 1.5265357821405954, "grad_norm": 3.429391622543335, "learning_rate": 8.320964687525492e-06, "loss": 0.8544, "step": 6026 }, { "epoch": 1.5267891070297657, "grad_norm": 3.9555764198303223, "learning_rate": 8.320338298689963e-06, "loss": 
0.7345, "step": 6027 }, { "epoch": 1.527042431918936, "grad_norm": 3.7725577354431152, "learning_rate": 8.31971181662089e-06, "loss": 0.6701, "step": 6028 }, { "epoch": 1.5272957568081065, "grad_norm": 3.3939309120178223, "learning_rate": 8.319085241335865e-06, "loss": 0.7715, "step": 6029 }, { "epoch": 1.5275490816972768, "grad_norm": 3.706507682800293, "learning_rate": 8.318458572852484e-06, "loss": 0.7827, "step": 6030 }, { "epoch": 1.527802406586447, "grad_norm": 3.487157106399536, "learning_rate": 8.317831811188339e-06, "loss": 0.7933, "step": 6031 }, { "epoch": 1.5280557314756176, "grad_norm": 4.3159708976745605, "learning_rate": 8.317204956361033e-06, "loss": 0.7806, "step": 6032 }, { "epoch": 1.5283090563647879, "grad_norm": 3.9915390014648438, "learning_rate": 8.316578008388165e-06, "loss": 0.7466, "step": 6033 }, { "epoch": 1.5285623812539582, "grad_norm": 3.8348469734191895, "learning_rate": 8.315950967287343e-06, "loss": 0.8585, "step": 6034 }, { "epoch": 1.5288157061431287, "grad_norm": 4.7072672843933105, "learning_rate": 8.315323833076171e-06, "loss": 0.7936, "step": 6035 }, { "epoch": 1.529069031032299, "grad_norm": 3.6637141704559326, "learning_rate": 8.31469660577226e-06, "loss": 0.7369, "step": 6036 }, { "epoch": 1.5293223559214693, "grad_norm": 3.2571606636047363, "learning_rate": 8.314069285393222e-06, "loss": 0.7446, "step": 6037 }, { "epoch": 1.5295756808106398, "grad_norm": 3.9346470832824707, "learning_rate": 8.313441871956671e-06, "loss": 0.6638, "step": 6038 }, { "epoch": 1.5298290056998098, "grad_norm": 3.5464725494384766, "learning_rate": 8.312814365480225e-06, "loss": 0.8284, "step": 6039 }, { "epoch": 1.5300823305889804, "grad_norm": 3.887022018432617, "learning_rate": 8.312186765981504e-06, "loss": 0.7729, "step": 6040 }, { "epoch": 1.5303356554781509, "grad_norm": 3.848708152770996, "learning_rate": 8.311559073478133e-06, "loss": 0.8094, "step": 6041 }, { "epoch": 1.530588980367321, "grad_norm": 3.7966063022613525, "learning_rate": 8.310931287987733e-06, "loss": 0.8061, "step": 6042 }, { "epoch": 1.5308423052564915, "grad_norm": 3.894991636276245, "learning_rate": 8.310303409527935e-06, "loss": 0.9106, "step": 6043 }, { "epoch": 1.531095630145662, "grad_norm": 3.733093023300171, "learning_rate": 8.30967543811637e-06, "loss": 0.7332, "step": 6044 }, { "epoch": 1.531348955034832, "grad_norm": 3.7139980792999268, "learning_rate": 8.309047373770669e-06, "loss": 0.7842, "step": 6045 }, { "epoch": 1.5316022799240026, "grad_norm": 3.51163911819458, "learning_rate": 8.308419216508467e-06, "loss": 0.7031, "step": 6046 }, { "epoch": 1.5318556048131728, "grad_norm": 4.045239448547363, "learning_rate": 8.307790966347407e-06, "loss": 0.8218, "step": 6047 }, { "epoch": 1.5321089297023431, "grad_norm": 3.847369909286499, "learning_rate": 8.307162623305125e-06, "loss": 0.6955, "step": 6048 }, { "epoch": 1.5323622545915137, "grad_norm": 3.456996440887451, "learning_rate": 8.306534187399267e-06, "loss": 0.8216, "step": 6049 }, { "epoch": 1.532615579480684, "grad_norm": 3.546422243118286, "learning_rate": 8.305905658647478e-06, "loss": 0.7855, "step": 6050 }, { "epoch": 1.5328689043698542, "grad_norm": 3.4630866050720215, "learning_rate": 8.305277037067409e-06, "loss": 0.7698, "step": 6051 }, { "epoch": 1.5331222292590247, "grad_norm": 3.5826759338378906, "learning_rate": 8.304648322676708e-06, "loss": 0.7885, "step": 6052 }, { "epoch": 1.533375554148195, "grad_norm": 3.8534812927246094, "learning_rate": 8.304019515493031e-06, "loss": 0.7569, "step": 6053 }, { "epoch": 
1.5336288790373653, "grad_norm": 3.715991973876953, "learning_rate": 8.303390615534037e-06, "loss": 0.8131, "step": 6054 }, { "epoch": 1.5338822039265358, "grad_norm": 4.065611839294434, "learning_rate": 8.302761622817381e-06, "loss": 0.8437, "step": 6055 }, { "epoch": 1.5341355288157061, "grad_norm": 3.571732521057129, "learning_rate": 8.302132537360726e-06, "loss": 0.7383, "step": 6056 }, { "epoch": 1.5343888537048764, "grad_norm": 3.355422258377075, "learning_rate": 8.301503359181738e-06, "loss": 0.6935, "step": 6057 }, { "epoch": 1.534642178594047, "grad_norm": 3.804828643798828, "learning_rate": 8.300874088298083e-06, "loss": 0.7726, "step": 6058 }, { "epoch": 1.5348955034832172, "grad_norm": 3.91565203666687, "learning_rate": 8.30024472472743e-06, "loss": 0.9187, "step": 6059 }, { "epoch": 1.5351488283723875, "grad_norm": 3.788347005844116, "learning_rate": 8.299615268487454e-06, "loss": 0.6832, "step": 6060 }, { "epoch": 1.535402153261558, "grad_norm": 3.871217966079712, "learning_rate": 8.298985719595824e-06, "loss": 0.8161, "step": 6061 }, { "epoch": 1.5356554781507283, "grad_norm": 3.481473445892334, "learning_rate": 8.298356078070223e-06, "loss": 0.7086, "step": 6062 }, { "epoch": 1.5359088030398986, "grad_norm": 3.7261691093444824, "learning_rate": 8.29772634392833e-06, "loss": 0.7234, "step": 6063 }, { "epoch": 1.5361621279290691, "grad_norm": 3.566100835800171, "learning_rate": 8.297096517187826e-06, "loss": 0.6692, "step": 6064 }, { "epoch": 1.5364154528182394, "grad_norm": 3.87302827835083, "learning_rate": 8.296466597866398e-06, "loss": 0.8576, "step": 6065 }, { "epoch": 1.5366687777074097, "grad_norm": 4.200307369232178, "learning_rate": 8.295836585981731e-06, "loss": 0.8331, "step": 6066 }, { "epoch": 1.5369221025965802, "grad_norm": 3.3144524097442627, "learning_rate": 8.295206481551518e-06, "loss": 0.6906, "step": 6067 }, { "epoch": 1.5371754274857503, "grad_norm": 3.8662407398223877, "learning_rate": 8.294576284593453e-06, "loss": 0.8347, "step": 6068 }, { "epoch": 1.5374287523749208, "grad_norm": 3.577904224395752, "learning_rate": 8.293945995125228e-06, "loss": 0.7337, "step": 6069 }, { "epoch": 1.5376820772640913, "grad_norm": 3.6100378036499023, "learning_rate": 8.293315613164545e-06, "loss": 0.8172, "step": 6070 }, { "epoch": 1.5379354021532614, "grad_norm": 3.3867650032043457, "learning_rate": 8.292685138729103e-06, "loss": 0.7596, "step": 6071 }, { "epoch": 1.538188727042432, "grad_norm": 3.6087770462036133, "learning_rate": 8.292054571836604e-06, "loss": 0.7493, "step": 6072 }, { "epoch": 1.5384420519316024, "grad_norm": 3.534525156021118, "learning_rate": 8.291423912504755e-06, "loss": 0.676, "step": 6073 }, { "epoch": 1.5386953768207725, "grad_norm": 3.5308618545532227, "learning_rate": 8.290793160751267e-06, "loss": 0.9538, "step": 6074 }, { "epoch": 1.538948701709943, "grad_norm": 3.7343785762786865, "learning_rate": 8.290162316593848e-06, "loss": 0.8, "step": 6075 }, { "epoch": 1.5392020265991133, "grad_norm": 4.010598182678223, "learning_rate": 8.289531380050215e-06, "loss": 0.7769, "step": 6076 }, { "epoch": 1.5394553514882836, "grad_norm": 3.695286989212036, "learning_rate": 8.28890035113808e-06, "loss": 0.7746, "step": 6077 }, { "epoch": 1.5397086763774541, "grad_norm": 3.6864147186279297, "learning_rate": 8.288269229875167e-06, "loss": 0.8323, "step": 6078 }, { "epoch": 1.5399620012666244, "grad_norm": 3.7015504837036133, "learning_rate": 8.287638016279193e-06, "loss": 0.7643, "step": 6079 }, { "epoch": 1.5402153261557947, "grad_norm": 
4.204253196716309, "learning_rate": 8.287006710367888e-06, "loss": 0.9048, "step": 6080 }, { "epoch": 1.5404686510449652, "grad_norm": 3.3711719512939453, "learning_rate": 8.286375312158972e-06, "loss": 0.733, "step": 6081 }, { "epoch": 1.5407219759341355, "grad_norm": 4.128683090209961, "learning_rate": 8.285743821670177e-06, "loss": 0.8307, "step": 6082 }, { "epoch": 1.5409753008233058, "grad_norm": 4.091029644012451, "learning_rate": 8.285112238919237e-06, "loss": 0.8763, "step": 6083 }, { "epoch": 1.5412286257124763, "grad_norm": 3.8250246047973633, "learning_rate": 8.284480563923884e-06, "loss": 0.9446, "step": 6084 }, { "epoch": 1.5414819506016466, "grad_norm": 3.400613784790039, "learning_rate": 8.283848796701858e-06, "loss": 0.8054, "step": 6085 }, { "epoch": 1.541735275490817, "grad_norm": 3.6402640342712402, "learning_rate": 8.283216937270895e-06, "loss": 0.8743, "step": 6086 }, { "epoch": 1.5419886003799874, "grad_norm": 3.620732307434082, "learning_rate": 8.282584985648741e-06, "loss": 0.8085, "step": 6087 }, { "epoch": 1.5422419252691577, "grad_norm": 3.1914310455322266, "learning_rate": 8.281952941853137e-06, "loss": 0.7268, "step": 6088 }, { "epoch": 1.542495250158328, "grad_norm": 3.587397813796997, "learning_rate": 8.281320805901833e-06, "loss": 0.7668, "step": 6089 }, { "epoch": 1.5427485750474985, "grad_norm": 3.9668045043945312, "learning_rate": 8.28068857781258e-06, "loss": 0.7671, "step": 6090 }, { "epoch": 1.5430018999366688, "grad_norm": 4.050612926483154, "learning_rate": 8.280056257603128e-06, "loss": 0.8197, "step": 6091 }, { "epoch": 1.543255224825839, "grad_norm": 3.5672447681427, "learning_rate": 8.279423845291234e-06, "loss": 0.8322, "step": 6092 }, { "epoch": 1.5435085497150096, "grad_norm": 3.5721328258514404, "learning_rate": 8.278791340894657e-06, "loss": 0.7212, "step": 6093 }, { "epoch": 1.54376187460418, "grad_norm": 3.526211738586426, "learning_rate": 8.278158744431153e-06, "loss": 0.7312, "step": 6094 }, { "epoch": 1.5440151994933502, "grad_norm": 3.6647751331329346, "learning_rate": 8.27752605591849e-06, "loss": 0.7607, "step": 6095 }, { "epoch": 1.5442685243825207, "grad_norm": 3.6663804054260254, "learning_rate": 8.27689327537443e-06, "loss": 0.7713, "step": 6096 }, { "epoch": 1.5445218492716908, "grad_norm": 4.1124162673950195, "learning_rate": 8.276260402816743e-06, "loss": 0.8451, "step": 6097 }, { "epoch": 1.5447751741608613, "grad_norm": 4.174565315246582, "learning_rate": 8.2756274382632e-06, "loss": 0.8722, "step": 6098 }, { "epoch": 1.5450284990500318, "grad_norm": 3.2694077491760254, "learning_rate": 8.274994381731574e-06, "loss": 0.6446, "step": 6099 }, { "epoch": 1.5452818239392019, "grad_norm": 3.4651026725769043, "learning_rate": 8.27436123323964e-06, "loss": 0.7838, "step": 6100 }, { "epoch": 1.5455351488283724, "grad_norm": 3.81234073638916, "learning_rate": 8.273727992805177e-06, "loss": 0.797, "step": 6101 }, { "epoch": 1.545788473717543, "grad_norm": 3.965603828430176, "learning_rate": 8.273094660445966e-06, "loss": 0.6945, "step": 6102 }, { "epoch": 1.546041798606713, "grad_norm": 3.6192595958709717, "learning_rate": 8.272461236179792e-06, "loss": 0.8607, "step": 6103 }, { "epoch": 1.5462951234958835, "grad_norm": 4.0152764320373535, "learning_rate": 8.27182772002444e-06, "loss": 0.8548, "step": 6104 }, { "epoch": 1.5465484483850538, "grad_norm": 3.598031520843506, "learning_rate": 8.271194111997698e-06, "loss": 0.8291, "step": 6105 }, { "epoch": 1.546801773274224, "grad_norm": 3.3997223377227783, "learning_rate": 
8.270560412117359e-06, "loss": 0.7448, "step": 6106 }, { "epoch": 1.5470550981633946, "grad_norm": 4.2239556312561035, "learning_rate": 8.269926620401216e-06, "loss": 0.887, "step": 6107 }, { "epoch": 1.5473084230525649, "grad_norm": 3.8402645587921143, "learning_rate": 8.269292736867067e-06, "loss": 0.7522, "step": 6108 }, { "epoch": 1.5475617479417352, "grad_norm": 3.5056710243225098, "learning_rate": 8.26865876153271e-06, "loss": 0.8097, "step": 6109 }, { "epoch": 1.5478150728309057, "grad_norm": 4.089310169219971, "learning_rate": 8.268024694415949e-06, "loss": 0.8353, "step": 6110 }, { "epoch": 1.548068397720076, "grad_norm": 3.8327584266662598, "learning_rate": 8.267390535534581e-06, "loss": 0.8954, "step": 6111 }, { "epoch": 1.5483217226092463, "grad_norm": 3.519998550415039, "learning_rate": 8.266756284906421e-06, "loss": 0.7823, "step": 6112 }, { "epoch": 1.5485750474984168, "grad_norm": 3.7734134197235107, "learning_rate": 8.266121942549276e-06, "loss": 0.7635, "step": 6113 }, { "epoch": 1.548828372387587, "grad_norm": 3.833378553390503, "learning_rate": 8.265487508480958e-06, "loss": 0.8667, "step": 6114 }, { "epoch": 1.5490816972767574, "grad_norm": 3.733335018157959, "learning_rate": 8.264852982719282e-06, "loss": 0.8155, "step": 6115 }, { "epoch": 1.5493350221659279, "grad_norm": 3.489326238632202, "learning_rate": 8.264218365282061e-06, "loss": 0.7451, "step": 6116 }, { "epoch": 1.5495883470550982, "grad_norm": 4.047409534454346, "learning_rate": 8.263583656187122e-06, "loss": 0.9377, "step": 6117 }, { "epoch": 1.5498416719442685, "grad_norm": 3.5313308238983154, "learning_rate": 8.26294885545228e-06, "loss": 0.7151, "step": 6118 }, { "epoch": 1.550094996833439, "grad_norm": 3.8570024967193604, "learning_rate": 8.262313963095366e-06, "loss": 0.9263, "step": 6119 }, { "epoch": 1.5503483217226093, "grad_norm": 4.1115899085998535, "learning_rate": 8.261678979134204e-06, "loss": 0.797, "step": 6120 }, { "epoch": 1.5506016466117796, "grad_norm": 3.700599193572998, "learning_rate": 8.261043903586625e-06, "loss": 0.8243, "step": 6121 }, { "epoch": 1.55085497150095, "grad_norm": 3.876948595046997, "learning_rate": 8.260408736470462e-06, "loss": 0.8056, "step": 6122 }, { "epoch": 1.5511082963901204, "grad_norm": 3.469296932220459, "learning_rate": 8.259773477803548e-06, "loss": 0.7443, "step": 6123 }, { "epoch": 1.5513616212792907, "grad_norm": 3.2719638347625732, "learning_rate": 8.259138127603725e-06, "loss": 0.7492, "step": 6124 }, { "epoch": 1.5516149461684612, "grad_norm": 3.869877576828003, "learning_rate": 8.25850268588883e-06, "loss": 0.8034, "step": 6125 }, { "epoch": 1.5518682710576313, "grad_norm": 3.4584498405456543, "learning_rate": 8.257867152676705e-06, "loss": 0.6768, "step": 6126 }, { "epoch": 1.5521215959468018, "grad_norm": 4.024600982666016, "learning_rate": 8.257231527985198e-06, "loss": 0.7455, "step": 6127 }, { "epoch": 1.5523749208359723, "grad_norm": 3.710503339767456, "learning_rate": 8.256595811832158e-06, "loss": 0.7653, "step": 6128 }, { "epoch": 1.5526282457251424, "grad_norm": 3.544790267944336, "learning_rate": 8.255960004235433e-06, "loss": 0.7088, "step": 6129 }, { "epoch": 1.5528815706143129, "grad_norm": 3.613865613937378, "learning_rate": 8.255324105212876e-06, "loss": 0.7456, "step": 6130 }, { "epoch": 1.5531348955034834, "grad_norm": 3.8291051387786865, "learning_rate": 8.254688114782346e-06, "loss": 0.86, "step": 6131 }, { "epoch": 1.5533882203926535, "grad_norm": 3.9924874305725098, "learning_rate": 8.254052032961697e-06, "loss": 0.8344, 
"step": 6132 }, { "epoch": 1.553641545281824, "grad_norm": 4.155417442321777, "learning_rate": 8.253415859768791e-06, "loss": 0.705, "step": 6133 }, { "epoch": 1.5538948701709943, "grad_norm": 3.7591300010681152, "learning_rate": 8.252779595221496e-06, "loss": 0.863, "step": 6134 }, { "epoch": 1.5541481950601646, "grad_norm": 4.381962776184082, "learning_rate": 8.252143239337673e-06, "loss": 0.8352, "step": 6135 }, { "epoch": 1.554401519949335, "grad_norm": 3.820098876953125, "learning_rate": 8.25150679213519e-06, "loss": 0.7966, "step": 6136 }, { "epoch": 1.5546548448385054, "grad_norm": 4.8242926597595215, "learning_rate": 8.250870253631924e-06, "loss": 0.7907, "step": 6137 }, { "epoch": 1.5549081697276756, "grad_norm": 4.002001762390137, "learning_rate": 8.250233623845742e-06, "loss": 0.7635, "step": 6138 }, { "epoch": 1.5551614946168462, "grad_norm": 3.7615208625793457, "learning_rate": 8.249596902794526e-06, "loss": 0.8191, "step": 6139 }, { "epoch": 1.5554148195060165, "grad_norm": 3.6249327659606934, "learning_rate": 8.24896009049615e-06, "loss": 0.833, "step": 6140 }, { "epoch": 1.5556681443951867, "grad_norm": 3.682542324066162, "learning_rate": 8.248323186968496e-06, "loss": 0.8265, "step": 6141 }, { "epoch": 1.5559214692843573, "grad_norm": 4.007565975189209, "learning_rate": 8.24768619222945e-06, "loss": 0.968, "step": 6142 }, { "epoch": 1.5561747941735276, "grad_norm": 4.109283924102783, "learning_rate": 8.2470491062969e-06, "loss": 0.9266, "step": 6143 }, { "epoch": 1.5564281190626978, "grad_norm": 3.45259428024292, "learning_rate": 8.24641192918873e-06, "loss": 0.7521, "step": 6144 }, { "epoch": 1.5566814439518684, "grad_norm": 3.5808463096618652, "learning_rate": 8.245774660922838e-06, "loss": 0.796, "step": 6145 }, { "epoch": 1.5569347688410387, "grad_norm": 3.752659797668457, "learning_rate": 8.245137301517112e-06, "loss": 0.8555, "step": 6146 }, { "epoch": 1.557188093730209, "grad_norm": 3.394653797149658, "learning_rate": 8.244499850989453e-06, "loss": 0.669, "step": 6147 }, { "epoch": 1.5574414186193795, "grad_norm": 3.9455931186676025, "learning_rate": 8.243862309357757e-06, "loss": 0.7548, "step": 6148 }, { "epoch": 1.5576947435085498, "grad_norm": 3.751262664794922, "learning_rate": 8.243224676639929e-06, "loss": 0.7901, "step": 6149 }, { "epoch": 1.55794806839772, "grad_norm": 3.648343324661255, "learning_rate": 8.242586952853872e-06, "loss": 0.8097, "step": 6150 }, { "epoch": 1.5582013932868906, "grad_norm": 3.5928211212158203, "learning_rate": 8.241949138017494e-06, "loss": 0.7535, "step": 6151 }, { "epoch": 1.5584547181760608, "grad_norm": 3.3643546104431152, "learning_rate": 8.2413112321487e-06, "loss": 0.6916, "step": 6152 }, { "epoch": 1.5587080430652311, "grad_norm": 3.8847146034240723, "learning_rate": 8.24067323526541e-06, "loss": 0.7521, "step": 6153 }, { "epoch": 1.5589613679544017, "grad_norm": 3.9309847354888916, "learning_rate": 8.240035147385532e-06, "loss": 0.9314, "step": 6154 }, { "epoch": 1.5592146928435717, "grad_norm": 3.60764741897583, "learning_rate": 8.239396968526988e-06, "loss": 0.7827, "step": 6155 }, { "epoch": 1.5594680177327422, "grad_norm": 4.254142761230469, "learning_rate": 8.238758698707693e-06, "loss": 0.8276, "step": 6156 }, { "epoch": 1.5597213426219128, "grad_norm": 3.733480930328369, "learning_rate": 8.238120337945573e-06, "loss": 0.8402, "step": 6157 }, { "epoch": 1.5599746675110828, "grad_norm": 3.5674009323120117, "learning_rate": 8.237481886258552e-06, "loss": 0.7259, "step": 6158 }, { "epoch": 1.5602279924002533, 
"grad_norm": 3.84652042388916, "learning_rate": 8.236843343664555e-06, "loss": 0.709, "step": 6159 }, { "epoch": 1.5604813172894236, "grad_norm": 3.706516742706299, "learning_rate": 8.236204710181515e-06, "loss": 0.7761, "step": 6160 }, { "epoch": 1.560734642178594, "grad_norm": 3.8853776454925537, "learning_rate": 8.235565985827363e-06, "loss": 0.7936, "step": 6161 }, { "epoch": 1.5609879670677644, "grad_norm": 3.6674692630767822, "learning_rate": 8.234927170620034e-06, "loss": 0.7314, "step": 6162 }, { "epoch": 1.5612412919569347, "grad_norm": 3.5735464096069336, "learning_rate": 8.234288264577469e-06, "loss": 0.7055, "step": 6163 }, { "epoch": 1.561494616846105, "grad_norm": 3.875180721282959, "learning_rate": 8.233649267717602e-06, "loss": 0.7637, "step": 6164 }, { "epoch": 1.5617479417352755, "grad_norm": 3.7974531650543213, "learning_rate": 8.23301018005838e-06, "loss": 0.7814, "step": 6165 }, { "epoch": 1.5620012666244458, "grad_norm": 3.64311146736145, "learning_rate": 8.232371001617748e-06, "loss": 0.7853, "step": 6166 }, { "epoch": 1.5622545915136161, "grad_norm": 3.3769659996032715, "learning_rate": 8.231731732413653e-06, "loss": 0.7777, "step": 6167 }, { "epoch": 1.5625079164027866, "grad_norm": 3.6891138553619385, "learning_rate": 8.231092372464048e-06, "loss": 0.8313, "step": 6168 }, { "epoch": 1.562761241291957, "grad_norm": 3.456343412399292, "learning_rate": 8.230452921786878e-06, "loss": 0.8007, "step": 6169 }, { "epoch": 1.5630145661811272, "grad_norm": 3.2791130542755127, "learning_rate": 8.229813380400109e-06, "loss": 0.695, "step": 6170 }, { "epoch": 1.5632678910702977, "grad_norm": 4.603332042694092, "learning_rate": 8.229173748321691e-06, "loss": 0.8945, "step": 6171 }, { "epoch": 1.563521215959468, "grad_norm": 3.6999411582946777, "learning_rate": 8.228534025569589e-06, "loss": 0.7581, "step": 6172 }, { "epoch": 1.5637745408486383, "grad_norm": 3.513685464859009, "learning_rate": 8.227894212161765e-06, "loss": 0.7587, "step": 6173 }, { "epoch": 1.5640278657378088, "grad_norm": 3.9109554290771484, "learning_rate": 8.227254308116184e-06, "loss": 0.7102, "step": 6174 }, { "epoch": 1.5642811906269791, "grad_norm": 3.712085247039795, "learning_rate": 8.226614313450814e-06, "loss": 0.8107, "step": 6175 }, { "epoch": 1.5645345155161494, "grad_norm": 3.6505749225616455, "learning_rate": 8.225974228183626e-06, "loss": 0.7926, "step": 6176 }, { "epoch": 1.56478784040532, "grad_norm": 4.02791166305542, "learning_rate": 8.225334052332596e-06, "loss": 0.8007, "step": 6177 }, { "epoch": 1.5650411652944902, "grad_norm": 4.112579345703125, "learning_rate": 8.224693785915697e-06, "loss": 0.8168, "step": 6178 }, { "epoch": 1.5652944901836605, "grad_norm": 3.6569886207580566, "learning_rate": 8.224053428950904e-06, "loss": 0.8175, "step": 6179 }, { "epoch": 1.565547815072831, "grad_norm": 3.2978391647338867, "learning_rate": 8.223412981456205e-06, "loss": 0.8381, "step": 6180 }, { "epoch": 1.5658011399620013, "grad_norm": 4.3896894454956055, "learning_rate": 8.22277244344958e-06, "loss": 0.8294, "step": 6181 }, { "epoch": 1.5660544648511716, "grad_norm": 3.775959014892578, "learning_rate": 8.222131814949015e-06, "loss": 0.8364, "step": 6182 }, { "epoch": 1.5663077897403421, "grad_norm": 4.301949501037598, "learning_rate": 8.221491095972498e-06, "loss": 0.7441, "step": 6183 }, { "epoch": 1.5665611146295122, "grad_norm": 3.5109403133392334, "learning_rate": 8.220850286538022e-06, "loss": 0.7974, "step": 6184 }, { "epoch": 1.5668144395186827, "grad_norm": 3.6153130531311035, 
"learning_rate": 8.22020938666358e-06, "loss": 0.6844, "step": 6185 }, { "epoch": 1.5670677644078532, "grad_norm": 3.187896251678467, "learning_rate": 8.219568396367166e-06, "loss": 0.8435, "step": 6186 }, { "epoch": 1.5673210892970233, "grad_norm": 3.3308053016662598, "learning_rate": 8.21892731566678e-06, "loss": 0.7454, "step": 6187 }, { "epoch": 1.5675744141861938, "grad_norm": 3.4108846187591553, "learning_rate": 8.218286144580425e-06, "loss": 0.7689, "step": 6188 }, { "epoch": 1.567827739075364, "grad_norm": 3.6168429851531982, "learning_rate": 8.217644883126103e-06, "loss": 0.7955, "step": 6189 }, { "epoch": 1.5680810639645344, "grad_norm": 3.7692012786865234, "learning_rate": 8.21700353132182e-06, "loss": 0.9237, "step": 6190 }, { "epoch": 1.568334388853705, "grad_norm": 3.907435894012451, "learning_rate": 8.216362089185587e-06, "loss": 0.7422, "step": 6191 }, { "epoch": 1.5685877137428752, "grad_norm": 4.282109260559082, "learning_rate": 8.215720556735413e-06, "loss": 0.9337, "step": 6192 }, { "epoch": 1.5688410386320455, "grad_norm": 3.630056142807007, "learning_rate": 8.215078933989314e-06, "loss": 0.714, "step": 6193 }, { "epoch": 1.569094363521216, "grad_norm": 3.63059663772583, "learning_rate": 8.214437220965305e-06, "loss": 0.7876, "step": 6194 }, { "epoch": 1.5693476884103863, "grad_norm": 3.2414753437042236, "learning_rate": 8.213795417681405e-06, "loss": 0.6832, "step": 6195 }, { "epoch": 1.5696010132995566, "grad_norm": 4.0775299072265625, "learning_rate": 8.213153524155635e-06, "loss": 0.9201, "step": 6196 }, { "epoch": 1.569854338188727, "grad_norm": 3.5848569869995117, "learning_rate": 8.212511540406022e-06, "loss": 0.7547, "step": 6197 }, { "epoch": 1.5701076630778974, "grad_norm": 3.9914603233337402, "learning_rate": 8.211869466450589e-06, "loss": 0.8342, "step": 6198 }, { "epoch": 1.5703609879670677, "grad_norm": 3.6283152103424072, "learning_rate": 8.211227302307367e-06, "loss": 0.7353, "step": 6199 }, { "epoch": 1.5706143128562382, "grad_norm": 3.5175790786743164, "learning_rate": 8.210585047994389e-06, "loss": 0.745, "step": 6200 }, { "epoch": 1.5708676377454085, "grad_norm": 3.6858301162719727, "learning_rate": 8.209942703529685e-06, "loss": 0.7077, "step": 6201 }, { "epoch": 1.5711209626345788, "grad_norm": 3.3568482398986816, "learning_rate": 8.209300268931295e-06, "loss": 0.7556, "step": 6202 }, { "epoch": 1.5713742875237493, "grad_norm": 3.8684773445129395, "learning_rate": 8.20865774421726e-06, "loss": 0.8231, "step": 6203 }, { "epoch": 1.5716276124129196, "grad_norm": 3.443450450897217, "learning_rate": 8.208015129405615e-06, "loss": 0.6694, "step": 6204 }, { "epoch": 1.5718809373020899, "grad_norm": 4.031293869018555, "learning_rate": 8.207372424514413e-06, "loss": 0.8379, "step": 6205 }, { "epoch": 1.5721342621912604, "grad_norm": 3.6145505905151367, "learning_rate": 8.206729629561693e-06, "loss": 0.7973, "step": 6206 }, { "epoch": 1.5723875870804307, "grad_norm": 3.6999881267547607, "learning_rate": 8.206086744565509e-06, "loss": 0.9395, "step": 6207 }, { "epoch": 1.572640911969601, "grad_norm": 3.5318551063537598, "learning_rate": 8.205443769543914e-06, "loss": 0.7197, "step": 6208 }, { "epoch": 1.5728942368587715, "grad_norm": 3.9561591148376465, "learning_rate": 8.20480070451496e-06, "loss": 0.7936, "step": 6209 }, { "epoch": 1.5731475617479416, "grad_norm": 4.161046028137207, "learning_rate": 8.204157549496701e-06, "loss": 0.9315, "step": 6210 }, { "epoch": 1.573400886637112, "grad_norm": 4.216246128082275, "learning_rate": 8.203514304507201e-06, 
"loss": 0.839, "step": 6211 }, { "epoch": 1.5736542115262826, "grad_norm": 4.26318883895874, "learning_rate": 8.202870969564522e-06, "loss": 0.8246, "step": 6212 }, { "epoch": 1.5739075364154527, "grad_norm": 3.482059955596924, "learning_rate": 8.202227544686727e-06, "loss": 0.8972, "step": 6213 }, { "epoch": 1.5741608613046232, "grad_norm": 4.002997875213623, "learning_rate": 8.201584029891883e-06, "loss": 0.7151, "step": 6214 }, { "epoch": 1.5744141861937937, "grad_norm": 4.160106658935547, "learning_rate": 8.20094042519806e-06, "loss": 0.8671, "step": 6215 }, { "epoch": 1.5746675110829638, "grad_norm": 3.74727725982666, "learning_rate": 8.20029673062333e-06, "loss": 0.7904, "step": 6216 }, { "epoch": 1.5749208359721343, "grad_norm": 4.062234878540039, "learning_rate": 8.199652946185768e-06, "loss": 0.775, "step": 6217 }, { "epoch": 1.5751741608613046, "grad_norm": 3.474189281463623, "learning_rate": 8.19900907190345e-06, "loss": 0.7235, "step": 6218 }, { "epoch": 1.5754274857504749, "grad_norm": 3.7074568271636963, "learning_rate": 8.198365107794457e-06, "loss": 0.7201, "step": 6219 }, { "epoch": 1.5756808106396454, "grad_norm": 4.050851345062256, "learning_rate": 8.197721053876871e-06, "loss": 0.9168, "step": 6220 }, { "epoch": 1.5759341355288157, "grad_norm": 3.330204486846924, "learning_rate": 8.197076910168777e-06, "loss": 0.82, "step": 6221 }, { "epoch": 1.576187460417986, "grad_norm": 3.733776569366455, "learning_rate": 8.196432676688261e-06, "loss": 0.7382, "step": 6222 }, { "epoch": 1.5764407853071565, "grad_norm": 4.167757034301758, "learning_rate": 8.195788353453412e-06, "loss": 0.8078, "step": 6223 }, { "epoch": 1.5766941101963268, "grad_norm": 3.423680305480957, "learning_rate": 8.195143940482326e-06, "loss": 0.7227, "step": 6224 }, { "epoch": 1.576947435085497, "grad_norm": 3.5550029277801514, "learning_rate": 8.194499437793097e-06, "loss": 0.7647, "step": 6225 }, { "epoch": 1.5772007599746676, "grad_norm": 3.564436912536621, "learning_rate": 8.193854845403819e-06, "loss": 0.7744, "step": 6226 }, { "epoch": 1.5774540848638379, "grad_norm": 3.8896231651306152, "learning_rate": 8.193210163332595e-06, "loss": 0.8559, "step": 6227 }, { "epoch": 1.5777074097530082, "grad_norm": 3.517395257949829, "learning_rate": 8.192565391597524e-06, "loss": 0.7062, "step": 6228 }, { "epoch": 1.5779607346421787, "grad_norm": 3.6545779705047607, "learning_rate": 8.191920530216714e-06, "loss": 0.9066, "step": 6229 }, { "epoch": 1.578214059531349, "grad_norm": 3.967564105987549, "learning_rate": 8.19127557920827e-06, "loss": 0.799, "step": 6230 }, { "epoch": 1.5784673844205193, "grad_norm": 3.537055015563965, "learning_rate": 8.190630538590307e-06, "loss": 0.7337, "step": 6231 }, { "epoch": 1.5787207093096898, "grad_norm": 3.7698004245758057, "learning_rate": 8.189985408380934e-06, "loss": 0.9251, "step": 6232 }, { "epoch": 1.57897403419886, "grad_norm": 3.4278464317321777, "learning_rate": 8.189340188598263e-06, "loss": 0.7252, "step": 6233 }, { "epoch": 1.5792273590880304, "grad_norm": 3.5584816932678223, "learning_rate": 8.188694879260415e-06, "loss": 0.7443, "step": 6234 }, { "epoch": 1.5794806839772009, "grad_norm": 3.652059555053711, "learning_rate": 8.18804948038551e-06, "loss": 0.766, "step": 6235 }, { "epoch": 1.5797340088663712, "grad_norm": 3.8366751670837402, "learning_rate": 8.187403991991668e-06, "loss": 0.7452, "step": 6236 }, { "epoch": 1.5799873337555415, "grad_norm": 3.522740602493286, "learning_rate": 8.186758414097018e-06, "loss": 0.763, "step": 6237 }, { "epoch": 
1.580240658644712, "grad_norm": 3.8008928298950195, "learning_rate": 8.186112746719683e-06, "loss": 0.8408, "step": 6238 }, { "epoch": 1.580493983533882, "grad_norm": 3.2887730598449707, "learning_rate": 8.185466989877797e-06, "loss": 0.652, "step": 6239 }, { "epoch": 1.5807473084230526, "grad_norm": 3.574138879776001, "learning_rate": 8.18482114358949e-06, "loss": 0.7602, "step": 6240 }, { "epoch": 1.581000633312223, "grad_norm": 4.546648025512695, "learning_rate": 8.184175207872899e-06, "loss": 0.9222, "step": 6241 }, { "epoch": 1.5812539582013931, "grad_norm": 3.4243948459625244, "learning_rate": 8.183529182746159e-06, "loss": 0.8364, "step": 6242 }, { "epoch": 1.5815072830905637, "grad_norm": 3.651503324508667, "learning_rate": 8.182883068227412e-06, "loss": 0.7696, "step": 6243 }, { "epoch": 1.5817606079797342, "grad_norm": 3.653803825378418, "learning_rate": 8.182236864334801e-06, "loss": 0.7628, "step": 6244 }, { "epoch": 1.5820139328689042, "grad_norm": 3.7829039096832275, "learning_rate": 8.181590571086471e-06, "loss": 0.8652, "step": 6245 }, { "epoch": 1.5822672577580748, "grad_norm": 3.639207601547241, "learning_rate": 8.180944188500567e-06, "loss": 0.8595, "step": 6246 }, { "epoch": 1.582520582647245, "grad_norm": 3.445230484008789, "learning_rate": 8.180297716595242e-06, "loss": 0.7211, "step": 6247 }, { "epoch": 1.5827739075364153, "grad_norm": 3.762030601501465, "learning_rate": 8.179651155388648e-06, "loss": 0.7447, "step": 6248 }, { "epoch": 1.5830272324255859, "grad_norm": 4.305011749267578, "learning_rate": 8.17900450489894e-06, "loss": 0.9536, "step": 6249 }, { "epoch": 1.5832805573147561, "grad_norm": 3.5451323986053467, "learning_rate": 8.178357765144274e-06, "loss": 0.8557, "step": 6250 }, { "epoch": 1.5835338822039264, "grad_norm": 3.3520631790161133, "learning_rate": 8.177710936142813e-06, "loss": 0.7228, "step": 6251 }, { "epoch": 1.583787207093097, "grad_norm": 3.792992115020752, "learning_rate": 8.177064017912717e-06, "loss": 0.7737, "step": 6252 }, { "epoch": 1.5840405319822672, "grad_norm": 3.613748788833618, "learning_rate": 8.176417010472153e-06, "loss": 0.8975, "step": 6253 }, { "epoch": 1.5842938568714375, "grad_norm": 3.6810131072998047, "learning_rate": 8.175769913839289e-06, "loss": 0.7641, "step": 6254 }, { "epoch": 1.584547181760608, "grad_norm": 3.514597177505493, "learning_rate": 8.175122728032292e-06, "loss": 0.6976, "step": 6255 }, { "epoch": 1.5848005066497783, "grad_norm": 3.624741554260254, "learning_rate": 8.174475453069339e-06, "loss": 0.7989, "step": 6256 }, { "epoch": 1.5850538315389486, "grad_norm": 3.5870413780212402, "learning_rate": 8.173828088968603e-06, "loss": 0.7174, "step": 6257 }, { "epoch": 1.5853071564281191, "grad_norm": 3.642335891723633, "learning_rate": 8.17318063574826e-06, "loss": 0.7536, "step": 6258 }, { "epoch": 1.5855604813172894, "grad_norm": 3.785813331604004, "learning_rate": 8.172533093426493e-06, "loss": 0.718, "step": 6259 }, { "epoch": 1.5858138062064597, "grad_norm": 4.186957359313965, "learning_rate": 8.171885462021485e-06, "loss": 0.8191, "step": 6260 }, { "epoch": 1.5860671310956302, "grad_norm": 4.091099739074707, "learning_rate": 8.171237741551416e-06, "loss": 0.9345, "step": 6261 }, { "epoch": 1.5863204559848005, "grad_norm": 4.292994976043701, "learning_rate": 8.170589932034482e-06, "loss": 0.788, "step": 6262 }, { "epoch": 1.5865737808739708, "grad_norm": 3.8370442390441895, "learning_rate": 8.169942033488867e-06, "loss": 0.7674, "step": 6263 }, { "epoch": 1.5868271057631413, "grad_norm": 
4.250424385070801, "learning_rate": 8.169294045932764e-06, "loss": 0.8521, "step": 6264 }, { "epoch": 1.5870804306523116, "grad_norm": 4.1435227394104, "learning_rate": 8.16864596938437e-06, "loss": 1.0227, "step": 6265 }, { "epoch": 1.587333755541482, "grad_norm": 3.793520927429199, "learning_rate": 8.167997803861882e-06, "loss": 0.86, "step": 6266 }, { "epoch": 1.5875870804306524, "grad_norm": 3.6536591053009033, "learning_rate": 8.167349549383502e-06, "loss": 0.8048, "step": 6267 }, { "epoch": 1.5878404053198225, "grad_norm": 3.6870744228363037, "learning_rate": 8.16670120596743e-06, "loss": 0.8393, "step": 6268 }, { "epoch": 1.588093730208993, "grad_norm": 3.8530001640319824, "learning_rate": 8.166052773631874e-06, "loss": 0.8902, "step": 6269 }, { "epoch": 1.5883470550981635, "grad_norm": 3.391432523727417, "learning_rate": 8.165404252395038e-06, "loss": 0.7476, "step": 6270 }, { "epoch": 1.5886003799873336, "grad_norm": 4.023676872253418, "learning_rate": 8.164755642275135e-06, "loss": 0.7463, "step": 6271 }, { "epoch": 1.5888537048765041, "grad_norm": 3.3576619625091553, "learning_rate": 8.164106943290378e-06, "loss": 0.7238, "step": 6272 }, { "epoch": 1.5891070297656746, "grad_norm": 3.752878189086914, "learning_rate": 8.163458155458978e-06, "loss": 0.7281, "step": 6273 }, { "epoch": 1.5893603546548447, "grad_norm": 3.7723312377929688, "learning_rate": 8.162809278799157e-06, "loss": 0.8379, "step": 6274 }, { "epoch": 1.5896136795440152, "grad_norm": 3.6335339546203613, "learning_rate": 8.162160313329136e-06, "loss": 0.749, "step": 6275 }, { "epoch": 1.5898670044331855, "grad_norm": 3.7029337882995605, "learning_rate": 8.161511259067132e-06, "loss": 0.8376, "step": 6276 }, { "epoch": 1.5901203293223558, "grad_norm": 3.6301651000976562, "learning_rate": 8.160862116031377e-06, "loss": 0.8992, "step": 6277 }, { "epoch": 1.5903736542115263, "grad_norm": 3.716869592666626, "learning_rate": 8.160212884240092e-06, "loss": 0.922, "step": 6278 }, { "epoch": 1.5906269791006966, "grad_norm": 3.329904317855835, "learning_rate": 8.159563563711512e-06, "loss": 0.7718, "step": 6279 }, { "epoch": 1.590880303989867, "grad_norm": 3.7120020389556885, "learning_rate": 8.158914154463867e-06, "loss": 0.7568, "step": 6280 }, { "epoch": 1.5911336288790374, "grad_norm": 3.8210768699645996, "learning_rate": 8.158264656515394e-06, "loss": 0.7399, "step": 6281 }, { "epoch": 1.5913869537682077, "grad_norm": 3.99299955368042, "learning_rate": 8.157615069884329e-06, "loss": 0.8497, "step": 6282 }, { "epoch": 1.591640278657378, "grad_norm": 3.907954692840576, "learning_rate": 8.156965394588912e-06, "loss": 0.8495, "step": 6283 }, { "epoch": 1.5918936035465485, "grad_norm": 4.188817977905273, "learning_rate": 8.156315630647388e-06, "loss": 0.7751, "step": 6284 }, { "epoch": 1.5921469284357188, "grad_norm": 3.5710716247558594, "learning_rate": 8.155665778077999e-06, "loss": 0.6729, "step": 6285 }, { "epoch": 1.592400253324889, "grad_norm": 3.619842529296875, "learning_rate": 8.155015836898996e-06, "loss": 0.7356, "step": 6286 }, { "epoch": 1.5926535782140596, "grad_norm": 3.6929092407226562, "learning_rate": 8.154365807128626e-06, "loss": 0.7076, "step": 6287 }, { "epoch": 1.59290690310323, "grad_norm": 3.8376362323760986, "learning_rate": 8.153715688785142e-06, "loss": 0.7177, "step": 6288 }, { "epoch": 1.5931602279924002, "grad_norm": 4.196521282196045, "learning_rate": 8.153065481886799e-06, "loss": 0.8779, "step": 6289 }, { "epoch": 1.5934135528815707, "grad_norm": 3.584399700164795, "learning_rate": 
8.152415186451855e-06, "loss": 0.6938, "step": 6290 }, { "epoch": 1.593666877770741, "grad_norm": 3.694575071334839, "learning_rate": 8.151764802498573e-06, "loss": 0.8249, "step": 6291 }, { "epoch": 1.5939202026599113, "grad_norm": 3.6853747367858887, "learning_rate": 8.15111433004521e-06, "loss": 0.7948, "step": 6292 }, { "epoch": 1.5941735275490818, "grad_norm": 3.983799934387207, "learning_rate": 8.150463769110032e-06, "loss": 0.916, "step": 6293 }, { "epoch": 1.594426852438252, "grad_norm": 3.8763108253479004, "learning_rate": 8.149813119711309e-06, "loss": 0.8828, "step": 6294 }, { "epoch": 1.5946801773274224, "grad_norm": 3.5073606967926025, "learning_rate": 8.14916238186731e-06, "loss": 0.7702, "step": 6295 }, { "epoch": 1.594933502216593, "grad_norm": 3.7843570709228516, "learning_rate": 8.14851155559631e-06, "loss": 0.7596, "step": 6296 }, { "epoch": 1.595186827105763, "grad_norm": 3.7611119747161865, "learning_rate": 8.147860640916578e-06, "loss": 0.7774, "step": 6297 }, { "epoch": 1.5954401519949335, "grad_norm": 3.4019343852996826, "learning_rate": 8.147209637846396e-06, "loss": 0.8387, "step": 6298 }, { "epoch": 1.595693476884104, "grad_norm": 4.175518989562988, "learning_rate": 8.14655854640404e-06, "loss": 0.9184, "step": 6299 }, { "epoch": 1.595946801773274, "grad_norm": 3.809389591217041, "learning_rate": 8.145907366607798e-06, "loss": 0.848, "step": 6300 }, { "epoch": 1.5962001266624446, "grad_norm": 3.6634557247161865, "learning_rate": 8.145256098475952e-06, "loss": 0.7103, "step": 6301 }, { "epoch": 1.596453451551615, "grad_norm": 3.7986724376678467, "learning_rate": 8.144604742026785e-06, "loss": 0.7568, "step": 6302 }, { "epoch": 1.5967067764407852, "grad_norm": 3.6229984760284424, "learning_rate": 8.143953297278593e-06, "loss": 0.7285, "step": 6303 }, { "epoch": 1.5969601013299557, "grad_norm": 3.87339186668396, "learning_rate": 8.143301764249664e-06, "loss": 0.735, "step": 6304 }, { "epoch": 1.597213426219126, "grad_norm": 3.9502570629119873, "learning_rate": 8.142650142958296e-06, "loss": 0.7562, "step": 6305 }, { "epoch": 1.5974667511082963, "grad_norm": 4.399330139160156, "learning_rate": 8.141998433422787e-06, "loss": 0.7972, "step": 6306 }, { "epoch": 1.5977200759974668, "grad_norm": 4.016336917877197, "learning_rate": 8.141346635661432e-06, "loss": 0.8932, "step": 6307 }, { "epoch": 1.597973400886637, "grad_norm": 3.827622413635254, "learning_rate": 8.140694749692538e-06, "loss": 0.8481, "step": 6308 }, { "epoch": 1.5982267257758074, "grad_norm": 3.6018435955047607, "learning_rate": 8.140042775534405e-06, "loss": 0.7741, "step": 6309 }, { "epoch": 1.598480050664978, "grad_norm": 3.784566879272461, "learning_rate": 8.139390713205341e-06, "loss": 0.932, "step": 6310 }, { "epoch": 1.5987333755541482, "grad_norm": 4.132288455963135, "learning_rate": 8.138738562723661e-06, "loss": 0.8902, "step": 6311 }, { "epoch": 1.5989867004433185, "grad_norm": 3.89188289642334, "learning_rate": 8.138086324107673e-06, "loss": 0.8423, "step": 6312 }, { "epoch": 1.599240025332489, "grad_norm": 4.1564788818359375, "learning_rate": 8.13743399737569e-06, "loss": 0.8726, "step": 6313 }, { "epoch": 1.5994933502216593, "grad_norm": 4.011749267578125, "learning_rate": 8.136781582546031e-06, "loss": 0.9034, "step": 6314 }, { "epoch": 1.5997466751108296, "grad_norm": 3.8636090755462646, "learning_rate": 8.136129079637015e-06, "loss": 0.8528, "step": 6315 }, { "epoch": 1.6, "grad_norm": 3.402066469192505, "learning_rate": 8.135476488666964e-06, "loss": 0.704, "step": 6316 }, { "epoch": 
1.6002533248891704, "grad_norm": 4.022968769073486, "learning_rate": 8.134823809654205e-06, "loss": 0.8638, "step": 6317 }, { "epoch": 1.6005066497783407, "grad_norm": 3.585280179977417, "learning_rate": 8.13417104261706e-06, "loss": 0.7055, "step": 6318 }, { "epoch": 1.6007599746675112, "grad_norm": 3.2944531440734863, "learning_rate": 8.133518187573864e-06, "loss": 0.7464, "step": 6319 }, { "epoch": 1.6010132995566815, "grad_norm": 3.7818500995635986, "learning_rate": 8.132865244542942e-06, "loss": 0.8499, "step": 6320 }, { "epoch": 1.6012666244458518, "grad_norm": 3.902923822402954, "learning_rate": 8.132212213542636e-06, "loss": 0.97, "step": 6321 }, { "epoch": 1.6015199493350223, "grad_norm": 3.6301355361938477, "learning_rate": 8.131559094591278e-06, "loss": 0.7429, "step": 6322 }, { "epoch": 1.6017732742241926, "grad_norm": 3.7234926223754883, "learning_rate": 8.130905887707208e-06, "loss": 0.7835, "step": 6323 }, { "epoch": 1.6020265991133629, "grad_norm": 3.9502055644989014, "learning_rate": 8.130252592908766e-06, "loss": 0.77, "step": 6324 }, { "epoch": 1.6022799240025334, "grad_norm": 4.068994045257568, "learning_rate": 8.1295992102143e-06, "loss": 0.7259, "step": 6325 }, { "epoch": 1.6025332488917035, "grad_norm": 3.827765703201294, "learning_rate": 8.128945739642155e-06, "loss": 0.7152, "step": 6326 }, { "epoch": 1.602786573780874, "grad_norm": 3.77705717086792, "learning_rate": 8.128292181210681e-06, "loss": 0.7546, "step": 6327 }, { "epoch": 1.6030398986700445, "grad_norm": 3.294593334197998, "learning_rate": 8.127638534938227e-06, "loss": 0.7279, "step": 6328 }, { "epoch": 1.6032932235592146, "grad_norm": 3.7223188877105713, "learning_rate": 8.12698480084315e-06, "loss": 0.6827, "step": 6329 }, { "epoch": 1.603546548448385, "grad_norm": 3.2967376708984375, "learning_rate": 8.126330978943806e-06, "loss": 0.7499, "step": 6330 }, { "epoch": 1.6037998733375554, "grad_norm": 4.234163284301758, "learning_rate": 8.125677069258551e-06, "loss": 0.8274, "step": 6331 }, { "epoch": 1.6040531982267257, "grad_norm": 3.841139078140259, "learning_rate": 8.125023071805752e-06, "loss": 0.8236, "step": 6332 }, { "epoch": 1.6043065231158962, "grad_norm": 3.4479053020477295, "learning_rate": 8.124368986603767e-06, "loss": 0.7997, "step": 6333 }, { "epoch": 1.6045598480050665, "grad_norm": 4.069573402404785, "learning_rate": 8.123714813670967e-06, "loss": 0.7907, "step": 6334 }, { "epoch": 1.6048131728942368, "grad_norm": 3.8663175106048584, "learning_rate": 8.123060553025716e-06, "loss": 0.7616, "step": 6335 }, { "epoch": 1.6050664977834073, "grad_norm": 3.6568849086761475, "learning_rate": 8.122406204686391e-06, "loss": 0.7551, "step": 6336 }, { "epoch": 1.6053198226725776, "grad_norm": 3.8331940174102783, "learning_rate": 8.121751768671363e-06, "loss": 0.7646, "step": 6337 }, { "epoch": 1.6055731475617478, "grad_norm": 3.666332244873047, "learning_rate": 8.121097244999007e-06, "loss": 0.7805, "step": 6338 }, { "epoch": 1.6058264724509184, "grad_norm": 3.944143056869507, "learning_rate": 8.120442633687705e-06, "loss": 0.7167, "step": 6339 }, { "epoch": 1.6060797973400887, "grad_norm": 3.5900228023529053, "learning_rate": 8.119787934755836e-06, "loss": 0.7489, "step": 6340 }, { "epoch": 1.606333122229259, "grad_norm": 3.81634259223938, "learning_rate": 8.119133148221784e-06, "loss": 0.768, "step": 6341 }, { "epoch": 1.6065864471184295, "grad_norm": 3.515068531036377, "learning_rate": 8.118478274103934e-06, "loss": 0.7691, "step": 6342 }, { "epoch": 1.6068397720075998, "grad_norm": 
3.412266731262207, "learning_rate": 8.117823312420676e-06, "loss": 0.6523, "step": 6343 }, { "epoch": 1.60709309689677, "grad_norm": 3.9870100021362305, "learning_rate": 8.117168263190401e-06, "loss": 0.8259, "step": 6344 }, { "epoch": 1.6073464217859406, "grad_norm": 3.4756460189819336, "learning_rate": 8.116513126431504e-06, "loss": 0.7214, "step": 6345 }, { "epoch": 1.6075997466751109, "grad_norm": 4.4472126960754395, "learning_rate": 8.115857902162377e-06, "loss": 0.8546, "step": 6346 }, { "epoch": 1.6078530715642811, "grad_norm": 3.385911226272583, "learning_rate": 8.11520259040142e-06, "loss": 0.5968, "step": 6347 }, { "epoch": 1.6081063964534517, "grad_norm": 3.713108777999878, "learning_rate": 8.114547191167034e-06, "loss": 0.8377, "step": 6348 }, { "epoch": 1.608359721342622, "grad_norm": 3.6968376636505127, "learning_rate": 8.113891704477623e-06, "loss": 0.8044, "step": 6349 }, { "epoch": 1.6086130462317922, "grad_norm": 3.7799293994903564, "learning_rate": 8.113236130351593e-06, "loss": 0.8189, "step": 6350 }, { "epoch": 1.6088663711209628, "grad_norm": 4.2639265060424805, "learning_rate": 8.112580468807352e-06, "loss": 0.8377, "step": 6351 }, { "epoch": 1.609119696010133, "grad_norm": 4.169806957244873, "learning_rate": 8.111924719863308e-06, "loss": 0.8916, "step": 6352 }, { "epoch": 1.6093730208993033, "grad_norm": 3.6500370502471924, "learning_rate": 8.111268883537879e-06, "loss": 0.8548, "step": 6353 }, { "epoch": 1.6096263457884739, "grad_norm": 3.3989417552948, "learning_rate": 8.110612959849477e-06, "loss": 0.7529, "step": 6354 }, { "epoch": 1.609879670677644, "grad_norm": 3.633368492126465, "learning_rate": 8.10995694881652e-06, "loss": 0.6994, "step": 6355 }, { "epoch": 1.6101329955668144, "grad_norm": 3.9261820316314697, "learning_rate": 8.10930085045743e-06, "loss": 0.8472, "step": 6356 }, { "epoch": 1.610386320455985, "grad_norm": 3.8959484100341797, "learning_rate": 8.10864466479063e-06, "loss": 0.7549, "step": 6357 }, { "epoch": 1.610639645345155, "grad_norm": 3.712040424346924, "learning_rate": 8.107988391834544e-06, "loss": 0.8342, "step": 6358 }, { "epoch": 1.6108929702343255, "grad_norm": 3.6048924922943115, "learning_rate": 8.107332031607602e-06, "loss": 0.7578, "step": 6359 }, { "epoch": 1.6111462951234958, "grad_norm": 4.19912052154541, "learning_rate": 8.106675584128232e-06, "loss": 0.8127, "step": 6360 }, { "epoch": 1.6113996200126661, "grad_norm": 3.623387575149536, "learning_rate": 8.106019049414867e-06, "loss": 0.7879, "step": 6361 }, { "epoch": 1.6116529449018366, "grad_norm": 3.098299503326416, "learning_rate": 8.105362427485942e-06, "loss": 0.6986, "step": 6362 }, { "epoch": 1.611906269791007, "grad_norm": 3.926645040512085, "learning_rate": 8.104705718359897e-06, "loss": 0.7917, "step": 6363 }, { "epoch": 1.6121595946801772, "grad_norm": 3.5370430946350098, "learning_rate": 8.10404892205517e-06, "loss": 0.8019, "step": 6364 }, { "epoch": 1.6124129195693477, "grad_norm": 3.5520987510681152, "learning_rate": 8.103392038590205e-06, "loss": 0.8157, "step": 6365 }, { "epoch": 1.612666244458518, "grad_norm": 3.9524483680725098, "learning_rate": 8.102735067983446e-06, "loss": 0.8213, "step": 6366 }, { "epoch": 1.6129195693476883, "grad_norm": 4.112865447998047, "learning_rate": 8.10207801025334e-06, "loss": 0.955, "step": 6367 }, { "epoch": 1.6131728942368588, "grad_norm": 3.4801909923553467, "learning_rate": 8.101420865418338e-06, "loss": 0.7196, "step": 6368 }, { "epoch": 1.6134262191260291, "grad_norm": 3.5850558280944824, "learning_rate": 
8.100763633496889e-06, "loss": 0.8016, "step": 6369 }, { "epoch": 1.6136795440151994, "grad_norm": 3.788353204727173, "learning_rate": 8.100106314507454e-06, "loss": 0.9478, "step": 6370 }, { "epoch": 1.61393286890437, "grad_norm": 3.492480993270874, "learning_rate": 8.099448908468486e-06, "loss": 0.7154, "step": 6371 }, { "epoch": 1.6141861937935402, "grad_norm": 3.3306331634521484, "learning_rate": 8.098791415398442e-06, "loss": 0.805, "step": 6372 }, { "epoch": 1.6144395186827105, "grad_norm": 3.7816762924194336, "learning_rate": 8.09813383531579e-06, "loss": 0.8916, "step": 6373 }, { "epoch": 1.614692843571881, "grad_norm": 3.386228322982788, "learning_rate": 8.097476168238991e-06, "loss": 0.7459, "step": 6374 }, { "epoch": 1.6149461684610513, "grad_norm": 3.881789207458496, "learning_rate": 8.096818414186515e-06, "loss": 0.8246, "step": 6375 }, { "epoch": 1.6151994933502216, "grad_norm": 3.399620771408081, "learning_rate": 8.096160573176827e-06, "loss": 0.7961, "step": 6376 }, { "epoch": 1.6154528182393921, "grad_norm": 3.8142077922821045, "learning_rate": 8.095502645228402e-06, "loss": 0.8436, "step": 6377 }, { "epoch": 1.6157061431285624, "grad_norm": 3.5735161304473877, "learning_rate": 8.094844630359713e-06, "loss": 0.7599, "step": 6378 }, { "epoch": 1.6159594680177327, "grad_norm": 3.9081273078918457, "learning_rate": 8.094186528589238e-06, "loss": 0.7871, "step": 6379 }, { "epoch": 1.6162127929069032, "grad_norm": 3.8461575508117676, "learning_rate": 8.093528339935456e-06, "loss": 0.7515, "step": 6380 }, { "epoch": 1.6164661177960733, "grad_norm": 4.0900750160217285, "learning_rate": 8.092870064416847e-06, "loss": 0.7977, "step": 6381 }, { "epoch": 1.6167194426852438, "grad_norm": 3.374234676361084, "learning_rate": 8.092211702051898e-06, "loss": 0.7515, "step": 6382 }, { "epoch": 1.6169727675744143, "grad_norm": 3.309140682220459, "learning_rate": 8.091553252859091e-06, "loss": 0.7617, "step": 6383 }, { "epoch": 1.6172260924635844, "grad_norm": 3.498622179031372, "learning_rate": 8.090894716856918e-06, "loss": 0.7781, "step": 6384 }, { "epoch": 1.617479417352755, "grad_norm": 3.208578109741211, "learning_rate": 8.090236094063872e-06, "loss": 0.6439, "step": 6385 }, { "epoch": 1.6177327422419254, "grad_norm": 3.7767622470855713, "learning_rate": 8.089577384498443e-06, "loss": 0.7293, "step": 6386 }, { "epoch": 1.6179860671310955, "grad_norm": 3.8664450645446777, "learning_rate": 8.08891858817913e-06, "loss": 0.7885, "step": 6387 }, { "epoch": 1.618239392020266, "grad_norm": 4.010339736938477, "learning_rate": 8.08825970512443e-06, "loss": 0.8411, "step": 6388 }, { "epoch": 1.6184927169094363, "grad_norm": 4.171139717102051, "learning_rate": 8.087600735352844e-06, "loss": 0.85, "step": 6389 }, { "epoch": 1.6187460417986066, "grad_norm": 3.5802998542785645, "learning_rate": 8.08694167888288e-06, "loss": 0.6371, "step": 6390 }, { "epoch": 1.618999366687777, "grad_norm": 4.302485466003418, "learning_rate": 8.086282535733037e-06, "loss": 0.7943, "step": 6391 }, { "epoch": 1.6192526915769474, "grad_norm": 3.7403008937835693, "learning_rate": 8.085623305921828e-06, "loss": 0.6183, "step": 6392 }, { "epoch": 1.6195060164661177, "grad_norm": 3.924921751022339, "learning_rate": 8.084963989467761e-06, "loss": 0.7468, "step": 6393 }, { "epoch": 1.6197593413552882, "grad_norm": 3.962587594985962, "learning_rate": 8.084304586389355e-06, "loss": 0.8031, "step": 6394 }, { "epoch": 1.6200126662444585, "grad_norm": 3.9985625743865967, "learning_rate": 8.08364509670512e-06, "loss": 0.7817, "step": 
6395 }, { "epoch": 1.6202659911336288, "grad_norm": 3.9099555015563965, "learning_rate": 8.082985520433575e-06, "loss": 0.817, "step": 6396 }, { "epoch": 1.6205193160227993, "grad_norm": 4.005500793457031, "learning_rate": 8.082325857593241e-06, "loss": 0.8748, "step": 6397 }, { "epoch": 1.6207726409119696, "grad_norm": 3.6754150390625, "learning_rate": 8.081666108202643e-06, "loss": 0.8098, "step": 6398 }, { "epoch": 1.62102596580114, "grad_norm": 3.8412888050079346, "learning_rate": 8.081006272280305e-06, "loss": 0.7834, "step": 6399 }, { "epoch": 1.6212792906903104, "grad_norm": 3.7987236976623535, "learning_rate": 8.080346349844755e-06, "loss": 0.8147, "step": 6400 }, { "epoch": 1.6215326155794807, "grad_norm": 4.28433084487915, "learning_rate": 8.07968634091452e-06, "loss": 0.9466, "step": 6401 }, { "epoch": 1.621785940468651, "grad_norm": 3.7046642303466797, "learning_rate": 8.079026245508139e-06, "loss": 0.7772, "step": 6402 }, { "epoch": 1.6220392653578215, "grad_norm": 3.5088951587677, "learning_rate": 8.078366063644144e-06, "loss": 0.6949, "step": 6403 }, { "epoch": 1.6222925902469918, "grad_norm": 3.5445315837860107, "learning_rate": 8.077705795341074e-06, "loss": 0.7836, "step": 6404 }, { "epoch": 1.622545915136162, "grad_norm": 3.655200719833374, "learning_rate": 8.077045440617465e-06, "loss": 0.7902, "step": 6405 }, { "epoch": 1.6227992400253326, "grad_norm": 3.1639909744262695, "learning_rate": 8.076384999491862e-06, "loss": 0.7129, "step": 6406 }, { "epoch": 1.623052564914503, "grad_norm": 3.459355592727661, "learning_rate": 8.075724471982811e-06, "loss": 0.8008, "step": 6407 }, { "epoch": 1.6233058898036732, "grad_norm": 3.36025071144104, "learning_rate": 8.075063858108859e-06, "loss": 0.7163, "step": 6408 }, { "epoch": 1.6235592146928437, "grad_norm": 3.429908275604248, "learning_rate": 8.074403157888556e-06, "loss": 0.7261, "step": 6409 }, { "epoch": 1.6238125395820138, "grad_norm": 4.139374256134033, "learning_rate": 8.07374237134045e-06, "loss": 0.9176, "step": 6410 }, { "epoch": 1.6240658644711843, "grad_norm": 4.374108791351318, "learning_rate": 8.073081498483101e-06, "loss": 0.7473, "step": 6411 }, { "epoch": 1.6243191893603548, "grad_norm": 3.589346408843994, "learning_rate": 8.072420539335063e-06, "loss": 0.7826, "step": 6412 }, { "epoch": 1.6245725142495249, "grad_norm": 3.336552858352661, "learning_rate": 8.071759493914897e-06, "loss": 0.7313, "step": 6413 }, { "epoch": 1.6248258391386954, "grad_norm": 3.78987193107605, "learning_rate": 8.071098362241164e-06, "loss": 0.6873, "step": 6414 }, { "epoch": 1.625079164027866, "grad_norm": 4.215850830078125, "learning_rate": 8.070437144332428e-06, "loss": 0.7653, "step": 6415 }, { "epoch": 1.625332488917036, "grad_norm": 3.558863878250122, "learning_rate": 8.069775840207256e-06, "loss": 0.7366, "step": 6416 }, { "epoch": 1.6255858138062065, "grad_norm": 3.667942762374878, "learning_rate": 8.069114449884217e-06, "loss": 0.7321, "step": 6417 }, { "epoch": 1.6258391386953768, "grad_norm": 3.6310346126556396, "learning_rate": 8.068452973381883e-06, "loss": 0.8066, "step": 6418 }, { "epoch": 1.626092463584547, "grad_norm": 3.59124755859375, "learning_rate": 8.067791410718829e-06, "loss": 0.845, "step": 6419 }, { "epoch": 1.6263457884737176, "grad_norm": 3.835885763168335, "learning_rate": 8.067129761913628e-06, "loss": 0.7575, "step": 6420 }, { "epoch": 1.6265991133628879, "grad_norm": 3.6241557598114014, "learning_rate": 8.066468026984861e-06, "loss": 0.8036, "step": 6421 }, { "epoch": 1.6268524382520582, "grad_norm": 
3.6531600952148438, "learning_rate": 8.065806205951109e-06, "loss": 0.7127, "step": 6422 }, { "epoch": 1.6271057631412287, "grad_norm": 3.900205373764038, "learning_rate": 8.065144298830956e-06, "loss": 0.7207, "step": 6423 }, { "epoch": 1.627359088030399, "grad_norm": 4.193485736846924, "learning_rate": 8.064482305642989e-06, "loss": 0.8922, "step": 6424 }, { "epoch": 1.6276124129195693, "grad_norm": 4.118162155151367, "learning_rate": 8.063820226405793e-06, "loss": 0.8109, "step": 6425 }, { "epoch": 1.6278657378087398, "grad_norm": 3.196044921875, "learning_rate": 8.063158061137962e-06, "loss": 0.7352, "step": 6426 }, { "epoch": 1.62811906269791, "grad_norm": 3.3466603755950928, "learning_rate": 8.062495809858088e-06, "loss": 0.7719, "step": 6427 }, { "epoch": 1.6283723875870804, "grad_norm": 4.01998233795166, "learning_rate": 8.061833472584765e-06, "loss": 0.7421, "step": 6428 }, { "epoch": 1.6286257124762509, "grad_norm": 3.8521294593811035, "learning_rate": 8.061171049336595e-06, "loss": 0.8427, "step": 6429 }, { "epoch": 1.6288790373654212, "grad_norm": 4.203442573547363, "learning_rate": 8.060508540132179e-06, "loss": 0.8202, "step": 6430 }, { "epoch": 1.6291323622545915, "grad_norm": 3.664254903793335, "learning_rate": 8.059845944990114e-06, "loss": 0.7878, "step": 6431 }, { "epoch": 1.629385687143762, "grad_norm": 3.6004889011383057, "learning_rate": 8.05918326392901e-06, "loss": 0.6552, "step": 6432 }, { "epoch": 1.6296390120329323, "grad_norm": 3.683966636657715, "learning_rate": 8.058520496967475e-06, "loss": 0.6749, "step": 6433 }, { "epoch": 1.6298923369221026, "grad_norm": 3.49242901802063, "learning_rate": 8.057857644124116e-06, "loss": 0.8431, "step": 6434 }, { "epoch": 1.630145661811273, "grad_norm": 3.3450429439544678, "learning_rate": 8.057194705417548e-06, "loss": 0.7828, "step": 6435 }, { "epoch": 1.6303989867004434, "grad_norm": 3.3921709060668945, "learning_rate": 8.056531680866386e-06, "loss": 0.8418, "step": 6436 }, { "epoch": 1.6306523115896137, "grad_norm": 4.301143646240234, "learning_rate": 8.055868570489247e-06, "loss": 0.9744, "step": 6437 }, { "epoch": 1.6309056364787842, "grad_norm": 3.922924280166626, "learning_rate": 8.055205374304751e-06, "loss": 0.7169, "step": 6438 }, { "epoch": 1.6311589613679542, "grad_norm": 3.5931310653686523, "learning_rate": 8.054542092331518e-06, "loss": 0.643, "step": 6439 }, { "epoch": 1.6314122862571248, "grad_norm": 4.096027851104736, "learning_rate": 8.053878724588178e-06, "loss": 0.7857, "step": 6440 }, { "epoch": 1.6316656111462953, "grad_norm": 3.529952049255371, "learning_rate": 8.053215271093353e-06, "loss": 0.7983, "step": 6441 }, { "epoch": 1.6319189360354653, "grad_norm": 3.8757197856903076, "learning_rate": 8.052551731865673e-06, "loss": 0.8223, "step": 6442 }, { "epoch": 1.6321722609246359, "grad_norm": 3.756049394607544, "learning_rate": 8.051888106923773e-06, "loss": 0.7831, "step": 6443 }, { "epoch": 1.6324255858138064, "grad_norm": 3.713315010070801, "learning_rate": 8.051224396286283e-06, "loss": 0.7794, "step": 6444 }, { "epoch": 1.6326789107029764, "grad_norm": 3.53013277053833, "learning_rate": 8.050560599971844e-06, "loss": 0.7869, "step": 6445 }, { "epoch": 1.632932235592147, "grad_norm": 3.8916704654693604, "learning_rate": 8.049896717999094e-06, "loss": 0.8467, "step": 6446 }, { "epoch": 1.6331855604813172, "grad_norm": 4.025676727294922, "learning_rate": 8.049232750386671e-06, "loss": 0.8094, "step": 6447 }, { "epoch": 1.6334388853704875, "grad_norm": 3.3451850414276123, "learning_rate": 
8.048568697153222e-06, "loss": 0.8509, "step": 6448 }, { "epoch": 1.633692210259658, "grad_norm": 3.587831974029541, "learning_rate": 8.047904558317394e-06, "loss": 0.6801, "step": 6449 }, { "epoch": 1.6339455351488283, "grad_norm": 3.618785858154297, "learning_rate": 8.047240333897834e-06, "loss": 0.7699, "step": 6450 }, { "epoch": 1.6341988600379986, "grad_norm": 3.751980781555176, "learning_rate": 8.046576023913193e-06, "loss": 0.8147, "step": 6451 }, { "epoch": 1.6344521849271691, "grad_norm": 3.8179969787597656, "learning_rate": 8.045911628382126e-06, "loss": 0.8151, "step": 6452 }, { "epoch": 1.6347055098163394, "grad_norm": 4.683689117431641, "learning_rate": 8.045247147323288e-06, "loss": 0.8582, "step": 6453 }, { "epoch": 1.6349588347055097, "grad_norm": 3.9231245517730713, "learning_rate": 8.044582580755336e-06, "loss": 0.7611, "step": 6454 }, { "epoch": 1.6352121595946802, "grad_norm": 3.878051280975342, "learning_rate": 8.043917928696933e-06, "loss": 0.7537, "step": 6455 }, { "epoch": 1.6354654844838505, "grad_norm": 3.9709792137145996, "learning_rate": 8.04325319116674e-06, "loss": 0.8344, "step": 6456 }, { "epoch": 1.6357188093730208, "grad_norm": 4.03284215927124, "learning_rate": 8.042588368183425e-06, "loss": 0.8706, "step": 6457 }, { "epoch": 1.6359721342621913, "grad_norm": 3.7195112705230713, "learning_rate": 8.041923459765655e-06, "loss": 0.7358, "step": 6458 }, { "epoch": 1.6362254591513616, "grad_norm": 3.867770195007324, "learning_rate": 8.0412584659321e-06, "loss": 0.8893, "step": 6459 }, { "epoch": 1.636478784040532, "grad_norm": 3.8629443645477295, "learning_rate": 8.040593386701431e-06, "loss": 0.8314, "step": 6460 }, { "epoch": 1.6367321089297024, "grad_norm": 3.434809923171997, "learning_rate": 8.039928222092326e-06, "loss": 0.6819, "step": 6461 }, { "epoch": 1.6369854338188727, "grad_norm": 3.4478530883789062, "learning_rate": 8.039262972123461e-06, "loss": 0.8779, "step": 6462 }, { "epoch": 1.637238758708043, "grad_norm": 4.03153133392334, "learning_rate": 8.038597636813517e-06, "loss": 0.8377, "step": 6463 }, { "epoch": 1.6374920835972135, "grad_norm": 3.9537808895111084, "learning_rate": 8.037932216181174e-06, "loss": 0.7889, "step": 6464 }, { "epoch": 1.6377454084863838, "grad_norm": 3.6047708988189697, "learning_rate": 8.03726671024512e-06, "loss": 0.8148, "step": 6465 }, { "epoch": 1.6379987333755541, "grad_norm": 3.5980916023254395, "learning_rate": 8.036601119024036e-06, "loss": 0.7766, "step": 6466 }, { "epoch": 1.6382520582647246, "grad_norm": 3.7234902381896973, "learning_rate": 8.03593544253662e-06, "loss": 0.6899, "step": 6467 }, { "epoch": 1.6385053831538947, "grad_norm": 3.416440725326538, "learning_rate": 8.03526968080156e-06, "loss": 0.778, "step": 6468 }, { "epoch": 1.6387587080430652, "grad_norm": 3.2809247970581055, "learning_rate": 8.034603833837547e-06, "loss": 0.8297, "step": 6469 }, { "epoch": 1.6390120329322357, "grad_norm": 3.8302032947540283, "learning_rate": 8.033937901663283e-06, "loss": 0.734, "step": 6470 }, { "epoch": 1.6392653578214058, "grad_norm": 3.3262205123901367, "learning_rate": 8.033271884297463e-06, "loss": 0.6733, "step": 6471 }, { "epoch": 1.6395186827105763, "grad_norm": 3.435903549194336, "learning_rate": 8.032605781758791e-06, "loss": 0.7515, "step": 6472 }, { "epoch": 1.6397720075997468, "grad_norm": 3.7613158226013184, "learning_rate": 8.03193959406597e-06, "loss": 0.8775, "step": 6473 }, { "epoch": 1.640025332488917, "grad_norm": 3.747976064682007, "learning_rate": 8.031273321237706e-06, "loss": 0.9734, 
"step": 6474 }, { "epoch": 1.6402786573780874, "grad_norm": 3.3887627124786377, "learning_rate": 8.030606963292709e-06, "loss": 0.8202, "step": 6475 }, { "epoch": 1.6405319822672577, "grad_norm": 3.9756176471710205, "learning_rate": 8.029940520249686e-06, "loss": 0.8735, "step": 6476 }, { "epoch": 1.640785307156428, "grad_norm": 3.6156015396118164, "learning_rate": 8.029273992127356e-06, "loss": 0.7155, "step": 6477 }, { "epoch": 1.6410386320455985, "grad_norm": 4.137899875640869, "learning_rate": 8.028607378944432e-06, "loss": 0.689, "step": 6478 }, { "epoch": 1.6412919569347688, "grad_norm": 3.173203229904175, "learning_rate": 8.02794068071963e-06, "loss": 0.9062, "step": 6479 }, { "epoch": 1.641545281823939, "grad_norm": 3.9908053874969482, "learning_rate": 8.027273897471674e-06, "loss": 0.7302, "step": 6480 }, { "epoch": 1.6417986067131096, "grad_norm": 3.3780364990234375, "learning_rate": 8.026607029219285e-06, "loss": 0.6795, "step": 6481 }, { "epoch": 1.64205193160228, "grad_norm": 4.1177520751953125, "learning_rate": 8.02594007598119e-06, "loss": 0.7486, "step": 6482 }, { "epoch": 1.6423052564914502, "grad_norm": 4.135778427124023, "learning_rate": 8.025273037776116e-06, "loss": 0.8212, "step": 6483 }, { "epoch": 1.6425585813806207, "grad_norm": 3.945587396621704, "learning_rate": 8.024605914622793e-06, "loss": 0.6565, "step": 6484 }, { "epoch": 1.642811906269791, "grad_norm": 3.3609964847564697, "learning_rate": 8.023938706539953e-06, "loss": 0.7832, "step": 6485 }, { "epoch": 1.6430652311589613, "grad_norm": 3.868649959564209, "learning_rate": 8.023271413546332e-06, "loss": 0.8281, "step": 6486 }, { "epoch": 1.6433185560481318, "grad_norm": 3.7476377487182617, "learning_rate": 8.022604035660666e-06, "loss": 0.7855, "step": 6487 }, { "epoch": 1.643571880937302, "grad_norm": 3.978663682937622, "learning_rate": 8.021936572901697e-06, "loss": 0.9225, "step": 6488 }, { "epoch": 1.6438252058264724, "grad_norm": 3.6580278873443604, "learning_rate": 8.021269025288163e-06, "loss": 0.8147, "step": 6489 }, { "epoch": 1.644078530715643, "grad_norm": 3.498389959335327, "learning_rate": 8.020601392838812e-06, "loss": 0.7242, "step": 6490 }, { "epoch": 1.6443318556048132, "grad_norm": 3.400737762451172, "learning_rate": 8.019933675572389e-06, "loss": 0.728, "step": 6491 }, { "epoch": 1.6445851804939835, "grad_norm": 3.63218092918396, "learning_rate": 8.019265873507644e-06, "loss": 0.7941, "step": 6492 }, { "epoch": 1.644838505383154, "grad_norm": 3.3384480476379395, "learning_rate": 8.018597986663328e-06, "loss": 0.7823, "step": 6493 }, { "epoch": 1.6450918302723243, "grad_norm": 3.6739799976348877, "learning_rate": 8.017930015058195e-06, "loss": 0.8028, "step": 6494 }, { "epoch": 1.6453451551614946, "grad_norm": 3.4653425216674805, "learning_rate": 8.017261958711003e-06, "loss": 0.7367, "step": 6495 }, { "epoch": 1.6455984800506651, "grad_norm": 3.514143228530884, "learning_rate": 8.016593817640507e-06, "loss": 0.7542, "step": 6496 }, { "epoch": 1.6458518049398352, "grad_norm": 4.184511184692383, "learning_rate": 8.015925591865474e-06, "loss": 0.7791, "step": 6497 }, { "epoch": 1.6461051298290057, "grad_norm": 4.014858722686768, "learning_rate": 8.015257281404662e-06, "loss": 0.8363, "step": 6498 }, { "epoch": 1.6463584547181762, "grad_norm": 3.3786003589630127, "learning_rate": 8.01458888627684e-06, "loss": 0.7193, "step": 6499 }, { "epoch": 1.6466117796073463, "grad_norm": 3.679405927658081, "learning_rate": 8.013920406500772e-06, "loss": 0.7936, "step": 6500 }, { "epoch": 
1.6466117796073463, "eval_loss": 1.1629008054733276, "eval_runtime": 13.7409, "eval_samples_per_second": 29.11, "eval_steps_per_second": 3.639, "step": 6500 }, { "epoch": 1.6468651044965168, "grad_norm": 3.7600491046905518, "learning_rate": 8.013251842095234e-06, "loss": 0.6361, "step": 6501 }, { "epoch": 1.647118429385687, "grad_norm": 3.616704225540161, "learning_rate": 8.012583193078994e-06, "loss": 0.7376, "step": 6502 }, { "epoch": 1.6473717542748574, "grad_norm": 3.8548691272735596, "learning_rate": 8.011914459470832e-06, "loss": 0.8461, "step": 6503 }, { "epoch": 1.647625079164028, "grad_norm": 3.9855406284332275, "learning_rate": 8.011245641289522e-06, "loss": 0.7625, "step": 6504 }, { "epoch": 1.6478784040531982, "grad_norm": 3.7066471576690674, "learning_rate": 8.010576738553848e-06, "loss": 0.7952, "step": 6505 }, { "epoch": 1.6481317289423685, "grad_norm": 3.8302483558654785, "learning_rate": 8.009907751282588e-06, "loss": 0.7149, "step": 6506 }, { "epoch": 1.648385053831539, "grad_norm": 4.178551197052002, "learning_rate": 8.009238679494531e-06, "loss": 0.9022, "step": 6507 }, { "epoch": 1.6486383787207093, "grad_norm": 3.9367873668670654, "learning_rate": 8.00856952320846e-06, "loss": 0.7939, "step": 6508 }, { "epoch": 1.6488917036098796, "grad_norm": 3.814330816268921, "learning_rate": 8.007900282443168e-06, "loss": 0.7212, "step": 6509 }, { "epoch": 1.64914502849905, "grad_norm": 4.1374125480651855, "learning_rate": 8.007230957217447e-06, "loss": 0.8048, "step": 6510 }, { "epoch": 1.6493983533882204, "grad_norm": 4.204483985900879, "learning_rate": 8.006561547550089e-06, "loss": 0.9392, "step": 6511 }, { "epoch": 1.6496516782773907, "grad_norm": 4.255924701690674, "learning_rate": 8.005892053459892e-06, "loss": 0.9636, "step": 6512 }, { "epoch": 1.6499050031665612, "grad_norm": 3.670743942260742, "learning_rate": 8.005222474965654e-06, "loss": 0.7437, "step": 6513 }, { "epoch": 1.6501583280557315, "grad_norm": 3.753647804260254, "learning_rate": 8.004552812086179e-06, "loss": 0.7605, "step": 6514 }, { "epoch": 1.6504116529449018, "grad_norm": 3.7480428218841553, "learning_rate": 8.003883064840267e-06, "loss": 0.7204, "step": 6515 }, { "epoch": 1.6506649778340723, "grad_norm": 3.120974540710449, "learning_rate": 8.003213233246728e-06, "loss": 0.6214, "step": 6516 }, { "epoch": 1.6509183027232426, "grad_norm": 4.047748565673828, "learning_rate": 8.002543317324369e-06, "loss": 0.8907, "step": 6517 }, { "epoch": 1.6511716276124129, "grad_norm": 4.058399200439453, "learning_rate": 8.001873317092001e-06, "loss": 0.8473, "step": 6518 }, { "epoch": 1.6514249525015834, "grad_norm": 3.572179079055786, "learning_rate": 8.001203232568436e-06, "loss": 0.8149, "step": 6519 }, { "epoch": 1.6516782773907537, "grad_norm": 3.811018943786621, "learning_rate": 8.000533063772492e-06, "loss": 0.7781, "step": 6520 }, { "epoch": 1.651931602279924, "grad_norm": 4.158339977264404, "learning_rate": 7.999862810722985e-06, "loss": 0.7893, "step": 6521 }, { "epoch": 1.6521849271690945, "grad_norm": 3.688552141189575, "learning_rate": 7.999192473438737e-06, "loss": 0.8393, "step": 6522 }, { "epoch": 1.6524382520582648, "grad_norm": 3.995978355407715, "learning_rate": 7.99852205193857e-06, "loss": 0.8735, "step": 6523 }, { "epoch": 1.652691576947435, "grad_norm": 3.4606595039367676, "learning_rate": 7.997851546241308e-06, "loss": 0.8132, "step": 6524 }, { "epoch": 1.6529449018366056, "grad_norm": 4.029937267303467, "learning_rate": 7.99718095636578e-06, "loss": 0.83, "step": 6525 }, { "epoch": 
1.6531982267257757, "grad_norm": 4.23452615737915, "learning_rate": 7.996510282330816e-06, "loss": 0.7501, "step": 6526 }, { "epoch": 1.6534515516149462, "grad_norm": 3.6601123809814453, "learning_rate": 7.995839524155249e-06, "loss": 0.8571, "step": 6527 }, { "epoch": 1.6537048765041167, "grad_norm": 3.5844037532806396, "learning_rate": 7.99516868185791e-06, "loss": 0.6626, "step": 6528 }, { "epoch": 1.6539582013932868, "grad_norm": 4.13832950592041, "learning_rate": 7.99449775545764e-06, "loss": 0.7667, "step": 6529 }, { "epoch": 1.6542115262824573, "grad_norm": 3.335920810699463, "learning_rate": 7.993826744973274e-06, "loss": 0.8313, "step": 6530 }, { "epoch": 1.6544648511716276, "grad_norm": 4.0323333740234375, "learning_rate": 7.993155650423658e-06, "loss": 0.8563, "step": 6531 }, { "epoch": 1.6547181760607979, "grad_norm": 4.23358678817749, "learning_rate": 7.992484471827634e-06, "loss": 0.8968, "step": 6532 }, { "epoch": 1.6549715009499684, "grad_norm": 3.3698556423187256, "learning_rate": 7.991813209204047e-06, "loss": 0.6257, "step": 6533 }, { "epoch": 1.6552248258391387, "grad_norm": 3.3595569133758545, "learning_rate": 7.991141862571749e-06, "loss": 0.8493, "step": 6534 }, { "epoch": 1.655478150728309, "grad_norm": 4.0302910804748535, "learning_rate": 7.990470431949588e-06, "loss": 0.9936, "step": 6535 }, { "epoch": 1.6557314756174795, "grad_norm": 3.870558023452759, "learning_rate": 7.989798917356422e-06, "loss": 0.7429, "step": 6536 }, { "epoch": 1.6559848005066498, "grad_norm": 3.3610174655914307, "learning_rate": 7.9891273188111e-06, "loss": 0.8919, "step": 6537 }, { "epoch": 1.65623812539582, "grad_norm": 3.419107675552368, "learning_rate": 7.988455636332487e-06, "loss": 0.8021, "step": 6538 }, { "epoch": 1.6564914502849906, "grad_norm": 3.5197911262512207, "learning_rate": 7.987783869939439e-06, "loss": 0.7365, "step": 6539 }, { "epoch": 1.6567447751741609, "grad_norm": 3.383847236633301, "learning_rate": 7.987112019650818e-06, "loss": 0.7167, "step": 6540 }, { "epoch": 1.6569981000633311, "grad_norm": 4.0686235427856445, "learning_rate": 7.986440085485494e-06, "loss": 0.9421, "step": 6541 }, { "epoch": 1.6572514249525017, "grad_norm": 3.9128847122192383, "learning_rate": 7.985768067462332e-06, "loss": 0.7651, "step": 6542 }, { "epoch": 1.657504749841672, "grad_norm": 3.5898830890655518, "learning_rate": 7.9850959656002e-06, "loss": 0.8104, "step": 6543 }, { "epoch": 1.6577580747308422, "grad_norm": 3.5501456260681152, "learning_rate": 7.984423779917974e-06, "loss": 0.7083, "step": 6544 }, { "epoch": 1.6580113996200128, "grad_norm": 3.715851306915283, "learning_rate": 7.983751510434528e-06, "loss": 0.8172, "step": 6545 }, { "epoch": 1.658264724509183, "grad_norm": 3.674283981323242, "learning_rate": 7.983079157168736e-06, "loss": 0.829, "step": 6546 }, { "epoch": 1.6585180493983533, "grad_norm": 3.1403985023498535, "learning_rate": 7.98240672013948e-06, "loss": 0.7097, "step": 6547 }, { "epoch": 1.6587713742875239, "grad_norm": 4.111692905426025, "learning_rate": 7.98173419936564e-06, "loss": 0.8722, "step": 6548 }, { "epoch": 1.6590246991766942, "grad_norm": 3.416438102722168, "learning_rate": 7.981061594866105e-06, "loss": 0.7898, "step": 6549 }, { "epoch": 1.6592780240658644, "grad_norm": 3.63065242767334, "learning_rate": 7.980388906659753e-06, "loss": 0.7435, "step": 6550 }, { "epoch": 1.659531348955035, "grad_norm": 3.939948797225952, "learning_rate": 7.979716134765481e-06, "loss": 0.7948, "step": 6551 }, { "epoch": 1.659784673844205, "grad_norm": 
3.4028239250183105, "learning_rate": 7.979043279202175e-06, "loss": 0.7807, "step": 6552 }, { "epoch": 1.6600379987333755, "grad_norm": 3.9967074394226074, "learning_rate": 7.978370339988728e-06, "loss": 0.7236, "step": 6553 }, { "epoch": 1.660291323622546, "grad_norm": 3.631662607192993, "learning_rate": 7.97769731714404e-06, "loss": 0.7455, "step": 6554 }, { "epoch": 1.6605446485117161, "grad_norm": 3.5003862380981445, "learning_rate": 7.977024210687005e-06, "loss": 0.756, "step": 6555 }, { "epoch": 1.6607979734008866, "grad_norm": 3.761894464492798, "learning_rate": 7.976351020636528e-06, "loss": 0.7335, "step": 6556 }, { "epoch": 1.6610512982900572, "grad_norm": 3.5644938945770264, "learning_rate": 7.975677747011508e-06, "loss": 0.7435, "step": 6557 }, { "epoch": 1.6613046231792272, "grad_norm": 3.92643141746521, "learning_rate": 7.97500438983085e-06, "loss": 0.8846, "step": 6558 }, { "epoch": 1.6615579480683977, "grad_norm": 3.3626163005828857, "learning_rate": 7.974330949113466e-06, "loss": 0.6866, "step": 6559 }, { "epoch": 1.661811272957568, "grad_norm": 3.531470537185669, "learning_rate": 7.97365742487826e-06, "loss": 0.809, "step": 6560 }, { "epoch": 1.6620645978467383, "grad_norm": 4.272540092468262, "learning_rate": 7.972983817144151e-06, "loss": 0.8819, "step": 6561 }, { "epoch": 1.6623179227359088, "grad_norm": 3.836256980895996, "learning_rate": 7.972310125930047e-06, "loss": 0.8513, "step": 6562 }, { "epoch": 1.6625712476250791, "grad_norm": 3.676304578781128, "learning_rate": 7.971636351254868e-06, "loss": 0.7966, "step": 6563 }, { "epoch": 1.6628245725142494, "grad_norm": 3.809302806854248, "learning_rate": 7.970962493137533e-06, "loss": 0.8453, "step": 6564 }, { "epoch": 1.66307789740342, "grad_norm": 3.648832082748413, "learning_rate": 7.970288551596964e-06, "loss": 0.7627, "step": 6565 }, { "epoch": 1.6633312222925902, "grad_norm": 3.469412088394165, "learning_rate": 7.969614526652085e-06, "loss": 0.7832, "step": 6566 }, { "epoch": 1.6635845471817605, "grad_norm": 4.007893085479736, "learning_rate": 7.96894041832182e-06, "loss": 0.8051, "step": 6567 }, { "epoch": 1.663837872070931, "grad_norm": 3.579482078552246, "learning_rate": 7.968266226625102e-06, "loss": 0.7833, "step": 6568 }, { "epoch": 1.6640911969601013, "grad_norm": 4.226022720336914, "learning_rate": 7.967591951580857e-06, "loss": 0.7823, "step": 6569 }, { "epoch": 1.6643445218492716, "grad_norm": 3.9336376190185547, "learning_rate": 7.966917593208023e-06, "loss": 0.8148, "step": 6570 }, { "epoch": 1.6645978467384421, "grad_norm": 3.4499406814575195, "learning_rate": 7.966243151525534e-06, "loss": 0.7168, "step": 6571 }, { "epoch": 1.6648511716276124, "grad_norm": 3.765089750289917, "learning_rate": 7.965568626552324e-06, "loss": 0.8394, "step": 6572 }, { "epoch": 1.6651044965167827, "grad_norm": 3.9018733501434326, "learning_rate": 7.96489401830734e-06, "loss": 0.8357, "step": 6573 }, { "epoch": 1.6653578214059532, "grad_norm": 3.840449810028076, "learning_rate": 7.964219326809522e-06, "loss": 0.8492, "step": 6574 }, { "epoch": 1.6656111462951235, "grad_norm": 3.6411032676696777, "learning_rate": 7.963544552077813e-06, "loss": 0.8185, "step": 6575 }, { "epoch": 1.6658644711842938, "grad_norm": 3.478626251220703, "learning_rate": 7.96286969413116e-06, "loss": 0.7777, "step": 6576 }, { "epoch": 1.6661177960734643, "grad_norm": 3.901601791381836, "learning_rate": 7.962194752988519e-06, "loss": 0.8673, "step": 6577 }, { "epoch": 1.6663711209626346, "grad_norm": 3.6975276470184326, "learning_rate": 
7.961519728668834e-06, "loss": 0.8515, "step": 6578 }, { "epoch": 1.666624445851805, "grad_norm": 4.250957489013672, "learning_rate": 7.960844621191065e-06, "loss": 0.7823, "step": 6579 }, { "epoch": 1.6668777707409754, "grad_norm": 4.216465950012207, "learning_rate": 7.960169430574166e-06, "loss": 0.8317, "step": 6580 }, { "epoch": 1.6671310956301455, "grad_norm": 3.7026209831237793, "learning_rate": 7.959494156837097e-06, "loss": 0.8145, "step": 6581 }, { "epoch": 1.667384420519316, "grad_norm": 4.24608039855957, "learning_rate": 7.95881879999882e-06, "loss": 0.9671, "step": 6582 }, { "epoch": 1.6676377454084865, "grad_norm": 3.7046186923980713, "learning_rate": 7.958143360078297e-06, "loss": 0.7755, "step": 6583 }, { "epoch": 1.6678910702976566, "grad_norm": 3.856071949005127, "learning_rate": 7.957467837094494e-06, "loss": 0.7805, "step": 6584 }, { "epoch": 1.6681443951868271, "grad_norm": 3.9666085243225098, "learning_rate": 7.95679223106638e-06, "loss": 0.7719, "step": 6585 }, { "epoch": 1.6683977200759976, "grad_norm": 3.8352930545806885, "learning_rate": 7.956116542012927e-06, "loss": 0.8631, "step": 6586 }, { "epoch": 1.6686510449651677, "grad_norm": 3.7137701511383057, "learning_rate": 7.955440769953108e-06, "loss": 0.9008, "step": 6587 }, { "epoch": 1.6689043698543382, "grad_norm": 3.8792643547058105, "learning_rate": 7.954764914905896e-06, "loss": 0.8702, "step": 6588 }, { "epoch": 1.6691576947435085, "grad_norm": 3.808335781097412, "learning_rate": 7.95408897689027e-06, "loss": 0.7988, "step": 6589 }, { "epoch": 1.6694110196326788, "grad_norm": 3.4590396881103516, "learning_rate": 7.95341295592521e-06, "loss": 0.7599, "step": 6590 }, { "epoch": 1.6696643445218493, "grad_norm": 4.048331260681152, "learning_rate": 7.952736852029699e-06, "loss": 0.7402, "step": 6591 }, { "epoch": 1.6699176694110196, "grad_norm": 3.6097097396850586, "learning_rate": 7.952060665222721e-06, "loss": 0.7074, "step": 6592 }, { "epoch": 1.67017099430019, "grad_norm": 3.667442798614502, "learning_rate": 7.951384395523262e-06, "loss": 0.7995, "step": 6593 }, { "epoch": 1.6704243191893604, "grad_norm": 3.7107222080230713, "learning_rate": 7.950708042950313e-06, "loss": 0.8229, "step": 6594 }, { "epoch": 1.6706776440785307, "grad_norm": 3.692390203475952, "learning_rate": 7.950031607522865e-06, "loss": 0.7444, "step": 6595 }, { "epoch": 1.670930968967701, "grad_norm": 3.5283172130584717, "learning_rate": 7.949355089259914e-06, "loss": 0.6734, "step": 6596 }, { "epoch": 1.6711842938568715, "grad_norm": 3.4671082496643066, "learning_rate": 7.948678488180452e-06, "loss": 0.8705, "step": 6597 }, { "epoch": 1.6714376187460418, "grad_norm": 3.5219898223876953, "learning_rate": 7.94800180430348e-06, "loss": 0.7039, "step": 6598 }, { "epoch": 1.671690943635212, "grad_norm": 3.6108665466308594, "learning_rate": 7.947325037648e-06, "loss": 0.7648, "step": 6599 }, { "epoch": 1.6719442685243826, "grad_norm": 3.6292128562927246, "learning_rate": 7.946648188233016e-06, "loss": 0.6964, "step": 6600 }, { "epoch": 1.672197593413553, "grad_norm": 3.8417787551879883, "learning_rate": 7.945971256077529e-06, "loss": 0.6973, "step": 6601 }, { "epoch": 1.6724509183027232, "grad_norm": 3.655839204788208, "learning_rate": 7.94529424120055e-06, "loss": 0.7429, "step": 6602 }, { "epoch": 1.6727042431918937, "grad_norm": 3.953800678253174, "learning_rate": 7.94461714362109e-06, "loss": 0.7063, "step": 6603 }, { "epoch": 1.672957568081064, "grad_norm": 3.713174343109131, "learning_rate": 7.94393996335816e-06, "loss": 0.7668, "step": 
6604 }, { "epoch": 1.6732108929702343, "grad_norm": 4.089044570922852, "learning_rate": 7.943262700430777e-06, "loss": 0.8985, "step": 6605 }, { "epoch": 1.6734642178594048, "grad_norm": 3.4259583950042725, "learning_rate": 7.942585354857956e-06, "loss": 0.7928, "step": 6606 }, { "epoch": 1.673717542748575, "grad_norm": 3.7505693435668945, "learning_rate": 7.941907926658718e-06, "loss": 0.7894, "step": 6607 }, { "epoch": 1.6739708676377454, "grad_norm": 3.6627004146575928, "learning_rate": 7.941230415852084e-06, "loss": 0.7854, "step": 6608 }, { "epoch": 1.674224192526916, "grad_norm": 3.8761916160583496, "learning_rate": 7.940552822457078e-06, "loss": 0.8486, "step": 6609 }, { "epoch": 1.674477517416086, "grad_norm": 3.4008986949920654, "learning_rate": 7.939875146492725e-06, "loss": 0.7706, "step": 6610 }, { "epoch": 1.6747308423052565, "grad_norm": 3.642361640930176, "learning_rate": 7.93919738797806e-06, "loss": 0.7847, "step": 6611 }, { "epoch": 1.674984167194427, "grad_norm": 3.589169502258301, "learning_rate": 7.938519546932107e-06, "loss": 0.8092, "step": 6612 }, { "epoch": 1.675237492083597, "grad_norm": 3.996886730194092, "learning_rate": 7.937841623373904e-06, "loss": 0.8737, "step": 6613 }, { "epoch": 1.6754908169727676, "grad_norm": 3.890979051589966, "learning_rate": 7.937163617322484e-06, "loss": 0.8329, "step": 6614 }, { "epoch": 1.675744141861938, "grad_norm": 3.8833699226379395, "learning_rate": 7.936485528796884e-06, "loss": 0.8497, "step": 6615 }, { "epoch": 1.6759974667511082, "grad_norm": 3.4291770458221436, "learning_rate": 7.93580735781615e-06, "loss": 0.7488, "step": 6616 }, { "epoch": 1.6762507916402787, "grad_norm": 3.5235791206359863, "learning_rate": 7.93512910439932e-06, "loss": 0.7153, "step": 6617 }, { "epoch": 1.676504116529449, "grad_norm": 3.9646575450897217, "learning_rate": 7.934450768565441e-06, "loss": 0.7636, "step": 6618 }, { "epoch": 1.6767574414186193, "grad_norm": 3.3547985553741455, "learning_rate": 7.933772350333559e-06, "loss": 0.6881, "step": 6619 }, { "epoch": 1.6770107663077898, "grad_norm": 4.283260345458984, "learning_rate": 7.933093849722724e-06, "loss": 0.8028, "step": 6620 }, { "epoch": 1.67726409119696, "grad_norm": 3.5597147941589355, "learning_rate": 7.93241526675199e-06, "loss": 0.8416, "step": 6621 }, { "epoch": 1.6775174160861304, "grad_norm": 3.506744384765625, "learning_rate": 7.931736601440407e-06, "loss": 0.7056, "step": 6622 }, { "epoch": 1.6777707409753009, "grad_norm": 3.943193197250366, "learning_rate": 7.931057853807037e-06, "loss": 0.8271, "step": 6623 }, { "epoch": 1.6780240658644712, "grad_norm": 4.149829864501953, "learning_rate": 7.930379023870936e-06, "loss": 0.7197, "step": 6624 }, { "epoch": 1.6782773907536415, "grad_norm": 3.963623046875, "learning_rate": 7.929700111651165e-06, "loss": 0.7888, "step": 6625 }, { "epoch": 1.678530715642812, "grad_norm": 3.586610794067383, "learning_rate": 7.929021117166787e-06, "loss": 0.712, "step": 6626 }, { "epoch": 1.6787840405319823, "grad_norm": 3.788907289505005, "learning_rate": 7.92834204043687e-06, "loss": 0.8079, "step": 6627 }, { "epoch": 1.6790373654211526, "grad_norm": 3.8412363529205322, "learning_rate": 7.92766288148048e-06, "loss": 0.7998, "step": 6628 }, { "epoch": 1.679290690310323, "grad_norm": 3.252941370010376, "learning_rate": 7.926983640316688e-06, "loss": 0.7125, "step": 6629 }, { "epoch": 1.6795440151994934, "grad_norm": 3.6767332553863525, "learning_rate": 7.926304316964569e-06, "loss": 0.6974, "step": 6630 }, { "epoch": 1.6797973400886637, "grad_norm": 
3.9207980632781982, "learning_rate": 7.925624911443194e-06, "loss": 0.7083, "step": 6631 }, { "epoch": 1.6800506649778342, "grad_norm": 3.551576852798462, "learning_rate": 7.924945423771643e-06, "loss": 0.839, "step": 6632 }, { "epoch": 1.6803039898670045, "grad_norm": 3.6196115016937256, "learning_rate": 7.924265853968996e-06, "loss": 0.8434, "step": 6633 }, { "epoch": 1.6805573147561748, "grad_norm": 3.624392509460449, "learning_rate": 7.923586202054336e-06, "loss": 0.6152, "step": 6634 }, { "epoch": 1.6808106396453453, "grad_norm": 3.9037556648254395, "learning_rate": 7.922906468046744e-06, "loss": 0.9458, "step": 6635 }, { "epoch": 1.6810639645345156, "grad_norm": 3.74167537689209, "learning_rate": 7.922226651965308e-06, "loss": 0.879, "step": 6636 }, { "epoch": 1.6813172894236859, "grad_norm": 3.045506477355957, "learning_rate": 7.921546753829117e-06, "loss": 0.6792, "step": 6637 }, { "epoch": 1.6815706143128564, "grad_norm": 3.6199963092803955, "learning_rate": 7.920866773657264e-06, "loss": 0.7662, "step": 6638 }, { "epoch": 1.6818239392020264, "grad_norm": 3.473907232284546, "learning_rate": 7.92018671146884e-06, "loss": 0.7948, "step": 6639 }, { "epoch": 1.682077264091197, "grad_norm": 4.178169250488281, "learning_rate": 7.919506567282941e-06, "loss": 0.776, "step": 6640 }, { "epoch": 1.6823305889803675, "grad_norm": 4.206727504730225, "learning_rate": 7.918826341118668e-06, "loss": 0.8756, "step": 6641 }, { "epoch": 1.6825839138695375, "grad_norm": 3.7250514030456543, "learning_rate": 7.918146032995117e-06, "loss": 0.7612, "step": 6642 }, { "epoch": 1.682837238758708, "grad_norm": 3.7533860206604004, "learning_rate": 7.917465642931395e-06, "loss": 0.7584, "step": 6643 }, { "epoch": 1.6830905636478786, "grad_norm": 3.769502639770508, "learning_rate": 7.916785170946603e-06, "loss": 0.8779, "step": 6644 }, { "epoch": 1.6833438885370486, "grad_norm": 4.405693531036377, "learning_rate": 7.916104617059853e-06, "loss": 0.8019, "step": 6645 }, { "epoch": 1.6835972134262192, "grad_norm": 3.6255602836608887, "learning_rate": 7.915423981290251e-06, "loss": 0.7301, "step": 6646 }, { "epoch": 1.6838505383153894, "grad_norm": 3.3909103870391846, "learning_rate": 7.91474326365691e-06, "loss": 0.6843, "step": 6647 }, { "epoch": 1.6841038632045597, "grad_norm": 3.8130462169647217, "learning_rate": 7.914062464178943e-06, "loss": 0.7176, "step": 6648 }, { "epoch": 1.6843571880937303, "grad_norm": 4.036614418029785, "learning_rate": 7.913381582875468e-06, "loss": 0.7759, "step": 6649 }, { "epoch": 1.6846105129829005, "grad_norm": 3.8046438694000244, "learning_rate": 7.912700619765605e-06, "loss": 0.7424, "step": 6650 }, { "epoch": 1.6848638378720708, "grad_norm": 3.53322172164917, "learning_rate": 7.912019574868473e-06, "loss": 0.7357, "step": 6651 }, { "epoch": 1.6851171627612413, "grad_norm": 3.907158851623535, "learning_rate": 7.911338448203197e-06, "loss": 0.8641, "step": 6652 }, { "epoch": 1.6853704876504116, "grad_norm": 3.558422327041626, "learning_rate": 7.9106572397889e-06, "loss": 0.797, "step": 6653 }, { "epoch": 1.685623812539582, "grad_norm": 3.729743480682373, "learning_rate": 7.909975949644713e-06, "loss": 0.8328, "step": 6654 }, { "epoch": 1.6858771374287524, "grad_norm": 3.8090381622314453, "learning_rate": 7.909294577789765e-06, "loss": 0.7349, "step": 6655 }, { "epoch": 1.6861304623179227, "grad_norm": 3.698411703109741, "learning_rate": 7.908613124243189e-06, "loss": 0.7703, "step": 6656 }, { "epoch": 1.686383787207093, "grad_norm": 3.94551944732666, "learning_rate": 
7.907931589024119e-06, "loss": 0.742, "step": 6657 }, { "epoch": 1.6866371120962635, "grad_norm": 3.8150436878204346, "learning_rate": 7.907249972151694e-06, "loss": 0.8785, "step": 6658 }, { "epoch": 1.6868904369854338, "grad_norm": 3.8971149921417236, "learning_rate": 7.906568273645051e-06, "loss": 0.7242, "step": 6659 }, { "epoch": 1.6871437618746041, "grad_norm": 3.75036883354187, "learning_rate": 7.905886493523333e-06, "loss": 0.8107, "step": 6660 }, { "epoch": 1.6873970867637746, "grad_norm": 3.9910454750061035, "learning_rate": 7.905204631805686e-06, "loss": 0.8148, "step": 6661 }, { "epoch": 1.687650411652945, "grad_norm": 3.8019652366638184, "learning_rate": 7.904522688511253e-06, "loss": 0.817, "step": 6662 }, { "epoch": 1.6879037365421152, "grad_norm": 3.8441343307495117, "learning_rate": 7.903840663659186e-06, "loss": 0.85, "step": 6663 }, { "epoch": 1.6881570614312857, "grad_norm": 3.426865577697754, "learning_rate": 7.903158557268633e-06, "loss": 0.8892, "step": 6664 }, { "epoch": 1.688410386320456, "grad_norm": 3.5533223152160645, "learning_rate": 7.902476369358748e-06, "loss": 0.7583, "step": 6665 }, { "epoch": 1.6886637112096263, "grad_norm": 3.424185276031494, "learning_rate": 7.901794099948686e-06, "loss": 0.6838, "step": 6666 }, { "epoch": 1.6889170360987968, "grad_norm": 3.9086058139801025, "learning_rate": 7.901111749057606e-06, "loss": 0.8324, "step": 6667 }, { "epoch": 1.689170360987967, "grad_norm": 3.792022943496704, "learning_rate": 7.90042931670467e-06, "loss": 0.8141, "step": 6668 }, { "epoch": 1.6894236858771374, "grad_norm": 4.026228904724121, "learning_rate": 7.899746802909037e-06, "loss": 0.7859, "step": 6669 }, { "epoch": 1.689677010766308, "grad_norm": 4.015260696411133, "learning_rate": 7.899064207689873e-06, "loss": 0.8104, "step": 6670 }, { "epoch": 1.689930335655478, "grad_norm": 4.160395622253418, "learning_rate": 7.898381531066344e-06, "loss": 0.7034, "step": 6671 }, { "epoch": 1.6901836605446485, "grad_norm": 3.6861190795898438, "learning_rate": 7.89769877305762e-06, "loss": 0.6766, "step": 6672 }, { "epoch": 1.6904369854338188, "grad_norm": 4.139413356781006, "learning_rate": 7.897015933682873e-06, "loss": 0.7244, "step": 6673 }, { "epoch": 1.690690310322989, "grad_norm": 3.5917487144470215, "learning_rate": 7.896333012961277e-06, "loss": 0.7738, "step": 6674 }, { "epoch": 1.6909436352121596, "grad_norm": 3.593790054321289, "learning_rate": 7.895650010912007e-06, "loss": 0.7097, "step": 6675 }, { "epoch": 1.69119696010133, "grad_norm": 3.585502862930298, "learning_rate": 7.894966927554239e-06, "loss": 0.7099, "step": 6676 }, { "epoch": 1.6914502849905002, "grad_norm": 3.7600560188293457, "learning_rate": 7.89428376290716e-06, "loss": 0.8947, "step": 6677 }, { "epoch": 1.6917036098796707, "grad_norm": 3.956672191619873, "learning_rate": 7.893600516989948e-06, "loss": 0.8116, "step": 6678 }, { "epoch": 1.691956934768841, "grad_norm": 4.237945079803467, "learning_rate": 7.892917189821791e-06, "loss": 0.8744, "step": 6679 }, { "epoch": 1.6922102596580113, "grad_norm": 3.645401954650879, "learning_rate": 7.892233781421874e-06, "loss": 0.7532, "step": 6680 }, { "epoch": 1.6924635845471818, "grad_norm": 3.4173827171325684, "learning_rate": 7.891550291809388e-06, "loss": 0.7708, "step": 6681 }, { "epoch": 1.6927169094363521, "grad_norm": 3.700458288192749, "learning_rate": 7.890866721003525e-06, "loss": 0.8057, "step": 6682 }, { "epoch": 1.6929702343255224, "grad_norm": 3.7289626598358154, "learning_rate": 7.890183069023478e-06, "loss": 0.8639, "step": 
6683 }, { "epoch": 1.693223559214693, "grad_norm": 3.5245094299316406, "learning_rate": 7.889499335888449e-06, "loss": 0.7177, "step": 6684 }, { "epoch": 1.6934768841038632, "grad_norm": 3.8554348945617676, "learning_rate": 7.88881552161763e-06, "loss": 0.8164, "step": 6685 }, { "epoch": 1.6937302089930335, "grad_norm": 3.527906894683838, "learning_rate": 7.888131626230224e-06, "loss": 0.8275, "step": 6686 }, { "epoch": 1.693983533882204, "grad_norm": 3.72906756401062, "learning_rate": 7.887447649745437e-06, "loss": 0.8171, "step": 6687 }, { "epoch": 1.6942368587713743, "grad_norm": 3.4595229625701904, "learning_rate": 7.886763592182474e-06, "loss": 0.7817, "step": 6688 }, { "epoch": 1.6944901836605446, "grad_norm": 3.7904465198516846, "learning_rate": 7.88607945356054e-06, "loss": 0.9005, "step": 6689 }, { "epoch": 1.6947435085497151, "grad_norm": 3.807465076446533, "learning_rate": 7.88539523389885e-06, "loss": 0.8108, "step": 6690 }, { "epoch": 1.6949968334388854, "grad_norm": 3.802807569503784, "learning_rate": 7.884710933216613e-06, "loss": 0.8049, "step": 6691 }, { "epoch": 1.6952501583280557, "grad_norm": 3.7613372802734375, "learning_rate": 7.884026551533046e-06, "loss": 0.8984, "step": 6692 }, { "epoch": 1.6955034832172262, "grad_norm": 3.503157615661621, "learning_rate": 7.883342088867364e-06, "loss": 0.7345, "step": 6693 }, { "epoch": 1.6957568081063965, "grad_norm": 3.8023993968963623, "learning_rate": 7.882657545238788e-06, "loss": 0.728, "step": 6694 }, { "epoch": 1.6960101329955668, "grad_norm": 3.663755178451538, "learning_rate": 7.881972920666538e-06, "loss": 0.7559, "step": 6695 }, { "epoch": 1.6962634578847373, "grad_norm": 3.451871633529663, "learning_rate": 7.88128821516984e-06, "loss": 0.8482, "step": 6696 }, { "epoch": 1.6965167827739074, "grad_norm": 3.7571277618408203, "learning_rate": 7.88060342876792e-06, "loss": 0.8549, "step": 6697 }, { "epoch": 1.696770107663078, "grad_norm": 3.491603374481201, "learning_rate": 7.879918561480006e-06, "loss": 0.7404, "step": 6698 }, { "epoch": 1.6970234325522484, "grad_norm": 3.8406331539154053, "learning_rate": 7.879233613325328e-06, "loss": 0.831, "step": 6699 }, { "epoch": 1.6972767574414185, "grad_norm": 3.4100091457366943, "learning_rate": 7.87854858432312e-06, "loss": 0.7306, "step": 6700 }, { "epoch": 1.697530082330589, "grad_norm": 3.4049994945526123, "learning_rate": 7.877863474492616e-06, "loss": 0.6876, "step": 6701 }, { "epoch": 1.6977834072197593, "grad_norm": 3.7596635818481445, "learning_rate": 7.877178283853053e-06, "loss": 0.8153, "step": 6702 }, { "epoch": 1.6980367321089296, "grad_norm": 4.205677509307861, "learning_rate": 7.876493012423674e-06, "loss": 0.7863, "step": 6703 }, { "epoch": 1.6982900569981, "grad_norm": 3.8486273288726807, "learning_rate": 7.87580766022372e-06, "loss": 0.7443, "step": 6704 }, { "epoch": 1.6985433818872704, "grad_norm": 3.5464746952056885, "learning_rate": 7.875122227272435e-06, "loss": 0.7064, "step": 6705 }, { "epoch": 1.6987967067764407, "grad_norm": 3.5107100009918213, "learning_rate": 7.874436713589065e-06, "loss": 0.7617, "step": 6706 }, { "epoch": 1.6990500316656112, "grad_norm": 4.054915904998779, "learning_rate": 7.873751119192857e-06, "loss": 0.9486, "step": 6707 }, { "epoch": 1.6993033565547815, "grad_norm": 3.6590044498443604, "learning_rate": 7.873065444103066e-06, "loss": 0.6986, "step": 6708 }, { "epoch": 1.6995566814439518, "grad_norm": 3.610285758972168, "learning_rate": 7.872379688338945e-06, "loss": 0.8146, "step": 6709 }, { "epoch": 1.6998100063331223, 
"grad_norm": 3.8275539875030518, "learning_rate": 7.871693851919747e-06, "loss": 0.8404, "step": 6710 }, { "epoch": 1.7000633312222926, "grad_norm": 3.4518802165985107, "learning_rate": 7.871007934864732e-06, "loss": 0.7698, "step": 6711 }, { "epoch": 1.7003166561114629, "grad_norm": 3.5977344512939453, "learning_rate": 7.87032193719316e-06, "loss": 0.6872, "step": 6712 }, { "epoch": 1.7005699810006334, "grad_norm": 3.663902521133423, "learning_rate": 7.869635858924293e-06, "loss": 0.8268, "step": 6713 }, { "epoch": 1.7008233058898037, "grad_norm": 3.621511220932007, "learning_rate": 7.868949700077396e-06, "loss": 0.7852, "step": 6714 }, { "epoch": 1.701076630778974, "grad_norm": 3.7081730365753174, "learning_rate": 7.868263460671737e-06, "loss": 0.7897, "step": 6715 }, { "epoch": 1.7013299556681445, "grad_norm": 3.8370795249938965, "learning_rate": 7.867577140726584e-06, "loss": 0.8091, "step": 6716 }, { "epoch": 1.7015832805573148, "grad_norm": 3.473798990249634, "learning_rate": 7.866890740261205e-06, "loss": 0.6665, "step": 6717 }, { "epoch": 1.701836605446485, "grad_norm": 3.8767638206481934, "learning_rate": 7.866204259294883e-06, "loss": 0.7544, "step": 6718 }, { "epoch": 1.7020899303356556, "grad_norm": 3.3869128227233887, "learning_rate": 7.865517697846887e-06, "loss": 0.8296, "step": 6719 }, { "epoch": 1.7023432552248259, "grad_norm": 3.7934815883636475, "learning_rate": 7.864831055936497e-06, "loss": 0.733, "step": 6720 }, { "epoch": 1.7025965801139962, "grad_norm": 4.060279846191406, "learning_rate": 7.864144333582993e-06, "loss": 0.8434, "step": 6721 }, { "epoch": 1.7028499050031667, "grad_norm": 3.429332733154297, "learning_rate": 7.863457530805659e-06, "loss": 0.8536, "step": 6722 }, { "epoch": 1.7031032298923368, "grad_norm": 3.738725185394287, "learning_rate": 7.86277064762378e-06, "loss": 0.8061, "step": 6723 }, { "epoch": 1.7033565547815073, "grad_norm": 3.696758270263672, "learning_rate": 7.862083684056641e-06, "loss": 0.7556, "step": 6724 }, { "epoch": 1.7036098796706778, "grad_norm": 3.453564167022705, "learning_rate": 7.861396640123535e-06, "loss": 0.9341, "step": 6725 }, { "epoch": 1.7038632045598479, "grad_norm": 3.4374876022338867, "learning_rate": 7.860709515843751e-06, "loss": 0.7263, "step": 6726 }, { "epoch": 1.7041165294490184, "grad_norm": 3.612027168273926, "learning_rate": 7.860022311236588e-06, "loss": 0.7434, "step": 6727 }, { "epoch": 1.7043698543381889, "grad_norm": 3.5617215633392334, "learning_rate": 7.859335026321336e-06, "loss": 0.729, "step": 6728 }, { "epoch": 1.704623179227359, "grad_norm": 3.6373393535614014, "learning_rate": 7.858647661117298e-06, "loss": 0.7424, "step": 6729 }, { "epoch": 1.7048765041165295, "grad_norm": 4.018084526062012, "learning_rate": 7.857960215643772e-06, "loss": 0.8158, "step": 6730 }, { "epoch": 1.7051298290056998, "grad_norm": 3.5702357292175293, "learning_rate": 7.857272689920064e-06, "loss": 0.7383, "step": 6731 }, { "epoch": 1.70538315389487, "grad_norm": 3.8847665786743164, "learning_rate": 7.856585083965477e-06, "loss": 0.7452, "step": 6732 }, { "epoch": 1.7056364787840406, "grad_norm": 3.6506965160369873, "learning_rate": 7.85589739779932e-06, "loss": 0.7257, "step": 6733 }, { "epoch": 1.7058898036732109, "grad_norm": 3.6071321964263916, "learning_rate": 7.855209631440904e-06, "loss": 0.8079, "step": 6734 }, { "epoch": 1.7061431285623812, "grad_norm": 3.5854055881500244, "learning_rate": 7.854521784909537e-06, "loss": 0.8451, "step": 6735 }, { "epoch": 1.7063964534515517, "grad_norm": 4.117568492889404, 
"learning_rate": 7.853833858224537e-06, "loss": 0.8651, "step": 6736 }, { "epoch": 1.706649778340722, "grad_norm": 3.4071497917175293, "learning_rate": 7.853145851405222e-06, "loss": 0.6812, "step": 6737 }, { "epoch": 1.7069031032298922, "grad_norm": 3.7544307708740234, "learning_rate": 7.852457764470907e-06, "loss": 0.7665, "step": 6738 }, { "epoch": 1.7071564281190628, "grad_norm": 3.820354700088501, "learning_rate": 7.851769597440915e-06, "loss": 0.8057, "step": 6739 }, { "epoch": 1.707409753008233, "grad_norm": 3.7876126766204834, "learning_rate": 7.851081350334568e-06, "loss": 0.8439, "step": 6740 }, { "epoch": 1.7076630778974033, "grad_norm": 3.3007097244262695, "learning_rate": 7.850393023171194e-06, "loss": 0.8229, "step": 6741 }, { "epoch": 1.7079164027865739, "grad_norm": 3.5454154014587402, "learning_rate": 7.84970461597012e-06, "loss": 0.7838, "step": 6742 }, { "epoch": 1.7081697276757442, "grad_norm": 3.9048492908477783, "learning_rate": 7.849016128750676e-06, "loss": 0.7753, "step": 6743 }, { "epoch": 1.7084230525649144, "grad_norm": 3.585662841796875, "learning_rate": 7.848327561532194e-06, "loss": 0.7363, "step": 6744 }, { "epoch": 1.708676377454085, "grad_norm": 3.464667558670044, "learning_rate": 7.84763891433401e-06, "loss": 0.7679, "step": 6745 }, { "epoch": 1.7089297023432553, "grad_norm": 3.897409200668335, "learning_rate": 7.846950187175458e-06, "loss": 0.6852, "step": 6746 }, { "epoch": 1.7091830272324255, "grad_norm": 3.634979009628296, "learning_rate": 7.84626138007588e-06, "loss": 0.7761, "step": 6747 }, { "epoch": 1.709436352121596, "grad_norm": 3.673316240310669, "learning_rate": 7.84557249305462e-06, "loss": 0.8161, "step": 6748 }, { "epoch": 1.7096896770107664, "grad_norm": 3.373833656311035, "learning_rate": 7.844883526131014e-06, "loss": 0.7044, "step": 6749 }, { "epoch": 1.7099430018999366, "grad_norm": 3.8875572681427, "learning_rate": 7.84419447932441e-06, "loss": 0.8219, "step": 6750 }, { "epoch": 1.7101963267891072, "grad_norm": 3.942722797393799, "learning_rate": 7.843505352654162e-06, "loss": 0.7947, "step": 6751 }, { "epoch": 1.7104496516782772, "grad_norm": 3.7861433029174805, "learning_rate": 7.842816146139613e-06, "loss": 0.721, "step": 6752 }, { "epoch": 1.7107029765674477, "grad_norm": 3.775155544281006, "learning_rate": 7.842126859800123e-06, "loss": 0.789, "step": 6753 }, { "epoch": 1.7109563014566183, "grad_norm": 3.84187388420105, "learning_rate": 7.841437493655039e-06, "loss": 0.7874, "step": 6754 }, { "epoch": 1.7112096263457883, "grad_norm": 3.837515354156494, "learning_rate": 7.840748047723726e-06, "loss": 0.7639, "step": 6755 }, { "epoch": 1.7114629512349588, "grad_norm": 3.7836356163024902, "learning_rate": 7.840058522025536e-06, "loss": 0.7773, "step": 6756 }, { "epoch": 1.7117162761241294, "grad_norm": 3.677025318145752, "learning_rate": 7.839368916579835e-06, "loss": 0.6485, "step": 6757 }, { "epoch": 1.7119696010132994, "grad_norm": 3.7167978286743164, "learning_rate": 7.838679231405985e-06, "loss": 0.7052, "step": 6758 }, { "epoch": 1.71222292590247, "grad_norm": 3.762216567993164, "learning_rate": 7.837989466523352e-06, "loss": 0.8725, "step": 6759 }, { "epoch": 1.7124762507916402, "grad_norm": 3.482800006866455, "learning_rate": 7.837299621951307e-06, "loss": 0.6542, "step": 6760 }, { "epoch": 1.7127295756808105, "grad_norm": 3.6897640228271484, "learning_rate": 7.836609697709216e-06, "loss": 0.7791, "step": 6761 }, { "epoch": 1.712982900569981, "grad_norm": 3.613353729248047, "learning_rate": 7.835919693816457e-06, "loss": 
0.7184, "step": 6762 }, { "epoch": 1.7132362254591513, "grad_norm": 3.526890516281128, "learning_rate": 7.835229610292399e-06, "loss": 0.791, "step": 6763 }, { "epoch": 1.7134895503483216, "grad_norm": 3.9536054134368896, "learning_rate": 7.834539447156424e-06, "loss": 0.7518, "step": 6764 }, { "epoch": 1.7137428752374921, "grad_norm": 3.7822437286376953, "learning_rate": 7.833849204427909e-06, "loss": 0.7601, "step": 6765 }, { "epoch": 1.7139962001266624, "grad_norm": 3.176074266433716, "learning_rate": 7.833158882126237e-06, "loss": 0.6864, "step": 6766 }, { "epoch": 1.7142495250158327, "grad_norm": 3.7379746437072754, "learning_rate": 7.832468480270792e-06, "loss": 0.859, "step": 6767 }, { "epoch": 1.7145028499050032, "grad_norm": 3.548861503601074, "learning_rate": 7.831777998880958e-06, "loss": 0.7918, "step": 6768 }, { "epoch": 1.7147561747941735, "grad_norm": 4.13444185256958, "learning_rate": 7.831087437976127e-06, "loss": 0.8243, "step": 6769 }, { "epoch": 1.7150094996833438, "grad_norm": 3.840388774871826, "learning_rate": 7.830396797575687e-06, "loss": 0.8015, "step": 6770 }, { "epoch": 1.7152628245725143, "grad_norm": 3.7381751537323, "learning_rate": 7.82970607769903e-06, "loss": 0.8026, "step": 6771 }, { "epoch": 1.7155161494616846, "grad_norm": 3.682285785675049, "learning_rate": 7.829015278365555e-06, "loss": 0.7376, "step": 6772 }, { "epoch": 1.715769474350855, "grad_norm": 3.669313430786133, "learning_rate": 7.828324399594655e-06, "loss": 0.7427, "step": 6773 }, { "epoch": 1.7160227992400254, "grad_norm": 3.579893112182617, "learning_rate": 7.827633441405733e-06, "loss": 0.8783, "step": 6774 }, { "epoch": 1.7162761241291957, "grad_norm": 3.3315069675445557, "learning_rate": 7.826942403818187e-06, "loss": 0.7473, "step": 6775 }, { "epoch": 1.716529449018366, "grad_norm": 3.5394914150238037, "learning_rate": 7.826251286851425e-06, "loss": 0.758, "step": 6776 }, { "epoch": 1.7167827739075365, "grad_norm": 3.693305253982544, "learning_rate": 7.825560090524851e-06, "loss": 0.7425, "step": 6777 }, { "epoch": 1.7170360987967068, "grad_norm": 3.6263012886047363, "learning_rate": 7.824868814857873e-06, "loss": 0.7584, "step": 6778 }, { "epoch": 1.7172894236858771, "grad_norm": 3.2938084602355957, "learning_rate": 7.824177459869904e-06, "loss": 0.7826, "step": 6779 }, { "epoch": 1.7175427485750476, "grad_norm": 3.781751871109009, "learning_rate": 7.823486025580355e-06, "loss": 0.8017, "step": 6780 }, { "epoch": 1.7177960734642177, "grad_norm": 3.938286781311035, "learning_rate": 7.822794512008643e-06, "loss": 0.7905, "step": 6781 }, { "epoch": 1.7180493983533882, "grad_norm": 3.3979578018188477, "learning_rate": 7.822102919174182e-06, "loss": 0.8073, "step": 6782 }, { "epoch": 1.7183027232425587, "grad_norm": 3.630275011062622, "learning_rate": 7.821411247096395e-06, "loss": 0.7673, "step": 6783 }, { "epoch": 1.7185560481317288, "grad_norm": 3.248453140258789, "learning_rate": 7.820719495794701e-06, "loss": 0.7601, "step": 6784 }, { "epoch": 1.7188093730208993, "grad_norm": 3.203981876373291, "learning_rate": 7.820027665288527e-06, "loss": 0.7745, "step": 6785 }, { "epoch": 1.7190626979100698, "grad_norm": 3.7932839393615723, "learning_rate": 7.819335755597296e-06, "loss": 0.7866, "step": 6786 }, { "epoch": 1.71931602279924, "grad_norm": 3.8405110836029053, "learning_rate": 7.818643766740442e-06, "loss": 0.6981, "step": 6787 }, { "epoch": 1.7195693476884104, "grad_norm": 3.728456735610962, "learning_rate": 7.81795169873739e-06, "loss": 0.6513, "step": 6788 }, { "epoch": 
1.7198226725775807, "grad_norm": 4.308320045471191, "learning_rate": 7.817259551607574e-06, "loss": 0.846, "step": 6789 }, { "epoch": 1.720075997466751, "grad_norm": 3.710430145263672, "learning_rate": 7.816567325370431e-06, "loss": 0.7239, "step": 6790 }, { "epoch": 1.7203293223559215, "grad_norm": 3.8289363384246826, "learning_rate": 7.815875020045398e-06, "loss": 0.8095, "step": 6791 }, { "epoch": 1.7205826472450918, "grad_norm": 3.556525707244873, "learning_rate": 7.815182635651913e-06, "loss": 0.8069, "step": 6792 }, { "epoch": 1.720835972134262, "grad_norm": 3.635162115097046, "learning_rate": 7.81449017220942e-06, "loss": 0.7704, "step": 6793 }, { "epoch": 1.7210892970234326, "grad_norm": 3.9452712535858154, "learning_rate": 7.813797629737361e-06, "loss": 0.792, "step": 6794 }, { "epoch": 1.721342621912603, "grad_norm": 4.019476413726807, "learning_rate": 7.813105008255185e-06, "loss": 0.8714, "step": 6795 }, { "epoch": 1.7215959468017732, "grad_norm": 3.9796302318573, "learning_rate": 7.812412307782338e-06, "loss": 0.8975, "step": 6796 }, { "epoch": 1.7218492716909437, "grad_norm": 3.6698226928710938, "learning_rate": 7.811719528338273e-06, "loss": 0.6774, "step": 6797 }, { "epoch": 1.722102596580114, "grad_norm": 3.5384151935577393, "learning_rate": 7.811026669942439e-06, "loss": 0.8, "step": 6798 }, { "epoch": 1.7223559214692843, "grad_norm": 3.7324986457824707, "learning_rate": 7.810333732614294e-06, "loss": 0.7856, "step": 6799 }, { "epoch": 1.7226092463584548, "grad_norm": 3.710684299468994, "learning_rate": 7.809640716373294e-06, "loss": 0.8713, "step": 6800 }, { "epoch": 1.722862571247625, "grad_norm": 3.9690101146698, "learning_rate": 7.808947621238903e-06, "loss": 0.9102, "step": 6801 }, { "epoch": 1.7231158961367954, "grad_norm": 3.204129457473755, "learning_rate": 7.808254447230576e-06, "loss": 0.7127, "step": 6802 }, { "epoch": 1.723369221025966, "grad_norm": 3.3978586196899414, "learning_rate": 7.807561194367783e-06, "loss": 0.6568, "step": 6803 }, { "epoch": 1.7236225459151362, "grad_norm": 4.004883766174316, "learning_rate": 7.806867862669985e-06, "loss": 0.9047, "step": 6804 }, { "epoch": 1.7238758708043065, "grad_norm": 3.607483148574829, "learning_rate": 7.806174452156654e-06, "loss": 0.6987, "step": 6805 }, { "epoch": 1.724129195693477, "grad_norm": 3.898294448852539, "learning_rate": 7.80548096284726e-06, "loss": 0.7756, "step": 6806 }, { "epoch": 1.7243825205826473, "grad_norm": 3.365597724914551, "learning_rate": 7.804787394761275e-06, "loss": 0.6364, "step": 6807 }, { "epoch": 1.7246358454718176, "grad_norm": 3.672537088394165, "learning_rate": 7.804093747918174e-06, "loss": 0.8562, "step": 6808 }, { "epoch": 1.724889170360988, "grad_norm": 3.7284512519836426, "learning_rate": 7.803400022337435e-06, "loss": 0.7594, "step": 6809 }, { "epoch": 1.7251424952501582, "grad_norm": 3.423053503036499, "learning_rate": 7.802706218038538e-06, "loss": 0.7733, "step": 6810 }, { "epoch": 1.7253958201393287, "grad_norm": 4.533360481262207, "learning_rate": 7.802012335040962e-06, "loss": 0.744, "step": 6811 }, { "epoch": 1.7256491450284992, "grad_norm": 3.670292377471924, "learning_rate": 7.801318373364195e-06, "loss": 0.6766, "step": 6812 }, { "epoch": 1.7259024699176693, "grad_norm": 3.8066697120666504, "learning_rate": 7.80062433302772e-06, "loss": 0.7875, "step": 6813 }, { "epoch": 1.7261557948068398, "grad_norm": 3.981668710708618, "learning_rate": 7.799930214051028e-06, "loss": 0.865, "step": 6814 }, { "epoch": 1.7264091196960103, "grad_norm": 3.672137975692749, 
"learning_rate": 7.799236016453606e-06, "loss": 0.7883, "step": 6815 }, { "epoch": 1.7266624445851804, "grad_norm": 3.7523081302642822, "learning_rate": 7.798541740254948e-06, "loss": 0.6858, "step": 6816 }, { "epoch": 1.7269157694743509, "grad_norm": 3.5492067337036133, "learning_rate": 7.797847385474552e-06, "loss": 0.7101, "step": 6817 }, { "epoch": 1.7271690943635212, "grad_norm": 3.3900222778320312, "learning_rate": 7.79715295213191e-06, "loss": 0.6167, "step": 6818 }, { "epoch": 1.7274224192526915, "grad_norm": 3.545079231262207, "learning_rate": 7.796458440246525e-06, "loss": 0.8016, "step": 6819 }, { "epoch": 1.727675744141862, "grad_norm": 3.9680957794189453, "learning_rate": 7.795763849837898e-06, "loss": 0.7968, "step": 6820 }, { "epoch": 1.7279290690310323, "grad_norm": 3.618556261062622, "learning_rate": 7.795069180925532e-06, "loss": 0.7006, "step": 6821 }, { "epoch": 1.7281823939202026, "grad_norm": 3.432621955871582, "learning_rate": 7.794374433528935e-06, "loss": 0.6631, "step": 6822 }, { "epoch": 1.728435718809373, "grad_norm": 3.838395833969116, "learning_rate": 7.793679607667612e-06, "loss": 0.9043, "step": 6823 }, { "epoch": 1.7286890436985434, "grad_norm": 3.751610040664673, "learning_rate": 7.792984703361076e-06, "loss": 0.7957, "step": 6824 }, { "epoch": 1.7289423685877137, "grad_norm": 3.9810147285461426, "learning_rate": 7.792289720628838e-06, "loss": 0.7891, "step": 6825 }, { "epoch": 1.7291956934768842, "grad_norm": 3.6827688217163086, "learning_rate": 7.791594659490414e-06, "loss": 0.7953, "step": 6826 }, { "epoch": 1.7294490183660545, "grad_norm": 4.050821304321289, "learning_rate": 7.79089951996532e-06, "loss": 0.9719, "step": 6827 }, { "epoch": 1.7297023432552248, "grad_norm": 3.864492893218994, "learning_rate": 7.790204302073074e-06, "loss": 0.773, "step": 6828 }, { "epoch": 1.7299556681443953, "grad_norm": 3.3809754848480225, "learning_rate": 7.789509005833201e-06, "loss": 0.7368, "step": 6829 }, { "epoch": 1.7302089930335656, "grad_norm": 3.8033883571624756, "learning_rate": 7.78881363126522e-06, "loss": 0.6927, "step": 6830 }, { "epoch": 1.7304623179227359, "grad_norm": 4.084654331207275, "learning_rate": 7.78811817838866e-06, "loss": 0.8477, "step": 6831 }, { "epoch": 1.7307156428119064, "grad_norm": 3.4256844520568848, "learning_rate": 7.787422647223052e-06, "loss": 0.7813, "step": 6832 }, { "epoch": 1.7309689677010767, "grad_norm": 4.229806423187256, "learning_rate": 7.786727037787919e-06, "loss": 0.8722, "step": 6833 }, { "epoch": 1.731222292590247, "grad_norm": 3.398742437362671, "learning_rate": 7.786031350102796e-06, "loss": 0.7392, "step": 6834 }, { "epoch": 1.7314756174794175, "grad_norm": 3.697896957397461, "learning_rate": 7.78533558418722e-06, "loss": 0.8457, "step": 6835 }, { "epoch": 1.7317289423685878, "grad_norm": 3.491771936416626, "learning_rate": 7.784639740060726e-06, "loss": 0.8108, "step": 6836 }, { "epoch": 1.731982267257758, "grad_norm": 4.137701034545898, "learning_rate": 7.783943817742852e-06, "loss": 0.7165, "step": 6837 }, { "epoch": 1.7322355921469286, "grad_norm": 3.52500581741333, "learning_rate": 7.783247817253143e-06, "loss": 0.7965, "step": 6838 }, { "epoch": 1.7324889170360986, "grad_norm": 3.7605082988739014, "learning_rate": 7.782551738611138e-06, "loss": 0.8274, "step": 6839 }, { "epoch": 1.7327422419252692, "grad_norm": 3.7687313556671143, "learning_rate": 7.781855581836384e-06, "loss": 0.9359, "step": 6840 }, { "epoch": 1.7329955668144397, "grad_norm": 3.346748113632202, "learning_rate": 7.781159346948431e-06, 
"loss": 0.7889, "step": 6841 }, { "epoch": 1.7332488917036097, "grad_norm": 3.6077446937561035, "learning_rate": 7.780463033966824e-06, "loss": 0.7087, "step": 6842 }, { "epoch": 1.7335022165927803, "grad_norm": 4.077489852905273, "learning_rate": 7.779766642911119e-06, "loss": 0.7461, "step": 6843 }, { "epoch": 1.7337555414819505, "grad_norm": 3.5096306800842285, "learning_rate": 7.77907017380087e-06, "loss": 0.7826, "step": 6844 }, { "epoch": 1.7340088663711208, "grad_norm": 3.6896841526031494, "learning_rate": 7.778373626655635e-06, "loss": 0.8887, "step": 6845 }, { "epoch": 1.7342621912602914, "grad_norm": 3.148482322692871, "learning_rate": 7.77767700149497e-06, "loss": 0.6511, "step": 6846 }, { "epoch": 1.7345155161494616, "grad_norm": 3.640227794647217, "learning_rate": 7.776980298338435e-06, "loss": 0.7533, "step": 6847 }, { "epoch": 1.734768841038632, "grad_norm": 4.133090496063232, "learning_rate": 7.776283517205596e-06, "loss": 0.9398, "step": 6848 }, { "epoch": 1.7350221659278025, "grad_norm": 3.8263661861419678, "learning_rate": 7.775586658116015e-06, "loss": 0.895, "step": 6849 }, { "epoch": 1.7352754908169727, "grad_norm": 3.7483906745910645, "learning_rate": 7.774889721089262e-06, "loss": 0.8356, "step": 6850 }, { "epoch": 1.735528815706143, "grad_norm": 3.824211359024048, "learning_rate": 7.77419270614491e-06, "loss": 0.7526, "step": 6851 }, { "epoch": 1.7357821405953135, "grad_norm": 3.5242981910705566, "learning_rate": 7.773495613302522e-06, "loss": 0.7361, "step": 6852 }, { "epoch": 1.7360354654844838, "grad_norm": 4.070830345153809, "learning_rate": 7.77279844258168e-06, "loss": 0.8171, "step": 6853 }, { "epoch": 1.7362887903736541, "grad_norm": 3.4309775829315186, "learning_rate": 7.772101194001955e-06, "loss": 0.7072, "step": 6854 }, { "epoch": 1.7365421152628246, "grad_norm": 4.016650199890137, "learning_rate": 7.77140386758293e-06, "loss": 0.7467, "step": 6855 }, { "epoch": 1.736795440151995, "grad_norm": 3.6484382152557373, "learning_rate": 7.770706463344183e-06, "loss": 0.9306, "step": 6856 }, { "epoch": 1.7370487650411652, "grad_norm": 3.8862953186035156, "learning_rate": 7.770008981305295e-06, "loss": 0.6854, "step": 6857 }, { "epoch": 1.7373020899303357, "grad_norm": 3.322340726852417, "learning_rate": 7.769311421485855e-06, "loss": 0.7536, "step": 6858 }, { "epoch": 1.737555414819506, "grad_norm": 3.605544090270996, "learning_rate": 7.768613783905448e-06, "loss": 0.8643, "step": 6859 }, { "epoch": 1.7378087397086763, "grad_norm": 3.678415536880493, "learning_rate": 7.767916068583662e-06, "loss": 0.7121, "step": 6860 }, { "epoch": 1.7380620645978468, "grad_norm": 3.8269083499908447, "learning_rate": 7.767218275540092e-06, "loss": 0.8628, "step": 6861 }, { "epoch": 1.7383153894870171, "grad_norm": 3.2733418941497803, "learning_rate": 7.766520404794329e-06, "loss": 0.6492, "step": 6862 }, { "epoch": 1.7385687143761874, "grad_norm": 4.225980281829834, "learning_rate": 7.76582245636597e-06, "loss": 0.8772, "step": 6863 }, { "epoch": 1.738822039265358, "grad_norm": 3.494497537612915, "learning_rate": 7.765124430274613e-06, "loss": 0.7491, "step": 6864 }, { "epoch": 1.7390753641545282, "grad_norm": 3.435108184814453, "learning_rate": 7.764426326539855e-06, "loss": 0.6523, "step": 6865 }, { "epoch": 1.7393286890436985, "grad_norm": 3.816903829574585, "learning_rate": 7.763728145181306e-06, "loss": 0.8311, "step": 6866 }, { "epoch": 1.739582013932869, "grad_norm": 3.6468505859375, "learning_rate": 7.763029886218563e-06, "loss": 0.7768, "step": 6867 }, { "epoch": 
1.7398353388220391, "grad_norm": 3.1998164653778076, "learning_rate": 7.762331549671237e-06, "loss": 0.6335, "step": 6868 }, { "epoch": 1.7400886637112096, "grad_norm": 4.0369791984558105, "learning_rate": 7.761633135558935e-06, "loss": 0.7975, "step": 6869 }, { "epoch": 1.7403419886003801, "grad_norm": 3.863938093185425, "learning_rate": 7.760934643901269e-06, "loss": 0.7428, "step": 6870 }, { "epoch": 1.7405953134895502, "grad_norm": 3.4053783416748047, "learning_rate": 7.760236074717853e-06, "loss": 0.639, "step": 6871 }, { "epoch": 1.7408486383787207, "grad_norm": 3.5806350708007812, "learning_rate": 7.759537428028302e-06, "loss": 0.6776, "step": 6872 }, { "epoch": 1.741101963267891, "grad_norm": 3.7650678157806396, "learning_rate": 7.75883870385223e-06, "loss": 0.7025, "step": 6873 }, { "epoch": 1.7413552881570613, "grad_norm": 3.6960256099700928, "learning_rate": 7.758139902209262e-06, "loss": 0.6717, "step": 6874 }, { "epoch": 1.7416086130462318, "grad_norm": 3.6001505851745605, "learning_rate": 7.757441023119019e-06, "loss": 0.8484, "step": 6875 }, { "epoch": 1.7418619379354021, "grad_norm": 3.406071662902832, "learning_rate": 7.756742066601125e-06, "loss": 0.7173, "step": 6876 }, { "epoch": 1.7421152628245724, "grad_norm": 3.5370805263519287, "learning_rate": 7.756043032675205e-06, "loss": 0.7778, "step": 6877 }, { "epoch": 1.742368587713743, "grad_norm": 3.511587142944336, "learning_rate": 7.755343921360887e-06, "loss": 0.7654, "step": 6878 }, { "epoch": 1.7426219126029132, "grad_norm": 3.672236442565918, "learning_rate": 7.754644732677805e-06, "loss": 0.744, "step": 6879 }, { "epoch": 1.7428752374920835, "grad_norm": 3.903546094894409, "learning_rate": 7.753945466645589e-06, "loss": 0.7901, "step": 6880 }, { "epoch": 1.743128562381254, "grad_norm": 3.4485881328582764, "learning_rate": 7.753246123283875e-06, "loss": 0.7518, "step": 6881 }, { "epoch": 1.7433818872704243, "grad_norm": 3.6792099475860596, "learning_rate": 7.752546702612302e-06, "loss": 0.8035, "step": 6882 }, { "epoch": 1.7436352121595946, "grad_norm": 3.692992925643921, "learning_rate": 7.751847204650505e-06, "loss": 0.7208, "step": 6883 }, { "epoch": 1.7438885370487651, "grad_norm": 3.7795684337615967, "learning_rate": 7.75114762941813e-06, "loss": 0.7926, "step": 6884 }, { "epoch": 1.7441418619379354, "grad_norm": 4.182426452636719, "learning_rate": 7.750447976934818e-06, "loss": 0.8511, "step": 6885 }, { "epoch": 1.7443951868271057, "grad_norm": 3.8321166038513184, "learning_rate": 7.749748247220217e-06, "loss": 0.862, "step": 6886 }, { "epoch": 1.7446485117162762, "grad_norm": 3.795067310333252, "learning_rate": 7.749048440293973e-06, "loss": 0.8019, "step": 6887 }, { "epoch": 1.7449018366054465, "grad_norm": 3.5560405254364014, "learning_rate": 7.748348556175738e-06, "loss": 0.764, "step": 6888 }, { "epoch": 1.7451551614946168, "grad_norm": 3.8307154178619385, "learning_rate": 7.747648594885162e-06, "loss": 0.7913, "step": 6889 }, { "epoch": 1.7454084863837873, "grad_norm": 3.801604747772217, "learning_rate": 7.746948556441903e-06, "loss": 0.8011, "step": 6890 }, { "epoch": 1.7456618112729576, "grad_norm": 3.8079586029052734, "learning_rate": 7.746248440865616e-06, "loss": 0.6707, "step": 6891 }, { "epoch": 1.745915136162128, "grad_norm": 3.6608381271362305, "learning_rate": 7.745548248175958e-06, "loss": 0.718, "step": 6892 }, { "epoch": 1.7461684610512984, "grad_norm": 3.5695316791534424, "learning_rate": 7.744847978392593e-06, "loss": 0.6785, "step": 6893 }, { "epoch": 1.7464217859404685, "grad_norm": 
4.088541030883789, "learning_rate": 7.744147631535183e-06, "loss": 0.7726, "step": 6894 }, { "epoch": 1.746675110829639, "grad_norm": 3.5368990898132324, "learning_rate": 7.743447207623394e-06, "loss": 0.8156, "step": 6895 }, { "epoch": 1.7469284357188095, "grad_norm": 4.152857303619385, "learning_rate": 7.742746706676893e-06, "loss": 0.7564, "step": 6896 }, { "epoch": 1.7471817606079796, "grad_norm": 3.8850326538085938, "learning_rate": 7.742046128715351e-06, "loss": 0.8044, "step": 6897 }, { "epoch": 1.74743508549715, "grad_norm": 3.511019468307495, "learning_rate": 7.741345473758438e-06, "loss": 0.688, "step": 6898 }, { "epoch": 1.7476884103863206, "grad_norm": 3.9197492599487305, "learning_rate": 7.740644741825828e-06, "loss": 0.9111, "step": 6899 }, { "epoch": 1.7479417352754907, "grad_norm": 3.8701112270355225, "learning_rate": 7.739943932937199e-06, "loss": 0.6694, "step": 6900 }, { "epoch": 1.7481950601646612, "grad_norm": 3.6412601470947266, "learning_rate": 7.739243047112228e-06, "loss": 0.6512, "step": 6901 }, { "epoch": 1.7484483850538315, "grad_norm": 4.168801307678223, "learning_rate": 7.738542084370598e-06, "loss": 0.841, "step": 6902 }, { "epoch": 1.7487017099430018, "grad_norm": 3.493496894836426, "learning_rate": 7.737841044731987e-06, "loss": 0.7524, "step": 6903 }, { "epoch": 1.7489550348321723, "grad_norm": 4.134896278381348, "learning_rate": 7.737139928216084e-06, "loss": 0.968, "step": 6904 }, { "epoch": 1.7492083597213426, "grad_norm": 3.665043592453003, "learning_rate": 7.736438734842574e-06, "loss": 0.8285, "step": 6905 }, { "epoch": 1.7494616846105129, "grad_norm": 4.110346794128418, "learning_rate": 7.735737464631149e-06, "loss": 0.8057, "step": 6906 }, { "epoch": 1.7497150094996834, "grad_norm": 3.8197755813598633, "learning_rate": 7.735036117601495e-06, "loss": 0.9154, "step": 6907 }, { "epoch": 1.7499683343888537, "grad_norm": 3.5153720378875732, "learning_rate": 7.73433469377331e-06, "loss": 0.7457, "step": 6908 }, { "epoch": 1.750221659278024, "grad_norm": 3.454111099243164, "learning_rate": 7.73363319316629e-06, "loss": 0.7152, "step": 6909 }, { "epoch": 1.7504749841671945, "grad_norm": 3.6645593643188477, "learning_rate": 7.73293161580013e-06, "loss": 0.8098, "step": 6910 }, { "epoch": 1.7507283090563648, "grad_norm": 3.816690444946289, "learning_rate": 7.732229961694531e-06, "loss": 0.9133, "step": 6911 }, { "epoch": 1.750981633945535, "grad_norm": 3.9455504417419434, "learning_rate": 7.731528230869194e-06, "loss": 0.8381, "step": 6912 }, { "epoch": 1.7512349588347056, "grad_norm": 3.983278751373291, "learning_rate": 7.730826423343825e-06, "loss": 0.8214, "step": 6913 }, { "epoch": 1.7514882837238759, "grad_norm": 3.3927857875823975, "learning_rate": 7.73012453913813e-06, "loss": 0.7316, "step": 6914 }, { "epoch": 1.7517416086130462, "grad_norm": 3.4403159618377686, "learning_rate": 7.729422578271818e-06, "loss": 0.7346, "step": 6915 }, { "epoch": 1.7519949335022167, "grad_norm": 3.7879765033721924, "learning_rate": 7.728720540764601e-06, "loss": 0.8101, "step": 6916 }, { "epoch": 1.752248258391387, "grad_norm": 3.7558810710906982, "learning_rate": 7.728018426636188e-06, "loss": 0.6966, "step": 6917 }, { "epoch": 1.7525015832805573, "grad_norm": 3.143333911895752, "learning_rate": 7.727316235906294e-06, "loss": 0.7613, "step": 6918 }, { "epoch": 1.7527549081697278, "grad_norm": 4.484152317047119, "learning_rate": 7.726613968594642e-06, "loss": 0.9042, "step": 6919 }, { "epoch": 1.753008233058898, "grad_norm": 3.9656481742858887, "learning_rate": 
7.725911624720946e-06, "loss": 0.7662, "step": 6920 }, { "epoch": 1.7532615579480684, "grad_norm": 3.409182548522949, "learning_rate": 7.72520920430493e-06, "loss": 0.7463, "step": 6921 }, { "epoch": 1.7535148828372389, "grad_norm": 3.8630242347717285, "learning_rate": 7.724506707366317e-06, "loss": 0.8621, "step": 6922 }, { "epoch": 1.753768207726409, "grad_norm": 3.331718921661377, "learning_rate": 7.72380413392483e-06, "loss": 0.8027, "step": 6923 }, { "epoch": 1.7540215326155795, "grad_norm": 3.4763405323028564, "learning_rate": 7.723101484000201e-06, "loss": 0.751, "step": 6924 }, { "epoch": 1.75427485750475, "grad_norm": 3.9139089584350586, "learning_rate": 7.72239875761216e-06, "loss": 0.7899, "step": 6925 }, { "epoch": 1.75452818239392, "grad_norm": 3.5342273712158203, "learning_rate": 7.721695954780436e-06, "loss": 0.7511, "step": 6926 }, { "epoch": 1.7547815072830906, "grad_norm": 3.237011194229126, "learning_rate": 7.720993075524768e-06, "loss": 0.8379, "step": 6927 }, { "epoch": 1.755034832172261, "grad_norm": 3.958935499191284, "learning_rate": 7.720290119864887e-06, "loss": 0.722, "step": 6928 }, { "epoch": 1.7552881570614312, "grad_norm": 3.603102684020996, "learning_rate": 7.719587087820534e-06, "loss": 0.7568, "step": 6929 }, { "epoch": 1.7555414819506017, "grad_norm": 4.152163982391357, "learning_rate": 7.71888397941145e-06, "loss": 0.832, "step": 6930 }, { "epoch": 1.755794806839772, "grad_norm": 3.8672590255737305, "learning_rate": 7.718180794657382e-06, "loss": 0.7951, "step": 6931 }, { "epoch": 1.7560481317289423, "grad_norm": 3.665332078933716, "learning_rate": 7.717477533578069e-06, "loss": 0.7543, "step": 6932 }, { "epoch": 1.7563014566181128, "grad_norm": 3.344583749771118, "learning_rate": 7.716774196193259e-06, "loss": 0.7054, "step": 6933 }, { "epoch": 1.756554781507283, "grad_norm": 3.377333164215088, "learning_rate": 7.716070782522703e-06, "loss": 0.6812, "step": 6934 }, { "epoch": 1.7568081063964534, "grad_norm": 4.1590189933776855, "learning_rate": 7.715367292586153e-06, "loss": 0.8372, "step": 6935 }, { "epoch": 1.7570614312856239, "grad_norm": 3.581063985824585, "learning_rate": 7.714663726403363e-06, "loss": 0.8029, "step": 6936 }, { "epoch": 1.7573147561747942, "grad_norm": 3.578697443008423, "learning_rate": 7.713960083994088e-06, "loss": 0.7836, "step": 6937 }, { "epoch": 1.7575680810639644, "grad_norm": 3.8199892044067383, "learning_rate": 7.713256365378085e-06, "loss": 0.7364, "step": 6938 }, { "epoch": 1.757821405953135, "grad_norm": 3.588397264480591, "learning_rate": 7.712552570575114e-06, "loss": 0.8094, "step": 6939 }, { "epoch": 1.7580747308423053, "grad_norm": 3.999547004699707, "learning_rate": 7.711848699604941e-06, "loss": 0.7968, "step": 6940 }, { "epoch": 1.7583280557314755, "grad_norm": 3.7411811351776123, "learning_rate": 7.711144752487325e-06, "loss": 0.7764, "step": 6941 }, { "epoch": 1.758581380620646, "grad_norm": 4.176236152648926, "learning_rate": 7.710440729242034e-06, "loss": 0.8102, "step": 6942 }, { "epoch": 1.7588347055098164, "grad_norm": 4.13679838180542, "learning_rate": 7.70973662988884e-06, "loss": 0.8539, "step": 6943 }, { "epoch": 1.7590880303989866, "grad_norm": 3.5241591930389404, "learning_rate": 7.70903245444751e-06, "loss": 0.7768, "step": 6944 }, { "epoch": 1.7593413552881572, "grad_norm": 3.9646356105804443, "learning_rate": 7.70832820293782e-06, "loss": 0.8012, "step": 6945 }, { "epoch": 1.7595946801773275, "grad_norm": 3.668579339981079, "learning_rate": 7.707623875379542e-06, "loss": 0.6945, "step": 6946 }, 
{ "epoch": 1.7598480050664977, "grad_norm": 3.9900786876678467, "learning_rate": 7.706919471792455e-06, "loss": 0.7609, "step": 6947 }, { "epoch": 1.7601013299556683, "grad_norm": 3.8448877334594727, "learning_rate": 7.706214992196338e-06, "loss": 0.7881, "step": 6948 }, { "epoch": 1.7603546548448386, "grad_norm": 3.9840433597564697, "learning_rate": 7.705510436610973e-06, "loss": 0.7961, "step": 6949 }, { "epoch": 1.7606079797340088, "grad_norm": 3.714884042739868, "learning_rate": 7.70480580505614e-06, "loss": 0.8159, "step": 6950 }, { "epoch": 1.7608613046231794, "grad_norm": 3.951197624206543, "learning_rate": 7.70410109755163e-06, "loss": 0.8104, "step": 6951 }, { "epoch": 1.7611146295123494, "grad_norm": 3.7357263565063477, "learning_rate": 7.703396314117229e-06, "loss": 0.8794, "step": 6952 }, { "epoch": 1.76136795440152, "grad_norm": 3.921632766723633, "learning_rate": 7.702691454772727e-06, "loss": 0.7692, "step": 6953 }, { "epoch": 1.7616212792906905, "grad_norm": 3.5576512813568115, "learning_rate": 7.701986519537914e-06, "loss": 0.728, "step": 6954 }, { "epoch": 1.7618746041798605, "grad_norm": 3.580655813217163, "learning_rate": 7.701281508432587e-06, "loss": 0.8122, "step": 6955 }, { "epoch": 1.762127929069031, "grad_norm": 3.6133105754852295, "learning_rate": 7.70057642147654e-06, "loss": 0.8181, "step": 6956 }, { "epoch": 1.7623812539582016, "grad_norm": 3.7095234394073486, "learning_rate": 7.699871258689574e-06, "loss": 0.8043, "step": 6957 }, { "epoch": 1.7626345788473716, "grad_norm": 4.261842250823975, "learning_rate": 7.699166020091489e-06, "loss": 0.7951, "step": 6958 }, { "epoch": 1.7628879037365421, "grad_norm": 3.069305896759033, "learning_rate": 7.698460705702085e-06, "loss": 0.7193, "step": 6959 }, { "epoch": 1.7631412286257124, "grad_norm": 3.832467555999756, "learning_rate": 7.69775531554117e-06, "loss": 0.7279, "step": 6960 }, { "epoch": 1.7633945535148827, "grad_norm": 3.7485153675079346, "learning_rate": 7.697049849628551e-06, "loss": 0.7454, "step": 6961 }, { "epoch": 1.7636478784040532, "grad_norm": 3.542663335800171, "learning_rate": 7.696344307984034e-06, "loss": 0.755, "step": 6962 }, { "epoch": 1.7639012032932235, "grad_norm": 3.6913812160491943, "learning_rate": 7.695638690627435e-06, "loss": 0.8295, "step": 6963 }, { "epoch": 1.7641545281823938, "grad_norm": 3.591423749923706, "learning_rate": 7.694932997578565e-06, "loss": 0.7852, "step": 6964 }, { "epoch": 1.7644078530715643, "grad_norm": 3.6839447021484375, "learning_rate": 7.694227228857239e-06, "loss": 0.7416, "step": 6965 }, { "epoch": 1.7646611779607346, "grad_norm": 3.771757125854492, "learning_rate": 7.693521384483274e-06, "loss": 0.7232, "step": 6966 }, { "epoch": 1.764914502849905, "grad_norm": 3.684927463531494, "learning_rate": 7.692815464476491e-06, "loss": 0.7959, "step": 6967 }, { "epoch": 1.7651678277390754, "grad_norm": 3.483133554458618, "learning_rate": 7.692109468856712e-06, "loss": 0.825, "step": 6968 }, { "epoch": 1.7654211526282457, "grad_norm": 3.5631825923919678, "learning_rate": 7.691403397643761e-06, "loss": 0.8089, "step": 6969 }, { "epoch": 1.765674477517416, "grad_norm": 3.9040019512176514, "learning_rate": 7.690697250857465e-06, "loss": 0.8031, "step": 6970 }, { "epoch": 1.7659278024065865, "grad_norm": 4.075888156890869, "learning_rate": 7.68999102851765e-06, "loss": 0.7231, "step": 6971 }, { "epoch": 1.7661811272957568, "grad_norm": 3.605731964111328, "learning_rate": 7.689284730644148e-06, "loss": 0.8215, "step": 6972 }, { "epoch": 1.7664344521849271, "grad_norm": 
3.3009860515594482, "learning_rate": 7.688578357256792e-06, "loss": 0.7531, "step": 6973 }, { "epoch": 1.7666877770740976, "grad_norm": 3.64111328125, "learning_rate": 7.687871908375414e-06, "loss": 0.8276, "step": 6974 }, { "epoch": 1.766941101963268, "grad_norm": 3.633634567260742, "learning_rate": 7.687165384019855e-06, "loss": 0.7103, "step": 6975 }, { "epoch": 1.7671944268524382, "grad_norm": 3.75003981590271, "learning_rate": 7.68645878420995e-06, "loss": 0.6741, "step": 6976 }, { "epoch": 1.7674477517416087, "grad_norm": 3.559645414352417, "learning_rate": 7.685752108965541e-06, "loss": 0.7214, "step": 6977 }, { "epoch": 1.767701076630779, "grad_norm": 3.410543918609619, "learning_rate": 7.685045358306473e-06, "loss": 0.8695, "step": 6978 }, { "epoch": 1.7679544015199493, "grad_norm": 3.690166473388672, "learning_rate": 7.68433853225259e-06, "loss": 0.645, "step": 6979 }, { "epoch": 1.7682077264091198, "grad_norm": 3.7979559898376465, "learning_rate": 7.683631630823737e-06, "loss": 0.9006, "step": 6980 }, { "epoch": 1.76846105129829, "grad_norm": 3.594649076461792, "learning_rate": 7.682924654039768e-06, "loss": 0.7457, "step": 6981 }, { "epoch": 1.7687143761874604, "grad_norm": 3.811100721359253, "learning_rate": 7.682217601920529e-06, "loss": 0.7932, "step": 6982 }, { "epoch": 1.768967701076631, "grad_norm": 3.8262460231781006, "learning_rate": 7.68151047448588e-06, "loss": 0.9066, "step": 6983 }, { "epoch": 1.769221025965801, "grad_norm": 3.9167635440826416, "learning_rate": 7.680803271755672e-06, "loss": 0.797, "step": 6984 }, { "epoch": 1.7694743508549715, "grad_norm": 3.7758123874664307, "learning_rate": 7.680095993749763e-06, "loss": 0.7997, "step": 6985 }, { "epoch": 1.769727675744142, "grad_norm": 3.794173240661621, "learning_rate": 7.679388640488017e-06, "loss": 0.7015, "step": 6986 }, { "epoch": 1.769981000633312, "grad_norm": 3.8901114463806152, "learning_rate": 7.678681211990293e-06, "loss": 0.7464, "step": 6987 }, { "epoch": 1.7702343255224826, "grad_norm": 3.6819403171539307, "learning_rate": 7.677973708276456e-06, "loss": 0.7785, "step": 6988 }, { "epoch": 1.770487650411653, "grad_norm": 3.651333808898926, "learning_rate": 7.677266129366374e-06, "loss": 0.7664, "step": 6989 }, { "epoch": 1.7707409753008232, "grad_norm": 3.645397663116455, "learning_rate": 7.676558475279911e-06, "loss": 0.923, "step": 6990 }, { "epoch": 1.7709943001899937, "grad_norm": 3.6749892234802246, "learning_rate": 7.675850746036942e-06, "loss": 0.827, "step": 6991 }, { "epoch": 1.771247625079164, "grad_norm": 3.93105411529541, "learning_rate": 7.67514294165734e-06, "loss": 0.8064, "step": 6992 }, { "epoch": 1.7715009499683343, "grad_norm": 3.9308536052703857, "learning_rate": 7.674435062160974e-06, "loss": 0.7711, "step": 6993 }, { "epoch": 1.7717542748575048, "grad_norm": 3.6619958877563477, "learning_rate": 7.673727107567727e-06, "loss": 0.8562, "step": 6994 }, { "epoch": 1.772007599746675, "grad_norm": 3.6865439414978027, "learning_rate": 7.673019077897474e-06, "loss": 0.6428, "step": 6995 }, { "epoch": 1.7722609246358454, "grad_norm": 3.309852361679077, "learning_rate": 7.6723109731701e-06, "loss": 0.7573, "step": 6996 }, { "epoch": 1.772514249525016, "grad_norm": 3.565255880355835, "learning_rate": 7.671602793405487e-06, "loss": 0.7421, "step": 6997 }, { "epoch": 1.7727675744141862, "grad_norm": 3.638303518295288, "learning_rate": 7.67089453862352e-06, "loss": 0.7285, "step": 6998 }, { "epoch": 1.7730208993033565, "grad_norm": 3.856306552886963, "learning_rate": 7.670186208844084e-06, 
"loss": 0.8371, "step": 6999 }, { "epoch": 1.773274224192527, "grad_norm": 3.933645009994507, "learning_rate": 7.669477804087073e-06, "loss": 0.7924, "step": 7000 }, { "epoch": 1.773274224192527, "eval_loss": 1.1572972536087036, "eval_runtime": 13.8931, "eval_samples_per_second": 28.791, "eval_steps_per_second": 3.599, "step": 7000 }, { "epoch": 1.7735275490816973, "grad_norm": 3.429882526397705, "learning_rate": 7.668769324372374e-06, "loss": 0.7985, "step": 7001 }, { "epoch": 1.7737808739708676, "grad_norm": 3.635460615158081, "learning_rate": 7.668060769719885e-06, "loss": 0.8965, "step": 7002 }, { "epoch": 1.774034198860038, "grad_norm": 3.731748580932617, "learning_rate": 7.6673521401495e-06, "loss": 0.8512, "step": 7003 }, { "epoch": 1.7742875237492084, "grad_norm": 3.781571865081787, "learning_rate": 7.666643435681117e-06, "loss": 0.8569, "step": 7004 }, { "epoch": 1.7745408486383787, "grad_norm": 3.891633987426758, "learning_rate": 7.665934656334633e-06, "loss": 0.85, "step": 7005 }, { "epoch": 1.7747941735275492, "grad_norm": 3.5965399742126465, "learning_rate": 7.665225802129956e-06, "loss": 0.7723, "step": 7006 }, { "epoch": 1.7750474984167195, "grad_norm": 3.403123378753662, "learning_rate": 7.664516873086987e-06, "loss": 0.7731, "step": 7007 }, { "epoch": 1.7753008233058898, "grad_norm": 3.796185255050659, "learning_rate": 7.663807869225634e-06, "loss": 0.9992, "step": 7008 }, { "epoch": 1.7755541481950603, "grad_norm": 3.8913798332214355, "learning_rate": 7.663098790565803e-06, "loss": 0.7524, "step": 7009 }, { "epoch": 1.7758074730842304, "grad_norm": 3.4342589378356934, "learning_rate": 7.662389637127408e-06, "loss": 0.6338, "step": 7010 }, { "epoch": 1.7760607979734009, "grad_norm": 3.7228524684906006, "learning_rate": 7.661680408930358e-06, "loss": 0.8741, "step": 7011 }, { "epoch": 1.7763141228625714, "grad_norm": 3.449136972427368, "learning_rate": 7.66097110599457e-06, "loss": 0.7831, "step": 7012 }, { "epoch": 1.7765674477517415, "grad_norm": 3.6066532135009766, "learning_rate": 7.660261728339962e-06, "loss": 0.6429, "step": 7013 }, { "epoch": 1.776820772640912, "grad_norm": 4.536827087402344, "learning_rate": 7.65955227598645e-06, "loss": 0.9473, "step": 7014 }, { "epoch": 1.7770740975300825, "grad_norm": 3.375558853149414, "learning_rate": 7.658842748953957e-06, "loss": 0.7842, "step": 7015 }, { "epoch": 1.7773274224192526, "grad_norm": 3.7360715866088867, "learning_rate": 7.658133147262406e-06, "loss": 0.7374, "step": 7016 }, { "epoch": 1.777580747308423, "grad_norm": 3.8486199378967285, "learning_rate": 7.657423470931721e-06, "loss": 0.8768, "step": 7017 }, { "epoch": 1.7778340721975934, "grad_norm": 4.047220706939697, "learning_rate": 7.656713719981832e-06, "loss": 0.8172, "step": 7018 }, { "epoch": 1.7780873970867637, "grad_norm": 3.783325672149658, "learning_rate": 7.656003894432666e-06, "loss": 0.8239, "step": 7019 }, { "epoch": 1.7783407219759342, "grad_norm": 3.9551491737365723, "learning_rate": 7.655293994304154e-06, "loss": 0.8671, "step": 7020 }, { "epoch": 1.7785940468651045, "grad_norm": 3.8351070880889893, "learning_rate": 7.654584019616234e-06, "loss": 0.7545, "step": 7021 }, { "epoch": 1.7788473717542748, "grad_norm": 3.5610098838806152, "learning_rate": 7.653873970388836e-06, "loss": 0.7242, "step": 7022 }, { "epoch": 1.7791006966434453, "grad_norm": 3.9424612522125244, "learning_rate": 7.653163846641903e-06, "loss": 0.7859, "step": 7023 }, { "epoch": 1.7793540215326156, "grad_norm": 3.8060503005981445, "learning_rate": 7.652453648395373e-06, 
"loss": 0.7809, "step": 7024 }, { "epoch": 1.7796073464217859, "grad_norm": 3.557339668273926, "learning_rate": 7.651743375669184e-06, "loss": 0.7623, "step": 7025 }, { "epoch": 1.7798606713109564, "grad_norm": 3.927065849304199, "learning_rate": 7.651033028483287e-06, "loss": 0.8165, "step": 7026 }, { "epoch": 1.7801139962001267, "grad_norm": 4.229694366455078, "learning_rate": 7.650322606857625e-06, "loss": 0.8584, "step": 7027 }, { "epoch": 1.780367321089297, "grad_norm": 4.551065444946289, "learning_rate": 7.649612110812145e-06, "loss": 0.8644, "step": 7028 }, { "epoch": 1.7806206459784675, "grad_norm": 4.079436779022217, "learning_rate": 7.6489015403668e-06, "loss": 0.7658, "step": 7029 }, { "epoch": 1.7808739708676378, "grad_norm": 3.6987128257751465, "learning_rate": 7.64819089554154e-06, "loss": 0.7306, "step": 7030 }, { "epoch": 1.781127295756808, "grad_norm": 3.443990707397461, "learning_rate": 7.647480176356321e-06, "loss": 0.837, "step": 7031 }, { "epoch": 1.7813806206459786, "grad_norm": 3.928731918334961, "learning_rate": 7.6467693828311e-06, "loss": 0.8902, "step": 7032 }, { "epoch": 1.7816339455351489, "grad_norm": 3.5158531665802, "learning_rate": 7.646058514985837e-06, "loss": 0.7547, "step": 7033 }, { "epoch": 1.7818872704243192, "grad_norm": 3.7430777549743652, "learning_rate": 7.64534757284049e-06, "loss": 0.8344, "step": 7034 }, { "epoch": 1.7821405953134897, "grad_norm": 3.3484010696411133, "learning_rate": 7.644636556415021e-06, "loss": 0.7328, "step": 7035 }, { "epoch": 1.78239392020266, "grad_norm": 3.6423325538635254, "learning_rate": 7.643925465729399e-06, "loss": 0.8076, "step": 7036 }, { "epoch": 1.7826472450918303, "grad_norm": 3.464024305343628, "learning_rate": 7.643214300803587e-06, "loss": 0.8435, "step": 7037 }, { "epoch": 1.7829005699810008, "grad_norm": 3.8605098724365234, "learning_rate": 7.642503061657558e-06, "loss": 0.72, "step": 7038 }, { "epoch": 1.7831538948701708, "grad_norm": 4.1303277015686035, "learning_rate": 7.64179174831128e-06, "loss": 0.8559, "step": 7039 }, { "epoch": 1.7834072197593414, "grad_norm": 3.7270262241363525, "learning_rate": 7.64108036078473e-06, "loss": 0.8068, "step": 7040 }, { "epoch": 1.7836605446485119, "grad_norm": 3.541299819946289, "learning_rate": 7.640368899097879e-06, "loss": 0.9051, "step": 7041 }, { "epoch": 1.783913869537682, "grad_norm": 3.303861141204834, "learning_rate": 7.639657363270708e-06, "loss": 0.7302, "step": 7042 }, { "epoch": 1.7841671944268525, "grad_norm": 3.3426244258880615, "learning_rate": 7.638945753323197e-06, "loss": 0.7338, "step": 7043 }, { "epoch": 1.7844205193160227, "grad_norm": 3.6744649410247803, "learning_rate": 7.638234069275324e-06, "loss": 0.7901, "step": 7044 }, { "epoch": 1.784673844205193, "grad_norm": 3.590378522872925, "learning_rate": 7.637522311147075e-06, "loss": 0.76, "step": 7045 }, { "epoch": 1.7849271690943636, "grad_norm": 4.428488254547119, "learning_rate": 7.636810478958434e-06, "loss": 0.8213, "step": 7046 }, { "epoch": 1.7851804939835338, "grad_norm": 3.6275784969329834, "learning_rate": 7.636098572729392e-06, "loss": 0.6797, "step": 7047 }, { "epoch": 1.7854338188727041, "grad_norm": 3.5639266967773438, "learning_rate": 7.63538659247994e-06, "loss": 0.6374, "step": 7048 }, { "epoch": 1.7856871437618747, "grad_norm": 3.8096024990081787, "learning_rate": 7.634674538230065e-06, "loss": 0.8411, "step": 7049 }, { "epoch": 1.785940468651045, "grad_norm": 3.4755046367645264, "learning_rate": 7.633962409999765e-06, "loss": 0.7631, "step": 7050 }, { "epoch": 
1.7861937935402152, "grad_norm": 3.6860246658325195, "learning_rate": 7.633250207809034e-06, "loss": 0.8002, "step": 7051 }, { "epoch": 1.7864471184293857, "grad_norm": 3.7806756496429443, "learning_rate": 7.632537931677871e-06, "loss": 0.7146, "step": 7052 }, { "epoch": 1.786700443318556, "grad_norm": 3.540724039077759, "learning_rate": 7.631825581626278e-06, "loss": 0.7278, "step": 7053 }, { "epoch": 1.7869537682077263, "grad_norm": 3.7162272930145264, "learning_rate": 7.631113157674254e-06, "loss": 0.7267, "step": 7054 }, { "epoch": 1.7872070930968968, "grad_norm": 4.121738910675049, "learning_rate": 7.630400659841807e-06, "loss": 0.9696, "step": 7055 }, { "epoch": 1.7874604179860671, "grad_norm": 3.655700206756592, "learning_rate": 7.629688088148943e-06, "loss": 0.7802, "step": 7056 }, { "epoch": 1.7877137428752374, "grad_norm": 3.860203742980957, "learning_rate": 7.628975442615669e-06, "loss": 0.8842, "step": 7057 }, { "epoch": 1.787967067764408, "grad_norm": 3.4483695030212402, "learning_rate": 7.628262723261999e-06, "loss": 0.7747, "step": 7058 }, { "epoch": 1.7882203926535782, "grad_norm": 3.793168544769287, "learning_rate": 7.627549930107941e-06, "loss": 0.8551, "step": 7059 }, { "epoch": 1.7884737175427485, "grad_norm": 3.664818286895752, "learning_rate": 7.626837063173514e-06, "loss": 0.8862, "step": 7060 }, { "epoch": 1.788727042431919, "grad_norm": 3.545707941055298, "learning_rate": 7.626124122478731e-06, "loss": 0.7819, "step": 7061 }, { "epoch": 1.7889803673210893, "grad_norm": 3.70879864692688, "learning_rate": 7.625411108043616e-06, "loss": 0.8376, "step": 7062 }, { "epoch": 1.7892336922102596, "grad_norm": 3.9669227600097656, "learning_rate": 7.624698019888186e-06, "loss": 0.9086, "step": 7063 }, { "epoch": 1.7894870170994301, "grad_norm": 3.8559978008270264, "learning_rate": 7.623984858032467e-06, "loss": 0.9308, "step": 7064 }, { "epoch": 1.7897403419886002, "grad_norm": 4.075738430023193, "learning_rate": 7.623271622496482e-06, "loss": 0.9201, "step": 7065 }, { "epoch": 1.7899936668777707, "grad_norm": 3.997243642807007, "learning_rate": 7.622558313300259e-06, "loss": 0.7588, "step": 7066 }, { "epoch": 1.7902469917669412, "grad_norm": 3.4122109413146973, "learning_rate": 7.621844930463829e-06, "loss": 0.7998, "step": 7067 }, { "epoch": 1.7905003166561113, "grad_norm": 3.753685712814331, "learning_rate": 7.621131474007222e-06, "loss": 0.8078, "step": 7068 }, { "epoch": 1.7907536415452818, "grad_norm": 3.4711172580718994, "learning_rate": 7.620417943950469e-06, "loss": 0.8868, "step": 7069 }, { "epoch": 1.7910069664344523, "grad_norm": 3.7753467559814453, "learning_rate": 7.619704340313611e-06, "loss": 0.7757, "step": 7070 }, { "epoch": 1.7912602913236224, "grad_norm": 3.4311575889587402, "learning_rate": 7.618990663116681e-06, "loss": 0.8055, "step": 7071 }, { "epoch": 1.791513616212793, "grad_norm": 3.5160038471221924, "learning_rate": 7.618276912379723e-06, "loss": 0.6976, "step": 7072 }, { "epoch": 1.7917669411019632, "grad_norm": 4.61091947555542, "learning_rate": 7.617563088122775e-06, "loss": 0.741, "step": 7073 }, { "epoch": 1.7920202659911335, "grad_norm": 3.6943416595458984, "learning_rate": 7.616849190365882e-06, "loss": 0.7657, "step": 7074 }, { "epoch": 1.792273590880304, "grad_norm": 3.6395230293273926, "learning_rate": 7.616135219129093e-06, "loss": 0.7429, "step": 7075 }, { "epoch": 1.7925269157694743, "grad_norm": 3.929011821746826, "learning_rate": 7.615421174432449e-06, "loss": 0.7616, "step": 7076 }, { "epoch": 1.7927802406586446, "grad_norm": 
4.091475009918213, "learning_rate": 7.614707056296008e-06, "loss": 0.8471, "step": 7077 }, { "epoch": 1.7930335655478151, "grad_norm": 4.02305793762207, "learning_rate": 7.613992864739816e-06, "loss": 0.7768, "step": 7078 }, { "epoch": 1.7932868904369854, "grad_norm": 3.849973440170288, "learning_rate": 7.613278599783929e-06, "loss": 0.7575, "step": 7079 }, { "epoch": 1.7935402153261557, "grad_norm": 3.536513090133667, "learning_rate": 7.612564261448405e-06, "loss": 0.7613, "step": 7080 }, { "epoch": 1.7937935402153262, "grad_norm": 3.466219902038574, "learning_rate": 7.611849849753301e-06, "loss": 0.8575, "step": 7081 }, { "epoch": 1.7940468651044965, "grad_norm": 3.7162365913391113, "learning_rate": 7.611135364718677e-06, "loss": 0.7508, "step": 7082 }, { "epoch": 1.7943001899936668, "grad_norm": 3.8962814807891846, "learning_rate": 7.6104208063645955e-06, "loss": 0.823, "step": 7083 }, { "epoch": 1.7945535148828373, "grad_norm": 3.6473841667175293, "learning_rate": 7.609706174711122e-06, "loss": 0.831, "step": 7084 }, { "epoch": 1.7948068397720076, "grad_norm": 3.5618393421173096, "learning_rate": 7.608991469778321e-06, "loss": 0.7275, "step": 7085 }, { "epoch": 1.795060164661178, "grad_norm": 3.5808346271514893, "learning_rate": 7.608276691586263e-06, "loss": 0.7537, "step": 7086 }, { "epoch": 1.7953134895503484, "grad_norm": 3.967834949493408, "learning_rate": 7.607561840155019e-06, "loss": 0.8148, "step": 7087 }, { "epoch": 1.7955668144395187, "grad_norm": 3.600773811340332, "learning_rate": 7.6068469155046595e-06, "loss": 0.9399, "step": 7088 }, { "epoch": 1.795820139328689, "grad_norm": 3.679380178451538, "learning_rate": 7.606131917655259e-06, "loss": 0.7059, "step": 7089 }, { "epoch": 1.7960734642178595, "grad_norm": 3.6357953548431396, "learning_rate": 7.605416846626899e-06, "loss": 0.662, "step": 7090 }, { "epoch": 1.7963267891070298, "grad_norm": 3.8809664249420166, "learning_rate": 7.604701702439652e-06, "loss": 0.8056, "step": 7091 }, { "epoch": 1.7965801139962, "grad_norm": 3.6901090145111084, "learning_rate": 7.603986485113604e-06, "loss": 0.7544, "step": 7092 }, { "epoch": 1.7968334388853706, "grad_norm": 3.48818302154541, "learning_rate": 7.603271194668835e-06, "loss": 0.7084, "step": 7093 }, { "epoch": 1.7970867637745407, "grad_norm": 3.4194297790527344, "learning_rate": 7.60255583112543e-06, "loss": 0.7291, "step": 7094 }, { "epoch": 1.7973400886637112, "grad_norm": 3.7268667221069336, "learning_rate": 7.601840394503478e-06, "loss": 0.7406, "step": 7095 }, { "epoch": 1.7975934135528817, "grad_norm": 3.3654115200042725, "learning_rate": 7.601124884823067e-06, "loss": 0.7558, "step": 7096 }, { "epoch": 1.7978467384420518, "grad_norm": 3.6973989009857178, "learning_rate": 7.600409302104289e-06, "loss": 0.7835, "step": 7097 }, { "epoch": 1.7981000633312223, "grad_norm": 3.9799880981445312, "learning_rate": 7.5996936463672365e-06, "loss": 0.9303, "step": 7098 }, { "epoch": 1.7983533882203928, "grad_norm": 3.7412776947021484, "learning_rate": 7.598977917632004e-06, "loss": 0.7856, "step": 7099 }, { "epoch": 1.7986067131095629, "grad_norm": 3.589317798614502, "learning_rate": 7.59826211591869e-06, "loss": 0.8086, "step": 7100 }, { "epoch": 1.7988600379987334, "grad_norm": 3.5574934482574463, "learning_rate": 7.597546241247393e-06, "loss": 0.8053, "step": 7101 }, { "epoch": 1.7991133628879037, "grad_norm": 3.4305031299591064, "learning_rate": 7.596830293638217e-06, "loss": 0.6677, "step": 7102 }, { "epoch": 1.799366687777074, "grad_norm": 3.3890435695648193, "learning_rate": 
7.596114273111262e-06, "loss": 0.7626, "step": 7103 }, { "epoch": 1.7996200126662445, "grad_norm": 3.6799426078796387, "learning_rate": 7.595398179686635e-06, "loss": 0.6785, "step": 7104 }, { "epoch": 1.7998733375554148, "grad_norm": 3.8996875286102295, "learning_rate": 7.594682013384442e-06, "loss": 0.8432, "step": 7105 }, { "epoch": 1.800126662444585, "grad_norm": 4.103445529937744, "learning_rate": 7.593965774224796e-06, "loss": 0.9385, "step": 7106 }, { "epoch": 1.8003799873337556, "grad_norm": 4.211414337158203, "learning_rate": 7.593249462227807e-06, "loss": 0.7408, "step": 7107 }, { "epoch": 1.8006333122229259, "grad_norm": 3.424152135848999, "learning_rate": 7.592533077413586e-06, "loss": 0.7503, "step": 7108 }, { "epoch": 1.8008866371120962, "grad_norm": 3.4662649631500244, "learning_rate": 7.591816619802255e-06, "loss": 0.7922, "step": 7109 }, { "epoch": 1.8011399620012667, "grad_norm": 3.6717116832733154, "learning_rate": 7.591100089413925e-06, "loss": 0.7804, "step": 7110 }, { "epoch": 1.801393286890437, "grad_norm": 3.3055622577667236, "learning_rate": 7.59038348626872e-06, "loss": 0.7876, "step": 7111 }, { "epoch": 1.8016466117796073, "grad_norm": 3.6405632495880127, "learning_rate": 7.589666810386762e-06, "loss": 0.8317, "step": 7112 }, { "epoch": 1.8018999366687778, "grad_norm": 3.5185089111328125, "learning_rate": 7.5889500617881715e-06, "loss": 0.88, "step": 7113 }, { "epoch": 1.802153261557948, "grad_norm": 3.3841967582702637, "learning_rate": 7.588233240493078e-06, "loss": 0.7814, "step": 7114 }, { "epoch": 1.8024065864471184, "grad_norm": 3.6781198978424072, "learning_rate": 7.587516346521608e-06, "loss": 0.7237, "step": 7115 }, { "epoch": 1.802659911336289, "grad_norm": 3.568204641342163, "learning_rate": 7.586799379893892e-06, "loss": 0.8026, "step": 7116 }, { "epoch": 1.8029132362254592, "grad_norm": 3.2144687175750732, "learning_rate": 7.586082340630061e-06, "loss": 0.6116, "step": 7117 }, { "epoch": 1.8031665611146295, "grad_norm": 3.9251370429992676, "learning_rate": 7.58536522875025e-06, "loss": 0.941, "step": 7118 }, { "epoch": 1.8034198860038, "grad_norm": 3.2826364040374756, "learning_rate": 7.584648044274594e-06, "loss": 0.7135, "step": 7119 }, { "epoch": 1.8036732108929703, "grad_norm": 3.7196340560913086, "learning_rate": 7.583930787223233e-06, "loss": 0.8529, "step": 7120 }, { "epoch": 1.8039265357821406, "grad_norm": 3.5239436626434326, "learning_rate": 7.5832134576163085e-06, "loss": 0.6152, "step": 7121 }, { "epoch": 1.804179860671311, "grad_norm": 4.050826072692871, "learning_rate": 7.58249605547396e-06, "loss": 0.7714, "step": 7122 }, { "epoch": 1.8044331855604812, "grad_norm": 3.6415138244628906, "learning_rate": 7.581778580816331e-06, "loss": 0.7681, "step": 7123 }, { "epoch": 1.8046865104496517, "grad_norm": 3.7598166465759277, "learning_rate": 7.58106103366357e-06, "loss": 0.7684, "step": 7124 }, { "epoch": 1.8049398353388222, "grad_norm": 3.0675861835479736, "learning_rate": 7.580343414035826e-06, "loss": 0.6349, "step": 7125 }, { "epoch": 1.8051931602279923, "grad_norm": 3.739070415496826, "learning_rate": 7.579625721953247e-06, "loss": 0.7287, "step": 7126 }, { "epoch": 1.8054464851171628, "grad_norm": 3.461012125015259, "learning_rate": 7.578907957435988e-06, "loss": 0.666, "step": 7127 }, { "epoch": 1.8056998100063333, "grad_norm": 3.56827712059021, "learning_rate": 7.578190120504202e-06, "loss": 0.7404, "step": 7128 }, { "epoch": 1.8059531348955034, "grad_norm": 4.071817874908447, "learning_rate": 7.5774722111780454e-06, "loss": 0.9501, 
"step": 7129 }, { "epoch": 1.8062064597846739, "grad_norm": 3.8854007720947266, "learning_rate": 7.5767542294776765e-06, "loss": 0.8478, "step": 7130 }, { "epoch": 1.8064597846738442, "grad_norm": 3.4795539379119873, "learning_rate": 7.576036175423257e-06, "loss": 0.6753, "step": 7131 }, { "epoch": 1.8067131095630145, "grad_norm": 4.221245765686035, "learning_rate": 7.57531804903495e-06, "loss": 0.7443, "step": 7132 }, { "epoch": 1.806966434452185, "grad_norm": 4.0690813064575195, "learning_rate": 7.574599850332917e-06, "loss": 0.7844, "step": 7133 }, { "epoch": 1.8072197593413553, "grad_norm": 4.071067810058594, "learning_rate": 7.573881579337327e-06, "loss": 0.8057, "step": 7134 }, { "epoch": 1.8074730842305256, "grad_norm": 3.6989645957946777, "learning_rate": 7.57316323606835e-06, "loss": 0.7898, "step": 7135 }, { "epoch": 1.807726409119696, "grad_norm": 3.796231985092163, "learning_rate": 7.572444820546157e-06, "loss": 0.6651, "step": 7136 }, { "epoch": 1.8079797340088664, "grad_norm": 3.7054240703582764, "learning_rate": 7.571726332790916e-06, "loss": 0.6976, "step": 7137 }, { "epoch": 1.8082330588980366, "grad_norm": 3.9299869537353516, "learning_rate": 7.571007772822807e-06, "loss": 0.8387, "step": 7138 }, { "epoch": 1.8084863837872072, "grad_norm": 4.315001010894775, "learning_rate": 7.570289140662004e-06, "loss": 0.8349, "step": 7139 }, { "epoch": 1.8087397086763775, "grad_norm": 3.631094217300415, "learning_rate": 7.569570436328686e-06, "loss": 0.7793, "step": 7140 }, { "epoch": 1.8089930335655477, "grad_norm": 4.102321147918701, "learning_rate": 7.568851659843035e-06, "loss": 0.8238, "step": 7141 }, { "epoch": 1.8092463584547183, "grad_norm": 3.266963243484497, "learning_rate": 7.568132811225233e-06, "loss": 0.6934, "step": 7142 }, { "epoch": 1.8094996833438886, "grad_norm": 3.3723645210266113, "learning_rate": 7.567413890495465e-06, "loss": 0.7455, "step": 7143 }, { "epoch": 1.8097530082330588, "grad_norm": 3.8398401737213135, "learning_rate": 7.5666948976739175e-06, "loss": 0.9356, "step": 7144 }, { "epoch": 1.8100063331222294, "grad_norm": 4.05166482925415, "learning_rate": 7.565975832780781e-06, "loss": 0.8372, "step": 7145 }, { "epoch": 1.8102596580113997, "grad_norm": 3.7039108276367188, "learning_rate": 7.565256695836247e-06, "loss": 0.8427, "step": 7146 }, { "epoch": 1.81051298290057, "grad_norm": 3.8387906551361084, "learning_rate": 7.564537486860506e-06, "loss": 0.8148, "step": 7147 }, { "epoch": 1.8107663077897405, "grad_norm": 3.8914530277252197, "learning_rate": 7.5638182058737545e-06, "loss": 0.8513, "step": 7148 }, { "epoch": 1.8110196326789108, "grad_norm": 3.7207016944885254, "learning_rate": 7.563098852896189e-06, "loss": 0.8369, "step": 7149 }, { "epoch": 1.811272957568081, "grad_norm": 3.7874226570129395, "learning_rate": 7.56237942794801e-06, "loss": 0.7269, "step": 7150 }, { "epoch": 1.8115262824572516, "grad_norm": 3.577648878097534, "learning_rate": 7.561659931049418e-06, "loss": 0.8044, "step": 7151 }, { "epoch": 1.8117796073464216, "grad_norm": 3.5661303997039795, "learning_rate": 7.560940362220614e-06, "loss": 0.7571, "step": 7152 }, { "epoch": 1.8120329322355921, "grad_norm": 3.839750051498413, "learning_rate": 7.560220721481806e-06, "loss": 0.8257, "step": 7153 }, { "epoch": 1.8122862571247627, "grad_norm": 3.900562047958374, "learning_rate": 7.559501008853201e-06, "loss": 0.8372, "step": 7154 }, { "epoch": 1.8125395820139327, "grad_norm": 3.8204472064971924, "learning_rate": 7.558781224355005e-06, "loss": 0.8736, "step": 7155 }, { "epoch": 
1.8127929069031032, "grad_norm": 3.8297312259674072, "learning_rate": 7.558061368007432e-06, "loss": 0.7803, "step": 7156 }, { "epoch": 1.8130462317922738, "grad_norm": 3.4910902976989746, "learning_rate": 7.557341439830694e-06, "loss": 0.8462, "step": 7157 }, { "epoch": 1.8132995566814438, "grad_norm": 3.6622111797332764, "learning_rate": 7.556621439845009e-06, "loss": 0.7792, "step": 7158 }, { "epoch": 1.8135528815706143, "grad_norm": 4.028879642486572, "learning_rate": 7.555901368070591e-06, "loss": 1.0484, "step": 7159 }, { "epoch": 1.8138062064597846, "grad_norm": 3.211138963699341, "learning_rate": 7.5551812245276615e-06, "loss": 0.739, "step": 7160 }, { "epoch": 1.814059531348955, "grad_norm": 3.726255178451538, "learning_rate": 7.5544610092364405e-06, "loss": 0.7369, "step": 7161 }, { "epoch": 1.8143128562381254, "grad_norm": 3.3407065868377686, "learning_rate": 7.553740722217151e-06, "loss": 0.7547, "step": 7162 }, { "epoch": 1.8145661811272957, "grad_norm": 3.7033278942108154, "learning_rate": 7.553020363490018e-06, "loss": 0.8514, "step": 7163 }, { "epoch": 1.814819506016466, "grad_norm": 3.4461936950683594, "learning_rate": 7.552299933075271e-06, "loss": 0.7574, "step": 7164 }, { "epoch": 1.8150728309056365, "grad_norm": 3.6751558780670166, "learning_rate": 7.551579430993138e-06, "loss": 0.8685, "step": 7165 }, { "epoch": 1.8153261557948068, "grad_norm": 3.683856725692749, "learning_rate": 7.550858857263851e-06, "loss": 0.8602, "step": 7166 }, { "epoch": 1.8155794806839771, "grad_norm": 3.5902152061462402, "learning_rate": 7.550138211907643e-06, "loss": 0.7665, "step": 7167 }, { "epoch": 1.8158328055731476, "grad_norm": 3.73374080657959, "learning_rate": 7.549417494944748e-06, "loss": 0.7848, "step": 7168 }, { "epoch": 1.816086130462318, "grad_norm": 4.09006404876709, "learning_rate": 7.548696706395403e-06, "loss": 0.9717, "step": 7169 }, { "epoch": 1.8163394553514882, "grad_norm": 4.417375087738037, "learning_rate": 7.5479758462798515e-06, "loss": 0.8825, "step": 7170 }, { "epoch": 1.8165927802406587, "grad_norm": 3.414821147918701, "learning_rate": 7.547254914618332e-06, "loss": 0.6906, "step": 7171 }, { "epoch": 1.816846105129829, "grad_norm": 3.8782753944396973, "learning_rate": 7.5465339114310885e-06, "loss": 0.91, "step": 7172 }, { "epoch": 1.8170994300189993, "grad_norm": 3.4348368644714355, "learning_rate": 7.545812836738366e-06, "loss": 0.7046, "step": 7173 }, { "epoch": 1.8173527549081698, "grad_norm": 3.380103826522827, "learning_rate": 7.545091690560411e-06, "loss": 0.751, "step": 7174 }, { "epoch": 1.8176060797973401, "grad_norm": 3.654703140258789, "learning_rate": 7.544370472917477e-06, "loss": 0.8529, "step": 7175 }, { "epoch": 1.8178594046865104, "grad_norm": 3.8183515071868896, "learning_rate": 7.5436491838298105e-06, "loss": 0.8491, "step": 7176 }, { "epoch": 1.818112729575681, "grad_norm": 3.9947562217712402, "learning_rate": 7.542927823317667e-06, "loss": 0.7692, "step": 7177 }, { "epoch": 1.8183660544648512, "grad_norm": 3.634908676147461, "learning_rate": 7.5422063914013034e-06, "loss": 0.7804, "step": 7178 }, { "epoch": 1.8186193793540215, "grad_norm": 4.077807426452637, "learning_rate": 7.541484888100974e-06, "loss": 0.8387, "step": 7179 }, { "epoch": 1.818872704243192, "grad_norm": 3.756589889526367, "learning_rate": 7.54076331343694e-06, "loss": 0.7606, "step": 7180 }, { "epoch": 1.819126029132362, "grad_norm": 3.5974605083465576, "learning_rate": 7.5400416674294655e-06, "loss": 0.8288, "step": 7181 }, { "epoch": 1.8193793540215326, "grad_norm": 
3.844635486602783, "learning_rate": 7.539319950098809e-06, "loss": 0.7886, "step": 7182 }, { "epoch": 1.8196326789107031, "grad_norm": 3.892279863357544, "learning_rate": 7.538598161465239e-06, "loss": 0.756, "step": 7183 }, { "epoch": 1.8198860037998732, "grad_norm": 3.7107455730438232, "learning_rate": 7.537876301549023e-06, "loss": 0.6994, "step": 7184 }, { "epoch": 1.8201393286890437, "grad_norm": 3.5131747722625732, "learning_rate": 7.537154370370429e-06, "loss": 0.7482, "step": 7185 }, { "epoch": 1.8203926535782142, "grad_norm": 3.977311611175537, "learning_rate": 7.536432367949731e-06, "loss": 0.8869, "step": 7186 }, { "epoch": 1.8206459784673843, "grad_norm": 3.570775270462036, "learning_rate": 7.535710294307199e-06, "loss": 0.7425, "step": 7187 }, { "epoch": 1.8208993033565548, "grad_norm": 3.5198025703430176, "learning_rate": 7.534988149463112e-06, "loss": 0.8329, "step": 7188 }, { "epoch": 1.821152628245725, "grad_norm": 3.6174983978271484, "learning_rate": 7.534265933437746e-06, "loss": 0.7919, "step": 7189 }, { "epoch": 1.8214059531348954, "grad_norm": 4.100408554077148, "learning_rate": 7.53354364625138e-06, "loss": 0.8786, "step": 7190 }, { "epoch": 1.821659278024066, "grad_norm": 3.5623393058776855, "learning_rate": 7.532821287924294e-06, "loss": 0.8098, "step": 7191 }, { "epoch": 1.8219126029132362, "grad_norm": 3.5216543674468994, "learning_rate": 7.5320988584767755e-06, "loss": 0.6888, "step": 7192 }, { "epoch": 1.8221659278024065, "grad_norm": 4.106544494628906, "learning_rate": 7.5313763579291055e-06, "loss": 0.9342, "step": 7193 }, { "epoch": 1.822419252691577, "grad_norm": 3.917630672454834, "learning_rate": 7.530653786301575e-06, "loss": 1.0038, "step": 7194 }, { "epoch": 1.8226725775807473, "grad_norm": 3.456974744796753, "learning_rate": 7.529931143614473e-06, "loss": 0.7547, "step": 7195 }, { "epoch": 1.8229259024699176, "grad_norm": 3.7777621746063232, "learning_rate": 7.529208429888089e-06, "loss": 0.7315, "step": 7196 }, { "epoch": 1.823179227359088, "grad_norm": 3.515810251235962, "learning_rate": 7.528485645142718e-06, "loss": 0.7033, "step": 7197 }, { "epoch": 1.8234325522482584, "grad_norm": 4.235123634338379, "learning_rate": 7.527762789398656e-06, "loss": 0.8681, "step": 7198 }, { "epoch": 1.8236858771374287, "grad_norm": 3.604100227355957, "learning_rate": 7.527039862676198e-06, "loss": 0.7855, "step": 7199 }, { "epoch": 1.8239392020265992, "grad_norm": 3.5925567150115967, "learning_rate": 7.526316864995648e-06, "loss": 0.6977, "step": 7200 }, { "epoch": 1.8241925269157695, "grad_norm": 3.774470567703247, "learning_rate": 7.525593796377302e-06, "loss": 0.7816, "step": 7201 }, { "epoch": 1.8244458518049398, "grad_norm": 3.5756449699401855, "learning_rate": 7.524870656841466e-06, "loss": 0.7182, "step": 7202 }, { "epoch": 1.8246991766941103, "grad_norm": 3.6559081077575684, "learning_rate": 7.524147446408445e-06, "loss": 0.7439, "step": 7203 }, { "epoch": 1.8249525015832806, "grad_norm": 3.5166289806365967, "learning_rate": 7.523424165098547e-06, "loss": 0.7562, "step": 7204 }, { "epoch": 1.8252058264724509, "grad_norm": 3.5432941913604736, "learning_rate": 7.522700812932082e-06, "loss": 0.7382, "step": 7205 }, { "epoch": 1.8254591513616214, "grad_norm": 4.274906158447266, "learning_rate": 7.5219773899293605e-06, "loss": 0.9015, "step": 7206 }, { "epoch": 1.8257124762507917, "grad_norm": 4.043605804443359, "learning_rate": 7.521253896110695e-06, "loss": 0.9145, "step": 7207 }, { "epoch": 1.825965801139962, "grad_norm": 3.766462564468384, "learning_rate": 
7.520530331496403e-06, "loss": 0.6623, "step": 7208 }, { "epoch": 1.8262191260291325, "grad_norm": 4.061577796936035, "learning_rate": 7.519806696106799e-06, "loss": 0.9031, "step": 7209 }, { "epoch": 1.8264724509183026, "grad_norm": 3.7861196994781494, "learning_rate": 7.5190829899622055e-06, "loss": 0.8022, "step": 7210 }, { "epoch": 1.826725775807473, "grad_norm": 4.028881549835205, "learning_rate": 7.5183592130829415e-06, "loss": 0.8434, "step": 7211 }, { "epoch": 1.8269791006966436, "grad_norm": 3.5819966793060303, "learning_rate": 7.517635365489331e-06, "loss": 0.7136, "step": 7212 }, { "epoch": 1.8272324255858137, "grad_norm": 3.858046293258667, "learning_rate": 7.516911447201699e-06, "loss": 0.7895, "step": 7213 }, { "epoch": 1.8274857504749842, "grad_norm": 3.5602328777313232, "learning_rate": 7.516187458240374e-06, "loss": 0.7321, "step": 7214 }, { "epoch": 1.8277390753641545, "grad_norm": 4.064763069152832, "learning_rate": 7.5154633986256845e-06, "loss": 0.8498, "step": 7215 }, { "epoch": 1.8279924002533248, "grad_norm": 3.7554080486297607, "learning_rate": 7.514739268377962e-06, "loss": 0.811, "step": 7216 }, { "epoch": 1.8282457251424953, "grad_norm": 3.447010040283203, "learning_rate": 7.514015067517537e-06, "loss": 0.6723, "step": 7217 }, { "epoch": 1.8284990500316656, "grad_norm": 3.7167718410491943, "learning_rate": 7.5132907960647495e-06, "loss": 0.7881, "step": 7218 }, { "epoch": 1.8287523749208359, "grad_norm": 4.058993816375732, "learning_rate": 7.512566454039934e-06, "loss": 0.6933, "step": 7219 }, { "epoch": 1.8290056998100064, "grad_norm": 4.2340617179870605, "learning_rate": 7.51184204146343e-06, "loss": 0.8528, "step": 7220 }, { "epoch": 1.8292590246991767, "grad_norm": 3.6348915100097656, "learning_rate": 7.511117558355579e-06, "loss": 0.7332, "step": 7221 }, { "epoch": 1.829512349588347, "grad_norm": 3.662551164627075, "learning_rate": 7.510393004736723e-06, "loss": 0.7758, "step": 7222 }, { "epoch": 1.8297656744775175, "grad_norm": 4.1006317138671875, "learning_rate": 7.509668380627207e-06, "loss": 0.9005, "step": 7223 }, { "epoch": 1.8300189993666878, "grad_norm": 4.0843186378479, "learning_rate": 7.508943686047381e-06, "loss": 0.8336, "step": 7224 }, { "epoch": 1.830272324255858, "grad_norm": 3.8807265758514404, "learning_rate": 7.508218921017593e-06, "loss": 0.8249, "step": 7225 }, { "epoch": 1.8305256491450286, "grad_norm": 3.4240353107452393, "learning_rate": 7.5074940855581915e-06, "loss": 0.7612, "step": 7226 }, { "epoch": 1.8307789740341989, "grad_norm": 3.7285470962524414, "learning_rate": 7.506769179689531e-06, "loss": 0.8268, "step": 7227 }, { "epoch": 1.8310322989233692, "grad_norm": 3.384099006652832, "learning_rate": 7.506044203431968e-06, "loss": 0.843, "step": 7228 }, { "epoch": 1.8312856238125397, "grad_norm": 3.5275726318359375, "learning_rate": 7.505319156805857e-06, "loss": 0.7429, "step": 7229 }, { "epoch": 1.83153894870171, "grad_norm": 3.8868675231933594, "learning_rate": 7.504594039831559e-06, "loss": 0.8878, "step": 7230 }, { "epoch": 1.8317922735908803, "grad_norm": 3.7927005290985107, "learning_rate": 7.503868852529435e-06, "loss": 0.6936, "step": 7231 }, { "epoch": 1.8320455984800508, "grad_norm": 3.8648810386657715, "learning_rate": 7.503143594919846e-06, "loss": 0.7206, "step": 7232 }, { "epoch": 1.832298923369221, "grad_norm": 4.037606239318848, "learning_rate": 7.5024182670231586e-06, "loss": 0.8805, "step": 7233 }, { "epoch": 1.8325522482583914, "grad_norm": 3.4119715690612793, "learning_rate": 7.50169286885974e-06, "loss": 
0.8197, "step": 7234 }, { "epoch": 1.8328055731475619, "grad_norm": 3.813147783279419, "learning_rate": 7.500967400449957e-06, "loss": 0.7418, "step": 7235 }, { "epoch": 1.833058898036732, "grad_norm": 3.903792142868042, "learning_rate": 7.5002418618141815e-06, "loss": 0.8382, "step": 7236 }, { "epoch": 1.8333122229259025, "grad_norm": 4.018380641937256, "learning_rate": 7.499516252972786e-06, "loss": 0.7733, "step": 7237 }, { "epoch": 1.833565547815073, "grad_norm": 3.7444024085998535, "learning_rate": 7.498790573946146e-06, "loss": 0.839, "step": 7238 }, { "epoch": 1.833818872704243, "grad_norm": 3.6033787727355957, "learning_rate": 7.498064824754638e-06, "loss": 0.7652, "step": 7239 }, { "epoch": 1.8340721975934136, "grad_norm": 3.655672073364258, "learning_rate": 7.49733900541864e-06, "loss": 0.7462, "step": 7240 }, { "epoch": 1.834325522482584, "grad_norm": 3.443094491958618, "learning_rate": 7.4966131159585344e-06, "loss": 0.7305, "step": 7241 }, { "epoch": 1.8345788473717541, "grad_norm": 3.951950788497925, "learning_rate": 7.495887156394701e-06, "loss": 0.8662, "step": 7242 }, { "epoch": 1.8348321722609247, "grad_norm": 3.543978214263916, "learning_rate": 7.4951611267475265e-06, "loss": 0.6832, "step": 7243 }, { "epoch": 1.835085497150095, "grad_norm": 3.825437307357788, "learning_rate": 7.494435027037396e-06, "loss": 0.8139, "step": 7244 }, { "epoch": 1.8353388220392652, "grad_norm": 3.6683316230773926, "learning_rate": 7.4937088572847015e-06, "loss": 0.7841, "step": 7245 }, { "epoch": 1.8355921469284358, "grad_norm": 3.8685595989227295, "learning_rate": 7.49298261750983e-06, "loss": 0.9504, "step": 7246 }, { "epoch": 1.835845471817606, "grad_norm": 3.4584453105926514, "learning_rate": 7.492256307733174e-06, "loss": 0.6819, "step": 7247 }, { "epoch": 1.8360987967067763, "grad_norm": 4.211421489715576, "learning_rate": 7.49152992797513e-06, "loss": 0.7561, "step": 7248 }, { "epoch": 1.8363521215959469, "grad_norm": 3.5700526237487793, "learning_rate": 7.490803478256095e-06, "loss": 0.839, "step": 7249 }, { "epoch": 1.8366054464851171, "grad_norm": 3.460155963897705, "learning_rate": 7.490076958596463e-06, "loss": 0.7964, "step": 7250 }, { "epoch": 1.8368587713742874, "grad_norm": 3.575927972793579, "learning_rate": 7.489350369016637e-06, "loss": 0.759, "step": 7251 }, { "epoch": 1.837112096263458, "grad_norm": 3.743602752685547, "learning_rate": 7.488623709537021e-06, "loss": 0.8683, "step": 7252 }, { "epoch": 1.8373654211526282, "grad_norm": 3.7064273357391357, "learning_rate": 7.487896980178015e-06, "loss": 0.774, "step": 7253 }, { "epoch": 1.8376187460417985, "grad_norm": 3.5614736080169678, "learning_rate": 7.4871701809600304e-06, "loss": 0.7556, "step": 7254 }, { "epoch": 1.837872070930969, "grad_norm": 3.733701705932617, "learning_rate": 7.486443311903472e-06, "loss": 0.7736, "step": 7255 }, { "epoch": 1.8381253958201393, "grad_norm": 3.7013964653015137, "learning_rate": 7.485716373028751e-06, "loss": 0.7774, "step": 7256 }, { "epoch": 1.8383787207093096, "grad_norm": 3.250471353530884, "learning_rate": 7.4849893643562784e-06, "loss": 0.748, "step": 7257 }, { "epoch": 1.8386320455984801, "grad_norm": 3.583418846130371, "learning_rate": 7.484262285906469e-06, "loss": 0.8087, "step": 7258 }, { "epoch": 1.8388853704876504, "grad_norm": 3.2715556621551514, "learning_rate": 7.483535137699741e-06, "loss": 0.7186, "step": 7259 }, { "epoch": 1.8391386953768207, "grad_norm": 4.001896381378174, "learning_rate": 7.48280791975651e-06, "loss": 0.9446, "step": 7260 }, { "epoch": 
1.8393920202659912, "grad_norm": 3.499667167663574, "learning_rate": 7.482080632097194e-06, "loss": 0.7591, "step": 7261 }, { "epoch": 1.8396453451551615, "grad_norm": 3.9465816020965576, "learning_rate": 7.481353274742218e-06, "loss": 0.8264, "step": 7262 }, { "epoch": 1.8398986700443318, "grad_norm": 3.7553086280822754, "learning_rate": 7.480625847712005e-06, "loss": 0.8215, "step": 7263 }, { "epoch": 1.8401519949335023, "grad_norm": 3.6734864711761475, "learning_rate": 7.479898351026982e-06, "loss": 0.7447, "step": 7264 }, { "epoch": 1.8404053198226724, "grad_norm": 3.592437982559204, "learning_rate": 7.479170784707574e-06, "loss": 0.8483, "step": 7265 }, { "epoch": 1.840658644711843, "grad_norm": 3.403601884841919, "learning_rate": 7.478443148774214e-06, "loss": 0.7511, "step": 7266 }, { "epoch": 1.8409119696010134, "grad_norm": 4.034810543060303, "learning_rate": 7.477715443247331e-06, "loss": 0.8785, "step": 7267 }, { "epoch": 1.8411652944901835, "grad_norm": 3.768348455429077, "learning_rate": 7.4769876681473595e-06, "loss": 0.8305, "step": 7268 }, { "epoch": 1.841418619379354, "grad_norm": 3.432314872741699, "learning_rate": 7.4762598234947345e-06, "loss": 0.8843, "step": 7269 }, { "epoch": 1.8416719442685245, "grad_norm": 3.3588666915893555, "learning_rate": 7.475531909309896e-06, "loss": 0.8027, "step": 7270 }, { "epoch": 1.8419252691576946, "grad_norm": 3.7070810794830322, "learning_rate": 7.4748039256132795e-06, "loss": 0.759, "step": 7271 }, { "epoch": 1.8421785940468651, "grad_norm": 3.649130344390869, "learning_rate": 7.474075872425331e-06, "loss": 0.8145, "step": 7272 }, { "epoch": 1.8424319189360354, "grad_norm": 3.6345674991607666, "learning_rate": 7.47334774976649e-06, "loss": 0.6566, "step": 7273 }, { "epoch": 1.8426852438252057, "grad_norm": 3.7379961013793945, "learning_rate": 7.4726195576572035e-06, "loss": 0.6899, "step": 7274 }, { "epoch": 1.8429385687143762, "grad_norm": 3.89823579788208, "learning_rate": 7.471891296117919e-06, "loss": 0.8179, "step": 7275 }, { "epoch": 1.8431918936035465, "grad_norm": 3.2547085285186768, "learning_rate": 7.4711629651690855e-06, "loss": 0.6627, "step": 7276 }, { "epoch": 1.8434452184927168, "grad_norm": 3.5351154804229736, "learning_rate": 7.470434564831154e-06, "loss": 0.7797, "step": 7277 }, { "epoch": 1.8436985433818873, "grad_norm": 3.759066343307495, "learning_rate": 7.469706095124578e-06, "loss": 0.7311, "step": 7278 }, { "epoch": 1.8439518682710576, "grad_norm": 3.729231357574463, "learning_rate": 7.468977556069812e-06, "loss": 0.7887, "step": 7279 }, { "epoch": 1.844205193160228, "grad_norm": 3.930779218673706, "learning_rate": 7.4682489476873136e-06, "loss": 0.8708, "step": 7280 }, { "epoch": 1.8444585180493984, "grad_norm": 3.8201029300689697, "learning_rate": 7.467520269997541e-06, "loss": 0.8285, "step": 7281 }, { "epoch": 1.8447118429385687, "grad_norm": 4.230695724487305, "learning_rate": 7.4667915230209565e-06, "loss": 0.8228, "step": 7282 }, { "epoch": 1.844965167827739, "grad_norm": 3.5004734992980957, "learning_rate": 7.466062706778021e-06, "loss": 0.7594, "step": 7283 }, { "epoch": 1.8452184927169095, "grad_norm": 4.275425434112549, "learning_rate": 7.4653338212892026e-06, "loss": 0.9477, "step": 7284 }, { "epoch": 1.8454718176060798, "grad_norm": 3.7084357738494873, "learning_rate": 7.464604866574965e-06, "loss": 0.7995, "step": 7285 }, { "epoch": 1.84572514249525, "grad_norm": 4.5522871017456055, "learning_rate": 7.463875842655776e-06, "loss": 0.8223, "step": 7286 }, { "epoch": 1.8459784673844206, "grad_norm": 
4.052094459533691, "learning_rate": 7.46314674955211e-06, "loss": 0.7677, "step": 7287 }, { "epoch": 1.846231792273591, "grad_norm": 4.135094165802002, "learning_rate": 7.462417587284438e-06, "loss": 0.8058, "step": 7288 }, { "epoch": 1.8464851171627612, "grad_norm": 3.323655605316162, "learning_rate": 7.461688355873234e-06, "loss": 0.6835, "step": 7289 }, { "epoch": 1.8467384420519317, "grad_norm": 3.601247549057007, "learning_rate": 7.4609590553389746e-06, "loss": 0.6864, "step": 7290 }, { "epoch": 1.846991766941102, "grad_norm": 3.8734934329986572, "learning_rate": 7.460229685702137e-06, "loss": 0.8819, "step": 7291 }, { "epoch": 1.8472450918302723, "grad_norm": 3.679542303085327, "learning_rate": 7.459500246983204e-06, "loss": 0.7914, "step": 7292 }, { "epoch": 1.8474984167194428, "grad_norm": 3.858290433883667, "learning_rate": 7.458770739202656e-06, "loss": 0.854, "step": 7293 }, { "epoch": 1.8477517416086129, "grad_norm": 3.3771471977233887, "learning_rate": 7.458041162380979e-06, "loss": 0.7797, "step": 7294 }, { "epoch": 1.8480050664977834, "grad_norm": 3.539262056350708, "learning_rate": 7.4573115165386575e-06, "loss": 0.7457, "step": 7295 }, { "epoch": 1.848258391386954, "grad_norm": 3.617661952972412, "learning_rate": 7.456581801696181e-06, "loss": 0.7938, "step": 7296 }, { "epoch": 1.848511716276124, "grad_norm": 3.5524685382843018, "learning_rate": 7.455852017874038e-06, "loss": 0.7001, "step": 7297 }, { "epoch": 1.8487650411652945, "grad_norm": 3.7686891555786133, "learning_rate": 7.455122165092723e-06, "loss": 0.9146, "step": 7298 }, { "epoch": 1.849018366054465, "grad_norm": 3.892432689666748, "learning_rate": 7.454392243372727e-06, "loss": 0.7147, "step": 7299 }, { "epoch": 1.849271690943635, "grad_norm": 3.7161612510681152, "learning_rate": 7.453662252734547e-06, "loss": 0.8717, "step": 7300 }, { "epoch": 1.8495250158328056, "grad_norm": 3.782010078430176, "learning_rate": 7.4529321931986805e-06, "loss": 0.7736, "step": 7301 }, { "epoch": 1.8497783407219759, "grad_norm": 4.072753429412842, "learning_rate": 7.452202064785628e-06, "loss": 0.9576, "step": 7302 }, { "epoch": 1.8500316656111462, "grad_norm": 3.7230913639068604, "learning_rate": 7.4514718675158925e-06, "loss": 0.8127, "step": 7303 }, { "epoch": 1.8502849905003167, "grad_norm": 3.960256814956665, "learning_rate": 7.450741601409973e-06, "loss": 0.8021, "step": 7304 }, { "epoch": 1.850538315389487, "grad_norm": 3.41961932182312, "learning_rate": 7.4500112664883795e-06, "loss": 0.6983, "step": 7305 }, { "epoch": 1.8507916402786573, "grad_norm": 3.587430000305176, "learning_rate": 7.449280862771618e-06, "loss": 0.7786, "step": 7306 }, { "epoch": 1.8510449651678278, "grad_norm": 3.4382851123809814, "learning_rate": 7.448550390280197e-06, "loss": 0.7976, "step": 7307 }, { "epoch": 1.851298290056998, "grad_norm": 3.8374269008636475, "learning_rate": 7.44781984903463e-06, "loss": 0.7545, "step": 7308 }, { "epoch": 1.8515516149461684, "grad_norm": 4.093745708465576, "learning_rate": 7.447089239055428e-06, "loss": 0.753, "step": 7309 }, { "epoch": 1.851804939835339, "grad_norm": 3.698042154312134, "learning_rate": 7.446358560363107e-06, "loss": 0.7663, "step": 7310 }, { "epoch": 1.8520582647245092, "grad_norm": 3.391146183013916, "learning_rate": 7.445627812978184e-06, "loss": 0.6712, "step": 7311 }, { "epoch": 1.8523115896136795, "grad_norm": 3.688525676727295, "learning_rate": 7.444896996921179e-06, "loss": 0.7332, "step": 7312 }, { "epoch": 1.85256491450285, "grad_norm": 3.518315553665161, "learning_rate": 
7.444166112212613e-06, "loss": 0.6559, "step": 7313 }, { "epoch": 1.8528182393920203, "grad_norm": 3.89802885055542, "learning_rate": 7.443435158873007e-06, "loss": 0.81, "step": 7314 }, { "epoch": 1.8530715642811906, "grad_norm": 3.615739583969116, "learning_rate": 7.442704136922887e-06, "loss": 0.7755, "step": 7315 }, { "epoch": 1.853324889170361, "grad_norm": 3.5944442749023438, "learning_rate": 7.441973046382779e-06, "loss": 0.8504, "step": 7316 }, { "epoch": 1.8535782140595314, "grad_norm": 3.965315818786621, "learning_rate": 7.441241887273213e-06, "loss": 0.7938, "step": 7317 }, { "epoch": 1.8538315389487017, "grad_norm": 3.6452298164367676, "learning_rate": 7.440510659614718e-06, "loss": 0.8022, "step": 7318 }, { "epoch": 1.8540848638378722, "grad_norm": 3.2947707176208496, "learning_rate": 7.439779363427828e-06, "loss": 0.7323, "step": 7319 }, { "epoch": 1.8543381887270425, "grad_norm": 3.4181292057037354, "learning_rate": 7.4390479987330775e-06, "loss": 0.6592, "step": 7320 }, { "epoch": 1.8545915136162128, "grad_norm": 4.534851551055908, "learning_rate": 7.438316565551001e-06, "loss": 0.7476, "step": 7321 }, { "epoch": 1.8548448385053833, "grad_norm": 3.6873083114624023, "learning_rate": 7.4375850639021395e-06, "loss": 0.7622, "step": 7322 }, { "epoch": 1.8550981633945534, "grad_norm": 3.62625789642334, "learning_rate": 7.436853493807032e-06, "loss": 0.7018, "step": 7323 }, { "epoch": 1.8553514882837239, "grad_norm": 3.600980520248413, "learning_rate": 7.43612185528622e-06, "loss": 0.7497, "step": 7324 }, { "epoch": 1.8556048131728944, "grad_norm": 3.5620505809783936, "learning_rate": 7.4353901483602485e-06, "loss": 0.7743, "step": 7325 }, { "epoch": 1.8558581380620645, "grad_norm": 3.5479280948638916, "learning_rate": 7.434658373049662e-06, "loss": 0.7191, "step": 7326 }, { "epoch": 1.856111462951235, "grad_norm": 3.9729976654052734, "learning_rate": 7.43392652937501e-06, "loss": 0.7144, "step": 7327 }, { "epoch": 1.8563647878404055, "grad_norm": 3.9694910049438477, "learning_rate": 7.433194617356844e-06, "loss": 0.8327, "step": 7328 }, { "epoch": 1.8566181127295756, "grad_norm": 3.4946181774139404, "learning_rate": 7.432462637015709e-06, "loss": 0.7549, "step": 7329 }, { "epoch": 1.856871437618746, "grad_norm": 4.2069501876831055, "learning_rate": 7.431730588372166e-06, "loss": 0.912, "step": 7330 }, { "epoch": 1.8571247625079164, "grad_norm": 3.2858669757843018, "learning_rate": 7.430998471446767e-06, "loss": 0.7898, "step": 7331 }, { "epoch": 1.8573780873970867, "grad_norm": 3.9718568325042725, "learning_rate": 7.43026628626007e-06, "loss": 0.8559, "step": 7332 }, { "epoch": 1.8576314122862572, "grad_norm": 3.5112857818603516, "learning_rate": 7.429534032832637e-06, "loss": 0.7235, "step": 7333 }, { "epoch": 1.8578847371754275, "grad_norm": 3.4231934547424316, "learning_rate": 7.428801711185025e-06, "loss": 0.7495, "step": 7334 }, { "epoch": 1.8581380620645978, "grad_norm": 3.660020351409912, "learning_rate": 7.428069321337799e-06, "loss": 0.7971, "step": 7335 }, { "epoch": 1.8583913869537683, "grad_norm": 3.602024793624878, "learning_rate": 7.4273368633115265e-06, "loss": 0.8713, "step": 7336 }, { "epoch": 1.8586447118429386, "grad_norm": 3.547377586364746, "learning_rate": 7.4266043371267705e-06, "loss": 0.7743, "step": 7337 }, { "epoch": 1.8588980367321088, "grad_norm": 3.8899481296539307, "learning_rate": 7.4258717428041025e-06, "loss": 0.7327, "step": 7338 }, { "epoch": 1.8591513616212794, "grad_norm": 4.131229877471924, "learning_rate": 7.425139080364094e-06, "loss": 
0.7848, "step": 7339 }, { "epoch": 1.8594046865104497, "grad_norm": 3.900611400604248, "learning_rate": 7.424406349827315e-06, "loss": 0.8824, "step": 7340 }, { "epoch": 1.85965801139962, "grad_norm": 3.615609645843506, "learning_rate": 7.4236735512143434e-06, "loss": 0.8079, "step": 7341 }, { "epoch": 1.8599113362887905, "grad_norm": 3.6150715351104736, "learning_rate": 7.422940684545753e-06, "loss": 0.8096, "step": 7342 }, { "epoch": 1.8601646611779608, "grad_norm": 3.648995876312256, "learning_rate": 7.422207749842124e-06, "loss": 0.8403, "step": 7343 }, { "epoch": 1.860417986067131, "grad_norm": 3.13474702835083, "learning_rate": 7.421474747124038e-06, "loss": 0.6948, "step": 7344 }, { "epoch": 1.8606713109563016, "grad_norm": 4.048909664154053, "learning_rate": 7.420741676412075e-06, "loss": 0.8387, "step": 7345 }, { "epoch": 1.8609246358454719, "grad_norm": 4.388035774230957, "learning_rate": 7.4200085377268195e-06, "loss": 0.7915, "step": 7346 }, { "epoch": 1.8611779607346421, "grad_norm": 4.1641764640808105, "learning_rate": 7.4192753310888595e-06, "loss": 0.7966, "step": 7347 }, { "epoch": 1.8614312856238127, "grad_norm": 3.8473355770111084, "learning_rate": 7.4185420565187826e-06, "loss": 0.8086, "step": 7348 }, { "epoch": 1.861684610512983, "grad_norm": 3.894773244857788, "learning_rate": 7.4178087140371775e-06, "loss": 0.885, "step": 7349 }, { "epoch": 1.8619379354021532, "grad_norm": 3.767836809158325, "learning_rate": 7.417075303664637e-06, "loss": 0.7168, "step": 7350 }, { "epoch": 1.8621912602913238, "grad_norm": 4.533100128173828, "learning_rate": 7.416341825421755e-06, "loss": 0.7662, "step": 7351 }, { "epoch": 1.8624445851804938, "grad_norm": 3.6592841148376465, "learning_rate": 7.415608279329127e-06, "loss": 0.7133, "step": 7352 }, { "epoch": 1.8626979100696643, "grad_norm": 3.5006027221679688, "learning_rate": 7.41487466540735e-06, "loss": 0.7597, "step": 7353 }, { "epoch": 1.8629512349588349, "grad_norm": 3.608720064163208, "learning_rate": 7.414140983677024e-06, "loss": 0.8238, "step": 7354 }, { "epoch": 1.863204559848005, "grad_norm": 3.5064187049865723, "learning_rate": 7.4134072341587516e-06, "loss": 0.7651, "step": 7355 }, { "epoch": 1.8634578847371754, "grad_norm": 3.718505859375, "learning_rate": 7.412673416873134e-06, "loss": 0.8108, "step": 7356 }, { "epoch": 1.863711209626346, "grad_norm": 3.588686943054199, "learning_rate": 7.41193953184078e-06, "loss": 0.8105, "step": 7357 }, { "epoch": 1.863964534515516, "grad_norm": 3.4137682914733887, "learning_rate": 7.411205579082292e-06, "loss": 0.6981, "step": 7358 }, { "epoch": 1.8642178594046865, "grad_norm": 3.5827314853668213, "learning_rate": 7.4104715586182815e-06, "loss": 0.6481, "step": 7359 }, { "epoch": 1.8644711842938568, "grad_norm": 3.4952003955841064, "learning_rate": 7.409737470469361e-06, "loss": 0.962, "step": 7360 }, { "epoch": 1.8647245091830271, "grad_norm": 3.540165662765503, "learning_rate": 7.40900331465614e-06, "loss": 0.6896, "step": 7361 }, { "epoch": 1.8649778340721976, "grad_norm": 3.630929708480835, "learning_rate": 7.408269091199237e-06, "loss": 0.7493, "step": 7362 }, { "epoch": 1.865231158961368, "grad_norm": 3.743705987930298, "learning_rate": 7.407534800119265e-06, "loss": 0.8047, "step": 7363 }, { "epoch": 1.8654844838505382, "grad_norm": 3.5905940532684326, "learning_rate": 7.406800441436845e-06, "loss": 0.6626, "step": 7364 }, { "epoch": 1.8657378087397087, "grad_norm": 3.31368350982666, "learning_rate": 7.406066015172596e-06, "loss": 0.6748, "step": 7365 }, { "epoch": 
1.865991133628879, "grad_norm": 4.063304424285889, "learning_rate": 7.405331521347141e-06, "loss": 0.8786, "step": 7366 }, { "epoch": 1.8662444585180493, "grad_norm": 3.6789088249206543, "learning_rate": 7.404596959981104e-06, "loss": 0.7786, "step": 7367 }, { "epoch": 1.8664977834072198, "grad_norm": 3.6941704750061035, "learning_rate": 7.403862331095112e-06, "loss": 0.7188, "step": 7368 }, { "epoch": 1.8667511082963901, "grad_norm": 3.8199117183685303, "learning_rate": 7.403127634709791e-06, "loss": 0.834, "step": 7369 }, { "epoch": 1.8670044331855604, "grad_norm": 3.6132707595825195, "learning_rate": 7.402392870845774e-06, "loss": 0.7723, "step": 7370 }, { "epoch": 1.867257758074731, "grad_norm": 3.8342978954315186, "learning_rate": 7.4016580395236906e-06, "loss": 0.8459, "step": 7371 }, { "epoch": 1.8675110829639012, "grad_norm": 3.7187366485595703, "learning_rate": 7.400923140764176e-06, "loss": 0.7621, "step": 7372 }, { "epoch": 1.8677644078530715, "grad_norm": 3.7745282649993896, "learning_rate": 7.400188174587863e-06, "loss": 0.7832, "step": 7373 }, { "epoch": 1.868017732742242, "grad_norm": 3.4873392581939697, "learning_rate": 7.399453141015392e-06, "loss": 0.7528, "step": 7374 }, { "epoch": 1.8682710576314123, "grad_norm": 3.6364986896514893, "learning_rate": 7.398718040067401e-06, "loss": 0.801, "step": 7375 }, { "epoch": 1.8685243825205826, "grad_norm": 3.6586132049560547, "learning_rate": 7.397982871764532e-06, "loss": 0.7347, "step": 7376 }, { "epoch": 1.8687777074097531, "grad_norm": 3.6796278953552246, "learning_rate": 7.397247636127428e-06, "loss": 0.7327, "step": 7377 }, { "epoch": 1.8690310322989234, "grad_norm": 3.7249855995178223, "learning_rate": 7.396512333176734e-06, "loss": 0.7736, "step": 7378 }, { "epoch": 1.8692843571880937, "grad_norm": 3.6299843788146973, "learning_rate": 7.395776962933097e-06, "loss": 0.7297, "step": 7379 }, { "epoch": 1.8695376820772642, "grad_norm": 4.824265003204346, "learning_rate": 7.395041525417164e-06, "loss": 0.8056, "step": 7380 }, { "epoch": 1.8697910069664343, "grad_norm": 3.879260301589966, "learning_rate": 7.394306020649588e-06, "loss": 0.8066, "step": 7381 }, { "epoch": 1.8700443318556048, "grad_norm": 3.766038179397583, "learning_rate": 7.393570448651023e-06, "loss": 0.7338, "step": 7382 }, { "epoch": 1.8702976567447753, "grad_norm": 3.6172983646392822, "learning_rate": 7.39283480944212e-06, "loss": 0.7236, "step": 7383 }, { "epoch": 1.8705509816339454, "grad_norm": 3.471947431564331, "learning_rate": 7.392099103043537e-06, "loss": 0.8803, "step": 7384 }, { "epoch": 1.870804306523116, "grad_norm": 3.7183072566986084, "learning_rate": 7.391363329475932e-06, "loss": 0.7875, "step": 7385 }, { "epoch": 1.8710576314122862, "grad_norm": 3.2768454551696777, "learning_rate": 7.390627488759967e-06, "loss": 0.7308, "step": 7386 }, { "epoch": 1.8713109563014565, "grad_norm": 3.899739980697632, "learning_rate": 7.389891580916302e-06, "loss": 0.8452, "step": 7387 }, { "epoch": 1.871564281190627, "grad_norm": 3.696506977081299, "learning_rate": 7.389155605965601e-06, "loss": 0.7286, "step": 7388 }, { "epoch": 1.8718176060797973, "grad_norm": 3.8265140056610107, "learning_rate": 7.38841956392853e-06, "loss": 0.7616, "step": 7389 }, { "epoch": 1.8720709309689676, "grad_norm": 3.4128339290618896, "learning_rate": 7.387683454825758e-06, "loss": 0.7428, "step": 7390 }, { "epoch": 1.872324255858138, "grad_norm": 3.8740456104278564, "learning_rate": 7.386947278677954e-06, "loss": 0.8434, "step": 7391 }, { "epoch": 1.8725775807473084, "grad_norm": 
3.352545976638794, "learning_rate": 7.386211035505788e-06, "loss": 0.8019, "step": 7392 }, { "epoch": 1.8728309056364787, "grad_norm": 3.2622337341308594, "learning_rate": 7.385474725329935e-06, "loss": 0.7074, "step": 7393 }, { "epoch": 1.8730842305256492, "grad_norm": 3.2104744911193848, "learning_rate": 7.384738348171069e-06, "loss": 0.7666, "step": 7394 }, { "epoch": 1.8733375554148195, "grad_norm": 3.6817729473114014, "learning_rate": 7.384001904049869e-06, "loss": 0.8851, "step": 7395 }, { "epoch": 1.8735908803039898, "grad_norm": 3.282905340194702, "learning_rate": 7.383265392987011e-06, "loss": 0.6493, "step": 7396 }, { "epoch": 1.8738442051931603, "grad_norm": 3.8238494396209717, "learning_rate": 7.382528815003181e-06, "loss": 0.7047, "step": 7397 }, { "epoch": 1.8740975300823306, "grad_norm": 4.017204761505127, "learning_rate": 7.381792170119057e-06, "loss": 0.7825, "step": 7398 }, { "epoch": 1.874350854971501, "grad_norm": 3.700448751449585, "learning_rate": 7.381055458355324e-06, "loss": 0.7084, "step": 7399 }, { "epoch": 1.8746041798606714, "grad_norm": 3.997309684753418, "learning_rate": 7.38031867973267e-06, "loss": 0.8363, "step": 7400 }, { "epoch": 1.8748575047498417, "grad_norm": 3.56485652923584, "learning_rate": 7.379581834271785e-06, "loss": 0.7459, "step": 7401 }, { "epoch": 1.875110829639012, "grad_norm": 3.750089168548584, "learning_rate": 7.3788449219933555e-06, "loss": 0.6487, "step": 7402 }, { "epoch": 1.8753641545281825, "grad_norm": 4.218666076660156, "learning_rate": 7.378107942918076e-06, "loss": 0.8, "step": 7403 }, { "epoch": 1.8756174794173528, "grad_norm": 3.992666006088257, "learning_rate": 7.377370897066639e-06, "loss": 0.8409, "step": 7404 }, { "epoch": 1.875870804306523, "grad_norm": 4.031217098236084, "learning_rate": 7.376633784459741e-06, "loss": 0.9294, "step": 7405 }, { "epoch": 1.8761241291956936, "grad_norm": 3.875433921813965, "learning_rate": 7.375896605118083e-06, "loss": 0.8327, "step": 7406 }, { "epoch": 1.8763774540848637, "grad_norm": 3.15423583984375, "learning_rate": 7.375159359062361e-06, "loss": 0.6785, "step": 7407 }, { "epoch": 1.8766307789740342, "grad_norm": 3.5996413230895996, "learning_rate": 7.374422046313276e-06, "loss": 0.759, "step": 7408 }, { "epoch": 1.8768841038632047, "grad_norm": 3.8724873065948486, "learning_rate": 7.373684666891533e-06, "loss": 0.9113, "step": 7409 }, { "epoch": 1.8771374287523748, "grad_norm": 3.6898205280303955, "learning_rate": 7.372947220817837e-06, "loss": 0.7315, "step": 7410 }, { "epoch": 1.8773907536415453, "grad_norm": 3.7833163738250732, "learning_rate": 7.372209708112897e-06, "loss": 0.7906, "step": 7411 }, { "epoch": 1.8776440785307158, "grad_norm": 3.598475694656372, "learning_rate": 7.371472128797419e-06, "loss": 0.8126, "step": 7412 }, { "epoch": 1.8778974034198859, "grad_norm": 3.3231332302093506, "learning_rate": 7.3707344828921145e-06, "loss": 0.7783, "step": 7413 }, { "epoch": 1.8781507283090564, "grad_norm": 3.87412428855896, "learning_rate": 7.369996770417698e-06, "loss": 0.7888, "step": 7414 }, { "epoch": 1.8784040531982267, "grad_norm": 3.6098060607910156, "learning_rate": 7.369258991394882e-06, "loss": 0.8262, "step": 7415 }, { "epoch": 1.878657378087397, "grad_norm": 3.879749298095703, "learning_rate": 7.368521145844384e-06, "loss": 0.7725, "step": 7416 }, { "epoch": 1.8789107029765675, "grad_norm": 3.698374032974243, "learning_rate": 7.367783233786923e-06, "loss": 0.8039, "step": 7417 }, { "epoch": 1.8791640278657378, "grad_norm": 3.346038579940796, "learning_rate": 
7.367045255243217e-06, "loss": 0.663, "step": 7418 }, { "epoch": 1.879417352754908, "grad_norm": 3.278337240219116, "learning_rate": 7.366307210233992e-06, "loss": 0.6843, "step": 7419 }, { "epoch": 1.8796706776440786, "grad_norm": 3.4066765308380127, "learning_rate": 7.365569098779968e-06, "loss": 0.7675, "step": 7420 }, { "epoch": 1.8799240025332489, "grad_norm": 4.109745502471924, "learning_rate": 7.364830920901873e-06, "loss": 0.8416, "step": 7421 }, { "epoch": 1.8801773274224192, "grad_norm": 3.6897478103637695, "learning_rate": 7.3640926766204335e-06, "loss": 0.7679, "step": 7422 }, { "epoch": 1.8804306523115897, "grad_norm": 3.8376998901367188, "learning_rate": 7.36335436595638e-06, "loss": 0.7612, "step": 7423 }, { "epoch": 1.88068397720076, "grad_norm": 3.4499313831329346, "learning_rate": 7.362615988930442e-06, "loss": 0.6903, "step": 7424 }, { "epoch": 1.8809373020899303, "grad_norm": 3.5727550983428955, "learning_rate": 7.3618775455633565e-06, "loss": 0.7162, "step": 7425 }, { "epoch": 1.8811906269791008, "grad_norm": 3.6529769897460938, "learning_rate": 7.3611390358758574e-06, "loss": 0.7865, "step": 7426 }, { "epoch": 1.881443951868271, "grad_norm": 3.797576665878296, "learning_rate": 7.360400459888678e-06, "loss": 0.8227, "step": 7427 }, { "epoch": 1.8816972767574414, "grad_norm": 3.2741506099700928, "learning_rate": 7.359661817622561e-06, "loss": 0.7678, "step": 7428 }, { "epoch": 1.8819506016466119, "grad_norm": 3.7047836780548096, "learning_rate": 7.358923109098246e-06, "loss": 0.792, "step": 7429 }, { "epoch": 1.8822039265357822, "grad_norm": 3.4118900299072266, "learning_rate": 7.358184334336476e-06, "loss": 0.7368, "step": 7430 }, { "epoch": 1.8824572514249525, "grad_norm": 3.859285831451416, "learning_rate": 7.357445493357995e-06, "loss": 0.7663, "step": 7431 }, { "epoch": 1.882710576314123, "grad_norm": 3.458205223083496, "learning_rate": 7.35670658618355e-06, "loss": 0.7991, "step": 7432 }, { "epoch": 1.8829639012032933, "grad_norm": 3.5803966522216797, "learning_rate": 7.355967612833887e-06, "loss": 0.7658, "step": 7433 }, { "epoch": 1.8832172260924636, "grad_norm": 3.285926342010498, "learning_rate": 7.355228573329759e-06, "loss": 0.6576, "step": 7434 }, { "epoch": 1.883470550981634, "grad_norm": 3.422349691390991, "learning_rate": 7.3544894676919155e-06, "loss": 0.7696, "step": 7435 }, { "epoch": 1.8837238758708041, "grad_norm": 3.667968988418579, "learning_rate": 7.353750295941113e-06, "loss": 0.7463, "step": 7436 }, { "epoch": 1.8839772007599747, "grad_norm": 3.90153431892395, "learning_rate": 7.353011058098104e-06, "loss": 0.8393, "step": 7437 }, { "epoch": 1.8842305256491452, "grad_norm": 3.7004783153533936, "learning_rate": 7.3522717541836475e-06, "loss": 0.7922, "step": 7438 }, { "epoch": 1.8844838505383152, "grad_norm": 3.6125683784484863, "learning_rate": 7.351532384218503e-06, "loss": 0.7269, "step": 7439 }, { "epoch": 1.8847371754274858, "grad_norm": 3.304962396621704, "learning_rate": 7.35079294822343e-06, "loss": 0.8017, "step": 7440 }, { "epoch": 1.8849905003166563, "grad_norm": 3.801954746246338, "learning_rate": 7.350053446219194e-06, "loss": 0.7969, "step": 7441 }, { "epoch": 1.8852438252058263, "grad_norm": 3.468815803527832, "learning_rate": 7.349313878226558e-06, "loss": 0.6211, "step": 7442 }, { "epoch": 1.8854971500949969, "grad_norm": 3.8287646770477295, "learning_rate": 7.348574244266289e-06, "loss": 0.9248, "step": 7443 }, { "epoch": 1.8857504749841671, "grad_norm": 3.3916845321655273, "learning_rate": 7.347834544359157e-06, "loss": 
0.7739, "step": 7444 }, { "epoch": 1.8860037998733374, "grad_norm": 3.608035087585449, "learning_rate": 7.34709477852593e-06, "loss": 0.6815, "step": 7445 }, { "epoch": 1.886257124762508, "grad_norm": 3.456505060195923, "learning_rate": 7.346354946787384e-06, "loss": 0.756, "step": 7446 }, { "epoch": 1.8865104496516782, "grad_norm": 3.5758090019226074, "learning_rate": 7.34561504916429e-06, "loss": 0.883, "step": 7447 }, { "epoch": 1.8867637745408485, "grad_norm": 4.007209777832031, "learning_rate": 7.344875085677423e-06, "loss": 0.8203, "step": 7448 }, { "epoch": 1.887017099430019, "grad_norm": 4.382864475250244, "learning_rate": 7.3441350563475645e-06, "loss": 0.9269, "step": 7449 }, { "epoch": 1.8872704243191893, "grad_norm": 4.218381404876709, "learning_rate": 7.3433949611954915e-06, "loss": 0.7211, "step": 7450 }, { "epoch": 1.8875237492083596, "grad_norm": 4.290537357330322, "learning_rate": 7.342654800241986e-06, "loss": 0.8025, "step": 7451 }, { "epoch": 1.8877770740975301, "grad_norm": 3.8483150005340576, "learning_rate": 7.341914573507832e-06, "loss": 0.8395, "step": 7452 }, { "epoch": 1.8880303989867004, "grad_norm": 3.528327465057373, "learning_rate": 7.3411742810138146e-06, "loss": 0.7301, "step": 7453 }, { "epoch": 1.8882837238758707, "grad_norm": 3.4256582260131836, "learning_rate": 7.3404339227807205e-06, "loss": 0.7408, "step": 7454 }, { "epoch": 1.8885370487650412, "grad_norm": 3.589458465576172, "learning_rate": 7.3396934988293386e-06, "loss": 0.8343, "step": 7455 }, { "epoch": 1.8887903736542115, "grad_norm": 3.743173360824585, "learning_rate": 7.338953009180459e-06, "loss": 0.8369, "step": 7456 }, { "epoch": 1.8890436985433818, "grad_norm": 3.327045440673828, "learning_rate": 7.338212453854876e-06, "loss": 0.7343, "step": 7457 }, { "epoch": 1.8892970234325523, "grad_norm": 3.6900317668914795, "learning_rate": 7.337471832873383e-06, "loss": 0.8128, "step": 7458 }, { "epoch": 1.8895503483217226, "grad_norm": 3.558347225189209, "learning_rate": 7.336731146256777e-06, "loss": 0.8122, "step": 7459 }, { "epoch": 1.889803673210893, "grad_norm": 3.3445587158203125, "learning_rate": 7.335990394025856e-06, "loss": 0.7065, "step": 7460 }, { "epoch": 1.8900569981000634, "grad_norm": 3.516998767852783, "learning_rate": 7.3352495762014184e-06, "loss": 0.7819, "step": 7461 }, { "epoch": 1.8903103229892337, "grad_norm": 3.9598820209503174, "learning_rate": 7.3345086928042675e-06, "loss": 0.8612, "step": 7462 }, { "epoch": 1.890563647878404, "grad_norm": 3.2197012901306152, "learning_rate": 7.333767743855207e-06, "loss": 0.6761, "step": 7463 }, { "epoch": 1.8908169727675745, "grad_norm": 3.510225772857666, "learning_rate": 7.333026729375041e-06, "loss": 0.7569, "step": 7464 }, { "epoch": 1.8910702976567446, "grad_norm": 3.2959768772125244, "learning_rate": 7.332285649384578e-06, "loss": 0.6855, "step": 7465 }, { "epoch": 1.8913236225459151, "grad_norm": 3.7513856887817383, "learning_rate": 7.331544503904629e-06, "loss": 0.8205, "step": 7466 }, { "epoch": 1.8915769474350856, "grad_norm": 4.012969493865967, "learning_rate": 7.330803292956e-06, "loss": 0.7834, "step": 7467 }, { "epoch": 1.8918302723242557, "grad_norm": 4.250128269195557, "learning_rate": 7.330062016559509e-06, "loss": 0.8426, "step": 7468 }, { "epoch": 1.8920835972134262, "grad_norm": 3.455901861190796, "learning_rate": 7.329320674735968e-06, "loss": 0.8845, "step": 7469 }, { "epoch": 1.8923369221025967, "grad_norm": 3.4566681385040283, "learning_rate": 7.328579267506195e-06, "loss": 0.7361, "step": 7470 }, { "epoch": 
1.8925902469917668, "grad_norm": 3.7052054405212402, "learning_rate": 7.3278377948910076e-06, "loss": 0.7984, "step": 7471 }, { "epoch": 1.8928435718809373, "grad_norm": 4.076179027557373, "learning_rate": 7.327096256911225e-06, "loss": 0.7878, "step": 7472 }, { "epoch": 1.8930968967701076, "grad_norm": 3.4270684719085693, "learning_rate": 7.326354653587669e-06, "loss": 0.75, "step": 7473 }, { "epoch": 1.893350221659278, "grad_norm": 3.6080360412597656, "learning_rate": 7.325612984941167e-06, "loss": 0.8201, "step": 7474 }, { "epoch": 1.8936035465484484, "grad_norm": 3.11811900138855, "learning_rate": 7.324871250992543e-06, "loss": 0.6514, "step": 7475 }, { "epoch": 1.8938568714376187, "grad_norm": 3.600656747817993, "learning_rate": 7.324129451762622e-06, "loss": 0.8541, "step": 7476 }, { "epoch": 1.894110196326789, "grad_norm": 3.697023391723633, "learning_rate": 7.323387587272235e-06, "loss": 0.8515, "step": 7477 }, { "epoch": 1.8943635212159595, "grad_norm": 3.2946245670318604, "learning_rate": 7.322645657542214e-06, "loss": 0.7835, "step": 7478 }, { "epoch": 1.8946168461051298, "grad_norm": 3.841540575027466, "learning_rate": 7.321903662593391e-06, "loss": 0.8945, "step": 7479 }, { "epoch": 1.8948701709943, "grad_norm": 4.031865119934082, "learning_rate": 7.321161602446601e-06, "loss": 0.9294, "step": 7480 }, { "epoch": 1.8951234958834706, "grad_norm": 3.5384581089019775, "learning_rate": 7.320419477122682e-06, "loss": 0.698, "step": 7481 }, { "epoch": 1.895376820772641, "grad_norm": 4.3465895652771, "learning_rate": 7.319677286642472e-06, "loss": 0.8124, "step": 7482 }, { "epoch": 1.8956301456618112, "grad_norm": 3.62138295173645, "learning_rate": 7.31893503102681e-06, "loss": 0.8068, "step": 7483 }, { "epoch": 1.8958834705509817, "grad_norm": 3.597529888153076, "learning_rate": 7.31819271029654e-06, "loss": 0.7252, "step": 7484 }, { "epoch": 1.896136795440152, "grad_norm": 3.8947126865386963, "learning_rate": 7.317450324472506e-06, "loss": 0.6933, "step": 7485 }, { "epoch": 1.8963901203293223, "grad_norm": 4.336637496948242, "learning_rate": 7.316707873575551e-06, "loss": 0.8949, "step": 7486 }, { "epoch": 1.8966434452184928, "grad_norm": 3.8488662242889404, "learning_rate": 7.315965357626527e-06, "loss": 0.8458, "step": 7487 }, { "epoch": 1.896896770107663, "grad_norm": 3.8070075511932373, "learning_rate": 7.315222776646279e-06, "loss": 0.7341, "step": 7488 }, { "epoch": 1.8971500949968334, "grad_norm": 3.8892059326171875, "learning_rate": 7.314480130655661e-06, "loss": 0.8279, "step": 7489 }, { "epoch": 1.897403419886004, "grad_norm": 4.109464645385742, "learning_rate": 7.313737419675526e-06, "loss": 0.7159, "step": 7490 }, { "epoch": 1.8976567447751742, "grad_norm": 3.9447784423828125, "learning_rate": 7.312994643726728e-06, "loss": 0.8137, "step": 7491 }, { "epoch": 1.8979100696643445, "grad_norm": 3.6503822803497314, "learning_rate": 7.312251802830126e-06, "loss": 0.7035, "step": 7492 }, { "epoch": 1.898163394553515, "grad_norm": 3.267232894897461, "learning_rate": 7.311508897006576e-06, "loss": 0.655, "step": 7493 }, { "epoch": 1.898416719442685, "grad_norm": 3.844186544418335, "learning_rate": 7.310765926276939e-06, "loss": 0.8321, "step": 7494 }, { "epoch": 1.8986700443318556, "grad_norm": 3.721618175506592, "learning_rate": 7.310022890662079e-06, "loss": 0.8112, "step": 7495 }, { "epoch": 1.8989233692210261, "grad_norm": 3.928321123123169, "learning_rate": 7.309279790182859e-06, "loss": 0.7592, "step": 7496 }, { "epoch": 1.8991766941101962, "grad_norm": 3.996778726577759, 
"learning_rate": 7.3085366248601445e-06, "loss": 0.8084, "step": 7497 }, { "epoch": 1.8994300189993667, "grad_norm": 3.359065055847168, "learning_rate": 7.307793394714803e-06, "loss": 0.688, "step": 7498 }, { "epoch": 1.8996833438885372, "grad_norm": 3.5219783782958984, "learning_rate": 7.307050099767704e-06, "loss": 0.7543, "step": 7499 }, { "epoch": 1.8999366687777073, "grad_norm": 3.678074359893799, "learning_rate": 7.306306740039722e-06, "loss": 0.9287, "step": 7500 }, { "epoch": 1.8999366687777073, "eval_loss": 1.1349515914916992, "eval_runtime": 14.2346, "eval_samples_per_second": 28.101, "eval_steps_per_second": 3.513, "step": 7500 }, { "epoch": 1.9001899936668778, "grad_norm": 3.832315683364868, "learning_rate": 7.305563315551725e-06, "loss": 0.8444, "step": 7501 }, { "epoch": 1.900443318556048, "grad_norm": 3.518361806869507, "learning_rate": 7.304819826324592e-06, "loss": 0.7014, "step": 7502 }, { "epoch": 1.9006966434452184, "grad_norm": 3.5828850269317627, "learning_rate": 7.3040762723791984e-06, "loss": 0.84, "step": 7503 }, { "epoch": 1.900949968334389, "grad_norm": 3.6892452239990234, "learning_rate": 7.303332653736421e-06, "loss": 0.7193, "step": 7504 }, { "epoch": 1.9012032932235592, "grad_norm": 3.7808103561401367, "learning_rate": 7.302588970417145e-06, "loss": 0.679, "step": 7505 }, { "epoch": 1.9014566181127295, "grad_norm": 3.564419984817505, "learning_rate": 7.301845222442248e-06, "loss": 0.7407, "step": 7506 }, { "epoch": 1.9017099430019, "grad_norm": 4.022425174713135, "learning_rate": 7.301101409832617e-06, "loss": 0.8077, "step": 7507 }, { "epoch": 1.9019632678910703, "grad_norm": 3.8452892303466797, "learning_rate": 7.300357532609137e-06, "loss": 0.8032, "step": 7508 }, { "epoch": 1.9022165927802406, "grad_norm": 3.6932547092437744, "learning_rate": 7.299613590792695e-06, "loss": 0.6808, "step": 7509 }, { "epoch": 1.902469917669411, "grad_norm": 3.6755733489990234, "learning_rate": 7.2988695844041816e-06, "loss": 0.8113, "step": 7510 }, { "epoch": 1.9027232425585814, "grad_norm": 3.6966922283172607, "learning_rate": 7.298125513464487e-06, "loss": 0.808, "step": 7511 }, { "epoch": 1.9029765674477517, "grad_norm": 3.531323194503784, "learning_rate": 7.297381377994506e-06, "loss": 0.7983, "step": 7512 }, { "epoch": 1.9032298923369222, "grad_norm": 3.661247730255127, "learning_rate": 7.296637178015131e-06, "loss": 0.7768, "step": 7513 }, { "epoch": 1.9034832172260925, "grad_norm": 4.042456150054932, "learning_rate": 7.295892913547264e-06, "loss": 0.787, "step": 7514 }, { "epoch": 1.9037365421152628, "grad_norm": 3.56840443611145, "learning_rate": 7.295148584611796e-06, "loss": 0.7819, "step": 7515 }, { "epoch": 1.9039898670044333, "grad_norm": 3.881992816925049, "learning_rate": 7.294404191229633e-06, "loss": 0.7504, "step": 7516 }, { "epoch": 1.9042431918936036, "grad_norm": 4.397042274475098, "learning_rate": 7.2936597334216755e-06, "loss": 0.8692, "step": 7517 }, { "epoch": 1.9044965167827739, "grad_norm": 3.9434621334075928, "learning_rate": 7.292915211208828e-06, "loss": 0.7889, "step": 7518 }, { "epoch": 1.9047498416719444, "grad_norm": 3.710800886154175, "learning_rate": 7.292170624611996e-06, "loss": 0.802, "step": 7519 }, { "epoch": 1.9050031665611147, "grad_norm": 3.4779880046844482, "learning_rate": 7.291425973652087e-06, "loss": 0.6578, "step": 7520 }, { "epoch": 1.905256491450285, "grad_norm": 4.214772701263428, "learning_rate": 7.290681258350011e-06, "loss": 0.8795, "step": 7521 }, { "epoch": 1.9055098163394555, "grad_norm": 3.4735662937164307, 
"learning_rate": 7.289936478726678e-06, "loss": 0.6979, "step": 7522 }, { "epoch": 1.9057631412286256, "grad_norm": 3.842702627182007, "learning_rate": 7.289191634803002e-06, "loss": 0.7598, "step": 7523 }, { "epoch": 1.906016466117796, "grad_norm": 4.186038970947266, "learning_rate": 7.288446726599899e-06, "loss": 0.8704, "step": 7524 }, { "epoch": 1.9062697910069666, "grad_norm": 4.115698337554932, "learning_rate": 7.287701754138283e-06, "loss": 0.7708, "step": 7525 }, { "epoch": 1.9065231158961367, "grad_norm": 3.6517629623413086, "learning_rate": 7.286956717439075e-06, "loss": 0.892, "step": 7526 }, { "epoch": 1.9067764407853072, "grad_norm": 4.028857231140137, "learning_rate": 7.286211616523193e-06, "loss": 0.8999, "step": 7527 }, { "epoch": 1.9070297656744777, "grad_norm": 3.448723554611206, "learning_rate": 7.285466451411562e-06, "loss": 0.7555, "step": 7528 }, { "epoch": 1.9072830905636478, "grad_norm": 3.609137535095215, "learning_rate": 7.2847212221251025e-06, "loss": 0.8393, "step": 7529 }, { "epoch": 1.9075364154528183, "grad_norm": 3.8167686462402344, "learning_rate": 7.283975928684743e-06, "loss": 0.8018, "step": 7530 }, { "epoch": 1.9077897403419886, "grad_norm": 3.806180715560913, "learning_rate": 7.2832305711114094e-06, "loss": 0.7599, "step": 7531 }, { "epoch": 1.9080430652311589, "grad_norm": 3.578470468521118, "learning_rate": 7.282485149426031e-06, "loss": 0.7468, "step": 7532 }, { "epoch": 1.9082963901203294, "grad_norm": 3.534583806991577, "learning_rate": 7.281739663649541e-06, "loss": 0.7641, "step": 7533 }, { "epoch": 1.9085497150094997, "grad_norm": 3.559796094894409, "learning_rate": 7.28099411380287e-06, "loss": 0.7771, "step": 7534 }, { "epoch": 1.90880303989867, "grad_norm": 3.4391186237335205, "learning_rate": 7.280248499906952e-06, "loss": 0.8154, "step": 7535 }, { "epoch": 1.9090563647878405, "grad_norm": 4.066721439361572, "learning_rate": 7.279502821982725e-06, "loss": 0.9031, "step": 7536 }, { "epoch": 1.9093096896770108, "grad_norm": 4.040509223937988, "learning_rate": 7.2787570800511284e-06, "loss": 0.7016, "step": 7537 }, { "epoch": 1.909563014566181, "grad_norm": 3.6556410789489746, "learning_rate": 7.278011274133101e-06, "loss": 0.7134, "step": 7538 }, { "epoch": 1.9098163394553516, "grad_norm": 3.554013967514038, "learning_rate": 7.277265404249585e-06, "loss": 0.7547, "step": 7539 }, { "epoch": 1.9100696643445219, "grad_norm": 3.970280408859253, "learning_rate": 7.276519470421521e-06, "loss": 0.8431, "step": 7540 }, { "epoch": 1.9103229892336921, "grad_norm": 3.8135056495666504, "learning_rate": 7.275773472669859e-06, "loss": 0.8514, "step": 7541 }, { "epoch": 1.9105763141228627, "grad_norm": 3.77400541305542, "learning_rate": 7.275027411015545e-06, "loss": 0.7811, "step": 7542 }, { "epoch": 1.910829639012033, "grad_norm": 4.023411750793457, "learning_rate": 7.274281285479527e-06, "loss": 0.791, "step": 7543 }, { "epoch": 1.9110829639012032, "grad_norm": 3.69769549369812, "learning_rate": 7.273535096082758e-06, "loss": 0.9101, "step": 7544 }, { "epoch": 1.9113362887903738, "grad_norm": 3.17036509513855, "learning_rate": 7.272788842846187e-06, "loss": 0.6548, "step": 7545 }, { "epoch": 1.911589613679544, "grad_norm": 3.6044716835021973, "learning_rate": 7.272042525790771e-06, "loss": 0.8813, "step": 7546 }, { "epoch": 1.9118429385687143, "grad_norm": 3.249027729034424, "learning_rate": 7.271296144937465e-06, "loss": 0.7574, "step": 7547 }, { "epoch": 1.9120962634578849, "grad_norm": 3.897709369659424, "learning_rate": 7.2705497003072286e-06, 
"loss": 0.8049, "step": 7548 }, { "epoch": 1.9123495883470552, "grad_norm": 4.343056678771973, "learning_rate": 7.269803191921022e-06, "loss": 0.9261, "step": 7549 }, { "epoch": 1.9126029132362254, "grad_norm": 3.8631136417388916, "learning_rate": 7.269056619799805e-06, "loss": 0.8151, "step": 7550 }, { "epoch": 1.912856238125396, "grad_norm": 3.8449199199676514, "learning_rate": 7.26830998396454e-06, "loss": 0.7861, "step": 7551 }, { "epoch": 1.913109563014566, "grad_norm": 4.171482086181641, "learning_rate": 7.267563284436194e-06, "loss": 0.8838, "step": 7552 }, { "epoch": 1.9133628879037365, "grad_norm": 4.200960159301758, "learning_rate": 7.266816521235736e-06, "loss": 0.7035, "step": 7553 }, { "epoch": 1.913616212792907, "grad_norm": 3.545377254486084, "learning_rate": 7.2660696943841304e-06, "loss": 0.7542, "step": 7554 }, { "epoch": 1.9138695376820771, "grad_norm": 3.7115321159362793, "learning_rate": 7.26532280390235e-06, "loss": 0.77, "step": 7555 }, { "epoch": 1.9141228625712476, "grad_norm": 3.9595353603363037, "learning_rate": 7.264575849811368e-06, "loss": 0.87, "step": 7556 }, { "epoch": 1.914376187460418, "grad_norm": 3.5592095851898193, "learning_rate": 7.263828832132156e-06, "loss": 0.7631, "step": 7557 }, { "epoch": 1.9146295123495882, "grad_norm": 3.449580192565918, "learning_rate": 7.2630817508856924e-06, "loss": 0.7732, "step": 7558 }, { "epoch": 1.9148828372387587, "grad_norm": 3.6107394695281982, "learning_rate": 7.262334606092954e-06, "loss": 0.664, "step": 7559 }, { "epoch": 1.915136162127929, "grad_norm": 3.481963634490967, "learning_rate": 7.26158739777492e-06, "loss": 0.7425, "step": 7560 }, { "epoch": 1.9153894870170993, "grad_norm": 3.831488847732544, "learning_rate": 7.2608401259525705e-06, "loss": 0.7904, "step": 7561 }, { "epoch": 1.9156428119062698, "grad_norm": 3.188628673553467, "learning_rate": 7.260092790646891e-06, "loss": 0.6817, "step": 7562 }, { "epoch": 1.9158961367954401, "grad_norm": 3.5356452465057373, "learning_rate": 7.259345391878866e-06, "loss": 0.8112, "step": 7563 }, { "epoch": 1.9161494616846104, "grad_norm": 3.9766602516174316, "learning_rate": 7.258597929669481e-06, "loss": 0.8041, "step": 7564 }, { "epoch": 1.916402786573781, "grad_norm": 3.374429225921631, "learning_rate": 7.257850404039724e-06, "loss": 0.7427, "step": 7565 }, { "epoch": 1.9166561114629512, "grad_norm": 4.160411834716797, "learning_rate": 7.257102815010585e-06, "loss": 0.9573, "step": 7566 }, { "epoch": 1.9169094363521215, "grad_norm": 3.426292657852173, "learning_rate": 7.256355162603058e-06, "loss": 0.6885, "step": 7567 }, { "epoch": 1.917162761241292, "grad_norm": 3.8988962173461914, "learning_rate": 7.255607446838136e-06, "loss": 0.9027, "step": 7568 }, { "epoch": 1.9174160861304623, "grad_norm": 3.6588001251220703, "learning_rate": 7.254859667736813e-06, "loss": 0.731, "step": 7569 }, { "epoch": 1.9176694110196326, "grad_norm": 3.37235689163208, "learning_rate": 7.254111825320088e-06, "loss": 0.8461, "step": 7570 }, { "epoch": 1.9179227359088031, "grad_norm": 3.7164995670318604, "learning_rate": 7.25336391960896e-06, "loss": 0.6265, "step": 7571 }, { "epoch": 1.9181760607979734, "grad_norm": 3.629835605621338, "learning_rate": 7.252615950624428e-06, "loss": 0.814, "step": 7572 }, { "epoch": 1.9184293856871437, "grad_norm": 3.7369308471679688, "learning_rate": 7.251867918387496e-06, "loss": 0.7857, "step": 7573 }, { "epoch": 1.9186827105763142, "grad_norm": 3.5513267517089844, "learning_rate": 7.25111982291917e-06, "loss": 0.7524, "step": 7574 }, { "epoch": 
1.9189360354654845, "grad_norm": 3.8231332302093506, "learning_rate": 7.250371664240452e-06, "loss": 0.6639, "step": 7575 }, { "epoch": 1.9191893603546548, "grad_norm": 3.890836477279663, "learning_rate": 7.249623442372353e-06, "loss": 0.8387, "step": 7576 }, { "epoch": 1.9194426852438253, "grad_norm": 3.755363702774048, "learning_rate": 7.248875157335883e-06, "loss": 0.7449, "step": 7577 }, { "epoch": 1.9196960101329954, "grad_norm": 3.4669764041900635, "learning_rate": 7.248126809152052e-06, "loss": 0.7403, "step": 7578 }, { "epoch": 1.919949335022166, "grad_norm": 4.011867523193359, "learning_rate": 7.247378397841873e-06, "loss": 0.7917, "step": 7579 }, { "epoch": 1.9202026599113364, "grad_norm": 3.4292800426483154, "learning_rate": 7.246629923426363e-06, "loss": 0.8322, "step": 7580 }, { "epoch": 1.9204559848005065, "grad_norm": 3.6036596298217773, "learning_rate": 7.245881385926537e-06, "loss": 0.7647, "step": 7581 }, { "epoch": 1.920709309689677, "grad_norm": 3.850926160812378, "learning_rate": 7.2451327853634145e-06, "loss": 0.7483, "step": 7582 }, { "epoch": 1.9209626345788475, "grad_norm": 3.4227712154388428, "learning_rate": 7.2443841217580165e-06, "loss": 0.7001, "step": 7583 }, { "epoch": 1.9212159594680176, "grad_norm": 3.340449094772339, "learning_rate": 7.243635395131364e-06, "loss": 0.6489, "step": 7584 }, { "epoch": 1.9214692843571881, "grad_norm": 3.679422378540039, "learning_rate": 7.242886605504481e-06, "loss": 0.7541, "step": 7585 }, { "epoch": 1.9217226092463584, "grad_norm": 3.835430383682251, "learning_rate": 7.242137752898393e-06, "loss": 0.8837, "step": 7586 }, { "epoch": 1.9219759341355287, "grad_norm": 3.531416654586792, "learning_rate": 7.241388837334126e-06, "loss": 0.7448, "step": 7587 }, { "epoch": 1.9222292590246992, "grad_norm": 4.176506042480469, "learning_rate": 7.240639858832716e-06, "loss": 0.8177, "step": 7588 }, { "epoch": 1.9224825839138695, "grad_norm": 3.8392982482910156, "learning_rate": 7.239890817415185e-06, "loss": 0.8242, "step": 7589 }, { "epoch": 1.9227359088030398, "grad_norm": 4.653436183929443, "learning_rate": 7.239141713102569e-06, "loss": 1.0268, "step": 7590 }, { "epoch": 1.9229892336922103, "grad_norm": 3.7734992504119873, "learning_rate": 7.238392545915905e-06, "loss": 0.6918, "step": 7591 }, { "epoch": 1.9232425585813806, "grad_norm": 3.7001616954803467, "learning_rate": 7.2376433158762264e-06, "loss": 0.856, "step": 7592 }, { "epoch": 1.923495883470551, "grad_norm": 3.344609498977661, "learning_rate": 7.2368940230045724e-06, "loss": 0.7274, "step": 7593 }, { "epoch": 1.9237492083597214, "grad_norm": 3.9480466842651367, "learning_rate": 7.236144667321982e-06, "loss": 0.9505, "step": 7594 }, { "epoch": 1.9240025332488917, "grad_norm": 3.309288740158081, "learning_rate": 7.2353952488494994e-06, "loss": 0.7103, "step": 7595 }, { "epoch": 1.924255858138062, "grad_norm": 3.4350290298461914, "learning_rate": 7.2346457676081636e-06, "loss": 0.6724, "step": 7596 }, { "epoch": 1.9245091830272325, "grad_norm": 3.8128178119659424, "learning_rate": 7.233896223619023e-06, "loss": 0.8788, "step": 7597 }, { "epoch": 1.9247625079164028, "grad_norm": 3.7690844535827637, "learning_rate": 7.233146616903125e-06, "loss": 0.7628, "step": 7598 }, { "epoch": 1.925015832805573, "grad_norm": 3.6346118450164795, "learning_rate": 7.232396947481515e-06, "loss": 0.7439, "step": 7599 }, { "epoch": 1.9252691576947436, "grad_norm": 3.893569231033325, "learning_rate": 7.231647215375245e-06, "loss": 0.7907, "step": 7600 }, { "epoch": 1.925522482583914, "grad_norm": 
3.619025945663452, "learning_rate": 7.230897420605367e-06, "loss": 0.7784, "step": 7601 }, { "epoch": 1.9257758074730842, "grad_norm": 4.2273101806640625, "learning_rate": 7.230147563192935e-06, "loss": 0.9102, "step": 7602 }, { "epoch": 1.9260291323622547, "grad_norm": 3.5847582817077637, "learning_rate": 7.229397643159005e-06, "loss": 0.7227, "step": 7603 }, { "epoch": 1.926282457251425, "grad_norm": 3.5376057624816895, "learning_rate": 7.228647660524634e-06, "loss": 0.8489, "step": 7604 }, { "epoch": 1.9265357821405953, "grad_norm": 3.898679733276367, "learning_rate": 7.227897615310881e-06, "loss": 0.8561, "step": 7605 }, { "epoch": 1.9267891070297658, "grad_norm": 3.8402748107910156, "learning_rate": 7.227147507538806e-06, "loss": 0.6799, "step": 7606 }, { "epoch": 1.9270424319189359, "grad_norm": 3.4418163299560547, "learning_rate": 7.226397337229475e-06, "loss": 0.7967, "step": 7607 }, { "epoch": 1.9272957568081064, "grad_norm": 3.647125720977783, "learning_rate": 7.22564710440395e-06, "loss": 0.7052, "step": 7608 }, { "epoch": 1.927549081697277, "grad_norm": 3.678846836090088, "learning_rate": 7.224896809083297e-06, "loss": 0.8326, "step": 7609 }, { "epoch": 1.927802406586447, "grad_norm": 3.386265277862549, "learning_rate": 7.2241464512885825e-06, "loss": 0.6196, "step": 7610 }, { "epoch": 1.9280557314756175, "grad_norm": 3.275310754776001, "learning_rate": 7.22339603104088e-06, "loss": 0.7026, "step": 7611 }, { "epoch": 1.928309056364788, "grad_norm": 3.610837697982788, "learning_rate": 7.222645548361259e-06, "loss": 0.6893, "step": 7612 }, { "epoch": 1.928562381253958, "grad_norm": 4.1723761558532715, "learning_rate": 7.221895003270794e-06, "loss": 0.859, "step": 7613 }, { "epoch": 1.9288157061431286, "grad_norm": 3.472421884536743, "learning_rate": 7.221144395790556e-06, "loss": 0.7079, "step": 7614 }, { "epoch": 1.9290690310322989, "grad_norm": 3.805896520614624, "learning_rate": 7.220393725941625e-06, "loss": 0.7836, "step": 7615 }, { "epoch": 1.9293223559214692, "grad_norm": 3.836207389831543, "learning_rate": 7.219642993745079e-06, "loss": 0.8581, "step": 7616 }, { "epoch": 1.9295756808106397, "grad_norm": 4.602386474609375, "learning_rate": 7.218892199221997e-06, "loss": 0.7302, "step": 7617 }, { "epoch": 1.92982900569981, "grad_norm": 3.5737602710723877, "learning_rate": 7.218141342393464e-06, "loss": 0.8373, "step": 7618 }, { "epoch": 1.9300823305889803, "grad_norm": 3.311408042907715, "learning_rate": 7.217390423280561e-06, "loss": 0.7424, "step": 7619 }, { "epoch": 1.9303356554781508, "grad_norm": 4.090540885925293, "learning_rate": 7.216639441904374e-06, "loss": 1.0196, "step": 7620 }, { "epoch": 1.930588980367321, "grad_norm": 3.4615042209625244, "learning_rate": 7.215888398285991e-06, "loss": 0.7255, "step": 7621 }, { "epoch": 1.9308423052564914, "grad_norm": 3.5449633598327637, "learning_rate": 7.215137292446499e-06, "loss": 0.7636, "step": 7622 }, { "epoch": 1.9310956301456619, "grad_norm": 3.802718162536621, "learning_rate": 7.214386124406992e-06, "loss": 0.6599, "step": 7623 }, { "epoch": 1.9313489550348322, "grad_norm": 3.457990884780884, "learning_rate": 7.213634894188559e-06, "loss": 0.7614, "step": 7624 }, { "epoch": 1.9316022799240025, "grad_norm": 3.6755120754241943, "learning_rate": 7.212883601812296e-06, "loss": 0.8009, "step": 7625 }, { "epoch": 1.931855604813173, "grad_norm": 3.7048819065093994, "learning_rate": 7.212132247299298e-06, "loss": 0.8258, "step": 7626 }, { "epoch": 1.9321089297023433, "grad_norm": 3.331568479537964, "learning_rate": 
7.211380830670663e-06, "loss": 0.7657, "step": 7627 }, { "epoch": 1.9323622545915136, "grad_norm": 3.7705137729644775, "learning_rate": 7.210629351947491e-06, "loss": 0.7385, "step": 7628 }, { "epoch": 1.932615579480684, "grad_norm": 4.362046241760254, "learning_rate": 7.209877811150884e-06, "loss": 0.9805, "step": 7629 }, { "epoch": 1.9328689043698544, "grad_norm": 3.570105791091919, "learning_rate": 7.209126208301943e-06, "loss": 0.7812, "step": 7630 }, { "epoch": 1.9331222292590247, "grad_norm": 3.9807088375091553, "learning_rate": 7.208374543421773e-06, "loss": 0.7976, "step": 7631 }, { "epoch": 1.9333755541481952, "grad_norm": 3.6694552898406982, "learning_rate": 7.2076228165314835e-06, "loss": 0.8222, "step": 7632 }, { "epoch": 1.9336288790373655, "grad_norm": 3.9893441200256348, "learning_rate": 7.206871027652177e-06, "loss": 0.8547, "step": 7633 }, { "epoch": 1.9338822039265358, "grad_norm": 3.9237453937530518, "learning_rate": 7.206119176804966e-06, "loss": 0.8376, "step": 7634 }, { "epoch": 1.9341355288157063, "grad_norm": 3.7758641242980957, "learning_rate": 7.205367264010965e-06, "loss": 0.7581, "step": 7635 }, { "epoch": 1.9343888537048763, "grad_norm": 3.6023716926574707, "learning_rate": 7.204615289291283e-06, "loss": 0.777, "step": 7636 }, { "epoch": 1.9346421785940469, "grad_norm": 3.6123085021972656, "learning_rate": 7.203863252667038e-06, "loss": 0.6647, "step": 7637 }, { "epoch": 1.9348955034832174, "grad_norm": 4.211479187011719, "learning_rate": 7.203111154159346e-06, "loss": 0.8433, "step": 7638 }, { "epoch": 1.9351488283723874, "grad_norm": 3.6146914958953857, "learning_rate": 7.202358993789323e-06, "loss": 0.6477, "step": 7639 }, { "epoch": 1.935402153261558, "grad_norm": 3.787353277206421, "learning_rate": 7.201606771578092e-06, "loss": 0.7655, "step": 7640 }, { "epoch": 1.9356554781507285, "grad_norm": 3.9961917400360107, "learning_rate": 7.200854487546776e-06, "loss": 0.8435, "step": 7641 }, { "epoch": 1.9359088030398985, "grad_norm": 3.7698097229003906, "learning_rate": 7.200102141716498e-06, "loss": 0.6617, "step": 7642 }, { "epoch": 1.936162127929069, "grad_norm": 3.973029613494873, "learning_rate": 7.199349734108383e-06, "loss": 0.8296, "step": 7643 }, { "epoch": 1.9364154528182393, "grad_norm": 3.907489538192749, "learning_rate": 7.198597264743558e-06, "loss": 0.9128, "step": 7644 }, { "epoch": 1.9366687777074096, "grad_norm": 3.565837860107422, "learning_rate": 7.197844733643152e-06, "loss": 0.7948, "step": 7645 }, { "epoch": 1.9369221025965802, "grad_norm": 3.805122137069702, "learning_rate": 7.197092140828295e-06, "loss": 0.8112, "step": 7646 }, { "epoch": 1.9371754274857504, "grad_norm": 3.4625070095062256, "learning_rate": 7.196339486320122e-06, "loss": 0.8329, "step": 7647 }, { "epoch": 1.9374287523749207, "grad_norm": 4.034064292907715, "learning_rate": 7.195586770139764e-06, "loss": 0.681, "step": 7648 }, { "epoch": 1.9376820772640913, "grad_norm": 3.428379535675049, "learning_rate": 7.1948339923083596e-06, "loss": 0.7458, "step": 7649 }, { "epoch": 1.9379354021532615, "grad_norm": 3.5189270973205566, "learning_rate": 7.194081152847045e-06, "loss": 0.956, "step": 7650 }, { "epoch": 1.9381887270424318, "grad_norm": 3.7598392963409424, "learning_rate": 7.19332825177696e-06, "loss": 0.6577, "step": 7651 }, { "epoch": 1.9384420519316024, "grad_norm": 4.254335880279541, "learning_rate": 7.192575289119246e-06, "loss": 0.8169, "step": 7652 }, { "epoch": 1.9386953768207726, "grad_norm": 3.6726346015930176, "learning_rate": 7.191822264895045e-06, "loss": 
0.7635, "step": 7653 }, { "epoch": 1.938948701709943, "grad_norm": 3.438404083251953, "learning_rate": 7.191069179125501e-06, "loss": 0.7811, "step": 7654 }, { "epoch": 1.9392020265991134, "grad_norm": 3.4573304653167725, "learning_rate": 7.190316031831763e-06, "loss": 0.8712, "step": 7655 }, { "epoch": 1.9394553514882837, "grad_norm": 3.2838821411132812, "learning_rate": 7.189562823034977e-06, "loss": 0.741, "step": 7656 }, { "epoch": 1.939708676377454, "grad_norm": 3.823986291885376, "learning_rate": 7.188809552756294e-06, "loss": 0.8245, "step": 7657 }, { "epoch": 1.9399620012666245, "grad_norm": 3.4117276668548584, "learning_rate": 7.188056221016864e-06, "loss": 0.7209, "step": 7658 }, { "epoch": 1.9402153261557948, "grad_norm": 3.7551310062408447, "learning_rate": 7.187302827837841e-06, "loss": 0.7397, "step": 7659 }, { "epoch": 1.9404686510449651, "grad_norm": 4.005005359649658, "learning_rate": 7.186549373240379e-06, "loss": 0.7896, "step": 7660 }, { "epoch": 1.9407219759341356, "grad_norm": 3.9484128952026367, "learning_rate": 7.185795857245634e-06, "loss": 0.8911, "step": 7661 }, { "epoch": 1.940975300823306, "grad_norm": 4.208717346191406, "learning_rate": 7.18504227987477e-06, "loss": 0.8898, "step": 7662 }, { "epoch": 1.9412286257124762, "grad_norm": 3.811157703399658, "learning_rate": 7.1842886411489395e-06, "loss": 0.9327, "step": 7663 }, { "epoch": 1.9414819506016467, "grad_norm": 3.709144115447998, "learning_rate": 7.183534941089308e-06, "loss": 0.8782, "step": 7664 }, { "epoch": 1.9417352754908168, "grad_norm": 3.7643768787384033, "learning_rate": 7.182781179717038e-06, "loss": 0.802, "step": 7665 }, { "epoch": 1.9419886003799873, "grad_norm": 4.248831748962402, "learning_rate": 7.182027357053297e-06, "loss": 0.7969, "step": 7666 }, { "epoch": 1.9422419252691578, "grad_norm": 3.9656169414520264, "learning_rate": 7.181273473119251e-06, "loss": 0.893, "step": 7667 }, { "epoch": 1.942495250158328, "grad_norm": 3.2689943313598633, "learning_rate": 7.1805195279360674e-06, "loss": 0.6699, "step": 7668 }, { "epoch": 1.9427485750474984, "grad_norm": 3.957042932510376, "learning_rate": 7.179765521524917e-06, "loss": 0.7986, "step": 7669 }, { "epoch": 1.943001899936669, "grad_norm": 4.168601989746094, "learning_rate": 7.1790114539069725e-06, "loss": 0.7537, "step": 7670 }, { "epoch": 1.943255224825839, "grad_norm": 3.1504342555999756, "learning_rate": 7.178257325103408e-06, "loss": 0.6949, "step": 7671 }, { "epoch": 1.9435085497150095, "grad_norm": 3.6926889419555664, "learning_rate": 7.177503135135399e-06, "loss": 0.8342, "step": 7672 }, { "epoch": 1.9437618746041798, "grad_norm": 3.5451834201812744, "learning_rate": 7.176748884024123e-06, "loss": 0.7534, "step": 7673 }, { "epoch": 1.94401519949335, "grad_norm": 3.647296190261841, "learning_rate": 7.175994571790756e-06, "loss": 0.8815, "step": 7674 }, { "epoch": 1.9442685243825206, "grad_norm": 3.3122143745422363, "learning_rate": 7.175240198456484e-06, "loss": 0.7635, "step": 7675 }, { "epoch": 1.944521849271691, "grad_norm": 3.637890338897705, "learning_rate": 7.174485764042485e-06, "loss": 0.9206, "step": 7676 }, { "epoch": 1.9447751741608612, "grad_norm": 3.588634729385376, "learning_rate": 7.1737312685699456e-06, "loss": 0.8351, "step": 7677 }, { "epoch": 1.9450284990500317, "grad_norm": 3.2763001918792725, "learning_rate": 7.17297671206005e-06, "loss": 0.7212, "step": 7678 }, { "epoch": 1.945281823939202, "grad_norm": 3.9934630393981934, "learning_rate": 7.172222094533987e-06, "loss": 0.8481, "step": 7679 }, { "epoch": 
1.9455351488283723, "grad_norm": 3.5940845012664795, "learning_rate": 7.171467416012947e-06, "loss": 0.7426, "step": 7680 }, { "epoch": 1.9457884737175428, "grad_norm": 3.457130193710327, "learning_rate": 7.17071267651812e-06, "loss": 0.7158, "step": 7681 }, { "epoch": 1.9460417986067131, "grad_norm": 4.111364364624023, "learning_rate": 7.169957876070697e-06, "loss": 0.7249, "step": 7682 }, { "epoch": 1.9462951234958834, "grad_norm": 3.623413324356079, "learning_rate": 7.169203014691874e-06, "loss": 0.8585, "step": 7683 }, { "epoch": 1.946548448385054, "grad_norm": 3.4657838344573975, "learning_rate": 7.1684480924028466e-06, "loss": 0.7721, "step": 7684 }, { "epoch": 1.9468017732742242, "grad_norm": 3.9353878498077393, "learning_rate": 7.167693109224814e-06, "loss": 0.7129, "step": 7685 }, { "epoch": 1.9470550981633945, "grad_norm": 3.6031713485717773, "learning_rate": 7.166938065178976e-06, "loss": 0.7578, "step": 7686 }, { "epoch": 1.947308423052565, "grad_norm": 3.3692307472229004, "learning_rate": 7.166182960286531e-06, "loss": 0.8478, "step": 7687 }, { "epoch": 1.9475617479417353, "grad_norm": 3.4741501808166504, "learning_rate": 7.165427794568683e-06, "loss": 0.7355, "step": 7688 }, { "epoch": 1.9478150728309056, "grad_norm": 3.8740181922912598, "learning_rate": 7.1646725680466374e-06, "loss": 0.9451, "step": 7689 }, { "epoch": 1.9480683977200761, "grad_norm": 3.816019058227539, "learning_rate": 7.163917280741601e-06, "loss": 0.8195, "step": 7690 }, { "epoch": 1.9483217226092464, "grad_norm": 3.896796703338623, "learning_rate": 7.1631619326747836e-06, "loss": 0.8767, "step": 7691 }, { "epoch": 1.9485750474984167, "grad_norm": 3.359934091567993, "learning_rate": 7.162406523867391e-06, "loss": 0.5411, "step": 7692 }, { "epoch": 1.9488283723875872, "grad_norm": 3.7625036239624023, "learning_rate": 7.161651054340637e-06, "loss": 0.8589, "step": 7693 }, { "epoch": 1.9490816972767573, "grad_norm": 3.353876829147339, "learning_rate": 7.160895524115734e-06, "loss": 0.7515, "step": 7694 }, { "epoch": 1.9493350221659278, "grad_norm": 3.5972187519073486, "learning_rate": 7.160139933213899e-06, "loss": 0.8326, "step": 7695 }, { "epoch": 1.9495883470550983, "grad_norm": 4.180943489074707, "learning_rate": 7.159384281656346e-06, "loss": 0.8135, "step": 7696 }, { "epoch": 1.9498416719442684, "grad_norm": 3.5679547786712646, "learning_rate": 7.158628569464295e-06, "loss": 0.6982, "step": 7697 }, { "epoch": 1.950094996833439, "grad_norm": 3.4704604148864746, "learning_rate": 7.157872796658964e-06, "loss": 0.7795, "step": 7698 }, { "epoch": 1.9503483217226094, "grad_norm": 4.002320766448975, "learning_rate": 7.157116963261579e-06, "loss": 0.8208, "step": 7699 }, { "epoch": 1.9506016466117795, "grad_norm": 3.9060094356536865, "learning_rate": 7.156361069293358e-06, "loss": 0.9398, "step": 7700 }, { "epoch": 1.95085497150095, "grad_norm": 4.0416951179504395, "learning_rate": 7.15560511477553e-06, "loss": 0.7783, "step": 7701 }, { "epoch": 1.9511082963901203, "grad_norm": 3.742638111114502, "learning_rate": 7.15484909972932e-06, "loss": 0.8322, "step": 7702 }, { "epoch": 1.9513616212792906, "grad_norm": 3.8053700923919678, "learning_rate": 7.1540930241759575e-06, "loss": 0.8473, "step": 7703 }, { "epoch": 1.951614946168461, "grad_norm": 3.480557918548584, "learning_rate": 7.153336888136673e-06, "loss": 0.8242, "step": 7704 }, { "epoch": 1.9518682710576314, "grad_norm": 3.211710214614868, "learning_rate": 7.152580691632697e-06, "loss": 0.7533, "step": 7705 }, { "epoch": 1.9521215959468017, "grad_norm": 
3.7976908683776855, "learning_rate": 7.151824434685265e-06, "loss": 0.8369, "step": 7706 }, { "epoch": 1.9523749208359722, "grad_norm": 3.8064568042755127, "learning_rate": 7.1510681173156116e-06, "loss": 0.7898, "step": 7707 }, { "epoch": 1.9526282457251425, "grad_norm": 3.578582525253296, "learning_rate": 7.150311739544973e-06, "loss": 0.7215, "step": 7708 }, { "epoch": 1.9528815706143128, "grad_norm": 3.4547502994537354, "learning_rate": 7.149555301394588e-06, "loss": 0.7414, "step": 7709 }, { "epoch": 1.9531348955034833, "grad_norm": 3.6342086791992188, "learning_rate": 7.148798802885698e-06, "loss": 0.7902, "step": 7710 }, { "epoch": 1.9533882203926536, "grad_norm": 3.623147964477539, "learning_rate": 7.148042244039546e-06, "loss": 0.6889, "step": 7711 }, { "epoch": 1.9536415452818239, "grad_norm": 3.8836662769317627, "learning_rate": 7.1472856248773725e-06, "loss": 0.8143, "step": 7712 }, { "epoch": 1.9538948701709944, "grad_norm": 3.334202766418457, "learning_rate": 7.146528945420426e-06, "loss": 0.7151, "step": 7713 }, { "epoch": 1.9541481950601647, "grad_norm": 3.4288785457611084, "learning_rate": 7.1457722056899525e-06, "loss": 0.7372, "step": 7714 }, { "epoch": 1.954401519949335, "grad_norm": 3.8028464317321777, "learning_rate": 7.1450154057072e-06, "loss": 0.8303, "step": 7715 }, { "epoch": 1.9546548448385055, "grad_norm": 3.7775213718414307, "learning_rate": 7.144258545493422e-06, "loss": 0.8075, "step": 7716 }, { "epoch": 1.9549081697276758, "grad_norm": 3.732243776321411, "learning_rate": 7.143501625069869e-06, "loss": 0.8185, "step": 7717 }, { "epoch": 1.955161494616846, "grad_norm": 3.6021058559417725, "learning_rate": 7.142744644457794e-06, "loss": 0.7003, "step": 7718 }, { "epoch": 1.9554148195060166, "grad_norm": 4.029145240783691, "learning_rate": 7.141987603678454e-06, "loss": 0.7883, "step": 7719 }, { "epoch": 1.9556681443951869, "grad_norm": 3.088573455810547, "learning_rate": 7.141230502753106e-06, "loss": 0.6743, "step": 7720 }, { "epoch": 1.9559214692843572, "grad_norm": 3.913045644760132, "learning_rate": 7.1404733417030105e-06, "loss": 0.8128, "step": 7721 }, { "epoch": 1.9561747941735277, "grad_norm": 3.5408408641815186, "learning_rate": 7.139716120549425e-06, "loss": 0.8064, "step": 7722 }, { "epoch": 1.9564281190626978, "grad_norm": 3.7428712844848633, "learning_rate": 7.138958839313613e-06, "loss": 0.6369, "step": 7723 }, { "epoch": 1.9566814439518683, "grad_norm": 3.7903828620910645, "learning_rate": 7.138201498016841e-06, "loss": 0.7891, "step": 7724 }, { "epoch": 1.9569347688410388, "grad_norm": 3.8979010581970215, "learning_rate": 7.137444096680373e-06, "loss": 0.9399, "step": 7725 }, { "epoch": 1.9571880937302089, "grad_norm": 3.3378214836120605, "learning_rate": 7.136686635325475e-06, "loss": 0.7309, "step": 7726 }, { "epoch": 1.9574414186193794, "grad_norm": 4.116669654846191, "learning_rate": 7.1359291139734185e-06, "loss": 0.8399, "step": 7727 }, { "epoch": 1.9576947435085497, "grad_norm": 3.8506109714508057, "learning_rate": 7.1351715326454725e-06, "loss": 0.7953, "step": 7728 }, { "epoch": 1.95794806839772, "grad_norm": 3.805879592895508, "learning_rate": 7.1344138913629125e-06, "loss": 0.8638, "step": 7729 }, { "epoch": 1.9582013932868905, "grad_norm": 3.966623544692993, "learning_rate": 7.13365619014701e-06, "loss": 0.822, "step": 7730 }, { "epoch": 1.9584547181760608, "grad_norm": 3.9578821659088135, "learning_rate": 7.132898429019043e-06, "loss": 0.7866, "step": 7731 }, { "epoch": 1.958708043065231, "grad_norm": 3.6943609714508057, 
"learning_rate": 7.132140608000286e-06, "loss": 0.8091, "step": 7732 }, { "epoch": 1.9589613679544016, "grad_norm": 3.6804726123809814, "learning_rate": 7.131382727112021e-06, "loss": 0.7785, "step": 7733 }, { "epoch": 1.9592146928435719, "grad_norm": 3.286087989807129, "learning_rate": 7.130624786375526e-06, "loss": 0.6549, "step": 7734 }, { "epoch": 1.9594680177327422, "grad_norm": 3.5736477375030518, "learning_rate": 7.129866785812087e-06, "loss": 0.6937, "step": 7735 }, { "epoch": 1.9597213426219127, "grad_norm": 3.499422073364258, "learning_rate": 7.129108725442988e-06, "loss": 0.8102, "step": 7736 }, { "epoch": 1.959974667511083, "grad_norm": 3.411736249923706, "learning_rate": 7.128350605289512e-06, "loss": 0.8138, "step": 7737 }, { "epoch": 1.9602279924002532, "grad_norm": 3.406217336654663, "learning_rate": 7.12759242537295e-06, "loss": 0.7493, "step": 7738 }, { "epoch": 1.9604813172894238, "grad_norm": 3.62760591506958, "learning_rate": 7.126834185714588e-06, "loss": 0.8138, "step": 7739 }, { "epoch": 1.960734642178594, "grad_norm": 4.046286582946777, "learning_rate": 7.12607588633572e-06, "loss": 0.7405, "step": 7740 }, { "epoch": 1.9609879670677643, "grad_norm": 3.9970147609710693, "learning_rate": 7.125317527257638e-06, "loss": 0.8729, "step": 7741 }, { "epoch": 1.9612412919569349, "grad_norm": 3.835890769958496, "learning_rate": 7.124559108501636e-06, "loss": 0.7778, "step": 7742 }, { "epoch": 1.9614946168461052, "grad_norm": 3.5552656650543213, "learning_rate": 7.123800630089008e-06, "loss": 0.6406, "step": 7743 }, { "epoch": 1.9617479417352754, "grad_norm": 4.11686372756958, "learning_rate": 7.123042092041056e-06, "loss": 0.6692, "step": 7744 }, { "epoch": 1.962001266624446, "grad_norm": 3.6176023483276367, "learning_rate": 7.122283494379076e-06, "loss": 0.7699, "step": 7745 }, { "epoch": 1.9622545915136163, "grad_norm": 3.621389627456665, "learning_rate": 7.12152483712437e-06, "loss": 0.8314, "step": 7746 }, { "epoch": 1.9625079164027865, "grad_norm": 3.503429412841797, "learning_rate": 7.1207661202982416e-06, "loss": 0.7435, "step": 7747 }, { "epoch": 1.962761241291957, "grad_norm": 3.614459276199341, "learning_rate": 7.120007343921994e-06, "loss": 0.8376, "step": 7748 }, { "epoch": 1.9630145661811271, "grad_norm": 3.402324914932251, "learning_rate": 7.119248508016934e-06, "loss": 0.7428, "step": 7749 }, { "epoch": 1.9632678910702976, "grad_norm": 4.012660980224609, "learning_rate": 7.118489612604369e-06, "loss": 0.817, "step": 7750 }, { "epoch": 1.9635212159594682, "grad_norm": 4.236724376678467, "learning_rate": 7.117730657705608e-06, "loss": 0.8299, "step": 7751 }, { "epoch": 1.9637745408486382, "grad_norm": 3.6919877529144287, "learning_rate": 7.116971643341964e-06, "loss": 0.8417, "step": 7752 }, { "epoch": 1.9640278657378087, "grad_norm": 3.5648860931396484, "learning_rate": 7.116212569534747e-06, "loss": 0.737, "step": 7753 }, { "epoch": 1.9642811906269793, "grad_norm": 3.3912346363067627, "learning_rate": 7.115453436305273e-06, "loss": 0.7417, "step": 7754 }, { "epoch": 1.9645345155161493, "grad_norm": 3.2913858890533447, "learning_rate": 7.11469424367486e-06, "loss": 0.6647, "step": 7755 }, { "epoch": 1.9647878404053198, "grad_norm": 3.5995373725891113, "learning_rate": 7.113934991664821e-06, "loss": 0.7586, "step": 7756 }, { "epoch": 1.9650411652944901, "grad_norm": 3.7692344188690186, "learning_rate": 7.11317568029648e-06, "loss": 0.7317, "step": 7757 }, { "epoch": 1.9652944901836604, "grad_norm": 3.9188623428344727, "learning_rate": 7.112416309591156e-06, 
"loss": 0.7836, "step": 7758 }, { "epoch": 1.965547815072831, "grad_norm": 3.634542942047119, "learning_rate": 7.111656879570173e-06, "loss": 0.761, "step": 7759 }, { "epoch": 1.9658011399620012, "grad_norm": 3.5849297046661377, "learning_rate": 7.110897390254853e-06, "loss": 0.8241, "step": 7760 }, { "epoch": 1.9660544648511715, "grad_norm": 3.7755422592163086, "learning_rate": 7.110137841666524e-06, "loss": 0.8205, "step": 7761 }, { "epoch": 1.966307789740342, "grad_norm": 3.55366587638855, "learning_rate": 7.109378233826513e-06, "loss": 0.7559, "step": 7762 }, { "epoch": 1.9665611146295123, "grad_norm": 4.0000834465026855, "learning_rate": 7.10861856675615e-06, "loss": 0.8388, "step": 7763 }, { "epoch": 1.9668144395186826, "grad_norm": 3.51295804977417, "learning_rate": 7.107858840476766e-06, "loss": 0.7637, "step": 7764 }, { "epoch": 1.9670677644078531, "grad_norm": 3.6552960872650146, "learning_rate": 7.107099055009694e-06, "loss": 0.8367, "step": 7765 }, { "epoch": 1.9673210892970234, "grad_norm": 3.5162038803100586, "learning_rate": 7.106339210376267e-06, "loss": 0.7118, "step": 7766 }, { "epoch": 1.9675744141861937, "grad_norm": 4.007128715515137, "learning_rate": 7.105579306597823e-06, "loss": 0.7799, "step": 7767 }, { "epoch": 1.9678277390753642, "grad_norm": 3.583599805831909, "learning_rate": 7.104819343695699e-06, "loss": 0.8737, "step": 7768 }, { "epoch": 1.9680810639645345, "grad_norm": 3.4805471897125244, "learning_rate": 7.1040593216912335e-06, "loss": 0.7568, "step": 7769 }, { "epoch": 1.9683343888537048, "grad_norm": 3.582451581954956, "learning_rate": 7.10329924060577e-06, "loss": 0.7771, "step": 7770 }, { "epoch": 1.9685877137428753, "grad_norm": 3.251368999481201, "learning_rate": 7.102539100460648e-06, "loss": 0.8377, "step": 7771 }, { "epoch": 1.9688410386320456, "grad_norm": 3.5506715774536133, "learning_rate": 7.101778901277214e-06, "loss": 0.76, "step": 7772 }, { "epoch": 1.969094363521216, "grad_norm": 3.9643442630767822, "learning_rate": 7.1010186430768134e-06, "loss": 0.8701, "step": 7773 }, { "epoch": 1.9693476884103864, "grad_norm": 3.7198781967163086, "learning_rate": 7.100258325880796e-06, "loss": 0.8223, "step": 7774 }, { "epoch": 1.9696010132995567, "grad_norm": 3.424121141433716, "learning_rate": 7.099497949710507e-06, "loss": 0.7835, "step": 7775 }, { "epoch": 1.969854338188727, "grad_norm": 3.452148914337158, "learning_rate": 7.0987375145872994e-06, "loss": 0.826, "step": 7776 }, { "epoch": 1.9701076630778975, "grad_norm": 3.364057779312134, "learning_rate": 7.097977020532526e-06, "loss": 0.6752, "step": 7777 }, { "epoch": 1.9703609879670676, "grad_norm": 3.64872145652771, "learning_rate": 7.097216467567542e-06, "loss": 0.7157, "step": 7778 }, { "epoch": 1.9706143128562381, "grad_norm": 3.607300281524658, "learning_rate": 7.096455855713702e-06, "loss": 0.6867, "step": 7779 }, { "epoch": 1.9708676377454086, "grad_norm": 3.4279701709747314, "learning_rate": 7.095695184992365e-06, "loss": 0.69, "step": 7780 }, { "epoch": 1.9711209626345787, "grad_norm": 3.260676383972168, "learning_rate": 7.094934455424889e-06, "loss": 0.7615, "step": 7781 }, { "epoch": 1.9713742875237492, "grad_norm": 3.7134289741516113, "learning_rate": 7.0941736670326346e-06, "loss": 0.7148, "step": 7782 }, { "epoch": 1.9716276124129197, "grad_norm": 3.8525795936584473, "learning_rate": 7.093412819836966e-06, "loss": 0.8088, "step": 7783 }, { "epoch": 1.9718809373020898, "grad_norm": 3.6673407554626465, "learning_rate": 7.092651913859246e-06, "loss": 0.7594, "step": 7784 }, { "epoch": 
1.9721342621912603, "grad_norm": 3.580387592315674, "learning_rate": 7.091890949120841e-06, "loss": 0.6788, "step": 7785 }, { "epoch": 1.9723875870804306, "grad_norm": 4.053237438201904, "learning_rate": 7.091129925643119e-06, "loss": 0.8555, "step": 7786 }, { "epoch": 1.972640911969601, "grad_norm": 3.7616658210754395, "learning_rate": 7.090368843447448e-06, "loss": 0.7988, "step": 7787 }, { "epoch": 1.9728942368587714, "grad_norm": 3.4558634757995605, "learning_rate": 7.089607702555201e-06, "loss": 0.7842, "step": 7788 }, { "epoch": 1.9731475617479417, "grad_norm": 3.9048306941986084, "learning_rate": 7.088846502987747e-06, "loss": 0.7438, "step": 7789 }, { "epoch": 1.973400886637112, "grad_norm": 3.3912265300750732, "learning_rate": 7.088085244766464e-06, "loss": 0.751, "step": 7790 }, { "epoch": 1.9736542115262825, "grad_norm": 3.807003974914551, "learning_rate": 7.0873239279127246e-06, "loss": 0.7561, "step": 7791 }, { "epoch": 1.9739075364154528, "grad_norm": 3.7926597595214844, "learning_rate": 7.086562552447909e-06, "loss": 0.8205, "step": 7792 }, { "epoch": 1.974160861304623, "grad_norm": 3.7442386150360107, "learning_rate": 7.085801118393394e-06, "loss": 0.8445, "step": 7793 }, { "epoch": 1.9744141861937936, "grad_norm": 3.722290515899658, "learning_rate": 7.085039625770562e-06, "loss": 0.8169, "step": 7794 }, { "epoch": 1.974667511082964, "grad_norm": 3.7071523666381836, "learning_rate": 7.084278074600794e-06, "loss": 0.7872, "step": 7795 }, { "epoch": 1.9749208359721342, "grad_norm": 3.783977746963501, "learning_rate": 7.0835164649054744e-06, "loss": 0.8322, "step": 7796 }, { "epoch": 1.9751741608613047, "grad_norm": 3.0392532348632812, "learning_rate": 7.0827547967059885e-06, "loss": 0.7639, "step": 7797 }, { "epoch": 1.975427485750475, "grad_norm": 3.8044259548187256, "learning_rate": 7.081993070023725e-06, "loss": 0.8509, "step": 7798 }, { "epoch": 1.9756808106396453, "grad_norm": 3.57563853263855, "learning_rate": 7.081231284880071e-06, "loss": 0.7586, "step": 7799 }, { "epoch": 1.9759341355288158, "grad_norm": 3.971006393432617, "learning_rate": 7.080469441296418e-06, "loss": 0.8335, "step": 7800 }, { "epoch": 1.976187460417986, "grad_norm": 3.3764495849609375, "learning_rate": 7.079707539294158e-06, "loss": 0.7588, "step": 7801 }, { "epoch": 1.9764407853071564, "grad_norm": 3.7590389251708984, "learning_rate": 7.0789455788946845e-06, "loss": 0.8711, "step": 7802 }, { "epoch": 1.976694110196327, "grad_norm": 4.3643574714660645, "learning_rate": 7.078183560119395e-06, "loss": 0.8518, "step": 7803 }, { "epoch": 1.9769474350854972, "grad_norm": 3.7080447673797607, "learning_rate": 7.077421482989686e-06, "loss": 0.8306, "step": 7804 }, { "epoch": 1.9772007599746675, "grad_norm": 3.6435539722442627, "learning_rate": 7.076659347526954e-06, "loss": 0.7207, "step": 7805 }, { "epoch": 1.977454084863838, "grad_norm": 3.583136796951294, "learning_rate": 7.0758971537526e-06, "loss": 0.8254, "step": 7806 }, { "epoch": 1.977707409753008, "grad_norm": 4.014275550842285, "learning_rate": 7.075134901688028e-06, "loss": 0.9418, "step": 7807 }, { "epoch": 1.9779607346421786, "grad_norm": 3.8396565914154053, "learning_rate": 7.07437259135464e-06, "loss": 0.7992, "step": 7808 }, { "epoch": 1.978214059531349, "grad_norm": 3.843681573867798, "learning_rate": 7.073610222773844e-06, "loss": 0.7475, "step": 7809 }, { "epoch": 1.9784673844205192, "grad_norm": 3.5276191234588623, "learning_rate": 7.0728477959670415e-06, "loss": 0.7265, "step": 7810 }, { "epoch": 1.9787207093096897, "grad_norm": 
3.8873448371887207, "learning_rate": 7.072085310955645e-06, "loss": 0.8069, "step": 7811 }, { "epoch": 1.9789740341988602, "grad_norm": 3.556809663772583, "learning_rate": 7.0713227677610655e-06, "loss": 0.7934, "step": 7812 }, { "epoch": 1.9792273590880303, "grad_norm": 3.4937283992767334, "learning_rate": 7.070560166404713e-06, "loss": 0.8782, "step": 7813 }, { "epoch": 1.9794806839772008, "grad_norm": 3.6915431022644043, "learning_rate": 7.069797506908e-06, "loss": 0.8141, "step": 7814 }, { "epoch": 1.979734008866371, "grad_norm": 4.639492511749268, "learning_rate": 7.069034789292345e-06, "loss": 0.8645, "step": 7815 }, { "epoch": 1.9799873337555414, "grad_norm": 3.667537212371826, "learning_rate": 7.068272013579163e-06, "loss": 0.6504, "step": 7816 }, { "epoch": 1.9802406586447119, "grad_norm": 4.093923091888428, "learning_rate": 7.067509179789871e-06, "loss": 0.8769, "step": 7817 }, { "epoch": 1.9804939835338822, "grad_norm": 3.478240489959717, "learning_rate": 7.06674628794589e-06, "loss": 0.7769, "step": 7818 }, { "epoch": 1.9807473084230525, "grad_norm": 3.2936277389526367, "learning_rate": 7.065983338068643e-06, "loss": 0.7441, "step": 7819 }, { "epoch": 1.981000633312223, "grad_norm": 3.8519601821899414, "learning_rate": 7.065220330179552e-06, "loss": 0.802, "step": 7820 }, { "epoch": 1.9812539582013933, "grad_norm": 3.640568494796753, "learning_rate": 7.064457264300041e-06, "loss": 0.7295, "step": 7821 }, { "epoch": 1.9815072830905636, "grad_norm": 3.5209779739379883, "learning_rate": 7.063694140451538e-06, "loss": 0.8936, "step": 7822 }, { "epoch": 1.981760607979734, "grad_norm": 3.9504730701446533, "learning_rate": 7.062930958655472e-06, "loss": 0.8196, "step": 7823 }, { "epoch": 1.9820139328689044, "grad_norm": 3.680300712585449, "learning_rate": 7.06216771893327e-06, "loss": 0.7316, "step": 7824 }, { "epoch": 1.9822672577580747, "grad_norm": 3.858332872390747, "learning_rate": 7.061404421306365e-06, "loss": 0.8491, "step": 7825 }, { "epoch": 1.9825205826472452, "grad_norm": 3.9465181827545166, "learning_rate": 7.06064106579619e-06, "loss": 0.7303, "step": 7826 }, { "epoch": 1.9827739075364155, "grad_norm": 3.9884231090545654, "learning_rate": 7.059877652424181e-06, "loss": 0.8044, "step": 7827 }, { "epoch": 1.9830272324255858, "grad_norm": 3.5908045768737793, "learning_rate": 7.059114181211771e-06, "loss": 0.7816, "step": 7828 }, { "epoch": 1.9832805573147563, "grad_norm": 3.938767910003662, "learning_rate": 7.058350652180401e-06, "loss": 0.7904, "step": 7829 }, { "epoch": 1.9835338822039266, "grad_norm": 3.7275545597076416, "learning_rate": 7.05758706535151e-06, "loss": 0.7388, "step": 7830 }, { "epoch": 1.9837872070930969, "grad_norm": 3.831284999847412, "learning_rate": 7.056823420746538e-06, "loss": 0.7877, "step": 7831 }, { "epoch": 1.9840405319822674, "grad_norm": 3.974714994430542, "learning_rate": 7.056059718386927e-06, "loss": 0.8221, "step": 7832 }, { "epoch": 1.9842938568714377, "grad_norm": 3.6349573135375977, "learning_rate": 7.055295958294124e-06, "loss": 0.7492, "step": 7833 }, { "epoch": 1.984547181760608, "grad_norm": 3.9935271739959717, "learning_rate": 7.054532140489575e-06, "loss": 0.7947, "step": 7834 }, { "epoch": 1.9848005066497785, "grad_norm": 3.484941005706787, "learning_rate": 7.053768264994725e-06, "loss": 0.6921, "step": 7835 }, { "epoch": 1.9850538315389485, "grad_norm": 3.6672487258911133, "learning_rate": 7.053004331831025e-06, "loss": 0.7615, "step": 7836 }, { "epoch": 1.985307156428119, "grad_norm": 3.7978787422180176, "learning_rate": 
7.0522403410199245e-06, "loss": 0.762, "step": 7837 }, { "epoch": 1.9855604813172896, "grad_norm": 3.566520929336548, "learning_rate": 7.051476292582878e-06, "loss": 0.7821, "step": 7838 }, { "epoch": 1.9858138062064596, "grad_norm": 3.4677298069000244, "learning_rate": 7.050712186541339e-06, "loss": 0.7713, "step": 7839 }, { "epoch": 1.9860671310956302, "grad_norm": 3.8835580348968506, "learning_rate": 7.0499480229167615e-06, "loss": 0.8336, "step": 7840 }, { "epoch": 1.9863204559848007, "grad_norm": 3.6812779903411865, "learning_rate": 7.049183801730606e-06, "loss": 0.7183, "step": 7841 }, { "epoch": 1.9865737808739707, "grad_norm": 3.9878032207489014, "learning_rate": 7.048419523004328e-06, "loss": 0.8592, "step": 7842 }, { "epoch": 1.9868271057631413, "grad_norm": 4.390382289886475, "learning_rate": 7.0476551867593915e-06, "loss": 0.852, "step": 7843 }, { "epoch": 1.9870804306523115, "grad_norm": 3.578312397003174, "learning_rate": 7.046890793017257e-06, "loss": 0.8225, "step": 7844 }, { "epoch": 1.9873337555414818, "grad_norm": 3.5974812507629395, "learning_rate": 7.046126341799387e-06, "loss": 0.8266, "step": 7845 }, { "epoch": 1.9875870804306524, "grad_norm": 3.3824779987335205, "learning_rate": 7.045361833127249e-06, "loss": 0.6321, "step": 7846 }, { "epoch": 1.9878404053198226, "grad_norm": 3.554611921310425, "learning_rate": 7.04459726702231e-06, "loss": 0.8004, "step": 7847 }, { "epoch": 1.988093730208993, "grad_norm": 3.989422559738159, "learning_rate": 7.043832643506036e-06, "loss": 0.8787, "step": 7848 }, { "epoch": 1.9883470550981635, "grad_norm": 3.763200283050537, "learning_rate": 7.0430679625999035e-06, "loss": 0.8115, "step": 7849 }, { "epoch": 1.9886003799873337, "grad_norm": 3.778221607208252, "learning_rate": 7.042303224325375e-06, "loss": 0.8372, "step": 7850 }, { "epoch": 1.988853704876504, "grad_norm": 3.8948304653167725, "learning_rate": 7.041538428703931e-06, "loss": 0.756, "step": 7851 }, { "epoch": 1.9891070297656746, "grad_norm": 3.5517871379852295, "learning_rate": 7.040773575757045e-06, "loss": 0.7555, "step": 7852 }, { "epoch": 1.9893603546548448, "grad_norm": 3.535416841506958, "learning_rate": 7.040008665506195e-06, "loss": 0.8107, "step": 7853 }, { "epoch": 1.9896136795440151, "grad_norm": 4.027966499328613, "learning_rate": 7.039243697972856e-06, "loss": 0.8943, "step": 7854 }, { "epoch": 1.9898670044331856, "grad_norm": 3.422999382019043, "learning_rate": 7.03847867317851e-06, "loss": 0.7349, "step": 7855 }, { "epoch": 1.990120329322356, "grad_norm": 3.5104379653930664, "learning_rate": 7.03771359114464e-06, "loss": 0.7075, "step": 7856 }, { "epoch": 1.9903736542115262, "grad_norm": 4.1053667068481445, "learning_rate": 7.0369484518927245e-06, "loss": 0.8716, "step": 7857 }, { "epoch": 1.9906269791006967, "grad_norm": 3.7057645320892334, "learning_rate": 7.036183255444253e-06, "loss": 0.7537, "step": 7858 }, { "epoch": 1.990880303989867, "grad_norm": 3.6755993366241455, "learning_rate": 7.03541800182071e-06, "loss": 0.7499, "step": 7859 }, { "epoch": 1.9911336288790373, "grad_norm": 3.893325090408325, "learning_rate": 7.034652691043582e-06, "loss": 0.7036, "step": 7860 }, { "epoch": 1.9913869537682078, "grad_norm": 3.5237503051757812, "learning_rate": 7.033887323134361e-06, "loss": 0.756, "step": 7861 }, { "epoch": 1.9916402786573781, "grad_norm": 3.12933349609375, "learning_rate": 7.033121898114537e-06, "loss": 0.6639, "step": 7862 }, { "epoch": 1.9918936035465484, "grad_norm": 3.5588948726654053, "learning_rate": 7.032356416005603e-06, "loss": 0.7761, 
"step": 7863 }, { "epoch": 1.992146928435719, "grad_norm": 4.064525127410889, "learning_rate": 7.031590876829053e-06, "loss": 0.8608, "step": 7864 }, { "epoch": 1.992400253324889, "grad_norm": 3.3582210540771484, "learning_rate": 7.030825280606384e-06, "loss": 0.738, "step": 7865 }, { "epoch": 1.9926535782140595, "grad_norm": 3.7255241870880127, "learning_rate": 7.030059627359093e-06, "loss": 0.945, "step": 7866 }, { "epoch": 1.99290690310323, "grad_norm": 3.6472373008728027, "learning_rate": 7.029293917108678e-06, "loss": 0.7403, "step": 7867 }, { "epoch": 1.9931602279924001, "grad_norm": 3.845834732055664, "learning_rate": 7.028528149876644e-06, "loss": 0.8059, "step": 7868 }, { "epoch": 1.9934135528815706, "grad_norm": 3.227482318878174, "learning_rate": 7.027762325684488e-06, "loss": 0.7175, "step": 7869 }, { "epoch": 1.9936668777707411, "grad_norm": 4.206416130065918, "learning_rate": 7.026996444553716e-06, "loss": 0.7829, "step": 7870 }, { "epoch": 1.9939202026599112, "grad_norm": 3.581221580505371, "learning_rate": 7.026230506505834e-06, "loss": 0.823, "step": 7871 }, { "epoch": 1.9941735275490817, "grad_norm": 3.8868722915649414, "learning_rate": 7.025464511562347e-06, "loss": 0.7844, "step": 7872 }, { "epoch": 1.994426852438252, "grad_norm": 4.018174171447754, "learning_rate": 7.02469845974477e-06, "loss": 0.8255, "step": 7873 }, { "epoch": 1.9946801773274223, "grad_norm": 4.231254577636719, "learning_rate": 7.0239323510746074e-06, "loss": 0.8246, "step": 7874 }, { "epoch": 1.9949335022165928, "grad_norm": 4.085860252380371, "learning_rate": 7.023166185573371e-06, "loss": 0.7985, "step": 7875 }, { "epoch": 1.9951868271057631, "grad_norm": 3.792830228805542, "learning_rate": 7.022399963262578e-06, "loss": 0.8093, "step": 7876 }, { "epoch": 1.9954401519949334, "grad_norm": 3.3098559379577637, "learning_rate": 7.021633684163742e-06, "loss": 0.7373, "step": 7877 }, { "epoch": 1.995693476884104, "grad_norm": 3.6087119579315186, "learning_rate": 7.020867348298381e-06, "loss": 0.8426, "step": 7878 }, { "epoch": 1.9959468017732742, "grad_norm": 3.725323438644409, "learning_rate": 7.020100955688009e-06, "loss": 0.811, "step": 7879 }, { "epoch": 1.9962001266624445, "grad_norm": 3.5992720127105713, "learning_rate": 7.019334506354151e-06, "loss": 0.7686, "step": 7880 }, { "epoch": 1.996453451551615, "grad_norm": 4.135205268859863, "learning_rate": 7.018568000318327e-06, "loss": 0.7655, "step": 7881 }, { "epoch": 1.9967067764407853, "grad_norm": 3.6513140201568604, "learning_rate": 7.0178014376020575e-06, "loss": 0.7065, "step": 7882 }, { "epoch": 1.9969601013299556, "grad_norm": 3.5085322856903076, "learning_rate": 7.017034818226871e-06, "loss": 0.7248, "step": 7883 }, { "epoch": 1.9972134262191261, "grad_norm": 3.4335103034973145, "learning_rate": 7.016268142214291e-06, "loss": 0.7372, "step": 7884 }, { "epoch": 1.9974667511082964, "grad_norm": 3.483394145965576, "learning_rate": 7.015501409585847e-06, "loss": 0.6897, "step": 7885 }, { "epoch": 1.9977200759974667, "grad_norm": 3.5033979415893555, "learning_rate": 7.0147346203630686e-06, "loss": 0.7084, "step": 7886 }, { "epoch": 1.9979734008866372, "grad_norm": 4.055886268615723, "learning_rate": 7.013967774567485e-06, "loss": 0.8701, "step": 7887 }, { "epoch": 1.9982267257758075, "grad_norm": 3.7454984188079834, "learning_rate": 7.0132008722206316e-06, "loss": 0.715, "step": 7888 }, { "epoch": 1.9984800506649778, "grad_norm": 3.3161065578460693, "learning_rate": 7.01243391334404e-06, "loss": 0.7263, "step": 7889 }, { "epoch": 
1.9987333755541483, "grad_norm": 3.382779598236084, "learning_rate": 7.0116668979592485e-06, "loss": 0.7841, "step": 7890 }, { "epoch": 1.9989867004433186, "grad_norm": 3.61897349357605, "learning_rate": 7.0108998260877925e-06, "loss": 0.8071, "step": 7891 }, { "epoch": 1.999240025332489, "grad_norm": 3.5087099075317383, "learning_rate": 7.010132697751212e-06, "loss": 0.6358, "step": 7892 }, { "epoch": 1.9994933502216594, "grad_norm": 3.284588575363159, "learning_rate": 7.009365512971048e-06, "loss": 0.7479, "step": 7893 }, { "epoch": 1.9997466751108295, "grad_norm": 3.6976845264434814, "learning_rate": 7.008598271768842e-06, "loss": 0.7084, "step": 7894 }, { "epoch": 2.0, "grad_norm": 3.472818613052368, "learning_rate": 7.007830974166138e-06, "loss": 0.7147, "step": 7895 } ], "logging_steps": 1.0, "max_steps": 19735, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.462352584434778e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }
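
The object above appears to be the checkpoint state written out by the Hugging Face Trainer (conventionally `trainer_state.json`), with per-step metrics in `log_history` and run-level metadata (`global_step`, `max_steps`, `num_train_epochs`, `total_flos`, and so on) at the top level. Below is a minimal, illustrative sketch of how such a file could be loaded and the loss and learning-rate curves plotted. It assumes the JSON is saved under the name `trainer_state.json` and that `matplotlib` is installed; the smoothing window and plot layout are arbitrary choices for illustration, not part of the original log.

```python
# Sketch: load a Trainer checkpoint state and plot loss / learning-rate curves.
# Assumptions: the JSON shown above is saved as "trainer_state.json" in the
# working directory, and matplotlib is available.
import json
from pathlib import Path

import matplotlib.pyplot as plt

state = json.loads(Path("trainer_state.json").read_text())

# Keep only log entries that carry both a loss and a learning rate, so the
# three series below stay aligned (evaluation-only entries, if any, are skipped).
entries = [e for e in state["log_history"] if "loss" in e and "learning_rate" in e]
steps = [e["step"] for e in entries]
loss = [e["loss"] for e in entries]
lr = [e["learning_rate"] for e in entries]

# Smooth the noisy per-step loss with a simple trailing moving average.
window = 50
smoothed = [
    sum(loss[max(0, i - window + 1): i + 1]) / (i - max(0, i - window + 1) + 1)
    for i in range(len(loss))
]

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True, figsize=(8, 6))
ax_loss.plot(steps, loss, alpha=0.3, label="loss (per step)")
ax_loss.plot(steps, smoothed, label=f"loss (moving avg, {window} steps)")
ax_loss.set_ylabel("training loss")
ax_loss.legend()

ax_lr.plot(steps, lr)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("global step")

fig.suptitle(
    f"epoch {state['epoch']} of {state['num_train_epochs']}, "
    f"step {state['global_step']} / {state['max_steps']}"
)
fig.tight_layout()
plt.show()
```

Filtering on the presence of the `loss` and `learning_rate` keys is a defensive choice: in logs from longer runs, entries written at evaluation or save boundaries may omit one of these fields, and keeping only complete entries keeps the plotted series the same length.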